1#!/usr/local/bin/python
2#
3#
4# an aggressive little script for trimming duplicate cookies
5from __future__ import print_function
6import argparse
7import re
8
# Filler words stripped from a fortune before it is compared with others.
# Order matters: hash() removes the entries one after another, so longer
# words come first ('hadnot' has to go before 'had' can take a bite out
# of it and leave a stray 'not' behind).
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]


def hash(fortune):
    """Return a fuzzy comparison key for *fortune*.

    The text is lower-cased, every non-word character and underscore is
    dropped, each entry of ``wordlist`` is deleted in list order, and the
    first 30 characters of what remains are returned.  Fortunes that
    differ only in case, punctuation, spacing or those filler words
    therefore produce the same key.

    fortune -- the fortune text (str).
    Returns the key as a str of at most 30 characters; it may be empty.
    """
    # NOTE: this name shadows the builtin hash(); it is kept because
    # edit() calls the function under this name.
    key = re.sub(r'[\W_]', '', fortune.lower())
    for word in wordlist:
        # The entries are plain literals, so no regex is needed here.
        key = key.replace(word, '')
    # Alternative keys once sketched here: drop the vowels, drop the
    # consonants, or keep the last 30 characters instead of the first.
    return key[:30]
30
31
def edit(datfile):
    """Interactively weed near-duplicate fortunes out of *datfile*.

    The file is read as a sequence of fortunes, each terminated by a line
    holding only ``%``.  Fortunes whose hash() keys match form a group of
    suspected duplicates.  For every member of such a group the other
    members and then the fortune itself are printed, and the user is
    asked whether to drop it; answering ``y`` drops it.  Prompting for a
    group stops once a single member is left, so at least one fortune of
    every group always survives.

    The surviving fortunes are written in their original order, each
    followed by a ``%`` line, to ``datfile + "~"``.  *datfile* itself is
    only read.

    datfile -- path of the fortune file to scan.
    """
    # Python 2's input() evaluates whatever is typed; raw_input() is the
    # plain-text prompt there.  Python 3 only has input().
    try:
        ask = raw_input
    except NameError:
        ask = input

    groups = {}   # key -> every fortune that produced this key
    entries = []  # (key, fortune) pairs in file order

    def remember(text):
        # Compute the key once and reuse it when writing the output.
        key = hash(text)
        groups.setdefault(key, []).append(text)
        entries.append((key, text))

    pending = []
    with open(datfile, "r") as datfiledf:
        for line in datfiledf:
            # A bare "%" can only be the last line of a file without a
            # trailing newline; it still terminates the fortune.
            if line in ("%\n", "%"):
                remember("".join(pending))
                pending = []
            else:
                pending.append(line)

    # Keep a final fortune that lacks its "%" terminator instead of
    # silently dropping it; trailing blank lines alone are not a fortune.
    tail = "".join(pending)
    if tail.strip():
        if not tail.endswith("\n"):
            tail += "\n"
        remember(tail)

    # Only keys shared by two or more fortunes need the user's attention.
    dups = dict((key, members) for key, members in groups.items()
                if len(members) > 1)

    with open(datfile + "~", "w") as o:
        for key, fortune in entries:
            if key in dups:
                members = dups[key]
                print('\n' * 50)  # crude screen clear between prompts
                for other in members:
                    if other != fortune:
                        print(other, '%')
                print(fortune, '%')
                if ask("Remove last fortune? ").strip().lower() == 'y':
                    # Drop just this member; keep prompting for the rest
                    # of the group until only one fortune remains.
                    members.remove(fortune)
                    if len(members) < 2:
                        del dups[key]
                    continue
            o.write(fortune + "%\n")
63
def main(argv=None):
    """Parse the command line and run edit() on the named fortune file.

    argv -- argument list without the program name; None (the default)
            makes argparse read sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    parser.add_argument("filename", type=str)
    args = parser.parse_args(argv)
    edit(args.filename)


# Run only when executed as a script, so that importing this module does
# not parse sys.argv or start an interactive session.
if __name__ == "__main__":
    main()
68