# word_list_compile
# compare two files, compile common words with file name and
# line number
import string, re
def wordList(words):
    """Return the normalized words contained in the text ``words``.

    The text is split on whitespace.  Every token that contains a digit
    is discarded.  Each remaining token has surrounding whitespace and
    surrounding ``string.punctuation`` characters stripped and is then
    lowercased.  Tokens that end up empty (for example pure punctuation
    such as ``...``) are left out.

    The result is a list in input order; duplicates are kept.
    """
    patt = re.compile(r'\d+')
    word_list = []
    for raw in words.split():
        # eliminate words with digits
        if patt.search(raw):
            continue
        # strip punctuation and whitespace, lowercase
        word = raw.strip().strip(string.punctuation).lower()
        # eliminate blank words
        if word != '':
            word_list.append(word)
    return word_list
def matchtermer(fn1, fn2):
    """Map each word shared by two text files to where it occurs in ``fn2``.

    ``fn1`` is the file to compare against: its whole content is run
    through ``wordList`` to obtain the set of reference words.
    ``fn2`` is the file being compared: it is scanned line by line, and
    every whitespace-separated token is stripped of surrounding
    punctuation and lowercased before being looked up.

    Returns a dict ``{word: [(fn2, line_number), ...]}`` with 1-based
    line numbers.  A word that appears several times on one line gets
    one entry per appearance.
    """
    # file to compare against; a set gives constant-time lookups in the
    # inner loop below instead of rescanning a list for every token
    with open(fn1) as reference:
        known_words = set(wordList(reference.read()))

    dd = {}
    # file to compare; "with" guarantees the handle is closed
    with open(fn2) as compared:
        for line_no, line in enumerate(compared, 1):
            for word in line.split():
                word = word.strip().strip(string.punctuation).lower()
                if word in known_words:
                    dd.setdefault(word, []).append((fn2, line_no))
    return dd
# Driver: compare the two hard-coded files and print every common word
# followed by the (file name, line number) pairs where it occurs in fn2.
fn1 = r'H:\TEMP\temsys\612-Deck.txt'
fn2 = r'H:\TEMP\temsys\612-DeckNotes.txt'
dd = matchtermer(fn1, fn2)
for key in dd:
    # single-argument print(...) yields the same output under the
    # Python 2 print statement and the Python 3 print function
    print(key)
    for item in dd[key]:
        # two leading spaces: same text as the original "print ' ', item"
        print('  %s' % (item,))
'''
>>> case
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
3)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
8)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
15)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
19)
studs
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
3)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
5)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
8)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
10)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
16)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
18)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
20)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
21)
installation
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
1)
additional
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
1)
beam
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
4)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
6)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
6)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
9)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
11)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
12)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
14)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
20)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
22)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
22)
deck
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
2)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
14)
beams
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
2)
notes
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
1)
center
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
6)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
12)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
22)
perpendicular
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
2)
fewer
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
3)
to
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
2)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
14)
stud
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
1)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
4)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
5)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
7)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
9)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
10)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
15)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
19)
than
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
3)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
8)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
17)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
18)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
20)
flutes
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
3)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
5)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
8)
('H:\\TEMP\\temsys\\612-DeckNotes.txt',
10)
>>>
'''