# word_list_compile

# compare two files: for every word common to both, list the file name and
# line numbers at which that word occurs in the second file

 

import string, re

 

def wordList(words):
    """Return the normalized words contained in the text *words*.

    The text is split on whitespace. Tokens containing a digit are
    discarded; every other token has surrounding whitespace and
    punctuation stripped and is lowercased. Tokens that end up empty
    (i.e. were nothing but punctuation) are left out. Order and
    duplicates are preserved.
    """
    has_digit = re.compile(r'\d+').search
    cleaned = []
    for token in words.split():
        # drop anything with a digit in it before normalizing
        if has_digit(token):
            continue
        token = token.strip().strip(string.punctuation).lower()
        # a token made only of punctuation is now blank; skip it
        if token != '':
            cleaned.append(token)
    return cleaned

 

def matchtermer(fn1, fn2):
    """Index the words of file *fn2* that also occur in file *fn1*.

    fn1 is read whole and reduced to its normalized words by wordList().
    fn2 is scanned line by line: each whitespace-separated token is
    stripped of surrounding whitespace and punctuation and lowercased,
    and if it is one of fn1's words the pair (fn2, line_number) is
    recorded under that word. Line numbers are 1-based. A word that
    appears several times on one line is recorded once per appearance.

    Returns a dict mapping each common word to a list of
    (fn2, line_number) tuples, in the order they were encountered.
    """
    # file to compare against; kept as a set so each membership test
    # below is a hash lookup rather than a scan of the whole word list
    with open(fn1) as f1:
        known_words = set(wordList(f1.read()))

    dd = {}
    # file to compare; the with-blocks guarantee both files are closed,
    # even if an error occurs part-way through
    with open(fn2) as f2:
        for lineno, line in enumerate(f2, 1):
            for word in line.split():
                word = word.strip().strip(string.punctuation).lower()
                if word in known_words:
                    dd.setdefault(word, []).append((fn2, lineno))
    return dd

 

# Hard-coded input paths: fn1 supplies the words to look for, fn2 is the
# file whose lines are searched for those words.
fn1 = r'H:\TEMP\temsys\612-Deck.txt'
fn2 = r'H:\TEMP\temsys\612-DeckNotes.txt'

dd = matchtermer(fn1, fn2)

# Print each common word followed by its indented (file name, line number)
# entries. Each print() takes a single pre-formatted argument so the text
# written is the same under Python 2 and Python 3 (the old
# "print '   ', item" statement form is a syntax error on Python 3).
for key in dd:
    print(key)
    for item in dd[key]:
        # three spaces plus the separator the print statement inserted
        print('    %s' % (item,))

 

'''

>>> case

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 3)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 8)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 15)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 19)

studs

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 3)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 5)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 8)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 10)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 16)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 18)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 20)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 21)

installation

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 1)

additional

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 1)

beam

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 4)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 6)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 6)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 9)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 11)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 12)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 14)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 20)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 22)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 22)

deck

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 2)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 14)

beams

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 2)

notes

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 1)

center

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 6)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 12)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 22)

perpendicular

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 2)

fewer

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 3)

to

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 2)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 14)

stud

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 1)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 4)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 5)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 7)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 9)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 10)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 15)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 19)

than

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 3)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 8)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 17)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 18)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 20)

flutes

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 3)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 5)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 8)

    ('H:\\TEMP\\temsys\\612-DeckNotes.txt', 10)

>>>

'''