# Variable-length substring dictionary: maps each lowercase substring of a chosen length to the indices where it occurs in a string.

 

import re

 

"""

Return an index list of all occurrances of 'item' in string 's'.

Optional start search position 'i'

"""

def indexList(s, item, i=0):

    i_list = []

    while True:

        try:

            i = s.index(item, i)

            i_list.append(i)

            i += 1

        except:

            break

    return i_list

 

# Sample text whose substrings are indexed below.
s = 'Having the base plate subassembly mark at each column will do two things - reference the large scale base plate details and provide the checker with a check of the column bases in the model without having to check each column in the model.'

# Length of the substrings to collect.
sub_length = 4

# Compile the pattern once instead of rebuilding it for every window.
sub_pattern = re.compile(r'[a-z]{%s}' % sub_length)

# Every window of sub_length characters that consists solely of lowercase
# ASCII letters, in order of appearance (duplicates are kept here).
subList = [s[i:i + sub_length] for i in range(len(s))
           if sub_pattern.match(s[i:i + sub_length])]

# Map each distinct substring to the list of positions where it occurs in s.
dd = {}
for subi in subList:
    # 'in' replaces dict.has_key, which no longer exists in Python 3.
    if subi not in dd:
        dd[subi] = indexList(s, subi)

# A single parenthesised argument prints identically on Python 2 and 3.
for key in dd:
    print('%s = %s' % (key, dd[key]))

 

'''

>>> ence = [80]

renc = [79]

fere = [77]

scal = [95]

thin = [66]

colu = [47, 164, 219]

ving = [2, 200]

mode = [184, 233]

ovid = [126]

suba = [22]

havi = [198]

refe = [75]

cker = [139]

ails = [115]

itho = [191]

avin = [1, 199]

prov = [124]

thou = [192]

olum = [48, 165, 220]

odel = [185, 234]

vide = [127]

mark = [34]

late = [17, 107]

tail = [114]

hing = [67]

ssem = [26]

mbly = [29]

eren = [78]

cale = [96]

ings = [68]

embl = [28]

bass = [24]

ases = [172]

ecke = [138]

efer = [76]

plat = [16, 106]

lumn = [49, 166, 221]

base = [11, 101, 171]

with = [144, 190]

arge = [90]

hout = [193]

larg = [89]

asse = [25]

semb = [27]

etai = [113]

will = [54]

deta = [112]

heck = [137, 152, 209]

ubas = [23]

each = [42, 214]

rovi = [125]

chec = [136, 151, 208]

'''

 

'''

>>> em = [28]

ck = [139, 154, 211]

ac = [43, 215]

ch = [44, 136, 151, 208, 216]

co = [47, 164, 219]

ro = [125]

od = [185, 234]

ai = [115]

vi = [2, 127, 200]

ca = [96]

in = [3, 68, 177, 201, 226]

ea = [42, 214]

al = [97]

ce = [82]

an = [120]

rg = [91]

as = [12, 25, 102, 172]

ar = [35, 90]

at = [18, 39, 108]

et = [113]

av = [1, 199]

ut = [195]

gs = [70]

id = [128]

es = [174]

er = [78, 141]

pr = [124]

ec = [138, 153, 210]

le = [98]

la = [17, 89, 107]

il = [55, 116]

tw = [62]

ll = [56]

nc = [81]

nd = [121]

ng = [4, 69, 202]

wo = [63]

lu = [49, 166, 221]

en = [80]

ls = [117]

th = [7, 66, 85, 132, 146, 160, 180, 192, 229]

te = [19, 109]

pl = [16, 106]

ly = [31]

do = [59]

ef = [76]

ba = [11, 24, 101, 171]

el = [187, 236]

bl = [30]

de = [112, 129, 186, 235]

to = [205]

wi = [54, 144, 190]

it = [145, 191]

hi = [67]

fe = [77]

re = [75, 79]

ha = [198]

he = [8, 86, 133, 137, 152, 161, 181, 209, 230]

ol = [48, 165, 220]

ma = [34]

ke = [140]

mb = [29]

ss = [26]

ta = [114]

mo = [184, 233]

mn = [51, 168, 223]

ge = [92]

su = [22]

um = [50, 167, 222]

ho = [193]

of = [157]

sc = [95]

ov = [126]

ou = [194]

rk = [36]

se = [13, 27, 103, 173]

ub = [23]

>>>

'''

 

'''

s = 'AttributeError: ConfigParser instance has no attribute help'.lower()

subList1 = re.findall(r'[a-z]{2}', s)

# print subList1

'''

'''

i = 0

subList = []

for j in range(len(s)):

    subi = s[i:i+2]

    if re.match(r'[a-z]{2}', subi):

        subList.append(subi)

    i += 1

print subList

print len(subList)

print

'''