# Parse matrix data and sequence data files
def parseArray(fn, dataset=1, key='PO', term='/'):
    '''
    Read a formatted data file in matrix format and compile the data
    into a dictionary of normalized values.

    The file is scanned for the ``dataset``-th line starting with ``key``.
    The first token of that line is dropped and the remaining tokens are
    used as column names.  Every following non-blank line, up to the first
    line starting with ``term`` (or the end of the file), is a row: its
    first token is the row key and the remaining tokens are floats.

    Each value has 1.0 added to it and is then divided by the total of its
    own row (the sum of the row's values plus one for each value), so for a
    four-column row the divisor is ``sum(row) + 4``.

    Parameters:
        fn      -- path of the matrix file
        dataset -- 1-based index of the data set to read
        key     -- prefix that marks the header line of a data set
        term    -- prefix that marks the line ending a data set

    Returns a dictionary ``{column name: {row key: normalized value}}``,
    or False (after printing a message) when the file holds fewer than
    ``dataset`` header lines.
    '''
    with open(fn) as f:
        # Skip to the header line of the required data set.
        try:
            for _ in range(dataset):
                line = next(f)
                while not line.startswith(key):
                    line = next(f)
        except StopIteration:
            print('We have reached the end of the file!')
            return False
        headerList = line.strip().split()[1:]

        # Collect the rows; a terminator line or the end of the file
        # closes the data set.
        lineList = []
        for line in f:
            line = line.strip()
            if line.startswith(term):
                break
            if line:
                lineList.append(line.split())

    # Row key -> list of values (a repeated row key keeps its last row).
    lineDict = {}
    for fields in lineList:
        lineDict[fields[0]] = [float(s) for s in fields[1:]]

    # Row key -> divisor: the row sum once 1.0 was added to every value.
    # Looking the divisor up by row key (not by int(key) - 1) keeps rows
    # and sums paired even when keys are not numbered 1..n in file order.
    rowTotals = {}
    for rowKey, rowValues in lineDict.items():
        rowTotals[rowKey] = sum(rowValues) + len(rowValues)

    dataDict = {}
    for i, item in enumerate(headerList):
        column = {}
        for rowKey, rowValues in lineDict.items():
            column[rowKey] = (rowValues[i] + 1.0) / rowTotals[rowKey]
        dataDict[item] = column
    return dataDict
def parseData(fn, dataset=1, key='>'):
    '''
    Read one data set from a formatted file of sequences.

    The file is scanned for the ``dataset``-th line starting with ``key``;
    that line is the header.  Every following line, up to the next line
    starting with ``key`` or the end of the file, belongs to the data set.

    Parameters:
        fn      -- path of the sequence file
        dataset -- 1-based index of the data set to read
        key     -- prefix that marks a header line

    Returns a list whose first element is the header line exactly as read
    (including its line ending) and whose remaining elements are the
    following lines stripped of surrounding whitespace (blank lines are
    kept as empty strings).  Returns False (after printing a message) when
    the file holds fewer than ``dataset`` header lines.
    '''
    with open(fn) as f:
        # Skip to the header line of the required data set.
        try:
            for _ in range(dataset):
                s = next(f)
                while not s.startswith(key):
                    s = next(f)
        except StopIteration:
            print('We have reached the end of the file!')
            return False

        # The header goes first, followed by the lines of this data set.
        dataList = [s]
        for line in f:
            if line.startswith(key):
                # Header of the next data set: stop here.
                break
            dataList.append(line.strip())
    return dataList
def compileData(fnArray, fnSeq, arraySet=1, seqSet=1):
    '''
    Score every window of one sequence against one matrix data set.

    The matrix is read with parseArray(fnArray, arraySet) and the sequence
    with parseData(fnSeq, seqSet).  The window length is the number of row
    keys of the matrix.  For each window the score is the product of the
    matrix values of its characters (character -> column, position -> row
    key in sorted order) divided by the product of the per-character
    factors in ``value``.

    Returns:
        None  -- parseArray returned nothing usable for ``arraySet``
        False -- parseData returned nothing usable for ``seqSet``
        str   -- a report with the set numbers, the sequence header and
                 one "Sequence = ... Calculation = ..." line per window

    A sequence character that is missing from the matrix columns or from
    ``value`` raises KeyError.
    '''
    # sequence factor dictionary
    value = {"A": 0.3, "T": 0.3, "C": 0.2, "G": 0.2}

    dataArray = parseArray(fnArray, arraySet)
    if not dataArray:
        return None
    dataSeq = parseData(fnSeq, seqSet)
    if not dataSeq:
        return False

    # This is the complete sequence (everything after the header)
    seq = ''.join(dataSeq[1:])
    # These are the subkeys of dataArray - '01', '02', '03', ...
    subKeys = sorted(dataArray['A'])
    width = len(subKeys)

    # Calculate num/den for each slice of the sequence.
    # Each sequence slice length = length of subKeys.
    # Example:
    #   seq = 'ATCGATA', subKeys length = 3
    #   'ATC', 'TCG', 'CGA', 'GAT', 'ATA'
    outLines = []
    for start in range(len(seq) - width + 1):
        # Slice by index instead of repeatedly chopping the head off seq.
        subseq = seq[start:start + width]
        num, den = 1.0, 1.0
        for j, s in enumerate(subseq):
            num *= dataArray[s][subKeys[j]]
            den *= value[s]
        outLines.append('Sequence = %s Calculation = %0.12f' % (subseq, num / den))
    outStr = '\n'.join(outLines)
    return 'Array set # = %d\nSequence set # = %d\nSequence Header: %s\n%s' % (arraySet, seqSet, dataSeq[0], outStr)
if __name__ == '__main__':
    # Run every sequence set against every array set and write the
    # collected reports to the output file.
    fnArray = r'H:\TEMP\temsys\data9.txt'
    fnSeq = r'H:\TEMP\temsys\data12.txt'
    outputfile = r'H:\TEMP\temsys\sequence_calc_data.txt'
    outList = []
    arraySet = 1
    calcdata = 1
    # compileData returns None when the array set is missing, which ends
    # the outer loop; any other falsy result only ends the inner loop so
    # the next array set is tried.
    while calcdata is not None:
        seqSet = 1
        while True:
            calcdata = compileData(fnArray, fnSeq, arraySet, seqSet)
            if not calcdata:
                break
            outList.append(calcdata)
            seqSet += 1
        arraySet += 1
    # The context manager closes the file even if the write fails.
    with open(outputfile, 'w') as f:
        f.write('\n'.join(outList))
'''
>>> Array
set # = 1
Sequence set # = 3
Sequence Header:
>Cp36_PRR|Drosophila melanogaster|Cp36|FBgn0000359|X:8324430..8324513
Sequence =
TCTAGAGATCTGGGCA Calculation = 0.000520377928
Sequence =
CTAGAGATCTGGGCAC Calculation = 0.000011324924
Sequence =
TAGAGATCTGGGCACG Calculation = 0.000010676845
Sequence =
AGAGATCTGGGCACGA Calculation = 0.000043154836
Sequence =
GAGATCTGGGCACGAT Calculation = 0.000049390322
Sequence =
AGATCTGGGCACGATG Calculation = 0.000000078869
Sequence =
GATCTGGGCACGATGG Calculation = 0.003679435071
Sequence =
ATCTGGGCACGATGGC Calculation = 0.000004580993
Sequence =
TCTGGGCACGATGGCG Calculation = 0.000025964167
Sequence =
CTGGGCACGATGGCGA Calculation = 0.000190953272
Sequence =
TGGGCACGATGGCGAG Calculation = 0.000209084862
Sequence =
GGGCACGATGGCGAGA Calculation = 0.000349499483
Sequence =
GGCACGATGGCGAGAC Calculation = 0.000014551293
Sequence =
GCACGATGGCGAGACA Calculation = 0.000253266698
Sequence =
CACGATGGCGAGACAA Calculation = 0.000002088444
Sequence =
ACGATGGCGAGACAAA Calculation = 0.000085759837
Sequence =
CGATGGCGAGACAAAG Calculation = 0.000719291466
Sequence =
GATGGCGAGACAAAGA Calculation = 0.108603646410
Sequence =
ATGGCGAGACAAAGAT Calculation = 0.000022105017
Sequence =
TGGCGAGACAAAGATG Calculation = 0.074916911295
Sequence =
GGCGAGACAAAGATGC Calculation = 0.000654673006
Sequence =
GCGAGACAAAGATGCG Calculation = 0.002905350767
Sequence =
CGAGACAAAGATGCGG Calculation = 0.040711263424
Sequence =
GAGACAAAGATGCGGC Calculation = 0.000066332349
Sequence =
AGACAAAGATGCGGCG Calculation = 0.000844706696
Sequence =
GACAAAGATGCGGCGC Calculation = 0.001363986600
Sequence =
ACAAAGATGCGGCGCA Calculation = 0.000000158236
Sequence =
CAAAGATGCGGCGCAA Calculation = 0.000248960708
Sequence =
AAAGATGCGGCGCAAA Calculation = 0.000003482795
Sequence =
AAGATGCGGCGCAAAA Calculation = 0.000003790517
Sequence =
AGATGCGGCGCAAAAT Calculation = 0.000062906122
Sequence =
GATGCGGCGCAAAATC Calculation = 0.000000630359
Sequence =
ATGCGGCGCAAAATCG Calculation = 0.000041339176
Sequence =
TGCGGCGCAAAATCGG Calculation = 0.007412276588
Sequence =
GCGGCGCAAAATCGGA Calculation = 0.000109927284
Sequence =
CGGCGCAAAATCGGAA Calculation = 0.032381958151
Sequence =
GGCGCAAAATCGGAAA Calculation = 0.027066447384
Sequence =
GCGCAAAATCGGAAAT Calculation = 0.000038441301
Sequence =
CGCAAAATCGGAAATG Calculation = 0.016863436369
Sequence =
GCAAAATCGGAAATGG Calculation = 0.016099091359
Sequence =
CAAAATCGGAAATGGA Calculation = 0.000929346454
Sequence =
AAAATCGGAAATGGAG Calculation = 0.000186989034
Sequence =
AAATCGGAAATGGAGA Calculation = 0.003120608869
Sequence =
AATCGGAAATGGAGAT Calculation = 0.000031851876
Sequence =
ATCGGAAATGGAGATG Calculation = 0.000387934984
Sequence =
TCGGAAATGGAGATGG Calculation = 0.000028928662
Sequence =
CGGAAATGGAGATGGA Calculation = 0.858721770074
Sequence =
GGAAATGGAGATGGAT Calculation = 0.000032582474
Sequence =
GAAATGGAGATGGATC Calculation = 0.000194328378
Sequence =
AAATGGAGATGGATCA Calculation = 0.000000025115
Sequence =
AATGGAGATGGATCAC Calculation = 0.000005746845
Sequence =
ATGGAGATGGATCACG Calculation = 0.000000225826
Sequence =
TGGAGATGGATCACGT Calculation = 0.093243689191
Sequence =
GGAGATGGATCACGTA Calculation = 0.000581140752
Sequence =
GAGATGGATCACGTAG Calculation = 0.000002101908
Sequence =
AGATGGATCACGTAGC Calculation = 0.000016524721
Sequence =
GATGGATCACGTAGCC Calculation = 0.000029313806
Sequence =
ATGGATCACGTAGCCG Calculation = 0.000535232860
Sequence =
TGGATCACGTAGCCGG Calculation = 0.000015091041
Sequence =
GGATCACGTAGCCGGC Calculation = 0.000010864488
Sequence =
GATCACGTAGCCGGCC Calculation = 0.000023539371
Sequence =
ATCACGTAGCCGGCCA Calculation = 0.001552014384
Sequence =
TCACGTAGCCGGCCAT Calculation = 0.000000040841
Sequence =
CACGTAGCCGGCCATG Calculation = 0.000005420914
Sequence =
ACGTAGCCGGCCATGG Calculation = 0.000010765295
Sequence =
CGTAGCCGGCCATGGC Calculation = 0.002425152785
Sequence =
GTAGCCGGCCATGGCG Calculation = 0.000000198520
Sequence =
TAGCCGGCCATGGCGG Calculation = 0.000220954056
>>>
'''
''' Data Files
NA bin
01 0.45 8.27 0.00
11.39
02 0.00 0.00 10.02
10.09
03 5.80 1.39 0.00
12.93
04 12.33 5.18 2.60
0.00
05 12.43 0.00 0.00
7.68
06 18.55 0.00 1.57
0.00
07 0.05 0.58 0.00
19.48
08 20.11 0.00 0.00
0.00
09 20.06 0.05 0.00
0.00
10 20.11 0.00 0.00
0.00
11 0.00 15.33 0.00
4.78
12 20.06 0.05 0.00
0.00
13 14.99 0.35 4.78
0.00
14 13.64 2.42 3.37
0.68
15 5.03 0.00 15.08
0.00
16 7.23 0.45 10.94
1.49
//
//
NA bap
01 0.00 3.67 0.00
0.00
02 0.00 0.00 3.67
0.00
03 0.00 0.00 0.00
3.67
04 0.00 3.67 0.00
0.00
05 3.67 0.00 0.00
0.00
06 3.46 0.00 0.22
0.00
07 0.00 0.00 3.67
0.00
08 0.00 0.00 0.00
3.67
09 0.00 0.00 0.00
3.67
10 0.00 3.67 0.00
0.00
11 3.67 0.00 0.00
0.00
12 3.67 0.00 0.00
0.00
13 0.00 0.00 3.67
0.00
14 0.00 0.00 0.00
3.67
15 0.00 0.00 3.67
0.00
16 0.00 3.67 0.00
0.00
//
//
NA bcd
01 42.55 8.75 145.86
8.14
02 0.14 0.53 204.64
0.00
03 126.83 78.02 0.11
0.34
04 0.21 0.17 0.00
204.92
05 0.00 12.38 0.43
192.50
06 174.48 0.95 1.32
28.56
07 79.53 4.70 100.44
20.64
//
//
NA bin
01 0.45 8.27 0.00
11.39
02 0.00 0.00 10.02
10.09
03 5.80 1.39 0.00
12.93
04 12.33 5.18 2.60
0.00
05 12.43 0.00 0.00
7.68
06 18.55 0.00 1.57
0.00
07 0.05 0.58 0.00
19.48
08 20.11 0.00 0.00
0.00
09 20.06 0.05 0.00
0.00
10 20.11 0.00 0.00
0.00
11 0.00 15.33 0.00
4.78
12 20.06 0.05 0.00
0.00
13 14.99 0.35 4.78
0.00
14 13.64 2.42 3.37
0.68
15 5.03 0.00 15.08
0.00
16 7.23 0.45 10.94
1.49
//
//
>CG9571_O-E|Drosophila
melanogaster|CG9571|FBgn0031086|X:19926374..19927133
CCAGTCCACCGGCCGCCGATCTATTTATACGAGAGGAAGAGGCTGAACTCGAGGATTACCCGTGTATCCTGGGACGCG
GATTAGCGATCCATTCCCCTTTTAATCGCCGCGCAAACAGATTCATGAAAGCCTTCGGATTCATTCATTGATCCACAT
CTACGGGAACGGGAGTCGCAAACGTTTTCGGATTAGCGCTGGACTAGCGGTTTCTAAATTGGATTATTTCTACCTGAC
CCTGGAGCCATCGTCCTCGTCCTCCGTCCCTTAGCGCCTCCTGCATGGATGTCGTTTTTGGGTTTCATACCTTTTCAC
ACTGGAAAAATACGGAATTTGTTGTAAGCCCTTTCAAGACGAATGGGATTTAGCTTCGGATGTCAACGTCACCATAAT
CATATTAGGAATATTTCTACTCAATTGCAATATTGGTACTTTTCTGACTGTAAACGCGATGATAATTACAAATATGCC
TAATTTGCTGTCTTTATAATCAAATGGAGTTCTTTATATTTCCAAAATATTGAAATTCCGATTCCCTAGAAAATAATA
CGTTTTTCTGTTATTAATAAAAAACCAATAGGAAAGTTCTCAAAAATTACTCTGTTGTATTTGATCATTTCTTTTCCG
GTATAATCTTTTATTTTAAGCATTCCCATGTGAATAAATTTCAGACTAATGTATTAATAAGATGTCGTGTTTTTCCAC
TTACAAATTTCTCATACAGCTGGATATATACTACGAGTACTATACACATGCTCTGGG
>Cp36_DRR|Drosophila
melanogaster|Cp36|FBgn0000359|X:8323349..8324136
AGTCGACCAGCACGAGATCTCACCTACCTTCTTTATAAGCGGGGTCTCTAGAAGCTAAATCCATGTCCACGTCAAACC
AAAGACTTGCGGTCTCCAGACCATTGAGTTCTATAAATGGGACTGAGCCACACCATACACCACACACCACACATACAC
ACACGCCAACACATTACACACAACACGAACTACACAAACACTGAGATTAAGGAAATTATTAAAAAAAATAATAAAATT
AATACAAAAAAAATATATATATATACAAAAATTTGTTGTGTTTGAATTGAATTAAGAGCTTATCAAGAAAAAAATTTC
AGTGACTCATAATACACTACTCTACAAGTTTAAATTGAATCAACAATTTAACTTTCATTGCTCAGGTTTTTAGTAACA
ATGTTTATATAAGTTTAGGTATAACAAATGATTTAAATATAAGATACTGTATTTCACATTGAGACGAAACAATCCACC
GAAAATCATAAAATATAAGAATGTTGCATTTTATTTTTAAAAATAAAGATGCCTTTTAAGAGGAATAACTTAAATGTC
TTTAATACCTTTGAATTTAATTATATGGCTAATAAACACAAACTTAAAGCTTAAAACTGCATCGAATTGAATGCGGTT
ATAAATGTACTTATATATCTAATATAATCTGCTAATATGGTTTACATGGTATATCTTTCTCGGAAATTTTTACAAAAA
TTATCTATTCATATATCTCGAGCGTAAGATATTTATCAGTTTATAGATAACATCTTTAAATTTGGGTGATTAAAAAAA
AACATTG
>Cp36_PRR|Drosophila
melanogaster|Cp36|FBgn0000359|X:8324430..8324513
TCTAGAGATCTGGGCACGATGGCGAGACAAAGATGCGGCGCAAAATCGGAAATGGAGATGGATCACGTAGCCGGCCAT
GGCGG
>Him_distal|Drosophila
melanogaster|Him|FBgn0030900|X:18039896..18043470
GGTTTTCTGCGATGGCTTCCGCGCCAGCTGAAGTATCTGATTTGCTGCCTTGTTTTTGTTGATATTTCTGCGAAGGGA
CTTGTGCTTTTCAAATGGCCTTTTTTTGGGATTACGGCAAGGGCGCGTTTCCCACGCTCGATCCCCACTTACCATTGG
TGCACGCGATTGCGGCAAGCTGCTGAGGCAAGCTATTAAACGCCACACTGGGCCGGGGGGCGGTACCGGTGGGCGTGG
CAGGGGAGTCGACACATGTTGTGTGCCAGAGAACTTTGCTCCGATCCCCAGATCATCAAATAGTTGTCGCTGTCTGCT
CGTGCGCAAATTGCAATACTTTGCATACCCTTACTGCAGGGTATCTGAGCTTGGACTTTAAATAAGGGGGTATAACAT
AGCTTATACTCTCTATCTCTGTTATAAAGTCAATTTTCCTTAGATCTTTAGTACAGTGGGTAGTTAAGGAGACATAAC
TTCCAAAAAAAAAAACTATAAAATTGCAATAATTTATGCAAAATATGTATTTTATTGAATGGGATGAATAATTTACCT
TATACGACTGTAAAACATTTCTAACGATTAAATGCACTTCTAAAAGTTTTCCCACAAGTAGGTGAGCTATTATGCTAA
GCGTTCCATGACTTGGAATCTAAGATCTTGTTTTGATCTTCGCTGATCTTTGAGAACTCGGGGATTACTTACACATTT
CTGGGCAGGCACAAGTGGGCCGAGGCAGTGTAGATTCATCACGTTTTCACTCAACACACGCAGCTCATTAACAGCCCC
GCTGACAACTTGTCAGGACTTCCCCCTCGTGAATCCCCCTGCTACGCAACCCCCATTCCCCGCCCATTCCAACACTTC
CCGCCGGGAGCGTGGGAAATTATGCGTGTTGGTGGGACGTCGGGCGGTGAAAATTGGCGCGCTCTTCGGGGGGCCACA
CCGCGTGGCATTGACAACTCTTCCACATTTCGCGCCCAACGATGCGTTGGCATCAGTGGGTCACAGGGATTACGGCTG
GCTGGGATTCCAGAGCCAGATCTTTTTCAGCCAAAACTTTCAGCTTTCGAAGACCTCAAGCGATAGGAGAGTGTCGGA
AGTCCAGAAATAGACGCGTAGCACATAAATTATGGATCGTATCGAGTATCGATTAGCCCGGGACAAGCGAAGCGATAG
GGAGACATATTTTTATTACCCTCTCGGGGACCTGCACTTGTTGGCTTCGCTTCTATGAAAGATCCCTCTACCATATCA
CGTATGTGGGCTCCCCCAATCGAACCGAGTTGTGGGAAATGTTTTCCCAGGCCAACAGCTAATTGTCACTCCAAGGGT
TGTCCCCGCAGCCCAGACGACAGATAAGCGGGCAAGTGAAGCCCAGCGATCTGAGTCAAGTGAAGGGCTTCAATTTCT
TTCCCGAGTGGAACTGGGATATCGAAATTACATTTGTAACAGACGTTTTAGTCCGCAATCCTCAGCTAATGGGACTTA
CGAACATATATTCATCTGAAATTCAAGAACATGCGCACTTAAAGAGCAGGGAAGTCGCACACGCGCAAGTCAGGCGCT
CAAAAAGGGATCTTCGGAGGTACAGTGGGCAAAAGACTGTAAATAAATAATATAAATAAAATAATATTTAGCTCTATG
TGTTTATATAATCTACAAAGTAGTTAACAAAAAATATAAAATGGATATAAAAATACATCTTATATATCCCTATAATAA
GAAATAAATAATAATTTTAGTAAATTAATTTTGTTACACAAAGTACCTGTATTATTACCTCTTTTTTGTTGGTTGGTT
CTTTTTTGATGTGGCCCCACTGTGCTCTCTTATCAGTGCGACAATCAGGCATTGCCTTTCCCCATCGGGGGATTCTAA
TTCCGTGGACGATGGGCCGAAACGCCTATAAAGTCGCTCATTAAAAATGTTTAATTATGGCCCATCTTGCATCTTGCA
CCGATGTGGATGGGGTTTGTCGGCAATGATTTACATTATAAAAATGCCCGTTATCTGAGCATTTTGTACGCTCCACTC
CCTCTTCCCCCCTCCAAAAAAAAAAAAAACAGATATGTATATTCCCCGAGATATTCCCAAGCGGCCAAAAATAGACGC
AAATTGTAACGCACTTGAAGTGCACTCTGAAACATCTTGAAGTCCAAATAAAATAGCAGAGAGACCCACAATAATATA
CGTTGATATACACATGTATATATGTATGTATGTACATAAAGGGCCAGGAGCAGGAACGTTAGGCATGCGGTGGTACGA
GCACCGTGGTGCGAGCGAGAGCGCTGTGCTGCCTGAGGGAGAGGTAGCGAGTGGGTTGCATTGCGCACACAGAACATG
TGAATGCAGAGTTCAAGTGCATGCCGTGACACAGACACGCACACACACACACGCACACACAGATGAGTAGCCGCTGCA
AAGTGTTTTTTCCCAGGCGCTATTTATAATATGCATCCCGTCGCCGATCCGATCCGATCCAATCCAATCCGATTGGAT
CCCATCTTGCGGCACTACGATTATGACGCTCGACACGATGATGCATTCGCAGAGTTTCCCGATCGCAGAGTACCCTGT
ACTCGAGTAGTTTTTAGATGCAGTATTATTAAGTAGAAAATTGTAACCGTATAATATTCCATTATATTAAATATTTTT
ATAGCACTAAAGAAATAAAAGCCCATTTTATAATTTATATTACAAAAATACTTAACCATAGAAACTTATGATATGATA
CCAATATTTAAGTTCCAAAAAATGTAGAACATTTTTAAGTATATACTCGAAAATATTAATTTTCAAAATTGATATTCA
AGAGATATTATAAAAAGATCCCCATTCTAAATATCTAACATCATGCCATGCTTTCTAATGAGTATAGTATACCCCTGC
TACCCTGTCAATCCGCAAAACAGGCGCCGAAACATGCGGTTTCTCGCAGCAGACTGCCACGGGAAAAATTCGGTTCGA
GATTTGGGAATGGATGTATGACGGAGCAGAAGGAGCAGGACCCGGATTTCGGATTTCGGAATGGATATGGAAATGAAG
ATGGAAATGGGACTTTGACTGCGCGACGGCCACATGCGCCGCTGGCGATGCCGCTGGATGTTGCATGTGGCAGCGGTC
GGTGCAGCAGCGAAAGTGTTGCAGCTGTATGAGAGGGTCTATTTTTGGGGCGATTGTGCGGCGCTGGTGCTGCCACAT
GTGTTCTGTGTTGGGCTGCTAAAAGGCATTGTAATGAGAGCAGAAAATAGAATTGACTCCACTTGAGCAATGTCCCAT
AAAGCGGGAGTTTCGAGTTTGGCGCGCAATGTGCCGCACCAGCAAACGAACAAAAGAAAAAAAAAAAAAAAAAACACA
GCCAGTAACACATGGGCCCACGAGTTATGTTTTATTTTTAATCCCACAAAGAGTCGATCTCCAAAACAAACCCGCAGA
GAGCACATATAAAGAGACTCGGTGGACGAGTGGTTCGAAACAGTCTTCCGCCGCAGCTCGACGCGCTCGCATATCGGG
AATATATAGATCGGAGATATCGCAGGACCCACAGCAGAGCAGAGCCGCAGAGCCACCAACCTCG
>Him_proximal|Drosophila
melanogaster|Him|FBgn0030900|X:18041232..18043470
GCCCAGACGACAGATAAGCGGGCAAGTGAAGCCCAGCGATCTGAGTCAAGTGAAGGGCTTCAATTTCTTTCCCGAGTG
GAACTGGGATATCGAAATTACATTTGTAACAGACGTTTTAGTCCGCAATCCTCAGCTAATGGGACTTACGAACATATA
TTCATCTGAAATTCAAGAACATGCGCACTTAAAGAGCAGGGAAGTCGCACACGCGCAAGTCAGGCGCTCAAAAAGGGA
TCTTCGGAGGTACAGTGGGCAAAAGACTGTAAATAAATAATATAAATAAAATAATATTTAGCTCTATGTGTTTATATA
ATCTACAAAGTAGTTAACAAAAAATATAAAATGGATATAAAAATACATCTTATATATCCCTATAATAAGAAATAAATA
ATAATTTTAGTAAATTAATTTTGTTACACAAAGTACCTGTATTATTACCTCTTTTTTGTTGGTTGGTTCTTTTTTGAT
GTGGCCCCACTGTGCTCTCTTATCAGTGCGACAATCAGGCATTGCCTTTCCCCATCGGGGGATTCTAATTCCGTGGAC
GATGGGCCGAAACGCCTATAAAGTCGCTCATTAAAAATGTTTAATTATGGCCCATCTTGCATCTTGCACCGATGTGGA
TGGGGTTTGTCGGCAATGATTTACATTATAAAAATGCCCGTTATCTGAGCATTTTGTACGCTCCACTCCCTCTTCCCC
CCTCCAAAAAAAAAAAAAACAGATATGTATATTCCCCGAGATATTCCCAAGCGGCCAAAAATAGACGCAAATTGTAAC
GCACTTGAAGTGCACTCTGAAACATCTTGAAGTCCAAATAAAATAGCAGAGAGACCCACAATAATATACGTTGATATA
CACATGTATATATGTATGTATGTACATAAAGGGCCAGGAGCAGGAACGTTAGGCATGCGGTGGTACGAGCACCGTGGT
GCGAGCGAGAGCGCTGTGCTGCCTGAGGGAGAGGTAGCGAGTGGGTTGCATTGCGCACACAGAACATGTGAATGCAGA
GTTCAAGTGCATGCCGTGACACAGACACGCACACACACACACGCACACACAGATGAGTAGCCGCTGCAAAGTGTTTTT
TCCCAGGCGCTATTTATAATATGCATCCCGTCGCCGATCCGATCCGATCCAATCCAATCCGATTGGATCCCATCTTGC
GGCACTACGATTATGACGCTCGACACGATGATGCATTCGCAGAGTTTCCCGATCGCAGAGTACCCTGTACTCGAGTAG
TTTTTAGATGCAGTATTATTAAGTAGAAAATTGTAACCGTATAATATTCCATTATATTAAATATTTTTATAGCACTAA
AGAAATAAAAGCCCATTTTATAATTTATATTACAAAAATACTTAACCATAGAAACTTATGATATGATACCAATATTTA
AGTTCCAAAAAATGTAGAACATTTTTAAGTATATACTCGAAAATATTAATTTTCAAAATTGATATTCAAGAGATATTA
TAAAAAGATCCCCATTCTAAATATCTAACATCATGCCATGCTTTCTAATGAGTATAGTATACCCCTGCTACCCTGTCA
ATCCGCAAAACAGGCGCCGAAACATGCGGTTTCTCGCAGCAGACTGCCACGGGAAAAATTCGGTTCGAGATTTGGGAA
TGGATGTATGACGGAGCAGAAGGAGCAGGACCCGGATTTCGGATTTCGGAATGGATATGGAAATGAAGATGGAAATGG
GACTTTGACTGCGCGACGGCCACATGCGCCGCTGGCGATGCCGCTGGATGTTGCATGTGGCAGCGGTCGGTGCAGCAG
CGAAAGTGTTGCAGCTGTATGAGAGGGTCTATTTTTGGGGCGATTGTGCGGCGCTGGTGCTGCCACATGTGTTCTGTG
TTGGGCTGCTAAAAGGCATTGTAATGAGAGCAGAAAATAGAATTGACTCCACTTGAGCAATGTCCCATAAAGCGGGAG
TTTCGAGTTTGGCGCGCAATGTGCCGCACCAGCAAACGAACAAAAGAAAAAAAAAAAAAAAAAACACAGCCAGTAACA
CATGGGCCCACGAGTTATGTTTTATTTTTAATCCCACAAAGAGTCGATCTCCAAAACAAACCCGCAGAGAGCACATAT
AAAGAGACTCGGTGGACGAGTGGTTCGAAACAGTCTTCCGCCGCAGCTCGACGCGCTCGCATATCGGGAATATATAGA
TCGGAGATATCGCAGGACCCACAGCAGAGCAGAGCCGCAGAGCCACCAACCTCG
>Obp18a_prom|Drosophila
melanogaster|Obp18a|FBgn0030985|X:18969778..18972746
ATGGCGAAAATCTGTTTCCCAACTAACAATGAGCGCATCATCACAGCTCTATATATATAACCCATCGATTTGCTAATT
CAGCTCAAAAGTAGACAGGAGATTTTAATTAAATAATTGGATGCTACTTTACATTCGCCACACACCAACAAATAAAGT
CTATAATTGAAATTTTAAGCGCAGTTCCCGATTATGAGCTACACGTATGTCGTATGCGCAATATCTGCATTACAATTG
CCAATAGTAAATTACCAACTTGGTTTTCTTCATATTTATTAAGATAGAAAACATACAATTTTTGGCTTTTACACTCCA
AGCATCTCTGAAGTTTAAACAAAAAACATATGTGTAGCCTATCTACTGTATTGGACTTTATTCGTATATTTTATATGG
TTCATTAATATAGGTATAAATACAAATTATATTCACGCTTTGCGATTTGCAGCGAATATCACATCTTATACACGATGT
AAAAAAAAAAAAAATATTTCGTCATGTTTTTAGGTTGGCCGCAGGCAGTGCTCACTGTACCGCCACAATGTTTATCGT
TTTGCATTTTTTTTTTCTTTGTTTTCTTGCGGTTTCCCCTAATTATCTTTAGTATAAACTTAGTCTACTGTCTTTTTT
GGTAAGTATTTTCGTGATGGGCTCGTCTATGCGAATTCCCATTTCCAATGAATAAATAAAGTAATTAGAACATTAAAA
TTAGCAATAAAACACGTACATTTAAAGCTGACAACAAAAAAAAAAAGTATTCTTATGTTAAACTGTAGTATGTGCCTA
TGCAATATTAAGAACAATTAAATAAAATAGCATATTAACTTATGGCAGCACTTTGTTGCTATGTTTATGTTTATGTTT
ATGCACGCAGTTAGGCCAGGGCGGATGTAACATGATCACCCACTCGAAGGCAAAAAGTATAAGTGCATGGTCAGCATT
CACACGCCGACCAAATACATATTACATACGTACATACATATCTCGCTCTCCCGATAAGCCTAGATATATAAGATATAC
ATAAGAACGCCGCTCCGCTGCTGGCGTACCCGGCAGCGCAGCTACGCGGATTAGCCTAAGTCCAAATATATTAAAAAC
TGTAAAATCAGAGAGACTCTGTAGACGTTGAGCTGACAGAACCATTTCTGCCTACTCTAAAATCAAAAGAAGAAATTG
AATAAATATATGTCAGCCCGACGGCTGCCTTCAACTTAAAACGGACTTGTGTTCTGAATTGGAGTTCATCATTACATG
GCGACCGTGACAGTCGTCCAACGCTGGACGAATTGACCAAAGCTGGTGAAAACAAAGGAACAAAGGAACACTGGACTG
GAAGAAGACTGGACTAATTAAATGGAACTGCAAAAACCAAGGAAAAATCTGAGTGAGTAGAGTTCTATTGAGTATGGG
CAAACACCGTGGCGGTTTGAAAACTAAGCTGAATAAACGTATAGCCCACGTAAGGTGGCTAATATACGGTCAGCAAAC
GCCACCGGTTTGGTCGAAAGCTCTAAAGCTACATGCAGAGCTAGACCACTTGTTGCAATATCAGCAAGAATTAAAGAC
CCATAAGCTCGAGAAAACTCACTCAGATAATATTAAAAATATACCCACAATTAATGAAGTTCCAAAATACCAGGCATG
TCCAGCACCAGCACCAGCATTAACAAAACCAAAGAAGTCCTGCCCCCCTGGCTGCGAAGGAATCTGGAGTCCCCACTG
CCTGGGGACTTGTGAGCGACCATCGACGTCTTCAGCGGCGAAGAAATAGACAGCAGCGAGGGAGTGTCAGCGTGCCAC
CCCCGGCGACGCCCAGCTGACACCTGATGAGCATCATCAACAGCAGAATATAATAATAAATATATATAAATATAAAGT
AAATATAAAATATATATAGATAAGAAAAATTGTAAGAAATATTGTAAAACGGAGCATATACTATTATGCCCTGTTAAC
CCAATATGGCCCGTGAAGCCATAGCTAGAATCAGGCAGGCAACAATGTAAAATACAATTTTTTTTTACTCTTGCGAAC
ATTGAAAGATTTTATAAATAGATAATTCCAAACATAAATGTCTATAGAGACAAATGAAATAAGTAAAACTGAAAATAA
AAGTATATACAAAGGAAATTTTCTATTCTATTCTCCAAAATATAAAATTAGTATACCCAAAATGGGTCTAATAGACAC
TAAAACTGTGGACTCTACAGCCAATGTAATAAATAAAGTAGAAGTCCAAAATGCAGACTTGTTCTGGATAACCATAAT
ACTAATTGTAATTGCATTAATTATGGTATCCAATGCATTAATAAAAATATACAAACTGCATAACAAGTGTCTTAAGAA
ACGATACCGTAGCACTGCTAACGGTATAGATAATATTTAAGGAAGATCTTTAATAAAGTCAATTATGAATGAAAATAT
GAGAAAAATTATATGAAAAAAAAAAAATAATAAATAAAAAAAAAAATATAAAACGTAATATTGAATTTATCTACGTTA
AAAAAAAAAATATATACAAATGAATAAATTTGAAGTTATGAGTATACCACAGCATGGACTGGGAAAAGCTTGTTGATC
AGATAAAAGATCAAAATGAAAATTTCAGAAAATCCTATAAGTGCTTAACGCAAAACAGATCAACACAAGCTGTAACAA
TCAATAGGAATGCCCAAGTCTTGGTAAATAGTTATAATGAAATCAGAGAGTTGATCCAACAAAATAGAAAGAATTTGG
AACGCAAACAGTGTGCTAAGGCTTTGAACCTACTGGTGACATTAAGAGAAAAATTAATATTTATAAAAAATAAATTCA
GTCTCCAGATAGAAATTCCAACCATAGTAAACACCCCACTAAGAATAAATTTGAATGAAGACAGCACTAACTCTGACG
AGGAAGATAGGACTATAGTCAAGGAAGACATTAAAGAGGAAGATCTTCACGATCTAACTATACCAGCAAAATTAATGC
TGAA
>Obp19a_prom|Drosophila
melanogaster|Obp19a|FBgn0031109|X:20223943..20226446
CCACCTGCGAAATGGGTCATAGTATATGTATTTGTAAAAAATGTATGTAAAAAAATGTTAAATTAATAATTTTGAATT
TCAATTTGGAGCTGAAAATAATATTTTGTGTCCATCAACAGCTCCAAAGCGATGGTTCATTTTATCTTGTGTGCGTTC
AATAGAATCACTCTTACGTTAGCGCGTCCATTGATGGTTGTCCCATTGAAGTACTTCTTAAAGCCGTCGGCCATTGCT
ACTGGACTGGATCTGGAGATCTGGAGATCTGGATTTGGGGTCGGGTCCGGGTGAGAGCTGAGTGTGTTCTGCCTATAG
CTCCGAGCGAGAACCTAATGACAAGCAGCGAAGTGCAAAGCTCGGCCAACTAGATTACAAAGTCGATTCATTGGCAGG
ATTCGATTTTTATTGACTCAACGAGGTGGTACATGAGTTTGGTCCCCAAGCCTTTAACTGTGGCATCGAGGACCGGAA
AGGGGGTGCTGATTATAAATAGTTATGGATTGCTGACGGGTCGAATGGGTCGGAGCGGTGGGGAGCCATGACTTCAAT
GATTTGGCAGCATCGGCGCCCTAGCCATGGAGCATGGCCTGCTGGCAGCCCTTGCAGTAGAGCTTGGTCTCGCGCCGC
TTCGTGTTGCGGCGGTGCATCTTGACCAGGACGTAGACGAGTCCCAACGAGGCCCAGGTGGCCTTGGCTACCTGTGGG
TTTCGGTGGCGTATTTGGGCGCATCTTGTGTACTGCCGTGTACTGAATCACTTACATTGGCGCGACCACGCATGGTCT
GGCTGTTGAAGGCTTCGTTGAAGTTGAAATGATCGGACATCTTTGGATCGTTGTTGACCGGATTGGCGTGGCTTTTAA
CAAAAGATTAAAATTTGGATTCGATATTCGACCTGTATTTTAGACCGGGATTCGGATTGTGACTTTTAAACGTTCGAA
ATGAAAGGAATGTTACTGACAGTCGTCAAAGCCGACTCGGGTTTCCCAACTAGAGAGAATGCTGAAGTCTAGTACCGA
CTAATGGGATACCCATTAATTACTGCTTAAATACTGTGATGAAAATTGAGATATGCAAGAGGCAAATCGAAAGTTTTG
GACATTTTCATATTGTACCTTTAACCAACTTCAGAATTCATTGAGCTAAATACCATTTACAATTTTATGAAATTTTTA
AGCATGTTACAGCTATAACTATTTTTAAACCAGTTACTAGATTCGTTGAAAATTGTATGTCACACAGAACTTCTTGCC
ATCCTGGTCGGAATTAGGATCACTAGCCAAGCCGATATGGCTATGTCTGTCCGTATGAAAGTCTTGGAATCTGATATT
AACATCGCATATCGATCGACCATTATATATCTAATATATCCTCTACAAATGTATTTTATCACCTAGCTAGCATGTAAA
CATTCTGGCCTATTTAGCTGTACGCTTCAGTTATGCTAATGCAAACATAAGCCTTTTGTGATATTATAATTTACATTT
ATTATTTATTGCAGTTAGCTTTATCAGCGATTTGGGCTCATGCCACACGCAATACTACTTATTTCAACGTCATCAGTT
GTACTAAATGCACAAATGAAATACATTTCGCCAAATAAATGCCAACTTGCAACTAATTTGAATGCTAATCAAACCGAA
CTACTCATTTGCATACAAGGTAATAGGTGGTTAAAGTGAGTGTAATGGACTTACTTAAGGGGTTACAAGGCTTATATT
TAAAATGCCTGCCTTGTAATTAAATTTTTAAATATATTGGAAAAAAATGGCCACTTGTTATGTGAGTCTCCAGAAAAA
AAACAAAAAAACAGCAACCATCTGGTATGCAAAATATCTGGTGGTAGCAAAATATCTGGTGGTATCTGGTGGACTATC
AAAATATAAAAACTTTTTTTTCCAGATAGTATATCTTAAAATCAGCATCTTGAAGGAGTATATGTAAATAGCAAACTA
TTTGTAAAAATAGATTTTATTTTATAATTTTTTAAGATATATACCAAACATTATTACCGATTGTGATTATCTTTACAT
TGTTTGACCTCAAAACGGAAAACTGGATGCGCGGTATCCATGCGACCCTAACTCTGGAACCGATTTTGGAACCGCCCC
GTTAGATCTCAGATTGAAACCTTATTTGCATTCGCATGATCGCTGATGAACACTGGGGAAATGCGGCCCAGCAATGGG
ATTGTCAACGCATCTCGGCCAGAATCGCGCCTCGCATGCCACCTCGCACGGTGACCACATACCTGTGTACACTGTCAA
TTAACGTGGCAAGATTATAGCCCGGCCAGAAAGTAATCCGCCCCAGGAACACCACCCACCGCCCGCCCATTTGGATAT
GGAAATGGGCAGTGGGGGCGGCGATTGGCGCTAACCCATAATTCCCACACCCACTTAGCGGTTCGATCGAACCAATAT
GAAGTCATTTGCATGTCGGGGGCCGTGTATAAAAGGAGTCGCCGATGGGTCTGGAGTCTGGAATCCGCCAAATCGTCT
CGGAAAT
>Obp19b_prom|Drosophila
melanogaster|Obp19b|FBgn0031110|X:20224439..20227440
ATTGCTGACGGGTCGAATGGGTCGGAGCGGTGGGGAGCCATGACTTCAATGATTTGGCAGCATCGGCGCCCTAGCCAT
GGAGCATGGCCTGCTGGCAGCCCTTGCAGTAGAGCTTGGTCTCGCGCCGCTTCGTGTTGCGGCGGTGCATCTTGACCA
GGACGTAGACGAGTCCCAACGAGGCCCAGGTGGCCTTGGCTACCTGTGGGTTTCGGTGGCGTATTTGGGCGCATCTTG
TGTACTGCCGTGTACTGAATCACTTACATTGGCGCGACCACGCATGGTCTGGCTGTTGAAGGCTTCGTTGAAGTTGAA
ATGATCGGACATCTTTGGATCGTTGTTGACCGGATTGGCGTGGCTTTTAACAAAAGATTAAAATTTGGATTCGATATT
CGACCTGTATTTTAGACCGGGATTCGGATTGTGACTTTTAAACGTTCGAAATGAAAGGAATGTTACTGACAGTCGTCA
AAGCCGACTCGGGTTTCCCAACTAGAGAGAATGCTGAAGTCTAGTACCGACTAATGGGATACCCATTAATTACTGCTT
AAATACTGTGATGAAAATTGAGATATGCAAGAGGCAAATCGAAAGTTTTGGACATTTTCATATTGTACCTTTAACCAA
CTTCAGAATTCATTGAGCTAAATACCATTTACAATTTTATGAAATTTTTAAGCATGTTACAGCTATAACTATTTTTAA
ACCAGTTACTAGATTCGTTGAAAATTGTATGTCACACAGAACTTCTTGCCATCCTGGTCGGAATTAGGATCACTAGCC
AAGCCGATATGGCTATGTCTGTCCGTATGAAAGTCTTGGAATCTGATATTAACATCGCATATCGATCGACCATTATAT
ATCTAATATATCCTCTACAAATGTATTTTATCACCTAGCTAGCATGTAAACATTCTGGCCTATTTAGCTGTACGCTTC
AGTTATGCTAATGCAAACATAAGCCTTTTGTGATATTATAATTTACATTTATTATTTATTGCAGTTAGCTTTATCAGC
GATTTGGGCTCATGCCACACGCAATACTACTTATTTCAACGTCATCAGTTGTACTAAATGCACAAATGAAATACATTT
CGCCAAATAAATGCCAACTTGCAACTAATTTGAATGCTAATCAAACCGAACTACTCATTTGCATACAAGGTAATAGGT
GGTTAAAGTGAGTGTAATGGACTTACTTAAGGGGTTACAAGGCTTATATTTAAAATGCCTGCCTTGTAATTAAATTTT
TAAATATATTGGAAAAAAATGGCCACTTGTTATGTGAGTCTCCAGAAAAAAAACAAAAAAACAGCAACCATCTGGTAT
GCAAAATATCTGGTGGTAGCAAAATATCTGGTGGTATCTGGTGGACTATCAAAATATAAAAACTTTTTTTTCCAGATA
GTATATCTTAAAATCAGCATCTTGAAGGAGTATATGTAAATAGCAAACTATTTGTAAAAATAGATTTTATTTTATAAT
TTTTTAAGATATATACCAAACATTATTACCGATTGTGATTATCTTTACATTGTTTGACCTCAAAACGGAAAACTGGAT
GCGCGGTATCCATGCGACCCTAACTCTGGAACCGATTTTGGAACCGCCCCGTTAGATCTCAGATTGAAACCTTATTTG
CATTCGCATGATCGCTGATGAACACTGGGGAAATGCGGCCCAGCAATGGGATTGTCAACGCATCTCGGCCAGAATCGC
GCCTCGCATGCCACCTCGCACGGTGACCACATACCTGTGTACACTGTCAATTA
'''