# Read a tab delimited file (as saved by Excel) and output to a comma delimited file
# atf, gal, gpr files are tab delimited (GenePix files)
# The 'Alternate method' using the fileinput module converts the file in place.
import os, fileinput
def tab_to_csv(tab_name, csv_name):
    """Convert a tab delimited text file into a comma delimited one.

    Trailing tabs and the newline are stripped from every input line, the
    remaining tabs are replaced by commas, and each output line is
    terminated with a single newline. Fields are not quoted or escaped, so
    a field that already contains a comma is copied through unchanged.

    Parameters:
        tab_name: path of the tab delimited input file.
        csv_name: path of the comma delimited output file; it is
            overwritten. It may be the same path as ``tab_name``.

    Returns:
        True when the output file was written, False when a file could not
        be opened, read, decoded or written.
    """
    try:
        # Read the whole input before opening the output: opening for
        # writing truncates the target, which would wipe the data when both
        # names refer to the same file and would leave an empty output
        # behind if reading failed part-way.
        with open(tab_name, 'r') as source:
            converted = [line.rstrip('\t\n').replace('\t', ',') + '\n'
                         for line in source]
        with open(csv_name, 'w') as target:
            target.writelines(converted)
    except (IOError, OSError, UnicodeError):
        # Only file-level failures are reported through the return value;
        # anything else (e.g. a wrong argument type) is a programming error
        # and is allowed to propagate.
        return False
    return True
import re
def readBlockData(fn):
    """Read the ``BlockN=...`` entries of a file into a dictionary.

    Every line is stripped of surrounding double quotes, tabs and newlines.
    A line that then starts with ``Block`` followed by a digit is split at
    ``=``: the text before it becomes the key and the comma separated
    integers after it become the value.

    Parameters:
        fn: path of the file to read.

    Returns:
        A dict mapping each block name (for example ``'Block1'``) to a list
        of ints. Lines that do not match, or that contain no ``=``, are
        skipped.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
        ValueError: if a value in a block entry is not an integer.
    """
    block_entry = re.compile(r'Block\d')
    blocks = {}
    # The with-statement closes the file even when a value fails to parse.
    with open(fn) as handle:
        for line in handle:
            line = line.strip('"\n\t')
            if not block_entry.match(line):
                continue
            parts = line.split('=')
            if len(parts) < 2:
                # A "BlockN" line without "=" carries no values.
                continue
            # int() ignores surrounding whitespace, so splitting on a bare
            # comma accepts "400, 400" as well as "400,400".
            blocks[parts[0]] = [int(value) for value in parts[1].split(',')]
    return blocks
if __name__ == '__main__':
    def run_script():
        """Convert the sample GPR file and print whether it succeeded."""
        sample_dir = os.path.join('H:\\', 'TEMP', 'temsys')
        gpr_file = os.path.join(sample_dir, 'GPR3.gpr')
        csv_file = os.path.join(sample_dir, 'GPR5.txt')
        if not tab_to_csv(gpr_file, csv_file):
            print('There was an error')
            return
        print('Tab delimited file conversion to comma delimited file was successful')

    run_script()

    # List the Block entries found in the same sample file.
    dd = readBlockData(os.path.join('H:\\', 'TEMP', 'temsys', 'GPR3.gpr'))
    for key, values in dd.items():
        print('%s = %s' % (key, values))
'''
Block4 = [4896, 4896, 100, 24, 175, 5, 175]
Block3 = [400, 4896, 100, 24, 175, 5, 175]
Block2 = [4896, 400, 100, 24, 175, 5, 175]
Block1 = [400, 400, 100, 24, 175, 5, 175]
'''
# Alternate method: convert the file in place with the fileinput module.
import fileinput

gpr_file = os.path.join('H:\\', 'TEMP', 'temsys', 'Copy of GPR3.gpr')
# With inplace=True fileinput redirects standard output into the file being
# read, so each printed line replaces the original one; the untouched
# original is kept next to it with the '.bak' extension.
for line in fileinput.input(gpr_file, inplace=True, backup='.bak'):
    fields = line.rstrip('\t\n').split('\t')
    print(','.join(fields))
# Before processing
"""
ATF 1
8 5
Type=GenePix ArrayList V1.0
BlockCount=4
BlockType=0
URL=http://genome-www.stanford.edu/cgi-bin/dbrun/SacchDB?find+Locus+%22[ID]%22
"Block1= 400,
400, 100, 24, 175, 5, 175"
"Block2= 4896,
400, 100, 24, 175, 5, 175"
"Block3= 400,
4896, 100, 24, 175, 5, 175"
"Block4= 4896,
4896, 100, 24, 175, 5, 175"
Block Column Row Name ID
1 1 1
1 2 1
"""
# After processing
"""
ATF,1
8,5
Type=GenePix ArrayList V1.0
BlockCount=4
BlockType=0
URL=http://genome-www.stanford.edu/cgi-bin/dbrun/SacchDB?find+Locus+%22[ID]%22
"Block1= 400,
400, 100, 24, 175, 5, 175"
"Block2= 4896,
400, 100, 24, 175, 5, 175"
"Block3= 400,
4896, 100, 24, 175, 5, 175"
"Block4= 4896,
4896, 100, 24, 175, 5, 175"
Block,Column,Row,Name,ID
1,1,1,
1,2,1,
"""