Hi Forum,
I am a total newbie, so please be gentle!!
I am trying to extract data in many text files that looks like this:
0.0 -0.9
0.3 -0.4
0.6 -1.0
0.9 0.3
These colums always appear on the same line number. What I want to do is to get a list of all column1row1:column2row1 of all files, then a list of all column1row2:column2row2 and so on to finally be able to further process these lists
So far I have got the following code:
#!/usr/bin/python
import os
import re
class Data(object):
def __init__(self,l,p):
self.l = l
self.p = p
self.type = 'data'
def read_output(file=None):
"""Reading in contents of output.txt"""
if file is None:
raise "No file has been passed to me!"
file =open(toread, "r")
data = file.readlines()
count = 0
hit={}
for line in data:
count = count + 1
if (line.startswith("0.0") and count == 15):
elems = line.split()
l = float(elems[0])
p = float(elems[1])
hit[elems[1]]
print hit
print "Found relevant entry for lead %s with a persistence of %s" % (l,p)
if (line.startswith("0.3") and count == 16):
elems = line.split()
l = float(elems[0])
p = float(elems[1])
print "Found relevant entry for l %s with a p of %s" % (l,p)
if (line.startswith("0.6") and count == 17):
elems = line.split()
l = float(elems[0])
p = float(elems[1])
print "Found relevant entry for l %s with a p of %s" % (l,p)
if (line.startswith("0.9") and count == 18):
elems = line.split()
l = float(elems[0])
p = float(elems[1])
print "Found relevant entry for l %s with a p of %s" % (l,p)
else:
continue
file.close()
match_file = "output.txt"
cwd = os.getcwd()
children = os.listdir(cwd)
children.sort()
#print "these are the children",children
for child in children:
path = os.path.join(cwd,child)
if os.path.isdir(path):
files = os.listdir(path)
matches = []
l = 0.0
p = 0.0
for file in files:
if file.startswith(match_file):
matches.append(os.path.join(path,file))
if len(matches) == 0:
continue
data = Data(l,p)
matches.sort()
for match in matches:
compressed = False
if match.find(".bz2") != 1:
compressed = True
os.system("bunzip2 %s" % match)
toread = match.replace(".bz2","")
else:
toread = match
input = read_output(file=toread)
The error I get is:
Traceback (most recent call last):
File "./calculatemeans.py", line 84, in <module>
input = read_output(file=toread)
File "./calculatemeans.py", line 32, in read_output
hit[elems[1]]
KeyError: '-0.428899'
Can someone help me please?
Sorry for being so silly!
Marianne