hi, my problem is that i have written script which read a file , take line by line split it and then store it into dictionary. and i am writing the values of dictionary on other file, the problem is that at the end of file , it doesnt print the last line as part of directory
i have a file
>BIG_CLUSTER31 ENSTNIP00000005053 TETRAODON8 14 221 380 Allantoicase GGVCLGFSNAHFGHPRNMIGLGQAASMADGWETARRLDRPPQLKVDGRGILQLAGWEWAVFRLGHPGTVHTVEVDTRHFKGNSPDSCWVEACSLSPAEEVRALRTGWTSGTWCLLLPPQKLRPHHRHLFGEQQLRWSAPVTHVRLLISPDGGVSRLRLWG
>BIG_CLUSTER31 ENSTNIP00000005053 TETRAODON8 14 27 198 Allantoicase GGKVLFATDEYFAPSSNLLKGALPRFDTSAYTEFGKWMDGWETRRKRSPGHDWCIIQLGVPGRVWGVDVDTSFFTGNHSPRVSVQGACLDAPPDAIVEGERTGKAATDGELAAAAKLCSEAWPELVPVSPLMPGYADCCHNYFTVGYTHTVTHLRLNMHPVDGGGARLRVYG
>BIG_CLUSTER31 ENSTRUP00000014331 FUGU4 scaffold_268 222 379 Allantoicase GGVCLGFSNAHFGHPRNMIGLGQAVSMADGWETARRLDRPNKLKVDQQGILQVPGREWAVFRLGHPGTIHRVEIDTIHFKGNFPDSCQVEVCSLSPDEEVQCIQNRWTSGKWRLLLPPQKLRPHHTHRYDEPVPSGPVTHVRLTIRPDGGISRFRLFG
>BIG_CLUSTER31 ENSTRUP00000014331 FUGU4 scaffold_268 28 199 Allantoicase GGKVLFATDEYFAPASNLLKGALPQFDSSAYTEFGKWMDGWETRRKRIPGHDWCIIQLGVPGRICGFDVDTSFFTGNHSPRVSIQGTCLDSSPDFSLKGNRTGMAATDSEFAEVAKLRSEAWPELVPLSPLMPGFSDCCHNFFTSSQFPNRVTHLRLNMYPDGGIARLRVLG
>BIG_CLUSTER31 ENSXETP00000002586 JGI4.1 scaffold_125 222 383 Allantoicase GGVCLGFSDAHYGHPRNLIGIGRACDMGDGWETARRLDRPPVLKADSKGILQVPGFEWAVLKLGHPGLVTHIEIDTNHFKGNSPNSCKIDACALTPTEEEGVKRDERFEIGYNWKPLLSVTQIHPHKRRYVESTSLALHQVISHVKITIAPDGGVSRIRLWG
>BIG_CLUSTER31 ENSXETP00000002586 JGI4.1 scaffold_125 28 199 Allantoicase GGKVLFATDDWFAPAENLLKKKDPEFKPGLFTEFGKWMDGWETRRKRIPGHDWCIIELGVPGIIHGFEADTRFFTGNYAPRISVQAACLKPEEITFQPRKDKIGTAASIEEYKSADKLKSEKWSHLLQMTELTPGYAESSHSYFNVNSKQRWTHLRLNIYPDGGIARFKVYG
it gives results as
>ENSTNIP00000005053
GGKVLFATDEYFAPSSNLLKGALPRFDTSAYTEFGKWMDGWETRRKRSPGHDWCIIQLGVPGRVWGVDVDTSFFTGNHSPRVSVQGACLDAPPDAIVEGERTGKAATDGELAAAAKLCSEAWPELVPVSPLMPGYADCCHNYFTVGYTHTVTHLRLNMHPVDGGGARLRVYGGGVCLGFSNAHFGHPRNMIGLGQAASMADGWETARRLDRPPQLKVDGRGILQLAGWEWAVFRLGHPGTVHTVEVDTRHFKGNSPDSCWVEACSLSPAEEVRALRTGWTSGTWCLLLPPQKLRPHHRHLFGEQQLRWSAPVTHVRLLISPDGGVSRLRLWG
>ENSTRUP00000014331
GGKVLFATDEYFAPASNLLKGALPQFDSSAYTEFGKWMDGWETRRKRIPGHDWCIIQLGVPGRICGFDVDTSFFTGNHSPRVSIQGTCLDSSPDFSLKGNRTGMAATDSEFAEVAKLRSEAWPELVPLSPLMPGFSDCCHNFFTSSQFPNRVT
my code is
from sys import *
import os
import sys
prot = ''
value_list = []
new_items = []
file_list = os.listdir(os.getcwd())
for f in file_list:
fr = open(f,'r')
#filenumber += 1
fr_lines = fr.readlines()
protein_dict = {}
for line in fr_lines:
if line.startswith('>'):
cluster_list = line.split()
#print cluster_list
# cluster_list[0]
clustername = ''.join(cluster_list[:1]).strip('>')
resultfile = clustername + '_fasta.txt'
#print clustername,
proteinname = cluster_list[1]
#print proteinname
if (prot != proteinname):
#print 'yes'
#print proteinname
# protein_dict = {}
if len(protein_dict) > 0:
#print protein_dict
#print clustername
parijat_items = sorted([k for k, v in protein_dict.iteritems()])
#print items
#print items
#print type(items[0])
#print type(items)
#print parijat_items
#print new_items
#print prot
for i in parijat_items:
value_list.append(protein_dict[i])
#print value_list
#print ''.join(value_list)
#value_list = []
protein_dict = {}
proteinseq = '>' + prot + '\n' + ''.join(value_list) +'\n'
value_list = []
print proteinseq
outfile = open(resultfile, 'a')
outfile.write(proteinseq)
outfile.close()
prot = cluster_list[1]
protein_dict[int(cluster_list[4])]= cluster_list[7]
prot = cluster_list[1]
print '###################################'