I have a problem iterating through a dictionary where the sequence should be ACTGDEDQ..., but it includes some other stuff Biopython adds when it parses a FASTA file. If I just print seq_record.seq it prints the sequence, and does not include extra stuff. However, if I print the dictionary it includes extra stuff not part of the polypeptide, and the "for loop" will not iterate through it. This one step has been annoying me forever. Does anyone know what to do?
from Bio import SeqIO
file = open("Zaza.fasta.txt")
for seq_record in SeqIO.parse(file, "fasta"):
protein_dict[seq_record.id]=seq_record.seq
def Filter_By_Molecular_Weight(prot_dict):
acidsCount = {
"A": {"count":0, "weight":89.1288},
"B": {"count":0, "weight":132.64625},
"C": {"count":0, "weight":121.1888},
"D": {"count":0, "weight":133.1386},
"E": {"count":0, "weight":147.1655},
"F": {"count":0, "weight":165.2266},
"G": {"count":0, "weight":75.1019},
"H": {"count":0, "weight":155.1911},
"I": {"count":0, "weight":131.2094},
"K": {"count":0, "weight":146.2241},
"L": {"count":0, "weight":131.2094},
"M": {"count":0, "weight":149.2486},
"N": {"count":0, "weight":132.1539},
"P": {"count":0, "weight":115.1667},
"Q": {"count":0, "weight":146.1807},
"R": {"count":0, "weight":174.2375},
"S": {"count":0, "weight":105.1282},
"T": {"count":0, "weight":119.1551},
"V": {"count":0, "weight":117.1826},
"W": {"count":0, "weight":204.2632},
"X": {"count":0, "weight":145.3726364},
"Y": {"count":0, "weight":181.226},
}
for aa_name,aa_seq in protein_dict.iteritems():
acidsCount[aa_seq]["count"]=acidsCount[aa_seq]["count"]+ 1
total=0
for acid in acidsCount:
total=total+(acidsCount[acid]["count"]* acidsCount[acid]["weight"]/1000)
return total
print Filter_By_Molecular_Weight(protein_dict)