hi everyone i am very close to finishing my first program in python, but there are two things in the way.
first of all the code highlighted in red reads the number of pages of pdf files in a directory. my problem is that it is in a for loop which is wrapped in another for loop which means it repeats itself and than repeats itself again, i tried messing with it but cant figure out a way to get rid of the for loop statement.
secondly the code highlighted in green is suppose to write the output onto a .xls file using xlwt. the problem is that even though i have a counter set, which is suppose to change the value of row from r=1 to r+=1 , but it doesnt work because of the .xls file only one row of information shows up.
any advice would be greatly appreciated. thanxs in advance
import email, getpass, imaplib, os, string, re
from itertools import takewhile
from operator import methodcaller
import xlwt
detach_dir = '/Users/defaultuser/Desktop' # directory where to save attachments (default: current)
m = imaplib.IMAP4_SSL('imap.gmail.com')
m.login('******@gmail.com', '*******')
m.list()
# Out: list of "folders" aka labels in gmail.
m.select("inbox") # connect to inbox.
resp, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp)
items = items[0].split() # getting the mails id
book = xlwt.Workbook(encoding="utf-8")
sheet1 = book.add_sheet("Python Sheet 1")
sheet1.write(0, 0, "Job")
sheet1.write(0, 1, "Date")
sheet1.write(0, 2, "Teacher")
sheet1.write(0, 3, "Copies")
sheet1.write(0, 4, "Pages")
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc
email_body = data[0][1] # getting the mail content
mail = email.message_from_string(email_body) # parsing the mail content to get a mail object
#Check if any attachments at all
if mail.get_content_maintype() != 'multipart':
continue
teacher = mail["From"]
subject = mail["Subject"]
d = mail["date"]
date = d[0:16]
for part in mail.walk():
# multipart are just containers, so we skip them
if part.get_content_maintype() == 'multipart':
continue
# is this part an attachment ?
if part.get('Content-Disposition') is None:
continue
filename = teacher + subject + ".pdf"
counter = 1
# if there is no filename, we create one with a counter to avoid duplicates
if not filename:
filename = 'part-%03d%s' % (counter, 'bin')
counter += 1
att_path = os.path.join(detach_dir, filename)
#Check if its already there
if not os.path.isfile(att_path) :
# finally write the stuff
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
row = 1
d = r'/Users/defaultuser/Desktop'
for part in mail.walk():
# multipart are just containers, so we skip them
if part.get_content_maintype() == 'multipart':
continue
# we are interested only in the simple text messages
if part.get_content_subtype() != 'plain':
continue
payload = part.get_payload()
x = payload
all=string.maketrans('','')
nodigs=all.translate(all, string.digits)
copies = x.translate(all, nodigs)
for f in (pf for pf in os.listdir(d) if pf.endswith('.pdf')):
fn = os.path.join(d,f)
with open(fn, 'rb') as pdf:
text = pdf.read()
pages = int(''.join(takewhile(methodcaller('isdigit'), text[text.rfind('/Count ')+7:].lstrip())))
print('File %s: %i pages' % (f,pages))
print date
print teacher
print subject
print "Number of Copies:" + copies
# we use walk to create a generator so we can iterate on the parts and forget about the recursive headach
sheet1.write( row, 0, str( row ) ) # 'Job'
sheet1.write( row, 1, date ) # 'Date'
sheet1.write( row, 2, teacher ) # 'Teacher'
sheet1.write( row, 3, copies ) # 'Copies'
sheet1.write( row, 4, pages ) # 'Pages'
row += 1
book.save( 'python_spreadsheet.xls' )