hi everyone i have made a program which extracts information from an email account, it extracts the date, the sender, number of copies written in the body, and it extracts the pdf file from the email and puts it into a directory, it also counts the number of pages in each pdf.
i have strings setup so that in the python out put its outputs the date, teacher, copies, and pages. since there are more than one email, the uotput returns more than one oupt put, for example if there are two emails it will output two dates, two teachers, two number of copies, and two page number.
how do i inpt this information in a table using xlwt. so that each i can create a spreadsheet which has all this information in it.
i want the spreadsheet to look like this
Job Date Teacher Copies Pages
1 Feb. 5, 2012 Osman Mak 5 1
2 Feb. 10, 2012 Jack deve 3 6
currently my program outputs all this information i just need to put it on a spreadsheet, i am fairly new to python so if i can ge any help it would be greatly appreciated.
thanks!!
hers my the code i currently have:
import email, getpass, imaplib, os, string, re
from itertools import takewhile
from operator import methodcaller
import xlwt
detach_dir = '/Users/defaultuser/Desktop' # directory where to save attachments (default: current)
m = imaplib.IMAP4_SSL('imap.gmail.com')
m.login('******@gmail.com', '*******')
m.list()
# Out: list of "folders" aka labels in gmail.
m.select("inbox") # connect to inbox.
resp, items = m.search(None, "ALL") # you could filter using the IMAP rules here (check http://www.example-code.com/csharp/imap-search-critera.asp)
items = items[0].split() # getting the mails id
for emailid in items:
resp, data = m.fetch(emailid, "(RFC822)") # fetching the mail, "`(RFC822)`" means "get the whole stuff", but you can ask for headers only, etc
email_body = data[0][1] # getting the mail content
mail = email.message_from_string(email_body) # parsing the mail content to get a mail object
#Check if any attachments at all
if mail.get_content_maintype() != 'multipart':
continue
teacher = mail["From"]
subject = mail["Subject"]
d = mail["date"]
date = d[0:16]
for part in mail.walk():
# multipart are just containers, so we skip them
if part.get_content_maintype() == 'multipart':
continue
# is this part an attachment ?
if part.get('Content-Disposition') is None:
continue
filename = teacher + subject + ".pdf"
counter = 1
# if there is no filename, we create one with a counter to avoid duplicates
if not filename:
filename = 'part-%03d%s' % (counter, 'bin')
counter += 1
att_path = os.path.join(detach_dir, filename)
#Check if its already there
if not os.path.isfile(att_path) :
# finally write the stuff
fp = open(att_path, 'wb')
fp.write(part.get_payload(decode=True))
fp.close()
for part in mail.walk():
# multipart are just containers, so we skip them
if part.get_content_maintype() == 'multipart':
continue
# we are interested only in the simple text messages
if part.get_content_subtype() != 'plain':
continue
payload = part.get_payload()
x = payload
all=string.maketrans('','')
nodigs=all.translate(all, string.digits)
copies = x.translate(all, nodigs)
print date
print teacher
print subject
print "Number of Copies:" + copies
# we use walk to create a generator so we can iterate on the parts and forget about the recursive headach
d = r'/Users/defaultuser/Desktop'
totpages = 0
for f in (pf for pf in os.listdir(d) if pf.endswith('.pdf')):
fn = os.path.join(d,f)
with open(fn, 'rb') as pdf:
text = pdf.read()
pages = int(''.join(takewhile(methodcaller('isdigit'), text[text.rfind('/Count ')+7:].lstrip())))
print('File %s: %i pages' % (f,pages))
book = xlwt.Workbook(encoding="utf-8")
sheet1 = book.add_sheet("Python Sheet 1")
sheet1.write(0, 0, "Job")
sheet1.write(0, 1, "Date")
sheet1.write(0, 2, "Teacher")
sheet1.write(0, 3, "Copies")
sheet1.write(0, 4, "Pages")
book.save("python_spreadsheet.xls")