So what I am trying to do is give a user 4 links of stories that are relating to the topic. (In my specific case, they choose microsoft and I return them 4 top stories about the company). My problem is pairing the description with the link. So far, this is what I have (I am not sure what one may find useful or important, so I will just give my whole code):
import logging
import wsgiref.handlers
from google.appengine.ext import webapp
import urllib2
from urllib import urlencode
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup
import cgi
import os
from google.appengine.ext.webapp import template
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
dictt = {'microsoft':'NASDAQ:MSFT', 'att':'NYSE:ATT', 'apple':'NASDAQ:AAPL', 'google':'NASDAQ:GOOG', 'sprint':'NYSE:S'}
class company():
def __init__(self, name, link, descriptions):
self.name = name
self.link = link
self.descriptions = descriptions
class MainHandler(webapp.RequestHandler):
def get(self):
self.response.headers['Content-Type'] = 'text/html'
path = self.request.path
parts = path.split('/')
logging.info("path is " + path)
if parts[-1] == '':
contents = mainPage()
self.response.out.write(contents)
elif parts[-2] == 'main':
myTitle = parts[-1]
fquery = dictt.get(myTitle)
info = getLinks(fquery)
links = info[0]
descriptions = str(info[1])
#self.response.out.write(descriptions[0]+descriptions[1])
#contents = subPage(myTitle, links, descriptions)
companyChosen = company(myTitle, links, descriptions)
sub = subPage(companyChosen.name, companyChosen.link, companyChosen.descriptions)
self.response.out.write(sub)
def getLinks(query):
base_url = 'http://www.google.com/finance/company_news?q='
query = query
params = urlencode({"q":query})
full_url = base_url+params
articles = urllib2.urlopen(full_url).read()
soup = BeautifulSoup(articles, selfClosingTags = ["br"])
stories = []
descriptions = []
for entry in soup.findAll('div', attrs = "g-section news sfe-break-bottom-16", limit = 4):
link = str(entry.find("a")["href"])
stories.append(link)
for entry2 in soup.findAll('div', attrs = "g-c")[2:6]:
info = entry2.find('div').contents[0]
#print descriptions
descriptions.append(info)
#print descriptions[1]
return (stories, descriptions)
def mainPage():
s='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n'
s +='"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"\n'
s +='<html><head>\n'
s +="<link rel='stylesheet' href='assets/mystyles.css' type='text/css' />\n"
s +='<title>Speed Stock Information</title>\n'
s +='<center><h1>The Fastest Absorbtion of Stock Information on the Net!</h1></center>\n'
s +="<center><h2>Just click on a link below to get top headlines for that company and a picture of the company's stock graph for the day</h2></center>\n"
s +='</head>\n'
s +='<br />'
s +='<br />'
s +='<br />'
s +='<body>\n'
s +='<p><center><a href = main/sprint target = "_blank">Sprint</a><br />\n'
s +='<a href = main/google target = "_blank">Google</a><br />\n'
s +='<a href = main/apple target = "_blank">Apple</a><br />\n'
s +='<a href = main/att target = "_blank">AT&T</a><br />\n'
s +='<a href = main/microsoft target = "_blank">Microsoft</a></p></center>\n'
s +='<br />'
s +='<br />'
s +='<br />'
s +='<br />'
s +='<center><p><u>What is the point of this?</u></p></center>'
s +='</body></html>'
return s
def subPage(myTitle, links, descriptions):
s='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n'
s +='"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"\n'
s +='<html><head>\n'
s +="<link rel='stylesheet' href='assets/mystyles2.css' type='text/css' />\n"
s +='<title>' + myTitle + '</title>\n'
s +='</head>\n'
s +='<body>\n'
for link in links:
s +='<a href =' + link + 'target = "_blank">' + myTitle + '</a>\n<br />'
s +='<p>' + descriptions + '</p>'
s += '<br />'
s += '<br />'
s += '<br />'
s += '<br />'
if myTitle == 'sprint':
s+='<img src = http://stockcharts.com/c-sc/sc?s=S&p=D&b=5&g=0&i=0&r=4613>'
elif myTitle == 'microsoft':
s+='<p>The stock value for Microsoft over the past month</p>'
s+='<img src = http://stockcharts.com/c-sc/sc?s=MSFT&p=D&yr=0&mn=1&dy=0&i=t32803647649&r=3507>'
s +='</body></html>'
return s
def main():
application = webapp.WSGIApplication(
[('/.*', MainHandler)],
debug=True)
wsgiref.handlers.CGIHandler().run(application)
if __name__ == '__main__':
main()
This is the output I am getting. Any time there are "..." is the end of the description. Just to reiterate, I want each description to be under the appropriate link. Also, getting rid of the unicode stuff would be great.
[IMG]http://i22.photobucket.com/albums/b343/romber1234/microsoftpage.jpg[/IMG]
Thanks for any help!