When I fetch the page through the external utilities module (the `FLOSSmoleutils.get_page` call that is commented out below), which runs the same code, the program terminates. However, when I run that code inline in the current module, the program seems to work just fine. Can anyone tell me where I'm going wrong? I'd like to be able to use the utilities module in several programs.
'''
Created on Jun 5, 2009
@author: Steven Norris
This module provides the spider capability to be used to collect pages from github.com.
'''
import FLOSSmoleutils
from HTMLParser import HTMLParser
import httplib
import re
import time
import MySQLdb
BASE_SITE="github.com"
'''
This class is used to check every page of the repository for a projects list
'''
class GitHubSpider(HTMLParser):
    """HTML parser that collects the hrefs of ``<a>`` tags containing '/tree'.

    Feed a page with ``feed()``; matching hrefs accumulate in ``check_links``
    in document order until ``reset_link_list()`` is called.
    """

    def __init__(self):
        # Call the base initializer explicitly; this form works on both
        # Python 2 and Python 3.
        HTMLParser.__init__(self)
        # Per-instance list: a class-level list would be shared by every spider.
        self.check_links = []

    def reset_link_list(self):
        """Discard the links collected so far (use between feed() calls)."""
        self.check_links = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an ``<a>`` tag when it contains '/tree'.

        HTMLParser invokes this hook for every start tag. ``attrs`` is a list
        of ``(name, value)`` pairs; the href is looked up by name so that
        anchors whose first attribute is not href, or that have no attributes
        at all, are handled without error.
        """
        if tag != 'a':
            return
        href = dict(attrs).get('href')
        # href is None when the attribute is missing or has no value.
        if href and '/tree' in href:
            self.check_links.append(href)

    # Backward-compatible alias for the original hook name, which HTMLParser
    # itself never calls.
    handle_start_tag = handle_starttag
'''
This method runs the spider sequence needed to collect the information from github.com
'''
def main():
    """Run the spider against the GitHub repositories listing page.

    Requests ``http://<BASE_SITE>/repositories``, prints the raw page, feeds
    it to a ``GitHubSpider`` and prints every link the spider collected.

    Network failures are reported on stdout and end the run early. Any other
    error (for example a bug in the parser) is allowed to propagate instead of
    being misreported as a failed request.
    """
    # Local import: only needed here to name the low-level network error type.
    import socket

    url = "http://" + BASE_SITE + "/repositories"

    try:
        conn = httplib.HTTPConnection(BASE_SITE)
    except httplib.HTTPException as err:
        print("Connection failed.")
        print(err)
        return

    try:
        print(url)
        conn.request("GET", url)
        resp = conn.getresponse()
        base_page = str(resp.read())
    except (httplib.HTTPException, socket.error) as err:
        print("Base site request failed.")
        print(err)
        return
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

    print(base_page)
    # Alternative fetch through the shared helper (disabled):
    # base_page=FLOSSmoleutils.get_page("http://"+BASE_SITE+"/repositories",conn)

    # Create the spider and begin the feed.
    print('making spider')
    spider = GitHubSpider()
    print('feed')
    spider.feed(base_page)
    print(spider.check_links)
    for link in spider.check_links:
        print(link)
# Run the spider only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == "__main__":
    main()
'''
Created on Jun 5, 2009
@author: Steven Norris
This module provides basic utilities for the FLOSS mole spiders.
'''
def get_page(url, conn):
    """Fetch *url* over the connection *conn* and return the body as a string.

    Parameters:
        url:  request target passed to ``conn.request("GET", url)``.
        conn: an object providing ``request``, ``getresponse`` and ``close``
              (for example an ``httplib.HTTPConnection``).

    Returns:
        The response body converted with ``str()``, or ``None`` if the
        request failed; the failure and its cause are printed.

    The connection is always closed before returning, on success and on
    failure alike.
    """
    try:
        conn.request("GET", url)
        resp = conn.getresponse()
        return str(resp.read())
    except Exception as err:
        # Best-effort helper: report the failure instead of raising, but let
        # KeyboardInterrupt/SystemExit through (a bare except would not).
        print("The page request failed.")
        print(err)
        return None
    finally:
        conn.close()