Hi, I have this code:
import urllib2 as url
import webbrowser
def extract(text, sub1, sub2):
"""
extract a substring from text between first
occurances of substrings sub1 and sub2
"""
return text.split(sub1, 1)[-1].split(sub2, 1)[0]
start="http://xkcd.com/"
permlist=[]
textlist=[]
for i in range(1, 638):
temp=start+str(i)
permlist.append(str(url.urlopen(temp).readlines()[88]))
textlist.append(str(url.urlopen(temp).readlines()[77]))
for i in permlist:
i = extract(i, '<h3>Permanent link to this comic: ', '</h3>')
for i in textlist:
i = extract(i, '<img src="http://imgs.xkcd.com/comics/scribblenauts.png" title="', '"')
print zip(permlist, textlist)
and whenever I run it, it raises this error:
Traceback (most recent call last):
File "C:/Python26/test.py", line 15, in <module>
permlist.append(str(url.urlopen(temp).readlines()[88]))
File "C:\Python26\lib\urllib2.py", line 124, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python26\lib\urllib2.py", line 389, in open
response = meth(req, response)
File "C:\Python26\lib\urllib2.py", line 502, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python26\lib\urllib2.py", line 427, in error
return self._call_chain(*args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "C:\Python26\lib\urllib2.py", line 510, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 404: Not Found
What is the problem, but mainly what can I do to fix it?
thanks in advance