Hi,
I'm at present working on extracting data from various online news sites, and then analyzing it. The code is given below. But when I'm writing this last code, the following error is coming:
>>> import newsfeatures
>>> allw,artw,artt= newsfeatures.getarticlewords( )
>>> wordmatrix,wordvec= newsfeatures.makematrix(allw,artw)
Error:
Traceback (most recent call last):
File "<pyshell#2>", line 1, in <module>
wordmatrix,wordvec= newsfeatures.makematrix(allw,artw)
TypeError: 'NoneType' object is not iterable
Can anybody help me to find why this error is coming.
Thank you.
Sagar
import feedparser
import re
feedlist=['http://today.reuters.com/rss/topNews',
'http://today.reuters.com/rss/domesticNews',
'http://today.reuters.com/rss/worldNews',
'http://hosted.ap.org/lineups/TOPHEADS-rss_2.0.xml',
'http://hosted.ap.org/lineups/USHEADS-rss_2.0.xml',
'http://hosted.ap.org/lineups/WORLDHEADS-rss_2.0.xml',
'http://hosted.ap.org/lineups/POLITICSHEADS-rss_2.0.xml',
'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml',
'http://www.nytimes.com/services/xml/rss/nyt/International.
xml',
'http://news.google.com/?output=rss',
'http://feeds.salon.com/salon/news',
'http://www.foxnews.com/xmlfeed/rss/0,4313,0,00.rss',
'http://www.foxnews.com/xmlfeed/rss/0,4313,80,00.rss',
'http://www.foxnews.com/xmlfeed/rss/0,4313,81,00.rss',
'http://rss.cnn.com/rss/edition.rss',
'http://rss.cnn.com/rss/edition_world.rss',
'http://rss.cnn.com/rss/edition_us.rss']
def stripHTML(h):
p=''
s=0
for c in h:
if c=='<': s=1
elif c=='>':
s=0
p+=' '
elif s==0: p+=c
return p
def separatewords(text):
splitter=re.compile('\\W*')
return[s.lower() for s in splitter.split(text) if len
(s)>3]
def getarticlewords( ):
allwords={}
articlewords=[]
articletitles=[]
ec=0
# Loop over every feed
for feed in feedlist:
f=feedparser.parse(feed)
# Loop over every article
for e in f.entries:
# Ignore identical articles
if e.title in articletitles:continue
# Extract the words
txt=e.title.encode('utf8')+stripHTML
(e.description.encode('utf8'))
words=separatewords(txt)
articlewords.append({})
articletitles.append(e.title)
# Increase the counts for this word in allwords and in
articlewords
for word in words:
allwords.setdefault(word,0)
allwords[word]+=1
articlewords[ec].setdefault(word,0)
articlewords[ec][word]+=1
ec+=1
return allwords,articlewords,articletitles
def makematrix(allw,articlew):
wordvec=[]
for w,c in allw.items():
if c>3 and c<len(articlew)*0.6:
wordvec.append(w)
l1=[[(word in f and f[word] or 0) for word in
worddev] for f in articlew]
return l1,wordvec