def index_dir(self, base_path):
        num_files_indexed = 0
        allfiles = os.listdir(base_path)
        #print allfiles
        num_files_indexed = len(allfiles)
        #print num_files_indexed
        docnumber = 0
        self._inverted_index = {} #dictionary
        for file in allfiles: 
                self.documents = [base_path+file] #list of all text files
                f = open(base_path+file, 'r')
                lines = f.read()
		# Tokenize the file into words
                tokens = self.tokenize(lines)
                docnumber = docnumber + 1
                print 'docnumber', docnumber
                for term in tokens:  
# check if the key already exists in the dictionary, if yes, 
# just add a new value for the key
                    #if self._inverted_index.has_key(term)
                    if term in sorted(self._inverted_index.keys()):
                        docnumlist = self._inverted_index.get(term)
                        docnumlist = docnumlist.append(docnumber)
                    else:
# if the key doesn't exist in dictionary, add the key (term) 
# and associate the docnumber value with it.	
                        self._inverted_index = self._inverted_index.update({term: docnumber})
			#self._inverted_index[term] = docnumber 
                f.close()
        print 'dictionary', self._inverted_index 
        print 'keys', self._inverted_index.keys()
        return num_files_indexed

I'm working on an information retrieval project where we are supposed to crawl through multiple text file, tokenize the files and store the words in an inverted list (dictionary) data structure.

ex:
doc1.txt: "the dog ran"
doc2.txt: "the cat slept"
_inverted_index = {
'the': [0,1],
'dog': [0],
'ran': [0],
'cat': [1],
'slept': [1]
}
where 0,1 are docIDs

I'm getting the following error:
'Nontype' object has no attribute keys. line#95

All help is highly appreciated.

The error message, and not the error type, were of great help.

And a code usage sample also

And wheres line 95?

Cheers and Happy coding

The error message, and not the error type, were of great help.

i.e. include the entire error message (which usually shows the offending line).

""" ------------------------------------------------
Also, this code can be simplified 
Dictionary key lookup is simply
if key in dictionary_name:
""" ------------------------------------------------
    self._inverted_index = {} #dictionary
## ...
## ...
# check if the key already exists in the dictionary, if yes, 
# just add a new value for the key
    if term in sorted(self._inverted_index.keys()):

I did this code to get words unique to files listed by id dictionary:

import string
import random
# Sherlock Holmes and Alice in Wonderland
filenames = {'doc001':'advsh12.txt', 'doc002' : '11.txt'}

words = dict()
words_and_docids = dict()
allwords = set()

notthese = string.punctuation + string.digits
samplesize = 64

for docid in filenames:
    words[docid] = set(
        filter(None,
               (word.lower().strip(notthese)
                ## read in and replace the--without spaces with spaces
                for word in open(filenames[docid]).read().replace('--', '  ').split())
               )
        )
    allwords |= words[docid]

for word in allwords:
    words_and_docids[word] = set(docid for docid in filenames if word in words[docid])

unique = [word for word in allwords if len(words_and_docids[word]) == 1 ]

print 'Words unique in one file:',len(unique),'\nSample of %i:' % samplesize

print '\n'.join("%s: %s" % (fileid, word) for word in random.sample(unique,40)
                for fileid in words_and_docids[word])

raw_input('Ready! Push Enter.')
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.