I've been looking around for an answer to this but have had no luck. I need to take two files and print the top most frequent words they have in common as well as their combined(sum) frequencies. This might be simple but I'm pretty new to programming. Any help?
def mostFrequent(word,frequency,n):
my_list = zip(word,frequency) #combine the two lists
my_list.sort(key=lambda x:x[1],reverse=True) #sort by freq
words,freqs = zip(*my_list[:n]) #take the top n entries and split back to seperate lists
return words, freqs #return our most frequent words in order
from wordFrequencies import * #gives both the word and its frequency in a file
L1 = wordFrequencies('file1.txt')
words1 = L1[0]
freqs1 = L1[1]
L2 = wordFrequencies('file2.txt')
words2 = L2[0]
freqs2 = L2[1]
print mostFrequent(words,freqs,20)
I've tried
L1 = WordFrequencies('file1.txt')
words1 = set(L1[0])
freqs1 = set(L1[1])
L2 = WordFrequencies('file2.txt')
words2 = set(L2[0])
freqs2 = set(L2[1])
words3 = words1.intersection(words2)
freqs3 = freqs1.intersection(freqs2)
print mostFrequent(words3,freqs3,20)
but it didn't work. It outputed the wrong words