Hi all,
I am a newbie to programming and have almost completed my assignment, 'Frequency of words and phrases', to a satisfactory standard. My program reads a text file and counts the number of phrases within the file (a phrase is defined as a sequence of one or more words); it counts phrases up to 10 words long.
e.g. 'The' occurs 25 times (one word phrase), 'negative refractive index is' occurs 12 times (four word phrase)
I can print out the phrases that occur more than once; however, my program still includes phrases that occur only once, which I do not want.
Can anyone help?
Thanks
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>
using namespace std;
// A phrase: an ordered sequence of words.
struct Phrase {
    // The words making up the phrase, in the order they appeared.
    vector<string> words;

    // Writes each word to standard output, each followed by a single space
    // (so the output ends with a trailing space and no newline).
    void display() const
    {
        for (vector<string>::const_iterator w = words.begin(); w != words.end(); ++w)
        {
            cout << *w << " ";
        }
    }
};
// Global frequency table: maps each phrase to the number of times it has been
// counted so far. Keys are ordered by operator<(const Phrase&, const Phrase&).
// NOTE(review): the name `stat` may clash with the POSIX ::stat function if
// <sys/stat.h> is pulled in by a system header -- confirm on the target platform.
map<Phrase, int> stat;
struct PhraseFreqPred {
bool operator()(const Phrase& p1, const Phrase& p2)
{
return stat[p1] > stat[p2];
}
};
// Strict ordering on phrases: compares the two word sequences with
// vector's operator< (lexicographic, word by word). This is the ordering
// std::map uses for Phrase keys.
bool operator<(const Phrase& lhs, const Phrase& rhs)
{
    const vector<string>& left = lhs.words;
    const vector<string>& right = rhs.words;
    return left < right;
}
// A sequence of phrases; passed to display() and used in main() as the
// bucket holding all phrases of one word-length.
typedef vector<Phrase> container_t;
// Prints the repeated phrases of one word-length, most frequent first.
//
//   c      - the phrases to report; taken by value so the caller's container
//            is not reordered by the sort below.
//   length - number of words per phrase, used only in the heading.
//   limit  - maximum number of phrases to print; values <= 0 print none.
//
// Only phrases that occur more than once are printed: a phrase whose count in
// the global `stat` table is below 2 is never shown. The heading and the
// trailing blank line are always written.
void display(container_t c, const int length, const int limit)
{
    sort(c.begin(), c.end(), PhraseFreqPred());
    cout << "Phrases of " << length << " word(s):\n";

    // Clamp the number of candidates once, in the container's own unsigned
    // size type, to avoid signed/unsigned comparisons inside the loop.
    container_t::size_type cap = limit > 0 ? static_cast<container_t::size_type>(limit) : 0;
    if (cap > c.size())
        cap = c.size();

    for (container_t::size_type i = 0; i < cap; ++i)
    {
        const Phrase& p = c[i];
        // Read-only lookup: operator[] would insert a zero entry for an
        // unknown phrase.
        const map<Phrase, int>::const_iterator entry = stat.find(p);
        const int count = (entry == stat.end()) ? 0 : entry->second;
        // The phrases are sorted by descending count, so once one occurs
        // fewer than twice, every remaining phrase does too.
        if (count < 2)
            break;
        p.display();
        cout << ": " << count << "\n";
    }
    cout << "\n";
}
void updateStat(list<string>& words, const string& word)
{
list<string>::const_iterator iter = words.begin();
Phrase cur;
while (iter != words.end())
{
cur.words.push_back(*iter++);
++stat[cur];
}
words.pop_front();
}
int main(){
string filename;
cout << "Enter filename: ";
cin >> filename; // inputting and reading input filename
ifstream input(filename.c_str());
if (!input){
cerr << "Failed to open input file " << filename << endl;
return 1;
} // error in reading the input file
const int LIMIT = 10;
string word;
list<string> words;
while (input >> word)
{
for (int i = 0; i < word.size(); ++i)
word[i] = tolower(word[i]);
words.push_back(word);
if (words.size() == LIMIT)
{
updateStat(words, word);
}
}
while (!words.empty())
updateStat(words, word);
map<int, container_t> lengths;
map<Phrase, int>::const_iterator iter1 = stat.begin();
while (iter1 != stat.end())
{
const Phrase& p = iter1->first;
lengths[p.words.size()].push_back(p);
++iter1;
}
map<int, container_t >::const_iterator iter2 = lengths.begin();
while (iter2 != lengths.end())
{
const int length = iter2->first;
const container_t& c = iter2->second;
display(c, length, 5);
++iter2;
}
while (true) {
cout << "Do you wish to see phrases in more detail (y or n)?: ";
string choice;
cin >> choice;
if (choice == "n")
break;
cout << "Enter number of word in phrase to display: ";
int num;
cin >> num;
if (num < 1 || lengths.find(num) == lengths.end())
cout << "Not found\n\n"<< endl;
else {
const container_t& c = lengths[num];
display(c, num, c.size());
}
}
return 0;
}