Hi all,
I have a program that reads a text file and counts the frequency of phrases, for phrases up to ten words long.
I solved a problem with printing out phrases that occur more than once; however, I have discovered that my program reads punctuation marks and includes them as part of a word.
For example: guidance.
final statement!
I do not wish to include the full stop at the end of the word "guidance" or the exclamation mark at the end of the phrase "final statement".
How would I go about ignoring punctuation marks?
Does it have anything to do with <cctype> and std::ispunct()?
Thank you
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>
using namespace std;
// A phrase: an ordered sequence of words.
struct Phrase {
    // The words of the phrase, in order.
    vector<string> words;

    // Writes every word to standard output, each followed by a single space.
    // The output therefore ends with a trailing space and has no newline.
    void display() const
    {
        for (vector<string>::const_iterator it = words.begin(); it != words.end(); ++it)
            cout << *it << " ";
    }
};
// Global frequency table mapping each phrase to an int count. updateStat()
// increments the counts; PhraseFreqPred, display() and main() read them.
map<Phrase, int> stat;
// Ordering predicate that ranks phrases by descending count in the global
// `stat` table: p1 is placed before p2 when p1's count is strictly greater.
// Counts are read with map::operator[], so a phrase that is not yet in the
// table is inserted with a count of 0.
struct PhraseFreqPred {
    bool operator()(const Phrase& p1, const Phrase& p2)
    {
        const int firstCount = stat[p1];
        const int secondCount = stat[p2];
        return secondCount < firstCount;
    }
};
// Less-than for Phrase, which lets Phrase be used as a std::map key.
// Two phrases compare exactly as their word vectors do.
bool operator<(const Phrase& lhs, const Phrase& rhs)
{
    const vector<string>& left = lhs.words;
    const vector<string>& right = rhs.words;
    return left < right;
}
// A sequence of phrases; main() keeps one such sequence per phrase length.
typedef vector<Phrase> container_t;
// Prints the most frequent repeated phrases of one length.
//
// c      - phrases to report; taken by value so that the caller's container
//          is not reordered by the sort below.
// length - number of words per phrase; used only in the heading.
// limit  - maximum number of phrases to print; 0 prints only the heading and
//          a negative value imposes no limit.
//
// Only phrases whose count in the global `stat` table is greater than 1 are
// printed, most frequent first.
void display(container_t c, const int length, int limit)
{
    // stable_sort keeps equally frequent phrases in their incoming order, so
    // the output (and which phrases make the cut) is reproducible.
    std::stable_sort(c.begin(), c.end(), PhraseFreqPred());
    cout << "Phrases of " << length << " word(s):\n";
    for (size_t i = 0; limit && i < c.size(); ++i)
    {
        const Phrase& p = c[i];
        const int count = stat[p];  // look the count up once per phrase
        // Counts are in descending order, so the first phrase that is not
        // repeated means none of the remaining ones are repeated either.
        if (count <= 1)
            break;
        p.display();
        cout << ": " << count << "\n";
        --limit;
    }
    cout << "\n";
}
// Counts every phrase that starts at the front of `words`, then drops the
// front word.
//
// For a window w1 w2 ... wk, the phrases "w1", "w1 w2", ..., "w1 ... wk" each
// have their count in the global `stat` table incremented, after which w1 is
// removed so that the next call starts at w2.
//
// words - window of pending words; modified in place (front element removed).
// word  - unused; kept so that existing callers continue to compile.
void updateStat(list<string>& words, const string& word)
{
    (void)word;

    // pop_front() on an empty list is undefined behaviour, so do nothing.
    if (words.empty())
        return;

    Phrase cur;
    for (list<string>::const_iterator iter = words.begin(); iter != words.end(); ++iter)
    {
        // cur grows by one word per step, yielding each prefix of the window.
        cur.words.push_back(*iter);
        ++stat[cur];
    }
    words.pop_front();
}
int main(){
string filename;
cout << "Enter filename: ";
cin >> filename;
ifstream input(filename.c_str());
if (!input){
cerr << "Failed to open input file " << filename << endl;
return 1;
}
const size_t LIMIT = 10;
string word;
list<string> words;
while (input >> word){
for (size_t i = 0; i < word.size(); ++i)
word[i] = tolower(word[i]);
words.push_back(word);
if (words.size() == LIMIT){
updateStat(words, word);
}
}
while (!words.empty())
updateStat(words, word);
map<int, container_t> lengths;
map<Phrase, int>::const_iterator iter1 = stat.begin();
while (iter1 != stat.end())
{
const Phrase& p = iter1->first;
lengths[p.words.size()].push_back(p);
++iter1;
}
map<int, container_t >::const_iterator iter2 = lengths.begin();
while (iter2 != lengths.end())
{
const int length = iter2->first;
const container_t& c = iter2->second;
display(c, length, 5);
++iter2;
}
while (true){
cout << "If you wish to see phrases in greater detail please
press [y]: ";
string choice;
cin >> choice;
if (!(choice == "y" || choice == "Y"))
break;
cout << "Please enter the number of words you wish to see
the phrases for [1-10]: ";
int num;
cin >> num;
if (num < 0 || lengths.find(num) == lengths.end())
cout << "Not found\n\n";
else
{
const container_t& c = lengths[num];
display(c, num, c.size());
}
}
return 0;
}