Hi all,

I have a program that reads a text file and counts the frequency of phrases, for phrases up to ten words long.

I solved a problem with printing out phrases that occur more than once; however, I have discovered that my program reads punctuation marks and includes them as part of a word.


Example) guidance.
final statement!

I do not wish to include the full stop at the end of the word "guidance" or the exclamation mark at the end of the phrase "final statement".

How would I go about ignoring punctuation marks?

Does it have anything to do with <cctype> and std::ispunct( ) ?

Thank you

#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <queue>
#include <string>
#include <vector>

using namespace std;

// An ordered sequence of words treated as a single unit (one word or several).
struct Phrase {
    // The words that make up the phrase, in order.
    vector<string> words;

    // Writes every word to standard output, each followed by one space.
    // No newline is written, so the output ends with a trailing space.
    void display() const
    {
        for (vector<string>::const_iterator it = words.begin(); it != words.end(); ++it)
            cout << *it << " ";
    }
};


// Global tally: maps each phrase to the number of times it has been counted.
// Filled in by updateStat() and read when phrases are sorted and printed.
// NOTE(review): the name `stat` could clash with the POSIX stat() declaration
// if <sys/stat.h> is ever pulled in by a header — confirm on target platforms.
map<Phrase, int> stat;

struct PhraseFreqPred {
       bool operator()(const Phrase& p1, const Phrase& p2)
       {
       return stat[p1] > stat[p2];     
       }
       };

// Ordering for Phrase so it can serve as a std::map key.
// Phrases compare exactly as their word vectors do: lexicographically,
// word by word, with a shorter phrase ordered before any longer phrase
// it is a prefix of.
bool operator<(const Phrase& lhs, const Phrase& rhs)
{
    const vector<string>& left = lhs.words;
    const vector<string>& right = rhs.words;
    return left < right;
}

// A list of phrases; main() keeps one such list per phrase length and
// display() sorts and prints it.
typedef vector<Phrase> container_t;


// Prints the repeated phrases of one length, most frequent first.
//
//   c      - the phrases to report; taken by value, so the sort below works
//            on a private copy and the caller's container keeps its order.
//   length - number of words per phrase; used only in the heading.
//   limit  - maximum number of phrases to print. Printing stops once this
//            many have been shown or the container is exhausted.
//
// Only phrases whose count in `stat` is greater than 1 are printed, and only
// those count against `limit`. The heading and a trailing blank line are
// always written.
void display(container_t c, const int length, int limit)
{
    sort(c.begin(), c.end(), PhraseFreqPred());
    cout << "Phrases of " << length << " word(s):\n";

    for (container_t::const_iterator it = c.begin(); limit != 0 && it != c.end(); ++it)
    {
        const int occurrences = stat[*it];
        if (occurrences <= 1)
            continue;   // phrases seen only once are not reported

        it->display();
        cout << ": " << occurrences << "\n";
        --limit;
    }

    cout << "\n";
}


// Counts every phrase that starts at the front of the word window, then
// slides the window forward by one word.
//
//   words - the current window of consecutive words, oldest first. On return
//           its front word has been removed.
//   word  - unused; kept so existing callers continue to compile.
//
// For a window [w1, w2, w3] this increments the counts of "w1", "w1 w2" and
// "w1 w2 w3" in the global `stat` map. Dropping w1 afterwards means the next
// call counts the phrases that start at w2, so each starting position in the
// text is counted exactly once.
void updateStat(list<string>& words, const string& word)
{
    (void)word;

    // pop_front() on an empty list is undefined behaviour, so do nothing.
    if (words.empty())
        return;

    Phrase cur;
    for (list<string>::const_iterator iter = words.begin(); iter != words.end(); ++iter)
    {
        // Extend the phrase by one word and count this longer prefix.
        cur.words.push_back(*iter);
        ++stat[cur];
    }

    words.pop_front();
}


int main(){                         

           string filename;
           cout << "Enter filename: ";
           cin >> filename;            

ifstream input(filename.c_str());
  
     if (!input){
                 cerr << "Failed to open input file " << filename << endl;
                 return 1;
                }               

  const size_t LIMIT = 10;

  string word;
  list<string> words;

  while (input >> word){
         for (size_t i = 0; i < word.size(); ++i)
         word[i] = tolower(word[i]);

         words.push_back(word);                 

         if (words.size() == LIMIT){     
         updateStat(words, word);
         }
         }

  while (!words.empty())
         updateStat(words, word);               

  map<int, container_t> lengths;
  map<Phrase, int>::const_iterator iter1 = stat.begin();

  while  (iter1 != stat.end())
         {
         const Phrase& p = iter1->first;

         lengths[p.words.size()].push_back(p);
         ++iter1;
         }

  map<int, container_t >::const_iterator iter2 = lengths.begin();

  while  (iter2 != lengths.end())
         {
         const int length = iter2->first;
         const container_t& c = iter2->second;

         display(c, length, 5);                 
         ++iter2;
         }

  while (true){  
               cout << "If you wish to see phrases in greater detail please  
  press [y]: ";		   
               string choice;
               cin >> choice;             
               
	           if (!(choice == "y" || choice == "Y"))
               break;

			   
               cout << "Please enter the number of words you wish to see 
  the phrases for [1-10]: ";
               int num;
               cin >> num;               

               if (num < 0 || lengths.find(num) == lengths.end())
               cout << "Not found\n\n";  

               else 
               {
               const container_t& c = lengths[num];
               display(c, num, c.size());
	         }
			   
               }

  return 0;
}

Once you get the word or phrase, iterate through it one character at a time and remove the punctuation marks.

Thanks I've solved that problem now.

Final question! I was wondering if anyone could help explain these two functions, as I know how to produce these results but do not fully understand what is going on.

Thanks

// Prints the repeated phrases of one length, most frequent first.
//
//   c      - the phrases to report; taken by value, so the sort below works
//            on a private copy and the caller's container keeps its order.
//   length - number of words per phrase; used only in the heading.
//   limit  - maximum number of phrases to print. Printing stops once this
//            many have been shown or the container is exhausted.
//
// Only phrases whose count in `stat` is greater than 1 are printed, and only
// those count against `limit`. The heading and a trailing blank line are
// always written.
void display(container_t c, const int length, int limit)
{
    sort(c.begin(), c.end(), PhraseFreqPred());
    cout << "Phrases of " << length << " word(s):\n";

    for (container_t::const_iterator it = c.begin(); limit != 0 && it != c.end(); ++it)
    {
        const int occurrences = stat[*it];
        if (occurrences <= 1)
            continue;   // phrases seen only once are not reported

        it->display();
        cout << ": " << occurrences << "\n";
        --limit;
    }

    cout << "\n";
}

// Counts every phrase that starts at the front of the word window, then
// slides the window forward by one word.
//
//   words - the current window of consecutive words, oldest first. On return
//           its front word has been removed.
//   word  - unused; kept so existing callers continue to compile.
//
// For a window [w1, w2, w3] this increments the counts of "w1", "w1 w2" and
// "w1 w2 w3" in the global `stat` map. Dropping w1 afterwards means the next
// call counts the phrases that start at w2, so each starting position in the
// text is counted exactly once.
void updateStat(list<string>& words, const string& word)
{
    (void)word;

    // pop_front() on an empty list is undefined behaviour, so do nothing.
    if (words.empty())
        return;

    Phrase cur;
    for (list<string>::const_iterator iter = words.begin(); iter != words.end(); ++iter)
    {
        // Extend the phrase by one word and count this longer prefix.
        cur.words.push_back(*iter);
        ++stat[cur];
    }

    words.pop_front();
}
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.