Loop problem

Question

MasterDucky 0 Newbie Poster

17 Years Ago

Here's a program I wrote to uppercase proper nouns in a file.
It opens three files:
1. the file to uppercase.
2. a list of proper nouns to uppercase.
3. a list of "preceding words": a proper noun that directly follows one of these words is not uppercased.
I rechecked it a hundred times, but for some reason it won't even get into the while() loop with rfind().
I put "cout<< "loc: "<<loc<< endl;" there to see what's going on.
Any hint would be highly appreciated. Thanks!

#include <string>
#include <cctype>
#include <vector>
#include <fstream>
#include <iostream>
using namespace std;

// Prompts on standard output for a file name, reads it from standard input
// and tries to open it on `file`, repeating until an open succeeds.
//
// file: stream to open; its error flags are cleared before the first attempt
//       and after every failed attempt.
//
// The name is read with operator>>, so it ends at the first whitespace.
//
// If no name can be read at all (end of input or a stream error on cin) the
// function reports that on cerr, sets failbit on `file` and returns with the
// stream unopened, instead of prompting forever.
void openFile(ifstream& file){

        file.clear();
        for(;;)
        {
            string fileName;
            cout << "Enter the name of your file: " ;
            if (!(cin >> fileName))
            {
                // Without this check an exhausted cin leaves fileName empty,
                // open() fails, and the loop would spin endlessly.
                cerr << "\nNo file name could be read from standard input.\n" << endl;
                file.setstate(ios::failbit);
                return;
            }
            file.open(fileName.c_str());
            if (file.good())
            {
                break;
            }
            cerr << "\nThere is no file, named --> " << fileName << " <-- in your folder.\n\n" << endl;
            // A failed open() sets failbit; clear it so the next attempt starts clean.
            file.clear();
        }
}
   
// Intended (per the in-code comments) to upper-case the first letter of each
// listed proper noun found in a user-chosen file, except where a listed
// preceding word stands just before it, and to echo every line to cout.
//
// The text file is opened interactively through openFile(); the noun list and
// the preceding-word list are read from two fixed file names, one entry per
// line.
//
// NOTE(review): the search logic is kept exactly as posted; the places where
// it does not do what the comments describe are flagged inline.
int main ()
{
   ifstream  file1;
   openFile(file1);
   ifstream  file2 ("List of proper nouns.txt");
   ifstream  file3 ("List of proper nouns preceeding words.txt");
   ofstream TheCopy;
   // Opened in append mode; nothing is written to it while the
   // "TheCopy << str" line near the end stays commented out.
   TheCopy.open ("Your_Uppercased_Proper_Nouns_Copy.srt",ios::app);
   string str;
   vector<string> vProperNouns;
   vector<string> vPreceeding;        //vProperNounsPrecedingWords

   // Load both word lists, one entry per line.
   while (getline(file2, str))
   {
       vProperNouns.push_back(str);
   }
   file2.close();
   while (getline(file3, str))
   {
       vPreceeding.push_back(str);
   }
   file3.close();

   //write a line into str
   while (getline(file1, str))
   {
         BreakLoop:
         for ( string::size_type i = 0; i < vProperNouns.size(); i++)
         {     //search for proper noun in str
               string::size_type pos = 0;
               while (( pos = str.find(vProperNouns[i], pos)) != string::npos)
               {
                     for ( string::size_type j = 0; j < vPreceeding.size(); j++)
                     {   //search for preceding word before proper noun
                         string::size_type loc = 0;
                         // NOTE(review): rfind() is applied to the noun
                         // (vProperNouns[i]) although pos is an index into
                         // str, so this loop is entered only when the noun
                         // text itself contains the preceding word.
                         while((loc = vProperNouns[i].rfind(vPreceeding[j],
                                   pos)) != string::npos)
                         {    cout<< "loc: "<<loc<< endl;
                             // the one it found is just not before the
                             // proper noun: go back to for().
                             // NOTE(review): size_type is unsigned, so the
                             // subtraction wraps around when pos is smaller
                             // than the preceding word's length + 1.
                             if ( loc < (pos - (vPreceeding[j].size()+ 1)))
                             {
                                 break;
                             }
                             //the one it found is just before the proper
                             //noun: break out of while() and out of for() too.
                             else
                             {
                                 // NOTE(review): the goto restarts the noun
                                 // loop at i = 0, where pos is declared anew
                                 // as 0, so this change to pos is discarded
                                 // and str is re-scanned unchanged.
                                 pos += str.size();
                                 goto BreakLoop;
                             }
                         }
                     }
               // NOTE(review): pos is not advanced after the match; the next
               // find() starts at the same index.
               str[pos] = toupper (str[pos]);
               }
         }
         cout<<str<<endl;
         //TheCopy << str << endl;
   }
   file1.close();
   TheCopy.close();
   system("pause");
   return 0;
}

Sorry for the messed up code, apparently the edit box is larger than the template on the site.

c++

2 Contributors
13 Replies
150 Views
2 Days Discussion Span
Latest Post 17 Years Ago Latest Post by MasterDucky

vijayan121 1,152 Posting Virtuoso

17 Years Ago

yup, the specific problem is here:

while((loc = /*vProperNouns[i]*/str.rfind(vPreceeding[j],
                                   pos)) != string::npos)

the more fundamental problem is the complexity of the code you have written. a while within a for within a while within a for!. why don't you simplify it by writing a function

bool does_precede( const string& str, const string& proper_noun, 
                                      const string& preceding_word ) ;

then you would have only loops nested one level (once in main and once in the function).

[It is] best to confuse only one issue at a time.

- K&R

vijayan121 1,152 Posting Virtuoso

17 Years Ago

> Its true that a function would simplify a lot.
> I just have no idea how start it.
breaking it up into many small functions would simplify it even more. if the functions have meaningful names, the code would also be somewhat self-documenting.
this is one way of doing it (caveat: untested).

#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include <ctype.h>
using namespace std;

typedef string::size_type size_type ;

// Appends every line of the file `file_name` to `vec`, in file order.
// If the file cannot be opened nothing is read and `vec` is left unchanged.
void fill_vector( const char* file_name, vector<string>& vec )
{
  ifstream input( file_name ) ;
  for( string current ; getline( input, current ) ; )
  {
    vec.push_back( current ) ;
  }
}

// Returns true if the `nchars` characters of `line` starting at `pos` form a
// complete word: the character before `pos` (if any) and the character after
// the span (if any) must both be whitespace. The start and the end of the
// line count as delimiters.
//
// line:   text being examined
// pos:    index of the first character of the candidate word
// nchars: length of the candidate word
bool is_ws_delimited( const string& line, size_type pos,
                      size_type nchars )
{
  // isspace() is only defined for values representable as unsigned char (or
  // EOF); a plain char can be negative for non-ASCII text, hence the casts.
  if( (pos!=0) &&
      !isspace( static_cast<unsigned char>( line[pos-1] ) ) ) return false ;
  if( ( pos+nchars < line.size() ) &&
      !isspace( static_cast<unsigned char>( line[pos+nchars] ) ) ) return false ;
  return true ;
}

// Returns true if the word `precede` stands directly in front of position
// `pos` in `line`: an occurrence of `precede` that starts at the beginning of
// the line or after whitespace, and is separated from `pos` by one or more
// whitespace characters and nothing else.
//
// line:    text being examined
// pos:     index in `line` where the word of interest starts
// precede: candidate preceding word; an empty string never matches
//
// Occurrences are tried from right to left. Looking only at the right-most
// hit at or before `pos` is not enough: that hit may overlap the word at
// `pos` itself (e.g. "the" inside "then", or noun == precede as in
// "tom tom") and would hide the real preceding word further left.
bool does_precede( const string& line, size_type pos,
                   const string& precede )
{
  if( precede.empty() || pos > line.size() ) return false ;

  size_type from = pos ;
  for(;;)
  {
    const size_type loc = line.rfind( precede, from ) ;
    if( loc == string::npos ) return false ;

    const size_type gap = loc + precede.size() ;  // first index after the hit
    if( gap < pos )
    {
      // Everything between the hit and pos must be whitespace. A failure here
      // is final: any hit further left would span the same characters.
      // (This also covers the "whitespace after the word" delimiter test.)
      for( size_type i = gap ; i < pos ; ++i )
      {
        if( !isspace( static_cast<unsigned char>( line[i] ) ) )
           return false ;
      }
      // The hit must also start a word, not be the tail of a longer one.
      if( ( loc == 0 ) ||
          isspace( static_cast<unsigned char>( line[loc-1] ) ) )
         return true ;
    }
    if( loc == 0 ) return false ;
    from = loc - 1 ;   // strictly decreasing, so the loop terminates
  }
}

// Returns true as soon as does_precede() accepts any entry of `prec_words`
// for position `pos` of `line`; returns false if none is accepted (including
// when the list is empty).
bool has_preceding_word( const string& line, size_type pos,
                         const vector<string>& prec_words )
{
  typedef vector<string>::const_iterator word_iterator ;

  for( word_iterator word = prec_words.begin() ;
       word != prec_words.end() ; ++word )
  {
    if( does_precede( line, pos, *word ) ) return true ;
  }
  return false ;
}

// Scans `line` for every occurrence of `noun` and upper-cases the first
// character of each occurrence that is_ws_delimited() accepts as a whole word
// and for which has_preceding_word() reports a match in `prec_words`.
//
// line:       text to modify in place
// noun:       word to look for; an empty noun is ignored
// prec_words: list handed to has_preceding_word()
void process_noun( string& line, const string& noun,
                   const vector<string>& prec_words )
{
  // find("") succeeds at pos without ever advancing it, which would loop
  // forever (e.g. for a blank line in the noun list).
  if( noun.empty() ) return ;

  size_type pos = 0;
  while( ( pos = line.find( noun, pos ) ) != string::npos )
  {
    if( is_ws_delimited(line,pos,noun.size()) &&
        has_preceding_word( line, pos, prec_words ) )
    {
      // toupper() needs a value representable as unsigned char; a plain char
      // may be negative for non-ASCII text.
      line[pos] = static_cast<char>(
                    toupper( static_cast<unsigned char>( line[pos] ) ) ) ;
    }
    pos += noun.size() ;   // continue after this occurrence
  }
}

// Loads the noun list and the preceding-word list, then copies "infile.txt"
// to "copy.txt" line by line, running process_noun() on each line once per
// listed noun.
int main ()
{
  vector<string> nouns ;
  vector<string> prec_words ;
  fill_vector( "nouns.txt", nouns ) ;
  fill_vector( "preceeding_words.txt", prec_words ) ;

  ifstream  fin( "infile.txt" ) ;
  ofstream fout( "copy.txt" ) ;

  typedef vector<string>::const_iterator noun_iterator ;

  for( string line ; getline( fin, line ) ; )
  {
    // Each noun is applied to the same (possibly already modified) line.
    for( noun_iterator noun = nouns.begin() ; noun != nouns.end() ; ++noun )
    {
      process_noun( line, *noun, prec_words ) ;
    }
    fout << line << endl ;
  }
}

Reply to this topic

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.

MasterDucky 0 Newbie Poster · Answer 1 · 2008-03-09T20:54:43+00:00

MasterDucky 0 Newbie Poster

17 Years Ago

Never mind, i found the problem. Thanks!

MasterDucky 0 Newbie Poster · Answer 2 · 2008-03-10T03:08:07+00:00

Thanks for the help.
Its true that a function would simplify a lot.
I just have no idea how start it.

// Placeholder for the helper suggested in the reply: given the line, a proper
// noun and a candidate preceding word, it is meant to answer whether the word
// precedes the noun. The body is still empty, so no value is returned yet.
bool does_precede( const string& str, const string& proper_noun, 
                                      const string& preceding_word ) 
{

}

MasterDucky 0 Newbie Poster · Answer 3 · 2008-03-10T10:32:25+00:00

Wow, that's really nice, thank you so much Vijayan!
You taught me a great deal here about how to write functions.

I'm going to study it and test it, and I will let you know how it works.

Much appreciated! Have a good day! :)

MasterDucky 0 Newbie Poster · Answer 4 · 2008-03-11T16:52:40+00:00

Im sorry but i dont understand the purpose of "nchars"

17 bool is_ws_delimited( const string& line,  size_type  pos, size_type nchars )

vijayan121 1,152 Posting Virtuoso · Answer 5 · 2008-03-11T21:51:39+00:00

> i dont understand the purpose of "nchars"
bool is_ws_delimited( const string& line, size_type pos, size_type nchars ) returns true if the sequence of nchars characters starting at position pos in string line is a complete word (i.e. delimited by whitespace, or by the start/end of the line, at either end).
The idea is that if the noun is "heart" and a preceding word is "lion", then "abcd efghi lion heart jkl mnop" is a match, but "abcd efghi vermilion hearthrug jkl mnop" is not.

MasterDucky 0 Newbie Poster · Answer 6 · 2008-03-11T22:19:09+00:00

So it is equal with the size of the preceding word?
prec_words.size()
Im asking it because i didnt see it declared and i didnt know if it was normal or not.

vijayan121 1,152 Posting Virtuoso · Answer 7 · 2008-03-11T22:29:48+00:00

here it checks that noun appears in line as a complete word starting at pos

// Excerpt (the function's closing brace is not part of the quoted snippet).
// Walks through every occurrence of `noun` in `line`.
void process_noun( string& line, const string& noun,
                   const vector<string>& prec_words )
{
  size_type pos = 0;
  while( ( pos = line.find( noun, pos ) ) != string::npos )
  {
    // noun.size() is passed as nchars: the span [pos, pos + noun.size()) is
    // the occurrence just found, and is_ws_delimited() decides whether that
    // span is a complete word.
    if( is_ws_delimited(line,pos,noun.size()) &&
        has_preceding_word( line, pos, prec_words ) )
            line[pos] = toupper( line[pos] ) ;
    // resume the search after this occurrence
    pos += noun.size() ;
  }

and here it checks that precede appears in line as a complete word starting at loc

// Returns true if the word `precede` stands directly in front of position
// `pos` in `line`: an occurrence of `precede` that starts at the beginning of
// the line or after whitespace, and is separated from `pos` by one or more
// whitespace characters and nothing else.
//
// line:    text being examined
// pos:     index in `line` where the word of interest starts
// precede: candidate preceding word; an empty string never matches
//
// Occurrences are tried from right to left. Looking only at the right-most
// hit at or before `pos` is not enough: that hit may overlap the word at
// `pos` itself (e.g. "the" inside "then", or noun == precede as in
// "tom tom") and would hide the real preceding word further left.
bool does_precede( const string& line, size_type pos,
                   const string& precede )
{
  if( precede.empty() || pos > line.size() ) return false ;

  size_type from = pos ;
  for(;;)
  {
    const size_type loc = line.rfind( precede, from ) ;
    if( loc == string::npos ) return false ;

    const size_type gap = loc + precede.size() ;  // first index after the hit
    if( gap < pos )
    {
      // Everything between the hit and pos must be whitespace. A failure here
      // is final: any hit further left would span the same characters.
      // (This also covers the "whitespace after the word" delimiter test.)
      for( size_type i = gap ; i < pos ; ++i )
      {
        if( !isspace( static_cast<unsigned char>( line[i] ) ) )
           return false ;
      }
      // The hit must also start a word, not be the tail of a longer one.
      if( ( loc == 0 ) ||
          isspace( static_cast<unsigned char>( line[loc-1] ) ) )
         return true ;
    }
    if( loc == 0 ) return false ;
    from = loc - 1 ;   // strictly decreasing, so the loop terminates
  }
}

MasterDucky 0 Newbie Poster · Answer 8 · 2008-03-11T22:56:48+00:00

Ok thanks, i gotta think about it, i dont get it 100%.

Btw it works alright, the only problem with it is when i write two nouns one after the other
like "the tom tom", it uppercase the first one instead of the second. But everywhere else
it acts like i want it: not to uppercase after preceding words.

And i switched the logic by the way to do so.

62.
if( is_ws_delimited(line,pos,noun.size()) && has_preceding_word( line, pos, prec_words ) )

To

62.
if( !( is_ws_delimited(line,pos,noun.size()) &&  has_preceding_word( line, pos, prec_words)))

vijayan121 1,152 Posting Virtuoso · Answer 9 · 2008-03-11T23:03:18+00:00

to get the effect you want (not to uppercase after preceding words) i think it should be

if( ( is_ws_delimited(line,pos,noun.size()) &&
    !has_preceding_word( line, pos, prec_words )))

MasterDucky 0 Newbie Poster · Answer 10 · 2008-03-11T23:19:25+00:00

Of course, you're right!

It works now! Thanks! :)

MasterDucky 0 Newbie Poster · Answer 11 · 2008-03-12T01:48:02+00:00

Here is my finished program.

#include <iostream>
#include <fstream>
#include <vector>
#include <cctype>
using namespace std;

// Prompts on standard output for a file name, reads it from standard input
// and tries to open it on `file`, repeating until an open succeeds.
//
// file: stream to open; its error flags are cleared before the first attempt
//       and after every failed attempt.
//
// The name is read with operator>>, so it ends at the first whitespace.
//
// If no name can be read at all (end of input or a stream error on cin) the
// function reports that on cerr, sets failbit on `file` and returns with the
// stream unopened, instead of prompting forever.
void openFile(ifstream& file){

        file.clear();
        for(;;)
        {
            string fileName;
            cout << "Enter the name of your file: " ;
            if (!(cin >> fileName))
            {
                // Without this check an exhausted cin leaves fileName empty,
                // open() fails, and the loop would spin endlessly.
                cerr << "\nNo file name could be read from standard input.\n" << endl;
                file.setstate(ios::failbit);
                return;
            }
            file.open(fileName.c_str());
            if (file.good())
            {
                break;
            }
            cerr << "\nThere is no file, named --> " << fileName << " <-- in your folder.\n\n" << endl;
            // A failed open() sets failbit; clear it so the next attempt starts clean.
            file.clear();
        }
}

typedef string::size_type size_type ;

// Appends every line of the file `file_name` to `vec`, in file order.
// If the stream is not good after opening, an error message is written to
// cerr, system("pause") is run and the program exits with status 1.
void fill_vector( const char* file_name, vector<string>& vec )
{
  ifstream source( file_name ) ;
  if( !source.good() )
  {
    cerr << "\nThere is no file, named \n\n\t\t--> " ;
    cerr << file_name << " <--\n\n\t\t\t\t\t\t\t\t in your folder.\n\n" << endl ;
    cerr << "\t  Please restart the program and check your file name, thanks! \n\n" << endl ;
    system("pause");
    exit (1);
  }

  for( string entry ; getline( source, entry ) ; )
  {
    vec.push_back( entry ) ;
  }
}

// Returns true if the `nchars` characters of `line` starting at `pos` form a
// complete word.
//
// Before the span: start of line or a whitespace character.
// After the span:  end of line, a whitespace character, or one of the
//                  punctuation marks . , ! ? : ;
//
// line:   text being examined
// pos:    index of the first character of the candidate word
// nchars: length of the candidate word
bool is_ws_delimited( const string& line, size_type pos, size_type nchars )
{
  // isspace() is only defined for values representable as unsigned char (or
  // EOF); a plain char can be negative for non-ASCII text, hence the casts.
  if( (pos!=0) &&
      !isspace( static_cast<unsigned char>( line[pos-1] ) ) ) return false ;

  const size_type after = pos + nchars ;
  if( after < line.size() )
  {
    const char next = line[after] ;
    switch( next )
    {
      case '.': case ',': case '!': case '?': case ':': case ';':
        break ;   // punctuation directly after the word still ends it
      default:
        if( !isspace( static_cast<unsigned char>( next ) ) ) return false ;
    }
  }

  return true ;
}

// Returns true if the word `precede` stands directly in front of position
// `pos` in `line`: an occurrence of `precede` that starts at the beginning of
// the line or after whitespace, and is separated from `pos` by one or more
// whitespace characters and nothing else.
//
// line:    text being examined
// pos:     index in `line` where the word of interest starts
// precede: candidate preceding word; an empty string never matches
//
// Occurrences are tried from right to left. Looking only at the right-most
// hit at or before `pos` is not enough: that hit may overlap the word at
// `pos` itself (e.g. "the" inside "then", or noun == precede as in
// "tom tom") and would hide the real preceding word further left.
bool does_precede( const string& line, size_type pos, const string& precede )
{
  if( precede.empty() || pos > line.size() ) return false ;

  size_type from = pos ;
  for(;;)
  {
    const size_type loc = line.rfind( precede, from ) ;
    if( loc == string::npos ) return false ;

    const size_type gap = loc + precede.size() ;  // first index after the hit
    if( gap < pos )
    {
      // Everything between the hit and pos must be whitespace. A failure here
      // is final: any hit further left would span the same characters.
      // (Punctuation right after the preceding word therefore never counts.)
      for( size_type i = gap ; i < pos ; ++i )
      {
        if( !isspace( static_cast<unsigned char>( line[i] ) ) )
           return false ;
      }
      // The hit must also start a word, not be the tail of a longer one.
      if( ( loc == 0 ) ||
          isspace( static_cast<unsigned char>( line[loc-1] ) ) )
         return true ;
    }
    if( loc == 0 ) return false ;
    from = loc - 1 ;   // strictly decreasing, so the loop terminates
  }
}

// Returns true as soon as does_precede() accepts any entry of `prec_words`
// for position `pos` of `line`; returns false if none is accepted (including
// when the list is empty).
bool has_preceding_word( const string& line, size_type pos, const vector<string>& prec_words )
{
  typedef vector<string>::const_iterator word_iterator ;

  for( word_iterator word = prec_words.begin() ;
       word != prec_words.end() ; ++word )
  {
    if( does_precede( line, pos, *word ) ) return true ;
  }
  return false ;
}

// Scans `line` for every occurrence of `noun` and upper-cases the first
// character of each occurrence that is_ws_delimited() accepts as a whole word
// and for which has_preceding_word() finds NO match in `prec_words`.
//
// line:       text to modify in place
// noun:       word to look for; an empty noun is ignored
// prec_words: list handed to has_preceding_word()
void process_noun( string& line, const string& noun, const vector<string>& prec_words )
{
  // find("") succeeds at pos without ever advancing it, which would loop
  // forever (e.g. for a blank line in the noun list).
  if( noun.empty() ) return ;

  size_type pos = 0;
  while( ( pos = line.find( noun, pos ) ) != string::npos )
  {
    if( is_ws_delimited(line,pos,noun.size()) && ! has_preceding_word( line, pos, prec_words ))
    {
      // toupper() needs a value representable as unsigned char; a plain char
      // may be negative for non-ASCII text.
      line[pos] = static_cast<char>(
                    toupper( static_cast<unsigned char>( line[pos] ) ) ) ;
    }
    // Runs for every occurrence, matched or not: resume after it.
    pos += noun.size() ;
  }
}

// Loads the proper-noun list and the preceding-word list from their fixed
// file names, asks the user for the text file through openFile(), and writes
// each line to "Your_Uppercased_Proper_Nouns_Copy.srt" after running
// process_noun() on it once per listed noun.
int main ()
{
  vector<string> nouns ;
  vector<string> prec_words ;
  fill_vector( "List of proper nouns.txt", nouns ) ;
  fill_vector( "List of proper nouns preceding words.txt", prec_words ) ;

  ifstream  fin ;
  openFile (fin);
  ofstream fout( "Your_Uppercased_Proper_Nouns_Copy.srt" ) ;

  typedef vector<string>::const_iterator noun_iterator ;

  for( string line ; getline( fin, line ) ; )
  {
    // Each noun is applied to the same (possibly already modified) line.
    for( noun_iterator noun = nouns.begin() ; noun != nouns.end() ; ++noun )
    {
      process_noun( line, *noun, prec_words ) ;
    }
    fout << line << endl ;
    //cout << line << endl ;
  }

//system("pause");
}