I am trying to find a bug in my new and (theoretically) improved syntax highlighter program. I have made 3 versions now and have been able to debug them all with ease. Unfortunately my debugger does not do well with std containers as it shows ALL the information they contain and it can be hard to sift through it to find errors. Can anybody see where I went wrong in this code?
evenBetterMain.cpp: (since I already have main.cpp and betterMain.cpp working :P)
#include <iostream> //for cin/cout
#include <string> //for std::strings, obviously
#include <fstream> //for file operations
#include <stdio.h> //for integer parsing
using namespace std;
/// Reads an entire file into a string.
/// @param fileName path of the file to read
/// @return the file's contents, or an empty string when the file cannot be opened
std::string readFile(std::string fileName)
{
    // Open for input only, so files without write permission can still be read.
    std::ifstream file(fileName.c_str());
    if (!file.is_open())
        return "";

    std::string ret;
    char ch;
    // get(ch) reports failure in the loop condition itself, so the end-of-file
    // sentinel is never appended (testing eof() before get() appends one extra byte).
    while (file.get(ch))
        ret += ch;

    return ret; // the stream closes itself when it goes out of scope
}
/// Writes text to a file, creating the file if needed and replacing any previous contents.
/// @param fileName path of the file to write
/// @param text     the exact text to store in the file
/// Returns silently, writing nothing, when the file cannot be opened.
void writeFile(std::string fileName, std::string text)
{
    // An output-only stream creates a missing file and truncates an existing one.
    // A read/write fstream does neither: it fails to open a file that does not
    // exist yet and leaves stale bytes behind when the new text is shorter.
    std::ofstream file(fileName.c_str());
    if (!file.is_open())
        return;

    file << text;
    // the stream flushes and closes itself when it goes out of scope
}
/// The output snippets used by the highlighter, one per kind of text.
/// Members are declared in the same order in which readFormatFile reads them.
struct highlightFormatter
{
    /// Written at the start of the information.
    std::string start;
    /// Written at the end of the information.
    std::string end;
    /// Written at the end of each line of the information.
    std::string newLine;
    /// Written at the start of a segment labeled as code.
    std::string code;
    /// Written at the start of a segment labeled as comment.
    std::string comment;
    /// Written at the start of a documentation comment.
    std::string documentation;
    /// Written at the start of a preprocessor segment.
    std::string preprocessor;
    /// Written at the start of a string literal.
    std::string str;
    /// Written at the start of a character literal.
    std::string chr;
    /// Written at the start of a keyword.
    std::string keyword;
    /// Written at the start of a documentation keyword.
    std::string docKeyword;
    /// Written at the start of an operator.
    std::string op;
    /// Written at the start of a numerical literal.
    std::string number;
    /// All the words identified as keywords, stored together as one separated list.
    std::string keywords;
};
/// Loads a highlightFormatter from a text file holding one field per line.
/// The lines are read in this order: start, end, newLine, code, comment,
/// documentation, preprocessor, str, chr, keyword, docKeyword, op, number, keywords.
/// @param filename path of the format file
/// @return the populated formatter; every field is empty when the file cannot be
///         opened, and fields past the end of a short file stay empty
highlightFormatter readFormatFile(std::string filename)
{
    // One entry per line of the file, in file order.
    static std::string highlightFormatter::* const lineOrder[] = {
        &highlightFormatter::start,
        &highlightFormatter::end,
        &highlightFormatter::newLine,
        &highlightFormatter::code,
        &highlightFormatter::comment,
        &highlightFormatter::documentation,
        &highlightFormatter::preprocessor,
        &highlightFormatter::str,
        &highlightFormatter::chr,
        &highlightFormatter::keyword,
        &highlightFormatter::docKeyword,
        &highlightFormatter::op,
        &highlightFormatter::number,
        &highlightFormatter::keywords
    };
    const std::size_t lineCount = sizeof(lineOrder) / sizeof(lineOrder[0]);

    highlightFormatter ret;
    // The file is only read, so open it for input only; a read/write stream
    // refuses files that are not writable.
    std::ifstream file(filename.c_str());
    if (!file.is_open())
        return ret;

    for (std::size_t line = 0; line < lineCount; ++line)
        std::getline(file, ret.*lineOrder[line]);

    return ret; // the stream closes itself when it goes out of scope
}
/// Converts an integer to its decimal text form.
/// @param i the value to convert
/// @return the digits of i, with a leading '-' for negative values
std::string strFromInt(int i)
{
    // 20 bytes is ample for a 32-bit int: at most 11 characters plus the terminator.
    char text[20];
    sprintf(text, "%d", i);
    std::string result = text;
    return result;
}
/// Replaces every occurrence of a character with a string, then expands the
/// '#' directives found anywhere in the resulting text.
/// @param str         the text to modify in place
/// @param find        the character to replace
/// @param replacement the text substituted for each occurrence of find
///
/// Directives, processed left to right over the whole of str:
///   #X  (X a digit) sets the running counter to X; the marker itself stays in the text
///   ##  is replaced by the running counter, which is then incremented (it starts at 0)
///   #!  is replaced by a newline
void replaceAll(std::string &str, char find, std::string replacement/*a #X indicates to add a number starting at X, #! is a new line*/)
{
    // Pass 1: substitute. The search resumes *after* the inserted text; resuming at
    // the same position never terminates when the replacement itself contains
    // `find` (for example '&' -> "&amp;").
    std::string::size_type position = str.find(find);
    while (position != std::string::npos)
    {
        str.replace(position, 1, replacement);
        position = str.find(find, position + replacement.length());
    }

    // Pass 2: expand directives. Every directive is two characters long, so the
    // last character of the string can never start one.
    int counter = 0;
    for (std::string::size_type i = 0; i + 1 < str.length(); ++i)
    {
        if (str[i] != '#')
            continue;

        const char next = str[i + 1];
        if (next >= '0' && next <= '9')
            counter = next - '0';
        else if (next == '#')
            str.replace(i, 2, strFromInt(counter++));
        else if (next == '!')
            str.replace(i, 2, "\n");
    }
}
/// Reports whether a character is an ASCII letter or an ASCII digit.
/// @param ch the character to classify
/// @return true for '0'-'9', 'a'-'z' and 'A'-'Z'; false for everything else
bool isAlNum(char ch)
{
    if (ch >= '0' && ch <= '9')
        return true;
    if (ch >= 'a' && ch <= 'z')
        return true;
    return ch >= 'A' && ch <= 'Z';
}
/// Reports whether one string occurs anywhere inside another.
/// @param str    the text to search in
/// @param search the text to look for
/// @return true when search is found somewhere in str
bool strContains(std::string &str, std::string &search)
{
    const std::string::size_type where = str.find(search);
    if (where == std::string::npos)
        return false;
    return true;
}
string highlight(string inputText, highlightFormatter formatter)
{
//Declaring some constants
const char cCODE ='0';
const char cCOMMENT ='c';
const char cMLCOMMENT ='C';
const char cDOC ='d';
const char cMLDOC ='D';
const char cPREPROCESSOR ='P';
const char cSTRING ='s';
const char cCHAR ='h';
const char cKEYWORD ='k';
const char cDOCKEYWORD ='K';
const char cOPERATOR ='o';
const char cNUMBER ='n';
const char cENDL ='\n';
char state=cCODE;
string format="";
cout<<"Loading format string...\n";
for (int i=0; i<(int)inputText.length(); ++i)//loop through the whole string
{
if (((i*100)/(inputText.length()))%5==0)
cout<<(i*100)/inputText.length()<<"% formatted...\n";
switch (inputText[i])
{
case '*'://weird character... endl comments and modifies them and is an operator.
if ((state==cMLCOMMENT||state==cMLDOC)&&i+1<(int)inputText.length()&&inputText[i+1]=='/')
state=cCODE;
if (state==cCODE)
format+=cOPERATOR;
else
format+=state;
break;
case '/'://Possible comment... possible operator
if (state==cCHAR||state==cSTRING)//this is not to be parsed
{
format+=state;
break;
}
if (i+1<(int)inputText.length()&&inputText[i+1]=='/')//check for inline comments
{
if (i+2<(int)inputText.length()&&(inputText[i+2]=='/'||inputText[i+2]=='!'))//check for inline documentation
{
state=cDOC;
format+=state;
break;
}
else
{
state=cCOMMENT;
format+=state;
break;
}
}
else if (i+1<(int)inputText.length()&&inputText[i+1]=='*')//check for multiline comments
{
if (i+2<(int)inputText.length()&&(inputText[i+2]=='*'||inputText[i+2]=='!'))//check for multiline documentation
{
state=cMLDOC;
format+=state;
break;
}
else
{
state=cMLCOMMENT;
format+=state;
break;
}
}//else, treat as operator.
case ','://OPERATORS
case '<':case '>':case '=':case '|':case '%':case '^':case '&':
case '(':case ')':case '+':case '-':case '~':case '.':
case '{':case '}':case '[':case ']':case ':':case ';':case '!':
if (state!=cPREPROCESSOR&&state!=cSTRING&&state!=cCHAR&&state!=cCOMMENT&&state!=cDOC&&state!=cMLCOMMENT&&state!=cMLDOC)
format+=cOPERATOR;
else
format+=cPREPROCESSOR;
break;
case '#'://PREPROCESSOR?
if (i>0&&inputText[i-1]=='\n')//I will not deal with indented preprocessor, yet.
{
state=cPREPROCESSOR;
format+=cPREPROCESSOR;
}
else
{
format+=cPREPROCESSOR;
}
break;
case '@'://dockeyword?
if (state==cMLDOC||state==cDOC)
{
for (; i<(int)inputText.length()&&isAlNum(inputText[i]);++i)
format+=cDOCKEYWORD;
--i;
}
break;
case '\"'://string?
if (state==cCODE)
{
format+=cSTRING;
state=cSTRING;
}
else if (state==cSTRING)
{
format+=cSTRING;
state=cCODE;
}
else
{
format+=state;
}
break;
case '\''://character?
if (state==cCODE)
{
format+=cCHAR;
state=cCHAR;
}
else if (state==cCHAR)
{
format+=cCHAR;
state=cCODE;
}
else
{
format+=state;
}
break;
case '0':case '1':case '2':case '3':case '4':case '5':
case '6':case '7':case '8':case '9'://NUMBERS!!!
if (i<1||!isAlNum(inputText[i-1]))
{
for (; i<(int)inputText.length()&&isAlNum(inputText[i]); ++i)
format+=cNUMBER;
--i;
}
else
{
format+=state;
}
break;
case '\n'://newlines
if (state!=cMLCOMMENT&&state!=cMLDOC)//check for multi-line comments
{
if (i>1&&inputText[i-1]=='\\'&&state==cPREPROCESSOR)//check for multi-line preprocessor
{
format+=state;
break;
}
state=cCODE;
format+=cENDL;
break;
}
format+=cENDL;
break;
default://check for keyword
if (state!=cCODE)
{
format+=state;
break;
}
string thisWord="";
for (;i<(int)inputText.length()&&isAlNum(inputText[i]);++i)
thisWord+=inputText[i];
--i;
if (strContains(formatter.keywords,thisWord))
{
for (int ii=0; ii<(int)thisWord.length(); ++ii)
format+=cKEYWORD;
}
else
{
for (int ii=0; ii<(int)thisWord.length(); ++ii)
format+=state;
}
}
}
//now format should contain the format of inputText
state=0;
int offset=0;
for (int i=0; i<(int)format.length(); ++i)
{
if (format[i]!=state)
{
state=format[i];
inputText.insert(offset+i,1,-state);//plunk in an identifier
++offset;
}
}
string ret=inputText;
cout<<"Formatting for HTML...\n";
replaceAll(ret,'&',"&");
replaceAll(ret,' '," ");
replaceAll(ret,'>',">");
replaceAll(ret,'<',"<");
replaceAll(ret,'\"',""");
replaceAll(ret,-cCODE,formatter.code);
replaceAll(ret,-cCOMMENT,formatter.comment);
replaceAll(ret,-cMLCOMMENT,formatter.comment);
replaceAll(ret,-cDOC,formatter.documentation);
replaceAll(ret,-cMLDOC,formatter.documentation);
replaceAll(ret,-cPREPROCESSOR,formatter.preprocessor);
replaceAll(ret,-cSTRING,formatter.str);
replaceAll(ret,-cCHAR,formatter.chr);
replaceAll(ret,-cKEYWORD,formatter.keyword);
replaceAll(ret,-cDOCKEYWORD,formatter.docKeyword);
replaceAll(ret,-cNUMBER,formatter.number);
replaceAll(ret,-cENDL,formatter.newLine);
return formatter.start+ret+formatter.end;
}
int main(int argc, char *argv[])
{
if (argc!=4)
{
//output help message
cout<<"SyntaxHighlighter [input] [output] [specifiers]\n"<<
"\tParses C++ code for HTML.\n"<<
"\t\t[input] is the input filename.\n"<<
"\t\t[output] is the output filename.\n"<<
"\t\t[specifiers] is the specifiers filename.\n";
return 1;
}
cout<<"Loading input file...\n";
string text=readFile(argv[1]);
cout<<"Loading format file...\n";
highlightFormatter hf=readFormatFile(argv[3]);
string tmp=highlight(text,hf);
writeFile(argv[2],tmp);
return 0;
}
formatters.txt: (so that you don't have to make one yourselves)
#2<table width="100%" border="0" cellspacing="0">#!<tr>#!<td width="2%">1.</td>#!<td width="98%" bgcolor="#FFFFFF"><div align="left">
</span></div></td>#!</tr></table><p>
</div></td></tr>#!<td width="2%">##.</td>#!<td bgcolor="#FFFFFFFF"><div align="left">
</span><span class="Code">
</span><span class="Comment">
</span><span class="DocComment">
</span><span class="Preprocessor">
</span><span class="String">
</span><span class="Character">
</span><span class="Keyword">
</span><span class="DocCommentKeyword">
</span><span class="Operator">
</span><span class="Constant">
asm auto bool break case catch char class const const_cast continue default delete do double dynamic_cast else enum explicit export extern false float for friend goto if inline int long mutable namespace new operator private protected public register reinterpret_cast restrict return short signed sizeof static static_cast struct switch template this throw true try typedef typeid typename union unsigned using virtual void volatile while int8_t uint8_t int16_t uint16_t int32_t uint32_t int64_t uint64_t int_least8_t uint_least8_t int_least16_t uint_least16_t int_least32_t uint_least32_t int_least64_t uint_least64_t int_fast8_t uint_fast8_t int_fast16_t uint_fast16_t int_fast32_t uint_fast32_t int_fast64_t uint_fast64_t intptr_t uintptr_t intmax_t uintmax_t wint_t wchar_t wctrans_t wctype_t size_t time_t and and_eq bitand bitor compl not not_eq or or_eq xor xor_eq complex imaginary _Complex _Imaginary _Bool _Pragma string wstring NULL
run.bat:
@echo off
SyntaxHighlighter.exe input.cpp output.txt formatters.txt
pause
input.cpp: Whatever you want to test it on.