(I have 2 queries)

I have made an HTML parser, but the memory leaks (as it seems from the message below) are killing me. I am getting this message from the gdb debugger (Cygwin build).

Heap block at 003D2CA0 modified at 003D2CB3 past requested size of b


Also, not all of the output from cout and printf is getting printed now. I have posted the code of the main() function, where all the pointer acrobatics take place. The class Parser does the parsing. It can parse from a file or from a character array. It uses a vector<char> object (buf) to store the output and dynamically instructs the vector to resize when needed, using the code...

// Grow the output buffer by INITIAL_BUF_SIZE zero-filled elements once the
// write index reaches the current size, then store the character.
// resize() is required here rather than reserve(): resize() changes size() and
// creates the new elements (filled with '\0'), so buf[bufptr] is a valid element.
// reserve() only raises capacity() and leaves size() unchanged, so indexing past
// size() after a reserve() would be undefined behaviour.
if(bufptr>=buf.size()) buf.resize(buf.size()+INITIAL_BUF_SIZE,'\0');
buf[bufptr++]=c;

Query 1: What is the difference between vector's resize and reserve?

Query 2: Do you see any possible memory leaks in the badly hacked code below?

int main(){
    Parser p;
    Tag tag;
    strcpy(tag.name,"div");
    
    Property *px=new Property[2];
    strcpy(px[0].name,"id");
    strcpy(px[0].value,"res");
    tag.totalProperties=1;
    tag.property=px;
    
    bool err;
    char *out=p.parse("g-ogre.htm",tag,0,NULL,0,err,true);
    //cout<<"div:: "<<out<<endl;
    
    
    strcpy(tag.name,"p");
    tag.totalProperties=0;
    
    char *tout=p.parse(out,tag,1,NULL,0,err,false);
    //cout<<tout<<endl;
    
    strcpy(tag.name,"a");
    char *rp=new char[300], *trp=NULL;
    rp[0]='\0';
    int i=1;
    do{
        if(trp) {delete [] trp; trp=NULL;}
        trp=p.parse(tout,tag,i++,NULL,0,err,false);
        strcat(rp,"; ");
        strcat(rp,trp);
    }while(strLen(trp)!=0);
    if(trp) {delete [] trp; trp=NULL;}
    if(tout) {delete [] tout; tout=NULL;}
    //cout<<"Related phrases::-\n"<<rp<<endl;
    
    strcpy(tag.name,"ul");
    strcpy(px[0].name,"type");
    strcpy(px[0].value,"disc");
    tag.totalProperties=1;
    
    Tag itag;//tags to ignore.
    strcpy(itag.name,"br");
    //strcpy(itag[0].name,"a");
    
    tout=p.parse(out,tag,1,&itag,1,err);
    //cout<<"ul :: "<<tout<<endl;
    
    tag.totalProperties=0;
    char *lp=new (nothrow) char[10000];
    if(lp==NULL){
        cout<<"Out of Memory!!!"<<endl;
        return 0;
    }
    lp[0]='\0';
    lp[9999]='@';
    i=1;
    trp=NULL;
    strcpy(itag.name,"a");
    do{
        if(trp) {delete [] trp; trp=NULL;}
        strcpy(tag.name,"font");
        trp=p.parse(tout,tag,i++,NULL,0,err);
        if(strLen(trp)==0) break;
        strcat(lp,"\n\n:");
        char *qp=NULL;
        strcpy(tag.name,"li");
        int j=1;
        do{
            if(qp) {delete [] qp; qp=NULL;}
            qp=p.parse(trp,tag,1,&itag,1,err);//The content of li ignoring the tag <a> (this contains the source url, will be extracted in following lines).
            cout<<">>>>>>>>>>>>>QP"<<qp<<endl;
            strcat(lp,"\n");
            strcat(lp,qp);
        }while(strLen(qp)!=0);
        if(qp) {delete [] qp; qp=NULL;}
        /*
        strcpy(tag.name,"a");
        char *krp=p.parse(trp,tag,1,NULL,0,err);//Extracting content of <a> for getting source url, but contains font tag too that encloses the data required.
        strcpy(tag.name,"font");
        char *ttrp=p.parse(krp,tag,1,NULL,0,err);//From the content extracted in the line above getting the content of <Font>. This effectively removes the font tag.
        strcat(lp,"\nSource Url: ");
        strcat(lp,ttrp);
        delete [] krp; krp=NULL;
        delete [] ttrp; ttrp=NULL;
        delete [] trp; trp=NULL;*/
        //strcat(lp,trp);
    }while(1);
    if(trp) {delete [] trp; trp=NULL;}
    if(lp[4999]!='@')
        printf("lp overflowed!!!\n");
    else
        printf("Definitions:-\n%s\n",lp);
    printf("End of program.");
    //cout<<err;
    return 0;
}
Ancient Dragon commented: Thanks for taking the time to learn how to use code tags +15

Q2: Yes -- there are lots of places that look like memory leaks --
line 24: where is that object deleted?
lines 13, 20, and 63: does parse() return a char pointer that needs to be deleted? If it does, then these lines are memory leaks too.

Q2: Yes -- there are lots of places that look like memory leaks --
line 24: where is that object deleted?
lines 13, 20, and 63: does parse() return a char pointer that needs to be deleted? If it does, then these lines are memory leaks too.

parse() returns a pointer to a character array dynamically allocated with the 'new' operator. As for line 24, etc.: they have not been deleted because they will be used to print the result (later, when I debug the code).

Anyway, I found the source of the warning from gdb. It gives that warning whenever I use the 'delete' operator. I found this after painfully tracing the main() function. E.g. the code if(out) {delete [] out; out=NULL;}, which I later inserted at line no. 22, gave this warning from gdb:

warning: Heap block at 003D4FB8 modified at 003D647D past requested size of 14bd

I confirmed, though, that 'out' did contain valid data at that moment and that the block it was pointing to started at address 0x3d4fc0. I am totally confused as to why this is happening. Furthermore, if I comment out the lines with the 'delete' instructions, then I get no warning, and while stepping through the code I also get the correct output; but as soon as I run this program directly from the command prompt, it crashes, with Windows giving me the message

The instruction at xyz location referenced memory at abc location. The memory could not be read.

Pls help. I am at my wits end.

does parse() modify the contents of the char array in the first argument to the function ? If it does, then maybe the parse function is overrunning that buffer.

does parse() modify the contents of the char array in the first argument to the function ? If it does, then maybe the parse function is overrunning that buffer.

Nope. In fact the 1st argument is 'const char *'.

Prototype of classes:-

struct Property{
    char name[MAX_NAME_SIZE];
    char value[PROP_VAL_SIZE];
    bool getValue; //Set this to true if value of 'value' is unknown and is needed by the calling function.
    read in 'name' and then in value (if set).
    Property(){
        name[0]='\0';
        value[0]='\0';
        getValue=false;
    }
};

struct Tag{
    char name[MAX_NAME_SIZE];
    Property *property;
    int totalProperties;//Size of array pointed to by *property.
    int dc;//for internal use.
    Tag(){
        name[0]='\0';
        property=NULL;
        totalProperties=0;
        dc=0;
    }
};
//-------------------------------CLASS------------------
// Extracts the content of a requested tag from HTML that comes either from a
// file or from an in-memory character array.
class Parser{
private:
    ifstream *f;//Input stream, used when the source is a file.
    const char *src;//Only one of the two would be used at a time.
    long srcSize;
    bool eof;
    //Names of the tags treated as singletons (see isSingleton()).
    static const char singleton[NO_OF_SINGLETONS][MAX_NAME_SIZE];
    
    char get(long &pos);//Byte offset when in files and index for arrays.
    //Internal overload without a source argument; presumably works on the
    //f/src members set up by the public parse() -- TODO confirm.
    char* parse(Tag tag, int NthTag, Tag *tagsToIgnore, int totalNoOfTagsToIgnore, bool &err);
    Tag* findTag(char tbuf[],Tag *iT,int itc);
    Property* findProp(char pbuf[],Tag *tag);
    bool strCmpi(const char*,const char*);
    bool isSingleton(char[]);//Returns true if given tag is hr, br, input or any singleton whose name is in singleton array;
    //bool isAllPropChecked(Tag);
    inline bool isAlphaNum(char);
    inline bool isEvery_Except_Minus(char);
    inline bool isEvery_Except_LT(char);//is Everything except <
    inline bool isEvery_Except_Quote(char);
    
public:
    Parser();
    ~Parser();
    /*Set isFileName to true if Src is filename and to false if Src is an array.
     *The author states that the returned pointer refers to a character array
     *allocated with new; the posted main() releases it with delete [].
     */
    char* parse(char const *Src, Tag tag, int NthTag, Tag *tagsToIgnore, int totalNoOfTagsToIgnore, bool &err, bool isFileName=false);
    void release();
};

An update: I tried compiling this using MS VC++ 6.0. It compiled well and also runs perfectly (whereas when compiled with GCC it crashes while running; I tried the MinGW and Cygwin builds of gcc 3.4). But the problem of it crashing when the 'delete' instructions are uncommented persists even here.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.