Hey, I'm having trouble with this piece of code. Basically, it's meant to look at a map file (which basically is a bunch of URLs paired to an ID) and then go through another file and replace each URL with it's corresponding ID.
The code seems to work fine, but when I test on the 2GB file I have access to, I'm getting a segfault.
The following is the relevant section of code:
void Compressor::compress(std::string filename)
{
//Input file to read from
Parser parser(filename);
//String stream
std::stringstream stringStream;
//Line count
int count = 0;
//Number of times written to file bufferSize(used to determine whether to start new file or append)
int writeCount = 0;
//Variables to store IDs in
int idNode;
int idBranch;
//Reset buffer count
bufferCount = 0;
//Clear string buffer
stringBuffer.clear();
while(parser.hasNext())
{
std::vector<std::string> elements;
std::string str = parser.getLine();
if (str != "")
{
elements = parser.getElements(str, "\t");
//Convert URLs to ID
if(doesMappingExist(elements[index1]))
{
idNode = getID(elements[index1]);
//Convert second element
if(doesMappingExist(elements[index2]))
{
idBranch = getID(elements[index2]);
str = idNode + "\t" ;
//Clear buffer
stringStream.str("");
//Add to buffer
stringStream << idNode << "\t" << idBranch << "\n";
stringBuffer.push_back(stringStream.str());
//Increment count
bufferCount++;
}
else
{
std::cout << "URL " << count << " not added." << std::endl;
}
}
else
{
std::cout << "URL " << count << " not added." << std::endl;
}
}
if (bufferCount >= bufferSize)
{
writeBufferToFile("inputCompressed.txt", (writeCount == 0) ? false : true);
writeCount++;
std::cout << "Buffer Written.\tMappings created: " << bufferCount * writeCount << std::endl;
stringBuffer.clear();
bufferCount = 0;
}
count++;
}
std::cout << "Buffer size is " << stringBuffer.size() << std::endl;
writeBufferToFile("inputCompressed.txt", (writeCount == 0) ? false : true);
}
void Compressor::writeBufferToFile(std::string filename, bool append = false)
{
//Output file to write to
std::ofstream output;
output.open(filename.c_str(), (append) ? std::ios::app : std::ios::out);
for (int i = 0; i < stringBuffer.size(); i++)
{
output << stringBuffer[i];
}
The following is the gdb output:
Program received signal SIGSEGV, Segmentation fault.
0xff0b1df0 in strlen () from /lib/libc.so.1
(gdb) ;3Dbt
Undefined command: "". Try "help".
(gdb) bt
#0 0xff0b1df0 in strlen () from /lib/libc.so.1
#1 0xff333314 in std::string::operator= (this=0xffbff058,
__s=0x20014 <Address 0x20014 out of bounds>)
at /export/nigelw/build-work/gcc-objdir/sparc-sun-solaris2.10/libstdc++-v3/include/bits/char_traits.h:262
#2 0x000171f8 in Compressor::compress ()
#3 0x00018724 in Compressor::createIdMapping ()
#4 0x00015934 in compressInput ()
#5 0x00015bec in main ()
(gdb)
The program seems to stop at line 96212. Any ideas?