I am a beginner in C++ and I need some help because my decoding is coming out as a series of numbers. Here are the detail as to what I am trying to do.
I wrote 2 programs that compress and decompress a text file, each taking input and out file names as commands.
Use this Algorithm:
read input file in bytes (in Char). The first 100 byte are copied to outputfile, and is used to create model. Model keeps track of each bit, the next bit, how often do they occur. for the first 1000 bytes.
You will print out all the characters that were observed, the four most frequent next characters.
Next the model take rest of file and encode it by a prefix code. You look up character X and character Y and if Y is 1 of the 4 most frequent letters following x, we encode Y by the bit patterns 000 for most frequent, 010 to 110 for the 4th most frquent. Compression is achieved replacing bits by 3 bits. Else encode Y by 9 bit pattern( the bits of Y+1) and length increases from 8bits to 9 bits.
After finding the encoding for the next character Y, and it's length, these bits are put in a interger variable used as accumulator, behind the bits already contained in that accumulator. If Accumulator now contains at least 8 bit, we mask out the front 8bits, interpret them as char, and write the byte to the file, then shift the accumulator by 8 bits to the right. at the end , all remaining bit in the accumulator are written to the file. This finished the compression program.
To decompress, you read the first 100 byte, copy them to the decoded file, and collect the same model information. then you read from the file; if the next codeword starts with a 0, it is three bits long, and represents one of the four most probable next characters after the current character, which is looked up and written to file. Otherwise , it starts with a 1, then it is nine bits long, the leading 1-bit if deleted, and the remaining 8 bits are written to the file.
Here is the code I wrote. Can any one help me out. Since I am new a beginner in c++ can please anyone help me to get this program working. I think I might be a little confused with the bitwise functions. I am using a 256 by 256 array to keep track of the frequency of characters and I also created a sorted version of that array to keep track of most frequent character combinations.
//******************************ENCODING CODE*****************
#include <iostream.h> // I/O
#include <fstream.h> // file I/O
#include <iomanip.h> // format manipulation
#include <cstring>
#include <cstddef>
#include <stdio.h>
//using namespace std;
void displayBits(unsigned);
const long indexSize = 256;
void selectionSort(int[][indexSize],int);
void swap(int, int);
int main () {
ifstream fp_in,fp_in1,fp_in2; // declarations of streams fp_in and fp_out
ofstream fp_out,fp_out1;
char * buffer;
long size, size1;
long copySize;
char * readBuffer;
char * restOfFileBuffer;
char * next;
char infile[30]; // input file
char outfile[30];
char input[1000];
char output[1000];
int model[65536]={0};
int count_array[256][256]={0};
int sort_array[256][256]={0};
int most_frequent[256][4]={0};
//char n[];
// *****give input file a name ****
cout << "Please enter in the input file name: " << endl;
cin >> infile;
cout << "Please enter in the output file name: " << endl;
cin >> outfile;
fp_in.open(infile, ios::in);
fp_out.open(outfile, ios::out); // open the streams
if (fp_in.is_open())
{
cout<<"yes";
}
//get the size of the file
fp_in.seekg(0,ifstream::end);
size=fp_in.tellg();
fp_in.seekg(0);
size=1000;
// allocate memory for file content
buffer = new char [size];
// read content of infile
fp_in.read (buffer,size);
copySize=1000;
// write to outfile
fp_out.write (buffer,copySize);
// release dynamically-allocated memory
delete[] buffer;
fp_out.close();
fp_in.close();
//keeping track of bytes
fp_in1.open(outfile, ios::in); //open output file
//allocate memory for file content
readBuffer = new char [copySize];
if(!fp_in1.is_open())
{
cout<<"not open";
}
while(fp_in1>>readBuffer)
/*
***test*****
int a=readBuffer[0];
int b=readBuffer[3];
cout<<readBuffer[0]<<""<<readBuffer[1];
cout<<static_cast<int>('a') + static_cast<int>('a');
*/
//cout<<readBuffer;
//put frequency of character and next char into 2D array
for(int current=0; current< 1000; current++)
{
int charPosition;
int nextPosition;
int next;
unsigned char a;
unsigned char b;
next=current+1;
a=readBuffer[current];
b=readBuffer[next];
charPosition=static_cast<int>(a);
nextPosition=static_cast<int>(b);
++count_array[charPosition][nextPosition];
++sort_array[charPosition][nextPosition];
}
selectionSort(sort_array, indexSize);
//**TEST
for(int x=0; x<indexSize;x++)
{
int i=0;
unsigned char c = readBuffer[0];
int position=static_cast<int>(c);
// cout<<count_array[position][x];
cout<<sort_array[position][x]<<" ";
}
cout<<"Character "<<"Next Character "<<"Freqency"<<endl;
for(int display=0; display < 1000; display++) //traverse readbuffer
{
int displayChar;
int displayNext;
int nextChar;
unsigned char display1;
unsigned char display2;
nextChar=display+1;
display1=readBuffer[display];
display2=readBuffer[nextChar];
displayChar=static_cast<int>(display1);
displayNext=static_cast<int>(display2);
cout<<display1<<setw(12)<<display2<<setw(18)<<count_array[display1][display2]<<endl;
}
//display four most frequent
cout<<"Character "<<"Next Character "<<"Most Frequent"<<endl;
for(int frequency=0; frequency < 1000; frequency++) //traverse readbuffer
{
int frequencyChar;
int frequencyNext;
int nextChar;
unsigned char frequency1;
unsigned char frequency2;
nextChar=frequency+1;
frequency1=readBuffer[frequency];
frequency2=readBuffer[nextChar];
frequencyChar=static_cast<int>(frequency1);
frequencyNext=static_cast<int>(frequency2);
//cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<count_array[frequency1][frequency2]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][0])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][0]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][1])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][1]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][2])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][2]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][3])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][3]<<endl;
}
cout<<"*********************************************************"<<endl;
fp_in1.close();//close read buffer
cout<<"*********************************encoding******************************************";
fp_in2.open(infile, ios::in);
// open the streams
if (fp_in2.is_open())
{
cout<<"fp_in2 open";
}
//get the size of the file
fp_in2.seekg(0,ifstream::end);
size1=fp_in2.tellg();
fp_in2.seekg(0);
// allocate memory for file content
restOfFileBuffer = new char [size1-1000];
fp_in2.seekg(1000);
fp_in2.read(restOfFileBuffer,size1 - 1000);
//fp_in1.close();
//****************************************test restOfFileBuffer****************
cout<<endl<<"restOfFileBuffer"<<endl;
for(int f=0; f<size1-1000;f++)
{
cout<<"restOfBuffer"<<restOfFileBuffer[f];
}
//****************************************************************************
fp_out1.open(outfile, ios::app);
unsigned int acc_length=0;
unsigned int accumulator=0;
for(int mostFrequency=0; mostFrequency < size1-1000; mostFrequency++) //traverse readbuffer
{
unsigned int mostFrequencyChar;
unsigned int mostFrequencyNext;
unsigned int nextChar;
unsigned char mostFrequency1;
unsigned char mostFrequency2;
unsigned int codeword;
nextChar=mostFrequency+1;
mostFrequency1=restOfFileBuffer[mostFrequency];
mostFrequency2=restOfFileBuffer[nextChar];
mostFrequencyChar=static_cast<int>(mostFrequency1);
mostFrequencyNext=static_cast<int>(mostFrequency2);
if((count_array[mostFrequencyChar][mostFrequencyNext]==sort_array[mostFrequencyChar][0]))
{
codeword=0;
accumulator=accumulator|(codeword<<acc_length);
acc_length += 3;
}
else if((count_array[mostFrequencyChar][mostFrequencyNext]==sort_array[mostFrequencyChar][1]))
{
codeword=2;
accumulator=accumulator|(codeword<<acc_length);
acc_length += 3;
}
else if((count_array[mostFrequencyChar][mostFrequencyNext]==sort_array[mostFrequencyChar][2]))
{
codeword=4;
accumulator=accumulator|(codeword<<acc_length);
acc_length += 3;
}
else if((count_array[mostFrequencyChar][mostFrequencyNext]==sort_array[mostFrequencyChar][3]))
{
codeword=6;
accumulator=accumulator|(codeword<<acc_length);
acc_length += 3;
}
else {
accumulator=accumulator|((mostFrequencyNext<<1|1)<<acc_length);
acc_length +=9;
}
if(acc_length >= 8)
{
//******write*************************
fp_out1<<static_cast<char>(accumulator&0xFF); //print out last 8 bits
accumulator= accumulator>>8;
acc_length-=8;
}
}//end for loop
fp_in2.close();
fp_out1.close();
return 0;
}
//2D selection sort used to sort each row in 2D array
//values in descending order
void selectionSort(int arr[][indexSize], int size)
{
int indexOfMax, row,col,j;
for( row=0; row<size;row++)
{
for( col=0; col<size-1;col++)
{
indexOfMax = col;
for(j=col+1; j<size; j++)
if(arr[row][j]>arr[row][indexOfMax])
indexOfMax = j;
if(indexOfMax != col)
{
int temp;
temp= arr[row][col];
arr[row][col]=arr[row][indexOfMax];
arr[row][indexOfMax] = temp;
}
}
}
}
//swap function for int values
void swap(int *x, int *y)
{
int temp;
temp = *x;
*x =*y;
*y = temp;
}
//printing an unsigned integer in bits used to check bits
void displayBits(unsigned value)
{
const int SHIFT = 8 *sizeof(unsigned) - 1;
const unsigned MASK = 1 <<SHIFT;
cout<<setw(7) << value << " = ";
for (unsigned c=1; c<= SHIFT + 1; c++){
cout <<(value & MASK ? '1' : '0');
value <<=1;
if(c % 8 ==0)
cout<< ' ';
}
cout<<endl;
}
//***************************DECODE**********************
#include <iostream.h> // I/O
#include <fstream.h> // file I/O
#include <iomanip.h> // format manipulation
#include <cstring>
#include <cstddef>
#include <stdio.h>
//using namespace std;
void displayBits(unsigned);
const long indexSize = 256;
void selectionSort(int[][indexSize],int);
void swap(int, int);
int main () {
ifstream fp_in,fp_in1,fp_in2; // declarations of streams fp_in and fp_out
ofstream fp_out,fp_out1;
char * buffer;
long size, size1;
long copySize;
char * readBuffer;
char * restOfFileBuffer;
char * next;
char infile[30]; // input file
char outfile[30];
char input[1000];
char output[1000];
int model[65536]={0};
int count_array[256][256]={0};
int sort_array[256][256]={0};
int most_frequent[256][4]={0};
//char n[];
// *****give input file a name ****
cout << "Please enter in the input file name: " << endl;
cin >> infile;
cout << "Please enter in the output file name: " << endl;
cin >> outfile;
fp_in.open(infile, ios::in);
fp_out.open(outfile, ios::out); // open the streams
if (fp_in.is_open())
{
cout<<"yes";
}
//get the size of the file
fp_in.seekg(0,ifstream::end);
size=fp_in.tellg();
fp_in.seekg(0);
size=1000;
// allocate memory for file content
buffer = new char [size];
// read content of infile
fp_in.read (buffer,size);
copySize=1000;
// write to outfile
fp_out.write (buffer,copySize);
// release dynamically-allocated memory
delete[] buffer;
fp_out.close();
fp_in.close();
//keeping track of bytes
fp_in1.open(outfile, ios::in); //open output file
//allocate memory for file content
readBuffer = new char [copySize];
if(!fp_in1.is_open())
{
cout<<"not open";
}
while(fp_in1>>readBuffer)
//cout<<readBuffer;
//put frequency of character and next char into 2D array
for(int current=0; current< 1000; current++)
{
int charPosition;
int nextPosition;
int next;
unsigned char a;
unsigned char b;
next=current+1;
a=readBuffer[current];
b=readBuffer[next];
charPosition=static_cast<int>(a);
nextPosition=static_cast<int>(b);
++count_array[charPosition][nextPosition];
++sort_array[charPosition][nextPosition];
}
selectionSort(sort_array, indexSize);
//Display frequency
//option to display values
/*
char y;
char toDisplay[1];
//cout<<"Do you want to display frequency?";
//cin>>toDisplay;
//if (toDisplay == y)
//{
//cout<<"Character "<<"Next Character "<<"Freqency"<<endl;
for(int display=0; display < 1000; display++) //traverse readbuffer
{
int displayChar;
int displayNext;
int nextChar;
unsigned char display1;
unsigned char display2;
nextChar=display+1;
display1=readBuffer[display];
display2=readBuffer[nextChar];
displayChar=static_cast<int>(display1);
displayNext=static_cast<int>(display2);
// cout<<display1<<setw(12)<<display2<<setw(18)<<count_array[display1][display2]<<endl;
}*/
//display four most frequent
//}
//selectionSort(sort_array, indexSize);
/* cout<<"Character "<<"Next Character "<<"Most Frequent"<<endl;
for(int frequency=0; frequency < 1000; frequency++) //traverse readbuffer
{
int frequencyChar;
int frequencyNext;
int nextChar;
unsigned char frequency1;
unsigned char frequency2;
nextChar=frequency+1;
frequency1=readBuffer[frequency];
frequency2=readBuffer[nextChar];
frequencyChar=static_cast<int>(frequency1);
frequencyNext=static_cast<int>(frequency2);
//cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<count_array[frequency1][frequency2]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][0])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][0]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][1])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][1]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][2])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][2]<<endl;
if(count_array[frequencyChar][frequencyNext]==sort_array[frequencyChar][3])
cout<<frequency1<<setw(12)<<frequency2<<setw(18)<<sort_array[frequencyChar][3]<<endl;
}
*/
cout<<"*********************************************************"<<endl;
fp_in1.close();//close read buffer
//delete[] readBuffer;
cout<<"*********************************decoding******************************************";
fp_in2.open(infile, ios::in);
// open the streams
if (fp_in2.is_open())
{
cout<<"fp_in2 open";
}
//get the size of the file
fp_in2.seekg(0,ifstream::end);
size1=fp_in2.tellg();
fp_in2.seekg(0);
// allocate memory for file content
restOfFileBuffer = new char [size1-1000];
fp_in2.seekg(1000);
fp_in2.read(restOfFileBuffer,size1 - 1000);
//fp_in1.close();
//****************************************test restOfFileBuffer****************
cout<<endl<<"restOfFileBuffer"<<endl;
for(int f=0; f<15;f++)
{
cout<<restOfFileBuffer[f];
}
//****************************************************************************
fp_out1.open(outfile, ios::app);
cout<<"SIZE:"<<size1-1000<<endl;
unsigned int acc_length=0;
unsigned int accumulator=0;
//for(int mostFrequency=0; mostFrequency < size1-1001; mostFrequency++) //traverse readbuffer
for(int mostFrequency=0; mostFrequency < size1-1000; mostFrequency++)
{
unsigned int mostFrequencyChar;
unsigned int mostFrequencyNext;
unsigned int nextChar;
unsigned char mostFrequency1;
unsigned char mostFrequency2;
unsigned int codeword;
unsigned int mask=1;
unsigned int value;
int nextValue;
nextChar=mostFrequency+1;
mostFrequency1=restOfFileBuffer[mostFrequency];
mostFrequency2=restOfFileBuffer[nextChar];
mostFrequencyChar=static_cast<int>(mostFrequency1);
mostFrequencyNext=static_cast<int>(mostFrequency2);
// cout<<mostFrequency1<<setw(12)<<mostFrequency2<<setw(18)<<count_array[mostFrequency1][mostFrequency2]<<endl;
accumulator=accumulator|mostFrequencyNext;
//cout<<mostFrequencyChar<<" ";
if(!accumulator&mask)
{
acc_length+=3;
accumulator=accumulator&0x7;
// for(int nextValue=0; nextValue< 256; nextValue++)
//{
if(accumulator==0)
{
for(nextValue=0; nextValue< 256; nextValue++)
{
if(count_array[mostFrequencyChar][nextValue]==sort_array[mostFrequencyChar][0] )
{
fp_out1<<static_cast<char>(nextValue);
break;
}
}
}
else if(accumulator==2)
{
for(nextValue=0; nextValue< 256; nextValue++)
{
if(count_array[mostFrequencyChar][nextValue]==sort_array[mostFrequencyChar][1])
{
fp_out1<<static_cast<char>(nextValue);
break;
}
}
}
else if(accumulator==4)
{
for(nextValue=0; nextValue< 256; nextValue++)
{
if(count_array[mostFrequencyChar][nextValue]==sort_array[mostFrequencyChar][2])
{
fp_out1<<static_cast<char>(nextValue);
break;
}
}
}
else if(accumulator==6)
{
//
for(nextValue=0; nextValue< 256; nextValue++)
{
if(count_array[mostFrequencyChar][nextValue]==sort_array[mostFrequencyChar][3])
{
fp_out1<<static_cast<char>(nextValue);
break;
}
}
}
else
{
acc_length+=9;
accumulator>>=acc_length-8;
// cout<<"9 bit:"<<static_cast<char>(mostFrequencyNext)<<" ";
fp_out1<<accumulator;
acc_length-=9;
accumulator=accumulator>>8;
}
acc_length-=3;
accumulator=accumulator>>8;
}//end if
}//end for loop
fp_in2.close();
fp_out1.close();
return 0;
}
//2D selection sort used to sort each row in 2D array
//values in descending order
void selectionSort(int arr[][indexSize], int size)
{
int indexOfMax, row,col,j;
for( row=0; row<size;row++)
{
for( col=0; col<size-1;col++)
{
indexOfMax = col;
for(j=col+1; j<size; j++)
if(arr[row][j]>arr[row][indexOfMax])
indexOfMax = j;
if(indexOfMax != col)
// swap(&arr[row][col], &arr[indexOfMax]);
{
int temp;
temp= arr[row][col];
arr[row][col]=arr[row][indexOfMax];
arr[row][indexOfMax] = temp;
}
}
}
}
//swap function for int values
void swap(int *x, int *y)
{
int temp;
temp = *x;
*x =*y;
*y = temp;
}
//printing an unsigned integer in bits used to check bits
void displayBits(unsigned value)
{
const int SHIFT = 8 *sizeof(unsigned) - 1;
const unsigned MASK = 1 <<SHIFT;
cout<<setw(7) << value << " = ";
for (unsigned c=1; c<= SHIFT + 1; c++){
cout <<(value & MASK ? '1' : '0');
value <<=1;
if(c % 8 ==0)
cout<< ' ';
}
cout<<endl;
}