Hello,
First of all, excuse my English =)
Warning: wall of text\code.
I must say I have some experience in Java and long forgotten C++, so right now I have some problems implementing the Huffman coding.
The algorithm goes like this:
I create an array of 255 elements to represent each char. Next I iterate through my string and increment corresponding char values. Then I go through the array and whenever I found anything greater then zero I create a new instance of my LeafNode class and push it onto priority queue. The LeafNode overloads the "<" operator to make the priority queue work in a way that nodes with least frequent characters are on the top of the queue. At the same time I push all the new LeafNodes to an ordinary vector, just to provide the list of Huffman codes in the end.
The next step is basically the Huffman algorithm. I pop 2 elements from the top of the queue and create a new one (using special InternalNode class) with frequency value equal to the sum of frequency values of those popped. I assign right and left childs and parent links. And then I push the new InternalNode onto the queue.
The problem lies somewhere here as I discovered, that for example all of my LeafNodes have the exactly same parent at address 0x6 and it is inaccessible. Moreover, when I tried to go from the root, sometimes the parents or child of random nodes where inaccessible as well.
Here is my code:
#ifndef HUFF_NODE
#define HUFF_NODE
class Node
{
public:
int freq;
Node* parent;
Node() { freq = 0; }
Node(const int f) : freq(f) { parent = NULL; }
Node(const Node &node) { freq = node.freq; parent = node.parent; }
bool operator < (const Node &other) const { return freq > other.freq; }
};
#endif
#ifndef HUFF_LEAF_NODE
#define HUFF_LEAF_NODE
#include "Node.h"
class LeafNode : public Node
{
public:
char value;
LeafNode(const int f, const char v) : value(v) { freq = f; }
};
#endif
#ifndef HUFF_INTERNAL_NODE
#define HUFF_INTERNAL_NODE
#include "Node.h"
class InternalNode : public Node
{
public:
Node* left;
Node* right;
InternalNode(const int f) { freq = f; }
InternalNode(const Node &node) { freq = node.freq; parent = node.parent; }
InternalNode(const InternalNode &node) { freq = node.freq; parent = node.parent; left = node.left; right = node.right;}
};
#endif
And the main thing:
#include <iostream>
#include <queue>
#include <string>
#include <vector>
#include "Node.h"
#include "LeafNode.h"
#include "InternalNode.h"
int main()
{
std::priority_queue <Node> que;
std::vector <LeafNode> symbols;
std::string str = "abracadabra";
int a[255];
for (int i = 0; i < 255; ++i) {
a[i] = 0;
}
for (int i = 0; i < str.length(); ++i) {
++a[str[i]];
}
std::cout << "Original frequences:\n";
for (int i = 0; i < 255; ++i) {
if (a[i] > 0) {
std::cout << char(i) << " : " << a[i] << "\n";
LeafNode leafNode(a[i], char(i));
que.push(leafNode);
symbols.push_back(leafNode);
}
}
//std::cout << que.size() << "\n";
while (que.size() != 1) {
Node* leftNode = new Node(que.top());
que.pop();
std::cout << leftNode->freq << "\n";
Node* rightNode = new Node(que.top());
que.pop();
std::cout << rightNode->freq << "\n";
int sum = leftNode->freq + rightNode->freq;
InternalNode* parentNode = new InternalNode(sum);
parentNode->left = leftNode;
parentNode->right = rightNode;
leftNode->parent = parentNode;
rightNode->parent = parentNode;
std::cout << leftNode << " " << rightNode << " " << parentNode << "\n";
InternalNode temp = InternalNode(sum);
temp.left = parentNode->left;
temp.right = parentNode->right;
que.push(temp);
}
//std::cout << que.top().freq << "\n";
std::cout << "Huffman codes:\n";
for (int i = 0; i < symbols.size(); ++i) {
LeafNode leafNode = symbols.at(i);
std::cout << leafNode.value << " : ";
std::cout << leafNode.parent << "\n";
std::queue <int> temp;
InternalNode* parent = (InternalNode*)leafNode.parent;
Node* node = &((Node)leafNode);
while (parent != NULL) {
temp.push(node->freq);
node = parent;
parent = (InternalNode*)parent->parent;
}
while(!temp.empty()) {
std::cout << temp.front();
temp.pop();
}
std::cout << "\n";
}
}
I believe the trouble is somewhere here:
while (que.size() != 1) {
Node* leftNode = new Node(que.top());
que.pop();
std::cout << leftNode->freq << "\n";
Node* rightNode = new Node(que.top());
que.pop();
std::cout << rightNode->freq << "\n";
int sum = leftNode->freq + rightNode->freq;
InternalNode* parentNode = new InternalNode(sum);
parentNode->left = leftNode;
parentNode->right = rightNode;
leftNode->parent = parentNode;
rightNode->parent = parentNode;
std::cout << leftNode << " " << rightNode << " " << parentNode << "\n";
InternalNode temp = InternalNode(sum);
temp.left = parentNode->left;
temp.right = parentNode->right;
que.push(temp);
}
This code may look strange, but it's due to fact that I've been trying to solve this problem for about 4 hours without completely destroying my code and idea(because intuitively it feels right and I am sure I would have already implemented this in Java if needed). I tried numerous approaches using both pointers and normal objects, creating various types of constructors. Some of them aren't accepted by compiler (as I tried to create prirority_queue <Node*>) some of them result in segmentation fault errors and some of them display really weird thing in output.