Huffman algorithm implementation in C

ckid 0 Tallied Votes 3K Views Share

HI friends!! I am very new to this type of groups, and this is my first post to this group, before asking u all a help I am just posting this code to u all, , please respond for this code
Huffman algorithm is one used for image compression and it is also called as variable length coding, after all u had seen this code, if u have any problems please send me a message.
My code takes a message file(example file is attached with this post as text file) and gives the output.

/*************************/
/*************************/
// HFMANN.C
/*************************/
/*************************/

#include<stdio.h>
#include<conio.h>
#include<alloc.h>
#include"C:\windows\desktop\input.h"
#define MAXBITS 32
#define MAXNODES 512
#define MAXSYMBS 256
int hmin(void);
void hinsert(int,int);
struct codetype{
   int bits[MAXBITS];
   int startpos;
   };
struct nodetype{
   int freq;
   int father;
   int isleft;
   };
typedef struct hlist{
   int pos;
   int hfreq;
   struct hlist	*next;
   }hnode;
hnode *hroot=NULL,*traversal;
extern struct list a[256];
extern int n;
int hmin()
{
 int p;
 p = hroot->pos;
 traversal=hroot;
 hroot = traversal->next;
 free(traversal);
 return(p);
}
void hinsert(int p,int freq)
{
  hnode* new1=(hnode *)malloc(sizeof(hnode));
  new1->pos = p;
  new1->hfreq = freq;
  traversal = hroot;
  if(hroot == NULL)
  {
	hroot = new1;
	hroot->next = NULL;
	return;
  }
  if(hroot->next == NULL)
  {
	if(hroot->hfreq>new1->hfreq)
	{
		new1->next = hroot;
		hroot =new1;
		traversal->next =NULL;
		return;
	}
	else
	{
	   hroot->next =new1;
	   new1->next =NULL;
	   return;
	}
  }
  if(hroot->hfreq>=new1->hfreq)
  {
	new1->next =hroot;
	hroot = new1;
	return;
  }

  while(traversal->next->hfreq<new1->hfreq)
  {
	traversal=traversal->next;
	if(traversal->next==NULL)
	break;
  }
  if(traversal->next->hfreq>=new1->hfreq)
  {
	new1->next = traversal->next;
	traversal->next = new1;
	return;
  }

  new1->next = NULL;
  traversal->next = new1;

}
int main(){
  struct codetype cd,code[MAXSYMBS];
  struct nodetype node[MAXNODES];
  int i,k,p,p1,p2,root;

  char symb,alph[MAXSYMBS];
  clrscr();
  for(i=0;i<MAXSYMBS;i++)
  alph[i]=' ';
  //scanf("%d",&n);
  input();
  for(i=0;i<n;i++){
	 flushall();
	// scanf("%c %d",&symb,&node[i].freq);
	 symb = a[i].alph;
	 node[i].freq = a[i].freq;
	 hinsert(i,node[i].freq);

	 alph[i]=symb;
	 }

  for(p=n;p<(2*n-1);p++){

	 p1 =hmin();

	 p2 =hmin();

	 node[p1].father = p;
	 node[p1].isleft = 1;
	 node[p2].father = p;
	 node[p2].isleft = 0;
	 node[p].freq =node[p1].freq+node[p2].freq;
	 hinsert(p,node[p].freq);
  }
	 root = hmin();
  for(i=0;i<n;i++){
	 cd.startpos = MAXBITS;
	 p=i;
	 while(p!=root){
		--cd.startpos ;
		if(node[p].isleft)
		   cd.bits[cd.startpos] =0;
		else
		   cd.bits[cd.startpos] =1;
		p =node[p].father;
	 }
	 for(k=cd.startpos;k<MAXBITS;k++)
		code[i].bits[k]=cd.bits[k];
	 code[i].startpos =cd.startpos;
  }
  for(i=0;i<n;i++){
	 printf("\n%c  %d",alph[i],node[i].freq);
	 for(k=code[i].startpos;k<MAXBITS;k++)
		printf(" %d",code[i].bits[k]);
	 printf("\n");
	 }
	 return(0);
  }

/*************************/
/*************************/
// INPUT.H
/*************************/
/*************************/


#include<stdio.h>
#include<conio.h>
struct list{
char alph;
int freq;
} ;
struct list a[256];
int n;
void input()
{
FILE *fin,*fout;
char *filein,*fileout,ch;
int i,k,f;

printf("enter the filename of from which the data is to be read::\n");
scanf("%s",filein);
fin=fopen(filein,"r");
for(i=0,n=0;i<256;i++)
{     f=0;k=0;
  while((ch=fgetc(fin))!=EOF)
  {
     if(ch==i)
     { f=1;
     a[n].alph=ch;
     k++;
     }
     }
     if(f==1){
     a[n].freq =k;
     n++;
     }

     rewind(fin);
}


fclose(fin);

getch();

}

/*************************/
/*************************/
// message.txt
/*************************/
/*************************/

AAAAABBBBACCCCCDDDFGGGGHI
Dani 4,329 The Queen of DaniWeb Administrator Featured Poster Premium Member

I wasn't sure, so I did some research, and found that Huffman's algorithm is an algorithm to form a complete binary tree. The algorithm is shown here http://planetmath.org/encyclopedia/HuffmansAlgorithm.html for those who are interested. Thanks so much for providing us with your code, ckid

Dave Sinkula 2,398 long time no c Team Colleague

char *filein,*fileout,ch;
scanf("%s",filein);

Yikes!

AhmedHan 0 Junior Poster in Training

What is this :#include"C:\windows\desktop\input.h"

bofarull 0 Newbie Poster

To AhmedHan: I bet you a fiver it is the input file called in input.h

emil_ham 0 Newbie Poster

man, did you really have to read 255 times from the file, if you could read it once to a buffer and make anything you want with it in main memory. I bet you a fiver it is much cheaper to work with ram than with disk.

bubbly_blue 0 Newbie Poster

hi,i hav a file frm which i already calculated the frequencies of each character.But im unable to form the tree for its code generation....could u plzz help me out?!

nilu28 0 Newbie Poster

/* Huffman Coding in C . This program reads a text file named on the command line, then compresses it using Huffman coding. The file is read twice, once to determine the frequencies of the characters, and again to do the actual compression. */

#include < stdio.h>
#include < stdlib.h>
#include < string.h>
#include < time.h>

/* there are 256 possible characters */

#define NUM_CHARS 256

/* tree node, heap node */

typedef struct _treenode treenode;
struct _treenode {
int freq; /* frequency; is the priority for heap */
unsigned char ch; /* character, if any */
treenode *left, /* left child of Huffman tree (not heap!) */
*right; /* right child of Huffman tree */
};

/* this is a priority queue implemented as a binary heap */
typedef struct _pq {
int heap_size;
treenode *A[NUM_CHARS];
} PQ;

/* create an empty queue */

void create_pq (PQ *p) {
p->heap_size = 0;
}

/* this heap node's parent */

int parent (int i) {
return (i-1) / 2;
}

/* this heap node's left kid */

int left (int i) {
return i * 2 + 1;
}

/* this heap node's right kid */

int right (int i) {
return i * 2 + 2;
}

/* makes the subheap with root i into a heap , assuming left(i) and
* right(i) are heaps
*/
void heapify (PQ *p, int i) {
int l, r, smallest;
treenode *t;

l = left (i);
r = right (i);

/* find the smallest of parent, left, and right */

if (l < p->heap_size && p->A[l]->freq < p->A->freq)
smallest = l;
else
smallest = i;
if (r < p->heap_size && p->A[r]->freq < p->A[smallest]->freq)
smallest = r;

/* swap the parent with the smallest, if needed. */

if (smallest != i) {
t = p->A;
p->A = p->A[smallest];
p->A[smallest] = t;
heapify (p, smallest);
}
}

/* insert an element into the priority queue. r->freq is the priority */
void insert_pq (PQ *p, treenode *r) {
int i;

p->heap_size++;
i = p->heap_size - 1;

/* we would like to place r at the end of the array,
* but this might violate the heap property. we'll start
* at the end and work our way up
*/
while ((i > 0) && (p->A[parent(i)]->freq > r->freq)) {
p->A = p->A[parent(i)];
i = parent (i);
}
p->A = r;
}

/* remove the element at head of the queue (i.e., with minimum frequency) */
treenode *extract_min_pq (PQ *p) {
treenode *r;

if (p->heap_size == 0) {
printf ("heap underflow!\n");
exit (1);
}

/* get return value out of the root */

r = p->A[0];

/* take the last and stick it in the root (just like heapsort) */

p->A[0] = p->A[p->heap_size-1];

/* one less thing in queue */

p->heap_size--;

/* left and right are a heap, make the root a heap */

heapify (p, 0);
return r;
}

/* read the file, computing the frequencies for each character
* and placing them in v[]
*/
unsigned int get_frequencies (FILE *f, unsigned int v[]) {
int r, n;

/* n will count characters */

for (n=0;;n++) {

/* fgetc() gets an unsigned char, converts to int */

r = fgetc (f);

/* no more? get out of loop */

if (feof (f)) break;

/* one more of this character */

v[r]++;
}
return n;
}

/* make the huffman tree from frequencies in freq[] (Huffman's Algorithm) */

treenode *build_huffman (unsigned int freqs[]) {
int i, n;
treenode *x, *y, *z;
PQ p;

/* make an empty queue */

create_pq (&p);

/* for each character, make a heap/tree node with its value
* and frequency
*/
for (i=0; i< NUM_CHARS; i++) {
x = malloc (sizeof (treenode));

/* its a leaf of the Huffman tree */

x->left = NULL;
x->right = NULL;
x->freq = freqs;
x->ch = (char) i;

/* put this node into the heap */

insert_pq (&p, x);
}

/* at this point, the heap is a "forest" of singleton trees */

n = p.heap_size-1; /* heap_size isn't loop invariant! */

/* if we insert two things and remove one each time,
* at the end of heap_size-1 iterations, there will be
* one tree left in the heap
*/
for (i=0; i< n; i++) {

/* make a new node z from the two least frequent
* nodes x and y
*/
z = malloc (sizeof (treenode));
x = extract_min_pq (&p);
y = extract_min_pq (&p);
z->left = x;
z->right = y;

/* z's frequency is the sum of x and y */

z->freq = x->freq + y->freq;

/* put this back in the queue */

insert_pq (&p, z);
}

/* return the only thing left in the queue, the whole Huffman tree */

return extract_min_pq (&p);
}

/* traverse the Huffman tree, building up the codes in codes[] */

void traverse (treenode *r, /* root of this (sub)tree */
int level, /* current level in Huffman tree */
char code_so_far[], /* code string up to this point in tree */
char *codes[]) {/* array of codes */

/* if we're at a leaf node, */

if ((r->left == NULL) && (r->right == NULL)) {

/* put in a null terminator */

code_so_far[level] = 0;

/* make a copy of the code and put it in the array */

codes[r->ch] = strdup (code_so_far);
} else {

/* not at a leaf node. go left with bit 0 */

code_so_far[level] = '0';
traverse (r->left, level+1, code_so_far, codes);

/* go right with bit 1 */

code_so_far[level] = '1';
traverse (r->right, level+1, code_so_far, codes);
}
}

/* global variables, a necessary evil */

int nbits, current_byte, nbytes;

/* output a single bit to an open file */

void bitout (FILE *f, char b) {

/* shift current byte left one */

current_byte < < = 1;

/* put a one on the end of this byte if b is '1' */

if (b == '1') current_byte |= 1;

/* one more bit */

nbits++;

/* enough bits? write out the byte */

if (nbits == 8) {
fputc (current_byte, f);
nbytes++;
nbits = 0;
current_byte = 0;
}
}

/* using the codes in codes[], encode the file in infile, writing
* the result on outfile
*/
void encode_file (FILE *infile, FILE *outfile, char *codes[]) {
unsigned char ch;
char *s;

/* initialize globals for bitout() */

current_byte = 0;
nbits = 0;
nbytes = 0;

/* continue until end of file */

for (;;) {

/* get a char */

ch = fgetc (infile);
if (feof (infile)) break;

/* put the corresponding bitstring on outfile */

for (s=codes[ch]; *s; s++) bitout (outfile, *s);
}

/* finish off the last byte */

while (nbits) bitout (outfile, '0');
}

/* main program */

int main (int argc, char *argv[]) {
FILE *f, *g;
treenode *r; /* root of Huffman tree */
unsigned int n, /* number of bytes in file */
freqs[NUM_CHARS]; /* frequency of each char */
char *codes[NUM_CHARS], /* array of codes, 1 per char */
code[NUM_CHARS], /* a place to hold one code */
fname[100]; /* what to call output file */

/* hassle user */

if (argc != 2) {
fprintf (stderr, "Usage: %s < filename>\n", argv[0]);
exit (1);
}

/* set all frequencies to zero */

memset (freqs, 0, sizeof (freqs));

/* open command line argument file */

f = fopen (argv[1], "r");
if (!f) {
perror (argv[1]);
exit (1);
}

/* compute frequencies from this file */

n = get_frequencies (f, freqs);
fclose (f);

/* make the huffman tree */

r = build_huffman (freqs);

/* traverse the tree, filling codes[] with the codes */

traverse (r, 0, code, codes);

/* name the output file something.huf */

sprintf (fname, "%s.huf", argv[1]);
g = fopen (fname, "w");
if (!g) {
perror (fname);
exit (1);
}

/* write frequencies to file so they can be reproduced */

fwrite (freqs, NUM_CHARS, sizeof (int), g);

/* write number of characters to file as binary int */

fwrite (&n, 1, sizeof (int), g);

/* open input file again */

f = fopen (argv[1], "r");
if (!f) {
perror (argv[1]);
exit (1);
}

/* encode f to g with codes[] */

encode_file (f, g, codes);
fclose (f);
fclose (g);
/* brag */
printf ("%s is %0.2f%% of %s\n",
fname, (float) nbytes / (float) n, argv[1]);
exit (0);
}

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.