Huffman Compress
Huffman coding is a data compression algorithm. It is based on the idea that frequently-appearing letters should have shorter bit representations and less common letters should have longer representations.
For Huffman Decompress algorithm click here.
/*****Please include following header files*****/
// stdint.h
/***********************************************/
#define MAX_TREE_NODES 511
typedef struct {
uint8_t* BytePointer;
uint32_t BitPosition;
} BitStream;
typedef struct {
int Symbol;
uint32_t Count;
uint32_t Code;
uint32_t Bits;
} Symbol;
typedef struct encodeNode {
struct encodeNode* ChildA;
struct encodeNode* ChildB;
int Count;
int Symbol;
} EncodeNode;
void initBitstream(BitStream* stream, uint8_t* buffer)
{
stream->BytePointer = buffer;
stream->BitPosition = 0;
}
void writeBits(BitStream* stream, uint32_t x, uint32_t bits)
{
uint8_t* buffer = stream->BytePointer;
uint32_t bit = stream->BitPosition;
uint32_t mask = 1 << (bits - 1);
for (uint32_t count = 0; count < bits; ++count)
{
*buffer = (*buffer & (0xff ^ (1 << (7 - bit)))) + ((x & mask ? 1 : 0) << (7 - bit));
x <<= 1;
bit = (bit + 1) & 7;
if (!bit)
{
++buffer;
}
}
stream->BytePointer = buffer;
stream->BitPosition = bit;
}
void histogram(uint8_t* input, Symbol* sym, uint32_t size)
{
int i;
for (i = 0; i < 256; ++i)
{
sym[i].Symbol = i;
sym[i].Count = 0;
sym[i].Code = 0;
sym[i].Bits = 0;
}
for (i = size; i; --i)
{
sym[*input++].Count++;
}
}
void storeTree(EncodeNode* node, Symbol* sym, BitStream* stream, uint32_t code, uint32_t bits)
{
uint32_t symbolIndex;
if (node->Symbol >= 0)
{
writeBits(stream, 1, 1);
writeBits(stream, node->Symbol, 8);
for (symbolIndex = 0; symbolIndex < 256; ++symbolIndex)
{
if (sym[symbolIndex].Symbol == node->Symbol)
break;
}
sym[symbolIndex].Code = code;
sym[symbolIndex].Bits = bits;
return;
}
else
{
writeBits(stream, 0, 1);
}
storeTree(node->ChildA, sym, stream, (code << 1) + 0, bits + 1);
storeTree(node->ChildB, sym, stream, (code << 1) + 1, bits + 1);
}
void makeTree(Symbol* sym, BitStream* stream)
{
EncodeNode nodes[MAX_TREE_NODES], *node1, *node2, *root;
uint32_t i, numSymbols = 0, nodesLeft, nextIndex;
for (i = 0; i < 256; ++i)
{
if (sym[i].Count > 0)
{
nodes[numSymbols].Symbol = sym[i].Symbol;
nodes[numSymbols].Count = sym[i].Count;
nodes[numSymbols].ChildA = (EncodeNode *)0;
nodes[numSymbols].ChildB = (EncodeNode *)0;
++numSymbols;
}
}
root = (EncodeNode*)0;
nodesLeft = numSymbols;
nextIndex = numSymbols;
while (nodesLeft > 1)
{
node1 = (EncodeNode *)0;
node2 = (EncodeNode *)0;
for (i = 0; i < nextIndex; ++i)
{
if (nodes[i].Count > 0)
{
if (!node1 || (nodes[i].Count <= node1->Count))
{
node2 = node1;
node1 = &nodes[i];
}
else if (!node2 || (nodes[i].Count <= node2->Count))
{
node2 = &nodes[i];
}
}
}
root = &nodes[nextIndex];
root->ChildA = node1;
root->ChildB = node2;
root->Count = node1->Count + node2->Count;
root->Symbol = -1;
node1->Count = 0;
node2->Count = 0;
++nextIndex;
--nodesLeft;
}
if (root)
{
storeTree(root, sym, stream, 0, 0);
}
else
{
root = &nodes[0];
storeTree(root, sym, stream, 0, 1);
}
}
int Compress(uint8_t* input, uint8_t* output, uint32_t inputSize)
{
Symbol sym[256], temp;
BitStream stream;
uint32_t i, totalBytes, swaps, symbol;
if (inputSize < 1)
return 0;
initBitstream(&stream, output);
histogram(input, sym, inputSize);
makeTree(sym, &stream);
do
{
swaps = 0;
for (i = 0; i < 255; ++i)
{
if (sym[i].Symbol > sym[i + 1].Symbol)
{
temp = sym[i];
sym[i] = sym[i + 1];
sym[i + 1] = temp;
swaps = 1;
}
}
} while (swaps);
for (i = 0; i < inputSize; ++i)
{
symbol = input[i];
writeBits(&stream, sym[symbol].Code, sym[symbol].Bits);
}
totalBytes = (int)(stream.BytePointer - output);
if (stream.BitPosition > 0)
{
++totalBytes;
}
return totalBytes;
}
Example
/*****Please include following header files*****/
// string.h
// stdlib.h
/***********************************************/
char* str = "This is an example for Huffman coding";
uint8_t* originalData = (uint8_t*)str;
int originalDataSize = strlen(str);
uint8_t* compressedData = (uint8_t*)malloc(originalDataSize * (101 / 100) + 320);
int compressedDataSize = Compress(originalData, compressedData, originalDataSize);