Huffman Compress
Huffman coding is a data compression algorithm. It is based on the idea that frequently-appearing letters should have shorter bit representations and less common letters should have longer representations.
For Huffman Decompress algorithm click here.
Private Const MAX_TREE_NODES As Integer = 511
Public Class BitStream
Public BytePointer As Byte()
Public BitPosition As UInteger
Public Index As UInteger
End Class
Public Structure Symbol
Public Sym As Integer
Public Count As UInteger
Public Code As UInteger
Public Bits As UInteger
End Structure
Public Class EncodeNode
Public ChildA As EncodeNode
Public ChildB As EncodeNode
Public Count As Integer
Public Symbol As Integer
End Class
Private Shared Sub initBitstream(ByRef stream As BitStream, buffer As Byte())
stream.BytePointer = buffer
stream.BitPosition = 0
End Sub
Private Shared Sub writeBits(ByRef stream As BitStream, x As UInteger, bits As UInteger)
Dim buffer As Byte() = stream.BytePointer
Dim bit As UInteger = stream.BitPosition
Dim mask As UInteger = CUInt(1 << CInt(bits - 1))
For count As UInteger = 0 To bits - 1
buffer(stream.Index) = CByte((buffer(stream.Index) And (&HFF Xor (1 << CInt(7 - bit)))) + ((If(Convert.ToBoolean(x And mask), 1, 0)) << CInt(7 - bit)))
x <<= 1
bit = (bit + 1) And 7
If Not Convert.ToBoolean(bit) Then
stream.Index += 1
End If
Next
stream.BytePointer = buffer
stream.BitPosition = bit
End Sub
Private Shared Sub histogram(input As Byte(), sym As Symbol(), size As UInteger)
Dim i As Integer
Dim index As Integer = 0
For i = 0 To 255
sym(i).Sym = i
sym(i).Count = 0
sym(i).Code = 0
sym(i).Bits = 0
Next
i = CInt(size)
While Convert.ToBoolean(i)
sym(input(index)).Count += 1
i -= 1
index += 1
End While
End Sub
Private Shared Sub storeTree(ByRef node As EncodeNode, sym As Symbol(), ByRef stream As BitStream, code As UInteger, bits As UInteger)
Dim symbolIndex As UInteger
If node.Symbol >= 0 Then
writeBits(stream, 1, 1)
writeBits(stream, CUInt(node.Symbol), 8)
For symbolIndex = 0 To 255
If sym(symbolIndex).Sym = node.Symbol Then
Exit For
End If
Next
sym(symbolIndex).Code = code
sym(symbolIndex).Bits = bits
Return
Else
writeBits(stream, 0, 1)
End If
storeTree(node.ChildA, sym, stream, (code << 1) + 0, bits + 1)
storeTree(node.ChildB, sym, stream, (code << 1) + 1, bits + 1)
End Sub
Private Shared Sub makeTree(sym As Symbol(), ByRef stream As BitStream)
Dim nodes As EncodeNode() = New EncodeNode(MAX_TREE_NODES - 1) {}
For counter As Integer = 0 To nodes.Length - 1
nodes(counter) = New EncodeNode()
Next
Dim node1 As EncodeNode, node2 As EncodeNode, root As EncodeNode
Dim i As UInteger, numSymbols As UInteger = 0, nodesLeft As UInteger, nextIndex As UInteger
For i = 0 To 255
If sym(i).Count > 0 Then
nodes(numSymbols).Symbol = sym(i).Sym
nodes(numSymbols).Count = CInt(sym(i).Count)
nodes(numSymbols).ChildA = Nothing
nodes(numSymbols).ChildB = Nothing
numSymbols += 1
End If
Next
root = Nothing
nodesLeft = numSymbols
nextIndex = numSymbols
While nodesLeft > 1
node1 = Nothing
node2 = Nothing
For i = 0 To nextIndex - 1
If nodes(i).Count > 0 Then
If node1 Is Nothing OrElse (nodes(i).Count <= node1.Count) Then
node2 = node1
node1 = nodes(i)
ElseIf node2 Is Nothing OrElse (nodes(i).Count <= node2.Count) Then
node2 = nodes(i)
End If
End If
Next
root = nodes(nextIndex)
root.ChildA = node1
root.ChildB = node2
root.Count = node1.Count + node2.Count
root.Symbol = -1
node1.Count = 0
node2.Count = 0
nextIndex += 1
nodesLeft -= 1
End While
If root IsNot Nothing Then
storeTree(root, sym, stream, 0, 0)
Else
root = nodes(0)
storeTree(root, sym, stream, 0, 1)
End If
End Sub
Public Shared Function Compress(input As Byte(), output As Byte(), inputSize As UInteger) As Integer
Dim sym As Symbol() = New Symbol(255) {}
Dim temp As Symbol
Dim stream As New BitStream()
Dim i As UInteger, totalBytes As UInteger, swaps As UInteger, symbol As UInteger
If inputSize < 1 Then
Return 0
End If
initBitstream(stream, output)
histogram(input, sym, inputSize)
makeTree(sym, stream)
Do
swaps = 0
For i = 0 To 254
If sym(i).Sym > sym(i + 1).Sym Then
temp = sym(i)
sym(i) = sym(i + 1)
sym(i + 1) = temp
swaps = 1
End If
Next
Loop While Convert.ToBoolean(swaps)
For i = 0 To inputSize - 1
symbol = input(i)
writeBits(stream, sym(symbol).Code, sym(symbol).Bits)
Next
totalBytes = stream.Index
If stream.BitPosition > 0 Then
totalBytes += 1
End If
Return CInt(totalBytes)
End Function
Example
Dim str As String = "This is an example for Huffman coding"
Dim originalData As Byte() = Encoding.[Default].GetBytes(str)
Dim originalDataSize As UInteger = CUInt(str.Length)
Dim compressedData As Byte() = New Byte(originalDataSize * (101 \ 100) + 319) {}
Dim compressedDataSize As Integer = Compress(originalData, compressedData, originalDataSize)