Shannon–Fano Compress
Shannon–Fano is data compression algorithm, which replaces each symbol with an alternate binary representation. Common symbols are represented by few bits and uncommon symbols are represented by many bits.
For Shannon–Fano Decompress algorithm click here.
Public Class BitStream
Public BytePointer As Byte()
Public BitPosition As UInteger
Public Index As UInteger
End Class
Public Structure Symbol
Public Sym As UInteger
Public Count As UInteger
Public Code As UInteger
Public Bits As UInteger
End Structure
Private Shared Sub initBitStream(ByRef stream As BitStream, buffer As Byte())
stream.BytePointer = buffer
stream.BitPosition = 0
End Sub
Private Shared Sub writeBits(ByRef stream As BitStream, x As UInteger, bits As UInteger)
Dim buffer As Byte() = stream.BytePointer
Dim bit As UInteger = stream.BitPosition
Dim mask As UInteger = CUInt(1 << CInt(bits - 1))
For count As UInteger = 0 To bits - 1
buffer(stream.Index) = CByte((buffer(stream.Index) And (&HFF Xor (1 << CInt(7 - bit)))) + ((If(Convert.ToBoolean(x And mask), 1, 0)) << CInt(7 - bit)))
x <<= 1
bit = (bit + 1) And 7
If Not Convert.ToBoolean(bit) Then
stream.Index += 1
End If
Next
stream.BytePointer = buffer
stream.BitPosition = bit
End Sub
Private Shared Sub histogram(input As Byte(), sym As Symbol(), size As UInteger)
Dim temp As Symbol
Dim i As Integer, swaps As Integer
Dim index As Integer = 0
For i = 0 To 255
sym(i).Sym = CUInt(i)
sym(i).Count = 0
sym(i).Code = 0
sym(i).Bits = 0
Next
i = CInt(size)
While Convert.ToBoolean(i)
sym(input(index)).Count += 1
i -= 1
index += 1
End While
Do
swaps = 0
For i = 0 To 254
If sym(i).Count < sym(i + 1).Count Then
temp = sym(i)
sym(i) = sym(i + 1)
sym(i + 1) = temp
swaps = 1
End If
Next
Loop While Convert.ToBoolean(swaps)
End Sub
Private Shared Sub makeTree(sym As Symbol(), ByRef stream As BitStream, code As UInteger, bits As UInteger, first As UInteger, last As UInteger)
Dim i As UInteger, size As UInteger, sizeA As UInteger, sizeB As UInteger, lastA As UInteger, firstB As UInteger
If first = last Then
writeBits(stream, 1, 1)
writeBits(stream, sym(first).Sym, 8)
sym(first).Code = code
sym(first).Bits = bits
Return
Else
writeBits(stream, 0, 1)
End If
size = 0
For i = first To last
size += sym(i).Count
Next
sizeA = 0
i = first
While sizeA < ((size + 1) >> 1) AndAlso i < last
sizeA += sym(i).Count
i += 1
End While
If sizeA > 0 Then
writeBits(stream, 1, 1)
lastA = i - 1
makeTree(sym, stream, (code << 1) + 0, bits + 1, first, lastA)
Else
writeBits(stream, 0, 1)
End If
sizeB = size - sizeA
If sizeB > 0 Then
writeBits(stream, 1, 1)
firstB = i
makeTree(sym, stream, (code << 1) + 1, bits + 1, firstB, last)
Else
writeBits(stream, 0, 1)
End If
End Sub
Public Shared Function Compress(input As Byte(), output As Byte(), inputSize As UInteger) As Integer
Dim sym As Symbol() = New Symbol(255) {}
Dim temp As Symbol
Dim stream As New BitStream()
Dim i As UInteger, totalBytes As UInteger, swaps As UInteger, symbol As UInteger, lastSymbol As UInteger
If inputSize < 1 Then
Return 0
End If
initBitStream(stream, output)
histogram(input, sym, inputSize)
lastSymbol = 255
While sym(lastSymbol).Count = 0
lastSymbol -= 1
End While
If lastSymbol = 0 Then
lastSymbol += 1
End If
makeTree(sym, stream, 0, 0, 0, lastSymbol)
Do
swaps = 0
For i = 0 To 254
If sym(i).Sym > sym(i + 1).Sym Then
temp = sym(i)
sym(i) = sym(i + 1)
sym(i + 1) = temp
swaps = 1
End If
Next
Loop While Convert.ToBoolean(swaps)
For i = 0 To inputSize - 1
symbol = input(i)
writeBits(stream, sym(symbol).Code, sym(symbol).Bits)
Next
totalBytes = stream.Index
If stream.BitPosition > 0 Then
totalBytes += 1
End If
Return CInt(totalBytes)
End Function
Example
Dim str As String = "This is an example for Shannon–Fano coding"
Dim originalData As Byte() = Encoding.[Default].GetBytes(str)
Dim originalDataSize As UInteger = CUInt(str.Length)
Dim compressedData As Byte() = New Byte(originalDataSize * (101 \ 100) + 383) {}
Dim compressedDataSize As Integer = Compress(originalData, compressedData, originalDataSize)