RLE Compress
RLE (Run-length encoding) is a very simple form of lossless data compression in which runs of data (that is, sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count, rather than as the original run. This is most useful on data that contains many such runs.
For RLE Decompress algorithm click here.
Private Shared Sub encodeRepetition(output As Byte(), ByRef outputPosition As UInteger, marker As Byte, symbol As Byte, count As UInteger)
Dim index As UInteger = outputPosition
If count <= 3 Then
If symbol = marker Then
output(index) = marker
index += 1
output(index) = CByte(count - 1)
index += 1
Else
For i As UInteger = 0 To count - 1
output(index) = symbol
index += 1
Next
End If
Else
output(index) = marker
index += 1
count -= 1
If count >= 128 Then
output(index) = CByte((count >> 8) Or &H80)
index += 1
End If
output(index) = CByte(count And &HFF)
index += 1
output(index) = symbol
index += 1
End If
outputPosition = index
End Sub
Private Shared Sub encodeNonRepetition(output As Byte(), ByRef outputPosition As UInteger, marker As Byte, symbol As Byte)
Dim index As UInteger = outputPosition
If symbol = marker Then
output(index) = marker
index += 1
output(index) = 0
index += 1
Else
output(index) = symbol
index += 1
End If
outputPosition = index
End Sub
Public Shared Function Compress(input As Byte(), ByRef output As Byte(), inputSize As UInteger) As Integer
Dim byte1 As Byte, byte2 As Byte, marker As Byte
Dim i As UInteger, inputPosition As UInteger, outputPosition As UInteger, count As UInteger
Dim histogram As UInteger() = New UInteger(255) {}
If inputSize < 1 Then
Return 0
End If
For i = 0 To 255
histogram(i) = 0
Next
For i = 0 To inputSize - 1
histogram(input(i)) += 1
Next
marker = 0
For i = 1 To 255
If histogram(i) < histogram(marker) Then
marker = CByte(i)
End If
Next
output(0) = marker
outputPosition = 1
byte1 = input(0)
inputPosition = 1
count = 1
If inputSize >= 2 Then
byte2 = input(inputPosition)
inputPosition += 1
count = 2
Do
If byte1 = byte2 Then
While (inputPosition < inputSize) AndAlso (byte1 = byte2) AndAlso (count < 32768)
byte2 = input(inputPosition)
inputPosition += 1
count += 1
End While
If byte1 = byte2 Then
encodeRepetition(output, outputPosition, marker, byte1, count)
If inputPosition < inputSize Then
byte1 = input(inputPosition)
inputPosition += 1
count = 1
Else
count = 0
End If
Else
encodeRepetition(output, outputPosition, marker, byte1, count - 1)
byte1 = byte2
count = 1
End If
Else
encodeNonRepetition(output, outputPosition, marker, byte1)
byte1 = byte2
count = 1
End If
If inputPosition < inputSize Then
byte2 = input(inputPosition)
inputPosition += 1
count = 2
End If
Loop While (inputPosition < inputSize) OrElse (count >= 2)
End If
If count = 1 Then
encodeNonRepetition(output, outputPosition, marker, byte1)
End If
Return CInt(outputPosition)
End Function
Example
Dim str As String = "aaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbNNNNNNNNNNNNNNNNPPPPPPPPPPP12888888888888@@@@@@@@@@@@@"
Dim originalData As Byte() = Encoding.[Default].GetBytes(str)
Dim originalDataSize As Integer = str.Length
Dim compressedData As Byte() = New Byte(originalDataSize * (257 \ 256)) {}
Dim compressedDataSize As Integer = Compress(originalData, compressedData, CUInt(originalDataSize))