RLE Compress

RLE (Run-length encoding) is a very simple form of lossless data compression in which runs of data (that is, sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count, rather than as the original run. This is most useful on data that contains many such runs.

For RLE Decompress algorithm click here.



									Private Shared Sub encodeRepetition(output As Byte(), ByRef outputPosition As UInteger, marker As Byte, symbol As Byte, count As UInteger)
	Dim index As UInteger = outputPosition

	If count <= 3 Then
		If symbol = marker Then
			output(index) = marker
			index += 1
			output(index) = CByte(count - 1)
			index += 1
		Else
			For i As UInteger = 0 To count - 1
				output(index) = symbol
				index += 1
			Next
		End If
	Else
		output(index) = marker
		index += 1
		count -= 1

		If count >= 128 Then
			output(index) = CByte((count >> 8) Or &H80)
			index += 1
		End If

		output(index) = CByte(count And &HFF)
		index += 1
		output(index) = symbol
		index += 1
	End If

	outputPosition = index
End Sub

Private Shared Sub encodeNonRepetition(output As Byte(), ByRef outputPosition As UInteger, marker As Byte, symbol As Byte)
	Dim index As UInteger = outputPosition

	If symbol = marker Then
		output(index) = marker
		index += 1
		output(index) = 0
		index += 1
	Else
		output(index) = symbol
		index += 1
	End If

	outputPosition = index
End Sub

Public Shared Function Compress(input As Byte(), ByRef output As Byte(), inputSize As UInteger) As Integer
	Dim byte1 As Byte, byte2 As Byte, marker As Byte
	Dim i As UInteger, inputPosition As UInteger, outputPosition As UInteger, count As UInteger
	Dim histogram As UInteger() = New UInteger(255) {}

	If inputSize < 1 Then
		Return 0
	End If

	For i = 0 To 255
		histogram(i) = 0
	Next

	For i = 0 To inputSize - 1
		histogram(input(i)) += 1
	Next

	marker = 0

	For i = 1 To 255
		If histogram(i) < histogram(marker) Then
			marker = CByte(i)
		End If
	Next

	output(0) = marker
	outputPosition = 1

	byte1 = input(0)
	inputPosition = 1
	count = 1

	If inputSize >= 2 Then
		byte2 = input(inputPosition)
		inputPosition += 1
		count = 2

		Do
			If byte1 = byte2 Then
				While (inputPosition < inputSize) AndAlso (byte1 = byte2) AndAlso (count < 32768)
					byte2 = input(inputPosition)
					inputPosition += 1
					count += 1
				End While

				If byte1 = byte2 Then
					encodeRepetition(output, outputPosition, marker, byte1, count)

					If inputPosition < inputSize Then
						byte1 = input(inputPosition)
						inputPosition += 1
						count = 1
					Else
						count = 0
					End If
				Else
					encodeRepetition(output, outputPosition, marker, byte1, count - 1)
					byte1 = byte2
					count = 1
				End If
			Else
				encodeNonRepetition(output, outputPosition, marker, byte1)
				byte1 = byte2
				count = 1
			End If
			If inputPosition < inputSize Then
				byte2 = input(inputPosition)
				inputPosition += 1
				count = 2
			End If
		Loop While (inputPosition < inputSize) OrElse (count >= 2)
	End If

	If count = 1 Then
		encodeNonRepetition(output, outputPosition, marker, byte1)
	End If

	Return CInt(outputPosition)
End Function
								


Example

									Dim str As String = "aaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbNNNNNNNNNNNNNNNNPPPPPPPPPPP12888888888888@@@@@@@@@@@@@"
Dim originalData As Byte() = Encoding.[Default].GetBytes(str)
Dim originalDataSize As Integer = str.Length
Dim compressedData As Byte() = New Byte(originalDataSize * (257 \ 256)) {}

Dim compressedDataSize As Integer = Compress(originalData, compressedData, CUInt(originalDataSize))