RLE Compress
RLE (Run-length encoding) is a very simple form of lossless data compression in which runs of data (that is, sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count, rather than as the original run. This is most useful on data that contains many such runs.
For RLE Decompress algorithm click here.
private static void encodeRepetition(byte[] output, ref uint outputPosition, byte marker, byte symbol, uint count)
{
uint index = outputPosition;
if (count <= 3)
{
if (symbol == marker)
{
output[index++] = marker;
output[index++] = (byte)(count - 1);
}
else
{
for (uint i = 0; i < count; ++i)
{
output[index++] = symbol;
}
}
}
else
{
output[index++] = marker;
--count;
if (count >= 128)
{
output[index++] = (byte)((count >> 8) | 0x80);
}
output[index++] = (byte)(count & 0xff);
output[index++] = symbol;
}
outputPosition = index;
}
private static void encodeNonRepetition(byte[] output, ref uint outputPosition, byte marker, byte symbol)
{
uint index = outputPosition;
if (symbol == marker)
{
output[index++] = marker;
output[index++] = 0;
}
else
{
output[index++] = symbol;
}
outputPosition = index;
}
public static int Compress(byte[] input, ref byte[] output, uint inputSize)
{
byte byte1, byte2, marker;
uint i, inputPosition, outputPosition, count;
uint[] histogram = new uint[256];
if (inputSize < 1)
{
return 0;
}
for (i = 0; i < 256; ++i)
{
histogram[i] = 0;
}
for (i = 0; i < inputSize; ++i)
{
++histogram[input[i]];
}
marker = 0;
for (i = 1; i < 256; ++i)
{
if (histogram[i] < histogram[marker])
{
marker = (byte)i;
}
}
output[0] = marker;
outputPosition = 1;
byte1 = input[0];
inputPosition = 1;
count = 1;
if (inputSize >= 2)
{
byte2 = input[inputPosition++];
count = 2;
do
{
if (byte1 == byte2)
{
while ((inputPosition < inputSize) && (byte1 == byte2) && (count < 32768))
{
byte2 = input[inputPosition++];
++count;
}
if (byte1 == byte2)
{
encodeRepetition(output, ref outputPosition, marker, byte1, count);
if (inputPosition < inputSize)
{
byte1 = input[inputPosition++];
count = 1;
}
else
{
count = 0;
}
}
else
{
encodeRepetition(output, ref outputPosition, marker, byte1, count - 1);
byte1 = byte2;
count = 1;
}
}
else
{
encodeNonRepetition(output, ref outputPosition, marker, byte1);
byte1 = byte2;
count = 1;
}
if (inputPosition < inputSize)
{
byte2 = input[inputPosition++];
count = 2;
}
} while ((inputPosition < inputSize) || (count >= 2));
}
if (count == 1)
{
encodeNonRepetition(output, ref outputPosition, marker, byte1);
}
return (int)outputPosition;
}
Example
string str = "aaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbNNNNNNNNNNNNNNNNPPPPPPPPPPP12888888888888@@@@@@@@@@@@@";
byte[] originalData = Encoding.Default.GetBytes(str);
int originalDataSize = str.Length;
byte[] compressedData = new byte[originalDataSize * (257 / 256) + 1];
int compressedDataSize = Compress(originalData, ref compressedData, (uint)originalDataSize);