RLE Compress
RLE (Run-length encoding) is a very simple form of lossless data compression in which runs of data (that is, sequences in which the same data value occurs in many consecutive data elements) are stored as a single data value and count, rather than as the original run. This is most useful on data that contains many such runs.
For RLE Decompress algorithm click here.
/*****Please include following header files*****/
// cstdint
/***********************************************/
static void encodeRepetition(uint8_t* output, uint32_t* outputPosition, uint8_t marker, uint8_t symbol, uint32_t count)
{
uint32_t index = *outputPosition;
if (count <= 3)
{
if (symbol == marker)
{
output[index++] = marker;
output[index++] = count - 1;
}
else
{
for (uint32_t i = 0; i < count; ++i)
{
output[index++] = symbol;
}
}
}
else
{
output[index++] = marker;
--count;
if (count >= 128)
{
output[index++] = (count >> 8) | 0x80;
}
output[index++] = count & 0xff;
output[index++] = symbol;
}
*outputPosition = index;
}
static void encodeNonRepetition(uint8_t* output, uint32_t* outputPosition, uint8_t marker, uint8_t symbol)
{
uint32_t index = *outputPosition;
if (symbol == marker)
{
output[index++] = marker;
output[index++] = 0;
}
else
{
output[index++] = symbol;
}
*outputPosition = index;
}
static int Compress(uint8_t* input, uint8_t* output, uint32_t inputSize) {
uint8_t byte1, byte2, marker;
uint32_t i, inputPosition, outputPosition, count, histogram[256];
if (inputSize < 1)
{
return 0;
}
for (i = 0; i < 256; ++i)
{
histogram[i] = 0;
}
for (i = 0; i < inputSize; ++i)
{
++histogram[input[i]];
}
marker = 0;
for (i = 1; i < 256; ++i)
{
if (histogram[i] < histogram[marker])
{
marker = i;
}
}
output[0] = marker;
outputPosition = 1;
byte1 = input[0];
inputPosition = 1;
count = 1;
if (inputSize >= 2)
{
byte2 = input[inputPosition++];
count = 2;
do
{
if (byte1 == byte2)
{
while ((inputPosition < inputSize) && (byte1 == byte2) && (count < 32768))
{
byte2 = input[inputPosition++];
++count;
}
if (byte1 == byte2)
{
encodeRepetition(output, &outputPosition, marker, byte1, count);
if (inputPosition < inputSize)
{
byte1 = input[inputPosition++];
count = 1;
}
else
{
count = 0;
}
}
else
{
encodeRepetition(output, &outputPosition, marker, byte1, count - 1);
byte1 = byte2;
count = 1;
}
}
else
{
encodeNonRepetition(output, &outputPosition, marker, byte1);
byte1 = byte2;
count = 1;
}
if (inputPosition < inputSize)
{
byte2 = input[inputPosition++];
count = 2;
}
} while ((inputPosition < inputSize) || (count >= 2));
}
if (count == 1)
{
encodeNonRepetition(output, &outputPosition, marker, byte1);
}
return outputPosition;
}
Example
/*****Please include following header files*****/
// cstring
/***********************************************/
char* str = "aaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbNNNNNNNNNNNNNNNNPPPPPPPPPPP12888888888888@@@@@@@@@@@@@";
uint8_t* originalData = (uint8_t*)str;
int originalDataSize = strlen(str);
uint8_t* compressedData = new uint8_t[originalDataSize * (257 / 256) + 1];
int compressedDataSize = Compress(originalData, compressedData, originalDataSize);