Longest Common Substring
Longest common substring is an algorithm that finds the longest string that is a substring of two or more strings.
char* GetSubString(char* str, int index, int count) {
int strLen = strlen(str);
int lastIndex = index + count;
if (index >= 0 && lastIndex > strLen) return "";
char* subStr = malloc(count + 1);
for (int i = 0; i < count; i++) {
subStr[i] = str[index + i];
}
subStr[count] = '\0';
return subStr;
}
char* AppendString(const char* str1, const char* str2) {
int str1Len = strlen(str1);
int str2Len = strlen(str2);
int strLen = str1Len + str2Len + 1;
char* str = malloc(strLen);
for (int i = 0; i < str1Len; i++)
str[i] = str1[i];
for (int i = 0; i < str2Len; i++)
str[(str1Len + i)] = str2[i];
str[strLen - 1] = '\0';
return str;
}
char* LongestCommonSubstring(const char* str1, const char* str2) {
int str1Size = strlen(str1);
int str2Size = strlen(str2);
int *curr = malloc(sizeof(int) * str2Size);
int *prev = malloc(sizeof(int) * str2Size);
int *swap;
int maxSubstr = 0;
int lastSubsBegin = 0;
char* sequenceBuilder = "";
for (int i = 0; i<str1Size; ++i)
{
for (int j = 0; j<str2Size; ++j)
{
if (str1[i] != str2[j])
{
curr[j] = 0;
}
else
{
if (i == 0 || j == 0)
{
curr[j] = 1;
}
else
{
curr[j] = 1 + prev[j - 1];
}
if (maxSubstr < curr[j])
{
maxSubstr = curr[j];
int thisSubsBegin = i - curr[j] + 1;
if (lastSubsBegin == thisSubsBegin)
{
char* s = malloc(2);
s[0] = str1[i];
s[1] = '\0';
sequenceBuilder = AppendString(sequenceBuilder, s);
}
else
{
lastSubsBegin = thisSubsBegin;
sequenceBuilder = "";
sequenceBuilder = AppendString(sequenceBuilder, GetSubString(str1, lastSubsBegin, (i + 1) - lastSubsBegin));
}
}
}
}
swap = curr;
curr = prev;
prev = swap;
}
free(curr);
free(prev);
return sequenceBuilder;
}
Example
char* str1 = "the quick brown fox jumps over the lazy dog";
char* str2 = "ghdsgf fjsdfg ghdsfbrown fox jumpshfsdjfg 457877fsdfhb$%";
char* value = LongestCommonSubstring(str1, str2);
Output
brown fox jumps