Longest Common Subsequence
The longest common subsequence (LCS) is the problem of finding the longest subsequence common to all sequences in a set of sequences (often just two sequences). It differs from problems of finding common substrings: unlike substrings, subsequences are not required to occupy consecutive positions within the original sequences.
private static int MAX(int a, int b)
{
return a > b ? a : b;
}
public static int LongestCommonSubsequence(string s1, string s2, out string output)
{
int i, j, k, t;
int s1Len = s1.Length;
int s2Len = s2.Length;
int[] z = new int[(s1Len + 1) * (s2Len + 1)];
int[,] c = new int[(s1Len + 1), (s2Len + 1)];
for (i = 0; i <= s1Len; ++i)
c[i, 0] = z[i * (s2Len + 1)];
for (i = 1; i <= s1Len; ++i)
{
for (j = 1; j <= s2Len; ++j)
{
if (s1[i - 1] == s2[j - 1])
c[i, j] = c[i - 1, j - 1] + 1;
else
c[i, j] = MAX(c[i - 1, j], c[i, j - 1]);
}
}
t = c[s1Len, s2Len];
char[] outputSB = new char[t];
for (i = s1Len, j = s2Len, k = t - 1; k >= 0;)
{
if (s1[i - 1] == s2[j - 1])
{
outputSB[k] = s1[i - 1];
--i;
--j;
--k;
}
else if (c[i, j - 1] > c[i - 1, j])
--j;
else
--i;
}
output = new string(outputSB);
return t;
}
Example
string s1 = "human";
string s2 = "chimpanzee";
string output = "";
int outputLen = LongestCommonSubsequence(s1, s2, out output);
Output
output: hman
outputLen: 4