Compare two different sized strings similarity - c#

I have a string that is produced by code, but it may not be correct.
So I have a screen where the user checks the string and changes it.
I must let the user change a maximum of 5 characters.
I need to check how many characters the user changed
by comparing the two strings.
The lengths of the strings may differ.
Thanks in advance. (Language: C#)

You could compute the Levenshtein Distance between the two strings, which returns the number of character edits (removals, inserts, replacements) that must occur to get from string A to string B.
public static class LevenshteinDistance
{
    /// <summary>
    /// Computes the Levenshtein edit distance between two strings: the minimum
    /// number of single-character insertions, deletions and substitutions
    /// needed to turn <paramref name="s"/> into <paramref name="t"/>.
    /// The comparison is case-sensitive (plain <c>char</c> equality).
    /// </summary>
    /// <param name="s">First string.</param>
    /// <param name="t">Second string.</param>
    /// <returns>The edit distance; when one string is empty, the length of the other.</returns>
    public static int Compute(string s, string t)
    {
        int sourceLength = s.Length;
        int targetLength = t.Length;

        // An empty string is reached purely by deletions (or insertions).
        if (sourceLength == 0)
        {
            return targetLength;
        }

        if (targetLength == 0)
        {
            return sourceLength;
        }

        // table[row, col] = distance between the first `row` chars of s
        // and the first `col` chars of t.
        int[,] table = new int[sourceLength + 1, targetLength + 1];

        for (int row = 0; row <= sourceLength; row++)
        {
            table[row, 0] = row;
        }

        for (int col = 0; col <= targetLength; col++)
        {
            table[0, col] = col;
        }

        for (int row = 1; row <= sourceLength; row++)
        {
            char sourceChar = s[row - 1];

            for (int col = 1; col <= targetLength; col++)
            {
                int substitution = table[row - 1, col - 1] + (t[col - 1] == sourceChar ? 0 : 1);
                int deletion = table[row - 1, col] + 1;
                int insertion = table[row, col - 1] + 1;

                table[row, col] = Math.Min(Math.Min(deletion, insertion), substitution);
            }
        }

        return table[sourceLength, targetLength];
    }
}
Then handle it:
if (LevenshteinDistance.Compute(s1, s2) <= 5)
// Valid
else
// Invalid

Related

Select enum value by string similarity

I have an enum with six distinct values:
One
Two
Three
Four
Five
Six
which is filled from a config file (i.e. string)
Let's say someone writes into the config file any of the values
On
one
onbe
or other common misspellings/typos, I want to set the most similar value from the enum (in this case, "One") instead of throwing up.
Does C# have something like that builtin or will I have to adapt an existing Edit Distance algorithm for C# and hook that into the enum?
You can use the Levenshtein distance, which tells us the number of edits needed to turn one string into another.
So just go through all the values in your enum and calculate the Levenshtein distance to each one:
/// <summary>
/// Computes the Levenshtein distance between <paramref name="a"/> and
/// <paramref name="b"/>: the minimum number of single-character insertions,
/// deletions and substitutions needed to turn one into the other.
/// A null argument is treated like an empty string.
/// </summary>
/// <param name="a">First string (may be null or empty).</param>
/// <param name="b">Second string (may be null or empty).</param>
/// <returns>The edit distance; the comparison is case-sensitive.</returns>
private static int CalcLevenshteinDistance(string a, string b)
{
    // The distance to an empty (or missing) string is the length of the other
    // string. Returning 0 here would make an empty input a "perfect match"
    // for every candidate when picking the closest value.
    if (String.IsNullOrEmpty(a))
    {
        return String.IsNullOrEmpty(b) ? 0 : b.Length;
    }

    if (String.IsNullOrEmpty(b))
    {
        return a.Length;
    }

    int lengthA = a.Length;
    int lengthB = b.Length;

    // distances[i, j] = distance between the first i chars of a and the first j chars of b.
    var distances = new int[lengthA + 1, lengthB + 1];

    for (int i = 0; i <= lengthA; i++)
    {
        distances[i, 0] = i;
    }

    for (int j = 0; j <= lengthB; j++)
    {
        distances[0, j] = j;
    }

    for (int i = 1; i <= lengthA; i++)
    {
        for (int j = 1; j <= lengthB; j++)
        {
            int cost = b[j - 1] == a[i - 1] ? 0 : 1;

            distances[i, j] = Math.Min(
                Math.Min(distances[i - 1, j] + 1, distances[i, j - 1] + 1),
                distances[i - 1, j - 1] + cost);
        }
    }

    return distances[lengthA, lengthB];
}

How do I implement a pruning strategy with a DTW algorithm?

I've implemented a DTW algorithm for a speech recognition application, and I'm now trying to improve its performance by pruning. I searched for improvements to this algorithm and found that I'm supposed to calculate the values in the 2D array DTW only within a specific range, as shown in Image #1, but I don't know exactly how to do that. Could someone provide any help with that?
The code to the algorithm is included (C#)
/// <summary>
/// Computes the Euclidean distance between two audio frames, using the
/// first 13 entries of each frame's <c>Features</c>.
/// </summary>
/// <param name="frame1">First frame.</param>
/// <param name="frame2">Second frame.</param>
/// <returns>The square root of the summed squared feature differences.</returns>
double distance(MFCCFrame frame1, MFCCFrame frame2)
{
    // NOTE(review): 13 is presumably the number of MFCC coefficients per frame - confirm.
    double sumOfSquares = 0;
    int k = 0;
    while (k < 13)
    {
        var gap = Math.Abs(frame1.Features[k] - frame2.Features[k]);
        sumOfSquares += Math.Pow(gap, 2);
        k++;
    }

    return Math.Sqrt(sumOfSquares);
}
/// <summary>
/// DTW algorithm.
/// Takes two sequences, seq1 and seq2, and accumulates frame-to-frame costs
/// (computed by the function "distance") along the cheapest warping path.
/// Uses a 2D array DTW[,] with dimensions: number of frames in seq1 by
/// number of frames in seq2.
/// </summary>
/// <param name="seq1">First sequence; its frames index the rows of the table.</param>
/// <param name="seq2">Second sequence; its frames index the columns of the table.</param>
/// <returns>The last element of the 2D array, i.e. the accumulated cost between the two sequences.</returns>
double DTWDistance(Sequence seq1, Sequence seq2)
{
    int m = seq1.Frames.Length, n = seq2.Frames.Length;
    double[,] DTW = new double[m, n];

    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            double cost = distance(seq1.Frames[i], seq2.Frames[j]);

            if (i == 0 && j == 0)
            {
                // Start cell: nothing accumulated yet.
                DTW[i, j] = cost;
            }
            else if (i == 0)
            {
                // First row: the only predecessor is the cell to the left.
                DTW[i, j] = cost + DTW[i, j - 1];
            }
            else if (j == 0)
            {
                // First column: the only predecessor is the cell above.
                DTW[i, j] = cost + DTW[i - 1, j];
            }
            else
            {
                // Interior: cheapest of the cell above, the cell to the left and the diagonal.
                DTW[i, j] = cost + Math.Min(DTW[i - 1, j], Math.Min(DTW[i, j - 1], DTW[i - 1, j - 1]));
            }
        }
    }

    // The method previously fell off the end without returning a value
    // (compile error CS0161); return the last cell as the summary describes.
    return DTW[m - 1, n - 1];
}
Since no one answered, I thought I'd answer this in case anyone needed help with this as well
This is the regular DTW algorithm:
/// <summary>
/// DTW algorithm.
/// Takes two sequences, input and template, and accumulates frame-to-frame
/// costs (computed by the function "distance") in a 2D array DTW[,] with
/// dimensions: number of frames in input by number of frames in template.
/// Sequences where one is shorter than half the other (integer halves) are
/// not compared and yield <c>double.MaxValue</c>.
/// </summary>
/// <param name="input">Sequence whose frames index the rows of the table.</param>
/// <param name="template">Sequence whose frames index the columns of the table.</param>
/// <returns>The last element of the 2D array, or <c>double.MaxValue</c> when the lengths differ too much.</returns>
double DTWDistance(Sequence input, Sequence template)
{
    int rows = input.Frames.Length;
    int columns = template.Frames.Length;

    // Both halves use integer division, exactly like the casted form it replaces.
    bool inputTooShort = rows < columns / 2;
    bool templateTooShort = columns < rows / 2;
    if (inputTooShort || templateTooShort)
    {
        return double.MaxValue;
    }

    double[,] DTW = new double[rows, columns];
    DTW[0, 0] = 0;

    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < columns; j++)
        {
            double cost = distance(input.Frames[i], template.Frames[j]);

            if (i > 0 && j > 0)
            {
                // insert, match: only the cell above and the diagonal are considered
                DTW[i, j] = cost + Math.Min(DTW[i - 1, j], DTW[i - 1, j - 1]);
            }
            else if (i > 0)
            {
                // first column
                DTW[i, j] = cost + DTW[i - 1, j];
            }
            else if (j > 0)
            {
                // first row
                DTW[i, j] = cost + DTW[i, j - 1];
            }
            else
            {
                // start cell
                DTW[i, j] = cost;
            }
        }
    }

    return DTW[rows - 1, columns - 1];
}
And I've also implemented a pruning strategy on the regular DTW algorithms, it's shown here:
/// <summary>
/// DTW algorithm with pruning.
/// Only a band around the diagonal (Sakoe-Chiba band) is calculated; every
/// cell outside it is treated as unreachable.
/// Sequences where one is shorter than half the other (integer halves) are
/// not compared and yield <c>float.MaxValue</c>.
/// </summary>
/// <param name="input">Sequence whose frames index the rows of the table.</param>
/// <param name="output">Sequence whose frames index the columns of the table.</param>
/// <returns>
/// The accumulated cost in the last cell of the table, or <c>float.MaxValue</c>
/// when the lengths differ too much.
/// </returns>
float Pruning_DTWDistance(Sequence input, Sequence output)
{
    int rows = input.Frames.Length, columns = output.Frames.Length;
    if (rows < columns / 2 || columns < rows / 2)
    {
        return float.MaxValue;
    }

    // One extra row and column so that frame k maps to index k + 1 and
    // cell [0, 0] is the empty-prefix start.
    float[,] DTW = new float[rows + 1, columns + 1];

    // Mark every cell as unreachable up front. The previous version used 0 as
    // the "not computed" marker, so a genuinely zero accumulated cost (for
    // example identical sequences) was overwritten with float.MaxValue.
    for (int i = 0; i <= rows; i++)
    {
        for (int j = 0; j <= columns; j++)
        {
            DTW[i, j] = float.MaxValue;
        }
    }
    DTW[0, 0] = 0;

    int w = Math.Abs(columns - rows); // window length -> |i - j| <= w

    for (int i = 1; i <= rows; i++)
    {
        int firstColumn = Math.Max(1, i - w);
        int lastColumn = Math.Min(columns, i + w);

        for (int j = firstColumn; j <= lastColumn; j++)
        {
            float cost = distance(input.Frames[i - 1], output.Frames[j - 1]); // frames are 0-based

            // insert, match: only the cell above and the diagonal are considered.
            // With these two moves the column index never outruns the row index,
            // so the last cell is only reachable when 1 <= columns <= rows.
            DTW[i, j] = cost + Math.Min(DTW[i - 1, j], DTW[i - 1, j - 1]);
        }
    }

    return DTW[rows, columns];
}
Both functions use a helper function distance, which is defined as follows:
/// <summary>
/// Computes the Euclidean distance between two audio frames given as
/// MFCCFrame objects, using the first 13 entries of each frame's
/// <c>Features</c>.
/// </summary>
/// <param name="frame1">First frame.</param>
/// <param name="frame2">Second frame.</param>
/// <returns>The distance, narrowed to a float.</returns>
float distance(MFCCFrame frame1, MFCCFrame frame2)
{
    // Accumulate in double precision; only the final result is narrowed.
    double squaredTotal = 0;
    for (int feature = 0; feature != 13; ++feature)
    {
        var difference = Math.Abs(frame1.Features[feature] - frame2.Features[feature]);
        squaredTotal += Math.Pow(difference, 2);
    }

    double root = Math.Sqrt(squaredTotal);
    return (float)root;
}
[EDIT]
To reduce the memory consumption of the DTW algorithm, I use only two 1D arrays instead of a 2D matrix. The algorithm after this change is shown here:
/// <summary>
/// Memory-saving DTW: keeps only the previous and the current line of the
/// cost table instead of the whole 2D matrix.
/// </summary>
/// <param name="input">Sequence being matched.</param>
/// <param name="template">Sequence it is matched against.</param>
/// <returns>
/// The accumulated cost divided by the average of the two sequence lengths, or
/// <c>double.PositiveInfinity</c> when one sequence is at most half as long as the other.
/// </returns>
double DTW_improved(Sequence input, Sequence template)
{
    // Don't compare two sequences if one of them is at most half as long as the other
    if (input.Frames.Length <= (0.5 * template.Frames.Length) || template.Frames.Length <= (0.5 * input.Frames.Length))
        return double.PositiveInfinity;

    int rows = template.Frames.Length, columns = input.Frames.Length;
    double[] previous = new double[rows];
    double[] current = new double[rows];

    // First input frame: costs simply accumulate along the template.
    previous[0] = distance(input.Frames[0], template.Frames[0]);
    for (int i = 1; i < rows; i++)
        previous[i] = previous[i - 1] + distance(input.Frames[0], template.Frames[i]);

    for (int i = 1; i < columns; i++)
    {
        // The first two elements have fewer predecessors, so they are
        // calculated before the loop.
        current[0] = distance(input.Frames[i], template.Frames[0]) + previous[0];
        if (rows > 1)
            current[1] = distance(input.Frames[i], template.Frames[1]) + Math.Min(previous[0], previous[1]);

        for (int j = 2; j < rows; j++)
            current[j] = Math.Min(previous[j], Math.Min(previous[j - 1], previous[j - 2])) + distance(input.Frames[i], template.Frames[j]);

        // Swap the two buffers: the line just computed becomes "previous".
        double[] temp = current;
        current = previous;
        previous = temp;
    }

    // "previous" always holds the most recently completed line - including the
    // initial one when the input has a single frame. The earlier version read
    // the never-filled second buffer in that case and returned 0.
    return previous[rows - 1] / (0.5 * (input.Frames.Length + template.Frames.Length)); // Normalization: dividing by the average of input length & template length
}

fuzzy matching word on OCR page

I have a static phrase that I am searching an OCR'd image for.
string KeywordToFind = "Account Number"
string OcrPageText = "
GEORGIA
POWER
A SOUTHERN COMPANY
AecountNumber
122- 493
Pagel of2
Please Pay By
Jan 29,2014
Total Due
39.11
"
How can I find the word "AecountNumber" using my keyword "Account Number"?
I have tried using variations of the Levenshtein Distance Algorithm HERE with varied success. I've also tried regexes, but the OCR often converts the text differently, thus rendering the regex useless.
Suggestions? I can provide more code if the link doesn't give enough information. Also, Thanks!
Why not try something mostly arbitrary, like this -- while it would certainly match a lot more than just account number, the chances of the start and end characters existing elsewhere in that order is pretty slim.
A.?c.?.?nt ?N.?[mn]b.?r
http://regex101.com/r/zV1yM2
It'll match things like:
Account Number
AccntNumbr
Aecnt Nunber
Answered My Question with the use of sub-strings. Posting in case others run into the same type of problem. A little unorthodox, but it works great for me.
// Searches PossibleString for the substring that best matches StaticText,
// trying window lengths of StaticText.Length - 1, StaticText.Length and
// StaticText.Length + 1, and keeps the best candidate in PossibleResult.
decimal PossibleStringLength = (PossibleString.Length); //Length of string to search
decimal StaticTextLength = (StaticText.Length); //Length of text to search for
// Declared after StaticTextLength: a local cannot be read before its declaration (CS0841).
int TextLengthBuffer = (int)StaticTextLength - 1; //start looking for correct result with one less character than it should have.
int LowestLevenshteinNumber = 999999; //initialize insanely high maximum
// NOTE(review): if ErrorAllowance is an integer type, ErrorAllowance / 100 truncates before the multiplication - confirm it is decimal.
decimal NumberOfErrorsAllowed = Math.Round((StaticTextLength * (ErrorAllowance / 100)), MidpointRounding.AwayFromZero); //Find number of errors allowed with given ErrorAllowance percentage
//Look for best match with 1 less character than it should have, then the correct amount of characters.
//And last, with 1 more character. (This is because one letter can be recognized as
//two (W -> VV) and vice versa)
for (int i = 0; i < 3; i++)
{
    // Slide a window of TextLengthBuffer characters across PossibleString; e is the window's end offset.
    for (int e = TextLengthBuffer; e <= (int)PossibleStringLength; e++)
    {
        string possibleResult = (PossibleString.Substring((e - TextLengthBuffer), TextLengthBuffer));
        // Allowed errors grow or shrink with the difference between window length and target length.
        int lAllowance = (int)(Math.Round((possibleResult.Length - StaticTextLength) + (NumberOfErrorsAllowed), MidpointRounding.AwayFromZero));
        int lNumber = LevenshteinAlgorithm(StaticText, possibleResult);
        // Accept a strictly better candidate; a tie is accepted only for exact-length windows.
        if (lNumber <= lAllowance && ((lNumber < LowestLevenshteinNumber) || (TextLengthBuffer == StaticText.Length && lNumber <= LowestLevenshteinNumber)))
        {
            PossibleResult = (new StaticTextResult { text = possibleResult, errors = lNumber });
            LowestLevenshteinNumber = lNumber;
        }
    }
    TextLengthBuffer++;
}
/// <summary>
/// Levenshtein algorithm: counts the minimum number of single-character
/// insertions, deletions and substitutions that turn <paramref name="s"/>
/// into <paramref name="t"/>.
/// </summary>
/// <param name="s">First string.</param>
/// <param name="t">Second string.</param>
/// <returns>The edit distance; when one string is empty, the length of the other.</returns>
public static int LevenshteinAlgorithm(string s, string t) // Levenshtein Algorithm
{
    int rowCount = s.Length;
    int columnCount = t.Length;

    if (rowCount == 0) return columnCount;
    if (columnCount == 0) return rowCount;

    // grid[r, c] = distance between the first r chars of s and the first c chars of t.
    int[,] grid = new int[rowCount + 1, columnCount + 1];

    int index = 0;
    while (index <= rowCount)
    {
        grid[index, 0] = index;
        index++;
    }

    index = 0;
    while (index <= columnCount)
    {
        grid[0, index] = index;
        index++;
    }

    for (int r = 1; r <= rowCount; r++)
    {
        for (int c = 1; c <= columnCount; c++)
        {
            // Start from the diagonal (match or substitution) ...
            int best = grid[r - 1, c - 1];
            if (t[c - 1] != s[r - 1])
            {
                best++;
            }

            // ... then see whether coming from above or from the left is cheaper.
            int fromAbove = grid[r - 1, c] + 1;
            if (fromAbove < best)
            {
                best = fromAbove;
            }

            int fromLeft = grid[r, c - 1] + 1;
            if (fromLeft < best)
            {
                best = fromLeft;
            }

            grid[r, c] = best;
        }
    }

    return grid[rowCount, columnCount];
}

Find closest match to input string in a list of strings

I have problems finding an implementation of closest match strings for .net
I would like to match a list of strings, example:
input string: "Publiczna Szkoła Podstawowa im. Bolesława Chrobrego w Wąsoszu"
List of strings:
Publiczna Szkoła Podstawowa im. B. Chrobrego w Wąsoszu
Szkoła Podstawowa Specjalna
Szkoła Podstawowa im.Henryka Sienkiewicza w Wąsoszu
Szkoła Podstawowa im. Romualda Traugutta w Wąsoszu Górnym
This would clearly need to be matched with "Publiczna Szkoła Podstawowa im. B. Chrobrego w Wąsoszu".
What algorithms are there available for .net?
Edit distance
Edit distance is a way of quantifying how dissimilar two strings
(e.g., words) are to one another by counting the minimum number of
operations required to transform one string into the other.
Levenshtein distance
Informally, the Levenshtein distance between two words is the minimum
number of single-character edits (i.e. insertions, deletions or
substitutions) required to change one word into the other.
Fast, memory efficient Levenshtein algorithm
C# Levenshtein
using System;
/// <summary>
/// Approximate string matching helpers.
/// </summary>
static class LevenshteinDistance
{
    /// <summary>
    /// Returns the Levenshtein distance between two strings, i.e. how many
    /// single-character insertions, deletions or substitutions separate them.
    /// </summary>
    /// <param name="s">First string.</param>
    /// <param name="t">Second string.</param>
    /// <returns>The edit distance; when one string is empty, the length of the other.</returns>
    public static int Compute(string s, string t)
    {
        int lengthS = s.Length;
        int lengthT = t.Length;

        // If either string is empty the answer is the length of the other one,
        // which is simply the larger of the two lengths.
        if (lengthS == 0 || lengthT == 0)
        {
            return Math.Max(lengthS, lengthT);
        }

        int[,] cells = new int[lengthS + 1, lengthT + 1];

        // First column and first row: distance from/to the empty prefix.
        for (int a = 0; a <= lengthS; a++)
        {
            cells[a, 0] = a;
        }
        for (int b = 0; b <= lengthT; b++)
        {
            cells[0, b] = b;
        }

        for (int a = 1; a <= lengthS; a++)
        {
            for (int b = 1; b <= lengthT; b++)
            {
                int removeOrAdd = Math.Min(cells[a - 1, b], cells[a, b - 1]) + 1;

                int replace = cells[a - 1, b - 1];
                if (t[b - 1] != s[a - 1])
                {
                    replace += 1;
                }

                cells[a, b] = Math.Min(removeOrAdd, replace);
            }
        }

        return cells[lengthS, lengthT];
    }
}
/// <summary>
/// Demo: prints the Levenshtein distance for three sample word pairs.
/// </summary>
class Program
{
    static void Main()
    {
        string[,] samples =
        {
            { "aunt", "ant" },
            { "Sam", "Samantha" },
            { "flomax", "volmax" },
        };

        for (int k = 0; k < samples.GetLength(0); k++)
        {
            int distance = LevenshteinDistance.Compute(samples[k, 0], samples[k, 1]);
            Console.WriteLine(distance);
        }
    }
}
.NET does not supply anything out of the box - you need to implement an Edit Distance algorithm yourself. For example, you can use Levenshtein Distance, like this:
// Naive, full-matrix implementation of the Levenshtein distance, following
// the pseudocode from Wikipedia.
// It uses (n + 1) * (m + 1) integers of memory; research an algorithm with
// better space complexity if that matters.
// Returns the number of single-character edits between s and t.
public static int LevenshteinDistance(string s, string t) {
    int sLen = s.Length;
    int tLen = t.Length;

    if (sLen == 0) {
        return tLen;
    }
    if (tLen == 0) {
        return sLen;
    }

    // dist[i, j] = distance between the first i chars of s and the first j chars of t.
    int[,] dist = new int[sLen + 1, tLen + 1];
    for (int i = 0; i <= sLen; i++) {
        dist[i, 0] = i;
    }
    for (int j = 0; j <= tLen; j++) {
        dist[0, j] = j;
    }

    for (int i = 1; i <= sLen; i++) {
        char fromS = s[i - 1];
        for (int j = 1; j <= tLen; j++) {
            int diagonal = dist[i - 1, j - 1];
            if (t[j - 1] != fromS) {
                diagonal += 1;
            }
            int up = dist[i - 1, j] + 1;
            int left = dist[i, j - 1] + 1;
            dist[i, j] = Math.Min(Math.Min(up, left), diagonal);
        }
    }

    return dist[sLen, tLen];
}
Call LevenshteinDistance(targetString, possible[i]) for each i, then pick the string possible[i] for which LevenshteinDistance returns the smallest value.
Late to the party, but I had a similar requirement to #Ali123:
"ECM" is closer to "Open form for ECM" than "transcribe" phonetically
I found a simple solution that works for my use case, which is comparing sentences, and finding the sentence that has the most words in common:
/// <summary>
/// Returns the candidate sentence that shares the most distinct,
/// space-separated words with <paramref name="stringToCompare"/>.
/// Words are compared exactly (case-sensitive).
/// </summary>
/// <param name="stringToCompare">Sentence to match.</param>
/// <param name="strs">Candidate sentences.</param>
/// <returns>
/// The first candidate with the highest number of shared words, or
/// <c>string.Empty</c> when no candidate shares any word.
/// </returns>
public static string FindBestMatch(string stringToCompare, IEnumerable<string> strs) {
    HashSet<string> strCompareHash = stringToCompare.Split(' ').ToHashSet();
    int maxIntersectCount = 0;
    string bestMatch = string.Empty;
    foreach (string str in strs)
    {
        HashSet<string> strHash = str.Split(' ').ToHashSet();
        // Intersect the candidate's words with the input's words. Intersecting
        // the input with itself gave every candidate the same score, so the
        // first candidate always won.
        int intersectCount = strHash.Intersect(strCompareHash).Count();
        if (intersectCount > maxIntersectCount)
        {
            maxIntersectCount = intersectCount;
            bestMatch = str;
        }
    }
    return bestMatch;
}

Comparing strings with tolerance

I'm looking for a way to compare a string with an array of strings. Doing an exact search is quite easy of course, but I want my program to tolerate spelling mistakes, missing parts of the string and so on.
Is there some kind of framework which can perform such a search? I'm having something in mind that the search algorithm will return a few results order by the percentage of match or something like this.
You could use the Levenshtein Distance algorithm.
"The Levenshtein distance between two strings is defined as the minimum number of edits needed to transform one string into the other, with the allowable edit operations being insertion, deletion, or substitution of a single character." - Wikipedia.com
This one is from dotnetperls.com:
using System;
/// <summary>
/// Approximate string matching.
/// </summary>
static class LevenshteinDistance
{
    /// <summary>
    /// Computes the Levenshtein distance between two strings: the minimum
    /// number of insertions, deletions or substitutions of a single character
    /// needed to transform one into the other.
    /// </summary>
    /// <param name="s">First string.</param>
    /// <param name="t">Second string.</param>
    /// <returns>The edit distance; when one string is empty, the length of the other.</returns>
    public static int Compute(string s, string t)
    {
        int height = s.Length;
        int width = t.Length;

        if (height == 0)
        {
            return width;
        }

        if (width == 0)
        {
            return height;
        }

        int[,] matrix = new int[height + 1, width + 1];

        // Single pass over the whole matrix: the border cells hold the distance
        // to the empty prefix, every other cell is derived from its three
        // already-filled neighbours (above, left, diagonal).
        for (int y = 0; y <= height; y++)
        {
            for (int x = 0; x <= width; x++)
            {
                if (y == 0)
                {
                    matrix[y, x] = x;
                }
                else if (x == 0)
                {
                    matrix[y, x] = y;
                }
                else
                {
                    int changeCost = (t[x - 1] == s[y - 1]) ? 0 : 1;
                    int viaDiagonal = matrix[y - 1, x - 1] + changeCost;
                    int viaEdge = Math.Min(matrix[y - 1, x] + 1, matrix[y, x - 1] + 1);
                    matrix[y, x] = Math.Min(viaEdge, viaDiagonal);
                }
            }
        }

        return matrix[height, width];
    }
}
/// <summary>
/// Demo: writes the Levenshtein distance of three sample word pairs to the console.
/// </summary>
class Program
{
    static void Main()
    {
        int auntToAnt = LevenshteinDistance.Compute("aunt", "ant");
        Console.WriteLine(auntToAnt);

        int samToSamantha = LevenshteinDistance.Compute("Sam", "Samantha");
        Console.WriteLine(samToSamantha);

        int flomaxToVolmax = LevenshteinDistance.Compute("flomax", "volmax");
        Console.WriteLine(flomaxToVolmax);
    }
}
You may in fact prefer to use the Damerau-Levenshtein distance algorithm, which also allows characters to be transposed, which is a common human error in data entry. You'll find a C# implementation of it here.
There is nothing in the .NET framework that will help you with this out-of-the-box.
The most common spelling mistakes are those where the letters are a decent phonetic representation of the word, but not the correct spelling of the word.
For example, it could be argued that the words sword and sord (yes, that's a word) have the same phonetic roots (they sound the same when you pronounce them).
That being said, there are a number of algorithms that you can use to translate words (even misspelled ones) into phonetic variants.
The first is Soundex. It's fairly simple to implement and there are a fair number of .NET implementations of this algorithm. It's rather simple, but it gives you real values you can compare to each other.
Another is Metaphone. While I can't find a native .NET implementation of Metaphone, the link provided has links to a number of other implementations which could be converted. The easiest to convert would probably be the Java implementation of the Metaphone algorithm.
It should be noted that the Metaphone algorithm has gone through revisions. There is Double Metaphone (which has a .NET implementation) and Metaphone 3. Metaphone 3 is a commercial application, but has a 98% accuracy rate compared to an 89% accuracy rate for the Double Metaphone algorithm when run against a database of common English words. Depending on your need, you might want to look for (in the case of Double Metaphone) or purchase (in the case of Metaphone 3) the source for the algorithm and convert or access it through the P/Invoke layer (C++ implementations abound).
Metaphone and Soundex differ in the sense that Soundex produces fixed length numeric keys, whereas Metaphone produces keys of different length, so the results will be different. In the end, both will do the same kind of comparison for you, you just have to find out which suits your needs the best, given your requirements and resources (and intolerance levels for the spelling mistakes, of course).
Here is an implementation of the LevenshteinDistance method that uses far less memory while producing the same results. This is a C# adaptation of the pseudo code found in this wikipedia article under the "Iterative with two matrix rows" heading.
/// <summary>
/// Levenshtein distance computed with two work rows instead of a full
/// matrix ("iterative with two matrix rows").
/// </summary>
/// <param name="source">String to transform.</param>
/// <param name="target">String to transform it into.</param>
/// <returns>The number of single-character edits between the two strings.</returns>
public static int LevenshteinDistance(string source, string target)
{
    // degenerate cases
    if (source == target)
    {
        return 0;
    }
    if (source.Length == 0)
    {
        return target.Length;
    }
    if (target.Length == 0)
    {
        return source.Length;
    }

    int width = target.Length + 1;
    int[] previousRow = new int[width];
    int[] currentRow = new int[width];

    // Row 0: turning an empty source prefix into the first `col` chars of
    // target takes `col` insertions.
    for (int col = 0; col < width; col++)
    {
        previousRow[col] = col;
    }

    for (int row = 1; row <= source.Length; row++)
    {
        char sourceChar = source[row - 1];

        // Column 0: delete `row` chars from source to match an empty target.
        currentRow[0] = row;

        for (int col = 1; col < width; col++)
        {
            int substitution = previousRow[col - 1] + (sourceChar == target[col - 1] ? 0 : 1);
            int deletion = previousRow[col] + 1;
            int insertion = currentRow[col - 1] + 1;
            currentRow[col] = Math.Min(insertion, Math.Min(deletion, substitution));
        }

        // Swap the rows instead of copying: the row just filled becomes the
        // previous row, and the stale one is fully overwritten next iteration.
        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }

    return previousRow[target.Length];
}
Here is a function that will give you the percentage similarity.
/// <summary>
/// Calculates the similarity of two strings as 1 minus the Levenshtein
/// distance divided by the length of the longer string.
/// </summary>
/// <param name="source">Source string to compare with.</param>
/// <param name="target">Target string to compare.</param>
/// <returns>
/// Similarity between the two strings from 0 to 1.0. Returns 0.0 when either
/// string is null or empty - including when both are empty, because that
/// check runs before the equality check.
/// </returns>
public static double CalculateSimilarity(string source, string target)
{
    bool missing = source == null || target == null;
    if (missing || source.Length == 0 || target.Length == 0)
    {
        return 0.0;
    }

    if (source == target)
    {
        return 1.0;
    }

    int longest = Math.Max(source.Length, target.Length);
    double editRatio = (double)LevenshteinDistance(source, target) / longest;
    return 1.0 - editRatio;
}
Your other option is to compare phonetically using Soundex or Metaphone. I just completed an article that presents C# code for both algorithms. You can view it at http://www.blackbeltcoder.com/Articles/algorithms/phonetic-string-comparison-with-soundex.
Here are two methods that calculate the Levenshtein Distance between strings.
The Levenshtein distance between two strings is defined as the minimum number of edits needed to transform one string into the other, with the allowable edit operations being insertion, deletion, or substitution of a single character.
Once you have the result, you'll need to define what value you want to use as a threshold for a match or not. Run the function on a bunch of sample data to get a good idea of how it works to help decide on your particular threshold.
/// <summary>
/// Calculates the Levenshtein distance between two strings--the number of changes that need to be made for the first string to become the second.
/// </summary>
/// <param name="first">The first string, used as a source.</param>
/// <param name="second">The second string, used as a target.</param>
/// <returns>The number of changes that need to be made to convert the first string to the second.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="first"/> or <paramref name="second"/> is null.
/// </exception>
/// <remarks>
/// From http://www.merriampark.com/ldcsharp.htm
/// </remarks>
public static int LevenshteinDistance(string first, string second)
{
    if (first == null)
    {
        throw new ArgumentNullException("first");
    }
    if (second == null)
    {
        throw new ArgumentNullException("second");
    }

    int n = first.Length;
    int m = second.Length;

    // Handle the trivial cases before allocating the matrix.
    if (n == 0) return m;
    if (m == 0) return n;

    // d[i, j] = distance between the first i chars of first and the first j chars of second.
    var d = new int[n + 1, m + 1];

    for (int i = 0; i <= n; i++)
    {
        d[i, 0] = i;
    }
    for (int j = 0; j <= m; j++)
    {
        d[0, j] = j;
    }

    for (int i = 1; i <= n; i++)
    {
        for (int j = 1; j <= m; j++)
        {
            // Compare characters directly; building two one-character
            // substrings per cell allocated 2 * n * m throw-away strings.
            int cost = (second[j - 1] == first[i - 1]) ? 0 : 1;

            d[i, j] = Math.Min(
                Math.Min(
                    d[i - 1, j] + 1,      // deletion
                    d[i, j - 1] + 1),     // insertion
                d[i - 1, j - 1] + cost);  // substitution or match
        }
    }

    return d[n, m];
}
using System;
/// <summary>
/// Edit-distance based string similarity, with a small console demo.
/// </summary>
public class Example
{
    /// <summary>
    /// Returns the Levenshtein edit distance between <paramref name="X"/> and <paramref name="Y"/>.
    /// </summary>
    /// <param name="X">First string.</param>
    /// <param name="Y">Second string.</param>
    /// <returns>The number of single-character insertions, deletions and substitutions separating the strings.</returns>
    public static int getEditDistance(string X, string Y)
    {
        int rows = X.Length;
        int cols = Y.Length;

        // table[r, c] = distance between the first r chars of X and the first c chars of Y.
        // Cell [0, 0] keeps its default value of 0.
        int[,] table = new int[rows + 1, cols + 1];

        for (int r = 1; r <= rows; r++)
        {
            table[r, 0] = r;
        }
        for (int c = 1; c <= cols; c++)
        {
            table[0, c] = c;
        }

        for (int r = 1; r <= rows; r++)
        {
            for (int c = 1; c <= cols; c++)
            {
                int replace = table[r - 1, c - 1] + (X[r - 1] == Y[c - 1] ? 0 : 1);
                int remove = table[r - 1, c] + 1;
                int insert = table[r, c - 1] + 1;
                table[r, c] = Math.Min(Math.Min(remove, insert), replace);
            }
        }

        return table[rows, cols];
    }

    /// <summary>
    /// Returns the similarity of two strings as a fraction between 0 and 1:
    /// (longer length - edit distance) / longer length.
    /// </summary>
    /// <param name="x">First string.</param>
    /// <param name="y">Second string.</param>
    /// <returns>The similarity; 1.0 when both strings are empty.</returns>
    /// <exception cref="ArgumentException">Either string is null.</exception>
    public static double findSimilarity(string x, string y)
    {
        if (x == null || y == null)
        {
            throw new ArgumentException("Strings must not be null");
        }

        double maxLength = Math.Max(x.Length, y.Length);
        if (!(maxLength > 0))
        {
            // Two empty strings are identical.
            return 1.0;
        }

        // optionally ignore case if needed
        return (maxLength - getEditDistance(x, y)) / maxLength;
    }

    /// <summary>
    /// Prints the similarity of two sample strings as a percentage.
    /// </summary>
    public static void Main()
    {
        double percent = findSimilarity("Techie Delight", "Tech Delight") * 100;
        Console.WriteLine(percent); // 85.71428571428571
    }
}

Categories