Can anyone spot the bug in my Damerau-Levenshtein Distance implementation? - c#

I've got a funky bug that's driving me nuts. Can anyone help me find it? Try calling the function with two words that differ only by a missing last character ("garble" vs "garbl"). The function is returning 0 instead of the expected 1. It's supposed to return 1, right?
I've tried fiddling with the array bounds but that's only causing IndexOutOfRangeExceptions.
public static class FuzzyStringMatcher
{
private const int DELETION = 0;
private const int INSERTION = 1;
private const int SUBSTITUTION = 2;
private const int TRANSPOSITION = 3;
private const int COST_OF_DELETION = 1;
private const int COST_OF_INSERTION = 1;
private const int COST_OF_TRANSPOSITION = 1;
private const int COST_OF_SUBSTITUTION = 1;
public static int Compute_DamerauLevenshtein_Distance(string a, string b)
{
int[,] rows = new int[a.Length + 1, b.Length + 1];
int cost_ratio;
int[] calculations = new int[4];
//
// Init the array
//
for (int i = 0; i < rows.GetUpperBound(0); i++)
rows[i, 0] = i;
for (int i = 0; i < rows.GetUpperBound(1); i++)
rows[0, i] = i;
for (int aidx = 1; aidx < rows.GetUpperBound(0); aidx++)
{
for (int bidx = 1; bidx < rows.GetUpperBound(1); bidx++)
{
if (a[aidx - 1] == b[bidx - 1])
cost_ratio = 0;
else
cost_ratio = 1;
calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION;
calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION;
calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION;
calculations[TRANSPOSITION] = int.MaxValue;
if (aidx > 1 && bidx > 1 && a[aidx] == b[bidx - 1] && a[aidx - 1] == b[bidx])
calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION;
rows[aidx, bidx] = calculations.Min();
}
}
int score = rows[rows.GetUpperBound(0) - 1, rows.GetUpperBound(1) - 1];
if (a.Contains(b) || b.Contains(a))
score = score / 2;
return score;
}
}
My implementation is based off the algorithm given in the Wikipedia page on Damerau-Levenshtein-Distance

+1 to Lou Franco. But beside that, it seems like you have lots of index issues (note that all 4 for cycles in wiki sample are inclusive, and when 1 is subtracted from aidx/bidx you actually need to subtract 2 because in wiki sample indexes in strings start at 1). My version:
public static int Compute_DamerauLevenshtein_Distance2(string a, string b)
{
int[,] rows = new int[a.Length + 1, b.Length + 1];
int cost_ratio;
int[] calculations = new int[4];
for(int i = 0; i <= rows.GetUpperBound(0); i++)
rows[i, 0] = i;
for(int i = 1; i <= rows.GetUpperBound(1); i++)
rows[0, i] = i;
for(int aidx = 1; aidx <= rows.GetUpperBound(0); aidx++)
{
for(int bidx = 1; bidx <= rows.GetUpperBound(1); bidx++)
{
if(a[aidx - 1] == b[bidx - 1])
cost_ratio = 0;
else
cost_ratio = 1;
calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION;
calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION;
calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION;
calculations[TRANSPOSITION] = int.MaxValue;
if(aidx > 1 && bidx > 1 && a[aidx - 1] == b[bidx - 2] && a[aidx - 2] == b[bidx - 1])
calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION;
rows[aidx, bidx] = calculations.Min();
}
}
int score = rows[rows.GetUpperBound(0), rows.GetUpperBound(1)];
return score;
}

This isn't in the Wikipedia article:
if (a.Contains(b) || b.Contains(a))
score = score / 2;
Since it's true for your example -- and integer division of 1/2 == 0, then that could be it.

Related

inaccurate results with function to add an array of digits together

so i have this function:
static int[] AddArrays(int[] a, int[] b)
{
int length1 = a.Length;
int length2 = b.Length;
int carry = 0;
int max_length = Math.Max(length1, length2) + 1;
int[] minimum_arr = new int[max_length - length1].Concat(a).ToArray();
int[] maximum_arr = new int[max_length - length2].Concat(b).ToArray();
int[] new_arr = new int[max_length];
for (int i = max_length - 1; i >= 0; i--)
{
int first_digit = maximum_arr[i];
int second_digit = i - (max_length - minimum_arr.Length) >= 0 ? minimum_arr[i - (max_length - minimum_arr.Length)] : 0;
if (second_digit + first_digit + carry > 9)
{
new_arr[i] = (second_digit + first_digit + carry) % 10;
carry = 1;
}
else
{
new_arr[i] = second_digit + first_digit + carry;
carry = 0;
}
}
if (carry == 1)
{
int[] result = new int[max_length + 1];
result[0] = 1;
Array.Copy(new_arr, 0, result, 1, max_length);
return result;
}
else
{
return new_arr;
}
}
it basically takes 2 lists of digits and adds them together. the point of this is that each array of digits represent a number that is bigger then the integer limits. now this function is close to working the results get innacurate at certein places and i honestly have no idea why. for example if the function is given these inputs:
"1481298410984109284109481491284901249018490849081048914820948019" and
"3475893498573573849739857349873498739487598" (both of these are being turned into a array of integers before being sent to the function)
the expected output is:
1,481,298,410,984,109,284,112,957,384,783,474,822,868,230,706,430,922,413,560,435,617
and what i get is:
1,481,298,410,984,109,284,457,070,841,142,258,634,158,894,233,092,241,356,043,561,7
i would very much appreciate some help with this ive been trying to figure it out for hours and i cant seem to get it to work perfectly.
I suggest Reverse arrays a and b and use good old school algorithm:
static int[] AddArrays(int[] a, int[] b) {
Array.Reverse(a);
Array.Reverse(b);
int[] result = new int[Math.Max(a.Length, b.Length) + 1];
int carry = 0;
int value = 0;
for (int i = 0; i < Math.Max(a.Length, b.Length); ++i) {
value = (i < a.Length ? a[i] : 0) + (i < b.Length ? b[i] : 0) + carry;
result[i] = value % 10;
carry = value / 10;
}
if (carry > 0)
result[result.Length - 1] = carry;
else
Array.Resize(ref result, result.Length - 1);
// Let's restore a and b
Array.Reverse(a);
Array.Reverse(b);
Array.Reverse(result);
return result;
}
Demo:
string a = "1481298410984109284109481491284901249018490849081048914820948019";
string b = "3475893498573573849739857349873498739487598";
string c = string.Concat(AddArrays(
a.Select(d => d - '0').ToArray(),
b.Select(d => d - '0').ToArray()));
Console.Write(c);
Output:
1481298410984109284112957384783474822868230706430922413560435617

how to write the value to each matrix index

For example, I have set up a formula to find my Xnew[k+1], Ynew[k+1] and Anew[k+1].
How do I pass the value to a 3 by 1 matrix if I want my
index 1,1 be Xnew[k+1],
index 1,2 be Ynew[k+1],
index 1,3 be Anew[k+1].
Here's what I got so far.
for (k = 0; k < 5; k++)
{
Xnew[k+1] = cX + (T * MPCV[k]) * Math.Cos(cA);
Ynew[k+1] = cY + (T * MPCV[k]) * Math.Sin(cA);
Anew[k+1] = cA + (T * MPCW[k]);
double[,] qK = new double[3, 1];
int i, j;
for (i = 0; i < 3; i++)
{
for (j = 0; j < 1; j++)
{
qK[i, j] = 1;
}
}
}
Thank you for the help.
Suposing Xnew[k+1], Ynew[k+1] and Anew[k+1] are doubles:
for ( k = 0; k < 5; k++ ) {
Xnew[k + 1] = cX + (T * MPCV[k]) * Math.Cos(cA);
Ynew[k + 1] = cY + (T * MPCV[k]) * Math.Sin(cA);
Anew[k + 1] = cA + (T * MPCW[k]);
double[,] qk = { { Xnew[k + 1] , Ynew[k + 1] , Anew[k + 1] } };
}
This will make you a 1x3 matrix (arrays start in 0) with:
qk[0,0] = Xnew[k+1]
qk[0,1] = Ynew[k+1]
qk[0,2] = Anew[k+1]
But if you want a 3x1 matrix use instead.
double[,] qk = { { Xnew[k + 1] }, {Ynew[k + 1] }, {Anew[k + 1] } };
That will give you:
qk[0,0] = Xnew[k+1]
qk[1,0] = Ynew[k+1]
qk[2,0] = Anew[k+1]

Regex or string compare with allowance of error

I'm trying to do a string compare in C# with some allowance for error. For example, if my search term is "Welcome", but if my comparison string (generated through OCR) is "We1come" and my error allowance is 20%, that should match. That part isn't so difficult using something like the Levenshtein algorithm. The hard part is making it work within a larger block of text, like a regular expression. For example, maybe my OCR result is "Hello. My name is Ben. We1come to my StackOverflow question.", I would like to pick out that We1come as a good result compared to my search term.
Took quite a while, but this works well. Fun problem :)
string PossibleString = PossibleString.ToString().ToLower();
string StaticText = StaticText.ToLower();
decimal PossibleStringLength = (PossibleString.Length);
decimal StaticTextLength = (StaticText.Length);
decimal NumberOfErrorsAllowed = Math.Round((StaticTextLength * (ErrorAllowance / 100)), MidpointRounding.AwayFromZero);
int LevenshteinDistance = LevenshteinAlgorithm(StaticText, PossibleString);
string PossibleResult = string.Empty;
if (LevenshteinDistance == PossibleStringLength - StaticTextLength)
{
// Perfect match. no need to calculate.
PossibleResult = StaticText;
}
else
{
int TextLengthBuffer = (int)StaticTextLength - 1;
int LowestLevenshteinNumber = 999999;
for (int i = 0; i < 3; i++) // Check for best results with same amount of characters as expected, as well as +/- 1
{
for (int e = TextLengthBuffer; e <= (int)PossibleStringLength; e++)
{
string possibleResult = (PossibleString.Substring((e - TextLengthBuffer), TextLengthBuffer));
int lAllowance = (int)(Math.Round((possibleResult.Length - StaticTextLength) + (NumberOfErrorsAllowed), MidpointRounding.AwayFromZero));
int lNumber = LevenshteinAlgorithm(StaticText, possibleResult);
if (lNumber <= lAllowance && ((lNumber < LowestLevenshteinNumber) || (TextLengthBuffer == StaticText.Length && lNumber <= LowestLevenshteinNumber)))
{
PossibleResult = possibleResult;
LowestLevenshteinNumber = lNumber;
}
}
TextLengthBuffer++;
}
}
public static int LevenshteinAlgorithm(string s, string t)
{
int n = s.Length;
int m = t.Length;
int[,] d = new int[n + 1, m + 1];
if (n == 0)
{
return m;
}
if (m == 0)
{
return n;
}
for (int i = 0; i <= n; d[i, 0] = i++)
{
}
for (int j = 0; j <= m; d[0, j] = j++)
{
}
for (int i = 1; i <= n; i++)
{
for (int j = 1; j <= m; j++)
{
int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
d[i, j] = Math.Min(
Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
d[i - 1, j - 1] + cost);
}
}
return d[n, m];
}
If it is somehow predictable how the OCR can miss letters, I would replace the letters in the search with misses.
If the search is Welcome, the regex would be (?i)We[l1]come.

MergeSort Implementation in C#

Currently studying algorithm analysis, and instead of blindly running off of pseudo-code from my textbook, I'm implementing each algorithm in C#. This is the psuedo-code:
MERGE-SORT(A,p,r)
1 if p < r
2 q = (p+r)/2
3 MERGE-SORT(A,p,q)
4 MERGE-SORT(A,q+1,r)
5 MERGE(A,p,q,r)
MERGE(A,p,q,r)
1 n1 = q - p + 1
2 n2 = r - q
3 let L[1..n1+1] and R[1..n2+1] be new arrays
4 for i = 1 to n1
5 L[i] = A[p+i-1]
6 for j = 1 to n2
7 R[j] = A[q+j]
8 L[n1+1] = INF
9 R[n1+1] = INF
10 i = 1
11 j = 1
12 for k = p to r
13 if L[i] <= R[j]
14 A[k] = L[i]
15 i = i + 1
16 else
17 A[k] = R[j]
18 j = j + 1
This is my code:
static void Main(string[] args)
{
int[] unsortedArray = new int[] { 5, 2, 7, 4, 1, 6, 8, 3, 9, 10 };
MergeSort(ref unsortedArray, 1, unsortedArray.Length);
foreach (int element in unsortedArray)
{
Console.WriteLine(element);
}
Console.Read();
}
private static void MergeSort(ref int[] unsortedArray, int leftIndex, int rightIndex)
{
if (leftIndex < rightIndex)
{
int middleIndex = (leftIndex + rightIndex) / 2;
//Sort left (will call Merge to produce a fully sorted left array)
MergeSort(ref unsortedArray, leftIndex, middleIndex);
//Sort right (will call Merge to produce a fully sorted right array)
MergeSort(ref unsortedArray, middleIndex + 1, rightIndex);
//Merge the sorted left & right to finish off.
Merge(ref unsortedArray, leftIndex, middleIndex, rightIndex);
}
}
private static void Merge(ref int[] unsortedArray, int leftIndex, int middleIndex, int rightIndex)
{
int lengthLeft = middleIndex - leftIndex + 1;
int lengthRight = rightIndex - middleIndex;
int[] leftArray = new int[lengthLeft + 1];
int[] rightArray = new int[lengthRight + 1];
for (int i = 0; i < lengthLeft; i++)
{
leftArray[i] = unsortedArray[leftIndex + i - 1];
}
for (int j = 0; j < lengthRight; j++)
{
rightArray[j] = unsortedArray[middleIndex + j];
}
leftArray[lengthLeft] = Int32.MaxValue;
rightArray[lengthRight] = Int32.MaxValue;
int iIndex = 0;
int jIndex = 0;
for (int k = leftIndex; k < rightIndex; k++)
{
if (leftArray[iIndex] <= rightArray[jIndex])
{
unsortedArray[k] = leftArray[iIndex];
iIndex++;
}
else
{
unsortedArray[k] = rightArray[jIndex];
jIndex++;
}
}
}
I'm not sure where I'm messing things up -- I tried to follow the pseudo-code as well as I could, but my output is funky (i.e. repeated values and not properly sorted).
Debugging this didn't help me figure out the problem either (recursive solutions get too messy).
Where am I going wrong, and how do I fix it?
As correctly pointed out in the comments, C# array indexing is zero-based, while your pseudo code is one-based.
That being said, here's the errors:
1) Main method
MergeSort(ref unsortedArray, 1, unsortedArray.Length);
has to be changed to:
MergeSort(ref unsortedArray, 0, unsortedArray.Length - 1);
2) Merge method
leftArray[i] = unsortedArray[leftIndex + i - 1];
has to be change to:
leftArray[i] = unsortedArray[leftIndex + i];
3) Merge method
rightArray[j] = unsortedArray[middleIndex + j];
has to be change to:
rightArray[j] = unsortedArray[middleIndex + j + 1];
4) Merge method
for (int k = leftIndex; k < rightIndex; k++)
has to be change to:
for (int k = leftIndex; k <= rightIndex; k++)
BTW, the ref keyword in your code is not really necessay, since you're just modifying the values inside the array and not creating a new instance of it .
private static void Merge(int[]A, int[]aux, int lo, int mid, int hi)
{
for (int k = lo; k <= hi; k++)
aux[k] = A[k];
int i = lo, j = mid + 1;
for (int k = lo; k <= hi; k++)
{
if (i > mid) A[k] = aux[j++];
else if (j > hi) A[k] = aux[i++];
else if (aux[j] < aux[i]) A[k] = aux[j++];
else A[k] = aux[i++];
}
}
private static void Sort(int[] A, int[] aux, int lo, int hi)
{
if (hi <= lo) return;
int mid = lo + (hi - lo) / 2;
Sort(A, aux, lo, mid);
Sort(A, aux, mid + 1, hi);
if (A[mid + 1] > A[mid]) return;
Merge(A, aux, lo, mid, hi);
}
public static void Sort(int[] A)
{
int[] aux = new int[A.Length];
Sort(A, aux, 0, A.Length - 1);
}
A Simple Merge Sort Implementation.
https://github.com/bharathkumarms/AlgorithmsMadeEasy/blob/master/AlgorithmsMadeEasy/MergeSort.cs
using System;
using System.Collections.Generic;
using System.Linq;
namespace AlgorithmsMadeEasy
{
class MergeSort
{
public void MergeSortMethod()
{
var input = System.Console.ReadLine();
string[] sInput = input.Split(' ');
int[] numbers = Array.ConvertAll(sInput, int.Parse);
int len = numbers.Length;
MergeSort_Recursive(numbers, 0, len - 1);
for (int i = 0; i < len; i++)
{
Console.Write(numbers[i] + " ");
}
Console.ReadLine();
}
static public void MergeSort_Recursive(int[] numbers, int left, int right)
{
int mid;
if (right > left)
{
mid = (right + left) / 2;
MergeSort_Recursive(numbers, left, mid);
MergeSort_Recursive(numbers, (mid + 1), right);
DoMerge(numbers, left, (mid + 1), right);
}
}
static public void DoMerge(int[] numbers, int left, int mid, int right)
{
int[] temp = new int[numbers.Length];
int i, left_end, num_elements, tmp_pos;
left_end = (mid - 1);
tmp_pos = left;
num_elements = (right - left + 1);
while ((left <= left_end) && (mid <= right))
{
if (numbers[left] <= numbers[mid])
temp[tmp_pos++] = numbers[left++];
else
temp[tmp_pos++] = numbers[mid++];
}
while (left <= left_end)
temp[tmp_pos++] = numbers[left++];
while (mid <= right)
temp[tmp_pos++] = numbers[mid++];
for (i = 0; i < num_elements; i++)
{
numbers[right] = temp[right];
right--;
}
}
}
}
/*
Sample Input:
6 5 3 2 8
Calling Code:
MergeSort ms = new MergeSort();
ms.MergeSortMethod();
*/

newly added class missing on AForge.Imaging.dll

im tryn to add a new adaptive threshold method to the AForge.Imaging.Filters
I have added the new cs file under
Sources\Imaging\Filters\Adaptive Binarization
Below is the code for the thresholding methods
namespace AForge.Imaging.Filters
{
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using AForge.Imaging;
class SauvolaAdaptiveThresholding : BaseInPlacePartialFilter
{
private const float DEFAULT_WeightingFactor = 0.3f;
private const short DEFAULT_WindowSize = 40;
private float weightingFactor ;
private short windowSize ;
// private format translation dictionary
private Dictionary<PixelFormat, PixelFormat> formatTranslations = new Dictionary<PixelFormat, PixelFormat>();
/// <summary>
/// Format translations dictionary.
/// </summary>
public override Dictionary<PixelFormat,PixelFormat> FormatTranslations
{
get { return formatTranslations; }
}
public float WeightingFactor
{
get{return this.weightingFactor;}
set{this.weightingFactor = value;}
}
public short WindowSize
{
get{return this.windowSize;}
set{this.windowSize = value;}
}
public SauvolaAdaptiveThresholding() :
this(DEFAULT_WeightingFactor,DEFAULT_WindowSize) { }
public SauvolaAdaptiveThresholding(float _weightFact , short _wsize)
{
this.WeightingFactor = _weightFact;
this.WindowSize =_wsize;
// initialize format translation dictionary
formatTranslations[PixelFormat.Format8bppIndexed] = PixelFormat.Format8bppIndexed;
formatTranslations[PixelFormat.Format16bppGrayScale] = PixelFormat.Format16bppGrayScale;
}
protected override unsafe void ProcessFilter(UnmanagedImage image, Rectangle rect)
{
int whalf = windowSize >> 1; //half of windowsize
whalf = windowSize >> 1;
byte* ptr = (byte*)image.ImageData.ToPointer(); //input
// Calculate the integral image, and integral of the squared image
ulong[,] integral_image = new ulong[image.Width , image.Height];
ulong[,] rowsum_image = new ulong[image.Width , image.Height];
ulong[,] integral_sqimg = new ulong[image.Width , image.Height];
ulong[,] rowsum_sqimg = new ulong[image.Width , image.Height];
int xmin, ymin, xmax, ymax, index;
double diagsum, idiagsum, diff, sqdiagsum, sqidiagsum, sqdiff, area;
double mean, std, threshold;
for (int j = 0; j < image.Height; j++)
{
rowsum_image[0, j] = (ulong)*(ptr + j);
rowsum_sqimg[0, j] = rowsum_image[0, j] * rowsum_image[0, j];
}
for (int i = 1; i < image.Width; i++)
{
for (int j = 0; j < image.Height; j++)
{
index = j * image.Width + i;
rowsum_image[i, j] = rowsum_image[i - 1, j] + (ulong)*(ptr + index);
rowsum_sqimg[i, j] = rowsum_sqimg[i - 1, j] + (ulong)(*(ptr + index) * *(ptr + index));
}
}
for (int i = 0; i < image.Width; i++)
{
integral_image[i, 0] = rowsum_image[i, 0];
integral_sqimg[i, 0] = rowsum_sqimg[i, 0];
}
for (int i = 0; i < image.Width; i++)
{
for (int j = 1; j < image.Height; j++)
{
integral_image[i, j] = integral_image[i, j - 1] + rowsum_image[i, j];
integral_sqimg[i, j] = integral_sqimg[i, j - 1] + rowsum_sqimg[i, j];
}
}
//Calculate the mean and standard deviation using the integral image
for (int i = 0; i < image.Width; i++)
{
for (int j = 0; j < image.Height; j++)
{
xmin = Math.Max(0, i - whalf);//max(0, i - whalf);
ymin = Math.Max(0, j - whalf);
xmax = Math.Min(image.Width - 1, i + whalf);
ymax = Math.Min(image.Height - 1, j + whalf);
area = (xmax - xmin + 1) * (ymax - ymin + 1);
if (xmin == 0 && ymin == 0)
{ // Point at origin
diff = integral_image[xmax, ymax];
sqdiff = integral_sqimg[xmax, ymax];
}
else if (xmin == 0 && ymin != 0)
{ // first column
diff = integral_image[xmax, ymax] - integral_image[xmax, ymin - 1];
sqdiff = integral_sqimg[xmax, ymax] - integral_sqimg[xmax, ymin - 1];
}
else if (xmin != 0 && ymin == 0)
{ // first row
diff = integral_image[xmax, ymax] - integral_image[xmin - 1, ymax];
sqdiff = integral_sqimg[xmax, ymax] - integral_sqimg[xmin - 1, ymax];
}
else
{ // rest of the image
diagsum = integral_image[xmax, ymax] + integral_image[xmin - 1, ymin - 1];
idiagsum = integral_image[xmax, ymin - 1] + integral_image[xmin - 1, ymax];
diff = diagsum - idiagsum;
sqdiagsum = integral_sqimg[xmax, ymax] + integral_sqimg[xmin - 1, ymin - 1];
sqidiagsum = integral_sqimg[xmax, ymin - 1] + integral_sqimg[xmin - 1, ymax];
sqdiff = sqdiagsum - sqidiagsum;
}
mean = diff / area;
std = Math.Sqrt((sqdiff - diff * diff / area) / (area - 1));
threshold = mean * (1 + WeightingFactor * ((std / 128) - 1));
if ((double)*(ptr + (j * image.Width + i)) < threshold)//if (gray_image[i, j] < threshold)
*(ptr + (j * image.Width + i)) = 0;
else
*(ptr + (j * image.Width + i)) = 255;
}
}
}
}
}
but when i build the project and use the AForge.Imaging.dll, the above class isn't available. When i try to use the object browser i see all the other classes except for the one i added newly.
Can someone please tell me what am i doing wrong?
It needs to be public
public class SauvolaAdaptiveThresholding : BaseInPlacePartialFilter
You can find more info about the C# access modifiers here: http://msdn.microsoft.com/en-us/library/ms173121.aspx

Categories