I have 2 strings
string a = "foo bar";
string b = "bar foo";
and I want to detect the changes from a to b. What characters do I have to change, to get from a to b?
I think there must be a iteration over each character and detect if it was added, removed or remained equal. So this is my exprected result
'f' Remove
'o' Remove
'o' Remove
' ' Remove
'b' Equal
'a' Equal
'r' Equal
' ' Add
'f' Add
'o' Add
'o' Add
class and enum for the result:
public enum Operation { Add,Equal,Remove };
public class Difference
{
public Operation op { get; set; }
public char c { get; set; }
}
Here is my solution but the "Remove" case is not clear to me how the code has to look like
public static List<Difference> CalculateDifferences(string left, string right)
{
int count = 0;
List<Difference> result = new List<Difference>();
foreach (char ch in left)
{
int index = right.IndexOf(ch, count);
if (index == count)
{
count++;
result.Add(new Difference() { c = ch, op = Operation.Equal });
}
else if (index > count)
{
string add = right.Substring(count, index - count);
result.AddRange(add.Select(x => new Difference() { c = x, op = Operation.Add }));
count += add.Length;
}
else
{
//Remove?
}
}
return result;
}
How does the code have to look like for removed characters?
Update - added a few more examples
example 1:
string a = "foobar";
string b = "fooar";
expected result:
'f' Equal
'o' Equal
'o' Equal
'b' Remove
'a' Equal
'r' Equal
example 2:
string a = "asdfghjk";
string b = "wsedrftr";
expected result:
'a' Remove
'w' Add
's' Equal
'e' Add
'd' Equal
'r' Add
'f' Equal
'g' Remove
'h' Remove
'j' Remove
'k' Remove
't' Add
'r' Add
Update:
Here is a comparison between Dmitry's and ingen's answer: https://dotnetfiddle.net/MJQDAO
You are looking for (minimum) edit distance / (minimum) edit sequence. You can find the theory of the process here:
https://web.stanford.edu/class/cs124/lec/med.pdf
Let's implement (simplest) Levenstein Distance / Sequence algorithm (for details see https://en.wikipedia.org/wiki/Levenshtein_distance). Let's start from helper classes (I've changed a bit your implementation of them):
public enum EditOperationKind : byte {
None, // Nothing to do
Add, // Add new character
Edit, // Edit character into character (including char into itself)
Remove, // Delete existing character
};
public struct EditOperation {
public EditOperation(char valueFrom, char valueTo, EditOperationKind operation) {
ValueFrom = valueFrom;
ValueTo = valueTo;
Operation = valueFrom == valueTo ? EditOperationKind.None : operation;
}
public char ValueFrom { get; }
public char ValueTo {get ;}
public EditOperationKind Operation { get; }
public override string ToString() {
switch (Operation) {
case EditOperationKind.None:
return $"'{ValueTo}' Equal";
case EditOperationKind.Add:
return $"'{ValueTo}' Add";
case EditOperationKind.Remove:
return $"'{ValueFrom}' Remove";
case EditOperationKind.Edit:
return $"'{ValueFrom}' to '{ValueTo}' Edit";
default:
return "???";
}
}
}
As far as I can see from the examples provided we don't have any edit operation, but add + remove; that's why I've put editCost = 2 when insertCost = 1, int removeCost = 1 (in case of tie: insert + remove vs. edit we put insert + remove).
Now we are ready to implement Levenstein algorithm:
public static EditOperation[] EditSequence(
string source, string target,
int insertCost = 1, int removeCost = 1, int editCost = 2) {
if (null == source)
throw new ArgumentNullException("source");
else if (null == target)
throw new ArgumentNullException("target");
// Forward: building score matrix
// Best operation (among insert, update, delete) to perform
EditOperationKind[][] M = Enumerable
.Range(0, source.Length + 1)
.Select(line => new EditOperationKind[target.Length + 1])
.ToArray();
// Minimum cost so far
int[][] D = Enumerable
.Range(0, source.Length + 1)
.Select(line => new int[target.Length + 1])
.ToArray();
// Edge: all removes
for (int i = 1; i <= source.Length; ++i) {
M[i][0] = EditOperationKind.Remove;
D[i][0] = removeCost * i;
}
// Edge: all inserts
for (int i = 1; i <= target.Length; ++i) {
M[0][i] = EditOperationKind.Add;
D[0][i] = insertCost * i;
}
// Having fit N - 1, K - 1 characters let's fit N, K
for (int i = 1; i <= source.Length; ++i)
for (int j = 1; j <= target.Length; ++j) {
// here we choose the operation with the least cost
int insert = D[i][j - 1] + insertCost;
int delete = D[i - 1][j] + removeCost;
int edit = D[i - 1][j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);
int min = Math.Min(Math.Min(insert, delete), edit);
if (min == insert)
M[i][j] = EditOperationKind.Add;
else if (min == delete)
M[i][j] = EditOperationKind.Remove;
else if (min == edit)
M[i][j] = EditOperationKind.Edit;
D[i][j] = min;
}
// Backward: knowing scores (D) and actions (M) let's building edit sequence
List<EditOperation> result =
new List<EditOperation>(source.Length + target.Length);
for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);) {
EditOperationKind op = M[y][x];
if (op == EditOperationKind.Add) {
x -= 1;
result.Add(new EditOperation('\0', target[x], op));
}
else if (op == EditOperationKind.Remove) {
y -= 1;
result.Add(new EditOperation(source[y], '\0', op));
}
else if (op == EditOperationKind.Edit) {
x -= 1;
y -= 1;
result.Add(new EditOperation(source[y], target[x], op));
}
else // Start of the matching (EditOperationKind.None)
break;
}
result.Reverse();
return result.ToArray();
}
Demo:
var sequence = EditSequence("asdfghjk", "wsedrftr");
Console.Write(string.Join(Environment.NewLine, sequence));
Outcome:
'a' Remove
'w' Add
's' Equal
'e' Add
'd' Equal
'r' Add
'f' Equal
'g' Remove
'h' Remove
'j' Remove
'k' Remove
't' Add
'r' Add
I'll go out on a limb here and provide an algorithm that's not the most efficient, but is easy to reason about.
Let's cover some ground first:
1) Order matters
string before = "bar foo"
string after = "foo bar"
Even though "bar" and "foo" occur in both strings, "bar" will need to be removed and added again later. This also tells us it's the after string that gives us the order of chars we're interested in, we want "foo" first.
2) Order over count
Another way to look at it, is that some chars may never get their turn.
string before = "abracadabra"
string after = "bar bar"
Only the bold chars of "bar bar", get their say in "abracadabra". Even though we've got two b's in both strings, only the first one counts. By the time we get to the second b in "bar bar" the second b in "abracadabra" has already been passed, when we were looking for the first occurrence of 'r'.
3) Barriers
Barriers are the chars that exist in both strings, taking order and count into consideration. This already suggests a set might not be the most appropriate data structure, as we would lose count.
For an input
string before = "pinata"
string after = "accidental"
We get (pseudocode)
var barriers = { 'a', 't', 'a' }
"pinata"
"accidental"
Let's follow the execution flow:
'a' is the first barrier, it's also the first char of after so everything prepending the first 'a' in before can be removed. "pinata" -> "ata"
the second barrier is 't', it's not at the next position in our after string, so we can insert everything in between. "ata" -> "accidenta"
the third barrier 'a' is already at the next position, so we can move to the next barrier without doing any real work.
there are no more barriers, but our string length is still less than that of after, so there will be some post processing. "accidenta" -> "accidental"
Note 'i' and 'n' don't get to play, again, order over count.
Implementation
We've established that order and count matter, a Queue comes to mind.
static public List<Difference> CalculateDifferences(string before, string after)
{
List<Difference> result = new List<Difference>();
Queue<char> barriers = new Queue<char>();
#region Preprocessing
int index = 0;
for (int i = 0; i < after.Length; i++)
{
// Look for the first match starting at index
int match = before.IndexOf(after[i], index);
if (match != -1)
{
barriers.Enqueue(after[i]);
index = match + 1;
}
}
#endregion
#region Queue Processing
index = 0;
while (barriers.Any())
{
char barrier = barriers.Dequeue();
// Get the offset to the barrier in both strings,
// ignoring the part that's already been handled
int offsetBefore = before.IndexOf(barrier, index) - index;
int offsetAfter = after.IndexOf(barrier, index) - index;
// Remove prefix from 'before' string
if (offsetBefore > 0)
{
RemoveChars(before.Substring(index, offsetBefore), result);
before = before.Substring(offsetBefore);
}
// Insert prefix from 'after' string
if (offsetAfter > 0)
{
string substring = after.Substring(index, offsetAfter);
AddChars(substring, result);
before = before.Insert(index, substring);
index += substring.Length;
}
// Jump over the barrier
KeepChar(barrier, result);
index++;
}
#endregion
#region Post Queue processing
if (index < before.Length)
{
RemoveChars(before.Substring(index), result);
}
if (index < after.Length)
{
AddChars(after.Substring(index), result);
}
#endregion
return result;
}
static private void KeepChar(char barrier, List<Difference> result)
{
result.Add(new Difference()
{
c = barrier,
op = Operation.Equal
});
}
static private void AddChars(string substring, List<Difference> result)
{
result.AddRange(substring.Select(x => new Difference()
{
c = x,
op = Operation.Add
}));
}
static private void RemoveChars(string substring, List<Difference> result)
{
result.AddRange(substring.Select(x => new Difference()
{
c = x,
op = Operation.Remove
}));
}
I tested with 3 examples above, and it returns the expected result properly and perfectly.
int flag = 0;
int flag_2 = 0;
string a = "asdfghjk";
string b = "wsedrftr";
char[] array_a = a.ToCharArray();
char[] array_b = b.ToCharArray();
for (int i = 0,j = 0, n= 0; i < array_b.Count(); i++)
{
//Execute 1 time until reach first equal character
if(i == 0 && a.Contains(array_b[0]))
{
while (array_a[n] != array_b[0])
{
Console.WriteLine(String.Concat(array_a[n], " : Remove"));
n++;
}
Console.WriteLine(String.Concat(array_a[n], " : Equal"));
n++;
}
else if(i == 0 && !a.Contains(array_b[0]))
{
Console.WriteLine(String.Concat(array_a[n], " : Remove"));
n++;
Console.WriteLine(String.Concat(array_b[0], " : Add"));
}
else
{
if(n < array_a.Count())
{
if (array_a[n] == array_b[i])
{
Console.WriteLine(String.Concat(array_a[n], " : Equal"));
n++;
}
else
{
flag = 0;
for (int z = n; z < array_a.Count(); z++)
{
if (array_a[z] == array_b[i])
{
flag = 1;
break;
}
}
if (flag == 0)
{
flag_2 = 0;
for (int aa = i; aa < array_b.Count(); aa++)
{
for(int bb = n; bb < array_a.Count(); bb++)
{
if (array_b[aa] == array_a[bb])
{
flag_2 = 1;
break;
}
}
}
if(flag_2 == 1)
{
Console.WriteLine(String.Concat(array_b[i], " : Add"));
}
else
{
for (int z = n; z < array_a.Count(); z++)
{
Console.WriteLine(String.Concat(array_a[z], " : Remove"));
n++;
}
Console.WriteLine(String.Concat(array_b[i], " : Add"));
}
}
else
{
Console.WriteLine(String.Concat(array_a[n], " : Remove"));
i--;
n++;
}
}
}
else
{
Console.WriteLine(String.Concat(array_b[i], " : Add"));
}
}
}//end for
MessageBox.Show("Done");
//OUTPUT CONSOLE:
/*
a : Remove
w : Add
s : Equal
e : Add
d : Equal
r : Add
f : Equal
g : Remove
h : Remove
j : Remove
k : Remove
t : Add
r : Add
*/
Here might be another solution, full code and commented.
However the result of your first original example is inverted :
class Program
{
enum CharState
{
Add,
Equal,
Remove
}
struct CharResult
{
public char c;
public CharState state;
}
static void Main(string[] args)
{
string a = "asdfghjk";
string b = "wsedrftr";
while (true)
{
Console.WriteLine("Enter string a (enter to quit) :");
a = Console.ReadLine();
if (a == string.Empty)
break;
Console.WriteLine("Enter string b :");
b = Console.ReadLine();
List<CharResult> result = calculate(a, b);
DisplayResults(result);
}
Console.WriteLine("Press a key to exit");
Console.ReadLine();
}
static List<CharResult> calculate(string a, string b)
{
List<CharResult> res = new List<CharResult>();
int i = 0, j = 0;
char[] array_a = a.ToCharArray();
char[] array_b = b.ToCharArray();
while (i < array_a.Length && j < array_b.Length)
{
//For the current char in a, we check for the equal in b
int index = b.IndexOf(array_a[i], j);
if (index < 0) //not found, this char should be removed
{
res.Add(new CharResult() { c = array_a[i], state = CharState.Remove });
i++;
}
else
{
//we add all the chars between B's current index and the index
while (j < index)
{
res.Add(new CharResult() { c = array_b[j], state = CharState.Add });
j++;
}
//then we say the current is the same
res.Add(new CharResult() { c = array_a[i], state = CharState.Equal });
i++;
j++;
}
}
while (i < array_a.Length)
{
//b is now empty, we remove the remains
res.Add(new CharResult() { c = array_a[i], state = CharState.Remove });
i++;
}
while (j < array_b.Length)
{
//a has been treated, we add the remains
res.Add(new CharResult() { c = array_b[j], state = CharState.Add });
j++;
}
return res;
}
static void DisplayResults(List<CharResult> results)
{
foreach (CharResult r in results)
{
Console.WriteLine($"'{r.c}' - {r.state}");
}
}
}
If you want to have a precise comparison between two strings, you must read and understand Levenshtein Distance. by using this algorithm you can precisely calculate rate of similarity between two string and also you can backtrack the algorithm to get the chain of changing on the second string. this algorithm is a important metric for Natural Language Processing also.
there are some other benefits and it's need time to learn.
in this link there is a C# version of Levenshtein Distance :
https://www.dotnetperls.com/levenshtein
In data sometimes the same product will be named with a roman numeral while other times it will be a digit.
Example Samsung Galaxy SII verses Samsung Galaxy S2
How can the II be converted to the value 2?
I've noticed some really complicated solutions here but this is a really simple problem. I made a solution that avoided the need to hard code the "exceptions" (IV, IX, XL, etc). I used a for loop to look ahead at the next character in the Roman numeral string to see if the number associated with the numeral should be subtracted or added to the total. For simplicity's sake I'm assuming all input is valid.
private static Dictionary<char, int> RomanMap = new Dictionary<char, int>()
{
{'I', 1},
{'V', 5},
{'X', 10},
{'L', 50},
{'C', 100},
{'D', 500},
{'M', 1000}
};
public static int RomanToInteger(string roman)
{
int number = 0;
for (int i = 0; i < roman.Length; i++)
{
if (i + 1 < roman.Length && RomanMap[roman[i]] < RomanMap[roman[i + 1]])
{
number -= RomanMap[roman[i]];
}
else
{
number += RomanMap[roman[i]];
}
}
return number;
}
I initially tried using a foreach on the string which I think was a slightly more readable solution but I ended up adding every single number and subtracting it twice later if it turned out to be one of the exceptions, which I didn't like. I'll post it here anyway for posterity.
public static int RomanToInteger(string roman)
{
int number = 0;
char previousChar = roman[0];
foreach(char currentChar in roman)
{
number += RomanMap[currentChar];
if(RomanMap[previousChar] < RomanMap[currentChar])
{
number -= RomanMap[previousChar] * 2;
}
previousChar = currentChar;
}
return number;
}
This is my solution
public int SimplerConverter(string number)
{
number = number.ToUpper();
var result = 0;
foreach (var letter in number)
{
result += ConvertLetterToNumber(letter);
}
if (number.Contains("IV")|| number.Contains("IX"))
result -= 2;
if (number.Contains("XL")|| number.Contains("XC"))
result -= 20;
if (number.Contains("CD")|| number.Contains("CM"))
result -= 200;
return result;
}
private int ConvertLetterToNumber(char letter)
{
switch (letter)
{
case 'M':
{
return 1000;
}
case 'D':
{
return 500;
}
case 'C':
{
return 100;
}
case 'L':
{
return 50;
}
case 'X':
{
return 10;
}
case 'V':
{
return 5;
}
case 'I':
{
return 1;
}
default:
{
throw new ArgumentException("Ivalid charakter");
}
}
}
A more simple and readable C# implementation that:
maps I to 1, V to 5, X to 10, L to 50, C to 100, D to 500, M to 1000.
uses one single foreach loop (foreach used on purpose, with previous value
hold).
adds the mapped number to the total.
subtracts twice the number added before, if I before V or X, X before L or C, C before D or M (not all chars are allowed here!).
returns 0 (not used in Roman numerals) on empty string, wrong letter
or not allowed char used for subtraction.
remark: it's still not totally complete, we didn't check all possible conditions for a valid input string!
Code:
private static Dictionary<char, int> _romanMap = new Dictionary<char, int>
{
{'I', 1}, {'V', 5}, {'X', 10}, {'L', 50}, {'C', 100}, {'D', 500}, {'M', 1000}
};
public static int ConvertRomanToNumber(string text)
{
int totalValue = 0, prevValue = 0;
foreach (var c in text)
{
if (!_romanMap.ContainsKey(c))
return 0;
var crtValue = _romanMap[c];
totalValue += crtValue;
if (prevValue != 0 && prevValue < crtValue)
{
if (prevValue == 1 && (crtValue == 5 || crtValue == 10)
|| prevValue == 10 && (crtValue == 50 || crtValue == 100)
|| prevValue == 100 && (crtValue == 500 || crtValue == 1000))
totalValue -= 2 * prevValue;
else
return 0;
}
prevValue = crtValue;
}
return totalValue;
}
I landed here searching for a small implementation of a Roman Numerals parser but wasn't satisfied by the provided answers in terms of size and elegance. I leave my final, recursive implementation here, to help others searching a small implementation.
Convert Roman Numerals by Recursion
The algorithm is able to handle numerals in irregular subtractive notation (f.e. XIIX).
This implementation may only work with well-formed (strings matching /[mdclxvi]*/i) roman numerals.
The implementation is not optimized for speed.
// returns the value for a roman literal
private static int romanValue(int index)
{
int basefactor = ((index % 2) * 4 + 1); // either 1 or 5...
// ...multiplied with the exponentation of 10, if the literal is `x` or higher
return index > 1 ? (int) (basefactor * System.Math.Pow(10.0, index / 2)) : basefactor;
}
public static int FromRoman(string roman)
{
roman = roman.ToLower();
string literals = "mdclxvi";
int value = 0, index = 0;
foreach (char literal in literals)
{
value = romanValue(literals.Length - literals.IndexOf(literal) - 1);
index = roman.IndexOf(literal);
if (index > -1)
return FromRoman(roman.Substring(index + 1)) + (index > 0 ? value - FromRoman(roman.Substring(0, index)) : value);
}
return 0;
}
Try it using this .Netfiddle: https://dotnetfiddle.net/veaNk3
How does it work?
This algorithm calculates the value of a Roman Numeral by taking the highest value from the Roman Numeral and adding/subtracting recursively the value of the remaining left/right parts of the literal.
ii X iiv # Pick the greatest value in the literal `iixiiv` (symbolized by uppercase)
Then recursively reevaluate and subtract the lefthand-side and add the righthand-side:
(iiv) + x - (ii) # Subtract the lefthand-side, add the righthand-side
(V - (ii)) + x - ((I) + i) # Pick the greatest values, again
(v - ((I) + i)) + x - ((i) + i) # Pick the greatest value of the last numeral compound
Finally the numerals are substituted by their integer values:
(5 - ((1) + 1)) + 10 - ((1) + 1)
(5 - (2)) + 10 - (2)
3 + 10 - 2
= 11
I wrote a simple Roman Numeral Converter just now, but it doesn't do a whole lot of error checking, but it seems to work for everything I could throw at it that is properly formatted.
public class RomanNumber
{
public string Numeral { get; set; }
public int Value { get; set; }
public int Hierarchy { get; set; }
}
public List<RomanNumber> RomanNumbers = new List<RomanNumber>
{
new RomanNumber {Numeral = "M", Value = 1000, Hierarchy = 4},
//{"CM", 900},
new RomanNumber {Numeral = "D", Value = 500, Hierarchy = 4},
//{"CD", 400},
new RomanNumber {Numeral = "C", Value = 100, Hierarchy = 3},
//{"XC", 90},
new RomanNumber {Numeral = "L", Value = 50, Hierarchy = 3},
//{"XL", 40},
new RomanNumber {Numeral = "X", Value = 10, Hierarchy = 2},
//{"IX", 9},
new RomanNumber {Numeral = "V", Value = 5, Hierarchy = 2},
//{"IV", 4},
new RomanNumber {Numeral = "I", Value = 1, Hierarchy = 1}
};
/// <summary>
/// Converts the roman numeral to int, assumption roman numeral is properly formatted.
/// </summary>
/// <param name="romanNumeralString">The roman numeral string.</param>
/// <returns></returns>
private int ConvertRomanNumeralToInt(string romanNumeralString)
{
if (romanNumeralString == null) return int.MinValue;
var total = 0;
for (var i = 0; i < romanNumeralString.Length; i++)
{
// get current value
var current = romanNumeralString[i].ToString();
var curRomanNum = RomanNumbers.First(rn => rn.Numeral.ToUpper() == current.ToUpper());
// last number just add the value and exit
if (i + 1 == romanNumeralString.Length)
{
total += curRomanNum.Value;
break;
}
// check for exceptions IV, IX, XL, XC etc
var next = romanNumeralString[i + 1].ToString();
var nextRomanNum = RomanNumbers.First(rn => rn.Numeral.ToUpper() == next.ToUpper());
// exception found
if (curRomanNum.Hierarchy == (nextRomanNum.Hierarchy - 1))
{
total += nextRomanNum.Value - curRomanNum.Value;
i++;
}
else
{
total += curRomanNum.Value;
}
}
return total;
}
private static int convertRomanToInt(String romanNumeral)
{
Dictionary<Char, Int32> romanMap = new Dictionary<char, int>
{
{'I', 1 },
{'V', 5},
{'X', 10},
{'L', 50},
{'C', 100},
{'D', 500},
{'M', 1000}
};
Int32 result = 0;
for (Int32 index = romanNumeral.Length - 1, last = 0; index >= 0; index--)
{
Int32 current = romanMap[romanNumeral[index]];
result += (current < last ? -current : current);
last = current;
}
return result;
}
With Humanizer library you can change Roman numerals to numbers using the FromRoman extension
"IV".FromRoman() => 4
And reverse operation using the ToRoman extension.
4.ToRoman() => "IV"
Borrowed a lot from System.Linq on this one. String implements IEnumerable<char>, so I figured that was appropriate since we are treating it as an enumerable object anyways. Tested it against a bunch of random numbers, including 1, 3, 4, 8, 83, 99, 404, 555, 846, 927, 1999, 2420.
public static IDictionary<char, int> CharValues
{
get
{
return new Dictionary<char, int>
{{'I', 1}, {'V', 5}, {'X', 10}, {'L', 50}, {'C', 100}, {'D', 500}, {'M', 1000}};
}
}
public static int RomanNumeralToInteger(IEnumerable<char> romanNumerals)
{
int retVal = 0;
//go backwards
for (int i = romanNumerals.Count() - 1; i >= 0; i--)
{
//get current character
char c = romanNumerals.ElementAt(i);
//error checking
if (!CharValues.ContainsKey(c)) throw new InvalidRomanNumeralCharacterException(c);
//determine if we are adding or subtracting
bool op = romanNumerals.Skip(i).Any(rn => CharValues[rn] > CharValues[c]);
//then do so
retVal = op ? retVal - CharValues[c] : retVal + CharValues[c];
}
return retVal;
}
Solution with fulfilling the "subtractive notation" semantics checks
None of the current solutions completely fulfills the entire set of rules
for the "subtractive notation". "IIII" -> is not possible. Each of the solutions results a 4. Also the strings: "CCCC", "VV", "IC", "IM" are invalid.
A good online-converter to check the semantics is https://www.romannumerals.org/converter
So, if you really want doing a completely semantics-check, it's much more complex.
My approach was to first write the unit tests with the semantic checks. Then to write the code. Then to reduce the loops with some linq expressions.
Maybe there is a smarter solution, but I think the following code fullfills the rules to convert a roman numerals string.
After the code section, there is a section with my unit tests.
public class RomanNumerals
{
private List<Tuple<char, ushort, char?[]>> _validNumerals = new List<Tuple<char, ushort, char?[]>>()
{
new Tuple<char, ushort, char?[]>('I', 1, new char? [] {'V', 'X'}),
new Tuple<char, ushort, char?[]>('V', 5, null),
new Tuple<char, ushort, char?[]>('X', 10, new char?[] {'L', 'C'}),
new Tuple<char, ushort, char?[]>('L', 50, null),
new Tuple<char, ushort, char?[]>('C', 100, new char? [] {'D', 'M'}),
new Tuple<char, ushort, char?[]>('D', 500, null),
new Tuple<char, ushort, char?[]>('M', 1000, new char? [] {null, null})
};
public int TranslateRomanNumeral(string input)
{
var inputList = input?.ToUpper().ToList();
if (inputList == null || inputList.Any(x => _validNumerals.Select(t => t.Item1).Contains(x) == false))
{
throw new ArgumentException();
}
char? valForSubtraction = null;
int result = 0;
bool noAdding = false;
int equalSum = 0;
for (int i = 0; i < inputList.Count; i++)
{
var currentNumeral = _validNumerals.FirstOrDefault(s => s.Item1 == inputList[i]);
var nextNumeral = i < inputList.Count - 1 ? _validNumerals.FirstOrDefault(s => s.Item1 == inputList[i + 1]) : null;
bool currentIsDecimalPower = currentNumeral?.Item3?.Any() ?? false;
if (nextNumeral != null)
{
// Syntax and Semantics checks
if ((currentNumeral.Item2 < nextNumeral.Item2) && (currentIsDecimalPower == false || currentNumeral.Item3.Any(s => s == nextNumeral.Item1) == false) ||
(currentNumeral.Item2 == nextNumeral.Item2) && (currentIsDecimalPower == false || nextNumeral.Item1 == valForSubtraction) ||
(currentIsDecimalPower && result > 0 && ((nextNumeral.Item2 -currentNumeral.Item2) > result )) ||
(currentNumeral.Item2 > nextNumeral.Item2) && (nextNumeral.Item1 == valForSubtraction)
)
{
throw new ArgumentException();
}
if (currentNumeral.Item2 == nextNumeral.Item2)
{
equalSum += equalSum == 0 ? currentNumeral.Item2 + nextNumeral.Item2 : nextNumeral.Item2;
int? smallest = null;
var list = _validNumerals.Where(p => _validNumerals.FirstOrDefault(s => s.Item1 == currentNumeral.Item1).Item3.Any(s2 => s2 != null && s2 == p.Item1)).ToList();
if (list.Any())
{
smallest = list.Select(s3 => s3.Item2).ToList().Min();
}
// Another Semantics check
if (currentNumeral.Item3 != null && equalSum >= (smallest - currentNumeral.Item2))
{
throw new ArgumentException();
}
result += noAdding ? 0 : currentNumeral.Item2 + nextNumeral.Item2;
noAdding = !noAdding;
valForSubtraction = null;
}
else
if (currentNumeral.Item2 < nextNumeral.Item2)
{
equalSum = 0;
result += nextNumeral.Item2 - currentNumeral.Item2;
valForSubtraction = currentNumeral.Item1;
noAdding = true;
}
else
if (currentNumeral.Item2 > nextNumeral.Item2)
{
equalSum = 0;
result += noAdding ? 0 : currentNumeral.Item2;
noAdding = false;
valForSubtraction = null;
}
}
else
{
result += noAdding ? 0 : currentNumeral.Item2;
}
}
return result;
}
}
Here are the UNIT tests
[TestFixture]
public class RomanNumeralsTests
{
[Test]
public void TranslateRomanNumeral_WhenArgumentIsNull_RaiseArgumentNullException()
{
var romanNumerals = new RomanNumerals();
Assert.Throws<ArgumentException>(() => romanNumerals.TranslateRomanNumeral(null));
}
[TestCase("A")]
[TestCase("-")]
[TestCase("BXA")]
[TestCase("MMXK")]
public void TranslateRomanNumeral_WhenInvalidNumeralSyntax_RaiseException(string input)
{
var romanNumerals = new RomanNumerals();
Assert.Throws<ArgumentException>(() => romanNumerals.TranslateRomanNumeral(input));
}
[TestCase("IIII")]
[TestCase("CCCC")]
[TestCase("VV")]
[TestCase("IC")]
[TestCase("IM")]
[TestCase("XM")]
[TestCase("IL")]
[TestCase("MCDXCXI")]
[TestCase("MCDDXC")]
public void TranslateRomanNumeral_WhenInvalidNumeralSemantics_RaiseException(string input)
{
var romanNumerals = new RomanNumerals();
Assert.Throws<ArgumentException>(() => romanNumerals.TranslateRomanNumeral(input));
}
[TestCase("I", 1)]
[TestCase("II", 2)]
[TestCase("III", 3)]
[TestCase("IV", 4)]
[TestCase("XLII", 42)]
[TestCase("MMXIII", 2013)]
[TestCase("MXI", 1011)]
[TestCase("MCDXCIX", 1499)]
[TestCase("MMXXII", 2022)]
[TestCase("V", 5)]
[TestCase("VI", 6)]
[TestCase("CX", 110)]
[TestCase("CCCLXXV", 375)]
[TestCase("MD", 1500)]
[TestCase("MDLXXV", 1575)]
[TestCase("MDCL", 1650)]
[TestCase("MDCCXXV", 1725)]
[TestCase("MDCCC", 1800)]
[TestCase("MDCCCLXXV", 1875)]
[TestCase("MCML", 1950)]
[TestCase("MMXXV", 2025)]
[TestCase("MMC", 2100)]
[TestCase("MMCLXXV", 2175)]
[TestCase("MMCCL", 2250)]
[TestCase("MMCCCXXV", 2325)]
[TestCase("MMCD", 2400)]
[TestCase("MMCDLXXV", 2475)]
[TestCase("MMDL", 2550)]
[TestCase("MMMMMMMM", 8000)]
[TestCase("MMMMMMMMIV", 8004)]
public void TranslateRomanNumeral_WhenValidNumeral_Translate(string input, int output)
{
var romanNumerals = new RomanNumerals();
var result = romanNumerals.TranslateRomanNumeral(input);
Assert.That(result.Equals(output));
}
}
private static Dictionary<char, int> RomanNumberMap = new Dictionary<char, int> {
{'I', 1},
{'V', 5},
{'X', 10},
{'L', 50},
{'C', 100},
{'D', 500},
{'M', 1000}
};
private const string RomanNumberValidationRegEx = "^(?=[MDCLXVI])M*(C[MD]|D?C{0,3})(X[CL]|L?X{0,3})(I[XV]|V?I{0,3})$";
private static int ConvertToRomanNumberToInteger(string romanNumber)
{
if (!Regex.IsMatch(romanNumber, RomanNumberValidationRegEx))
{
throw new ArgumentOutOfRangeException(romanNumber);
}
int result = 0;
for (int i = 0; i < romanNumber.Length; i++)
{
int currentVal = RomanNumberMap[romanNumber[i]];
if (romanNumber.Length > i + 1)
{
int nextVal = RomanNumberMap[romanNumber[i + 1]];
if (nextVal > currentVal)
{
result = result + (nextVal - currentVal);
i++;
continue;
}
}
result = result + currentVal;
}
return result;
}
I will suggest a simplest method for this by using array in .net : comments are given in C# section for explanation
VB.net
Public Class Form1
Dim indx() As Integer = {1, 2, 3, 4, 5, 10, 50, 100, 500, 1000}
Dim row() As String = {"I", "II", "III", "IV", "V", "X", "L", "C", "D", "M"}
Dim limit As Integer = 9
Dim output As String = ""
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
Dim num As Integer
output = ""
num = CInt(txt1.Text)
While num > 0
num = find(num)
End While
txt2.Text = output
End Sub
Public Function find(ByVal Num As Integer) As Integer
Dim i As Integer = 0
While indx(i) <= Num
i += 1
End While
If i <> 0 Then
limit = i - 1
Else
limit = 0
End If
output = output & row(limit)
Num = Num - indx(limit)
Return Num
End Function
End Class
C#
using Microsoft.VisualBasic;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
public class Form1
{
int[] indx = {
1,
2,
3,
4,
5,
10,
50,
100,
500,
1000
// initialize array of integers
};
string[] row = {
"I",
"II",
"III",
"IV",
"V",
"X",
"L",
"C",
"D",
"M"
//Carasponding roman letters in for the numbers in the array
};
// integer to indicate the position index for link two arrays
int limit = 9;
//string to store output
string output = "";
private void Button1_Click(System.Object sender, System.EventArgs e)
{
int num = 0;
// stores the input
output = "";
// clear output before processing
num = Convert.ToInt32(txt1.Text);
// get integer value from the textbox
//Loop until the value became 0
while (num > 0) {
num = find(num);
//call function for processing
}
txt2.Text = output;
// display the output in text2
}
public int find(int Num)
{
int i = 0;
// loop variable initialized with 0
//Loop until the indx(i).value greater than or equal to num
while (indx(i) <= Num) {
i += 1;
}
// detemine the value of limit depends on the itetration
if (i != 0) {
limit = i - 1;
} else {
limit = 0;
}
output = output + row(limit);
//row(limit) is appended with the output
Num = Num - indx(limit);
// calculate next num value
return Num;
//return num value for next itetration
}
}
I refer from this blog. You could just reverse the roman numeral , then all the thing would be more easier compare to make the comparison.
public static int pairConversion(int dec, int lastNum, int lastDec)
{
if (lastNum > dec)
return lastDec - dec;
else return lastDec + dec;
}
public static int ConvertRomanNumtoInt(string strRomanValue)
{
var dec = 0;
var lastNum = 0;
foreach (var c in strRomanValue.Reverse())
{
switch (c)
{
case 'I':
dec = pairConversion(1, lastNum, dec);
lastNum = 1;
break;
case 'V':
dec=pairConversion(5,lastNum, dec);
lastNum = 5;
break;
case 'X':
dec = pairConversion(10, lastNum, dec);
lastNum = 10;
break;
case 'L':
dec = pairConversion(50, lastNum, dec);
lastNum = 50;
break;
case 'C':
dec = pairConversion(100, lastNum, dec);
lastNum = 100;
break;
case 'D':
dec = pairConversion(500, lastNum, dec);
lastNum = 500;
break;
case 'M':
dec = pairConversion(1000, lastNum, dec);
lastNum = 1000;
break;
}
}
return dec;
}
This one uses a stack:
public int RomanToInt(string s)
{
var dict = new Dictionary<char, int>();
dict['I'] = 1;
dict['V'] = 5;
dict['X'] = 10;
dict['L'] = 50;
dict['C'] = 100;
dict['D'] = 500;
dict['M'] = 1000;
Stack<char> st = new Stack<char>();
foreach (char ch in s.ToCharArray())
st.Push(ch);
int result = 0;
while (st.Count > 0)
{
var c1=st.Pop();
var ch1 = dict[c1];
if (st.Count > 0)
{
var c2 = st.Peek();
var ch2 = dict[c2];
if (ch2 < ch1)
{
result += (ch1 - ch2);
st.Pop();
}
else
{
result += ch1;
}
}
else
{
result += ch1;
}
}
return result;
}
I wrote this just using arrays.
I omit the testing code here, but it looks it works properly.
public static class RomanNumber {
static string[] units = { "", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX" };
static string[] tens = { "", "X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC" };
static string[] hundreds = { "", "C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM" };
static string[] thousands = { "", "M", "MM", "MMM" };
static public bool IsRomanNumber(string source) {
try {
return RomanNumberToInt(source) > 0;
}
catch {
return false;
}
}
/// <summary>
/// Parses a string containing a roman number.
/// </summary>
/// <param name="source">source string</param>
/// <returns>The integer value of the parsed roman numeral</returns>
/// <remarks>
/// Throws an exception on invalid source.
/// Throws an exception if source is not a valid roman number.
/// Supports roman numbers from "I" to "MMMCMXCIX" ( 1 to 3999 )
/// NOTE : "IMMM" is not valid</remarks>
public static int RomanNumberToInt(string source) {
if (String.IsNullOrWhiteSpace(source)) {
throw new ArgumentNullException();
}
int total = 0;
string buffer = source;
// parse the last four characters in the string
// each time we check the buffer against a number array,
// starting from units up to thousands
// we quit as soon as there are no remaing characters to parse
total += RipOff(buffer, units, out buffer);
if (buffer != null) {
total += (RipOff(buffer, tens, out buffer)) * 10;
}
if (buffer != null) {
total += (RipOff(buffer, hundreds, out buffer)) * 100;
}
if (buffer != null) {
total += (RipOff(buffer, thousands, out buffer)) * 1000;
}
// after parsing for thousands, if there is any character left, this is not a valid roman number
if (buffer != null) {
throw new ArgumentException(String.Format("{0} is not a valid roman number", buffer));
}
return total;
}
/// <summary>
/// Given a string, takes the four characters on the right,
/// search an element in the numbers array and returns the remaing characters.
/// </summary>
/// <param name="source">source string to parse</param>
/// <param name="numbers">array of roman numerals</param>
/// <param name="left">remaining characters on the left</param>
/// <returns>If it finds a roman numeral returns its integer value; otherwise returns zero</returns>
public static int RipOff(string source, string[] numbers, out string left) {
int result = 0;
string buffer = null;
// we take the last four characters : this is the length of the longest numeral in our arrays
// ("VIII", "LXXX", "DCCC")
// or all if source length is 4 or less
if (source.Length > 4) {
buffer = source.Substring(source.Length - 4);
left = source.Substring(0, source.Length - 4);
}
else {
buffer = source;
left = null;
}
// see if buffer exists in the numbers array
// if it does not, skip the first character and try again
// until buffer contains only one character
// append the skipped character to the left arguments
while (!numbers.Contains(buffer)) {
if (buffer.Length == 1) {
left = source; // failed
break;
}
else {
left += buffer.Substring(0, 1);
buffer = buffer.Substring(1);
}
}
if (buffer.Length > 0) {
if (numbers.Contains(buffer)) {
result = Array.IndexOf(numbers, buffer);
}
}
return result;
}
}
}
EDIT
Forget about it !
Just look at BrunoLM solution here.
It's simple and elegant.
The only caveat is that it does not check the source.
This is my solution:
/// <summary>
/// Converts a Roman number string into a Arabic number
/// </summary>
/// <param name="romanNumber">the Roman number string</param>
/// <returns>the Arabic number (0 if the given string is not convertible to a Roman number)</returns>
public static int ToArabicNumber(string romanNumber)
{
string[] replaceRom = { "CM", "CD", "XC", "XL", "IX", "IV" };
string[] replaceNum = { "DCCCC", "CCCC", "LXXXX", "XXXX", "VIIII", "IIII" };
string[] roman = { "M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I" };
int[] arabic = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 };
return Enumerable.Range(0, replaceRom.Length)
.Aggregate
(
romanNumber,
(agg, cur) => agg.Replace(replaceRom[cur], replaceNum[cur]),
agg => agg.ToArray()
)
.Aggregate
(
0,
(agg, cur) =>
{
int idx = Array.IndexOf(roman, cur.ToString());
return idx < 0 ? 0 : agg + arabic[idx];
},
agg => agg
);
}
/// <summary>
/// Converts a Arabic number into a Roman number string
/// </summary>
/// <param name="arabicNumber">the Arabic number</param>
/// <returns>the Roman number string</returns>
public static string ToRomanNumber(int arabicNumber)
{
string[] roman = { "M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I" };
int[] arabic = { 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 };
return Enumerable.Range(0, arabic.Length)
.Aggregate
(
Tuple.Create(arabicNumber, string.Empty),
(agg, cur) =>
{
int remainder = agg.Item1 % arabic[cur];
string concat = agg.Item2 + string.Concat(Enumerable.Range(0, agg.Item1 / arabic[cur]).Select(num => roman[cur]));
return Tuple.Create(remainder, concat);
},
agg => agg.Item2
);
}
Here's the Explanation how the methods work:
ToArabicNumber
First aggregation step is to Replace the Roman Number special cases (e.g.: IV -> IIII). Second Aggregate step simply sums up the equivalent Arabic number of the Roman letter (e.g. V -> 5)
ToRomanNumber:
I start the aggregation with the given Arabic number. For each step the number will be divided by the equivalent number of the Roman letter. The remainder of this division is then the input for the next step. The division Result will be translated to the Equivalent Roman Number character which will be appended to the result string.
private static HashMap<Character, Integer> romanMap = new HashMap<>() {{
put('I', 1); put('V', 5); put('X', 10); put('L', 50);
put('C', 100); put('D', 500); put('M', 1000);
}};
private static int convertRomanToInt(String romanNumeral) {
int total = 0;
romanNumeral = romanNumeral.toUpperCase();
//add every Roman numeral
for(int i = 0; i < romanNumeral.length(); i++) {
total += romanMap.get(romanNumeral.charAt(i));
}
//remove the Roman numerals that are followed
//directly by a larger Roman numeral
for(int i = 0; i < romanNumeral.length()-1; i++) {
if(romanMap.get(romanNumeral.charAt(i))
< romanMap.get(romanNumeral.charAt(i+1))) {
total -= 2* romanMap.get(romanNumeral.charAt(i));
}
}
return total;
}
//note that the topmost solution does not solve this Roman numeral
//but mine does
//also note that this solution is a preference of simplicity over complexity
public static void main(String[] args) {
String rn = "CcLXxiV"; //274
System.out.println("Convert " + rn + " to " + convertRomanToInt(rn));
}
/*
this uses the string object Replace() & Split() methods
*/
int ToNumber(string roman){
/*
the 0 padding after the comma delimiter allows adding up the extra index produced by Split, which is not numerical
*/
string s1=roman.Replace("CM","900,0");
s1=s1.Replace("M","1000,0");
s1=s1.Replace("CD","400,0");
s1=s1.Replace("D","500,0");
s1=s1.Replace("XC","90,0");
s1=s1.Replace("C","100,0");
s1=s1.Replace("XL","40,0");
s1=s1.Replace("L","50,0");
s1=s1.Replace("IX","9,0");
s1=s1.Replace("X","10,0");
s1=s1.Replace("IV","4,0");
s1=s1.Replace("V","5,0");
s1=s1.Replace("I","1,0");
string[] spl=s1.Split(",");
int rlt=0;
for(int i=0;i<spl.Count();i++)
{
rlt+= Convert.ToInt32(spl[i].ToString());
}
return rlt;
}
Here is my O(n) solution in JavaScript
const NUMERAL = {
'I': 1,
'V': 5,
'X': 10,
'L': 50,
'C': 100,
'D': 500,
'M': 1000,
}
var romanToInt = function(s) {
if (s.length === 1) return +NUMERAL[s];
let number = 0;
for (let i = 0; i < s.length; i++) {
let num = +NUMERAL[s[i]];
let prev = +NUMERAL[s[i-1]];
if (prev < num) number += num - (2 * prev);
else number += num;
}
return number;
};
FWIW, here is a "try parse" version of David DeMar's answer:
private static readonly Dictionary<char, int> _romanMap = new Dictionary<char, int>() { { 'I', 1 }, { 'V', 5 }, { 'X', 10 }, { 'L', 50 }, { 'C', 100 }, { 'D', 500 }, { 'M', 1000 } };
public static bool TryParseRoman(string text, out int value)
{
value = 0;
if (string.IsNullOrEmpty(text))
return false;
var number = 0;
for (var i = 0; i < text.Length; i++)
{
if (!_romanMap.TryGetValue(text[i], out var num))
return false;
if ((i + 1) < text.Length)
{
if (!_romanMap.TryGetValue(text[i + 1], out var num2))
return false;
if (num < num2)
{
number -= num;
continue;
}
}
number += num;
}
value = number;
return true;
}
public class Solution {
public int RomanToInt(string s) {
var dict = new Dictionary<char,int>{
{'I',1},
{'V',5},
{'X',10},
{'L',50},
{'C',100},
{'M',1000},
{'D',500},
};
var result = 0; //What am I gonna return it from method ?
for(int i=0; i<s.Length; i++)
{
if(i+1<s.Length && dict[s[i]] < dict[s[i+1]])
{
result -=dict[s[i]];
}
else
{
result +=dict[s[i]];
}
}
return result;
}
}
public int RomanToInt(string s)
{
char[] romans = { 'I', 'V', 'X', 'L', 'C', 'D', 'M' };
int[] nums = { 1, 5, 10, 50, 100, 500, 1000 };
int result = 0;
foreach (char c in s)
{
for (int i = 0; i < romans.Length; i++)
{
if (romans[i] == c)
{
result += nums[i];
}
}
}
if (s.Contains("IV") || s.Contains("IX"))
result -= 2;
if (s.Contains("XL") || s.Contains("XC"))
result -= 20;
if (s.Contains("CD") || s.Contains("CM"))
result -= 200;
return result;
}
public static int ConvertRomanNumtoInt(string strRomanValue)
{
Dictionary RomanNumbers = new Dictionary
{
{"M", 1000},
{"CM", 900},
{"D", 500},
{"CD", 400},
{"C", 100},
{"XC", 90},
{"L", 50},
{"XL", 40},
{"X", 10},
{"IX", 9},
{"V", 5},
{"IV", 4},
{"I", 1}
};
int retVal = 0;
foreach (KeyValuePair pair in RomanNumbers)
{
while (strRomanValue.IndexOf(pair.Key.ToString()) == 0)
{
retVal += int.Parse(pair.Value.ToString());
strRomanValue = strRomanValue.Substring(pair.Key.ToString().Length);
}
}
return retVal;
}