Reverse Polish Notation for array elements. (example: array_name[i,j]) - c#

I need to convert expressions like array_name[i, i*k, i-k] into Reverse Polish Notation.
What I'm basically doing is trying to translate the expression like this:
if(array_name[i, j+k]>=a+b) array_name[i, j*k] = a+2; else x = a / b;
into the RPN using regular expressions.
I already have quite an ugly huge regular expression, which matches: if, else, + - * / = == ( ) <= >= < > != and all the words that match this pattern: [a-zA-z][a-zA-z0-9_]*. And also I have the code that translates infix arithmetical expressions into RPN. Here it is:
/// <summary>
/// Splits an infix expression into its lexemes using a single regular expression.
/// </summary>
/// <remarks>
/// Recognized lexemes: the keywords <c>if</c> and <c>else</c>, parentheses, the operators
/// <c>+ - * / = == != &lt; &gt; &lt;= &gt;=</c>, a literal backslash, identifiers
/// (<c>[a-zA-Z][a-zA-Z0-9_]*</c>) and unsigned decimal numbers.
/// Characters that match none of these (whitespace, <c>;</c>, <c>[</c>, <c>]</c>, <c>,</c> ...)
/// are simply skipped by <see cref="Regex.Matches(string)"/>.
/// </remarks>
/// <param name="input">String expression in infix notation.</param>
/// <returns>Collection of all the lexemes of the expression, in source order.</returns>
private static MatchCollection GetMatchCollection(string input)
{
    // Two-character operators are listed before their one-character prefixes so that
    // "<=" is never split into "<" and "=". The identifier class is [a-zA-Z0-9_]:
    // the range A-z would also accept '[', '\', ']', '^' and '`', gluing "arr[i]" into one lexeme.
    var rx =
        new Regex(
            @"\bif\b|\belse\b|\(|\)|\+|\-|\*|<=|>=|\\|>|<|(?<![!=])[!=]=(?!=)|([a-zA-Z][a-zA-Z0-9_]*)|(\d+\.?\d*)|(?<!=)=(?!=)|/");
    return rx.Matches(input);
}
/// <summary>
/// Translates an infix expression into Reverse Polish Notation using the
/// shunting-yard algorithm.
/// </summary>
/// <remarks>
/// Binary operators are grouped into precedence tiers (<c>* /</c>, then <c>+ -</c>, then
/// relational, then equality, then <c>&amp;&amp;</c>, then <c>||</c>, then <c>=</c>).
/// Operators in the same tier associate to the left, except assignment, which associates
/// to the right. Lexemes that start with a letter or digit - including the keywords
/// <c>if</c> and <c>else</c> - are copied to the output as operands; this method does not
/// translate conditional statements.
/// </remarks>
/// <param name="input">String expression in infix notation.</param>
/// <returns>RPN expression; every token is followed by a single space.</returns>
/// <exception cref="InvalidOperationException">The parentheses are not balanced.</exception>
public static string Translate(string input)
{
    var pending = new System.Collections.Generic.Stack<string>(); // operators and "(" awaiting output
    var rpn = new StringBuilder();

    foreach (Match m in GetMatchCollection(input))
    {
        var token = m.Value;

        if (token == "(")
        {
            pending.Push(token);
            continue;
        }

        if (token == ")")
        {
            // Flush everything back to the matching "(" and discard the pair.
            while (true)
            {
                if (pending.Count == 0)
                {
                    throw new InvalidOperationException("Unbalanced ')' in expression.");
                }

                var top = pending.Pop();
                if (top == "(")
                {
                    break;
                }

                rpn.Append(top).Append(' ');
            }

            continue;
        }

        int precedence = GetRpnOperatorPrecedence(token);
        if (precedence < 0)
        {
            // Not an operator: identifiers, numbers and keywords go straight to the output.
            if (char.IsLetterOrDigit(token[0]) || token[0] == '_')
            {
                rpn.Append(token).Append(' ');
            }

            continue;
        }

        bool rightAssociative = token == "=";
        while (pending.Count > 0 && pending.Peek() != "(")
        {
            int topPrecedence = GetRpnOperatorPrecedence(pending.Peek());
            bool popTop = topPrecedence > precedence
                          || (topPrecedence == precedence && !rightAssociative);
            if (!popTop)
            {
                break;
            }

            rpn.Append(pending.Pop()).Append(' ');
        }

        pending.Push(token);
    }

    while (pending.Count != 0)
    {
        var top = pending.Pop();
        if (top == "(")
        {
            throw new InvalidOperationException("Unbalanced '(' in expression.");
        }

        rpn.Append(top).Append(' ');
    }

    return rpn.ToString();
}

/// <summary>
/// Returns the precedence tier of a binary operator (larger binds tighter),
/// or -1 when <paramref name="token"/> is not an operator.
/// </summary>
private static int GetRpnOperatorPrecedence(string token)
{
    switch (token)
    {
        case "*":
        case "/":
            return 6;
        case "+":
        case "-":
            return 5;
        case "<":
        case ">":
        case "<=":
        case ">=":
            return 4;
        case "==":
        case "!=":
            return 3;
        case "&&":
            return 2;
        case "||":
            return 1;
        case "=":
            return 0;
        default:
            return -1;
    }
}
How can I modify the code to make the method public static string Translate(string input) translate simple expressions like array_name[i,k*i-1] into the RPN expression?
Note, that the public static string Translate(string input) method works fine only with simple arithmetical expressions, but not with the one I provided above (the if-else statement).

Regular expressions are not the way to go. Parse the input, probably leveraging a parser generator, into an abstract syntax tree and then output that tree as RPN, which is trivial.

Solved the issue. Here is the algorithm.
/// <summary>
/// Converts a space-separated infix statement (arithmetic, comparisons, array indexing
/// with <c>[ , ]</c> and a single <c>if ... else ...</c>) into a postfix token string.
/// </summary>
/// <remarks>
/// Every lexeme in the input must be separated by spaces, and each statement must end
/// with <c>;</c> - that is what flushes the pending operators to the output.
/// NOTE(review): "3Y" and "1B" look like conditional / unconditional jump markers whose
/// target index is the token emitted just before them - confirm against the consumer.
/// </remarks>
class InfixToPostfixConverter
{
    // Pending operators, brackets and the "if"/"flag" markers.
    private readonly Stack<string> _stack;
    // Input lexemes, produced by splitting the constructor argument on spaces.
    private readonly string[] _input;
    // Binding strength of every lexeme that may sit on the stack; larger binds tighter.
    private readonly Dictionary<string, int> _priorities;
    // Output tokens in postfix order.
    private readonly List<string> _poliz;

    /// <summary>Creates a converter for <paramref name="input"/>.</summary>
    /// <param name="input">Infix statement whose lexemes are separated by spaces.</param>
    public InfixToPostfixConverter(string input)
    {
        _input = input.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries);
        _stack = new Stack<string>();
        _poliz = new List<string>();
        _priorities = new Dictionary<string, int>
        {
            {"(", 0},
            {")", 0},
            {"[", 0},
            {"if", 0},
            {"flag", 0},
            {">", 1},
            {"<", 1},
            {"<=", 1},
            {">=", 1},
            {"=", 1},
            {"==", 1},
            {"!=", 1},
            {"+", 2},
            {"-", 2},
            {"*", 3},
            {"/", 3},
            {"^", 4},
            {"#", 5}
        };
    }

    /// <summary>Converts the input given to the constructor.</summary>
    /// <returns>The postfix tokens, each followed by a single space.</returns>
    public string Translate()
    {
        return ConvertInfixToPostfix();
    }

    private string ConvertInfixToPostfix()
    {
        // Start from a clean state so that calling Translate() twice gives the same answer
        // instead of appending to the previous output.
        _stack.Clear();
        _poliz.Clear();

        int countAts = 0;          // operands seen for the current array access (name + indices)
        bool reachedElse = false;
        foreach (var lexeme in _input)
        {
            if (lexeme.Equals("if"))
            {
                _stack.Push(lexeme);
                _stack.Push("flag"); // marks that the next closing bracket ends the condition
            }
            else if (lexeme.Equals("else"))
            {
                _poliz.Add("null");
                _poliz.Add("1B");
                reachedElse = true;
                // Patch the oldest open placeholder (the one emitted after the condition)
                // with the index at which the else-branch starts.
                _poliz[_poliz.FindIndex(x => x.Equals("null"))] = _poliz.Count.ToString();
            }
            else if (Regex.IsMatch(lexeme, @"[a-zA-Z][a-zA-Z0-9_]*|\d+\.?\d*"))
            {
                // Operand: identifier or number.
                _poliz.Add(lexeme);
            }
            else if (lexeme.Equals(";"))
            {
                if (!reachedElse)
                {
                    while (_stack.Count != 0 && !_stack.Peek().Equals("if"))
                    {
                        _poliz.Add(_stack.Pop());
                    }
                    if (_stack.Count != 0)
                    {
                        _stack.Pop(); // drop the "if" marker
                    }
                }
                else
                {
                    while (_stack.Count != 0)
                    {
                        _poliz.Add(_stack.Pop());
                    }
                    // Patch the placeholder emitted by "else" with the end-of-statement index.
                    _poliz[_poliz.FindIndex(x => x.Equals("null"))] = _poliz.Count.ToString();
                }
            }
            else if (lexeme.Equals(","))
            {
                countAts++;
                while (_stack.Count != 0 && !_stack.Peek().Equals("["))
                {
                    _poliz.Add(_stack.Pop());
                }
            }
            else if (lexeme.Equals(")") || lexeme.Equals("]"))
            {
                var brace = lexeme.Equals(")") ? "(" : "[";
                while (_stack.Count != 0 && !_stack.Peek().Equals(brace))
                {
                    _poliz.Add(_stack.Pop());
                }
                _stack.Pop(); // the matching opening bracket
                // The stack is empty here for a bracket that is not inside an "if",
                // so check before peeking.
                if (_stack.Count != 0 && _stack.Peek().Equals("flag"))
                {
                    _stack.Pop();
                    _poliz.Add((_poliz.Count + 3).ToString());
                    _poliz.Add("null");
                    _poliz.Add("3Y");
                }
                if (lexeme.Equals("]"))
                {
                    countAts++;
                    _poliz.Add(countAts.ToString());
                    _poliz.Add(_stack.Pop()); // the "#" pushed when "[" was opened
                    countAts = 0;
                }
            }
            else
            {
                if (!lexeme.Equals("(") && !lexeme.Equals("["))
                {
                    // Binary operator: first emit pending operators that bind at least as tightly.
                    while (_stack.Count != 0 &&
                           _priorities[lexeme] <= _priorities[_stack.Peek()])
                    {
                        _poliz.Add(_stack.Pop());
                    }
                }
                if (lexeme.Equals("["))
                {
                    countAts++;
                    _stack.Push("#");
                }
                _stack.Push(lexeme);
            }
        }
        return _poliz.Aggregate(string.Empty, (current, x) => current + (x + " "));
    }
}

Related

Evaluate logic expression contained in a passed in string based on a list of strings

So I am unsure how to word this question properly.
If you look below lets say I have a list of options (PG , PA, PM, TK, TD) that a customer has ordered. Now lets say I have some expression I need evaluate against the customers ordered options such as: PA OR PB where evaluates to customers list of options contains either option PA or PB. Seems simple enough but these expressions could grow quite a bit. Below in the code are some good examples of what I would like to accomplish.
I by no means claim to have knowledge about string parsing and comparing logical operations. I am looking for some general direction or what my options are. I saw some things about dynamic linq, rule engines, expression trees, etc. Its just a whole lot to absorb and am looking for some direction on which would accomplish what I want?
I am open to just about any approach. Any answers are appreciated!
Code:
class Program
{
    static void Main(string[] args)
    {
        // The options the customer selected.
        var CustomerSelectedOptions = new List<string> { "PG", "PA", "PM", "TK", "TD" };

        // Expected: true - the customer list contains PA.
        var LogicOperation = "PA OR (PB AND PC)";
        // Expected: true - the customer list lacks PB but contains PM.
        var LogicOperation2 = "PF OR (NOT PB AND PM)";
        // Expected: false - the customer does have PG selected.
        var LogicOperation3 = "NOT PG AND NOT(PL AND TZ)";
    }
}
There are a few different approaches you can take. One common one is to replace the option checks with true or false and then use one of the built-in expression evaluators to evaluate the resulting expression. Note: XOR is not available for these.
/// <summary>
/// Evaluates a boolean option expression (e.g. <c>PA OR (PB AND PC)</c>) by replacing each
/// option name with <c>True</c>/<c>False</c> and handing the result to a built-in evaluator.
/// </summary>
public static class Evaluator {
    // An option name or operator keyword: a run of upper-case ASCII letters.
    static Regex wordRE = new Regex(@"[A-Z]+", RegexOptions.Compiled);
    static HashSet<string> Operators = new[] { "AND", "OR", "NOT" }.ToHashSet(StringComparer.OrdinalIgnoreCase);

    /// <summary>Evaluates <paramref name="op"/> against the selected options.</summary>
    /// <param name="options">Options that count as true; any other name is false.</param>
    /// <param name="op">Expression built from option names, AND, OR, NOT and parentheses.</param>
    /// <returns>The truth value of the expression.</returns>
    public static bool Evaluate(this List<string> options, string op) {
        // Substitute match by match. A plain string.Replace per option would also rewrite
        // option names that occur inside longer names ("P" inside "PA").
        op = wordRE.Replace(op, m => Operators.Contains(m.Value)
            ? m.Value
            : options.Contains(m.Value).ToString());
        // Alternative back ends that accept the same substituted expression:
        //return DTEval(op) == 1;
        return CompEval(op);
        //return XEval(op);
    }

    /// <summary>Evaluates <paramref name="expression"/> through a computed DataColumn.</summary>
    static double DTEval(string expression) {
        var dt = new DataTable();
        var loDataColumn = new DataColumn("Eval", typeof(double), expression);
        dt.Columns.Add(loDataColumn);
        dt.Rows.Add(0);
        return (double)(dt.Rows[0]["Eval"]);
    }

    // Shared table used only as a host for DataTable.Compute.
    static DataTable cDT = new DataTable();

    /// <summary>Evaluates <paramref name="expression"/> with DataTable.Compute.</summary>
    static bool CompEval(string expression) {
        return (bool)cDT.Compute(expression, "");
    }

    /// <summary>Evaluates <paramref name="expression"/> as an XPath boolean expression.</summary>
    public static bool XEval(string expression) {
        // XPath spells the literals as functions: true(), false(), not(...).
        expression = new System.Text.RegularExpressions.Regex(@"not +(true|false)").Replace(expression.ToLower(), " not(${1}) ");
        expression = new System.Text.RegularExpressions.Regex(@"(true|false)").Replace(expression, " ${1}() ");
        return (bool)new System.Xml.XPath.XPathDocument(new System.IO.StringReader("<r/>")).CreateNavigator()
            .Evaluate(String.Format("boolean({0})", expression));
    }
}
The Evaluate method comments show the different options you could use.
Alternatively, you could write your own expression evaluator. A simple one is a recursive descent evaluator that parses a simple grammar. This one uses C# precedence rules, having OR/XOR/AND binding left to right.
/// <summary>
/// Recursive-descent evaluator for boolean option expressions. An identifier is true
/// when it appears in the list passed to the constructor.
/// </summary>
public class Evaluator {
    // recursive descent boolean expression evaluator
    // grammar:
    //    primary  = id
    //    primary  = ( expr )
    //    unop     = primary
    //    unop     = not unop
    //    andop    = unop [ and unop ]*
    //    xorop    = andop [ xor andop ]*
    //    orop     = xorop [ or xorop ]*
    //    expr     = orop

    /// <summary>Token stream over an expression: words and parentheses.</summary>
    public class TokenList {
        List<string> tokens;
        int curTokenNum;
        static Regex tokenRE = new Regex(@"\w+|[()]", RegexOptions.Compiled);

        public TokenList(string expr) {
            curTokenNum = 0;
            tokens = tokenRE.Matches(expr).Select(m => m.Value).ToList();
        }

        /// <summary>The current token, or the empty string once the input is exhausted.</summary>
        public string CurToken => curTokenNum < tokens.Count ? tokens[curTokenNum] : String.Empty;
        public void MoveNext() => ++curTokenNum;
        public bool MoreTokens => curTokenNum < tokens.Count;

        /// <summary>Advances and requires that another token exists.</summary>
        /// <exception cref="InvalidExpressionException">The input ended.</exception>
        public void NextToken() {
            MoveNext();
            if (!MoreTokens)
                throw new InvalidExpressionException("Expected token");
        }
    }

    // The position of each string must match the corresponding enum member below.
    static List<string> OperatorStrings = new[] { "AND", "OR", "XOR", "NOT" }.ToList();
    enum Operators { and, or, xor, not };
    static List<string> ParenStrings = new[] { "(", ")" }.ToList();
    enum Parens { open, close };

    TokenList tokens;
    List<string> trueOptions;

    /// <param name="trueOptions">Identifiers that evaluate to true (matched case-sensitively).</param>
    public Evaluator(List<string> trueOptions) {
        this.trueOptions = trueOptions;
    }

    string curToken => tokens.CurToken;
    bool curTokenValue => trueOptions.Contains(curToken);
    bool isOperator => OperatorStrings.FindIndex(s => s.Equals(curToken, StringComparison.OrdinalIgnoreCase)) != -1;
    Operators curOp => (Operators)OperatorStrings.FindIndex(s => s.Equals(curToken, StringComparison.OrdinalIgnoreCase));
    bool isParen => ParenStrings.Contains(curToken);
    Parens curParen => (Parens)(ParenStrings.IndexOf(curToken));

    /// <summary>Parses an identifier and returns its truth value.</summary>
    public bool id() {
        if (isOperator)
            throw new InvalidExpressionException("missing operand");
        else {
            var ans = curTokenValue;
            tokens.MoveNext();
            return ans;
        }
    }

    bool primary() {
        if (isParen)
            if (curParen == Parens.open) {
                tokens.NextToken();
                var ans = expr();
                if (!isParen || curParen != Parens.close)
                    throw new InvalidExpressionException($"missing ) at {curToken}");
                else
                    tokens.MoveNext();
                return ans;
            }
            else
                throw new InvalidExpressionException("Invalid )");
        else
            return id();
    }

    bool unop() {
        if (isOperator && curOp == Operators.not) {
            tokens.NextToken();
            return !unop();
        }
        else
            return primary();
    }

    // The binary levels use the non-short-circuit operators & ^ | on purpose: the right
    // operand must always be parsed so that its tokens are consumed.
    bool andop() {
        var ans = unop();
        while (tokens.MoreTokens && isOperator && curOp == Operators.and) {
            tokens.NextToken();
            ans = ans & unop();
        }
        return ans;
    }

    bool xorop() {
        var ans = andop();
        while (tokens.MoreTokens && isOperator && curOp == Operators.xor) {
            tokens.NextToken();
            ans = ans ^ andop();
        }
        return ans;
    }

    bool orop() {
        var ans = xorop();
        while (tokens.MoreTokens && isOperator && curOp == Operators.or) {
            tokens.NextToken();
            ans = ans | xorop();
        }
        return ans;
    }

    bool expr() => orop();

    /// <summary>Evaluates <paramref name="exp"/>.</summary>
    /// <exception cref="InvalidExpressionException">The expression is malformed.</exception>
    public bool Value(string exp) {
        tokens = new TokenList(exp);
        var ans = expr();
        if (tokens.MoreTokens)
            throw new InvalidExpressionException($"Unrecognized token {curToken} after expression");
        return ans;
    }
}
You can call it by creating an Evaluator and passing it an expression to evaluate:
var eval = new Evaluator(CustomerSelectedOptions);
var ans = eval.Value(LogicOperation);

How to use IF-ELSE in RPN(Reverse Polish Notation)?

i have done a RPN class to calculate strings which end-user input like
"1.0+3/2-tan(45)/(1+1)+sin(30)*abs(-1)+Abs(-10)"
Then, I want to parsing conditional statements and multi-parameters function such as "if(1>2,3/3,2*1)","max(1,2,3,4)"
So, my questions how to use IF-ELSE in the RPN?
Here's my code: enter link description here
For if(1>2,3/3,2*1) you would first evaluate the three arguments from right to left and push their results on the stack, so that it looks like this:
top-of-stack->false
1
2
Then if would be implemented in the RPN engine something like (pseudo-code):
void DoIf()
{
    // The condition sits on top, above the "true" result and then the "false" result.
    var condition = pop();
    if (!condition)
    {
        pop(); // drop the "true" result; the "false" result stays on the stack
        return;
    }

    var result = pop(); // take the "true" result
    pop();              // drop the "false" result
    push(result);       // put the "true" result back
}
As for multi-parameter functions, there should be no special handling needed. Just evaluate and push all arguments (right to left, typically). The implementation of the function should pop off the required number of arguments and then push its result (if any).
I tried to pre-process multi-parameter functions such as IF and MAX before calling RPN.Parse():
/// <summary>
/// Pre-processes an expression by evaluating every call to a multi-parameter function
/// (<c>IF</c>, <c>MAX</c>) and replacing the call with its parenthesized numeric result,
/// so that the remaining expression can be handed to the RPN evaluator.
/// </summary>
public class MultiParameterFunctionParser
{
    /// <summary>Names of the functions this parser evaluates (matched case-insensitively).</summary>
    public readonly List<string> Funcs = new List<string> {"IF", "MAX"};

    /// <summary>Replaces every supported function call in <paramref name="exp"/> with its value.</summary>
    /// <param name="exp">Expression that may contain IF(...) / MAX(...) calls.</param>
    /// <returns>The expression with those calls replaced by <c>(value)</c>.</returns>
    public string Parse(string exp)
    {
        while (IsFunction(exp, out var index, out var funcName))
        {
            var parameters = GetParameters(exp, index, funcName, out var before, out var after);
            var list = GetParameterList(parameters);
            var value = Evaluate(list, funcName);
            exp = $"{before}({value}){after}";
        }
        return exp;
    }

    /// <summary>
    /// Looks for a call to one of <see cref="Funcs"/> in <paramref name="exp"/>.
    /// </summary>
    /// <param name="exp">Expression to search.</param>
    /// <param name="index">Start index of the function name, or -1 when none is found.</param>
    /// <param name="funcName">Name of the function found, or the empty string.</param>
    /// <returns>True when a call with room for at least one argument character and a closing ')' exists.</returns>
    private bool IsFunction(string exp, out int index, out string funcName)
    {
        index = -1;
        funcName = "";
        foreach (var func in Funcs)
        {
            // Ordinal matching: function names are identifiers, not linguistic text
            // (culture-aware matching fails for "if" under Turkish casing rules).
            var idx = exp.IndexOf($"{func}(", StringComparison.OrdinalIgnoreCase);
            // The first argument character starts after the name and '('; use the actual
            // name length rather than assuming it is 2.
            var firstArgument = idx + func.Length + 1;
            if (idx == -1 || firstArgument >= exp.Length - 1)
                continue;
            index = idx;
            funcName = func;
            return true;
        }
        return false;
    }

    /// <summary>
    /// Extracts the raw argument text of the call that starts at <paramref name="index"/>.
    /// </summary>
    /// <param name="exp">8+if(12,sin(90),0)+1.2</param>
    /// <param name="index">2 (start index of "if")</param>
    /// <param name="funcName">Name of the function being extracted.</param>
    /// <param name="before">8+</param>
    /// <param name="after">+1.2</param>
    /// <returns>12,sin(90),0</returns>
    /// <exception cref="ArgumentOutOfRangeException">The call has no matching ')'.</exception>
    private static string GetParameters(string exp, int index, string funcName, out string before, out string after)
    {
        before = exp.Substring(0, index);
        var start = index + funcName.Length + 1; // first character after '('
        var depth = 1;                           // unmatched '(' seen so far
        var end = start;
        while (end < exp.Length)
        {
            var c = exp[end];
            if (c == '(')
                depth++;
            else if (c == ')' && --depth == 0)
                break;
            end++;
        }
        if (end >= exp.Length)
            throw new ArgumentOutOfRangeException(nameof(exp), $"Missing ')' for {funcName}(");
        after = exp.Substring(end + 1);
        return exp.Substring(start, end - start);
    }

    /// <summary>
    /// Splits an argument string on the commas that are not nested inside parentheses.
    /// </summary>
    /// <param name="exp">MAX(1,-1),1,0</param>
    /// <returns>{"MAX(1,-1)","1","0"}</returns>
    private static List<string> GetParameterList(string exp)
    {
        var results = new List<string>();
        var depth = 0;
        var start = 0;
        for (var i = 0; i < exp.Length; i++)
        {
            var c = exp[i];
            if (c == '(')
                depth++;
            else if (c == ')')
                depth--;
            else if (c == ',' && depth == 0)
            {
                results.Add(exp.Substring(start, i - start));
                start = i + 1;
            }
        }
        results.Add(exp.Substring(start));
        return results;
    }

    /// <summary>Dispatches to the evaluator of <paramref name="funcName"/>; unknown names yield 0.</summary>
    private static double Evaluate(List<string> parameters, string funcName)
    {
        if (funcName.Equals("MAX", StringComparison.OrdinalIgnoreCase))
            return EvaluateMax(parameters);
        if (funcName.Equals("IF", StringComparison.OrdinalIgnoreCase))
            return EvaluateIF(parameters);
        return 0;
    }

    /// <summary>
    /// Evaluates IF(condition, whenTrue, whenFalse). All three arguments are evaluated;
    /// a boolean condition maps to 1 / -1 and any condition value &gt;= 0 selects whenTrue.
    /// </summary>
    /// <exception cref="ArgumentException">There are not exactly three arguments.</exception>
    /// <exception cref="FormatException">An argument does not evaluate to a number or boolean.</exception>
    private static double EvaluateIF(List<string> parameters)
    {
        if (parameters == null || parameters.Count != 3)
            throw new ArgumentException("EvaluateIF parameters.Count()!=3");
        var results = new List<double>();
        foreach (var parameter in parameters)
        {
            var rpn = new RPN();
            rpn.Parse(parameter);
            var obj = rpn.Evaluate();
            if (obj == null)
            {
                throw new FormatException("EvaluateIF Not Number!");
            }
            var text = obj.ToString();
            if (text.Equals("true", StringComparison.OrdinalIgnoreCase))
            {
                results.Add(1);
            }
            else if (text.Equals("false", StringComparison.OrdinalIgnoreCase))
            {
                results.Add(-1);
            }
            else if (double.TryParse(text, out var d))
            {
                results.Add(d);
            }
            else
            {
                throw new FormatException("EvaluateIF Not Number!");
            }
        }
        return results[0] >= 0 ? results[1] : results[2];
    }

    /// <summary>
    /// Evaluates MAX(a, b, ...). Arguments that do not evaluate to a number are ignored;
    /// the result is 0 when no argument is numeric.
    /// </summary>
    private static double EvaluateMax(IEnumerable<string> parameters)
    {
        var results = new List<double>();
        foreach (var parameter in parameters)
        {
            var rpn = new RPN();
            rpn.Parse(parameter);
            var obj = rpn.Evaluate();
            // A null result is treated like any other non-numeric argument.
            if (obj != null && double.TryParse(obj.ToString(), out var d))
                results.Add(d);
        }
        return results.Count > 0 ? results.Max() : 0;
    }
}

C# arbitrary nested Lists

In Python I can convert a binary tree structure to an arbitrary nested List:
great
/ \
gr eat
/ \ / \
g r e at
/ \
a t
[great, [gr, [g, r], eat, [e, at, [a, t]]]
Is there a way to build an arbitrary nested List in C#?
EDIT: I took a BinaryTree<T> class from MSDN docs as a base class for my custom StrBinaryTree class. Method FormTree is doing a job for creating a tree structure from a string:
public class StrBinaryTree : BinaryTree<string>
{
// Builds a tree whose root holds <data>; an empty string produces an empty tree.
public StrBinaryTree(string data)
{
    bool hasData = data.Length != 0;
    if (hasData)
    {
        base.Root = new BinaryTreeNode<string>();
        base.Root.Data = data;
    }
    else
    {
        base.Root = null;
    }
    base.Count = hasData ? 1 : 0;
}
// Recursively splits the node's string in half: the first half becomes the left child,
// the remainder the right child. Nodes holding fewer than two characters stay leaves.
public void FormTree(BinaryTreeNode<string> node)
{
    var half = node.Data.Length / 2;
    if (half != 0)
    {
        node.Left = new BinaryTreeNode<string>(node.Data.Substring(0, half));
        node.Right = new BinaryTreeNode<string>(node.Data.Substring(half));
        base.Count += 2;
        FormTree(node.Left);
        FormTree(node.Right);
    }
}
...}
I would use recursion to walk the tree. Since you didn't tell us the type of the tree, we cannot give you exact C# sample code.
But it would be something like this:
// Flattens a tree into a nested list: each child contributes its name, followed by a
// nested list of its own children when it has any.
// NOTE(review): Tree is a placeholder type; HasChildNodes, ChildNodes, Name and
// GetSubtree are assumed members - adapt them to the real tree class.
//   tree - the (sub)tree whose children are listed
//   list - list to append to; a new list is created when omitted
// Returns the list that received the entries.
List<Object> GetNestedListFromTree(Tree tree, List<Object> list = null)
{
    if (list == null)
    {
        list = new List<Object>();
    }

    if (!tree.HasChildNodes)
    {
        return list;
    }

    foreach (var node in tree.ChildNodes)
    {
        list.Add(node.Name);
        var subtree = node.GetSubtree;
        if (subtree.HasChildNodes)
        {
            // Each level gets a fresh list that is added exactly once, so no list
            // ever ends up containing itself.
            list.Add(GetNestedListFromTree(subtree, new List<Object>()));
        }
    }

    return list;
}
This isn't tested because I don't know your tree but yeah ... It should work if your tree can provide the needed functionality.
Try this solution
// Builds the nested-list form of the "split in halves" tree of <input>:
// [input, [left, [...], right, [...]]]. Callers normally omit <list>; it is the
// list that receives the children during recursion.
public static List<object> Solve(string input, List<object> list = null)
{
    if (list == null)
    {
        // Top-level call: the result starts with the whole string.
        list = new List<object>();
        list.Add(input);
    }

    if (input.Length <= 1)
    {
        return list;
    }

    var split = input.Length / 2;
    string[] halves = { input.Substring(0, split), input.Substring(split) };

    var children = new List<object>();
    list.Add(children);
    for (var i = 0; i < halves.Length; i++)
    {
        children.Add(halves[i]);
        Solve(halves[i], children);
    }

    return list;
}
// Writes <input> to the console: strings verbatim, nested List<object> values as
// "[item, item, ...]". Any other value is written with Console.Write's default formatting.
public static void Show(object input)
{
    var list = input as List<object>;
    if (list == null)
    {
        Console.Write(input);
        return;
    }

    Console.Write("[");
    for (var i = 0; i < list.Count; i++)
    {
        // Separate by position. Comparing each item with list.Last() is a reference
        // comparison and drops separators when the last item also occurs earlier.
        if (i > 0)
        {
            Console.Write(", ");
        }
        Show(list[i]);
    }
    Console.Write("]");
}
Usage:
var result = Solve("great");
Show(result);//[great, [gr, [g, r], eat, [e, at, [a, t]]]]
Approximate code for BinaryTreeNode:
// Expands <node> into a full tree by repeatedly halving its string;
// nodes holding at most one character are returned unchanged.
public static BinaryTreeNode<string> Solve(BinaryTreeNode<string> node)
{
    if (node.Data.Length <= 1)
    {
        return node;
    }

    var split = node.Data.Length / 2;
    var leftText = node.Data.Substring(0, split);
    var rightText = node.Data.Substring(split);
    node.Left = Solve(new BinaryTreeNode<string>(leftText));
    node.Right = Solve(new BinaryTreeNode<string>(rightText));
    return node;
}
Usage:
var result = Solve(new BinaryTreeNode<string>("great"));
Try this:
// Builds the tree for <text>: the node holds the text and, when the text is longer
// than one character, has one child per half.
public Tree<string> Build(string text)
{
    var node = new Tree<string>() { Value = text };
    if (text.Length <= 1)
    {
        return node;
    }

    var half = text.Length / 2;
    node.Add(Build(text.Substring(0, half)));
    node.Add(Build(text.Substring(half)));
    return node;
}
/// <summary>A tree node that is itself the list of its child nodes.</summary>
public class Tree<T> : List<Tree<T>>
{
    /// <summary>The payload stored in this node.</summary>
    public T Value;

    /// <summary>
    /// Formats the node as its quoted value, followed by the bracketed,
    /// comma-separated children when there are any.
    /// </summary>
    public override string ToString()
    {
        var label = $"\"{this.Value}\"";
        if (this.Count == 0)
        {
            return label;
        }

        var children = String.Join(", ", this.Select(child => child.ToString()));
        return label + $" [{children}]";
    }
}
When I run Build("great") I get:
"great" ["gr" ["g", "r"], "eat" ["e", "at" ["a", "t"]]]

How do I sort strings alphabetically while accounting for value when a string is numeric?

I'm trying to sort an array of numbers that are strings and I'd like them to sort numerically.
The catch is that I cannot convert the numbers into int.
Here is the code:
string[] things= new string[] { "105", "101", "102", "103", "90" };
foreach (var thing in things.OrderBy(x => x))
{
Console.WriteLine(thing);
}
Output:
101, 102, 103, 105, 90
I'd like:
90, 101, 102, 103, 105
EDIT:
The output can't be 090, 101, 102...
Updated the code sample to say "things" instead of "sizes". The array can be something like this:
string[] things= new string[] { "paul", "bob", "lauren", "007", "90" };
That means it needs to be sorted alphabetically and by number:
007, 90, bob, lauren, paul
Pass a custom comparer into OrderBy. Enumerable.OrderBy will let you specify any comparer you like.
This is one way to do that:
// Demo: prints the sample values ordered by SemiNumericComparer.
void Main()
{
    var things = new[] { "paul", "bob", "lauren", "007", "90", "101"};
    var ordered = things.OrderBy(x => x, new SemiNumericComparer());
    foreach (var thing in ordered)
    {
        Console.WriteLine(thing);
    }
}
/// <summary>
/// Orders strings so that integers come first, in numeric order, followed by all other
/// strings in case-insensitive invariant-culture order.
/// </summary>
public class SemiNumericComparer: IComparer<string>
{
    /// <summary>
    /// Method to determine if a string is a number
    /// </summary>
    /// <param name="value">String to test</param>
    /// <returns>True if the string parses as a 32-bit integer</returns>
    public static bool IsNumeric(string value) => int.TryParse(value, out _);

    /// <inheritdoc />
    public int Compare(string s1, string s2)
    {
        bool firstIsNumber = IsNumeric(s1);
        bool secondIsNumber = IsNumeric(s2);

        // Exactly one side is a number: the number sorts first.
        if (firstIsNumber != secondIsNumber)
        {
            return firstIsNumber ? -1 : 1;
        }

        // Neither is a number: plain text comparison.
        if (!firstIsNumber)
        {
            return string.Compare(s1, s2, true, CultureInfo.InvariantCulture);
        }

        // Both are numbers: compare by value.
        return Convert.ToInt32(s1).CompareTo(Convert.ToInt32(s2));
    }
}
Just pad with zeroes to the same length:
int maxlen = sizes.Max(x => x.Length);
var result = sizes.OrderBy(x => x.PadLeft(maxlen, '0'));
Value is a string
List = List.OrderBy(c => c.Value.Length).ThenBy(c => c.Value).ToList();
Works
And, how about this ...
string[] sizes = new string[] { "105", "101", "102", "103", "90" };
var size = from x in sizes
orderby x.Length, x
select x;
foreach (var p in size)
{
Console.WriteLine(p);
}
There is a native Windows function, StrCmpLogicalW, that compares the digits inside strings as numbers instead of as characters. It is easy to write a comparer that calls that function and uses it for its comparisons.
/// <summary>
/// Comparer that delegates to the native StrCmpLogicalW function exported by Shlwapi.dll.
/// </summary>
public class StrCmpLogicalComparer : Comparer<string>
{
    [DllImport("Shlwapi.dll", CharSet = CharSet.Unicode)]
    private static extern int StrCmpLogicalW(string x, string y);

    /// <summary>Returns the result of StrCmpLogicalW for the two strings.</summary>
    public override int Compare(string x, string y) => StrCmpLogicalW(x, y);
}
It even works on strings that contain both text and numbers. Here is an example program that shows the difference between the default sort and the StrCmpLogicalW sort:
// Demo: sorts the same file names with the default comparer and then with
// StrCmpLogicalComparer, printing both orders separated by a blank line.
class Program
{
    static void Main()
    {
        var items = new List<string>()
        {
            "Example1.txt", "Example2.txt", "Example3.txt", "Example4.txt", "Example5.txt", "Example6.txt", "Example7.txt", "Example8.txt", "Example9.txt", "Example10.txt",
            "Example11.txt", "Example12.txt", "Example13.txt", "Example14.txt", "Example15.txt", "Example16.txt", "Example17.txt", "Example18.txt", "Example19.txt", "Example20.txt"
        };

        items.Sort();
        PrintAll(items);

        Console.WriteLine();

        items.Sort(new StrCmpLogicalComparer());
        PrintAll(items);

        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }

    // Writes every item on its own line.
    private static void PrintAll(List<string> items)
    {
        for (var i = 0; i < items.Count; i++)
        {
            Console.WriteLine(items[i]);
        }
    }
}
which outputs
Example1.txt
Example10.txt
Example11.txt
Example12.txt
Example13.txt
Example14.txt
Example15.txt
Example16.txt
Example17.txt
Example18.txt
Example19.txt
Example2.txt
Example20.txt
Example3.txt
Example4.txt
Example5.txt
Example6.txt
Example7.txt
Example8.txt
Example9.txt
Example1.txt
Example2.txt
Example3.txt
Example4.txt
Example5.txt
Example6.txt
Example7.txt
Example8.txt
Example9.txt
Example10.txt
Example11.txt
Example12.txt
Example13.txt
Example14.txt
Example15.txt
Example16.txt
Example17.txt
Example18.txt
Example19.txt
Example20.txt
try this
sizes.OrderBy(x => Convert.ToInt32(x)).ToList<string>();
Note:
this will helpful when all are string convertable to int.....
You say you cannot convert the numbers into int because the array can contain elements that cannot be converted to int, but there is no harm in trying:
string[] things = new string[] { "105", "101", "102", "103", "90", "paul", "bob", "lauren", "007", "90" };
Array.Sort(things, CompareThings);
foreach (var thing in things)
Debug.WriteLine(thing);
Then compare like this:
// Comparison for Array.Sort: integers first, ordered by value, then every other string
// in current-culture order.
private static int CompareThings(string x, string y)
{
    int intX, intY;
    bool xIsInt = int.TryParse(x, out intX);
    bool yIsInt = int.TryParse(y, out intY);

    if (xIsInt && yIsInt)
        return intX.CompareTo(intY);

    // A number always sorts before a non-number. Comparing mixed pairs as text makes the
    // ordering inconsistent ("2" < "10" numerically, "10" < "1a" and "1a" < "2" as text).
    if (xIsInt != yIsInt)
        return xIsInt ? -1 : 1;

    // Same ordering as x.CompareTo(y), but also accepts null.
    return string.Compare(x, y, StringComparison.CurrentCulture);
}
Output: 007, 90, 90, 101, 102, 103, 105, bob, lauren, paul
This site discusses alphanumeric sorting and will sort the numbers in a logical sense instead of an ASCII sense. It also takes into account the alphas around it:
http://www.dotnetperls.com/alphanumeric-sorting
EXAMPLE:
C:/TestB/333.jpg
11
C:/TestB/33.jpg
1
C:/TestA/111.jpg
111F
C:/TestA/11.jpg
2
C:/TestA/1.jpg
111D
22
111Z
C:/TestB/03.jpg
1
2
11
22
111D
111F
111Z
C:/TestA/1.jpg
C:/TestA/11.jpg
C:/TestA/111.jpg
C:/TestB/03.jpg
C:/TestB/33.jpg
C:/TestB/333.jpg
The code is as follows:
// Demo: sorts a mix of paths and short tokens with AlphaNumericComparer and prints them.
class Program
{
    static void Main(string[] args)
    {
        string[] samples =
        {
            "C:/TestB/333.jpg",
            "11",
            "C:/TestB/33.jpg",
            "1",
            "C:/TestA/111.jpg",
            "111F",
            "C:/TestA/11.jpg",
            "2",
            "C:/TestA/1.jpg",
            "111D",
            "22",
            "111Z",
            "C:/TestB/03.jpg"
        };

        Array.Sort(samples, new AlphaNumericComparer());

        for (var i = 0; i < samples.Length; i++)
        {
            Console.WriteLine(samples[i]);
        }
    }
}
/// <summary>
/// Compares strings chunk by chunk, where a chunk is a maximal run of digits or a maximal
/// run of non-digits. Two digit chunks are compared by numeric value; any other pair is
/// compared as text.
/// </summary>
public class AlphaNumericComparer : IComparer
{
    /// <summary>
    /// Compares two strings. Returns 0 when either argument is not a string.
    /// </summary>
    public int Compare(object x, object y)
    {
        string s1 = x as string;
        if (s1 == null)
        {
            return 0;
        }
        string s2 = y as string;
        if (s2 == null)
        {
            return 0;
        }

        int marker1 = 0;
        int marker2 = 0;

        // Walk through the two strings with two markers, one chunk at a time.
        while (marker1 < s1.Length && marker2 < s2.Length)
        {
            string chunk1 = ReadChunk(s1, ref marker1);
            string chunk2 = ReadChunk(s2, ref marker2);

            // If both chunks are numbers, compare them numerically.
            // Otherwise compare them alphabetically.
            int result = char.IsDigit(chunk1[0]) && char.IsDigit(chunk2[0])
                ? CompareDigitRuns(chunk1, chunk2)
                : chunk1.CompareTo(chunk2);

            if (result != 0)
            {
                return result;
            }
        }
        return s1.Length - s2.Length;
    }

    // Returns the run starting at <marker> whose characters are all digits or all
    // non-digits (like the first one), and advances <marker> past it.
    private static string ReadChunk(string s, ref int marker)
    {
        int start = marker;
        bool digits = char.IsDigit(s[start]);
        do
        {
            marker++;
        } while (marker < s.Length && char.IsDigit(s[marker]) == digits);
        return s.Substring(start, marker - start);
    }

    // Compares two digit runs by value without converting them to int, so runs of any
    // length are supported: after dropping leading zeros the longer run is larger, and
    // runs of equal length compare digit by digit.
    private static int CompareDigitRuns(string a, string b)
    {
        string ta = a.TrimStart('0');
        string tb = b.TrimStart('0');
        if (ta.Length != tb.Length)
        {
            return ta.Length < tb.Length ? -1 : 1;
        }
        int cmp = string.CompareOrdinal(ta, tb);
        return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
    }
}
I guess this will be much more good if it has some numeric in the string.
Hope it will help.
PS:I'm not sure about performance or complicated string values but it worked good something like this:
lorem ipsum
lorem ipsum 1
lorem ipsum 2
lorem ipsum 3
...
lorem ipsum 20
lorem ipsum 21
/// <summary>
/// Orders integers first (by value), then other strings. Strings that share the same
/// text before a trailing digit run ("lorem ipsum 2", "lorem ipsum 10") are ordered by
/// the value of that run; everything else falls back to a case-insensitive comparison.
/// </summary>
public class SemiNumericComparer : IComparer<string>
{
    public int Compare(string s1, string s2)
    {
        int s1r, s2r;
        var s1n = IsNumeric(s1, out s1r);
        var s2n = IsNumeric(s2, out s2r);
        // CompareTo instead of subtraction: the difference of two ints can overflow
        // and flip the sign.
        if (s1n && s2n) return s1r.CompareTo(s2r);
        else if (s1n) return -1;
        else if (s2n) return 1;

        if (s1 != null && s2 != null)
        {
            var num1 = Regex.Match(s1, @"\d+$");
            var num2 = Regex.Match(s2, @"\d+$");
            var onlyString1 = s1.Remove(num1.Index, num1.Length);
            var onlyString2 = s2.Remove(num2.Index, num2.Length);
            if (onlyString1 == onlyString2)
            {
                if (num1.Success && num2.Success) return CompareDigitRuns(num1.Value, num2.Value);
                else if (num1.Success) return 1;   // "abc1" sorts after "abc"
                else if (num2.Success) return -1;
            }
        }
        return string.Compare(s1, s2, true);
    }

    /// <summary>Tries to parse <paramref name="value"/> as a 32-bit integer.</summary>
    public bool IsNumeric(string value, out int result)
    {
        return int.TryParse(value, out result);
    }

    // Compares two digit runs by value without parsing them, so suffixes of any length
    // are supported: after dropping leading zeros the longer run is larger.
    private static int CompareDigitRuns(string a, string b)
    {
        var ta = a.TrimStart('0');
        var tb = b.TrimStart('0');
        if (ta.Length != tb.Length) return ta.Length < tb.Length ? -1 : 1;
        var cmp = string.CompareOrdinal(ta, tb);
        return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
    }
}
This seems a weird request and deserves a weird solution:
string[] sizes = new string[] { "105", "101", "102", "103", "90" };
foreach (var size in sizes.OrderBy(x => {
double sum = 0;
int position = 0;
foreach (char c in x.ToCharArray().Reverse()) {
sum += (c - 48) * (int)(Math.Pow(10,position));
position++;
}
return sum;
}))
{
Console.WriteLine(size);
}
The answer given by Jeff Paulsen is correct, but the comparer can be simplified considerably to this:
/// <summary>
/// Orders integers first, by value, followed by all other strings in
/// case-insensitive order.
/// </summary>
public class SemiNumericComparer: IComparer<string>
{
    public int Compare(string s1, string s2)
    {
        int i1, i2;
        bool firstIsNumber = int.TryParse(s1, out i1);
        bool secondIsNumber = int.TryParse(s2, out i2);

        // CompareTo rather than i1 - i2: the subtraction can overflow and flip the sign.
        if (firstIsNumber && secondIsNumber)
            return i1.CompareTo(i2);
        if (firstIsNumber)
            return -1;
        if (secondIsNumber)
            return 1;
        return string.Compare(s1, s2, true);
    }

    /// <summary>
    /// Returns true when the text form of <paramref name="value"/> parses as a 32-bit integer.
    /// </summary>
    public static bool IsNumeric(object value)
    {
        int result;
        // int.TryParse has no overload taking object; parse the value's text form.
        return value != null && int.TryParse(value.ToString(), out result);
    }
}
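This works because the only thing that is checked in the result of the comparer is whether it is greater than, less than, or equal to zero, so the comparison of two numbers can be reduced to a single expression instead of handling each outcome separately. Note that plain subtraction can overflow when the operands have opposite signs and large magnitudes, which flips the sign of the result; int.CompareTo avoids that.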
Also the IsNumeric method should not have to use a try-block and can benefit from TryParse.
And for those who are not sure:
This Comparer will sort values so, that non numeric values are always appended to the end of the list. If one wants them at the beginning the second and third if block have to be swapped.
/// <summary>
/// Comparer that delegates to the native StrCmpLogicalW function exported by shlwapi.dll.
/// </summary>
public class NaturalSort: IComparer<string>
{
    [DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
    public static extern int StrCmpLogicalW(string x, string y);

    /// <summary>Returns the result of StrCmpLogicalW for the two strings.</summary>
    public int Compare(string x, string y) => StrCmpLogicalW(x, y);
}
arr = arr.OrderBy(x => x, new NaturalSort()).ToArray();
The reason I needed it was to get the files in a directory whose filenames start with a number:
// Returns the files directly inside <path>, ordered by file name using NaturalSort.
public static FileInfo[] GetFiles(string path)
{
    var directory = new DirectoryInfo(path);
    FileInfo[] files = directory.GetFiles();
    return files.OrderBy(file => file.Name, new NaturalSort()).ToArray();
}
Try this :
string[] things= new string[] { "105", "101", "102", "103", "90" };
int tmpNumber;
foreach (var thing in (things.Where(xx => int.TryParse(xx, out tmpNumber)).OrderBy(xx => int.Parse(xx))).Concat(things.Where(xx => !int.TryParse(xx, out tmpNumber)).OrderBy(xx => xx)))
{
Console.WriteLine(thing);
}
Expanding on Jeff Paulsen answer. I wanted to make sure it didn't matter how many number or char groups were in the strings:
/// <summary>
/// Orders strings that mix text and numbers: each string is split into alternating
/// digit / non-digit groups (and at spaces), and the groups are compared pairwise -
/// numerically when both parse as integers, otherwise as case-insensitive text.
/// </summary>
public class SemiNumericComparer : IComparer<string>
{
    public int Compare(string s1, string s2)
    {
        if (int.TryParse(s1, out var i1) && int.TryParse(s2, out var i2))
        {
            return i1.CompareTo(i2);
        }

        var text1 = SplitCharsAndNums(s1);
        var text2 = SplitCharsAndNums(s2);
        if (text1.Length > 1 && text2.Length > 1)
        {
            // Compare only the groups both strings have; indexing up to the longer
            // length would run off the end of the shorter array.
            var shared = Math.Min(text1.Length, text2.Length);
            for (var i = 0; i < shared; i++)
            {
                var pos = Compare(text1[i], text2[i]);
                if (pos != 0)
                {
                    return pos;
                }
            }

            // All shared groups are equal: the string with fewer groups comes first.
            if (text1.Length != text2.Length)
            {
                return text1.Length < text2.Length ? -1 : 1;
            }
        }

        return string.Compare(s1, s2, true);
    }

    // Splits <text> at every digit/non-digit boundary and at every space.
    // A null or empty string is returned as a single group.
    private string[] SplitCharsAndNums(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return new[] { text };
        }

        var sb = new StringBuilder();
        for (var i = 0; i < text.Length - 1; i++)
        {
            sb.Append(text[i]);
            // Insert a separator wherever the digit-ness of adjacent characters changes.
            if (char.IsDigit(text[i]) != char.IsDigit(text[i + 1]))
            {
                sb.Append(" ");
            }
        }

        sb.Append(text[text.Length - 1]);
        return sb.ToString().Split(' ');
    }
}
I also took SplitCharsAndNums from an SO Page after amending it to deal with file names.
Example of short IComparer class.
if both string arguments can be converted to integer then arguments
are parsed to integers and compared
if only one argument can be converted to integer, then integer is
prioritized (has lower value) and are inserted before string.
if neither argument can be converted into an integer, then ordinary
string comparison is used.
Code:
/// <summary>
/// Comparer that orders integer-like strings by numeric value and places them
/// before strings that do not parse as integers.
/// </summary>
public class CompareIntegerStrings : IComparer<string>
{
    /// <summary>
    /// Compares two strings: numerically when both parse as <c>int</c>, the
    /// integer first when only one parses, and by <c>string.CompareTo</c> otherwise.
    /// </summary>
    /// <param name="x">Left-hand string.</param>
    /// <param name="y">Right-hand string.</param>
    /// <returns>Negative, zero or positive, per the <c>IComparer</c> contract.</returns>
    public int Compare(string x, string y)
    {
        bool leftIsInt = int.TryParse(x, out int left);
        bool rightIsInt = int.TryParse(y, out int right);

        if (leftIsInt && rightIsInt)
        {
            return left.CompareTo(right);
        }

        if (leftIsInt)
        {
            // Only the left side is an integer: it goes first.
            return -1;
        }

        if (rightIsInt)
        {
            // Only the right side is an integer: the left goes after it.
            return 1;
        }

        return x.CompareTo(y);
    }
}
In this example
// Sample input mixing zero-padded integers with plain text.
List<string> intStrings = new List<string> { "01","0022","abba", "11", "deep purple", "02", };
// Sort with CompareIntegerStrings; the key selector is the identity, so the strings themselves are compared.
List<string> orderedIntStrings = intStrings.OrderBy(i=>i,new CompareIntegerStrings()).ToList();
the ordered list orderedIntStrings is { "01","02","11","0022","abba","deep purple"}.
Recommend using NaturalSort.Extension(nuget/github), as it is a reasonably difficult operation as you can see from the answer.
using NaturalSort.Extension;
var ordered = things.OrderBy(x => x, StringComparison.OrdinalIgnoreCase.WithNaturalSort());
Try this out..
string[] things = new string[] { "paul", "bob", "lauren", "007", "90", "-10" };
List<int> num = new List<int>();
List<string> str = new List<string>();

// Partition the input: values that parse as int go to num, the rest to str.
foreach (string candidate in things)
{
    if (int.TryParse(candidate, out int parsed))
    {
        num.Add(parsed);
    }
    else
    {
        str.Add(candidate);
    }
}
Now Sort the lists and merge them back...
// Sort each partition once and materialise it. LINQ queries are deferred, so
// calling Count()/ElementAt() on the raw query inside the loop would re-run
// the sort on every iteration.
var strsort = (from s in str
               orderby s.Length
               select s).ToList();   // text values, shortest first
var numsort = (from n in num
               orderby n
               select n).ToList();   // numeric values, ascending

// Write the numbers back first, then the text values.
for (int i = 0; i < things.Length; i++)
{
    if (i < numsort.Count)
        things[i] = numsort[i].ToString();
    else
        things[i] = strsort[i - numsort.Count];
}
I just tried to make a contribution to this interesting question...
My preferred solution (if all strings are numeric only):
// Order by numerical order: (Assertion: all things are numeric strings only)
// Order by numeric value. int.Parse throws FormatException on any entry that
// is not a valid integer, so this requires every string to be numeric.
foreach (var thing in things.OrderBy(int.Parse))
{
    Console.WriteLine(thing);
}
/// <summary>
/// Small demo that sorts a list of buyer strings with <c>SortNumericComparer</c>.
/// </summary>
public class Test
{
    /// <summary>
    /// Builds a sample list and stores a sorted copy of it in a local variable.
    /// </summary>
    public void TestMethod()
    {
        var buyersList = new List<string> { "5", "10", "1", "str", "3", "string" };
        var sortedCopy = new List<string>(SortedList(buyersList));
    }

    /// <summary>
    /// Returns a new list with the items of <paramref name="unsoredList"/>
    /// ordered by <c>SortNumericComparer</c>.
    /// </summary>
    /// <param name="unsoredList">Items to sort; the list itself is not modified.</param>
    /// <returns>A newly created, sorted list.</returns>
    public List<string> SortedList(List<string> unsoredList)
    {
        var comparer = new SortNumericComparer();
        var ordered = unsoredList.OrderBy(item => item, comparer);
        return ordered.ToList();
    }
}
/// <summary>
/// Comparer that orders strings which parse as <c>int</c> by numeric value and
/// places them before strings that do not parse.
/// </summary>
public class SortNumericComparer : IComparer<string>
{
    /// <summary>
    /// Compares two strings.
    /// </summary>
    /// <param name="x">Left-hand string.</param>
    /// <param name="y">Right-hand string.</param>
    /// <returns>
    /// The numeric ordering when both parse as <c>int</c>; -1 when only
    /// <paramref name="x"/> parses; 1 when only <paramref name="y"/> parses;
    /// otherwise a case-insensitive <c>string.Compare</c> result.
    /// </returns>
    public int Compare(string x, string y)
    {
        bool xIsInt = int.TryParse(x, out int xInt);
        bool yIsInt = int.TryParse(y, out int yInt);

        if (xIsInt && yIsInt)
        {
            // CompareTo instead of subtraction: xInt - yInt can overflow and
            // flip the sign when the operands are far apart.
            return xInt.CompareTo(yInt);
        }

        if (xIsInt)
        {
            return -1;
        }

        if (yIsInt)
        {
            return 1;
        }

        return string.Compare(x, y, true);
    }
}
Using Regex.Replace is so simple yet efficient. Note that the number "3" just has to be a number equal-to or larger than your longest string, so for anyone else, increase as needed.
using System.Text.RegularExpressions;
string[] things = new string[] { "105", "101", "102", "103", "90" };

// Every digit run is left-padded with zeros to this width before comparing,
// so it must be at least the length of the longest digit run in the input.
const int padWidth = 3;

// The sort key is the string with its digit runs zero-padded; the original
// strings are what gets printed.
foreach (var thing in things.OrderBy(x => Regex.Replace(x, @"\d+", m =>
    m.Value.PadLeft(padWidth, '0'))))
{
    Console.WriteLine(thing);
}
I would have commented under recursive's answer, but my reputation is too low for that.
Because recursive's answer only works with numeric strings (if You have a string like "I am just a damn long string", it would be sorted after "Not so long string") and OP edited his answer, my Idea for the question would be to sort the strings by differentiating them into numbers and not numbers:
// Width to pad numeric strings to: the length of the longest item.
int maxlen = items.Max(x => x.Length);
// Numeric strings are compared by their zero-padded form, everything else as-is.
// The result needs its own name: "var items = items.OrderBy(...)" refers to
// itself and does not compile.
var sortedItems = items.OrderBy(x => long.TryParse(x, out _) ? x.PadLeft(maxlen, '0') : x);
The underscore is for discarding the output
namespace X
{
    public class Utils
    {
        /// <summary>
        /// Compares <c>Projects.Sample</c> instances by the part of
        /// <c>sample_name</c> that precedes the first underscore, using the
        /// native <c>StrCmpLogicalW</c> function from Shlwapi.dll.
        /// </summary>
        public class StrCmpLogicalComparer : IComparer<Projects.Sample>
        {
            [DllImport("Shlwapi.dll", CharSet = CharSet.Unicode)]
            private static extern int StrCmpLogicalW(string x, string y);

            /// <summary>
            /// Compares the leading name segments of the two samples.
            /// </summary>
            /// <param name="x">Left-hand sample.</param>
            /// <param name="y">Right-hand sample.</param>
            /// <returns>The value returned by <c>StrCmpLogicalW</c>.</returns>
            public int Compare(Projects.Sample x, Projects.Sample y)
            {
                string left = LeadingSegment(x);
                string right = LeadingSegment(y);
                return StrCmpLogicalW(left, right);
            }

            /// <summary>Returns the text of <c>sample_name</c> before the first "_".</summary>
            private static string LeadingSegment(Projects.Sample sample)
            {
                return sample.sample_name.Split("_")[0];
            }
        }
    }
}
Even though this is an old question, I'd like to give a solution:
string[] things = new string[] { "105", "101", "102", "103", "90" };

// Sort by parsed integer value. Int32.Parse throws on non-numeric input, so
// this only works when every entry is a valid integer.
foreach (var thing in things.OrderBy(x => Int32.Parse(x)))
{
    Console.WriteLine(thing);
}
Whoa, quite simple, right? :D

How to word by word iterate in string in C#?

I want to iterate over string as word by word.
If I have a string "incidentno and fintype or unitno", I would like to read every word one by one as "incidentno", "and", "fintype", "or", and "unitno".
foreach (string word in "incidentno and fintype or unitno".Split(' ')) {
...
}
// One or more separator characters: whitespace , . - : ;
// The hyphen is escaped so it is a literal; an unescaped ".-:" inside the
// class is a range that would also match '/' and every digit.
var regex = new Regex(@"[\s,.\-:;]+");
var phrase = "incidentno and fintype or unitno";
// Leading or trailing separators produce empty entries; drop them.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
This works even if you have ".,; tabs and new lines" between your words.
Slightly twisted I know, but you could define an iterator block as an extension method on strings. e.g.
/// <summary>
/// Lazily yields the space-separated words of <paramref name="Text"/>, one at
/// a time, without building an array of all words first.
/// </summary>
/// <param name="Text">Text to scan; words are delimited by the space character.</param>
/// <returns>
/// The non-empty words in order of appearance. Leading, trailing and repeated
/// spaces do not produce empty entries; an empty string yields nothing.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown on first enumeration when <paramref name="Text"/> is null.
/// </exception>
public static IEnumerable<string> WordList(this string Text)
{
    if (Text == null)
    {
        throw new System.ArgumentNullException(nameof(Text));
    }

    int wordStart = 0;
    while (wordStart < Text.Length)
    {
        int spaceAt = Text.IndexOf(' ', wordStart);
        // No further space: the word runs to the end of the text.
        int wordEnd = spaceAt == -1 ? Text.Length : spaceAt;
        if (wordEnd > wordStart)
        {
            yield return Text.Substring(wordStart, wordEnd - wordStart);
        }
        wordStart = wordEnd + 1;
    }
}
// Usage example: print each word yielded by the WordList extension, wrapped in single quotes.
foreach (string word in "incidentno and fintype or unitno".WordList())
System.Console.WriteLine("'" + word + "'");
Which has the advantage of not creating a big array for long strings.
Use the Split method of the string class
string[] words = "incidentno and fintype or unitno".Split(" ");
This will split on spaces, so "words" will have [incidentno,and,fintype,or,unitno].
Assuming the words are always separated by a blank, you could use String.Split() to get an Array of your words.
There are multiple ways to accomplish this. Two of the most convenient methods (in my opinion) are:
Using string.Split() to create an array. I would probably use this method, because it is the most self-explanatory.
example:
string startingSentence = "incidentno and fintype or unitno";
string[] seperatedWords = startingSentence.Split(' ');
Alternatively, you could use (this is what I would use):
string[] seperatedWords = startingSentence.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
StringSplitOptions.RemoveEmptyEntries will remove any empty entries from your array that may occur due to extra whitespace and other minor problems.
Next - to process the words, you would use:
foreach (string word in seperatedWords)
{
//Do something
}
Or, you can use regular expressions to solve this problem, as Darin demonstrated (a copy is below).
example:
// One or more separator characters: whitespace , . - : ;
// The hyphen is escaped so it is a literal; an unescaped ".-:" inside the
// class is a range that would also match '/' and every digit.
var regex = new Regex(@"[\s,.\-:;]+");
var phrase = "incidentno and fintype or unitno";
// Leading or trailing separators produce empty entries; drop them.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
For processing, you can use similar code to the first option.
foreach (string word in words)
{
//Do something
}
Of course, there are many ways to solve this problem, but I think that these two would be the simplest to implement and maintain. I would go with the first option (using string.Split()) just because regex can sometimes become quite confusing, while a split will function correctly most of the time.
When using split, what about checking for empty entries?
string sentence = "incidentno and fintype or unitno";
// Split on space, comma, semicolon, tab and line breaks; RemoveEmptyEntries
// drops the empty strings produced by consecutive separators.
string[] words = sentence.Split(new char[] { ' ', ',', ';', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string word in words)
{
    // Process
}
EDIT:
I can't comment so I'm posting here but this (posted above) works:
foreach (string word in "incidentno and fintype or unitno".Split(' '))
{
...
}
My understanding of foreach is that it first calls GetEnumerator() and then calls .MoveNext() until false is returned. So the .Split won't be re-evaluated on each iteration.
/// <summary>
/// Splits <paramref name="inword"/> into words using the regular expression
/// <paramref name="regstr"/> as the separator pattern.
/// </summary>
/// <param name="inword">Text to split.</param>
/// <param name="regstr">Regular expression that matches a separator.</param>
/// <returns>The non-empty pieces between separator matches, in order.</returns>
public static string[] MyTest(string inword, string regstr)
{
    var regex = new Regex(regstr);
    // Split the text that was passed in rather than a hard-coded phrase.
    // Adjacent separators (e.g. ", ") produce empty pieces; filter them out.
    return System.Array.FindAll(regex.Split(inword), piece => piece.Length != 0);
}
? MyTest("incidentno, and .fintype- or; :unitno",#"[^\w+]")
[0]: "incidentno"
[1]: "and"
[2]: "fintype"
[3]: "or"
[4]: "unitno"
I'd like to add some information to JDunkerley's answer.
You can easily make this method more reliable if you give a string or char parameter to search for.
/// <summary>
/// Lazily yields the pieces of <paramref name="Text"/> that lie between
/// occurrences of the separator string <paramref name="Word"/>.
/// </summary>
/// <param name="Text">Text to scan.</param>
/// <param name="Word">Separator to split on; matched ordinally. Must not be empty.</param>
/// <returns>
/// The non-empty pieces in order of appearance; an empty text yields nothing.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown on first enumeration when either argument is null.
/// </exception>
/// <exception cref="System.ArgumentException">
/// Thrown on first enumeration when <paramref name="Word"/> is empty.
/// </exception>
public static IEnumerable<string> WordList(this string Text,string Word)
{
    if (Text == null)
    {
        throw new System.ArgumentNullException(nameof(Text));
    }
    if (Word == null)
    {
        throw new System.ArgumentNullException(nameof(Word));
    }
    if (Word.Length == 0)
    {
        // An empty separator matches at every position and would never advance.
        throw new System.ArgumentException("Separator must not be empty.", nameof(Word));
    }

    int pieceStart = 0;
    while (pieceStart < Text.Length)
    {
        int separatorAt = Text.IndexOf(Word, pieceStart, System.StringComparison.Ordinal);
        // No further separator: the piece runs to the end of the text.
        int pieceEnd = separatorAt == -1 ? Text.Length : separatorAt;
        if (pieceEnd > pieceStart)
        {
            yield return Text.Substring(pieceStart, pieceEnd - pieceStart);
        }
        // Skip the whole separator, not just its first character.
        pieceStart = pieceEnd + Word.Length;
    }
}
/// <summary>
/// Lazily yields the pieces of <paramref name="Text"/> that lie between
/// occurrences of the separator character <paramref name="c"/>.
/// </summary>
/// <param name="Text">Text to scan.</param>
/// <param name="c">Separator character.</param>
/// <returns>
/// The non-empty pieces in order of appearance. Leading, trailing and repeated
/// separators do not produce empty entries; an empty text yields nothing.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// Thrown on first enumeration when <paramref name="Text"/> is null.
/// </exception>
public static IEnumerable<string> WordList(this string Text, char c)
{
    if (Text == null)
    {
        throw new System.ArgumentNullException(nameof(Text));
    }

    int pieceStart = 0;
    while (pieceStart < Text.Length)
    {
        int separatorAt = Text.IndexOf(c, pieceStart);
        // No further separator: the piece runs to the end of the text.
        int pieceEnd = separatorAt == -1 ? Text.Length : separatorAt;
        if (pieceEnd > pieceStart)
        {
            yield return Text.Substring(pieceStart, pieceEnd - pieceStart);
        }
        pieceStart = pieceEnd + 1;
    }
}
I wrote a string processor class that you can use.
Example:
metaKeywords = bodyText.Process(prepositions).OrderByDescending().TakeTop().GetWords().AsString();
Class:
/// <summary>
/// Extension methods for extracting keyword frequencies from a body of text:
/// normalise, split into words, count, order, take the top entries and format.
/// </summary>
public static class StringProcessor
{
    // Fragments stripped from every word before it is counted, in this order.
    private static readonly string[] StrippedFragments =
    {
        ".", "(", ")", "?", "!", ",", "<br>", ":", ";", "،", "-", "\n"
    };

    /// <summary>
    /// Replaces U+06A9 with U+0643 and U+06CC with U+064A so that both
    /// spellings of these two letters compare equal.
    /// </summary>
    /// <param name="strText">Text to normalise; may be null.</param>
    /// <returns>The normalised text, or an empty string for null or empty input.</returns>
    public static string ToNormalString(this string strText)
    {
        if (String.IsNullOrEmpty(strText)) return String.Empty;

        const char normalKaf = (char)1603;     // U+0643
        const char normalYah = (char)1610;     // U+064A
        const char nonNormalKaf = (char)1705;  // U+06A9
        const char nonNormalYah = (char)1740;  // U+06CC

        return strText.Replace(nonNormalKaf, normalKaf)
                      .Replace(nonNormalYah, normalYah);
    }

    /// <summary>
    /// Splits <paramref name="bodyText"/> into words and counts how often each
    /// word occurs.
    /// </summary>
    /// <param name="bodyText">Text to analyse; null is treated as empty.</param>
    /// <param name="blackListWords">Words to ignore, or null for none.</param>
    /// <param name="minimumWordLength">
    /// Length threshold: only words strictly longer than this value are counted.
    /// </param>
    /// <param name="splitor">Character that separates words.</param>
    /// <param name="perWordIsLowerCase">When true, words are lower-cased before counting.</param>
    /// <returns>One key/value pair per distinct word, the value being its count.</returns>
    public static List<KeyValuePair<String, Int32>> Process(this String bodyText,
        List<String> blackListWords = null,
        int minimumWordLength = 3,
        char splitor = ' ',
        bool perWordIsLowerCase = true)
    {
        string[] btArray = bodyText.ToNormalString().Split(splitor);
        Dictionary<String, Int32> wordsDic = new Dictionary<String, Int32>();

        foreach (string word in btArray)
        {
            string normalWord = perWordIsLowerCase ? word.ToLower() : word;
            foreach (string fragment in StrippedFragments)
            {
                normalWord = normalWord.Replace(fragment, "");
            }
            normalWord = normalWord.Trim();

            if (normalWord.Length > minimumWordLength && !normalWord.IsMemberOfBlackListWords(blackListWords))
            {
                // TryGetValue leaves count at 0 for a new word, so one lookup
                // serves both the first and the repeated occurrence.
                wordsDic.TryGetValue(normalWord, out int count);
                wordsDic[normalWord] = count + 1;
            }
        }

        return wordsDic.ToList();
    }

    /// <summary>
    /// Returns a new list ordered descending, by count or by word.
    /// </summary>
    /// <param name="list">Word/count pairs to order.</param>
    /// <param name="isBasedOnFrequency">True to order by count, false to order by word.</param>
    /// <returns>A new, ordered list.</returns>
    public static List<KeyValuePair<String, Int32>> OrderByDescending(this List<KeyValuePair<String, Int32>> list, bool isBasedOnFrequency = true)
    {
        return isBasedOnFrequency
            ? list.OrderByDescending(q => q.Value).ToList()
            : list.OrderByDescending(q => q.Key).ToList();
    }

    /// <summary>
    /// Returns the first <paramref name="n"/> entries of the list.
    /// </summary>
    /// <param name="list">Word/count pairs.</param>
    /// <param name="n">Maximum number of entries to keep.</param>
    /// <returns>A new list with at most <paramref name="n"/> entries.</returns>
    public static List<KeyValuePair<String, Int32>> TakeTop(this List<KeyValuePair<String, Int32>> list, Int32 n = 10)
    {
        return list.Take(n).ToList();
    }

    /// <summary>
    /// Extracts the words (keys) from the pairs, preserving order.
    /// </summary>
    /// <param name="list">Word/count pairs.</param>
    /// <returns>The words only.</returns>
    public static List<String> GetWords(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Key).ToList();
    }

    /// <summary>
    /// Extracts the counts (values) from the pairs, preserving order.
    /// </summary>
    /// <param name="list">Word/count pairs.</param>
    /// <returns>The counts only.</returns>
    public static List<Int32> GetFrequency(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Value).ToList();
    }

    /// <summary>
    /// Joins the items into one string with <paramref name="seprator"/> placed
    /// between consecutive items (no trailing separator).
    /// </summary>
    /// <param name="list">Items to join.</param>
    /// <param name="seprator">Text placed between items.</param>
    /// <returns>The joined string; empty for an empty list.</returns>
    public static String AsString<T>(this List<T> list, string seprator = ", ")
    {
        return string.Join(seprator, list);
    }

    /// <summary>
    /// Tells whether <paramref name="word"/> equals any black-listed word after
    /// that entry has been passed through <see cref="ToNormalString"/>.
    /// </summary>
    /// <param name="word">Word to look up.</param>
    /// <param name="blackListWords">Black list, or null for none.</param>
    /// <returns>True when the word is black-listed.</returns>
    private static bool IsMemberOfBlackListWords(this String word, List<String> blackListWords)
    {
        if (blackListWords == null) return false;
        return blackListWords.Any(w => w.ToNormalString().Equals(word));
    }
}

Categories