Related
I am learning to use expression trees/expressions in C#. I have gradually built up a parser, with which I can take a string in "calculator" syntax (like "2 * 3 + 14 * 4 / 7 - 5 * 5") and build and evaluate an abstract syntax tree (AST). It even calculates the correct answer! :-) The AST consists of Expression nodes: arithmetic binary nodes (Add, Subtract, Multiply, Divide) and Constant leaf nodes representing the integer values.
Next step: I want to add parameters to the expression to be parsed, like "2 * 3 + myVar1 * 4 / 7 - 5 * myVar2", and supply the actual values for the parameters at runtime (after the AST has been compiled). I can easily add the ParameterExpressions to the tree - but I cannot find out how to correctly compile my tree and supply the values.
The parser is built using Coco/R and an attributed Backus-Naur grammar, and looks like this:
using System.Linq.Expressions;
using Ex = System.Linq.Expressions.Expression;
using System;
namespace AtgKalk
{
/// <summary>
/// Recursive-descent parser for "calculator" expressions. It builds a
/// System.Linq.Expressions tree and prints the tree together with its value.
/// Grammar, as implemented by the methods below:
///   Calculator = CalcExpr .
///   CalcExpr   = Term { AddOp Term } .
///   Term       = Fact { MulOp Fact } .
///   Fact       = number | identifier .
/// </summary>
public class Parser
{
    // Token kinds returned by the scanner. Number() expects _tall, Parameter()
    // expects _identifikator, AddOp uses _pluss/_minus and MulOp uses _ganger/_deler.
    public const int _EOF = 0;
    public const int _identifikator = 1;
    public const int _tall = 2;
    public const int _pluss = 3;
    public const int _minus = 4;
    public const int _ganger = 5;
    public const int _deler = 6;
    public const int maxT = 7;

    // T, x and the `set` table at the end are not referenced by any method in this class.
    const bool T = true;
    const bool x = false;

    // A syntax error is only reported when at least this many tokens were
    // read since the previous one (see SynErr and Get).
    const int minErrDist = 2;

    public Scanner scanner;
    public Errors errors;
    public Token t; // last recognized token
    public Token la; // lookahead token
    int errDist = minErrDist;

    /// <summary>Creates a parser that reads tokens from <paramref name="scanner"/>.</summary>
    public Parser(Scanner scanner)
    {
        this.scanner = scanner;
        errors = new Errors();
    }

    /// <summary>
    /// Reports syntax error <paramref name="n"/> at the lookahead token, unless the
    /// previous error was fewer than minErrDist tokens ago, and resets the distance.
    /// </summary>
    void SynErr(int n)
    {
        if (errDist >= minErrDist) errors.SynErr(la.line, la.col, n);
        errDist = 0;
    }

    /// <summary>
    /// Advances one token: the lookahead becomes the current token and a new
    /// lookahead is scanned. Tokens whose kind is above maxT are dropped by
    /// restoring the previous lookahead and scanning again.
    /// </summary>
    void Get()
    {
        for (; ; )
        {
            t = la;
            la = scanner.Scan();
            if (la.kind <= maxT) { ++errDist; break; }
            la = t;
        }
    }

    /// <summary>Consumes the lookahead if it has kind <paramref name="n"/>, otherwise reports error n.</summary>
    void Expect(int n)
    {
        if (la.kind == n) Get(); else { SynErr(n); }
    }

    /// <summary>Parses one expression and prints "AST: tree = value".</summary>
    void Calculator()
    {
        Ex n;
        CalcExpr(out n);
        // After a syntax error the tree contains placeholder nodes (see Fact),
        // so its value would be meaningless; the error has already been printed.
        if (errors.count > 0) return;
        Console.Write($"AST: {n} = ");
        Console.WriteLine(Ex.Lambda<Func<int>>(n).Compile()());
        // The above works fine as long as there are no parameter names in the input string
    }

    /// <summary>CalcExpr = Term { AddOp Term }; combines left to right.</summary>
    void CalcExpr(out Ex n1)
    {
        Ex n2; Func<Ex, Ex, Ex> f;
        Term(out n1);
        while (la.kind == _pluss || la.kind == _minus)
        {
            AddOp(out f);
            Term(out n2);
            n1 = f(n1, n2);
        }
    }

    /// <summary>Term = Fact { MulOp Fact }; combines left to right.</summary>
    void Term(out Ex n1)
    {
        Ex n2; Func<Ex, Ex, Ex> f;
        Fact(out n1);
        while (la.kind == _ganger || la.kind == _deler)
        {
            MulOp(out f);
            Fact(out n2);
            n1 = f(n1, n2);
        }
    }

    /// <summary>Consumes '+' or '-' and returns a factory for the matching binary node.</summary>
    void AddOp(out Func<Ex, Ex, Ex> f)
    {
        f = null;
        if (la.kind == _pluss)
        {
            Get();
            f = (l, r) => Ex.Add(l, r);
        }
        else if (la.kind == _minus)
        {
            Get();
            f = (l, r) => Ex.Subtract(l, r);
        }
        else SynErr(8);
    }

    /// <summary>
    /// Fact = number | identifier. On any other token a syntax error is reported
    /// and a constant 0 is returned as a placeholder; the previous null result
    /// made Expression.Add/Lambda throw ArgumentNullException right after the
    /// error message was printed.
    /// </summary>
    void Fact(out Ex n)
    {
        if (la.kind == _tall)
        {
            Number(out n);
        }
        else if (la.kind == _identifikator)
        {
            Parameter(out n);
        }
        else
        {
            SynErr(9);
            n = Ex.Constant(0, typeof(int));
        }
    }

    /// <summary>Consumes '*' or '/' and returns a factory for the matching binary node.</summary>
    void MulOp(out Func<Ex, Ex, Ex> f)
    {
        f = null;
        if (la.kind == _ganger)
        {
            Get();
            f = (l, r) => Ex.Multiply(l, r);
        }
        else if (la.kind == _deler)
        {
            Get();
            f = (l, r) => Ex.Divide(l, r);
        }
        else SynErr(10);
    }

    /// <summary>Consumes a number token and wraps its integer value in a constant node.</summary>
    void Number(out Ex n)
    {
        Expect(_tall);
        n = Ex.Constant(int.Parse(t.val), typeof(int));
    }

    /// <summary>
    /// Consumes an identifier token and creates an int parameter node named after it.
    /// Every occurrence creates a new ParameterExpression object.
    /// </summary>
    void Parameter(out Ex n)
    {
        Expect(_identifikator);
        n = Ex.Parameter(typeof(int), t.val);
    }

    /// <summary>
    /// Entry point: primes the lookahead with an empty token, reads the first
    /// real token, parses one expression and then requires end of input.
    /// </summary>
    public void Parse()
    {
        la = new Token();
        la.val = "";
        Get();
        Calculator();
        Expect(_EOF);
    }

    static readonly bool[,] set = {
        {T,x,x,x, x,x,x,x, x}
    };
} // end Parser
/// <summary>
/// Counts syntax errors and writes one formatted line per error to
/// <see cref="errorStream"/>.
/// </summary>
public class Errors
{
    // Message text by error code; codes outside the table print as "error <n>".
    static readonly string[] messages =
    {
        "EOF expected",
        "identifikator expected",
        "tall expected",
        "pluss expected",
        "minus expected",
        "ganger expected",
        "deler expected",
        "??? expected",
        "invalid AddOp",
        "invalid Fakt",
        "invalid MulOp"
    };

    public int count = 0; // number of errors detected
    public System.IO.TextWriter errorStream = Console.Out; // error messages go to this stream
    public string errMsgFormat = "-- line {0} col {1}: {2}"; // 0=line, 1=column, 2=text

    /// <summary>Writes the message for error code <paramref name="n"/> and increments the counter.</summary>
    public virtual void SynErr(int line, int col, int n)
    {
        bool known = n >= 0 && n < messages.Length;
        string text = known ? messages[n] : "error " + n;
        errorStream.WriteLine(errMsgFormat, line, col, text);
        count++;
    }
} // Errors
/// <summary>
/// Exception that carries only a message. No code visible in this file throws it.
/// </summary>
public class FatalError : Exception
{
    public FatalError(string m)
        : base(m)
    {
    }
}
}
My problem lies in line 63, I think:
Console.WriteLine(Ex.Lambda<Func<int>>(n).Compile()());
Invocation:
// Scan the text given as the first command-line argument, then parse it.
// Parse() prints the expression tree and its value itself.
Scanner scanner = new Scanner(args[0]); // if args[0] contains the input string :-)
Parser parser = new Parser(scanner);
parser.Parse();
I have now solved my problem. Thanks to kaby76 for valuable tips leading me in the right direction. The example now can handle an arbitrary number of parameters (probably max 16, since this is the maximum number of input arguments for Func<...>)
The solution to the problem was threefold:
Collect the parameters and supply this collection of parameters to the Lambda
Remove the explicit type arguments from the Lambda, letting it infer types
Use DynamicInvoke to execute the resulting Delegate
The problematic statement then looks like this, for an expression with two parameters:
Console.WriteLine(Ex.Lambda(n, para).Compile().DynamicInvoke(3, 4));
I have a input like
// Sample input: operands and operators separated by single spaces.
string input = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";
// up to 10MB string size
int result = Calc(input); // 11 (evaluated strictly left to right)
the calculation is from left to right, number by number
the numbers are 0 to 99
multiplication before addition is ignored so 14 + 2 * 32 is 512
the possible calculations are +-*/
division by 0 is not possible so after a / can't be a 0
My Approach
/// <summary>
/// Evaluates "a op b op c ..." strictly left to right (no operator precedence).
/// Tokens must be separated by exactly one space and each operator must be a
/// single character accepted by <c>Operate</c>.
/// </summary>
/// <param name="sInput">The expression, e.g. "14 + 2 * 32".</param>
/// <returns>The integer result.</returns>
public static int Calc(string sInput)
{
    int iCurrent = sInput.IndexOf(' ');

    // A lone number has no separator. Previously IndexOf returned -1 here and
    // Substring(0, -1) threw ArgumentOutOfRangeException.
    if (iCurrent < 0)
    {
        return int.Parse(sInput);
    }

    int iResult = int.Parse(sInput.Substring(0, iCurrent));
    int iNext;

    // iCurrent is the space before an operator: the operator sits at
    // iCurrent + 1 and its operand starts at iCurrent + 3, so the next
    // separator cannot come before iCurrent + 4.
    while ((iNext = sInput.IndexOf(' ', iCurrent + 4)) != -1)
    {
        int iOperandStart = iCurrent + 3;
        // The length excludes the separator at iNext (the old expression was one too long).
        int iOperand = int.Parse(sInput.Substring(iOperandStart, iNext - iOperandStart));
        iResult = Operate(iResult, sInput[iCurrent + 1], iOperand);
        iCurrent = iNext;
    }

    // The last operand runs to the end of the string.
    return Operate(iResult, sInput[iCurrent + 1], int.Parse(sInput.Substring(iCurrent + 3)));
}
/// <summary>Applies one binary integer operation.</summary>
/// <param name="iReturn">Left operand (the running result).</param>
/// <param name="cOperator">One of '+', '-', '*', '/'.</param>
/// <param name="iOperant">Right operand.</param>
/// <returns>The result of the operation; '/' is integer division.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="cOperator"/> is not one of the four supported characters.
/// This type still derives from Exception, so existing catch blocks keep working.
/// </exception>
public static int Operate(int iReturn, char cOperator, int iOperant)
{
    switch (cOperator)
    {
        case '+':
            return iReturn + iOperant;
        case '-':
            return iReturn - iOperant;
        case '*':
            return iReturn * iOperant;
        case '/':
            return iReturn / iOperant;
        default:
            throw new System.ArgumentOutOfRangeException(
                nameof(cOperator), cOperator, "Unsupported operator; expected '+', '-', '*' or '/'.");
    }
}
I need the fastest way to get a result.
Question: is there a way to make this calculation faster? I have multiple threads but I use only one.
Update:
Test-Case: (I've removed the division by 0 bug and removed the StringBuilder.ToString() from the StopWatch measurement)
// Build a random expression of 1,000,001 operands and time a single Calc call.
var rand = new Random();
var input = new System.Text.StringBuilder();
const string operators = "+-*/";
input.Append(rand.Next(0, 100));
for (int step = 0; step < 1000000; step++)
{
    int operand = rand.Next(0, 100);
    // A zero operand may only follow '+', '-' or '*', so '/' is excluded for it.
    int operatorCount = operand > 0 ? 4 : 3;
    char symbol = operators[rand.Next(0, operatorCount)];
    input.Append(' ').Append(symbol).Append(' ').Append(operand);
}
string calc = input.ToString();
// Only the evaluation itself is inside the measured region.
var watch = System.Diagnostics.Stopwatch.StartNew();
int result = Calc(calc);
watch.Stop();
Edit edit: updated with latest versions by The General and Mirai Mann:
If you want to know which horse is fastest: race the horses. Here are BenchmarkDotNet results comparing various answers from this question (I have not merged their code into my full example, because that feels wrong - only the numbers are presented) with repeatable but large random input, via:
/// <summary>
/// Builds the shared benchmark input once: 1,000,000 "operand operator " pairs
/// followed by a final operand, from a fixed seed so every run sees the same text.
/// </summary>
static MyTests()
{
    Random rand = new Random(12345);
    StringBuilder input = new StringBuilder();
    string operators = "+-*/";
    var lastOperator = '+';
    for (int i = 0; i < 1000000; i++)
    {
        // The operator is drawn before the operand; keep this order, it
        // determines the generated sequence.
        var op = operators[rand.Next(0, 4)];
        // The operand written here follows lastOperator, so it must not be 0 after '/'.
        input.Append(rand.Next(lastOperator == '/' ? 1 : 0, 100) + " " + op + " ");
        lastOperator = op;
    }

    // The final operand also follows an operator. It used to be drawn from
    // [0, 100) unconditionally and could produce "... / 0". Replacing only a
    // drawn 0 leaves every other generated input unchanged.
    int lastOperand = rand.Next(0, 100);
    if (lastOperator == '/' && lastOperand == 0)
    {
        lastOperand = 1;
    }
    input.Append(lastOperand);
    expression = input.ToString();
}
private static readonly string expression;
with sanity checks (to check they all do the right thing):
Original: -1426
NoSubStrings: -1426
NoSubStringsUnsafe: -1426
TheGeneral4: -1426
MiraiMann1: -1426
we get timings (note: Original is OP's version in the question; NoSubStrings[Unsafe] is my versions from below, and two other versions from other answers by user-name):
(lower "Mean" is better)
(note; there is a newer version of Mirai Mann's implementation, but I no longer have things setup to run a new test; but: fair to assume it should be better!)
Runtime: .NET Framework 4.7 (CLR 4.0.30319.42000), 32bit LegacyJIT-v4.7.2633.0
Method | Mean | Error | StdDev |
------------------- |----------:|----------:|----------:|
Original | 104.11 ms | 1.4920 ms | 1.3226 ms |
NoSubStrings | 21.99 ms | 0.4335 ms | 0.7122 ms |
NoSubStringsUnsafe | 20.53 ms | 0.4103 ms | 0.6967 ms |
TheGeneral4 | 15.50 ms | 0.3020 ms | 0.5369 ms |
MiraiMann1 | 15.54 ms | 0.3096 ms | 0.4133 ms |
Runtime: .NET Framework 4.7 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.2633.0
Method | Mean | Error | StdDev | Median |
------------------- |----------:|----------:|----------:|----------:|
Original | 114.15 ms | 1.3142 ms | 1.0974 ms | 114.13 ms |
NoSubStrings | 21.33 ms | 0.4161 ms | 0.6354 ms | 20.93 ms |
NoSubStringsUnsafe | 19.24 ms | 0.3832 ms | 0.5245 ms | 19.43 ms |
TheGeneral4 | 13.97 ms | 0.2795 ms | 0.2745 ms | 13.86 ms |
MiraiMann1 | 15.60 ms | 0.3090 ms | 0.4125 ms | 15.53 ms |
Runtime: .NET Core 2.1.0-preview1-26116-04 (CoreCLR 4.6.26116.03, CoreFX 4.6.26116.01), 64bit RyuJIT
Method | Mean | Error | StdDev | Median |
------------------- |----------:|----------:|----------:|----------:|
Original | 101.51 ms | 1.7807 ms | 1.5786 ms | 101.44 ms |
NoSubStrings | 21.36 ms | 0.4281 ms | 0.5414 ms | 21.07 ms |
NoSubStringsUnsafe | 19.85 ms | 0.4172 ms | 0.6737 ms | 19.80 ms |
TheGeneral4 | 14.06 ms | 0.2788 ms | 0.3723 ms | 13.82 ms |
MiraiMann1 | 15.88 ms | 0.3153 ms | 0.5922 ms | 15.45 ms |
Original answer from before I added BenchmarkDotNet:
If I was trying this, I'd be tempted to have a look at the Span<T> work in 2.1 previews - the point being that a Span<T> can be sliced without allocating (and a string can be converted to a Span<char> without allocating); this would allow the string carving and parsing to be performed without any allocations. However, reducing allocations is not always quite the same thing as raw performance (although they are related), so to know if it was faster: you'd need to race your horses (i.e. compare them).
If Span<T> isn't an option: you can still do the same thing by tracking an int offset manually and just *never* using Substring.
In either case (string or Span<char>): if your operation only needs to cope with a certain subset of representations of integers, I might be tempted to hand-roll a custom int.Parse equivalent that doesn't apply as many rules (cultures, alternative layouts, etc), and which works without needing a Substring - for example it could take a string and ref int offset, where the offset is updated to be where the parse stopped (either because it hit an operator or a space).
Something like:
/// <summary>
/// Allocation-free left-to-right evaluator: walks the string with an integer
/// offset instead of cutting substrings.
/// </summary>
static class P
{
    static void Main()
    {
        string input = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";
        var val = Evaluate(input);
        System.Console.WriteLine(val);
    }

    /// <summary>
    /// Evaluates "a op b op c ..." strictly left to right. Spaces between
    /// tokens are optional.
    /// </summary>
    /// <exception cref="System.FormatException">
    /// A number is missing where one is required, or an operator is not one of + - * /.
    /// </exception>
    static int Evaluate(string expression)
    {
        int offset = 0;
        SkipSpaces(expression, ref offset);
        int value = ReadInt32(expression, ref offset);
        while (ReadNext(expression, ref offset, out char op, out int operand))
        {
            switch (op)
            {
                case '+': value = value + operand; break;
                case '-': value = value - operand; break;
                case '*': value = value * operand; break;
                case '/': value = value / operand; break;
                // Unknown operators used to be skipped silently, which hid malformed input.
                default: throw new System.FormatException("Unexpected operator '" + op + "'.");
            }
        }
        return value;
    }

    /// <summary>
    /// Reads the next "operator operand" pair. Returns false at end of input,
    /// including when an operator is not followed by an operand.
    /// </summary>
    static bool ReadNext(string value, ref int offset,
        out char op, out int operand)
    {
        SkipSpaces(value, ref offset);
        if (offset >= value.Length)
        {
            op = (char)0;
            operand = 0;
            return false;
        }
        op = value[offset++];
        SkipSpaces(value, ref offset);
        if (offset >= value.Length)
        {
            operand = 0;
            return false;
        }
        operand = ReadInt32(value, ref offset);
        return true;
    }

    /// <summary>Advances <paramref name="offset"/> past any run of spaces.</summary>
    static void SkipSpaces(string value, ref int offset)
    {
        while (offset < value.Length && value[offset] == ' ') offset++;
    }

    /// <summary>
    /// Parses an optional '-' followed by decimal digits starting at
    /// <paramref name="offset"/> and leaves the offset on the first character
    /// that is not part of the number. The earlier loop incremented the offset
    /// while testing the character, so it swallowed the delimiter and "14+2"
    /// lost its '+'.
    /// </summary>
    /// <exception cref="System.FormatException">No digit is present at the offset.</exception>
    static int ReadInt32(string value, ref int offset)
    {
        bool isNeg = false;
        if (offset < value.Length && value[offset] == '-')
        {
            isNeg = true;
            offset++;
        }

        int firstDigit = offset;
        int i = 0;
        while (offset < value.Length)
        {
            char c = value[offset];
            if (c < '0' || c > '9') break;
            i = (i * 10) + (c - '0');
            offset++;
        }

        // Covers empty input, a lone '-', and any non-digit where a number is required.
        if (offset == firstDigit)
        {
            throw new System.FormatException("Expected a digit at offset " + offset + ".");
        }
        return isNeg ? -i : i;
    }
}
Next, I might consider whether it is worthwhile switching to unsafe to remove the double length checks. So I'd implement it both ways, and test it with something like BenchmarkDotNet to see whether it is worth it.
Edit: here it is with unsafe added and BenchmarkDotNet usage:
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
/// <summary>
/// Console entry point: runs the MyTests benchmarks through BenchmarkDotNet
/// and prints whatever the runner returns.
/// </summary>
static class P
{
    static void Main() => Console.WriteLine(BenchmarkRunner.Run<MyTests>());
}
/// <summary>
/// Benchmark fixture: one benchmark method per evaluator, all fed the same
/// sample expression.
/// </summary>
public class MyTests
{
    const string expression = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";

    [Benchmark]
    public int Original()
    {
        return EvalOriginal.Calc(expression);
    }

    [Benchmark]
    public int NoSubStrings()
    {
        return EvalNoSubStrings.Evaluate(expression);
    }

    [Benchmark]
    public int NoSubStringsUnsafe()
    {
        return EvalNoSubStringsUnsafe.Evaluate(expression);
    }
}
/// <summary>
/// The question's Substring/IndexOf evaluator, kept as the benchmark baseline.
/// </summary>
static class EvalOriginal
{
    /// <summary>
    /// Evaluates "a op b op c ..." strictly left to right (no operator
    /// precedence). Tokens must be separated by exactly one space.
    /// </summary>
    public static int Calc(string sInput)
    {
        int iCurrent = sInput.IndexOf(' ');

        // A lone number has no separator. Previously IndexOf returned -1 here
        // and Substring(0, -1) threw ArgumentOutOfRangeException.
        if (iCurrent < 0)
        {
            return int.Parse(sInput);
        }

        int iResult = int.Parse(sInput.Substring(0, iCurrent));
        int iNext;

        // iCurrent is the space before an operator: the operator sits at
        // iCurrent + 1 and its operand starts at iCurrent + 3, so the next
        // separator cannot come before iCurrent + 4.
        while ((iNext = sInput.IndexOf(' ', iCurrent + 4)) != -1)
        {
            int iOperandStart = iCurrent + 3;
            // The length excludes the separator at iNext (the old expression was one too long).
            int iOperand = int.Parse(sInput.Substring(iOperandStart, iNext - iOperandStart));
            iResult = Operate(iResult, sInput[iCurrent + 1], iOperand);
            iCurrent = iNext;
        }

        // The last operand runs to the end of the string.
        return Operate(iResult, sInput[iCurrent + 1], int.Parse(sInput.Substring(iCurrent + 3)));
    }

    /// <summary>Applies one binary integer operation; '/' is integer division.</summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="cOperator"/> is not '+', '-', '*' or '/'. This type still
    /// derives from Exception, so existing catch blocks keep working.
    /// </exception>
    public static int Operate(int iReturn, char cOperator, int iOperant)
    {
        switch (cOperator)
        {
            case '+':
                return iReturn + iOperant;
            case '-':
                return iReturn - iOperant;
            case '*':
                return iReturn * iOperant;
            case '/':
                return iReturn / iOperant;
            default:
                throw new ArgumentOutOfRangeException(
                    nameof(cOperator), cOperator, "Unsupported operator; expected '+', '-', '*' or '/'.");
        }
    }
}
/// <summary>
/// Allocation-free left-to-right evaluator: walks the string with an integer
/// offset instead of cutting substrings.
/// </summary>
static class EvalNoSubStrings {
    /// <summary>
    /// Evaluates "a op b op c ..." strictly left to right. Spaces between
    /// tokens are optional.
    /// </summary>
    /// <exception cref="FormatException">
    /// A number is missing where one is required, or an operator is not one of
    /// + - * /. FormatException derives from Exception, which the unknown-operator
    /// case threw before.
    /// </exception>
    public static int Evaluate(string expression)
    {
        int offset = 0;
        SkipSpaces(expression, ref offset);
        int value = ReadInt32(expression, ref offset);
        while (ReadNext(expression, ref offset, out char op, out int operand))
        {
            switch (op)
            {
                case '+': value = value + operand; break;
                case '-': value = value - operand; break;
                case '*': value = value * operand; break;
                case '/': value = value / operand; break;
                default: throw new FormatException("Unexpected operator '" + op + "'.");
            }
        }
        return value;
    }

    /// <summary>
    /// Reads the next "operator operand" pair. Returns false at end of input,
    /// including when an operator is not followed by an operand.
    /// </summary>
    static bool ReadNext(string value, ref int offset,
        out char op, out int operand)
    {
        SkipSpaces(value, ref offset);
        if (offset >= value.Length)
        {
            op = (char)0;
            operand = 0;
            return false;
        }
        op = value[offset++];
        SkipSpaces(value, ref offset);
        if (offset >= value.Length)
        {
            operand = 0;
            return false;
        }
        operand = ReadInt32(value, ref offset);
        return true;
    }

    /// <summary>Advances <paramref name="offset"/> past any run of spaces.</summary>
    static void SkipSpaces(string value, ref int offset)
    {
        while (offset < value.Length && value[offset] == ' ') offset++;
    }

    /// <summary>
    /// Parses an optional '-' followed by decimal digits starting at
    /// <paramref name="offset"/> and leaves the offset on the first character
    /// that is not part of the number. The earlier loop incremented the offset
    /// while testing the character, so it swallowed the delimiter and "14+2"
    /// lost its '+'.
    /// </summary>
    /// <exception cref="FormatException">No digit is present at the offset.</exception>
    static int ReadInt32(string value, ref int offset)
    {
        bool isNeg = false;
        if (offset < value.Length && value[offset] == '-')
        {
            isNeg = true;
            offset++;
        }

        int firstDigit = offset;
        int i = 0;
        while (offset < value.Length)
        {
            char c = value[offset];
            if (c < '0' || c > '9') break;
            i = (i * 10) + (c - '0');
            offset++;
        }

        // Covers empty input, a lone '-', and any non-digit where a number is required.
        if (offset == firstDigit)
        {
            throw new FormatException("Expected a digit at offset " + offset + ".");
        }
        return isNeg ? -i : i;
    }
}
/// <summary>
/// Pointer-based variant of the offset-walking evaluator. The cursor is
/// compared against a fixed end pointer instead of decrementing a remaining
/// length: the earlier length counter went negative at end of input, and for an
/// empty string the digit loop then read past the end of the string.
/// </summary>
static unsafe class EvalNoSubStringsUnsafe
{
    /// <summary>
    /// Evaluates "a op b op c ..." strictly left to right. Spaces between
    /// tokens are optional.
    /// </summary>
    /// <exception cref="FormatException">
    /// A number is missing where one is required, or an operator is not one of
    /// + - * /. FormatException derives from Exception, which the unknown-operator
    /// case threw before.
    /// </exception>
    public static int Evaluate(string expression)
    {
        fixed (char* ptr = expression)
        {
            char* c = ptr;
            char* end = ptr + expression.Length;
            SkipSpaces(ref c, end);
            int value = ReadInt32(ref c, end);
            while (ReadNext(ref c, end, out char op, out int operand))
            {
                switch (op)
                {
                    case '+': value = value + operand; break;
                    case '-': value = value - operand; break;
                    case '*': value = value * operand; break;
                    case '/': value = value / operand; break;
                    default: throw new FormatException("Unexpected operator '" + op + "'.");
                }
            }
            return value;
        }
    }

    /// <summary>
    /// Reads the next "operator operand" pair. Returns false at end of input,
    /// including when an operator is not followed by an operand.
    /// </summary>
    static bool ReadNext(ref char* c, char* end,
        out char op, out int operand)
    {
        SkipSpaces(ref c, end);
        if (c >= end)
        {
            op = (char)0;
            operand = 0;
            return false;
        }
        op = *c++;
        SkipSpaces(ref c, end);
        if (c >= end)
        {
            operand = 0;
            return false;
        }
        operand = ReadInt32(ref c, end);
        return true;
    }

    /// <summary>Advances the cursor past any run of spaces, never beyond <paramref name="end"/>.</summary>
    static void SkipSpaces(ref char* c, char* end)
    {
        while (c < end && *c == ' ') c++;
    }

    /// <summary>
    /// Parses an optional '-' followed by decimal digits and leaves the cursor
    /// on the first character that is not part of the number.
    /// </summary>
    /// <exception cref="FormatException">No digit is present at the cursor.</exception>
    static int ReadInt32(ref char* c, char* end)
    {
        bool isNeg = false;
        if (c < end && *c == '-')
        {
            isNeg = true;
            c++;
        }

        char* firstDigit = c;
        int i = 0;
        while (c < end)
        {
            int digit = *c - '0';
            if (digit < 0 || digit > 9) break;
            i = (i * 10) + digit;
            c++;
        }

        // Covers empty input, a lone '-', and any non-digit where a number is required.
        if (c == firstDigit)
        {
            throw new FormatException("Expected a digit.");
        }
        return isNeg ? -i : i;
    }
}
The following solution is a finite automaton. Calc(input) = O(n). For better performance, this solution does not use IndexOf, Substring, Parse, string concatenation, or repeated reading of value (fetching input[i] more than once)... just a character processor.
/// <summary>
/// Single-pass character processor for "a op b op c ..." evaluated left to
/// right. Operands are read as one or two digits: the first digit is held
/// pending until a second digit, a space, or the end of the input decides the
/// operand. Characters other than digits, spaces and + - * / are ignored.
/// </summary>
static int Calculate1(string input)
{
    // Applies one operation to the running total; an unknown symbol leaves it unchanged.
    int Apply(int total, char symbol, int operand)
    {
        switch (symbol)
        {
            case '+': return total + operand;
            case '-': return total - operand;
            case '*': return total * operand;
            case '/': return total / operand;
            default: return total;
        }
    }

    const char None = ' ';
    int result = 0;
    char pendingDigit = None; // first digit of an operand not yet applied
    char symbolInForce = '+'; // the first operand is simply added to 0

    foreach (char ch in input)
    {
        if (ch == ' ')
        {
            // A space after a lone digit completes a one-digit operand.
            if (pendingDigit == None) continue;
            result = Apply(result, symbolInForce, pendingDigit - '0');
            pendingDigit = None;
        }
        else if (ch >= '0' && ch <= '9')
        {
            if (pendingDigit == None)
            {
                pendingDigit = ch;
                continue;
            }
            // Second digit: the operand is complete and applied immediately.
            int twoDigits = (pendingDigit - '0') * 10 + (ch - '0');
            result = Apply(result, symbolInForce, twoDigits);
            pendingDigit = None;
        }
        else if (ch == '+' || ch == '-' || ch == '*' || ch == '/')
        {
            symbolInForce = ch;
        }
    }

    // A one-digit operand at the very end has no trailing space to flush it.
    if (pendingDigit != None)
    {
        result = Apply(result, symbolInForce, pendingDigit - '0');
    }
    return result;
}
And another implementation. It reads 25% less from the input. I expect that it has 25% better performance.
/// <summary>
/// Character processor for "a op b op c ..." evaluated left to right that skips
/// characters it can predict: after a two-digit operand and after an operator it
/// jumps two positions, assuming the skipped character is the single separating
/// space. Operands are one or two digits.
/// </summary>
/// <remarks>
/// The index now always advances. Previously a space with no pending digit
/// (leading or doubled spaces) or any unrecognised character left the index
/// unchanged and the loop never terminated.
/// </remarks>
static int Calculate2(string input)
{
    // Applies one operation to the running total; an unknown symbol leaves it unchanged.
    int Apply(int total, char symbol, int operand)
    {
        switch (symbol)
        {
            case '+': return total + operand;
            case '-': return total - operand;
            case '*': return total * operand;
            case '/': return total / operand;
            default: return total;
        }
    }

    int acc = 0, i = 0;
    char last = ' ', operation = '+';
    while (i < input.Length)
    {
        var current = input[i];
        switch (current)
        {
            case ' ':
                if (last != ' ')
                {
                    // A space after a lone digit completes a one-digit operand.
                    acc = Apply(acc, operation, last - '0');
                    last = ' ';
                }
                // Step over the space whether or not an operand was pending.
                i++;
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                if (last == ' ')
                {
                    last = current;
                    i++;
                }
                else
                {
                    var num = (last - '0') * 10 + (current - '0');
                    acc = Apply(acc, operation, num);
                    last = ' ';
                    i += 2; // also skips the space expected after the operand
                }
                break;
            case '+': case '-': case '*': case '/':
                operation = current;
                i += 2; // also skips the space expected after the operator
                break;
            default:
                // Unrecognised character: skip it instead of looping forever.
                i++;
                break;
        }
    }
    // A one-digit operand at the very end has no trailing space to flush it.
    if (last != ' ')
    {
        acc = Apply(acc, operation, last - '0');
    }
    return acc;
}
I implemented one more variant:
/// <summary>
/// Left-to-right evaluator that reads the input in fixed steps: a one- or
/// two-digit operand, the space after it, an operator, and the space after
/// that. No validation is performed; the input must follow that layout exactly.
/// </summary>
static int Calculate3(string input)
{
    // Applies one operation to the running total; an unknown symbol leaves it unchanged.
    int Apply(int total, char symbol, int operand)
    {
        switch (symbol)
        {
            case '+': return total + operand;
            case '-': return total - operand;
            case '*': return total * operand;
            case '/': return total / operand;
            default: return total;
        }
    }

    int result = 0;
    int pos = 0;
    char symbol = '+'; // the first operand is simply added to 0

    for (; ; )
    {
        int operand = input[pos] - '0';
        pos++;

        bool lastCharConsumed = pos == input.Length;
        if (!lastCharConsumed)
        {
            char next = input[pos];
            if (next == ' ')
            {
                pos++; // one-digit operand: step over the separator
            }
            else
            {
                operand = operand * 10 + (next - '0');
                pos += 2; // second digit plus the separator
            }
        }

        result = Apply(result, symbol, operand);
        if (lastCharConsumed || pos >= input.Length)
        {
            return result;
        }

        symbol = input[pos];
        pos += 2; // operator plus the separator
    }
}
Results in abstract points:
Calculate1 230
Calculate2 192
Calculate3 111
NOTE
Per comments, this answer does not give a performant solution.
I'll leave it here as there are points to be considered / which may be of interest to others finding this thread in future; but as people have said below, this is far from the most performant solution.
Original Answer
The .net framework already supplies a way to handle formulas given as strings:
// Evaluate the formula with DataTable.Compute. As the printed value shows, the
// result is fractional, i.e. not the left-to-right integer result (11) asked for.
var formula = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";
var result = new DataTable().Compute(formula, null);
Console.WriteLine(result); //returns 139.125490196078
Initial feedback based on comments
Per the comments thread I need to point out some things:
Does this work the way I've described?
No; this follows the normal rules of maths.
I assume that your amended rules are to simplify writing code to handle them, rather than because you want to support a new branch of mathematics? If that's the case, I'd argue against that. People will expect things to behave in a certain way; so you'd have to ensure that anyone sending equations to your code was primed with the knowledge to expect the rules of this new-maths rather than being able to use their existing expectations.
There isn't an option to change the rules here; so if your requirement is to change the rules of maths, this won't work for you.
Is this the Fastest Solution
No. However it should perform well given MS spend a lot of time optimising their code, and so will likely perform faster than any hand-rolled code to do the same (though admittedly this code does a lot more than just support the four main operators; so it's not doing exactly the same).
Per MatthewWatson's specific comment (i.e. calling the DataTable constructor incurs a significant overhead) you'd want to create and then re-use one instance of this object. Depending on what your solution looks like there are various ways to do that; here's one:
/// <summary>
/// Abstraction over an expression evaluator. Using an interface makes it easy
/// to switch from DataTable to some other calculator; useful for testing, or
/// for comparing different calculators without much recoding.
/// </summary>
interface ICalculator
{
/// <summary>Evaluates <paramref name="expression"/> and returns the result as value type T.</summary>
T Calculate<T>(string expression) where T: struct;
}
/// <summary>
/// ICalculator backed by a single reused DataTable, so the DataTable
/// construction cost is paid once per calculator rather than once per call.
/// </summary>
class DataTableCalculator: ICalculator
{
    readonly DataTable dataTable = new DataTable();

    public DataTableCalculator(){}

    /// <summary>Evaluates <paramref name="expression"/> with DataTable.Compute.</summary>
    /// <typeparam name="T">The value type the caller wants back.</typeparam>
    /// <returns>
    /// The computed value. When Compute returns a different numeric type than T
    /// (for example an Int32 result requested as double) the value is converted
    /// with Convert.ChangeType instead of failing.
    /// </returns>
    public T Calculate<T>(string expression) where T: struct
    {
        object result = dataTable.Compute(expression, null);

        // Compute returns a boxed value. Unboxing with (T) only succeeds when
        // the boxed type is exactly T, so the plain cast threw
        // InvalidCastException whenever the two differed.
        if (result is T)
        {
            return (T)result;
        }
        return (T)System.Convert.ChangeType(
            result, typeof(T), System.Globalization.CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Facade that forwards every call to one application-wide ICalculator. The
/// backing instance is held in a static field, so it is shared by all
/// Calculator objects and can be set only once.
/// </summary>
class Calculator: ICalculator
{
    static ICalculator internalInstance;

    public Calculator(){}

    /// <summary>Sets the shared backing calculator.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="calculator"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A backing calculator has already been set.</exception>
    public void InitialiseCalculator (ICalculator calculator)
    {
        // A null argument used to be stored silently, leaving the calculator
        // uninitialised while looking initialised to the caller.
        if (calculator == null)
        {
            throw new ArgumentNullException(nameof(calculator));
        }
        if (internalInstance != null)
        {
            throw new InvalidOperationException("Calculator has already been initialised");
        }
        internalInstance = calculator;
    }

    /// <summary>Evaluates <paramref name="expression"/> with the shared backing calculator.</summary>
    /// <exception cref="InvalidOperationException">
    /// InitialiseCalculator has not been called yet (previously a NullReferenceException).
    /// </exception>
    public T Calculate<T>(string expression) where T: struct
    {
        ICalculator instance = internalInstance;
        if (instance == null)
        {
            throw new InvalidOperationException("Calculator has not been initialised");
        }
        return instance.Calculate<T>(expression);
    }
}
//then we use it on our code
// Demo: initialise the shared calculator through one Calculator object, then
// show that a second Calculator object can evaluate without being initialised.
void Main()
{
var calculator1 = new Calculator();
calculator1.InitialiseCalculator(new DataTableCalculator());
var equation = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";
Console.WriteLine(calculator1.Calculate<double>(equation)); //139.125490196078
equation = "1 + 2 - 3 + 4";
Console.WriteLine(calculator1.Calculate<int>(equation)); //4
calculator1 = null;
System.GC.Collect(); //in reality we'd pretty much never do this, but just to illustrate that our static variable continues for the life of the app domain rather than the life of the instance
var calculator2 = new Calculator();
//calculator2.InitialiseCalculator(new DataTableCalculator()); //uncomment this and you'll get an error; i.e. the calculator should only be initialised once.
equation = "1 + 2 - 3 + 4 / 5 * 6 - 7 / 8 + 9";
Console.WriteLine(calculator2.Calculate<double>(equation)); //12.925
}
NB: The above solution uses a static variable; some people are against use of statics. For this scenario (i.e. where during the lifetime of the application you're only going to require one way of doing calculations) this is a legitimate use case. If you wanted to support switching the calculator at runtime a different approach would be required.
Update after Testing & Comparing
Having run some performance tests:
The DataTable.Compute method's biggest problem is that for equations the size of which you're dealing with it throws a StackOverflowException; (i.e. based on your equation generator's loop for (int i = 0; i < 1000000; i++).
For a single operation with a smaller equation (i < 1000), the compute method (including constructor and Convert.ToInt32 on the double result) takes almost 100 times longer.
for the single operation I also encountered overflow exceptions more often; i.e. because the result of the operations had pushed the value outside the bounds of supported data types...
Even if we move the constructor/initialise call outside of the timed area and remove the conversion to int (and run for thousands of iterations to get an average), your solution comes in 3.5 times faster than mine.
Link to the docs: https://msdn.microsoft.com/en-us/library/system.data.datatable.compute%28v=vs.110%29.aspx?f=255&MSPPError=-2147217396
Update
My original answer was just a bit of fun late at night trying to put this in unsafe and I failed miserably (actually didn't work at all and was slower). However I decided to give this another shot.
The premise was to make everything inline, to remove as much IL as I could, keep everything in int or char*, and make my code pretty. I further optimized this by removing the switch, Ifs will be more efficient in this situation, also we can order them in the most logical way. And lastly, if we remove the amount of checks for things we do and assume the input is correct we can remove even more overhead by just assuming things like; if the char is > '0' it must be a number. If it's a space we can do some calculations, else it must be an operator.
This is my last attempt with 10,000,000 calculations run 100 times to get an average, each test does a GC.Collect() and GC.WaitForPendingFinalizers() so we aren't fragmenting the memory.
Results
Test : ms : Cycles (rough) : Increase
-------------------------------------------------------------------
OriginalCalc : 1,295 : 4,407,795,584 :
MarcEvalNoSubStrings : 241 : 820,660,220 : 437.34%, * 5.32
MarcEvalNoSubStringsUnsafe : 206 : 701,980,373 : 528.64%, * 6.28
MiraiMannCalc1 : 225 : 765,678,062 : 475.55%, * 5.75
MiraiMannCalc2 : 183 : 623,384,924 : 607.65%, * 7.07
MyCalc4 : 156 : 534,190,325 : 730.12%, * 8.30
MyCalc5 : 146 : 496,185,459 : 786.98%, * 8.86
MyCalc6 : 134 : 455,610,410 : 866.41%, * 9.66
Fastest Code so far
/// <summary>
/// Left-to-right evaluator over a pinned string. It trusts its input: tokens
/// are separated by single spaces, any character at or above '0' is taken to
/// be a digit, and any other non-space character is taken to be the operator.
/// </summary>
/// <param name="expression">The expression text; it is only read, never reassigned.</param>
/// <returns>The integer result; an input with no operator yields its single number.</returns>
/// <exception cref="IndexOutOfRangeException">
/// The last operator seen is not one of + - * / (unchanged from before).
/// </exception>
unsafe int Calc6(ref string expression)
{
    int res = 0, val = 0, op = 0;
    var isOp = false;
    // pin the string so the raw pointer stays valid
    fixed (char* p = expression)
    {
        // compute the end once instead of on every iteration
        var max = p + expression.Length;
        for (var i = p; i < max; i++)
        {
            // Digits are the most common characters, so test for them first
            // with a deliberately loose check and accumulate into val.
            if (*i >= '0') { val = val * 10 + *i - '0'; continue; }
            // Spaces are the second most common.
            if (*i == ' ')
            {
                // Spaces alternate: the one after an operand triggers a
                // calculation, the one after an operator does not.
                if (!(isOp = !isOp)) continue;
                // apply the pending operator, reset val and move on
                if (op == '+') { res += val; val = 0; continue; }
                if (op == '-') { res -= val; val = 0; continue; }
                if (op == '*') { res *= val; val = 0; continue; }
                if (op == '/') { res /= val; val = 0; continue; }
                // no operator yet: this was the first operand
                res = val; val = 0; continue;
            }
            // anything else is considered an operator
            op = *i;
        }
        // No operator was seen, so the input was a single number. It is still
        // in val, or already in res if a trailing space moved it there; the
        // other one is 0. This case used to fall through to the throw below.
        if (op == 0) return res + val;
        if (op == '+') return res + val;
        if (op == '-') return res - val;
        if (op == '*') return res * val;
        if (op == '/') return res / val;
        throw new IndexOutOfRangeException();
    }
}
Previous
/// <summary>
/// Switch-based left-to-right evaluator over a pinned string. It trusts its
/// input: tokens are separated by single spaces and every character that is
/// not a space or one of + - * / is taken to be a digit.
/// </summary>
/// <param name="expression">The expression text; it is only read, never reassigned.</param>
/// <returns>The integer result; an input with no operator yields its single number.</returns>
unsafe int Calc4(ref string expression)
{
    int res = 0, val = 0, op = 0;
    var isOp = false;
    fixed (char* p = expression)
    {
        var max = p + expression.Length;
        for (var i = p; i < max; i++)
            switch (*i)
            {
                case ' ':
                    // Spaces alternate: the one after an operand triggers a
                    // calculation, the one after an operator does not.
                    isOp = !isOp;
                    if (!isOp) continue;
                    switch (op)
                    {
                        case '+': res += val; val = 0; continue;
                        case '-': res -= val; val = 0; continue;
                        case '*': res *= val; val = 0; continue;
                        case '/': res /= val; val = 0; continue;
                        // no operator yet: this was the first operand
                        default: res = val; val = 0; continue;
                    }
                case '+': case '-': case '*': case '/': op = *i; continue;
                default: val = val * 10 + *i - '0'; continue;
            }
        switch (op)
        {
            case '+': return res + val;
            case '-': return res - val;
            case '*': return res * val;
            case '/': return res / val;
            // op is only ever 0 or one of the four operators, so this branch
            // means no operator was seen and the input was a single number.
            // It is still in val, or already in res if a trailing space moved
            // it there; the other one is 0. This used to return -1.
            default : return res + val;
        }
    }
}
How I measured the Thread cycles
/// <summary>P/Invoke wrapper around kernel32's QueryThreadCycleTime.</summary>
static class NativeMethods {
    // Handle value passed to QueryThreadCycleTime; presumably the Win32
    // pseudo-handle for the calling thread (confirm against the Win32 docs).
    private static readonly IntPtr PseudoHandle = (IntPtr)(-2);

    [DllImport("kernel32.dll", SetLastError = true)]
    private static extern bool QueryThreadCycleTime(IntPtr hThread, out ulong cycles);

    /// <summary>Returns the cycle count reported for <see cref="PseudoHandle"/>.</summary>
    /// <exception cref="System.ComponentModel.Win32Exception">The native call returned false.</exception>
    public static ulong GetThreadCycles() {
        ulong cycles;
        bool succeeded = QueryThreadCycleTime(PseudoHandle, out cycles);
        if (succeeded)
            return cycles;
        throw new System.ComponentModel.Win32Exception();
    }
}
Original Post
I thought I'd try to be smart and use fixed and max this out with millions of calculations
// First attempt at a pointer-based evaluator, kept as posted. The author
// reports that it did not work and was slower than the original.
// NOTE(review): as written it does not compute the left-to-right result:
//  - each step adds "value1 op value2" to iResult for the two neighbouring
//    operands instead of applying the operator to the running result;
//  - work only happens when a space is read, so the text after the last
//    space (the final operand) is never parsed;
//  - tokens are collected with string concatenation (buf += cur), which
//    creates a new string per character.
public static unsafe int Calc2(string sInput)
{
// buf collects the characters of the current token
var buf = "";
var start = sInput.IndexOf(' ');
// first operand: everything before the first space
var value1 = int.Parse(sInput.Substring(0, start));
string op = null;
var iResult = 0;
// true once an operator token has been read and an operand is expected
var isOp = false;
fixed (char* p = sInput)
{
for (var i = start + 1; i < sInput.Length; i++)
{
var cur = *(p + i);
if (cur == ' ')
{
if (!isOp)
{
// the token just completed is the operator
op = buf;
isOp = true;
}
else
{
// the token just completed is the right-hand operand
var value2 = int.Parse(buf);
switch (op[0])
{
case '+': iResult += value1 + value2; break;
case '-': iResult += value1 - value2; break;
case '*': iResult += value1 * value2; break;
case '/': iResult += value1 / value2; break;
}
// the right-hand operand becomes the left-hand operand of the next pair
value1 = value2;
isOp = false;
}
buf = "";
}
else
{
buf += cur;
}
}
}
return iResult;
}
/// <summary>
/// Builds one long expression by joining 10,000,001 copies of a sample with
/// " + " and prints how long Calc2 takes on it. Materialising the string is
/// part of the timed region.
/// </summary>
private static void Main(string[] args)
{
    const string sample = "14 + 2 * 32 / 60 + 43 - 7 + 3 - 1 + 0 * 7 + 87 - 32 / 34";
    const int extraCopies = 10000000;

    var builder = new StringBuilder();
    builder.Append(sample);
    for (var copy = 0; copy < extraCopies; copy++)
    {
        builder.Append(" + ").Append(sample);
    }

    var sw = Stopwatch.StartNew();
    Calc2(builder.ToString());
    sw.Stop();
    Console.WriteLine($"sw : {sw.Elapsed:c}");
}
Results were 2 seconds slower than the original!
Here is a Java fun fact. I implemented the same thing in Java and it runs about 20 times faster than Mirai Mann implementation in C#. On my machine 100M chars input string took about 353 milliseconds.
Below is the code that creates and tests the result.
Also, note that while it's a good Java/C# performance tester this is not an optimal solution. A better performance can be achieved by multithreading it. It's possible to calculate portions of the string and then combine the result.
public class Test {
public static void main(String...args){
new Test().run();
}
private void run() {
long startTime = System.currentTimeMillis();
Random random = new Random(123);
int result = 0;
StringBuilder input = new StringBuilder();
input.append(random.nextInt(99) + 1);
while (input.length() < 100_000_000){
int value = random.nextInt(100);
switch (random.nextInt(4)){
case 0:
input.append("-");
result -= value;
break;
case 1: // +
input.append("+");
result += value;
break;
case 2:
input.append("*");
result *= value;
break;
case 3:
input.append("/");
while (value == 0){
value = random.nextInt(100);
}
result /= value;
break;
}
input.append(value);
}
String inputData = input.toString();
System.out.println("Test created in " + (System.currentTimeMillis() - startTime));
startTime = System.currentTimeMillis();
int testResult = test(inputData);
System.out.println("Completed in " + (System.currentTimeMillis() - startTime));
if(result != testResult){
throw new Error("Oops");
}
}
private int test(String inputData) {
char[] input;
try {
Field val = String.class.getDeclaredField("value");
val.setAccessible(true);
input = (char[]) val.get(inputData);
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new Error(e);
}
int result = 0;
int startingI = 1;
{
char c = input[0];
if (c >= '0' && c <= '9') {
result += c - '0';
c = input[1];
if (c >= '0' && c <= '9') {
result += (c - '0') * 10;
startingI++;
}
}
}
for (int i = startingI, length = input.length, value=0; i < length; i++) {
char operation = input[i];
i++;
char c = input[i];
if(c >= '0' && c <= '9'){
value += c - '0';
c = input[i + 1];
if(c >= '0' && c <= '9'){
value = value * 10 + (c - '0');
i++;
}
}
switch (operation){
case '-':
result -= value;
break;
case '+':
result += value;
break;
case '*':
result *= value;
break;
case '/':
result /= value;
break;
}
value = 0;
}
return result;
}
}
When you read the code then you can see that I used a small hack when converting the string to a char array: I read the string's internal backing array via reflection in order to avoid an additional memory allocation for the char array.
I have the following code:
// Delegate type for a callback that uses the C (cdecl) calling convention.
// It takes a string and an IntPtr; presumably these are a printf-style format string
// and the matching va_list supplied by the native caller — confirm against the native API.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void PanicFuncDelegate(string str, IntPtr args);
// Forwards the message and its argument pointer to LogFunc, tagged with the "PANIC" severity.
private void PanicFunc(string str, IntPtr args) => LogFunc("PANIC", str, args);
// Prefixes the format string with "[severity] " and hands it, together with the
// untouched argument pointer, to the native vprintf.
public void LogFunc(string severity, string str, IntPtr args)
{
    string taggedFormat = "[" + severity + "] " + str;
    vprintf(taggedFormat, args);
}
// P/Invoke binding for vprintf exported by libc.so.6.
// `format` is marshalled as the format string; `args` is passed through as an opaque pointer
// (presumably the native va_list received by the callback — confirm for the target ABI).
[DllImport("libc.so.6")]
private static extern int vprintf(string format, IntPtr args);
This prints to the console the messages correctly formatted. I want to retrieve the values from args to use them in my own logger.
If I try to get the value of each pointer from the array in args (as suggested here: Marshal va_list in C# delegate) I get segmentation fault.
Any suggestions?
I have a function call with this working, here's what I do:
For the DllImport I use an __arglist to marshal to the va_list:
// Alternative P/Invoke binding for vprintf from libc.so.6 that takes the variable
// arguments through C#'s __arglist instead of a raw pointer.
[DllImport("libc.so.6")]
private static extern int vprintf(string format, __arglist);
Then when calling the function I create the __arglist,
vprintf(string format, __arglist(arg1, arg2, arg3...))
Of course, you would need to either call the function with all the arguments listed statically or build that __arglist dynamically. I don't have the code here, but it's possible.
I wonder if you get a segmentation fault because the elements in the object[] are not pinned? Maybe if you pin the object[] and all elements within that would help? Just a guess though.
Just think about how a C program reads its arguments from a va_list, and there is the solution:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
namespace VaTest
{
    /// <summary>
    /// Demonstrates building a flat, pointer-aligned argument buffer in unmanaged memory
    /// and handing it to a native function that expects a va_list pointer.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            MarshalVaArgs(vaList => vprintf("%c%d%s", vaList), false, 'a', 123, "bc");
        }

        [DllImport("msvcrt")] //windows
        //[DllImport("c")] //linux
        private static extern int vprintf(string format, IntPtr vaList);

        /// <summary>
        /// Returns the marshalled size of <paramref name="t"/> rounded up to a multiple of the pointer size.
        /// </summary>
        private static int IntSizeOf(Type t)
        {
            return (Marshal.SizeOf(t) + IntPtr.Size - 1) & ~(IntPtr.Size - 1);
        }

        /// <summary>
        /// Copies a string into unmanaged memory using the requested encoding.
        /// </summary>
        /// <param name="s">The string to copy.</param>
        /// <param name="isUnicode">true = UTF-16, false = ANSI, null = platform default ("Auto").</param>
        private static IntPtr StringToHGlobal(string s, bool? isUnicode)
        {
            if (!isUnicode.HasValue)
            {
                return Marshal.StringToHGlobalAuto(s);
            }
            return isUnicode.Value ? Marshal.StringToHGlobalUni(s) : Marshal.StringToHGlobalAnsi(s);
        }

        /// <summary>
        /// Packs <paramref name="args"/> into one contiguous unmanaged buffer, one pointer-aligned
        /// slot per argument, invokes <paramref name="action"/> with the buffer address, and then
        /// releases every unmanaged allocation, even when the action throws.
        /// </summary>
        /// <param name="action">Callback that receives the address of the packed argument buffer.</param>
        /// <param name="isUnicode">String encoding: true = UTF-16, false = ANSI, null = platform default.</param>
        /// <param name="args">
        /// Values to pack. Strings are copied to unmanaged memory and stored as pointers;
        /// a null element is stored as a null pointer; everything else is written with
        /// <see cref="Marshal.StructureToPtr(object, IntPtr, bool)"/>.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
        public static void MarshalVaArgs(Action<IntPtr> action, bool? isUnicode, params object[] args)
        {
            if (action == null)
            {
                throw new ArgumentNullException("action");
            }
            if (args == null)
            {
                args = new object[0];
            }

            var sizes = new int[args.Length];
            for (var i = 0; i < args.Length; i++)
            {
                // Strings and nulls are passed as pointers.
                sizes[i] = (args[i] == null || args[i] is string)
                    ? IntPtr.Size
                    : IntSizeOf(args[i].GetType());
            }

            var total = sizes.Sum();
            var allocs = new List<IntPtr>();
            try
            {
                var result = Marshal.AllocHGlobal(total);
                allocs.Add(result);

                // Zero the buffer so that values narrower than their slot do not carry
                // stale bytes in the unused upper part of the slot.
                for (var b = 0; b < total; b++)
                {
                    Marshal.WriteByte(result, b, 0);
                }

                var offset = 0;
                for (var i = 0; i < args.Length; i++)
                {
                    if (args[i] == null)
                    {
                        Marshal.WriteIntPtr(result, offset, IntPtr.Zero);
                    }
                    else if (args[i] is string)
                    {
                        var data = StringToHGlobal((string)args[i], isUnicode);
                        allocs.Add(data);
                        Marshal.WriteIntPtr(result, offset, data);
                    }
                    else
                    {
                        Marshal.StructureToPtr(args[i], result + offset, false);
                    }
                    offset += sizes[i];
                }

                action(result);
            }
            finally
            {
                // Always release unmanaged memory, including partially built argument lists.
                foreach (var ptr in allocs)
                {
                    Marshal.FreeHGlobal(ptr);
                }
            }
        }
    }
}
The code is written and tested with .NET Core 3.0 preview 5, compatible with .NET Framework 4.0 and C# 3.0.
Outputs:
a123bc
As this isn't solved yet, I am posting a longer solution that worked for me.
I found the solution in an abandoned project
https://github.com/GoaLitiuM/libobs-sharp
Use like this (tested with FFmpeg):
// Decode the native argument list into managed objects, using the printf-style
// format string to decide how each argument is read.
var objects = va_list_Helper.VaListToArray(format, va_List_Ptr);
// format: frame=%4d QP=%.2f NAL=%d Slice:%c Poc:%-3d I:%-4d P:%-4d SKIP:%-4d size=%d bytes%s
// format (filled): frame= 3 QP=13.00 NAL=0 Slice:B Poc:4 I:0 P:8 SKIP:912 size=32 bytes
// va_List objects: 3, 13, 0, 'B', 4, 0, 8, 912, 32
The classes needed:
/// <summary>
/// Entry point for callers that hold a raw native va_list pointer.
/// </summary>
public class va_list_Helper
{
    /// <summary>
    /// Wraps the raw pointer in a <c>va_list</c> and returns the arguments it decodes
    /// according to <paramref name="format"/>.
    /// </summary>
    /// <param name="format">printf-style format string describing the arguments.</param>
    /// <param name="va_list">Raw pointer to the native argument list.</param>
    public static unsafe object[] VaListToArray(string format, byte* va_list)
    {
        IntPtr rawPointer = (IntPtr)va_list;
        return new va_list(rawPointer).GetObjectsByFormat(format);
    }
}
public static class Printf
{
// used
/// <summary>
/// Splits a printf-style format string into chunks that each start at a '%' and run up to
/// (but not including) the next specifier, or to the end of the string.
/// A "%%" pair is kept together in one chunk.
/// </summary>
/// <param name="format">The printf-style format string.</param>
/// <returns>
/// The chunks in order of appearance, or null when <paramref name="format"/> is null,
/// empty, or contains no '%'.
/// </returns>
public static string[] GetFormatSpecifiers(string format)
{
    if (string.IsNullOrEmpty(format) || format.IndexOf('%') == -1)
        return null;
    // find specifiers from format string
    List<int> indices = new List<int>();
    for (int j = 0; j < format.Length; j++)
    {
        j = format.IndexOf('%', j);
        if (j == -1)
            break;
        indices.Add(j);
        // ignore "%%"; the bounds check covers a format that ends with a lone '%'
        if (j + 1 < format.Length && format[j + 1] == '%')
            j++;
    }
    if (indices.Count == 0)
        return null;
    List<string> formats = new List<string>(indices.Count);
    for (int mi = 0; mi < indices.Count; mi++)
    {
        int begin = indices[mi];
        int end = mi + 1 < indices.Count ? indices[mi + 1] : format.Length;
        string formatSpecifier = format.Substring(begin, end - begin);
        if (!string.IsNullOrWhiteSpace(formatSpecifier))
            formats.Add(formatSpecifier);
    }
    return formats.ToArray();
}
// Parsed description of a single printf conversion specification,
// as filled in by GetFormatSpecifierInfo.
public class FormatSpecificationInfo
{
// Text of the specification, from its '%' through the conversion character.
public string specification;
//public int parameter;
// Conversion character (for example 'd' or 's'); '\0' when none was found.
public char type;
// Explicit field width; int.MinValue when the specification has none.
public int width;
// Explicit precision; defaults to 6, or int.MinValue for the s, c, x/X and o conversions.
public int precision;
// Length modifiers, formatting flags, and dynamic width/precision markers.
public FormatFlags flags;
};
/// <summary>
/// Bit flags describing the length modifiers, formatting flags, and dynamic
/// width/precision markers found in a printf conversion specification.
/// </summary>
[Flags]
public enum FormatFlags
{
    // Type length
    // IsLong/IsLongLong and IsShort/IsChar are deliberately adjacent powers of two:
    // the parser adds IsLong (or IsShort) once per 'l' (or 'h'), so a doubled
    // modifier lands on the next flag.
    IsLong = 0x0001, // l
    IsLongLong = 0x0002, // ll
    IsShort = 0x0004, // h
    IsChar = 0x0008, // hh
    IsLongDouble = 0x0010, // L (single bit; must not overlap IsLongLong/IsShort)
    // Flags
    LeftAlign = 0x0100, // '-' left align within the width
    Sign = 0x0200, // '+' use - or + signs for signed types
    Alternate = 0x0400, // '#' prefix non-zero values with hex types
    ZeroPad = 0x0800, // '0' pad with zeros
    Blank = 0x1000, // ' ' pad sign with blank
    Grouping = 0x2000, // '\'' (apostrophe) group by thousands
    ArchSize = 0x4000, // '?' use arch precision
    // Dynamic parameters
    DynamicWidth = 0x10000,
    DynamicPrecision = 0x20000,
};
// used
/// <summary>
/// Parses one printf conversion specification (for example "%-5.2f") into its flags,
/// width, precision and conversion character.
/// </summary>
/// <param name="specification">
/// Text containing the specification. Characters before the first '%' are treated as
/// literal text and skipped; parsing stops at the conversion character.
/// </param>
/// <returns>
/// The parsed information, or null when <paramref name="specification"/> is null, empty
/// or whitespace. <c>type</c> stays '\0' when no conversion character is found.
/// </returns>
public static FormatSpecificationInfo GetFormatSpecifierInfo(string specification)
{
    if (string.IsNullOrWhiteSpace(specification))
        return null;
    FormatSpecificationInfo info = new FormatSpecificationInfo()
    {
        type = '\0',
        width = int.MinValue,
        precision = 6,
    };
    string width = "";
    string precision = "";
    int start = -1;
    // TODO: parse parameter index
    for (int i = 0; i < specification.Length && info.type == '\0'; i++)
    {
        char c = specification[i];
        // Nothing before the introducing '%' belongs to the specification. Skipping it
        // also keeps the Substring calls below from running with start == -1.
        if (start == -1 && c != '%')
            continue;
        switch (c)
        {
            case '%':
                if (start == -1)
                {
                    start = i;
                }
                else
                {
                    // second '%' of a "%%" pair: a literal percent sign
                    info.type = c;
                }
                info.specification = specification.Substring(start, i + 1 - start);
                break;
            // flags
            case '-':
                info.flags |= FormatFlags.LeftAlign;
                break;
            case '+':
                info.flags |= FormatFlags.Sign;
                break;
            case ' ':
                info.flags |= FormatFlags.Blank;
                break;
            case '#':
                info.flags |= FormatFlags.Alternate;
                break;
            case '\'':
                info.flags |= FormatFlags.Grouping;
                break;
            case '?':
                info.flags |= FormatFlags.ArchSize;
                break;
            // precision
            case '.':
                {
                    // consume the '*' or the ASCII digits that follow the dot
                    for (int j = i + 1; j < specification.Length; j++)
                    {
                        char p = specification[j];
                        if (p == '*')
                            info.flags |= FormatFlags.DynamicPrecision;
                        else if (p >= '0' && p <= '9')
                            precision += p;
                        else
                            break;
                        i++;
                    }
                }
                break;
            // length flags
            case 'h':
                // additive on purpose: a second 'h' turns IsShort into IsChar
                info.flags += (int)FormatFlags.IsShort;
                break;
            case 'l':
                // additive on purpose: a second 'l' turns IsLong into IsLongLong
                info.flags += (int)FormatFlags.IsLong;
                break;
            case 'L':
                info.flags |= FormatFlags.IsLongDouble;
                break;
            case 'z':
            case 'j':
            case 't':
                // not supported
                break;
            // dynamic width
            case '*':
                info.flags |= FormatFlags.DynamicWidth;
                break;
            default:
                {
                    // ASCII digits only, so Convert.ToInt32 below cannot be handed
                    // other Unicode numeric characters
                    if (c >= '0' && c <= '9')
                    {
                        if (width == "" && c == '0')
                            info.flags |= FormatFlags.ZeroPad;
                        else
                            width += c;
                    }
                    else if (char.IsLetter(c))
                    {
                        info.type = c;
                        info.specification = specification.Substring(start, i + 1 - start);
                    }
                }
                break;
        }
    }
    // sign overrides space
    if (info.flags.HasFlag(FormatFlags.Sign) && info.flags.HasFlag(FormatFlags.Blank))
        info.flags &= ~FormatFlags.Blank;
    // left alignment overrides zero padding
    if (info.flags.HasFlag(FormatFlags.LeftAlign) && info.flags.HasFlag(FormatFlags.ZeroPad))
        info.flags &= ~FormatFlags.ZeroPad;
    // no default precision for these types
    if (info.type == 's' ||
        info.type == 'c' ||
        Char.ToUpper(info.type) == 'X' ||
        info.type == 'o')
    {
        info.precision = int.MinValue;
    }
    if (!string.IsNullOrWhiteSpace(precision))
        info.precision = Convert.ToInt32(precision);
    if (!string.IsNullOrWhiteSpace(width))
        info.width = Convert.ToInt32(width);
    return info;
}
}
public class va_list
{
internal IntPtr instance; //unmanaged pointer to va_list
/// <summary> Wraps an unmanaged pointer to a native argument list. </summary>
/// <param name="ptr"> Unmanaged pointer to store. </param>
public va_list(IntPtr ptr)
{
    this.instance = ptr;
}
/// <summary> Gets the unmanaged pointer this instance was constructed with. </summary>
/// <returns> The stored pointer to the argument list. </returns>
public IntPtr GetPointer()
{
    return this.instance;
}
/// <summary> Decodes this argument list into objects, guided by a printf format string. </summary>
/// <param name="format"> printf format string. </param>
/// <returns> Result of the static overload called with this instance. </returns>
public object[] GetObjectsByFormat(string format)
{
    va_list self = this;
    return GetObjectsByFormat(format, self);
}
/// <summary>
/// Reads the arguments of a native argument list into managed objects, using the
/// conversion specifications in <paramref name="format"/> to decide how many bytes
/// each argument occupies and how to interpret them.
/// </summary>
/// <param name="format"> printf format string. </param>
/// <param name="va_list"> Wrapper around the unmanaged argument list pointer. </param>
/// <returns>
/// The decoded arguments in order (dynamic width/precision values precede the value they
/// apply to), or null when the format has no specifiers or the pointer is missing.
/// </returns>
/// <exception cref="ApplicationException"> A conversion character is not supported. </exception>
/// <remarks>
/// Assumes the argument list is a flat block of consecutive argument slots, read through
/// Marshal at increasing offsets — NOTE(review): confirm this holds for the target ABI.
/// </remarks>
public static unsafe object[] GetObjectsByFormat(string format, va_list va_list)
{
    string[] formatSpecifiers = Printf.GetFormatSpecifiers(format);
    if (formatSpecifiers == null || va_list == null || va_list.GetPointer() == IntPtr.Zero)
        return null;
    IntPtr args = va_list.GetPointer();
    List<object> objects = new List<object>(formatSpecifiers.Length);
    // Every argument advances the read position by at least one pointer-sized slot.
    int slotSize = Marshal.SizeOf(typeof(IntPtr));
    int offset = 0;
    foreach (string spec in formatSpecifiers)
    {
        var info = Printf.GetFormatSpecifierInfo(spec);
        if (info == null || info.type == '\0')
            continue;
        // dynamic width and precision arguments
        // these are stored in stack before the actual value
        if (info.flags.HasFlag(Printf.FormatFlags.DynamicWidth))
        {
            int widthArg = Marshal.ReadInt32(args, offset);
            objects.Add(widthArg);
            offset += slotSize;
        }
        if (info.flags.HasFlag(Printf.FormatFlags.DynamicPrecision))
        {
            int precArg = Marshal.ReadInt32(args, offset);
            objects.Add(precArg);
            offset += slotSize;
        }
        int iSize = info.flags.HasFlag(Printf.FormatFlags.IsLongLong)
            ? Marshal.SizeOf(typeof(Int64)) : slotSize;
        // marshal objects from pointer
        switch (info.type)
        {
            // 8/16-bit integers
            // char / wchar_t (promoted to int)
            case 'c':
                {
                    char c = (char)Marshal.ReadByte(args, offset);
                    objects.Add(c);
                    offset += slotSize;
                }
                break;
            // signed integers
            case 'd':
            case 'i':
                {
                    if (info.flags.HasFlag(Printf.FormatFlags.IsShort)) // h
                    {
                        short sh = (short)Marshal.ReadInt32(args, offset);
                        objects.Add(sh);
                        // advance by a full slot like every other integer argument;
                        // stepping only 4 bytes misaligns later arguments on 64-bit
                        offset += slotSize;
                    }
                    else if (info.flags.HasFlag(Printf.FormatFlags.IsLongLong)) // ll
                    {
                        long l = Marshal.ReadInt64(args, offset);
                        objects.Add(l);
                        offset += iSize;
                    }
                    else // int and long types
                    {
                        var i = Marshal.ReadInt32(args, offset);
                        objects.Add(i);
                        offset += iSize;
                    }
                }
                break;
            // unsigned integers
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                {
                    if (info.flags.HasFlag(Printf.FormatFlags.IsShort)) // h
                    {
                        ushort su = (ushort)Marshal.ReadInt32(args, offset);
                        objects.Add(su);
                        // full slot, see the signed case above
                        offset += slotSize;
                    }
                    else if (info.flags.HasFlag(Printf.FormatFlags.IsLongLong)) // ll
                    {
                        ulong lu = (ulong)(long)Marshal.ReadInt64(args, offset);
                        objects.Add(lu);
                        offset += iSize;
                    }
                    else // uint and ulong types
                    {
                        uint u = (uint)Marshal.ReadInt32(args, offset);
                        objects.Add(u);
                        offset += iSize;
                    }
                }
                break;
            // floating-point types
            case 'f':
            case 'F':
            case 'e':
            case 'E':
            case 'g':
            case 'G':
                {
                    // long double ('L') is not really supported: it is read as a
                    // 64-bit double exactly like the plain case
                    long bits = Marshal.ReadInt64(args, offset);
                    double d = *(double*)(void*)&bits;
                    objects.Add(d);
                    offset += Marshal.SizeOf(typeof(double));
                }
                break;
            // string
            case 's':
                {
                    IntPtr textPtr = Marshal.ReadIntPtr(args, offset);
                    // the 'l' modifier selects a wide (UTF-16) string
                    string s = info.flags.HasFlag(Printf.FormatFlags.IsLong)
                        ? Marshal.PtrToStringUni(textPtr)
                        : Marshal.PtrToStringAnsi(textPtr);
                    objects.Add(s);
                    offset += slotSize;
                }
                break;
            // pointer
            case 'p':
                {
                    IntPtr ptr = Marshal.ReadIntPtr(args, offset);
                    objects.Add(ptr);
                    offset += slotSize;
                }
                break;
            // non-marshallable types, ignored
            case ' ':
            case '%':
            case 'n':
                break;
            default:
                throw new ApplicationException("printf specifier '%" + info.type + "' not supported");
        }
    }
    return objects.ToArray();
}
}
I'm writing a function to test if a string is a palindrome or not and I am wondering how to return the middle letter or letters if the string is indeed a palindrome?
Here's what I have so far:
My bool to check if the string is a palindrome:
// Walks inward from both ends of the string and reports whether every
// mirrored pair of characters matches. An empty string counts as a palindrome.
public static bool IsPalindrome(string input)
{
    for (int left = 0, right = input.Length - 1; left <= right; left++, right--)
    {
        if (input[left] != input[right])
        {
            return false;
        }
    }
    return true;
}
And here is where I'd like to be able to print out the middle letter(s):
// Characters removed before the palindrome check: space ; : , . ? ' ! " and -.
// The '-' is placed last inside the brackets so it is a literal hyphen; written as
// ".-?" it formed a range that also removed digits and the characters / < = >.
// The pattern never changes, so the Regex is built once instead of on every iteration.
Regex myRegex = new Regex("[ ;:,.?'!\"-]");
while (true)
{
    Console.Clear();
    Console.WriteLine("Please enter sentence or phrase");
    string userInput = Console.ReadLine();
    Console.WriteLine();
    if (userInput == null)
    {
        // ReadLine returned null: no more input is available, so stop prompting.
        break;
    }
    string cleaned = myRegex.Replace(userInput, string.Empty).ToLower();
    Console.WriteLine(IsPalindrome(cleaned) ? "True" : "False");
    Console.WriteLine("Press any key to continue");
    Console.ReadLine();
}
Here's another implementation to return the middle letter(s):
// Returns the single middle character of an odd-length string, the two middle
// characters of an even-length string, or "" for an empty string.
public string MiddleLettersOf(string s)
{
    int length = s.Length;
    if (length == 0)
    {
        return "";
    }

    int half = length / 2;
    bool oddLength = length % 2 != 0;
    int start = oddLength ? half : half - 1;
    int count = oddLength ? 1 : 2;
    return s.Substring(start, count);
}
(This assumes that passing a null string is an error and therefore I allow it to throw a NullReferenceException.)
By the way, a simple (but not the most efficient) way of checking a string to see if it's a palindrome is:
// Compares the string's characters with the same characters in reverse order.
public static bool IsPalindrome(string s)
{
    var backwards = s.Reverse();
    return s.SequenceEqual(backwards);
}
You can generalise that test to any IEnumerable:
// Generic form: compares the sequence with its own reversal, element by element.
public static bool IsPalindrome<T>(IEnumerable<T> s)
{
    var backwards = s.Reverse();
    return s.SequenceEqual(backwards);
}
but the flaw with that code is that s is enumerated twice, which might be A Bad Thing.
Try this:
// Returns the middle letter of an odd-length string or the two middle letters of an
// even-length string. Null or empty input yields an empty string instead of throwing.
private static string GetMiddleLetters(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    int length = input.Length;
    //If it's odd, we take 1 letter, if it's even, we take 2
    int numToTake = (length % 2 == 0) ? 2 : 1;
    //Zero-based index of the (left) middle letter, using integer arithmetic only
    int startIndex = (length - 1) / 2;
    return input.Substring(startIndex, numToTake);
}
And use it like this:
// Clean and lower-case the input once, then reuse the same string for both the
// palindrome check and the middle-letter lookup.
var fixedString = myRegex.Replace(userInput, string.Empty).ToLower();
if (IsPalindrome(fixedString))
{
Console.WriteLine(GetMiddleLetters(fixedString));
//Rest of the code here...
}
Something like that:
// Returns the middle character (odd length) or the two middle characters (even length).
// Null and empty inputs are returned unchanged.
public static String MiddleLetters(string value) {
    if (String.IsNullOrEmpty(value)) {
        return value; // middle of the "null" is supposed to be null
    }

    int half = value.Length / 2;
    if (value.Length % 2 == 0) {
        return value.Substring(half - 1, 2);
    }
    return value.Substring(half, 1);
}
To test for being palindrome:
// Compares characters pairwise from the outside in; null or empty input counts as a palindrome.
public static bool IsPalindrome(string value) {
    if (String.IsNullOrEmpty(value)) {
        return true; // or false, or throw an exception
    }

    //TODO: are you looking for case sensitive or case insensitive palindromes?
    int left = 0;
    int right = value.Length - 1;
    while (left < right) {
        if (value[left] != value[right]) {
            return false;
        }
        left++;
        right--;
    }
    return true;
}
I am working on a CLR table-valued function for SQL Server 2008 R2. I need to HTML-decode a string at one point, but this is problematic because that relies on System.Web, which is not a supported assembly for SQL Server.
Can anyone think of a better way to do the HTML Decode?
FYI: SQL Server 2008 CLR only supports up to .NET 3.5, so System.Net.WebUtility will not work.
You can also use Reflector to grab the code from WebUtility directly (please don't blame me for the coding style; it's decompiled code):
public class WebUtility {
private static class HtmlEntities {
private static string[] _entitiesList = new string[]
{
"\"-quot",
"&-amp",
"'-apos",
"<-lt",
">-gt",
"\u00a0-nbsp",
"¡-iexcl",
"¢-cent",
"£-pound",
"¤-curren",
"¥-yen",
"¦-brvbar",
"§-sect",
"¨-uml",
"©-copy",
"ª-ordf",
"«-laquo",
"¬-not",
"-shy",
"®-reg",
"¯-macr",
"°-deg",
"±-plusmn",
"²-sup2",
"³-sup3",
"´-acute",
"µ-micro",
"¶-para",
"·-middot",
"¸-cedil",
"¹-sup1",
"º-ordm",
"»-raquo",
"¼-frac14",
"½-frac12",
"¾-frac34",
"¿-iquest",
"À-Agrave",
"Á-Aacute",
"Â-Acirc",
"Ã-Atilde",
"Ä-Auml",
"Å-Aring",
"Æ-AElig",
"Ç-Ccedil",
"È-Egrave",
"É-Eacute",
"Ê-Ecirc",
"Ë-Euml",
"Ì-Igrave",
"Í-Iacute",
"Î-Icirc",
"Ï-Iuml",
"Ð-ETH",
"Ñ-Ntilde",
"Ò-Ograve",
"Ó-Oacute",
"Ô-Ocirc",
"Õ-Otilde",
"Ö-Ouml",
"×-times",
"Ø-Oslash",
"Ù-Ugrave",
"Ú-Uacute",
"Û-Ucirc",
"Ü-Uuml",
"Ý-Yacute",
"Þ-THORN",
"ß-szlig",
"à-agrave",
"á-aacute",
"â-acirc",
"ã-atilde",
"ä-auml",
"å-aring",
"æ-aelig",
"ç-ccedil",
"è-egrave",
"é-eacute",
"ê-ecirc",
"ë-euml",
"ì-igrave",
"í-iacute",
"î-icirc",
"ï-iuml",
"ð-eth",
"ñ-ntilde",
"ò-ograve",
"ó-oacute",
"ô-ocirc",
"õ-otilde",
"ö-ouml",
"÷-divide",
"ø-oslash",
"ù-ugrave",
"ú-uacute",
"û-ucirc",
"ü-uuml",
"ý-yacute",
"þ-thorn",
"ÿ-yuml",
"Œ-OElig",
"œ-oelig",
"Š-Scaron",
"š-scaron",
"Ÿ-Yuml",
"ƒ-fnof",
"ˆ-circ",
"˜-tilde",
"Α-Alpha",
"Β-Beta",
"Γ-Gamma",
"Δ-Delta",
"Ε-Epsilon",
"Ζ-Zeta",
"Η-Eta",
"Θ-Theta",
"Ι-Iota",
"Κ-Kappa",
"Λ-Lambda",
"Μ-Mu",
"Ν-Nu",
"Ξ-Xi",
"Ο-Omicron",
"Π-Pi",
"Ρ-Rho",
"Σ-Sigma",
"Τ-Tau",
"Υ-Upsilon",
"Φ-Phi",
"Χ-Chi",
"Ψ-Psi",
"Ω-Omega",
"α-alpha",
"β-beta",
"γ-gamma",
"δ-delta",
"ε-epsilon",
"ζ-zeta",
"η-eta",
"θ-theta",
"ι-iota",
"κ-kappa",
"λ-lambda",
"μ-mu",
"ν-nu",
"ξ-xi",
"ο-omicron",
"π-pi",
"ρ-rho",
"ς-sigmaf",
"σ-sigma",
"τ-tau",
"υ-upsilon",
"φ-phi",
"χ-chi",
"ψ-psi",
"ω-omega",
"ϑ-thetasym",
"ϒ-upsih",
"ϖ-piv",
"\u2002-ensp",
"\u2003-emsp",
"\u2009-thinsp",
"-zwnj",
"-zwj",
"-lrm",
"-rlm",
"–-ndash",
"—-mdash",
"‘-lsquo",
"’-rsquo",
"‚-sbquo",
"“-ldquo",
"”-rdquo",
"„-bdquo",
"†-dagger",
"‡-Dagger",
"•-bull",
"…-hellip",
"‰-permil",
"′-prime",
"″-Prime",
"‹-lsaquo",
"›-rsaquo",
"‾-oline",
"⁄-frasl",
"€-euro",
"ℑ-image",
"℘-weierp",
"ℜ-real",
"™-trade",
"ℵ-alefsym",
"←-larr",
"↑-uarr",
"→-rarr",
"↓-darr",
"↔-harr",
"↵-crarr",
"⇐-lArr",
"⇑-uArr",
"⇒-rArr",
"⇓-dArr",
"⇔-hArr",
"∀-forall",
"∂-part",
"∃-exist",
"∅-empty",
"∇-nabla",
"∈-isin",
"∉-notin",
"∋-ni",
"∏-prod",
"∑-sum",
"−-minus",
"∗-lowast",
"√-radic",
"∝-prop",
"∞-infin",
"∠-ang",
"∧-and",
"∨-or",
"∩-cap",
"∪-cup",
"∫-int",
"∴-there4",
"∼-sim",
"≅-cong",
"≈-asymp",
"≠-ne",
"≡-equiv",
"≤-le",
"≥-ge",
"⊂-sub",
"⊃-sup",
"⊄-nsub",
"⊆-sube",
"⊇-supe",
"⊕-oplus",
"⊗-otimes",
"⊥-perp",
"⋅-sdot",
"⌈-lceil",
"⌉-rceil",
"⌊-lfloor",
"⌋-rfloor",
"〈-lang",
"〉-rang",
"◊-loz",
"♠-spades",
"♣-clubs",
"♥-hearts",
"♦-diams"
};
private static Dictionary<string, char> _lookupTable = WebUtility.HtmlEntities.GenerateLookupTable();
// Builds the entity-name -> character map from _entitiesList. In each entry the
// first character is the mapped character and the text from index 2 onward is the name.
private static Dictionary<string, char> GenerateLookupTable() {
    Dictionary<string, char> table = new Dictionary<string, char>(StringComparer.Ordinal);
    foreach (string entry in WebUtility.HtmlEntities._entitiesList) {
        char character = entry[0];
        string name = entry.Substring(2);
        table.Add(name, character);
    }
    return table;
}
// Returns the character mapped to the given entity name, or '\0' when the name is unknown.
public static char Lookup(string entity) {
    char match;
    if (WebUtility.HtmlEntities._lookupTable.TryGetValue(entity, out match)) {
        return match;
    }
    return '\0';
}
}
// Selects which numeric character references HtmlDecode(string, TextWriter) accepts.
private enum UnicodeDecodingConformance {
// Value assigned to _htmlDecodeConformance in this file.
Auto,
// Accepts code points below 0xD800 and from 0xE000 through 0x10FFFF (surrogates excluded).
Strict,
// Accepts code points 1 through 0xFFFF; StringRequiresHtmlDecoding then only looks for '&'.
Compat,
// Accepts any code point up to 0x10FFFF.
Loose
}
private static char[] _htmlEntityEndingChars = new char[] { ';', '&' };
private static readonly UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;
// Decodes HTML entities in the given string. Null, empty, and strings that
// StringRequiresHtmlDecoding reports as not needing work are returned as-is.
public static string HtmlDecode(string value) {
    bool nothingToDecode = string.IsNullOrEmpty(value) || !WebUtility.StringRequiresHtmlDecoding(value);
    if (nothingToDecode) {
        return value;
    }
    StringWriter buffer = new StringWriter(CultureInfo.InvariantCulture);
    WebUtility.HtmlDecode(value, buffer);
    return buffer.ToString();
}
// Reports whether the string may contain something to decode: in Compat mode only an
// ampersand counts; otherwise an ampersand or any surrogate code unit does.
private static bool StringRequiresHtmlDecoding(string s) {
    if (WebUtility._htmlDecodeConformance == UnicodeDecodingConformance.Compat) {
        return s.IndexOf('&') >= 0;
    }
    foreach (char current in s) {
        if (current == '&' || char.IsSurrogate(current)) {
            return true;
        }
    }
    return false;
}
// Splits a supplementary-plane code point into its UTF-16 surrogate pair:
// subtract 0x10000, then the quotient by 0x400 is added to 0xD800 and the
// remainder by 0x400 is added to 0xDC00.
private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
    int offsetFromPlaneOne = (int)(smpChar - 0x10000u);
    int highPart = offsetFromPlaneOne / 0x400;
    int lowPart = offsetFromPlaneOne % 0x400;
    leadingSurrogate = (char)(highPart + 0xD800);
    trailingSurrogate = (char)(lowPart + 0xDC00);
}
// Decodes HTML character references in `value` and writes the result to `output`.
//
// Handles named entities (looked up through HtmlEntities.Lookup) and numeric references
// in decimal ("&#65;") or hexadecimal ("&#x41;") form. Anything that does not form a
// complete, valid reference is written through unchanged.
//
// value:  text to decode; null writes nothing.
// output: destination writer; throws ArgumentNullException when null (and value is not null).
//
// The Auto conformance setting is treated like Strict. Without an explicit case it fell
// into the default branch, which rejected every numeric character reference.
public static void HtmlDecode(string value, TextWriter output) {
    if (value == null) {
        return;
    }
    if (output == null) {
        throw new ArgumentNullException("output");
    }
    if (!WebUtility.StringRequiresHtmlDecoding(value)) {
        output.Write(value);
        return;
    }
    int length = value.Length;
    for (int i = 0; i < length; i++) {
        char c = value[i];
        if (c == '&') {
            // A reference ends at the next ';'. Hitting another '&' first means this
            // ampersand does not start a reference.
            int end = value.IndexOfAny(WebUtility._htmlEntityEndingChars, i + 1);
            if (end > 0 && value[end] == ';') {
                string entity = value.Substring(i + 1, end - i - 1);
                if (entity.Length > 1 && entity[0] == '#') {
                    uint codePoint;
                    bool valid;
                    if (entity[1] == 'x' || entity[1] == 'X') {
                        valid = uint.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint);
                    } else {
                        valid = uint.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);
                    }
                    if (valid) {
                        switch (WebUtility._htmlDecodeConformance) {
                            case UnicodeDecodingConformance.Auto:
                            case UnicodeDecodingConformance.Strict:
                                // any code point except the surrogate range
                                valid = (codePoint < 0xD800u || (0xDFFFu < codePoint && codePoint <= 0x10FFFFu));
                                break;
                            case UnicodeDecodingConformance.Compat:
                                valid = (0u < codePoint && codePoint <= 0xFFFFu);
                                break;
                            case UnicodeDecodingConformance.Loose:
                                valid = (codePoint <= 0x10FFFFu);
                                break;
                            default:
                                valid = false;
                                break;
                        }
                    }
                    if (valid) {
                        if (codePoint <= 0xFFFFu) {
                            output.Write((char)codePoint);
                        } else {
                            char leading;
                            char trailing;
                            WebUtility.ConvertSmpToUtf16(codePoint, out leading, out trailing);
                            output.Write(leading);
                            output.Write(trailing);
                        }
                        i = end; // skip past the reference; the loop increment steps over ';'
                        continue;
                    }
                    // Invalid numeric reference: fall through and emit the '&' literally;
                    // the rest of the text is then copied character by character.
                } else {
                    i = end;
                    char known = WebUtility.HtmlEntities.Lookup(entity);
                    if (known == '\0') {
                        // Unknown name: reproduce the reference verbatim.
                        output.Write('&');
                        output.Write(entity);
                        output.Write(';');
                        continue;
                    }
                    c = known;
                }
            }
        }
        output.Write(c);
    }
}
}