How to parse nested parenthesis only in first level in C# - c#

I would like to write C# code that parses nested parenthesis to array elements, but only on first level. An example is needed for sure:
I want this string:
"(example (to (parsing nested paren) but) (first lvl only))"
to be parsed into:
["example", "(to (parsing nested paren) but)", "(first lvl only)"]
I was thinking about using regex but can't figure out how to properly use them without implementing this behaviour from scratch.
In the case of malformed inputs I would like to return an empty array, or an array ["error"]

I developed a parser for your example. I also checked some other examples which you can see in the code.
using System;
using System.Collections;
using System.Collections.Generic;
// Demo driver: parses one sample expression and prints its first-level elements.
public class Program
{
    // Other inputs that were tried, with the answer's stated output for each:
    //   "(first)(second)(third)"           => [first , second , third]
    //   "(first(second)third)"             => [first , (second) , third]
    //   "(first(second)(third)fourth)"     => [first , (second) , (third) , fourth]
    //   "(first((second)(third))fourth)"   => [first , ((second)(third)) , fourth]
    //   "just Text"                        => [ERROR]
    //   "start with Text (first , second)" => [ERROR]
    //   "(first , second) end with text"   => [ERROR]
    //   ""                                 => [ERROR]
    //   "("                                => [ERROR]
    //   "(first()(second)(third))fourth)"  => [ERROR]
    //   "(((extra close pareanthese))))"   => [ERROR]
    public static void Main()
    {
        // Stated output: [example , (to (parsing nested paren) but) , (first lvl only)]
        string input = "(example (to (parsing nested paren) but) (first lvl only))";
        showRes(Parser.parse(input));
    }

    // Prints the elements separated by " , " and wrapped in square brackets.
    static void showRes(ArrayList res)
    {
        Console.WriteLine("[" + string.Join(" , ", res.ToArray()) + "]");
    }
}
// Splits a parenthesised expression into the elements found directly inside
// the outermost parentheses. Nested groups are kept as one element, including
// their own parentheses. Malformed input yields a single "ERROR" element.
public class Parser
{
    // For each token type: the token types allowed to come directly before it.
    // The table never changes, so it is built once instead of on every check.
    static readonly Dictionary<TokenType, TokenType> allowedPredecessors = getRules();

    // Builds the predecessor table used by isValid.
    static Dictionary<TokenType, TokenType> getRules()
    {
        var rules = new Dictionary<TokenType, TokenType>();
        rules.Add(TokenType.OPEN_PARENTHESE, TokenType.START | TokenType.OPEN_PARENTHESE | TokenType.CLOSE_PARENTHESE | TokenType.SIMPLE_TEXT);
        rules.Add(TokenType.CLOSE_PARENTHESE, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE);
        rules.Add(TokenType.SIMPLE_TEXT, TokenType.SIMPLE_TEXT | TokenType.CLOSE_PARENTHESE | TokenType.OPEN_PARENTHESE);
        rules.Add(TokenType.END, TokenType.CLOSE_PARENTHESE);
        return rules;
    }

    // True when `cur` may directly follow `prev` according to the table.
    static bool isValid(Token prev, Token cur)
    {
        TokenType allowed;
        return allowedPredecessors.TryGetValue(cur.type, out allowed)
            && (prev.type & allowed) == prev.type;
    }

    // Parses sourceText and returns its first-level elements as strings.
    // Returns ["ERROR"] for null, unbalanced, or otherwise malformed input.
    public static ArrayList parse(string sourceText)
    {
        // A null source cannot be tokenised; report it like any other bad input.
        if (sourceText == null)
        {
            return Parser.Error();
        }

        ArrayList result = new ArrayList();
        int openParenthesesCount = 0;
        Lexer lexer = new Lexer(sourceText);
        Token prevToken = lexer.getStartToken();
        Token currentToken = lexer.readNextToken();
        string tmpText = "";

        while (currentToken.type != TokenType.END)
        {
            if (currentToken.type == TokenType.OPEN_PARENTHESE)
            {
                openParenthesesCount++;
                // The outermost "(" is only a delimiter; deeper ones belong to the element.
                if (openParenthesesCount > 1)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.CLOSE_PARENTHESE)
            {
                openParenthesesCount--;
                if (openParenthesesCount < 0)
                {
                    return Parser.Error();
                }
                // The ")" that closes the outermost group is not part of any element.
                if (openParenthesesCount > 0)
                {
                    tmpText += currentToken.token;
                }
            }
            else if (currentToken.type == TokenType.SIMPLE_TEXT)
            {
                tmpText += currentToken.token;
            }

            if (!Parser.isValid(prevToken, currentToken))
            {
                return Parser.Error();
            }

            // Back at depth 1 with content collected: one element is complete.
            // Surrounding blanks are separators, not content, so they are trimmed.
            if (openParenthesesCount == 1 && tmpText.Trim() != "")
            {
                result.Add(tmpText.Trim());
                tmpText = "";
            }

            prevToken = currentToken;
            currentToken = lexer.readNextToken();
        }

        if (openParenthesesCount != 0)
        {
            return Parser.Error();
        }
        if (!Parser.isValid(prevToken, currentToken))
        {
            return Parser.Error();
        }
        if (tmpText.Trim() != "")
        {
            result.Add(tmpText.Trim());
        }
        return result;
    }

    // The single-element list used to report malformed input.
    static ArrayList Error()
    {
        var er = new ArrayList();
        er.Add("ERROR");
        return er;
    }
}
// Turns the source text into a stream of tokens: "(", ")", and runs of any
// other characters. An END token is returned once the text is exhausted.
class Lexer
{
    static readonly char[] Parentheses = { '(', ')' };

    string _txt;
    int _index;

    public Lexer(string text)
    {
        this._index = 0;
        // Treat null like empty text so the first read yields END instead of crashing.
        this._txt = text ?? "";
    }

    // Synthetic token that stands before the first real token.
    public Token getStartToken()
    {
        return new Token(-1, TokenType.START, "");
    }

    // Returns the token at the current position and advances past it.
    public Token readNextToken()
    {
        if (this._index >= this._txt.Length)
        {
            return new Token(-1, TokenType.END, "");
        }

        char current = this._txt[this._index];
        Token t;
        if (current == '(')
        {
            t = new Token(this._index, TokenType.OPEN_PARENTHESE, "(");
        }
        else if (current == ')')
        {
            t = new Token(this._index, TokenType.CLOSE_PARENTHESE, ")");
        }
        else
        {
            t = new Token(this._index, TokenType.SIMPLE_TEXT, this._readText());
        }

        this._index += t.token.Length;
        return t;
    }

    // Reads from the current position up to (not including) the next parenthesis
    // or the end of the text. One Substring call replaces per-character concatenation.
    private string _readText()
    {
        int end = this._txt.IndexOfAny(Parentheses, this._index);
        if (end < 0)
        {
            end = this._txt.Length;
        }
        return this._txt.Substring(this._index, end - this._index);
    }
}
// One lexical token: where it was read, what kind it is, and its text.
// All three values are fixed at construction time.
class Token
{
    // Index the lexer recorded for this token (-1 for the START/END markers it creates).
    public int position { get; private set; }

    // Kind of token.
    public TokenType type { get; private set; }

    // Text of the token.
    public string token { get; private set; }

    public Token(int position, TokenType type, string token)
    {
        this.token = token;
        this.type = type;
        this.position = position;
    }
}
// Token kinds. Each kind is a distinct bit so that a set of kinds can be
// expressed by OR-ing values together.
[Flags]
enum TokenType
{
    START            = 1 << 0,
    OPEN_PARENTHESE  = 1 << 1,
    SIMPLE_TEXT      = 1 << 2,
    CLOSE_PARENTHESE = 1 << 3,
    END              = 1 << 4
}

well, regex will do the job:
// Matches exactly one shape: (word (word (word) word) (word)).
// Any other nesting does not match and is reported as an error.
var text = @"(example (to (parsing nested paren) but) (first lvl only))";
var pattern = @"\(([\w\s]+) (\([\w\s]+ \([\w\s]+\) [\w\s]+\)) (\([\w\s]+\))\)*";
try
{
    Regex r = new Regex(pattern, RegexOptions.IgnoreCase);
    Match m = r.Match(text);
    // A failed match does not throw; without this check the groups would all be empty strings.
    if (!m.Success)
    {
        return new string[] { "error" };
    }
    string group_1 = m.Groups[1].Value; //example
    string group_2 = m.Groups[2].Value; //(to (parsing nested paren) but)
    string group_3 = m.Groups[3].Value; //(first lvl only)
    return new string[] { group_1, group_2, group_3 };
}
catch (Exception)
{
    return new string[] { "error" };
}
hopefully this helps, tested here in dotnetfiddle
Edit:
this might get you started into building the right expression according to whatever patterns you are falling into and maybe build a recursive function to parse the rest into the desired output :)

RegEx is not recursive. You either count bracket level, or recurse.
A non-recursive parser loop that I tested against the example you show is:
// Splits the content of the outermost parentheses of `s` into quoted segments
// and returns them formatted as ["seg1","seg2",...].
// A segment ends where a second-level "(" opens or where the outermost group closes.
string SplitFirstLevel(string s)
{
    var segments = new List<string>();
    int segmentStart = 0;
    int depth = 0;
    int pos = 0;

    while (pos < s.Length)
    {
        char ch = s[pos];
        if (ch == '(')
        {
            depth++;
            if (depth == 1)
            {
                // Content starts right after the outermost "(".
                segmentStart = pos + 1;
            }
            else if (depth == 2)
            {
                // A nested group begins: emit everything collected so far.
                segments.Add("\"" + s.Substring(segmentStart, pos - segmentStart) + "\"");
                segmentStart = pos;
            }
        }
        else if (ch == ')')
        {
            depth--;
            if (depth == 0)
            {
                // The outermost group closes: emit the remaining text.
                segments.Add("\"" + s.Substring(segmentStart, pos - segmentStart) + "\"");
            }
        }
        pos++;
    }

    return "[" + String.Join(",", segments) + "]";
}
Note: after some more testing, I see your specification is unclear. How to delimit orphaned level 1 terms, that is terms without bracketing ?
For example, my parser translates
(example (to (parsing nested paren) but) (first lvl only))
to:
["example ","(to (parsing nested paren) but) ","(first lvl only)"]
and
(example (to (parsing nested paren)) but (first lvl only))
to:
["example ","(to (parsing nested paren)) but ","(first lvl only)"]
In either case, "example" gets a separate term, while "but" is grouped with the first term. In the first example this is logical, it is in the bracketing, but it may be unwanted behaviour in the second case, where "but" should be separated, like "example", which also has no bracketing (?)

Related

Swiftly search for multiple partial strings in a huge string

I need to check whether all parts of a string like
A=1&AW=43&KO=96&R=7&WW=15&ZJ=80
are in a big string like:
A=1&AG=77&AW=43&.....&KF=11&KO=96&.....&QW=55&R=7&....&WV=1&WW=15&....ZJ=80&
My code splits the first string on & and uses Contains. But the duration is too long, as the big string is up to 800000 characters.
Is there a better/faster method for this?
public partial class UserDefinedFunctions
{
    // Returns 1 when `equip` satisfies at least one alternative in `comp`, else 0.
    //
    // `comp` is a list of alternatives separated by "!". Each alternative is a
    // list of terms separated by "&". A plain term must occur in `equip` as a
    // complete "&term&" item; a term prefixed with "~" must NOT occur.
    // A NULL `comp` counts as satisfied (returns 1).
    [Microsoft.SqlServer.Server.SqlFunction]
    public static SqlInt32 EquipmentCompare(SqlString equip, SqlString comp)
    {
        if (comp.IsNull)
        {
            return 1;
        }

        // SqlString.ToString() yields the text "Null" for NULL; search an empty list instead.
        string equipment = "&" + (equip.IsNull ? string.Empty : equip.Value);
        // The trailing "! " guarantees the last alternative is terminated and
        // leaves a one-character remainder that ends the loop.
        string compString = comp.ToString() + "! ";

        bool matched = false;
        while (compString.Length > 1)
        {
            int separator = compString.IndexOf('!');
            string alternative = compString.Substring(0, separator);
            compString = compString.Substring(separator + 1);

            // Each alternative is judged on its own. Previously the outcome of a
            // failed alternative leaked into the next one when that one consisted
            // only of satisfied "~" terms.
            matched = MatchesAlternative(equipment, alternative);
            if (matched)
            {
                break;
            }
        }
        return matched ? 1 : 0;
    }

    // True when every term of one "&"-separated alternative is satisfied by `equipment`.
    private static bool MatchesAlternative(string equipment, string alternative)
    {
        foreach (string term in alternative.Split('&'))
        {
            bool negated = term.Length > 0 && term[0] == '~';
            string item = negated ? term.Substring(1) : term;
            bool present = equipment.Contains("&" + item + "&");
            // Fail on a required term that is missing or a forbidden term that is present.
            if (present == negated)
            {
                return false;
            }
        }
        return true;
    }
}
I think you may speed up your code by using HashSet. Try this:
// Items to look for and the (potentially huge) list to search, both "&"-separated.
var str1 = "A=1&AW=43&KO=96&R=7&WW=15&ZJ=80";
var str2 = "A=1&AG=77&AW=43&.....&KF=11&KO=96&.....&QW=55&R=7&....&WV=1&WW=15&....ZJ=80&";
// Split the big string once; each membership check against the set is then a hash lookup.
var largeStringSet = new HashSet<string>(str2.Split('&'));
// True only when every item of str1 is an exact item of str2.
// NOTE: the "....." elisions in the sample are literal text, so "....ZJ=80" is not "ZJ=80".
var allPartsIncluded = largeStringSet.IsSupersetOf(str1.Split('&'));

String formatting in C#?

I have some problems to format strings from a List<string>
Here's a picture of the List values:
Now I managed to manipulate some of the values but others not, here's what I used to manipulate:
// Reduces a start-up entry to its path: removes all double quotes and cuts
// off everything from the first "-" (the command-line switches) onwards.
// Returns an empty string for null or empty input.
string prepareStr(string itemToPrepare) {
    if (string.IsNullOrEmpty(itemToPrepare)) {
        return string.Empty;
    }
    // Always strip quotes. Previously an entry without quotes produced an empty result.
    string unquoted = itemToPrepare.Replace("\"", "");
    // Cut at the first dash if there is one. Previously an entry without a dash
    // also produced an empty result.
    int dashIndex = unquoted.IndexOf('-');
    string path = dashIndex >= 0 ? unquoted.Substring(0, dashIndex) : unquoted;
    // Drop the blank that separated the path from its first switch.
    return path.TrimEnd();
}
Here's a picture of the Result:
I need to get the clear Path without the (-startup , -minimzed , MSRun , double apostrophes).
What am I doing wrong here?
EDIT my updated code:
// Builds the start-up list shown in lstStartUp from the raw "name||command" entries.
// For each entry the quoted path is extracted. When no quoted path is found,
// the raw command text is listed instead.
void getStartUpEntries() {
    var startEntries = StartUp.getStartUp();
    if (startEntries != null && startEntries.Count != 0) {
        for (int i = 0; i < startEntries.Count; i++) {
            var entry = startEntries[i];
            var splitEntry = entry.Split(new string[] { "||" }, StringSplitOptions.None);
            // An entry without "||" has no command part to show.
            if (splitEntry.Length < 2) {
                continue;
            }
            // The pattern expects the "||" separator, so it must be applied to the
            // whole entry. Applied to the text after the split it could never match.
            var match = Regex.Match(entry, @"\|\|""(?<path>(?:\""|[^""])*)""");
            var finishedPath = match.Groups["path"].ToString();
            if (!string.IsNullOrEmpty(finishedPath)) {
                // Only ask for an icon when the path really exists.
                bool exists = File.Exists(finishedPath) || Directory.Exists(finishedPath);
                var _startUpObj = new StartUp(splitEntry[0], finishedPath,
                    "Aktiviert: ", new Uri("/Images/inWatch.avOK.png", UriKind.RelativeOrAbsolute),
                    StartUp.getIcon(exists ? finishedPath : string.Empty));
                _startUpList.Add(_startUpObj);
            }
            else {
                // No quoted path: fall back to the raw command text. This is an
                // else branch so that each entry is listed once, not twice.
                var _startUpObjLast = new StartUp(splitEntry[0], splitEntry[1],
                    "Aktiviert: ", new Uri("/Images/inWatch.avOK.png", UriKind.RelativeOrAbsolute),
                    StartUp.getIcon(string.Empty));
                _startUpList.Add(_startUpObjLast);
            }
        }
        lstStartUp.ItemsSource = _startUpList.OrderBy(item => item.Name).ToList();
    }
}
You could use a regex to extract the path:
// Sample entry: name, "||" separator, quoted path, then switches.
var str = @"0Raptr||""C:\Program Files (x86)\Raptr\raptrstub.exe"" --startup";
// Capture everything between the quote that follows "||" and the last quote.
var match = Regex.Match(str, @"\|\|""(?<path>(?:\""|[^""])*)""");
Console.WriteLine(match.Groups["path"]);
This will match any (even empty) text (either an escaped quote, or any character which is not a quote) between two quote characters preceded by two pipe characters.
Similarly, you could simply split on the double quotes as I see that's a repeating occurrence in your examples and take the second item in the split array:
// Cut the text at every double quote; index 1 is the text after the first quote.
var path = s.Split('"')[1];
This is an update to your logic without using any Regex:
// Returns the text between the first pair of double quotes in itemToPrepare,
// or null when the input is null or has no complete quoted section.
private string prepareStr(string itemToPrepare)
{
    // The delimiter is a bare double quote. The backslash seen in a debugger
    // display is only the escape notation and is not part of the data.
    const string quote = "\"";

    if (itemToPrepare == null)
    {
        return null;
    }

    int startPoint = itemToPrepare.IndexOf(quote, StringComparison.Ordinal);
    if (startPoint < 0)
    {
        return null;
    }
    startPoint += quote.Length;

    int endPoint = itemToPrepare.IndexOf(quote, startPoint, StringComparison.Ordinal);
    if (endPoint < 0)
    {
        return null;
    }
    return itemToPrepare.Substring(startPoint, endPoint - startPoint);
}

How to choose an XML Node? (Using LINQ, XPath, anything is fine)

I have an XML like below :
<Decide Name="MemoryCheck" CommonUnit="MB">
<Decision CellColor="Red" Status="Critical" Exp="<=100" />
<Decision CellColor="Yellow" Status="Warning" Exp="<=200 & >100"/>
<Decision CellColor="Green" Status="OK" Exp=">200" />
</Decide>
For Input 50 MB, Output returned should be "Critical-Red"
For Input 142 MB, Output returned should be "Warning-Yellow"
For Input 212 MB, Output returned should be"OK-Green"
How to go about this using C# ??
Xml Name is "Decide.xml" and Code I have now :
// Load the decision table and take its first <Decide> element.
XmlDocument xmldecide = new XmlDocument();
xmldecide.Load("C:\\Decide.xml");
XmlNodeList decidelist = xmldecide.GetElementsByTagName("Decide");
XmlNode xdecide = decidelist[0];
// The three sample inputs. The variable is declared once and then reassigned.
string input = "50"; // Unit in MB
// Now I have to display the desired O/P "Critical-Red"
input = "142"; // Unit in MB
// Now I have to display the desired O/P "Warning-Yellow"
input = "212"; // Unit in MB
// Now I have to display the desired O/P "OK-Green"
Just a suggestion - If you have control of that xml you should consider creating a min and max attribute. Having to parse out conditional and integer information from a single attribute is ugly. That said, assuming you can't change the xml, here's a solution. It assumes the conditionals in the attribute are always in a similar format.
// Returns the "Status-Color" text of the first <Decision> whose Exp attribute
// is satisfied by `size`, or "No condition was met!" when none is.
//
// Exp is a list of terms separated by "&". Supported terms are "<=N"
// (size must be at most N) and ">N" (size must be greater than N). The terms
// may appear in any order. A decision with an unsupported or unparsable term
// never matches.
public static string AlertLevel(this XDocument decisionDocument, int size)
{
    foreach (var item in decisionDocument.Descendants("Decision"))
    {
        var expAttribute = item.Attribute("Exp");
        if (expAttribute == null) continue;

        bool anyTerm = false;
        bool allSatisfied = true;
        foreach (var rawTerm in expAttribute.Value.Split('&'))
        {
            var term = rawTerm.Trim();
            int bound;
            if (term.Length > 2 && term[0] == '<' && term[1] == '=' &&
                int.TryParse(term.Substring(2), out bound))
            {
                // "<=N" includes N itself.
                anyTerm = true;
                allSatisfied &= size <= bound;
            }
            else if (term.Length > 1 && term[0] == '>' &&
                int.TryParse(term.Substring(1), out bound))
            {
                // ">N" excludes N.
                anyTerm = true;
                allSatisfied &= size > bound;
            }
            else
            {
                // Unknown operator or non-numeric bound: this decision cannot be evaluated.
                allSatisfied = false;
            }
        }

        if (anyTerm && allSatisfied)
            return CreateResultString(item);
    }
    return "No condition was met!";
}
// Formats a decision element as "<Status>-<CellColor>". A missing attribute is
// replaced by the placeholder "Status" or "Color" respectively.
private static string CreateResultString(XElement item)
{
    var status = item.Attribute("Status");
    var color = item.Attribute("CellColor");

    string statusPart = status != null ? status.Value : "Status";
    string colorPart = color != null ? "-" + color.Value : "-Color";
    return statusPart + colorPart;
}
usage
// Load the decision table once, then print the alert level for each sample size.
var xmlDecide = XDocument.Load("Decide.xml");
foreach (int sizeInMb in new[] { 50, 142, 212 })
{
    Console.WriteLine(sizeInMb + "MB: " + xmlDecide.AlertLevel(sizeInMb));
}
EDIT: You can use the same code for use with XmlDocument instead of XDocument. Just change "Attribute" to "Attributes.GetNamedItem" and "Descendants" to "GetElementsByTagName"
This is complicated.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Xml;
namespace ConsoleApplication1
{
// Demo: loads the decision table from an XML string, converts each <Decision>
// into a Decision object with numeric bounds, and looks up one sample size.
class Program
{
    static void Main(string[] args)
    {
        // "<" and "&" are not allowed raw inside XML attribute values, so they are
        // written as entities. The parser hands back the decoded text ("<=100",
        // "<=200 & >100"), so the parsing below is unchanged.
        string XML =
            "<Decide Name=\"MemoryCheck\" CommonUnit=\"MB\">" +
            "<Decision CellColor=\"Red\" Status=\"Critical\" Exp=\"&lt;=100\" />" +
            "<Decision CellColor=\"Yellow\" Status=\"Warning\" Exp=\"&lt;=200 &amp; >100\"/>" +
            "<Decision CellColor=\"Green\" Status=\"OK\" Exp=\">200\" />" +
            "</Decide>";
        XmlDocument doc = new XmlDocument();
        doc.LoadXml(XML);
        XmlNodeList memoryCheck = doc.GetElementsByTagName("Decision");
        foreach (XmlNode decision in memoryCheck)
        {
            Decision newDecision = new Decision();
            Decision.decisions.Add(newDecision);
            newDecision.Cellcolor = decision.Attributes.GetNamedItem("CellColor").Value;
            newDecision.status = decision.Attributes.GetNamedItem("Status").Value;
            // Defaults: no lower limit above zero, no upper limit.
            newDecision.low = 0;
            newDecision.high = null;
            string exps = decision.Attributes.GetNamedItem("Exp").Value;
            // Terms are separated by blanks; the lone "&" is not a term and is ignored below.
            string[] expsArray = exps.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string exp in expsArray)
            {
                if (exp.StartsWith("<="))
                {
                    // "<=N": N is the inclusive upper bound.
                    newDecision.high = int.Parse(exp.Substring(2));
                }
                if (exp.StartsWith(">"))
                {
                    // ">N" excludes N. The lookup compares with ">=", so the
                    // first admissible value N + 1 is stored.
                    newDecision.low = int.Parse(exp.Substring(1)) + 1;
                }
            }
        }
        Decision result = Decision.GetBySize(212);
        // Show the outcome in the "Status-Color" form asked for in the question.
        Console.WriteLine(result == null
            ? "No condition was met!"
            : result.status + "-" + result.Cellcolor);
    }
}
// One row of the decision table: a display color, a status text, and the
// memory range it applies to. All known rows are kept in the static list.
public class Decision
{
    // Every decision registered so far, in registration order.
    public static List<Decision> decisions = new List<Decision>();

    public string Cellcolor { get; set; }
    public string status { get; set; }
    // Lower bound, compared with ">=". A null lower bound never matches.
    public int? low { get; set; }
    // Upper bound, compared with "<=". Null means "no upper bound".
    public int? high { get; set; }

    // Returns the first registered decision whose range contains `memory`,
    // or null when there is none.
    public static Decision GetBySize(int memory)
    {
        foreach (Decision candidate in decisions)
        {
            bool aboveLow = memory >= candidate.low;
            if (!aboveLow)
            {
                continue;
            }
            if (candidate.high == null || memory <= candidate.high)
            {
                return candidate;
            }
        }
        return null;
    }
}
}

How to Get Data from index of String

I'm new in c#. and I have some Question...
I have String following this code
string taxNumber = "1222233333445";
I want to get data from This string like that
string a = "1"
string b = "2222"
string c = "33333"
string d = "44"
string e = "5"
Please Tell me about Method for get Data From String.
Thank You Very Much ^^
Use the String.Substring(int index, int length) method
// Substring(startIndex, length): each group starts where the previous one ended.
string a = taxNumber.Substring(0, 1);   // "1"
string b = taxNumber.Substring(1, 4);   // "2222"
string c = taxNumber.Substring(5, 5);   // "33333"
string d = taxNumber.Substring(10, 2);  // "44"
string e = taxNumber.Substring(12, 1);  // "5"
Oh well, the best I can come up with is this:
// One string per distinct character, in order of first appearance, repeated as
// often as that character occurs anywhere in taxNumber.
IEnumerable<string> numbers
    = taxNumber.GroupBy(digit => digit)
        .Select(sameDigits => new string(sameDigits.Key, sameDigits.Count()));
foreach (string numberGroup in numbers)
{
    Console.WriteLine(numberGroup);
}
Outputs:
1
2222
33333
44
5
This can also do , you dont need to fix the no of characters, you can check by changing the no of 1's , 2's etc
string taxNumber = "1222233333445";
// Each group spans from the first to the last occurrence of its digit, so its
// length is (last index - first index + 1).
int first1 = taxNumber.IndexOf("1");
string s1 = taxNumber.Substring(first1, taxNumber.LastIndexOf("1") - first1 + 1);
int first2 = taxNumber.IndexOf("2");
string s2 = taxNumber.Substring(first2, taxNumber.LastIndexOf("2") - first2 + 1);
int first3 = taxNumber.IndexOf("3");
string s3 = taxNumber.Substring(first3, taxNumber.LastIndexOf("3") - first3 + 1);
You can use Char.IsDigit to identify digits out of string, and may apply further logic as follows:
// Walk the string and react whenever a digit repeats its predecessor.
// Start at 1: the first character has no predecessor to compare with.
for (int i = 1; i < taxNumber.Length; i++)
{
    if (Char.IsDigit(taxNumber[i]))
    {
        if (taxNumber[i - 1] == taxNumber[i])
        {
            /*Further assign values*/
        }
    }
}
Try this Code
string taxNumber = "1222233333445";
// Collects each run of identical consecutive characters as one string.
List<string> finals = new List<string>();
int runStart = 0;
// i runs one past the end so that the final run is closed by the same code path.
// This also covers a single-character input, which previously produced no group.
for (int i = 1; i <= taxNumber.Length; i++)
{
    bool runEnds = i == taxNumber.Length || taxNumber[i] != taxNumber[runStart];
    if (runEnds)
    {
        finals.Add(taxNumber.Substring(runStart, i - runStart));
        runStart = i;
    }
}
and check value of finals string list
you may use regex:
// Each alternative matches a run of one repeated digit.
string strRegex = @"(1+|2+|3+|4+|5+|6+|7+|8+|9+|0+)";
RegexOptions myRegexOptions = RegexOptions.None;
Regex myRegex = new Regex(strRegex, myRegexOptions);
string strTargetString = @"1222233333445";
// Collect the matches themselves. Regex.Split with a capturing group would
// interleave the runs with empty strings.
MatchCollection runs = myRegex.Matches(strTargetString);
string[] groups = new string[runs.Count];
for (int i = 0; i < runs.Count; i++)
{
    groups[i] = runs[i].Value;
}
return groups;

How can i remove ids one by one from querystring in asp.net using c#?

I want to remove ids one by one from the "ids" query string parameter in my URL. How can I do this? (using ASP.NET 4.0, C#)
Default.aspx?ids=10,2,6,5
I want to remove "ids=6", whether it is the first, middle or last id in the list, so that I end up with this:
Default.aspx?ids=10,2,5,
Step 1. Have your ids in an array by:-
// QueryString["ids"] is null when the parameter is absent; fall back to an empty string.
string[] idsarray = (Request.QueryString["ids"] ?? string.Empty).Split(',');
step 2. create a function to remove as per your language
// Returns the ids joined by "," with one id left out.
//   at == "first"  : drops the first id
//   at == "middle" : drops the id at index Length / 2
//   anything else  : drops the last id
// A null or empty array yields an empty string.
string removeidat(string[] id, string at)
{
    if (id == null || id.Length == 0)
    {
        return "";
    }

    int remat;
    if (at == "first")
    {
        remat = 0;
    }
    else if (at == "middle")
    {
        remat = id.Length / 2;
    }
    else
    {
        remat = id.Length - 1;
    }

    // Copy every id except the removed one. The loop covers the whole array;
    // the old bound (i < GetUpperBound(0)) always skipped the last id.
    string[] kept = new string[id.Length - 1];
    int next = 0;
    for (int i = 0; i < id.Length; i++)
    {
        if (i != remat)
        {
            kept[next++] = id[i];
        }
    }
    return string.Join(",", kept);
}
Example : if you want to remove last id your code would be
// QueryString["ids"] is null when the parameter is absent; fall back to an empty string.
string[] idsarray = (Request.QueryString["ids"] ?? string.Empty).Split(',');
string newids = removeidat(idsarray, "last");
// Rebuild the ids list without the id "6" and redirect to the resulting URL.
// Nothing happens when the "ids" parameter is absent.
string strIDs = Request.QueryString["ids"];
if (strIDs != null)
{
    string[] allIds = strIDs.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
    string[] remainingIds = Array.FindAll(allIds, candidate => candidate != "6");
    string target = "Default.aspx?ids=" + string.Join(",", remainingIds);
    Response.Redirect(target);
}

Categories