The recursive method is getting into deadloop - c#

I am writing a simple crawler based on HTMLAgilityPack and Fizzler, in order to check if a keyword is contained anywhere on the webpage and it's corresponding sublinks. Then the same procedure is repeated for all of the sublinks up to 50 level deep. So that the number grows exponentially.
The issue is that I wanted to convert the method that I have written to a recursive one, but it doesn't work - gets stuck after first link, as well as works really slow.
This is what I've done currently:
public static void GetAllLinks(HtmlWeb web, List<string> relevantLinks, string inputLink)
{
string mainLink = "http://www.cnet.com";
Console.WriteLine("Current count of links: " + relevantLinks.Count + "\tCurrent link: " + inputLink);
HtmlDocument html = web.Load(inputLink);
HtmlDocument htmlInner = new HtmlDocument();
html.DocumentNode.Descendants()
.Where(n => n.Name == "script" || n.Name == "style")
.ToList()
.ForEach(n => n.Remove());
var text = htmlInner.DocumentNode.InnerText.ToLower();
text = Regex.Replace(text, #"\r\n?|\n", "");
text = Regex.Replace(text, " {2,}", " ");
text = text.Trim();
if (text.Contains("microsoft"))
{
if (!relevantLinks.Contains(inputLink))
{
relevantLinks.Add(inputLink);
}
}
var linkTagsList = html.DocumentNode.QuerySelectorAll("a").ToList();
foreach (var linkTag in linkTagsList)
{
if (linkTag.Attributes["href"] != null)
{
var link = linkTag.Attributes["href"].Value;
// Check if the link found is the sublink of the main link
if (!link.Contains(mainLink))
{
// Check if only partial link then concat with main one
if (link.Substring(0, 1) == "/")
{
if (inputLink.Substring(inputLink.Length - 1, 1) == "/")
inputLink = inputLink.Substring(0, inputLink.Length - 1);
link = inputLink + link;
}
else
{
link = String.Empty;
}
}
if (!string.IsNullOrEmpty(link))
{
Console.WriteLine(link);
GetAllLinks(web, relevantLinks, link);
}
}
}
}
Any hint or advice is highly appreciated.

Related

Linq Or IEnumerable taking Long to run when using Parallel.ForEach [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 5 years ago.
Improve this question
I have an application that reads a csv (200 mb).
var lines = File.ReadLines(file).ToList();
The csv stores pricing information and has around 500k records in it.
The code snippet below when calling StoreValues takes around 18 seconds.
Is there a way to speed this up ?
distinctMarketIds = 54 int values
The lines collection will have 500k lines and each line [0] has marketId which im matching.
IEnumerable<string[]> newList = (lines.Where(t => distinctMarketIds.Contains(t.Split(',')[0]))
.Select(t => t.Split(',')));
log.Info(("Time Taken To Get Filtered Prices " + elapsed.Elapsed.TotalSeconds +" seconds."));
StoreValues(newList, file); //store the prices
log.Info(("Time Taken To Store Prices " + elapsed.Elapsed.TotalSeconds + " seconds."));
The Store value Method uses Parallel.ForEach
Parallel.ForEach(finalLines, new ParallelOptions { MaxDegreeOfParallelism = MaxThreads }, (line) =>
{
});
I cannot seem to find why it would take 18 seconds to go through this loop.
I have tested on another machine with similar specs and it takes 2.5 seconds for StoreValue Method
#region LoadPriceDataFromCsvFile
public int LoadPriceDataFromCsvFile(string filename, string[] marketIdList, int maxThreads)
{
MaxThreads = maxThreads;
int filteredRows = 0;
string[] files = Directory.GetFiles(filename, "*.csv");
elapsed.Start();
log.InfoFormat("Total Csv files to Scan {0}",files.Length);
Parallel.ForEach(files, new ParallelOptions { MaxDegreeOfParallelism = MaxThreads }, (file) =>
{
try
{
log.InfoFormat("About to Scan File {0}", file);
ScanCsvFilesAndGetPrices(file);
}
catch (System.OutOfMemoryException e)
{
log.Info(e);
}
catch (Exception e)
{
log.Info(e);
}
});
return PriceCollection.Count;
}
#endregion
#region ScanCsvFilesAndGetPrices
private void ScanCsvFilesAndGetPrices(string file)
{
try
{
log.Info(("Time Taken " + elapsed.Elapsed.TotalSeconds + " seconds."));
var lines = File.ReadLines(file).ToList();
log.Info(("Time Taken To Read csv " + elapsed.Elapsed.TotalSeconds + " seconds."));
if (lines.Any())
{
log.Info(("Time Taken To Read Any " + elapsed.Elapsed.TotalSeconds + " seconds."));
var firstLine = lines.ElementAt(1); // This is the First Line with Headers
log.Info(("Time Taken To Read First Line " + elapsed.Elapsed.TotalSeconds + " seconds."));
var lastLine = lines.Last(); // This is the Last line in the csv file
log.Info(("Time Taken To Read Last Line " + elapsed.Elapsed.TotalSeconds + " seconds."));
var header = lines.First().Split(',');
log.Info(("Time Taken To Split Header Line " + elapsed.Elapsed.TotalSeconds + " seconds."));
GetIndexOfFields(header);
log.Info(("Time Taken To Read Header " + elapsed.Elapsed.TotalSeconds + " seconds."));
// Get the Publish Date Time
if (PublishedDatetime_Index != -1)
{
var fLine = firstLine.Split(',');
var lLine = lastLine.Split(',');
var firstLineDatetime = (fLine[PublishedDatetime_Index].Contains("+"))? fLine[PublishedDatetime_Index].Remove(fLine[PublishedDatetime_Index].IndexOf("+",StringComparison.Ordinal)): fLine[PublishedDatetime_Index];
var publishDateTimeFirstLine =FileNameGenerator.GetCorrectTime(Convert.ToDateTime(firstLineDatetime));
string lastLineDatetime = (lLine[PublishedDatetime_Index].Contains("+"))? lLine[PublishedDatetime_Index].Remove(lLine[PublishedDatetime_Index].IndexOf("+",StringComparison.Ordinal)): lLine[PublishedDatetime_Index];
var publishDateTimeLastLine =FileNameGenerator.GetCorrectTime(Convert.ToDateTime(lastLineDatetime));
// check if the order execution date time of any order lieas between the date time of first and last line of csv so we can add that csv to our filtered list
string[] distinctMarketIds = OrderEntityColection.FindAll(obj =>obj.OrderLastChangeDateTimeUtc >= publishDateTimeFirstLine &&obj.OrderLastChangeDateTimeUtc <= publishDateTimeLastLine).Select(obj => obj.MarketId.ToString())
.Distinct()
.ToArray();
log.InfoFormat("Total Markets Identified {0}",distinctMarketIds.Length);
List<OrderEntity> foundOrdersList = OrderEntityColection.FindAll(obj =>obj.OrderLastChangeDateTimeUtc >= publishDateTimeFirstLine &&obj.OrderLastChangeDateTimeUtc <= publishDateTimeLastLine);
lock (FoundOrdersList)
{
FoundOrdersList.AddRange(foundOrdersList);
}
log.InfoFormat("Total Orders Identified {0}", FoundOrdersList.Count());
log.Info(("Time Taken To Read Execution Times and Market " + elapsed.Elapsed.TotalSeconds +" seconds."));
if (distinctMarketIds.Length != 0)
{
IEnumerable<string[]> newList =
(lines.Where(
t => distinctMarketIds.Contains(t.Split(',')[0]))
.Select(t => t.Split(','))
);
log.Info(("Time Taken To Get Filtered Prices " + elapsed.Elapsed.TotalSeconds +" seconds."));
// this is taking longer than expected. Somthing to do with IEnumerable<string[]>
StoreValues(newList, file); //store the prices
log.Info(("Time Taken To Store Prices " + elapsed.Elapsed.TotalSeconds + " seconds."));
}
}
}
}
catch (Exception e)
{
log.Info(e);
}
}
#endregion
#region GetIndexOfFields
// These are the fields we want to Look for from the headers and accordingly get their location
private void GetIndexOfFields(IEnumerable<string> lineHeader)
{
int index = 0;
foreach (var column in lineHeader)
{
if (column == "MarketId")
{
MarketId_Index= index;
}
if (column == "Bid")
{
Bid_Index = index; ;
}
if (column == "Ask")
{
Ask_Index = index;
}
if (column == "Mid")
{
Mid_Index = index;
}
if (column == "Is_Indicative")
{
Is_Indicative_Index = index;
}
if (column == "Price_Engine")
{
Price_Engine_Index = index;
}
if (column == "PublishedDatetime")
{
PublishedDatetime_Index = index;
}
if (column == "Market_Is_Open")
{
Market_Is_Open_Index = index;
}
if (column == "AuditId")
{
AuditId_Index = index;
}
if (column == "Row_Update_Version")
{
Row_Update_Version_Index = index;
}
if (column == "DontPublish")
{
DontPublish_Index = index; ;
}
index++;
}
}
#endregion
#region StoreValues
private void StoreValues(IEnumerable<string[]> finalLines, string file)
{
log.InfoFormat("total Finel Lines Sent for Storing {0}", finalLines.Count());
Parallel.ForEach(finalLines, new ParallelOptions { MaxDegreeOfParallelism = MaxThreads }, (line) =>
{
var prices = new Prices();
// the code that you want to measure comes here
var datetime = (line[PublishedDatetime_Index].Contains("+")) ? line[PublishedDatetime_Index].Remove(line[PublishedDatetime_Index].IndexOf("+", StringComparison.Ordinal)) : line[PublishedDatetime_Index];
if (!IsNullOrEmpty(datetime))
{
prices.PublishedDatetime = Convert.ToDateTime(datetime);
}
if (!IsNullOrEmpty(line[MarketId_Index]))
{
prices.MarketId = Convert.ToInt32(line[MarketId_Index]);
}
if (!IsNullOrEmpty(line[Bid_Index]))
{
prices.Bid = Convert.ToDecimal(line[Bid_Index]);
}
if (!IsNullOrEmpty(line[Ask_Index]))
{
prices.Ask = Convert.ToDecimal(line[Ask_Index]);
}
if (!IsNullOrEmpty(line[Mid_Index]))
{
prices.Mid = Convert.ToDecimal(line[Mid_Index]);
}
if (!IsNullOrEmpty(line[Is_Indicative_Index]))
{
prices.Is_Indicative = Convert.ToBoolean(line[Is_Indicative_Index]);
}
else
{
prices.Is_Indicative = false;
}
if (!IsNullOrEmpty(line[Price_Engine_Index]))
{
prices.Price_Engine = Convert.ToString(line[Price_Engine_Index]);
}
if (!IsNullOrEmpty(line[Market_Is_Open_Index]))
{
prices.Market_Is_Open = line[Market_Is_Open_Index] == "1";
}
if (!IsNullOrEmpty(line[AuditId_Index]))
{
prices.AuditId = Convert.ToString(line[AuditId_Index]);
}
if (!IsNullOrEmpty(line[Row_Update_Version_Index]))
{
prices.Row_Update_Version = Convert.ToString(line[Row_Update_Version_Index]);
}
if (!IsNullOrEmpty(line[DontPublish_Index]))
{
if (DontPublish_Index != 0)
{
prices.DontPublish = line[DontPublish_Index] == "1";
}
}
prices.SbProdFile = file;
lock (PriceCollection)
{
PriceCollection.Add(prices);
}
});
}
I don't see how Parallel.ForEach could help to improve performance when you need to process a single file
Don't use File.ReadLines(file).ToList(), either use ReadAllLines if you want all lines in memory or use ReadLines if you want to process the lines one after another
Why do you split the line multiple times?
Use a HashSet<string> for distinctMarketIds:
This should be more efficient:
var marketIdSet = new HashSet<string>(OrderEntityColection.FindAll(obj =>obj.OrderLastChangeDateTimeUtc >= publishDateTimeFirstLine &&obj.OrderLastChangeDateTimeUtc <= publishDateTimeLastLine).Select(obj => obj.MarketId.ToString()));
IEnumerable<string[]> allFields = File.ReadLines(file)
.Select(line => line.Split(','))
.Where(arr => marketIdSet.Contains(arr[0]));
Note that due to deferred execution of Select and Where this is just a query, it is yet not executed. So whenever you will use allFields you will execute this query again. So it's a good idea to create a collection, f.e. with allFields.ToList() which you pass to StoreValues:
StoreValues(allFields.ToList(), file); //store the prices
If you pass a collection you could really benefit from using Parallel.ForEach in StoreValues.
static void Main()
{
//var lines = File.ReadLines(file).ToList();
// this is just a fast generation for sample data
var lines = Enumerable.Range(0, 500000)
.Select(i => string.Join(",", i % 7, i, i & 2))
.ToList();
// HashSet will work as an indexed store and will match faster in your loop
var distinctMarketIds = new HashSet<string>{
"0", "2", "3", "5"
};
// Do this if you are to use the method syntax instead of the query syntax
// var newList = lines.Select(l=>l.Split(','))
// .Where(ps=>distinctMarketIds.Contains(ps[0]));
var newList = from l in lines
// this will parse the string once versus twice as you were doing before
let ps = l.Split(',')
where distinctMarketIds.Contains(ps[0])
select ps;
// can't see the content of your `StoreValues` method but writing to a
// file in parallel will never work as expected.
using (var stream = new StreamWriter("outfile.txt"))
foreach (var l in newList)
stream.WriteLine(string.Join(";", l));
}

How to check if multiple checkboxes are checked

So in my program, i have three checboxes (A, B and C). and I want to save the content of the checkbox the is checked to a text file. I am doing this using if statements as shown below:
if (a.IsChecked == true)
{
res = a.Content.ToString() + " is checked";
}
else if (b.IsChecked == true)
{
res = b.Content.ToString() + " is checked";
}
else if (c.IsChecked == true)
{
res = c.Content.ToString() + " is checked";
}
And here is where i am saving the above values to a string and then later in my code to a text file
string test = res;
Now this is working for me. So i decided to try to check if multiple checkboxes are being checked. So added the below if statements:
else if ((a.IsChecked == true) && (b.IsChecked == true) && (c.IsChecked == true))
{
res= a.Content.ToString() + " " + b.Content.ToString() + " " + c.Content.ToString()
}
but this isn't working with me because in the end res is printed in the text file as a rather than a b c. Any idea what i am doing wrong?
Also please note that i already initialized res at the top of my code as string:
string res;
I am not getting any error when i run my code so i am not sure where my mistake is. any help with this is much much appreciated.
thanks a lot :)
Its a good practice to use a StringBuilder in these cases.
On the other hand, if it is ok to have one line for each CheckBox, you can use the following:
StringBuilder sb = new StringBuilder();
checkappend(ref sb, a);
checkappend(ref sb, b);
checkappend(ref sb, c);
string res = sb.ToString();
in which
static void checkappend(ref StringBuilder sb, CheckBox ck)
{
sb.Append(ck.Content.ToString());
sb.Append(ck.IsChecked == true ? "is checked." : " is NOT checked.");
sb.Append(Environment.NewLine);
}
Note that creating a separate class can help you when there are many CheckBoxes in a List. You can simply use
foreach (var ck in listOfCheckBoxes)
checkappend(ref ck, c);
You can implement it this way:
string res = "";
if (a.IsChecked)
{
res += a.Content.ToString();
}
if (b.IsChecked)
{
res += b.Content.ToString();
}
if (c.IsChecked)
{
res += c.Content.ToString();
}
or simple
string res = $"{(a.IsChecked?a.Content+" ":"")}{(b.IsChecked?b.Content+" ":"")}{(c.IsChecked?c.Content:"")}";
That's multiple combinations to check. Simply remove else from first code snippet to run all checks one after another. You will get only report from last successful check, to have several reports you have to accumulate them somehow (add to a list, combine in multi-line string, etc.).
Here is a simple one-liner (using linq):
var result = string.Join(" and ", new[] { a, b, c }.Where(o => o.IsChecked).Select(o => $"{o.Content} is checked"));

Need to count incidents found multiple times within a text file

I'm really trying to count the number of times a regex is found within a text but there are many regex to be found within a text file.
the problem is that my code only counts the first time, the subsequent IF that contains the other regexes will not count, Everything works but the counting of on each line that error occurred :(
could you please shed some light?
int counter = 1;
string liner;
string pattern = #"access-group\s+\w+\s+out\s+interface\s+\w+";
Boolean foundMatch;
int totalOUTgroups = Lines(ofd.FileName)
.Select(line => Regex.Matches(line, pattern).Count)
.Sum();
if (totalOUTgroups > 0)
{
richTextBox2.SelectionFont = new Font("Courier New", 8);
richTextBox2.AppendText(">>> ACls installed by using access-group using the keyword OUT are NOT supported: " + "\u2028");
richTextBox2.AppendText(">>> Total of incidences found: " + totalOUTgroups.ToString() + "\u2028");
System.IO.StreamReader file = new System.IO.StreamReader(ofd.FileName);
while ((liner = file.ReadLine()) != null)
{
foundMatch = performMatch(pattern, liner);
if (foundMatch)
{
richTextBox2.AppendText("Line: " + counter + " " + liner + "\r\n");
}
counter++;
}
}
//Will end 1
// 2 Mark echo-reply ICMP
int counter2 = 1;
string liner2;
string pattern2 = #"/^(?=.*\baccess-list\b)(?=.*\beq echo-reply\b).*$/gm";
Boolean foundMatch2;
int totalIntACLInt = Lines(ofd.FileName)
.Select(line => Regex.Matches(line, pattern2).Count)
.Sum();
if (totalIntACLInt > 0)
{
richTextBox2.SelectionFont = new Font("Courier New", 8);
richTextBox2.AppendText(" " + "\u2028");
richTextBox2.AppendText(">>> Echo-reply is not necessary: " + "\u2028");
richTextBox2.AppendText(">>> Total of incidences found: " + totalIntACLInt.ToString() + "\u2028");
System.IO.StreamReader file = new System.IO.StreamReader(ofd.FileName);
while ((liner2 = file.ReadLine()) != null)
{
foundMatch2 = performMatch(pattern2, liner2);
if (foundMatch2)
{
richTextBox2.AppendText("Line:" + counter2 + " " + liner2 + "\r\n");
}
counter2++;
}
}
If I understand your question, then the problem you're having is most likely tied to your implementation of performMatch(). Post the code for performMatch() if you want help debugging that.
As #Justin lurman pointed out, try printing out each line and line number while only iterating through the file once. If Regex.Matches(line, pattern) is already working for you, then just make use of that.
For example:
int counter = 1;
string pattern = #"access-group\s+\w+\s+out\s+interface\s+\w+";
var totalMatches = 0;
var output = new StringBuilder();
foreach(var line in Lines(ofd.FileName))
{
var matches = Regex.Matches(line, pattern).Count;
if (matches > 0)
{
totalMatches += matches;
output.AppendLine(string.Format("Line: {0} {1}", counter, line));
}
counter++;
}
if(toatlMatches > 0)
{
richTextBox2.SelectionFont = new Font("Courier New", 8);
richTextBox2.AppendText(">>> ACls installed by using access-group using the keyword OUT are NOT supported: " + "\u2028");
richTextBox2.AppendText(">>> Total of incidences found: " + totalMatches.ToString() + "\u2028");
richTextBox2.AppendText(output.ToString());
}
As a warning I haven't compiled or tested the code above, so use it as a guideline. You can certainly improve upon the code further. To start you could refactor your repeated code into methods.
Update
OK, I still don't know that I'm clear on what exactly your problem is, but I wrote out some code that should achieve what it is that I think you're trying to accomplish. While writing my code I noticed some things about the code you posted that may be causing issues for you.
Your second regex /^(?=.*\baccess-list\b)(?=.*\beq echo-reply\b).*$/gm doesn't look like a valid .NET regex, it looks like a JavaScript regex literal
You're appending text to a RichTextBox control, which has a max length property you may be exceeding. I doubt you're writing out that much text, but it's possible.
When this property is set to 0, the maximum length of the text that can be entered in the control is 64 KB of characters
- source
Here is the relevant snippet from the console app I wrote that reads a text file, line by line, and applies a collection of regexes to each line. If a match is found it stores the pertinent information about each match and then prints out its finding once all lines have been examined.
class CommonError
{
public Regex Pattern { get; private set; }
public string Message { get; private set; }
public List<KeyValuePair<int, IEnumerable<string>>> Details { get; private set; }
public CommonError(Regex pattern, string message)
{
Pattern = pattern;
Message = message;
Details = new List<KeyValuePair<int, IEnumerable<string>>>();
}
}
class Program
{
static void Main(string[] args)
{
//take a file read it once and while reading each line check if that line matches any of a slew of regexes.
//if it does match a regex then add the line number and the matching text into a collection of matches for that regex.
//at the end output all the matches by regex and the totals for each pattern. Along with printing each match also print the line it was found on.
var errorsToFind = new List<CommonError>()
{
new CommonError(new Regex(#"access-group\s+\w+\s+out\s+interface\s+\w+"), "ACls installed by using access-group using the keyword OUT are NOT supported"),
new CommonError(new Regex(#"^(?=.*\baccess-list\b)(?=.*\beq echo-reply\b).*$"), "Echo-reply is not necessary")
};
var errorsFound = FindCommonErrorsInFile(".\\test-file.txt", errorsToFind);
foreach (var error in errorsFound)
{
Console.WriteLine(error.Message);
Console.WriteLine("total incidences found: " + error.Details.Count);
error.Details.ForEach(d => Console.WriteLine(string.Format("Line {0} {1}", d.Key, string.Join(",", d.Value))));
}
}
static IEnumerable<CommonError> FindCommonErrorsInFile(string pathToFile, IEnumerable<CommonError> errorsToFind)
{
var lineNumber = 1;
foreach (var line in File.ReadLines(pathToFile))
{
foreach (var error in errorsToFind)
{
var matches = error.Pattern.Matches(line);
if(matches.Count == 0) continue;
var rawMatches = matches.Cast<Match>().Select(m => m.Value);
error.Details.Add(new KeyValuePair<int, IEnumerable<string>>(lineNumber, rawMatches));
}
lineNumber++;
}
return errorsToFind.Where(e => e.Details.Count > 0);
}
}
If you're still having issues give this code a try--this time I actually compiled it and tested it. Hope this helps.

In file, if line contains substring, get all of the line from the right

I have a file. Each line looks like the following:
[00000] 0xD176234F81150469: foo
What I am attempting to do is, if a line contains a certain substring, I want to extract everything on the right of the substring found. For instance, if I were searching for 0xD176234F81150469: in the above line, it would return foo. Each string is of variable length. I am using C#.
As a note, every line in the file looks like the above, having a base-16 number enclosed in square brackets on the left, followed by a hexadecimal hash and a semicolon, and an english string afterwards.
How could I go about this?
Edit
Here is my code:
private void button1_Click(object sender, EventArgs e)
{
Form1 box = new Form1();
if(MessageBox.Show("This process may take a little while as we loop through all the books.", "Confirm?", MessageBoxButtons.YesNo, MessageBoxIcon.Information) == DialogResult.Yes)
{
XDocument doc = XDocument.Load(#"C:\Users\****\Desktop\books.xml");
var Titles = doc.Descendants("Title");
List<string> list = new List<string>();
foreach(var Title in Titles)
{
string searchstr = Title.Parent.Name.ToString();
string val = Title.Value;
string has = #"Gameplay/Excel/Books/" + searchstr + #":" + val;
ulong hash = FNV64.GetHash(has);
var hash2 = string.Format("0x{0:X}", hash);
list.Add(val + " (" + hash2 + ")");
// Sample output: "foo (0xD176234F81150469)"
}
string[] books = list.ToArray();
File.WriteAllLines(#"C:\Users\****\Desktop\books.txt", books);
}
else
{
MessageBox.Show("Aborted.", "Aborted");
}
}
I also iterated through every line of the file, adding it to a list<>. I must've accidentally deleted this when trying the suggestions. Also, I am very new to C#. The main thing I am getting stumped on is the matching.
You could use File.ReadLines and this Linq query:
string search = "0xD176234F81150469:";
IEnumerable<String> lines = File.ReadLines(path)
.Select(l => new { Line = l, Index = l.IndexOf(search) })
.Where(x => x.Index > -1)
.Select(x => x.Line.Substring(x.Index + search.Length));
foreach (var line in lines)
Console.WriteLine("Line: " + line);
This works if you don't want to use Linq query.
//"I also iterated through every line of the file, adding it to a list<>." Do this again.
List<string> li = new List<string>()
//However you create this string make sure you include the ":" at the end.
string searchStr = "0xD176234F81150469:";
private void button1_Click(object sender, EventArgs e)
{
foreach (string line in li)
{
string[] words;
words = line.Split(' '); //{"[00000]", "0xD176234F81150469:", "foo"}
if (temp[1] == searchStr)
{
list.Add(temp[2] + " (" + temp[1] + ")");
// Sample output: "foo (0xD176234F81150469)"
}
}
}
string file = ...
string search= ...
var result = File.ReadLines(file)
.Where(line => line.Contains(search))
.Select(line => line.Substring(
line.IndexOf(search) + search.Length + 1);
Unfortunately, none of the other solutions worked for me. I was iterating through the hashes using foreach, so I would be iterating through all the items millions of times needlessly. In the end, I did this:
using (StreamReader r = new StreamReader(#"C:\Users\****\Desktop\strings.txt"))
{
string line;
while ((line = r.ReadLine()) != null)
{
lines++;
if (lines >= 6)
{
string[] bits = line.Split(':');
if(string.IsNullOrWhiteSpace(line))
{
continue;
}
try
{
strlist.Add(bits[0].Substring(10), bits[1]);
}
catch (Exception)
{
continue;
}
}
}
}
foreach(var Title in Titles)
{
string searchstr = Title.Parent.Name.ToString();
string val = Title.Value;
string has = #"Gameplay/Excel/Books/" + searchstr + ":" + val;
ulong hash = FNV64.GetHash(has);
var hash2 = " " + string.Format("0x{0:X}", hash);
try
{
if (strlist.ContainsKey(hash2))
{
list.Add(strlist[hash2]);
}
}
catch (ArgumentOutOfRangeException)
{
continue;
}
}
This gave me the output I expected in a short period of time.

String Replacements for Word Merge

using asp.net 4
we do a lot of Word merges at work. rather than using the complicated conditional statements of Word i want to embed my own syntax. something like:
Dear Mr. { select lastname from users where userid = 7 },
Your invoice for this quarter is: ${ select amount from invoices where userid = 7 }.
......
ideally, i'd like this to get turned into:
string.Format("Dear Mr. {0}, Your invoice for this quarter is: ${1}", sqlEval[0], sqlEval[1]);
any ideas?
Well, I don't really recommend rolling your own solution for this, however I will answer the question as asked.
First, you need to process the text and extract the SQL statements. For that you'll need a simple parser:
/// <summary>Parses the input string and extracts a unique list of all placeholders.</summary>
/// <remarks>
/// This method does not handle escaping of delimiters
/// </remarks>
public static IList<string> Parse(string input)
{
const char placeholderDelimStart = '{';
const char placeholderDelimEnd = '}';
var characters = input.ToCharArray();
var placeHolders = new List<string>();
string currentPlaceHolder = string.Empty;
bool inPlaceHolder = false;
for (int i = 0; i < characters.Length; i++)
{
var currentChar = characters[i];
// Start of a placeholder
if (!inPlaceHolder && currentChar == placeholderDelimStart)
{
currentPlaceHolder = string.Empty;
inPlaceHolder = true;
continue;
}
// Start of a placeholder when we already have one
if (inPlaceHolder && currentChar == placeholderDelimStart)
throw new InvalidOperationException("Unexpected character detected at position " + i);
// We found the end marker while in a placeholder - we're done with this placeholder
if (inPlaceHolder && currentChar == placeholderDelimEnd)
{
if (!placeHolders.Contains(currentPlaceHolder))
placeHolders.Add(currentPlaceHolder);
inPlaceHolder = false;
continue;
}
// End of a placeholder with no matching start
if (!inPlaceHolder && currentChar == placeholderDelimEnd)
throw new InvalidOperationException("Unexpected character detected at position " + i);
if (inPlaceHolder)
currentPlaceHolder += currentChar;
}
return placeHolders;
}
Okay, so that will get you a list of SQL statements extracted from the input text. You'll probably want to tweak it to use properly typed parser exceptions and some input guards (which I elided for clarity).
Now you just need to replace those placeholders with the results of the evaluated SQL:
// Sample input
var input = "Hello Mr. {select firstname from users where userid=7}";
string output = input;
var extractedStatements = Parse(input);
foreach (var statement in extractedStatements)
{
// Execute the SQL statement
var result = Evaluate(statement);
// Update the output with the result of the SQL statement
output = output.Replace("{" + statement + "}", result);
}
This is obviously not the most efficient way to do this, but I think it sufficiently demonstrates the concept without muddying the waters.
You'll need to define the Evaluate(string) method. This will handle executing the SQL.
I just finished building a proprietary solution like this for a law firm here.
I evaluated a product called Windward reports. It's a tad pricy, esp if you need a lot of copies, but for one user it's not bad.
it can pull from XML or SQL data sources (or more if I remember).
Might be worth a look (and no I don't work for 'em, just evaluated their stuff)
You might want to check out the razor engine project on codeplex
http://razorengine.codeplex.com/
Using SQL etc within your template looks like a bad idea. I'd suggest you make a ViewModel for each template.
The Razor thing is really easy to use. Just add a reference, import the namespace, and call the Parse method like so:
(VB guy so excuse syntax!)
MyViewModel myModel = new MyViewModel("Bob",150.00); //set properties
string myTemplate = "Dear Mr. #Model.FirstName, Your invoice for this quarter is: #Model.InvoiceAmount";
string myOutput = Razor.Parse(myTemplate, myModel);
Your string can come from anywhere - I use this with my templates stored in a database, you could equally load it from files or whatever. It's very powerful as a view engine, you can do conditional stuff, loops, etc etc.
i ended up rolling my own solution but thanks. i really dislike if statements. i'll need to refactor them out. here it is:
var mailingMergeString = new MailingMergeString(input);
var output = mailingMergeString.ParseMailingMergeString();
public class MailingMergeString
{
private string _input;
public MailingMergeString(string input)
{
_input = input;
}
public string ParseMailingMergeString()
{
IList<SqlReplaceCommand> sqlCommands = new List<SqlReplaceCommand>();
var i = 0;
const string openBrace = "{";
const string closeBrace = "}";
while (string.IsNullOrWhiteSpace(_input) == false)
{
var sqlReplaceCommand = new SqlReplaceCommand();
var open = _input.IndexOf(openBrace) + 1;
var close = _input.IndexOf(closeBrace);
var length = close != -1 ? close - open : _input.Length;
var newInput = _input.Substring(close + 1);
var nextClose = newInput.Contains(openBrace) ? newInput.IndexOf(openBrace) : newInput.Length;
if (i == 0 && open > 0)
{
sqlReplaceCommand.Text = _input.Substring(0, open - 1);
_input = _input.Substring(open - 1);
}
else
{
sqlReplaceCommand.Command = _input.Substring(open, length);
sqlReplaceCommand.PlaceHolder = openBrace + i + closeBrace;
sqlReplaceCommand.Text = _input.Substring(close + 1, nextClose);
sqlReplaceCommand.NewInput = _input.Substring(close + 1);
_input = newInput.Contains(openBrace) ? sqlReplaceCommand.NewInput : string.Empty;
}
sqlCommands.Add(sqlReplaceCommand);
i++;
}
return sqlCommands.GetParsedString();
}
internal class SqlReplaceCommand
{
public string Command { get; set; }
public string SqlResult { get; set; }
public string PlaceHolder { get; set; }
public string Text { get; set; }
protected internal string NewInput { get; set; }
}
}
internal static class SqlReplaceExtensions
{
public static string GetParsedString(this IEnumerable<MailingMergeString.SqlReplaceCommand> sqlCommands)
{
return sqlCommands.Aggregate("", (current, replaceCommand) => current + (replaceCommand.PlaceHolder + replaceCommand.Text));
}
}

Categories