Related
I am having a bit of a conundrum here... basically I am performing some very very basic file compression steps as follows:
open file and read as string/into a string
parse through the string, and replace repeating patterns with smaller size of text that represents the pattern (ex: aaaaaaaaaaa (11 chars) is replaced with [a#$%11] (8 chars))
save the new, smaller string, into a separate file (can compare sizes)
For some reason, even though the new string in memory is like, 3% smaller than the original string, when I save the string into a file, the file itself is BIGGER than the original file on the file system?? How is that even possible? If someone could explain that to me it would be great!
Here is the code I am using to do this:
void bkg_DoWork(object sender, DoWorkEventArgs e)
{
try
{
string file = File.ReadAllText(this.txt_CompressFilename.Text);
int olength = file.Length;
int nlength = 0;
decimal pct = 0;
string lastchar = "";
int count = 0;
List<RepeatingPattern> SinglePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> DoublePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> TriplePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> QuadruplePatterns = new List<RepeatingPattern>();
UpdateProgress("Read file contents", 0, 1, 6);
UpdateProgress("Finding single character replacements.", pct, 1, 6);
//single character replaces.
for (int i = 0; i < olength; i++)
{
if (file[i].ToString() == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[a#$%#]
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
count = 0;
lastchar = file[i].ToString();
}
}
//handle possible trailing pattern
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
if (SinglePatterns.Count > 0)
for (int i = 0; i < SinglePatterns.Count; i++)
file = file.Replace(SinglePatterns[i].ToString(), SinglePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + SinglePatterns.Count, pct, 2, 6);
UpdateProgress("Finding double character replacements.", pct, 2, 6);
lastchar = "";
count = 0;
//double character replaces.
for (int i = 0; i + 1 < file.Length; i = i + 2)
{
if ("" + file[i] + "" + file[i + 1] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aa#$%#]
if (count > 8)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!DoublePatterns.Contains(ptn))
DoublePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1];
}
}
//handle possible trailing pattern
if (count > 8)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!DoublePatterns.Contains(ptn))
DoublePatterns.Add(ptn);
}
if (DoublePatterns.Count > 0)
for (int i = 0; i < DoublePatterns.Count; i++)
file = file.Replace(DoublePatterns[i].ToString(), DoublePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + DoublePatterns.Count, pct, 3, 6);
UpdateProgress("Finding triple character replacements.", pct, 3, 6);
lastchar = "";
count = 0;
//triple character replaces.
for (int i = 0; i + 2 < file.Length; i = i + 3)
{
if ("" + file[i] + "" + file[i + 1] + "" + file[i + 2] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aaa#$%#]
if (count > 9)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!TriplePatterns.Contains(ptn))
TriplePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1] + "" + file[i + 2];
}
}
//handle possible trailing pattern
if (count > 9)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!TriplePatterns.Contains(ptn))
TriplePatterns.Add(ptn);
}
if (TriplePatterns.Count > 0)
for (int i = 0; i < TriplePatterns.Count; i++)
file = file.Replace(TriplePatterns[i].ToString(), TriplePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + TriplePatterns.Count, pct, 4, 6);
UpdateProgress("Finding quadruple character replacements.", pct, 4, 6);
lastchar = "";
count = 0;
//triple character replaces.
for (int i = 0; i + 3 < file.Length; i = i + 4)
{
if ("" + file[i] + "" + file[i + 1] + "" + file[i + 2] + "" + file[i + 3] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aaaa#$%#]
if (count > 10)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!QuadruplePatterns.Contains(ptn))
QuadruplePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1] + "" + file[i + 2] + "" + file[i + 3];
}
}
//Handle possible trailing pattern
if (count > 10)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!QuadruplePatterns.Contains(ptn))
QuadruplePatterns.Add(ptn);
}
if (QuadruplePatterns.Count > 0)
for (int i = 0; i < QuadruplePatterns.Count; i++)
file = file.Replace(QuadruplePatterns[i].ToString(), QuadruplePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + QuadruplePatterns.Count, pct, 5, 6);
UpdateProgress("Saving new .cmp file...", pct, 5, 6);
string newpath = this.txt_FolderName.Text + "\\" + Path.GetFileName(this.txt_CompressFilename.Text);
newpath = newpath.Substring(0, newpath.LastIndexOf("."));
newpath = newpath + ".cmp";
File.WriteAllText(newpath, file);
stopwatch.Stop();
UpdateProgress("Compression completed! Time to compress file: " + string.Format("{0}", stopwatch.Elapsed), pct, 6, 6);
string report = "Compression report\n\n";
FileInfo inf = new FileInfo(this.txt_CompressFilename.Text);
FileInfo infNew = new FileInfo(newpath);
report += "Single character replacements made: " + SinglePatterns.Count + "\n\n";
report += "Double character replacements made: " + DoublePatterns.Count + "\n\n";
report += "Triple character replacements made: " + TriplePatterns.Count + "\n\n";
report += "Quadruple character replacements made: " + QuadruplePatterns.Count + "\n\n";
report += "Total compression ration achieved in string: " + pct + "% \n\n";
report += "Old file size: " + inf.Length + "\nNew file size: " + infNew.Length + " in bytes.";
report += "Total time to achieve compression: " + string.Format("{0}", stopwatch.Elapsed);
e.Result = report;
}
catch (Exception ex)
{
e.Result = ex;
}
}
Here is the code for the RepeatingPattern class...
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Compressor
{
public class RepeatingPattern : IEquatable<RepeatingPattern>
{
public string RepeatingChar { get; set; }
public int Count { get; set; }
public RepeatingPattern()
{
this.RepeatingChar = "";
this.Count = -1;
}
public RepeatingPattern(string rchar, int count)
{
this.RepeatingChar = rchar;
this.Count = count;
}
public RepeatingPattern(string FromReplaceString)
{
FromReplaceString = FromReplaceString.Replace("[", "").Replace("]", "");
List<string> parts = FromReplaceString.Split(new string[] { "#$%" }, StringSplitOptions.None).ToList();
if (parts.Count != 2)
throw new ArgumentException("Invalid argument count. Must be in this format: [a#$%N]");
try
{
this.RepeatingChar = parts[0];
this.Count = int.Parse(parts[1]);
}
catch (Exception ex)
{
throw new ArgumentException("Unable to cast the argument and create an object from it. Error: " + ex.Message);
}
}
public override bool Equals(object obj)
{
RepeatingPattern tmp = obj as RepeatingPattern;
if (tmp != null)
return base.Equals(tmp);
else
throw new Exception("Invalid comparison type. Both objects must be of type RepeatingPattern");
}
public bool Equals(RepeatingPattern tmp)
{
return this.RepeatingChar == tmp.RepeatingChar && this.Count == tmp.Count;
}
public override int GetHashCode()
{
return this.RepeatingChar.GetHashCode() ^ this.Count.GetHashCode();
}
public override string ToString()
{
string retval = "";
for (int i = 0; i < this.Count; i++)
retval += this.RepeatingChar;
return retval;
}
public string ToReplaceString()
{
return "[" + this.RepeatingChar + "#$%" + this.Count + "]";
}
}
}
Out of curiosity, I have made an attempt at the code. Some differences:
I made a helper function to find runs in the text
I build a new string (using StringBuilder) as I go through the old string instead of replacing in the old string
I think my code is a bit simpler than yours. I have tested with:
Input: "aaaaaaaaaaabbbcdcdcdcdcdcdxxxxxxxxxxxxxxxxxxhello"
Output: "[a#$%11]bbb[cd#$%6][x#$%18]hello"
Here's the code. This is a first draft. Probably lots of improvements to make:
static int FindRun(string s, int start, int length)
{
if (start + length >= s.Length) return 0;
int numRuns = 0;
string pattern = s.Substring(start, length);
for (int i = start + length; i <= s.Length - length; i += length)
{
if (s.Substring(i, length) == pattern) numRuns += 1;
else break;
}
return numRuns;
}
static string EncodeString(string src)
{
StringBuilder sb = new StringBuilder();
for (int i = 0; i < src.Length; i++)
{
string theRun = null;
int numRuns = 0;
// Find runs of lengths 4, 3, 2, 1
for (int j = 4; j >= 1; j--)
{
int runs = FindRun(src, i, j);
if (runs > 1) // Run found!
{
// Save it for later. Want to append the longest run
theRun = src.Substring(i, j);
numRuns = runs;
}
}
// No run? Just append the letter
if (theRun == null)
{
sb.Append(src[i]);
}
else
{
// This is the size of the run
int replacementStringSize = (numRuns * theRun.Length) + (theRun.Length - 1);
// This is the code to use as a replacement
String runCode = String.Format("[{0}#$%{1}]", theRun, numRuns + 1);
// Only append if the code length is smaller than the original run
if (runCode.Length < replacementStringSize)
{
sb.Append(runCode);
}
else
{
// Don't encode. Put original run back
for (int j = 0; j <= numRuns; j++)
{
sb.Append(theRun);
}
}
// Skip over the run
i += replacementStringSize;
}
}
return sb.ToString();
}
The root cause of the much larger output files is because of encoding. ChromeSetup.exe is 1,397,976 bytes. When the file is read in using File.ReadAllText it attempts to detect the string encoding. The string is 1,327,384 characters long in this case. Here's the key though, because of encoding each character isn't necessarily a single byte. For example in UTF-8 each character is 1 to 4 bytes. So then when the result string is written out a single character could become multiple bytes.
For reading/writing executables/binary files you're better off using File.ReadAllBytes()`File.WriteAllBytes()`.
While attempting to run your code I came across several other bugs. Here are the bugs I found.
1) In the double/triple/quad character replaces the for loop bounds should check up to the character that will be used.
//double character replaces.
for (int i = 0; i < file.Length; i = i + 2)
{
if ("" + file[i] + "" + file[i + 1] == lastchar)
This will cause an out of index exception if the file string is an odd number of characters. Add in a + 1 to fix this.
for (int i = 0; i + 1 < file.Length; i = i + 2)
For the triple this will be + 2, for quad + 3.
2) If the string ends with a repeating pattern this isn't handled correctly. In the for loops the pattern count is only checked when a different char is encountered. So if the pattern is at the end of the string it isn't detected. You could handle this by checking the count after the for loop.
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
3) count and lastchar should be reset before each for loop. If one for loop ends with count = 17 and the next for loop runs it would add a repeating pattern of count 17, which has already been replaced.
4) As others have mentioned, doing replacements in your input string as you go along has the potential to cause issues.
If you can post your RepeatingPattern code and your input text file we can run down the exact cause of your larger output file.
Edit: Running with your RepeatingPattern code I see another small bug. The pattern "aaaaaaaaaa" becomes "[a#$%9]a". it should be replacing one more character. This could be making your output string slightly longer than expected. To fix this, in the replacement for loops set count to 1 (instead of 0) when a new pattern is started.
Currently I am making a AI based on text.
I have a database with a pattern for each answer
A pattern looks like [Who is the] Winner of the World Cup 2018
[] = Optional words
<> = Needed words
When I enter the sentence Who is the Winner of the World Cup 2018 my method should return the indentifier of the answer.
My database has 2 rows called "AnswerIndentifier" and "Pattern"
I did it myself and programmed this algoryhm:
private static bool MatchesPattern(string text, string pattern)
{
List<string> patternTokens = new List<string>();
string tok = "";
pattern = pattern.ToLower() + "[";
int state = 0;
for(int i = 0; i < pattern.ToCharArray().Length; i++)
{
char token = pattern[i];
if(token == '[')
{
if(tok != "")
{
patternTokens.Add("NEC" + char.MaxValue + tok);
tok = "";
}
state = 1;
continue;
}
if(token == ']' && state == 1)
{
i++;
state = 0;
patternTokens.Add("OPT" + char.MaxValue + tok);
tok = "";
continue;
}
if(token == ' ' && i + 1 < text.ToCharArray().Length && text[i + 1] == '[')
continue;
tok += token;
}
string[] patternTokensCopy = new string[patternTokens.Count];
for(int i = 0; i < patternTokens.Count; i++)
patternTokensCopy[i] = patternTokens[i];
int max = (int) Math.Pow(2, patternTokens.Where(x => x.StartsWith("OPT")).ToList().Count);
for(int i = 0; i < max; i++)
{
string binary = Convert.ToString(i, 2).PadLeft(patternTokensCopy.Where(x => x.StartsWith("OPT")).ToList().Count, '0');
for(int x = 0; x < patternTokensCopy.Where(t => t.StartsWith("OPT")).ToList().Count; x++)
if(binary[x] == '0')
{
List<string> optionalTokens = new List<string>();
foreach(string curpattern in patternTokensCopy)
if(curpattern.StartsWith("OPT"))
optionalTokens.Add(curpattern);
patternTokens.Remove(optionalTokens[x]);
}
string patternAsSentence = "";
foreach(string patternToken in patternTokens)
patternAsSentence += patternToken.Split(char.MaxValue)[1] + " ";
while(patternAsSentence[patternAsSentence.Length - 1] == ' ')
patternAsSentence = patternAsSentence.Substring(0, patternAsSentence.Length - 1);
patternAsSentence = patternAsSentence.Replace("\r", "").Replace(" ", " ");
int similarity = StringSimilarity.GetStringSimilarity(patternAsSentence, text);
if(text.Length < 6)
{
if(text == patternAsSentence)
return true;
}
else
{
if(similarity <= 6)
return true;
}
patternTokens = new List<string>();
patternTokensCopy.ToList().ForEach(x => patternTokens.Add(x));
}
return false;
}
The only changes are that the needed text must not marked with <> and the similarity check(see C# - Compare String Similarity)
I have this project where I have to sort some txt files, most of them are numbers but one of them is a collection of months. I have the code that sorts the others but not the file containing the months. So i need this code to be changed so I can sort a string array any advice would be brilliant thank you!
public void SortArray(decimal[] numbers)
{
bool swap;
decimal temp;
do
{
swap = false;
for(int index = 0; index < (numbers.Length - 1); index ++)
{
if ( numbers[index] > numbers[index+1])
{
//swap
temp = numbers[index];
numbers[index] = numbers[index + 1];
numbers[index + 1] = temp;
swap = true;
}
}
} while (swap == true);
}
If you have an string array like this:
string[] s = {"bbb", "ccc", "aaa"};
The shorter way to sort it, using:
Array.Sort(s);
and the long way to sort it, using:
for (var i = 1; i < s.Length; i++)
{
for (var j = 0; j < s.Length - i; j++)
{
if (string.Compare(s[j], s[j + 1], StringComparison.Ordinal) <= 0) continue;
var temp = s[j];
s[j] = s[j + 1];
s[j + 1] = temp;
}
}
public void BubbleSortArrayString(string[] letters) //change here
{
bool swap;
string temp; //change this too
do
{
swap = false;
for (int index = 0; index < (letters.Length - 1); index++)
{
if (letters[index] > letters[index + 1]) //if first number is greater then second then swap
{
//swap
temp = letters[index];
letters[index] = letters[index + 1];
letters[index + 1] = temp;
swap = true;
}
}
} while (swap == true);
}
used this code... forgot some answered last time i tried this
So I'm new to working with browsers in Win-forms and I'm stuck a particular point.
What i want to do, for the browser to open a page(I've gotten this far). Once the page is open it must navigate to a particular part(It'somewhere in the middle of the page) and select it. Then copy and store it for when i need it, just the text.
I've been able to select all the text on a page by using the following code just as an example:
WebBrowser wb = (WebBrowser)sender;
wb.Document.ExecCommand("SelectAll", false, null);
wb.Document.ExecCommand("Copy", false, null);
richTextBox1.Text = Clipboard.GetText();
It can work for my program but I want to know if there is better way that will select just the text or info I need. If i can, place them in textboxes, or straight into my database.
This is the link to the page: http://www.lolking.net/news/league-trends-jul30
I want to select and get the info from these sections of the page:
Champion Pick Rates - Top 5 Increases and Decreases
Champion Win Rates - Top 5 Increases and Decreases
Champion Ban Rates - Top 5 Increases and Decreases
Any help would be appreciated.
Your foreach loop will look like this:
foreach (var item in list_ban)
{
string rtbpicker = item.ToString();
foreach (var comp in list_Comp)
{
int count = 0; //Counts for the number of occurences
foreach (Match m in Regex.Matches(rtbpicker, "" + comp.ToString() + ""))
{
int matchindex = m.Index;
int matchlength = m.Length;
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, " "); //Count just moves the index forward by however many postions the original index was shifted
if(Regex.Matches(rtbpicker, "" + comp.ToString() + "").Count > 1)
{
count++;
}
}
}
richTextBox6.Text += rtbpicker + "\n";
//rtbBan.AppendText(rtbpicker + System.Environment.NewLine);
}
I haven't got (yet) the whole solution but I can help you a bit:
Once you got the plain text from the FULLY LOADED webBrowser, and wrote in richTextBox1, then you can print the 3 part to other textboxes:
private void button_Click(object sender, EventArgs e)
{
List<string> rawhtml = new List<string>(); //List for the whole page
List<string> list_pick = new List<string>(); //PICK section
List<string> list_win = new List<string>(); //WIN section
List<string> list_ban = new List<string>(); //BAN section
rawhtml = richTextBox1.Lines.ToList(); //FILL the page to list
int ID_pick = 0;
int ID_win = 0;
int ID_ban = 0;
int ID_cmt = 0; // We need to specify the end of BAN section
for (int i = 0; i < rawhtml.Count; i++) //Search for the line number of section-start
{
if (rawhtml[i] == "Champion Pick Rates") ID_pick = i;
if (rawhtml[i] == "Champion Win Rates") ID_win = i;
if (rawhtml[i] == "Champion Ban Rates") ID_ban = i;
if (rawhtml[i].Contains("Comments")) ID_cmt = i;
}
// PICK
for (int i = ID_pick; i < ID_pick + (ID_win - ID_pick); i++) //Calculate the start and the end line-number
{
list_pick.AddRange(Regex.Split(rawhtml[i], "(?<=[)])")); //Split the five characters, without losing the ')'
}
foreach (var item in list_pick)
{
richTextBox2.AppendText(item + System.Environment.NewLine); //Optinal: Add to richtextbox
}
// WIN
for (int i = ID_win; i < ID_win + (ID_ban - ID_win); i++)
{
list_win.AddRange(Regex.Split(rawhtml[i], "(?<=[)])"));
}
foreach (var item in list_win)
{
richTextBox3.AppendText(item + System.Environment.NewLine);
}
// BAN
for (int i = ID_ban; i < ID_ban + (ID_cmt - ID_ban); i++)
{
list_ban.AddRange(Regex.Split(rawhtml[i], "(?<=[)])"));
}
foreach (var item in list_ban)
{
richTextBox4.AppendText(item + System.Environment.NewLine);
}
}
This code will make output from "Champion Win Rates" like:
Champion Win Rates
Top Five Biggest Increases
Urgot41.38%->43.67%(+2.29%)
Kennen47.7%->49.28%(+1.58%)
Lucian51.61%->53.1%(+1.49%)
Singed48.95%->50.31%(+1.36%)
Fiora53.48%->54.71%(+1.23%)
Top Five Biggest Decreases
Kassadin48.7%->46.67%(-2.03%)
Galio53.18%->51.42%(-1.76%)
Cho'Gath48.03%->46.37%(-1.66%)
Corki50.05%->48.43%(-1.62%)
Graves49.49%->47.98%(-1.51%)
Much better... ;)
I'm faced a problem with spaces, but i can't solve it yet.
I hope you understand this, if you have any question please comment!
Ps.: Sorry for bad eng
Pss.: I know this isn't the full solution, but i must share with you :)
And now the full solution, with perfect spaces. Regex was difficult to me, but I think this is more simple, however longer too.
private void btnspace_Click(object sender, EventArgs e)
{
richTextBox6.Text = null;
for (int i = 0; i < list_ban.Count; i++)
{
string rebuilder = ""; //for the output string (one line)
List<char> temp_chars = list_ban[i].ToCharArray().ToList(); //split one line into char sequence
int number_occur = 0; //occurence counter for numbers
int minus_occur = 0;// occurence counter for '-'
for (int j = 0; j < temp_chars.Count; j++)
{
// NUMBERS
// I don't wanted to hardcode the champions :/
if (number_occur < 2 && (temp_chars[j] == '1' || temp_chars[j] == '2' || temp_chars[j] == '3' || temp_chars[j] == '4' || temp_chars[j] == '5' || temp_chars[j] == '6' || temp_chars[j] == '7' || temp_chars[j] == '8' || temp_chars[j] == '9' || temp_chars[j] == '0')) //looks pretty, isn't?
{
temp_chars.Insert(j, ' '); //insert a space into char seq
j = j + 5; // in the longest case: 12.34, so skip 5 char, or 1 2. 3 4
number_occur = number_occur + 1; //for the difference percentage we don't need spaces, so insert by number only twice
}
// NUMBERS DONE
}
for (int j = 0; j < temp_chars.Count; j++)
{
// ( and -
if (temp_chars[j] == '-' || temp_chars[j] == '(')
{
if (temp_chars[j] == '-') minus_occur = minus_occur + 1; //if the difference is negative, there will be one more minus, which doesn't need space
if (minus_occur <= 1) temp_chars.Insert(j, ' ');
j = j + 1; //avoid endless loop
}
// ( and - DONE
}
foreach (var item in temp_chars)
{
rebuilder = rebuilder + item; //rebuild the line from the char list, with spaces
}
list_ban.RemoveAt(i); //replace the old spaceless lines...
list_ban.Insert(i, rebuilder);
richTextBox1.AppendText(list_ban[i] + System.Environment.NewLine);
}
}
I hope it's clear, i tried to comment everything. Good luck, and feel free to ask. Please mention if it's working because i want to answer this question perfectly :D
Ok, so this is my final solution, it works 100%, it takes your first answer, which you can see ;p and uses my regex.matches. I think the part that i added to the foreach loops, could be done in a method so you can just call it whenever you need it. I just haven't got to that yet! :)
private void button3_Click(object sender, EventArgs e)
{
List<string> rawhtml = new List<string>(); //List for the whole page
List<string> list_pick = new List<string>(); //PICK section
List<string> list_win = new List<string>(); //WIN section
List<string> list_ban = new List<string>(); //BAN section
List<string> list_Comp = new List<string>(); //Champion names
fillchamplist(list_Comp);
rawhtml = richTextBox1.Lines.ToList(); //FILL the page to list
int ID_pick = 0;
int ID_win = 0;
int ID_ban = 0;
int ID_cmt = 0; // We need to specify the end of BAN section
for (int i = 0; i < rawhtml.Count; i++) //Search for the line number of section-start
{
if (rawhtml[i] == "Champion Pick Rates") ID_pick = i;
if (rawhtml[i] == "Champion Win Rates") ID_win = i;
if (rawhtml[i] == "Champion Ban Rates") ID_ban = i;
if (rawhtml[i].Contains("Comments")) ID_cmt = i;
}
// PICK
for (int i = ID_pick; i < ID_pick + (ID_win - ID_pick); i++) //Calculate the start and the end line-number
{
list_pick.AddRange(Regex.Split(rawhtml[i], "(?<=[)])")); //Split the five characters, without losing the ')'
}
foreach (var item in list_pick)
{
string rtbpicker = item.ToString();
foreach (var comp in list_Comp)
{
int count = 0; //To see which match we working with later
foreach (Match m in Regex.Matches(rtbpicker, "" + comp.ToString() + "")) // Checks for all matches and cycles through them
{
if (count == 2) // if the count == 2, it means that its on its 3rd match(the one we dont wana give a space to
{
}
else // puts the space in
{
int matchindex = m.Index;
int matchlength = m.Length;
if (m.Length >= 2) // only champ names are >=2
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, "\t");
}
else
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, " "); // the count variable updates he index so the space doesnt occur before the % sign
}
if (Regex.Matches(rtbpicker, "" + comp.ToString() + "").Count > 0)// just to update the index for the 2nd %
{
count++;
}
}
}
}
rtbPick.AppendText(rtbpicker + System.Environment.NewLine); //Optinal: Add to richtextbox
}
// WIN
for (int i = ID_win; i < ID_win + (ID_ban - ID_win); i++)
{
list_win.AddRange(Regex.Split(rawhtml[i], "(?<=[)])"));
}
foreach (var item in list_win)
{
string rtbpicker = item.ToString();
foreach (var comp in list_Comp)
{
int count = 0;
foreach (Match m in Regex.Matches(rtbpicker, "" + comp.ToString() + ""))
{
if (count == 2)
{
}
else
{
int matchindex = m.Index;
int matchlength = m.Length;
if (m.Length >= 2)
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, "\t");
}
else
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, " ");
}
if (Regex.Matches(rtbpicker, "" + comp.ToString() + "").Count > 0)
{
count++;
}
}
}
}
rtbWin.AppendText(rtbpicker + System.Environment.NewLine);
}
// BAN
for (int i = ID_ban; i < ID_ban + (ID_cmt - ID_ban); i++)
{
list_ban.AddRange(Regex.Split(rawhtml[i], "(?<=[)])"));
}
foreach (var item in list_ban)
{
string rtbpicker = item.ToString();
foreach (var comp in list_Comp)
{
int count = 0;
foreach (Match m in Regex.Matches(rtbpicker, "" + comp.ToString() + ""))
{
if (count == 2)
{
}
else
{
int matchindex = m.Index;
int matchlength = m.Length;
if (m.Length >= 2)
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, "\t");
}
else
{
rtbpicker = rtbpicker.Insert(matchindex + matchlength + count, " ");
}
if (Regex.Matches(rtbpicker, "" + comp.ToString() + "").Count > 0)
{
count++;
}
}
}
}
rtbBan.AppendText(rtbpicker + System.Environment.NewLine);
}
}
This is the outcome:(doesn't show the tabs here for some reason)
Champion Pick Rates
Top Five Biggest Increases
Lucian 27.75% -> 32.3% (+4.55%)
Ahri 8.7% -> 11.3% (+2.6%)
Rengar 11.25% -> 13.84% (+2.59%)
Nidalee 10.7% -> 12.93% (+2.23%)
Tristana 30.07% -> 32.02% (+1.95%)
Top Five Biggest Decreases
Caitlyn 34.44% -> 30.63% (-3.81%)
Vayne 17.25% -> 15.69% (-1.56%)
Ezreal 15.08% -> 13.6% (-1.48%)
Renekton 13.84% -> 12.6% (-1.24%)
Lee Sin 30.54% -> 23.36% (-7.18%)
Ok so :D that works perfect for me, but that's cause i know what i want the outcome to be for this specific thing. Your method also works and i would actually recommend it for scenarios.
If you got any questions, dont be afraid to ask hey :)
I have this sequence 1,2,3,4,5,6,8,10,11
Expected output is 1-6,8,10-11
This problem is about formatting the sequence in easy readable form
I tried with c# and used many if & else.
Interviewer said, there is some simple algorithm to do this.
I have no idea how to achive this very simple.
Also for 1,2,3 i shown 1-3. They said its wrong!.
Is there any design pattern(interpreter) involved in this logic?
Here is one way of doing it:
int[] numbers = { 1, 2, 3, 4, 5, 6, 8, 10, 11 };
int start, end;
for (int i = 0; i < numbers.Length; i++)
{
start = numbers[i];
while (i < numbers.Length - 1 && numbers[i] + 1 == numbers[i + 1])
i++;
end = numbers[i];
if(start == end)
Console.WriteLine(start);
else
Console.WriteLine(start + " - " + end);
}
This will display subsequent numbers that grow incrementally as range. Numbers that are not increasing linearly are not written as part of a range.
Here is another version of the first approach, it utilizes the same for loop to iterate on range:
int temp = numbers[0], start, end;
for (int i = 0; i < numbers.Length; i++)
{
start = temp;
if (i < numbers.Length - 1 )
// if subsequent numbers are incremental loop further
if (numbers[i] + 1 == numbers[i + 1])
continue;
// if they are not, number at index i + 1 is a new 'start' for the next iteration
else
temp = numbers[i + 1];
end = numbers[i];
if (start == end)
Console.WriteLine(start);
else
Console.WriteLine(start + " - " + end);
}
A simple implementation in C# could look like this:
public string Format(IEnumerable<int> input)
{
var result = string.Empty;
var previous = -1;
var start = -1;
var first = true;
foreach(var i in input)
{
if(start == -1)
start = i;
else if(previous + 1 != i)
{
result += FormatRange(start, previous, first);
first = false;
start = i;
}
previous = i;
}
if(start != -1)
result += FormatRange(start, previous, first);
return result;
}
public string FormatRange(int start, int end, bool isFirst)
{
var result = string.Empty;
if(!isFirst)
result += ", ";
if(start == end)
result += start;
else
result += string.Format("{0}-{1}", start, end);
return result;
}
This will also output 1-3 for the input 1,2,3, which is perfectly valid. Without a specification what the output should be instead it's impossible to answer that part.
Probably not a suitable answer for an interview question, but using LINQ is another way to solve this.
int[] numbers = { 1, 2, 3, 4, 5, 6, 8, 10, 11 };
var remains = numbers.AsEnumerable();
while (remains.Any())
{
int first = remains.First();
int last = remains.TakeWhile((x, i) => x - first == i).Last();
remains = remains.Skip(last - first + 1);
Console.Write(first + (first == last ? "" : "-" + last) + (remains.Any() ? "," : Environment.NewLine));
}
The following groups consecutive integers, and outputs a string for each group. However, it also allows you to specify the minimum length of group which you want to hyphenate; anything less will just give you the individual numbers. Thus if you only want to hyphenate groups of 4 or more, you can pass in 4; if you want to hyphenate pairs, you can pass in 2. (I'd want to use 3 myself, but I can't tell what they want.)
It also doesn't keep any collections of numbers as it goes along, because you don't need to.
Method:
static IEnumerable<string> Group(IEnumerable<int> input, int minLength)
{
int currentStart = int.MinValue;
int currentLength = 0;
foreach (int c in input)
{
if (currentLength > 0)
if (currentStart + currentLength == c)
currentLength++;
else
{
if (currentLength >= minLength)
yield return string.Format("{0}-{1}",
currentStart, currentStart + currentLength - 1);
else
for (int i = currentStart; i < currentStart + currentLength; i++)
yield return i.ToString();
currentStart = c;
currentLength = 1;
}
else
{
currentStart = c;
currentLength = 1;
}
}
if (currentLength >= minLength)
yield return string.Format("{0}-{1}",
currentStart, currentStart + currentLength + 1);
else
for (int i = currentStart; i < currentStart + currentLength; i++)
yield return i.ToString();
}
Usage:
int minCount = 3;
int[] input = new[] { 1, 2, 3, 4, 5, 6, 8, 10, 11 };
Console.WriteLine(String.Join(",", Group(input, minCount)));
Java code:
int[] arr = {1,2,3,4,5,6,8,10,11};
int start = arr[0], last = arr[0];
String output = "";
for (int i = 1; i <= arr.length; i++)
{
if (i == arr.length || arr[i] != last+1)
{
if (output.length() != 0)
output += ",";
if (start == last)
output += start;
else
output += start + "-" + last;
if (i != arr.length)
start = last = arr[i];
}
else
last = arr[i];
}
System.out.println(output);
Heres my best attempt. Not clever, but simple enough to satisfy that requirement I believe. I'm still pretty confused as to why "1-3" was wrong though....
var numbers = new int[] { 1, 2, 3, 4, 5, 6, 8, 10, 11, 12 };
var groups = new Dictionary<int, int>();
groups.Add(numbers.First(), numbers.First());
foreach (var num in numbers.Skip(1))
{
var grp = groups.Last();
if (grp.Value + 1 == num)
{
groups[grp.Key] = num;
}
else
{
groups.Add(num, num);
}
}
var output = string.Join(",", groups.Select(grp => (grp.Key == grp.Value) ? grp.Value.ToString() : grp.Key.ToString() + "-" + grp.Value.ToString()));
Note: of course using the dictionary and linq etc is completely unnecessary (and way too specific for an answer requiring an algorithm), but I thought it highlighted the grouping aspect of the problem nicely
This is no valid C# code but to show the Idea.
Sort the list from Min to Max then do this:
For i = Min to Max
{
if i < MaxFound
continue;
int step = 1;
Output = i;
while Found(i + Step)
{
Step++;
MaxFound = i + Step;
}
if i < MaxFound
Output = (i + "-" + MaxFound);
Output += ", ";
}
Here is one of the approach:
public static void main(String[] args) {
print(1, 2, 3, 4, 5, 7, 9, 10, 12);
}
public static void print(int ... nums) {
System.out.print(nums[0]);
int idx = 1;
for(int i = 1; i < nums.length; i++, idx++) {
if(nums[i] - nums[i - 1] != 1) {
if(idx > 1) {
System.out.print(" - " + nums[i - 1]);
}
System.out.print(", " + nums[i]);
idx = 0;
}
}
if(idx > 1)
System.out.println(" - " + nums[nums.length - 1]);
}
Here's a Haskell version:
import Data.List
parseRange [] = ""
parseRange n =
let range = takeWhile (\x -> isInfixOf [x,x+1] n) n
in if not (null range)
then show (head range) ++ "-" ++ show (last range + 1)
++ (if length (tail n) > 1 then "," else "")
++ parseRange (drop (length range + 1) n)
else show (head n) ++ (if null (tail n) then "" else ",")
++ parseRange (drop 1 n)
Output:
*Main> parseRange [1,2,3,4,5,6,8,10,11]
"1-6,8,10-11"
And a way to do it with fold in F# - just for fun.
let parseRange numbers =
numbers
|> Seq.fold
(fun list n ->
match list with
|(a,b) :: tail when b+1 = n -> (a, n) :: tail
|_ -> (n,n) :: list) []
|> List.rev
|> Seq.map (fun (a,b) -> if a = b then sprintf "%i" a else sprintf "%i-%i" a b)
|> String.concat ","