I am having a bit of a conundrum here... basically I am performing some very very basic file compression steps as follows:
open file and read as string/into a string
parse through the string, and replace repeating patterns with smaller size of text that represents the pattern (ex: aaaaaaaaaaa (11 chars) is replaced with [a#$%11] (8 chars))
save the new, smaller string, into a separate file (can compare sizes)
For some reason, even though the new string in memory is like, 3% smaller than the original string, when I save the string into a file, the file itself is BIGGER than the original file on the file system?? How is that even possible? If someone could explain that to me it would be great!
Here is the code I am using to do this:
void bkg_DoWork(object sender, DoWorkEventArgs e)
{
try
{
string file = File.ReadAllText(this.txt_CompressFilename.Text);
int olength = file.Length;
int nlength = 0;
decimal pct = 0;
string lastchar = "";
int count = 0;
List<RepeatingPattern> SinglePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> DoublePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> TriplePatterns = new List<RepeatingPattern>();
List<RepeatingPattern> QuadruplePatterns = new List<RepeatingPattern>();
UpdateProgress("Read file contents", 0, 1, 6);
UpdateProgress("Finding single character replacements.", pct, 1, 6);
//single character replaces.
for (int i = 0; i < olength; i++)
{
if (file[i].ToString() == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[a#$%#]
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
count = 0;
lastchar = file[i].ToString();
}
}
//handle possible trailing pattern
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
if (SinglePatterns.Count > 0)
for (int i = 0; i < SinglePatterns.Count; i++)
file = file.Replace(SinglePatterns[i].ToString(), SinglePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + SinglePatterns.Count, pct, 2, 6);
UpdateProgress("Finding double character replacements.", pct, 2, 6);
lastchar = "";
count = 0;
//double character replaces.
for (int i = 0; i + 1 < file.Length; i = i + 2)
{
if ("" + file[i] + "" + file[i + 1] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aa#$%#]
if (count > 8)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!DoublePatterns.Contains(ptn))
DoublePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1];
}
}
//handle possible trailing pattern
if (count > 8)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!DoublePatterns.Contains(ptn))
DoublePatterns.Add(ptn);
}
if (DoublePatterns.Count > 0)
for (int i = 0; i < DoublePatterns.Count; i++)
file = file.Replace(DoublePatterns[i].ToString(), DoublePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + DoublePatterns.Count, pct, 3, 6);
UpdateProgress("Finding triple character replacements.", pct, 3, 6);
lastchar = "";
count = 0;
//triple character replaces.
for (int i = 0; i + 2 < file.Length; i = i + 3)
{
if ("" + file[i] + "" + file[i + 1] + "" + file[i + 2] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aaa#$%#]
if (count > 9)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!TriplePatterns.Contains(ptn))
TriplePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1] + "" + file[i + 2];
}
}
//handle possible trailing pattern
if (count > 9)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!TriplePatterns.Contains(ptn))
TriplePatterns.Add(ptn);
}
if (TriplePatterns.Count > 0)
for (int i = 0; i < TriplePatterns.Count; i++)
file = file.Replace(TriplePatterns[i].ToString(), TriplePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + TriplePatterns.Count, pct, 4, 6);
UpdateProgress("Finding quadruple character replacements.", pct, 4, 6);
lastchar = "";
count = 0;
//triple character replaces.
for (int i = 0; i + 3 < file.Length; i = i + 4)
{
if ("" + file[i] + "" + file[i + 1] + "" + file[i + 2] + "" + file[i + 3] == lastchar)
count += 1;
else
{
//create a pattern, if the count is more than what a pattern's compressed pattern looks like to save space... 8 chars
//[aaaa#$%#]
if (count > 10)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!QuadruplePatterns.Contains(ptn))
QuadruplePatterns.Add(ptn);
}
count = 0;
lastchar = "" + file[i] + "" + file[i + 1] + "" + file[i + 2] + "" + file[i + 3];
}
}
//Handle possible trailing pattern
if (count > 10)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!QuadruplePatterns.Contains(ptn))
QuadruplePatterns.Add(ptn);
}
if (QuadruplePatterns.Count > 0)
for (int i = 0; i < QuadruplePatterns.Count; i++)
file = file.Replace(QuadruplePatterns[i].ToString(), QuadruplePatterns[i].ToReplaceString());
nlength = file.Length;
pct = (decimal)(((double)(olength - nlength) / olength) * 100);
UpdateProgress("Found and replaced " + QuadruplePatterns.Count, pct, 5, 6);
UpdateProgress("Saving new .cmp file...", pct, 5, 6);
string newpath = this.txt_FolderName.Text + "\\" + Path.GetFileName(this.txt_CompressFilename.Text);
newpath = newpath.Substring(0, newpath.LastIndexOf("."));
newpath = newpath + ".cmp";
File.WriteAllText(newpath, file);
stopwatch.Stop();
UpdateProgress("Compression completed! Time to compress file: " + string.Format("{0}", stopwatch.Elapsed), pct, 6, 6);
string report = "Compression report\n\n";
FileInfo inf = new FileInfo(this.txt_CompressFilename.Text);
FileInfo infNew = new FileInfo(newpath);
report += "Single character replacements made: " + SinglePatterns.Count + "\n\n";
report += "Double character replacements made: " + DoublePatterns.Count + "\n\n";
report += "Triple character replacements made: " + TriplePatterns.Count + "\n\n";
report += "Quadruple character replacements made: " + QuadruplePatterns.Count + "\n\n";
report += "Total compression ration achieved in string: " + pct + "% \n\n";
report += "Old file size: " + inf.Length + "\nNew file size: " + infNew.Length + " in bytes.";
report += "Total time to achieve compression: " + string.Format("{0}", stopwatch.Elapsed);
e.Result = report;
}
catch (Exception ex)
{
e.Result = ex;
}
}
Here is the code for the RepeatingPattern class...
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Compressor
{
public class RepeatingPattern : IEquatable<RepeatingPattern>
{
public string RepeatingChar { get; set; }
public int Count { get; set; }
public RepeatingPattern()
{
this.RepeatingChar = "";
this.Count = -1;
}
public RepeatingPattern(string rchar, int count)
{
this.RepeatingChar = rchar;
this.Count = count;
}
public RepeatingPattern(string FromReplaceString)
{
FromReplaceString = FromReplaceString.Replace("[", "").Replace("]", "");
List<string> parts = FromReplaceString.Split(new string[] { "#$%" }, StringSplitOptions.None).ToList();
if (parts.Count != 2)
throw new ArgumentException("Invalid argument count. Must be in this format: [a#$%N]");
try
{
this.RepeatingChar = parts[0];
this.Count = int.Parse(parts[1]);
}
catch (Exception ex)
{
throw new ArgumentException("Unable to cast the argument and create an object from it. Error: " + ex.Message);
}
}
public override bool Equals(object obj)
{
RepeatingPattern tmp = obj as RepeatingPattern;
if (tmp != null)
return base.Equals(tmp);
else
throw new Exception("Invalid comparison type. Both objects must be of type RepeatingPattern");
}
public bool Equals(RepeatingPattern tmp)
{
return this.RepeatingChar == tmp.RepeatingChar && this.Count == tmp.Count;
}
public override int GetHashCode()
{
return this.RepeatingChar.GetHashCode() ^ this.Count.GetHashCode();
}
public override string ToString()
{
string retval = "";
for (int i = 0; i < this.Count; i++)
retval += this.RepeatingChar;
return retval;
}
public string ToReplaceString()
{
return "[" + this.RepeatingChar + "#$%" + this.Count + "]";
}
}
}
Out of curiosity, I have made an attempt at the code. Some differences:
I made a helper function to find runs in the text
I build a new string (using StringBuilder) as I go through the old string instead of replacing in the old string
I think my code is a bit simpler than yours. I have tested with:
Input: "aaaaaaaaaaabbbcdcdcdcdcdcdxxxxxxxxxxxxxxxxxxhello"
Output: "[a#$%11]bbb[cd#$%6][x#$%18]hello"
Here's the code. This is a first draft. Probably lots of improvements to make:
static int FindRun(string s, int start, int length)
{
if (start + length >= s.Length) return 0;
int numRuns = 0;
string pattern = s.Substring(start, length);
for (int i = start + length; i <= s.Length - length; i += length)
{
if (s.Substring(i, length) == pattern) numRuns += 1;
else break;
}
return numRuns;
}
static string EncodeString(string src)
{
StringBuilder sb = new StringBuilder();
for (int i = 0; i < src.Length; i++)
{
string theRun = null;
int numRuns = 0;
// Find runs of lengths 4, 3, 2, 1
for (int j = 4; j >= 1; j--)
{
int runs = FindRun(src, i, j);
if (runs > 1) // Run found!
{
// Save it for later. Want to append the longest run
theRun = src.Substring(i, j);
numRuns = runs;
}
}
// No run? Just append the letter
if (theRun == null)
{
sb.Append(src[i]);
}
else
{
// This is the size of the run
int replacementStringSize = (numRuns * theRun.Length) + (theRun.Length - 1);
// This is the code to use as a replacement
String runCode = String.Format("[{0}#$%{1}]", theRun, numRuns + 1);
// Only append if the code length is smaller than the original run
if (runCode.Length < replacementStringSize)
{
sb.Append(runCode);
}
else
{
// Don't encode. Put original run back
for (int j = 0; j <= numRuns; j++)
{
sb.Append(theRun);
}
}
// Skip over the run
i += replacementStringSize;
}
}
return sb.ToString();
}
The root cause of the much larger output files is because of encoding. ChromeSetup.exe is 1,397,976 bytes. When the file is read in using File.ReadAllText it attempts to detect the string encoding. The string is 1,327,384 characters long in this case. Here's the key though, because of encoding each character isn't necessarily a single byte. For example in UTF-8 each character is 1 to 4 bytes. So then when the result string is written out a single character could become multiple bytes.
For reading/writing executables/binary files you're better off using File.ReadAllBytes()`File.WriteAllBytes()`.
While attempting to run your code I came across several other bugs. Here are the bugs I found.
1) In the double/triple/quad character replaces the for loop bounds should check up to the character that will be used.
//double character replaces.
for (int i = 0; i < file.Length; i = i + 2)
{
if ("" + file[i] + "" + file[i + 1] == lastchar)
This will cause an out of index exception if the file string is an odd number of characters. Add in a + 1 to fix this.
for (int i = 0; i + 1 < file.Length; i = i + 2)
For the triple this will be + 2, for quad + 3.
2) If the string ends with a repeating pattern this isn't handled correctly. In the for loops the pattern count is only checked when a different char is encountered. So if the pattern is at the end of the string it isn't detected. You could handle this by checking the count after the for loop.
if (count > 7)
{
//create and add a pattern to the list if necessary.
RepeatingPattern ptn = new RepeatingPattern(lastchar.ToString(), count);
if (!SinglePatterns.Contains(ptn))
SinglePatterns.Add(ptn);
}
3) count and lastchar should be reset before each for loop. If one for loop ends with count = 17 and the next for loop runs it would add a repeating pattern of count 17, which has already been replaced.
4) As others have mentioned, doing replacements in your input string as you go along has the potential to cause issues.
If you can post your RepeatingPattern code and your input text file we can run down the exact cause of your larger output file.
Edit: Running with your RepeatingPattern code I see another small bug. The pattern "aaaaaaaaaa" becomes "[a#$%9]a". it should be replacing one more character. This could be making your output string slightly longer than expected. To fix this, in the replacement for loops set count to 1 (instead of 0) when a new pattern is started.
I recently make a program to calculate sum and avg of a given array. The program also want to print the histogram or stars pattern match with value of index.
Like this:
This is the code I wrote so far:
private void process_Click(object sender, EventArgs e)
{
string temp2 = null;
string temp3 = null;
float sum=0;
int countingNumber = 1;
string message;
int count = Convert.ToInt32(inputArray.Text);
int[] varray = new int[count];
for (int i = 0; i < count; i++)
//for (int j = 1; j <= count; j++)
{
varray[i] = Convert.ToInt32(Interaction.InputBox(message = "enter the value of array number " + countingNumber));
sum = sum+ varray[i];
temp += countingNumber + " "+varray[i] + Environment.NewLine;
temp2 += countingNumber + " " + varray[i] + " *" + Environment.NewLine;
box1.Text = Convert.ToString("Index Value" + Environment.NewLine + temp);
boxSum.Text = Convert.ToString(sum);
boxAvg.Text = Convert.ToString(sum/count);
countingNumber++;
}
for (int stars = 0; stars <= i; stars++)
{
temp3 = " ";
box2.Text = Convert.ToString("Element Value Histogram" + Environment.NewLine + temp2+temp3);
}
}
}
My code won't print the stars match with the value. Could anybody help me?
Try replacing this line:
temp2 += countingNumber + " " + varray[i] + " *" + Environment.NewLine;
with this line:
temp2 += countingNumber + " " + varray[i] + " " + new String('*', i) + Environment.NewLine;
I am trying to implement Best fit memory allocation. I have done the code, which i think should work but somehow it appears its getting stuck in the loop. I can't figure out why. The following is my code. I want to allocate jobs to the memory with the least memory waste.
public void bestFit(int job_size)
{
string str1 = "";
string str2 = "";
string str3 = "";
int memory_block = 99999;
int subscript = 0;
int initial_memory_waste = memory_block - job_array[0];
int job_counter = 0;
int counter = 1;
int memory_waste = 0;
while (counter <= memory_array.Length)
{
if (job_size > memory_array[counter - 1])
{
counter += 1;
}
else
memory_waste = memory_array[counter - 1] - job_size;
{
if (initial_memory_waste > memory_waste)
{
subscript = counter;
initial_memory_waste = memory_waste;
counter += 1;
}
}
}
queued_jobs = counter;
str3 = ("Job number: " + (queued_jobs).ToString() + " of size: " + job_size.ToString());
if (job_counter < job_array.Length)
{
bf_waiting_queue.Add(str3);
}
else
{
str1 = ("Job number: " + (job_counter).ToString() + " of size: " + job_size.ToString() + " is allocated to Memory block: " + (subscript).ToString() + " of size: " + memory_array[subscript - 1]).ToString();
memory_waste = memory_array[subscript - 1] - job_size;
str2 = ("Memory waste is: " + memory_waste.ToString());
bf_total_memory_waste += memory_waste;
memory_array[counter - 1] = (memory_array[counter - 1] - job_size);
bf_jobsInMemory.Add(str1 + "\t" + str2);
job_counter += 1;
counter = 1;
}
}
Aside from the braces issue others pointed out, your logic makes it possible for you to never increment 'counter', which is why you are stuck in the loop. The following reorganization is cleaner and guarantees you always increment 'counter'. Also, a couple of comments couldn't hurt.
while (counter <= memory_array.Length)
{
// If block fits, consider it
if (job_size <= memory_array[counter - 1])
{
memory_waste = memory_array[counter - 1] - job_size;
// If this block is less wasteful, remember it
if (initial_memory_waste > memory_waste)
{
subscript = counter;
initial_memory_waste = memory_waste;
}
}
// Next block
counter += 1;
}
I want to dispaly a list on a button click.I have added a list box in .xaml file and want to add 10 text boxes in list.The following code shows errors.
private void listbutton_C(object sender, RoutedEventArgs e)
{
String str = "thumb_";
TextBox[] name = new TextBox[20];
for (int i = 1; i < 11; i++)
{
if (i == 10)
{
strPath = str + "0" + i + ".jpg";
}
else
{
strPath = str + "00" + i + ".jpg";
}
name[i].Text = strPath;
listBox1.Items.Add(name[i]);
}
ContentPanel2.Visibility = Visibility.Collapsed;
listBox1.Visibility = Visibility.Visible;
}
name[i].text=strpath show nullreferenceExceptions .Can somebody explain what is the problem?
I think you need to instantiate every textbox, you have only created the array.
for (int i = 1; i < 11; i++)
{
name[i] = new TextBox(); // insert this line
if (i == 10)
{
strPath = str + "0" + i + ".jpg";
}
else
{
strPath = str + "00" + i + ".jpg";
}
name[i].Text = strPath;
listBox1.Items.Add(name[i]);
}
I'm trying to create a loop to avoid copy pasting these lines 30 times.
The names are:
sum1 to sum30
br1txt1 to br30txt1
br1txt2 to br30txt2
//decimal sum30 = decimal.Parse(br30txt1.Text) + decimal.Parse(br30txt2.Text);
//sumTxt30.Text = sum30.ToString();
But the error I'm getting is that the textbox array seems to try to put the value of the textbox not the text box refrenc it self in to the array, how should I fix this?
private void sumX()
{
TextBox[] sumTextboxNames;
sumTextboxNames = new TextBox[29];
for (int i = 1; i < 31; i++)
{
if (sumTextboxNames[0] == null)
{
int y = 0;
foreach (Control c in this.Controls)
{
if (c is TextBox && c.Name.StartsWith("sum"))
{
sumTextboxNames[y] = (TextBox)c;
y++;
}
}
}
else
{
}
string1 = "br" + i + "txt" + 1 + ".Text";
string2 = "br" + i + "txt" + 2 + ".Text";
string3 = "sumTxt" + i + ".Text";
sum = decimal.Parse(string1) + decimal.Parse(string2);
int x = i - 1;
sumTextboxNames[x].Text = sum.ToString();
}
}
The following lines won't work at all:
string1 = "br" + i + "txt" + 1 + ".Text";
string2 = "br" + i + "txt" + 2 + ".Text";
As 1 and 2 are not strings and can not be concatenated to a string. That should give a compiler error right away.
In the following line, you're trying to add up the names of the text boxes as numbers - won't work, the names contain non-number characters.
sum = decimal.Parse(string1) + decimal.Parse(string2);
Anyway, you don't need to use the TextBox array at all. What you could do is:
for (int i = 1; i <= 30; i++)
{
TextBox s = (TextBox)this.Controls[<Name for the S-Textbox>];
TextBox b1 = (TextBox)this.Controls[<Name for the first sum textbox>];
TextBox b2 = (TextBox)this.Controls[<Name for the second sum textbox>];
s.Text = Decimal.Parse(b1.Text) + Decimal.Parse(b2.Text);
}
EDIT
Sorry, quoted wrong line from OP's source code.
EDIT 2
Forgot to cast to TextBox - this is required of course... Thanks for pointing it out, everybody.
Thorsten Dittmar's answers is the way you should go.
However, with respect to this code:
foreach (Control c in this.Controls)
{
if (c is TextBox && c.Name.StartsWith("sum"))
{
sumTextboxNames[y] = (TextBox)c;
y++;
}
}
You should try a solution that uses LINQ.
For example
TextBox [] sumTextBoxes = (from t in this.Controls.Cast<Control>
where t is TextBox
&& t.Name.StartsWith("sum")
select t).Cast<TextBox>().ToArray();
Thanks to Thorsten this is what I ended up with:
string string1;
string string2;
string string3;
private void sumX()
{
for (int i = 1; i < 31; i++)
{
string1 = "br" + i + "txt" + '1';
string2 = "br" + i + "txt" + '2';
string3 = "sumTxt" + i;
TextBox s = (TextBox)this.Controls[string3];
TextBox b1 = (TextBox)this.Controls[string1];
TextBox b2 = (TextBox)this.Controls[string2];
decimal sum = Decimal.Parse(b1.Text) + Decimal.Parse(b2.Text);
s.Text = sum.ToString();
}