getting the best record from a file

getting the best record from a file - c#

I have a file with the following text inside
mimi,m,70
tata,f,60
bobo,m,100
soso,f,30
I have already written the file-reading code and several other methods and functions, but how can I get the name and grade of the best male, i.e. the male record with the highest grade?
here is the code I wrote. Hope it's not so long
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
namespace practice_Ex
{
    /// <summary>
    /// Reads "name,gender,mark" records from a text file, computes a few
    /// statistics over the marks and writes the results to an output file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads every "name,gender,mark" line of <paramref name="FileName"/>.
        /// </summary>
        /// <param name="FileName">Path of the input file.</param>
        /// <param name="Name">Receives the names, one per record.</param>
        /// <param name="Gender">Receives the upper-cased gender letters, one per record.</param>
        /// <returns>
        /// The marks, one per record. All three arrays have exactly one element
        /// per record (no padding slot) and are empty when the file is missing.
        /// </returns>
        public static int[] ReadFile(string FileName, out string[] Name, out char[] Gender)
        {
            List<string> names = new List<string>();
            List<int> marks = new List<int>();
            List<char> genders = new List<char>();

            if (File.Exists(FileName))
            {
                // "using" closes the file even when a line fails to parse.
                using (StreamReader reader = new StreamReader(FileName))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        // A trailing or stray blank line is not a record.
                        if (line.Trim().Length == 0)
                        {
                            continue;
                        }

                        string[] fields = line.Split(',');
                        names.Add(fields[0]);
                        marks.Add(int.Parse(fields[2]));
                        genders.Add(char.Parse(fields[1].ToUpper()));
                    }
                }
            }

            Name = names.ToArray();
            Gender = genders.ToArray();
            return marks.ToArray();
        }

        /// <summary>
        /// Finds the value that occurs most often in <paramref name="M"/>.
        /// </summary>
        /// <param name="M">Marks to inspect.</param>
        /// <param name="Frequency">Receives how often the returned value occurs (0 for empty input).</param>
        /// <returns>The most frequent value; on ties the one that appears first; 0 for empty input.</returns>
        public static int MostFreq(int[] M, out int Frequency)
        {
            Dictionary<int, int> counts = new Dictionary<int, int>();
            foreach (int mark in M)
            {
                int seen;
                counts.TryGetValue(mark, out seen);
                counts[mark] = seen + 1;
            }

            int Frequent = 0;
            Frequency = 0;
            // Walk the input order (not the dictionary) so ties resolve to the first occurrence.
            foreach (int mark in M)
            {
                if (counts[mark] > Frequency)
                {
                    Frequency = counts[mark];
                    Frequent = mark;
                }
            }
            return Frequent;
        }

        /// <summary>Integer average of all marks; 0 for an empty array.</summary>
        public static int Avg(int[] M)
        {
            if (M.Length == 0)
            {
                return 0;
            }

            int total = 0;
            foreach (int mark in M)
            {
                total += mark;
            }
            return total / M.Length;
        }

        /// <summary>
        /// Integer average of the marks whose gender equals <paramref name="S"/>.
        /// </summary>
        /// <param name="G">Gender letter per record.</param>
        /// <param name="M">Mark per record (same indexing as <paramref name="G"/>).</param>
        /// <param name="S">Gender letter to select.</param>
        /// <returns>The average, or 0 when no record matches.</returns>
        public static int AvgCond(char[] G, int[] M, char S)
        {
            int total = 0;
            int counter = 0;
            for (int i = 0; i < G.Length; i++)
            {
                if (G[i] == S)
                {
                    total += M[i];
                    counter++;
                }
            }
            return counter == 0 ? 0 : total / counter;
        }

        /// <summary>
        /// Counts the marks below the integer average of <paramref name="M"/>.
        /// </summary>
        /// <param name="M">Marks to inspect.</param>
        /// <param name="AboveAvg">Receives the number of marks at or above the average.</param>
        /// <returns>The number of marks strictly below the average.</returns>
        public static int BelowAvg(int[] M, out int AboveAvg)
        {
            int average = Avg(M); // computed once instead of once per element
            int Bcounter = 0;
            AboveAvg = 0;
            foreach (int mark in M)
            {
                if (mark < average)
                {
                    Bcounter++;
                }
                else
                {
                    AboveAvg++;
                }
            }
            return Bcounter;
        }

        /// <summary>
        /// Counts the names that start or end with <paramref name="C"/>, ignoring case.
        /// </summary>
        /// <param name="Name">Names to inspect; null or empty entries are skipped.</param>
        /// <param name="C">Letter to look for.</param>
        public static int CheckNames(string[] Name, char C)
        {
            C = char.ToLower(C);
            int counter = 0;
            foreach (string name in Name)
            {
                if (string.IsNullOrEmpty(name))
                {
                    continue;
                }

                string lowered = name.ToLower();
                if (lowered[0] == C || lowered[lowered.Length - 1] == C)
                {
                    counter++;
                }
            }
            return counter;
        }

        /// <summary>
        /// Writes each element of <paramref name="Output"/> as one line of
        /// <paramref name="FileName"/>, replacing any previous content.
        /// </summary>
        public static void WriteFile(string FileName, string[] Output)
        {
            // FileMode.Create truncates an existing file; disposing the writer flushes it.
            using (FileStream stream = new FileStream(FileName, FileMode.Create, FileAccess.Write))
            using (StreamWriter writer = new StreamWriter(stream))
            {
                foreach (string line in Output)
                {
                    writer.WriteLine(line);
                }
            }
        }

        /// <summary>Reads c:\IUST1.txt, computes the statistics and writes them to c:\Output.txt.</summary>
        static void Main(string[] args)
        {
            int[] Mark;
            char[] Gender;
            string[] Name;
            string[] Output = new string[8];
            int Frequent, Frequency, AvgAll, MaleAvg, FemaleAvg, BelowAverage, AboveAverage, NamesCheck;
            Mark = ReadFile("c:\\IUST1.txt", out Name, out Gender);
            Frequent = MostFreq(Mark, out Frequency);
            AvgAll = Avg(Mark);
            MaleAvg = AvgCond(Gender, Mark, 'M');
            FemaleAvg = AvgCond(Gender, Mark, 'F');
            BelowAverage = BelowAvg(Mark, out AboveAverage);
            NamesCheck = CheckNames(Name, 'T');
            Output[0] = "Frequent Mark = " + Frequent.ToString();
            Output[1] = "Frequency = " + Frequency.ToString();
            Output[2] = "Average Of All = " + AvgAll.ToString();
            Output[3] = "Average Of Males = " + MaleAvg.ToString();
            Output[4] = "Average Of Females = " + FemaleAvg.ToString();
            Output[5] = "Below Average = " + BelowAverage.ToString();
            Output[6] = "Above Average = " + AboveAverage.ToString();
            Output[7] = "Names With \"T\" = " + NamesCheck.ToString();
            WriteFile("c:\\Output.txt", Output);
        }
    }
}

Well, I like LINQ (update: excluded via comments) for querying, especially if I can do it without buffering the data (so I can process a huge file efficiently). For example below (update: removed LINQ); note the use of iterator blocks (yield return) makes this fully "lazy" - only one record is held in memory at a time.
This also shows separation of concerns: one method deals with reading a file line by line; one method deals with parsing a line into a typed data record; one (or more) method(s) work with those data record(s).
using System;
using System.Collections.Generic;
using System.IO;
/// <summary>Gender of a parsed data record.</summary>
enum Gender
{
    Male,
    Female,
    Unknown
}
/// <summary>One typed data record: a name, a gender and a score.</summary>
class Record
{
    /// <summary>Name of the person.</summary>
    public string Name { get; set; }

    /// <summary>Score achieved by the person.</summary>
    public int Score { get; set; }

    /// <summary>Gender of the person.</summary>
    public Gender Gender { get; set; }
}
/// <summary>
/// Streams "name,gender,score" records from data.txt and prints the
/// highest-scoring male record.
/// </summary>
static class Program
{
    /// <summary>
    /// Lazily yields the lines of <paramref name="path"/>; the file is opened
    /// when enumeration starts and closed when it ends or is abandoned.
    /// </summary>
    static IEnumerable<string> ReadLines(string path)
    {
        using (StreamReader reader = File.OpenText(path))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                yield return line;
            }
        }
    }

    /// <summary>
    /// Lazily parses each non-blank line of <paramref name="path"/> into a <see cref="Record"/>.
    /// The gender code is matched case-insensitively: "m" is male, "f" is female,
    /// anything else is unknown.
    /// </summary>
    static IEnumerable<Record> Parse(string path)
    {
        foreach (string line in ReadLines(path))
        {
            // A trailing or stray blank line is not a record.
            if (line.Trim().Length == 0)
            {
                continue;
            }

            string[] segments = line.Split(',');
            Gender gender;
            switch (segments[1].Trim().ToLowerInvariant())
            {
                case "m": gender = Gender.Male; break;
                case "f": gender = Gender.Female; break;
                default: gender = Gender.Unknown; break;
            }
            yield return new Record
            {
                Name = segments[0],
                Gender = gender,
                Score = int.Parse(segments[2])
            };
        }
    }

    /// <summary>Prints the best male record, or a notice when the file contains no male record.</summary>
    static void Main()
    {
        Record best = null;
        foreach (Record record in Parse("data.txt"))
        {
            if (record.Gender != Gender.Male) continue;
            // Strict '>' keeps the first record when several share the top score.
            if (best == null || record.Score > best.Score)
            {
                best = record;
            }
        }

        if (best == null)
        {
            // Without this guard the WriteLine below would dereference null.
            Console.WriteLine("No male records found.");
            return;
        }
        Console.WriteLine("{0}: {1}", best.Name, best.Score);
    }
}
The advantage of writing things as iterators is that you can easily use either streaming or buffering - for example, you can do:
List<Record> data = new List<Record>(Parse("data.txt"));
and then manipulate data all day long (assuming it isn't too large) - useful for multiple aggregates, mutating data, etc.

This question asks how to find a maximal element by a certain criterion. Combine that with Marc's LINQ part and you're away.

In the real world, of course, these would be records in a database, and you would use one line of SQL to select the best record, ie:
-- An aggregate cannot be used directly in WHERE; compare against a subquery instead.
SELECT Name, Score FROM Grades WHERE Score = (SELECT MAX(Score) FROM Grades)
(This returns more than one record where there's more than one best record, of course.) This is an example of the power of using the right tool for the job.

I think the fastest and least-code way would be to transform the txt to xml and then use Linq2Xml to select from it. Here's a link.
Edit: That might be more work than you'd like to do. Another option is to create a class called AcademicRecord that has properties for the person's name, gender, etc. Then, when you read the file, add one AcademicRecord to a List for each line in the file. Then sort the list with a comparison delegate; with a descending sort, the highest record is the first one in the list. Here's a link.

Your assignment might have different requirements, but if you only want to get "best male name and grade" from a file you described, a compact way is:
/// <summary>
/// Returns the best line of MyFile.csv ("name,gender,score"): male records
/// rank first, then the higher score wins.
/// </summary>
/// <returns>
/// The top-ranked line, or null when the file has no lines. If the file
/// contains no male record, the top-ranked line is the best non-male record.
/// </returns>
public String FindRecord()
{
    String[] lines = File.ReadAllLines("MyFile.csv");
    if (lines.Length == 0)
    {
        return null;
    }
    Array.Sort(lines, SortByBestMaleName);
    return lines[0];
}

/// <summary>
/// Comparison that orders male records before all others and, within the
/// same group, higher scores before lower ones.
/// </summary>
/// <returns>Negative when <paramref name="a"/> ranks before <paramref name="b"/>.</returns>
int SortByBestMaleName(String a, String b)
{
    // Fields are comma separated; Split() without arguments splits on whitespace only.
    String[] ap = a.Split(',');
    String[] bp = b.Split(',');
    bool aMale = ap[1] == "m";
    bool bMale = bp[1] == "m";
    // Always rank male higher (i.e. earlier in the sorted array)
    if (aMale && !bMale) { return -1; }
    if (!aMale && bMale) { return 1; }
    // Compare by score, descending
    return int.Parse(bp[2]).CompareTo(int.Parse(ap[2]));
}
Note that this is neither fast nor robust.

Related

How to auto-increment number and letter to generate a string sequence wise in c#

I have to generate a string sequence of the form AAA0000 to AAA0009; after AAA0009 it continues with AAA0010 to AAA0019 and so on up to AAA9999. When it reaches AAA9999, it continues with AAB0000 to AAB9999, and so on until ZZZ9999.
I want to use static class and static variables so that it can auto increment by itself on every hit.
I have tried some but not even close, so help me out thanks.
Thanks for being instructive I was trying as I Said already but anyways you already want to put negatives over there without even knowing the thing:
Code:
/// <summary>
/// Hands out sequential ticket ids of the form "#T" followed by the running
/// counter in base 36, left-padded with zeros to eight characters.
/// </summary>
public class GenerateTicketNumber
{
    // Running counter; its current value is the next ticket number to hand out.
    private static int num1 = 0;

    /// <summary>
    /// Formats the current counter value as a ticket id and then advances the counter.
    /// </summary>
    /// <returns>The ticket id, e.g. "#T00000000" for the first call.</returns>
    public static string ToBase36()
    {
        const string base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        var sb = new StringBuilder(9);
        // Convert a copy: dividing num1 itself would reset the counter to 0 on
        // every call and the sequence would never get past 1.
        int remaining = num1;
        do
        {
            sb.Insert(0, base36[remaining % 36]);
            remaining /= 36;
        } while (remaining != 0);
        var paddedString = "#T" + sb.ToString().PadLeft(8, '0');
        num1 = num1 + 1;
        return paddedString;
    }
}
above is the code. this will generate a sequence but not the way I want anyways will use it and thanks for help.

Though there's already an accepted answer, I would like to share this one.
P.S. I do not claim that this is the best approach, but in my previous work we made something similar using Azure Table Storage which is a no sql database (FYI) and it works.
1.) Create a table to store your running ticket number.
/// <summary>
/// A running ticket number made of a letter prefix ("AAA".."ZZZ") followed by
/// a four-digit number ("0001".."9999").
/// </summary>
public class TicketNumber
{
    public string Type { get; set; } // Maybe you want to have different types of ticket?

    /// <summary>Letter part of the ticket number, upper-case 'A'..'Z'.</summary>
    public string AlphaPrefix { get; set; }

    /// <summary>Numeric part of the ticket number as a zero-padded four-digit string.</summary>
    public string NumericPrefix { get; set; }

    /// <summary>Creates the first ticket number, "AAA0001".</summary>
    public TicketNumber()
    {
        this.AlphaPrefix = "AAA";
        this.NumericPrefix = "0001";
    }

    /// <summary>
    /// Advances to the next ticket number. After 9999 the number restarts at
    /// 0001 and the letter prefix advances by one; after "ZZZ" the prefix
    /// wraps around to "AAA".
    /// </summary>
    public void Increment()
    {
        int num = int.Parse(this.NumericPrefix);
        if (num >= 9999)
        {
            // Only roll over once 9999 itself has been used.
            num = 1;
            this.AlphaPrefix = NextAlphaPrefix(this.AlphaPrefix);
        }
        else
        {
            num++;
        }
        this.NumericPrefix = num.ToString().PadLeft(4, '0');
    }

    /// <summary>Returns "AlphaPrefix" immediately followed by "NumericPrefix".</summary>
    public override string ToString()
    {
        return this.AlphaPrefix + this.NumericPrefix;
    }

    // Adds one to a letter prefix with carry: every trailing 'Z' wraps to 'A'
    // and the letter before them is advanced; an all-'Z' prefix wraps to all 'A'.
    private static string NextAlphaPrefix(string prefix)
    {
        char[] letters = prefix.ToCharArray();
        int i = letters.Length - 1;
        while (i >= 0 && letters[i] == 'Z')
        {
            letters[i] = 'A';
            i--;
        }
        if (i >= 0)
        {
            letters[i] = (char)(letters[i] + 1);
        }
        return new string(letters);
    }
}
2.) Make sure you perform row-level locking and issue an error when it fails.
Here's an oracle syntax:
SELECT * FROM TICKETNUMBER WHERE TYPE = 'TYPE' FOR UPDATE NOWAIT;
This query locks the row and returns an error if the row is currently locked by another session.
We need this to make sure that even if you have millions of users generating a ticket number, it will not mess up the sequence.
Just make sure to save the new ticket number before you perform a COMMIT.
I forgot the MSSQL version of this but I recall using WITH (ROWLOCK) or something. Just google it.
3.) Working example:
// Demo: starts at the last possible ticket number and prints ten numbers in a
// row, advancing the ticket after each one.
static void Main()
{
    TicketNumber ticketNumber = new TicketNumber
    {
        AlphaPrefix = "ZZZ",
        NumericPrefix = "9999"
    };

    int printed = 0;
    while (printed < 10)
    {
        Console.WriteLine(ticketNumber);
        ticketNumber.Increment();
        printed++;
    }

    // Wait for input before the program ends.
    Console.Read();
}
Output:

Looking at your code that you've provided, it seems that you're backing this with a number and just want to convert that to a more user-friendly text representation.
You could try something like this:
/// <summary>
/// Converts a non-negative counter into an id such as "AAB0000": the last four
/// decimal digits are kept as digits and the rest (value / 10000) is written
/// in base 26 using the letters A-Z, with at least three letters.
/// </summary>
/// <param name="value">Counter value; must not be negative.</param>
/// <returns>The id; more than three letters are used when the value needs them.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="value"/> is negative.</exception>
private static string ValueToId(int value)
{
    if (value < 0)
    {
        // Negative remainders would put a minus sign into the digit part.
        throw new ArgumentOutOfRangeException("value", "The value must not be negative.");
    }

    string digits = (value % 10000).ToString("0000");
    int letterValue = value / 10000;

    string letters = string.Empty;
    for (int i = 0; i < 3 || letterValue > 0; ++i)
    {
        // Least significant letter is produced first, so prepend.
        letters = ((char)('A' + (letterValue % 26))).ToString() + letters;
        letterValue /= 26;
    }
    return letters + digits;
}
It takes the last four decimal digits of the value and uses them as they are; the remainder of the value (value / 10000) is then converted into the letters A-Z.
So 9999 becomes AAA9999, 10000 becomes AAB0000, and 270000 becomes ABB0000.
If the number is big enough that it exceeds 3 characters, it will add more letters at the start.

Here's an example of how you could go about implementing it
// Demo: starts from the template "AAAA00" and prints the next 100000 values
// of the sequence.
void Main()
{
    string template = @"AAAA00";
    var templateChars = template.ToCharArray();
    for (int i = 0; i < 100000; i++)
    {
        templateChars = IncrementCharArray(templateChars);
        Console.WriteLine(new string(templateChars));
    }
}
// Returns the successor of a single character in the cycle 0..9, A..Z:
// '9' is followed by 'A' and 'Z' wraps back to '0'; any other character is
// advanced by one code point.
public static char Increment(char val)
{
    switch (val)
    {
        case '9':
            return 'A';
        case 'Z':
            return '0';
        default:
            return (char)(val + 1);
    }
}
// Advances the character sequence by one step, treating it like a counter
// whose last character is the least significant position.
// If every character is 'Z', a new array one element longer and filled with
// '0' is returned; otherwise the given array is modified in place and returned.
public static char[] IncrementCharArray(char[] val)
{
    bool allZ = val.All(chr => chr == 'Z');
    if (allZ)
    {
        return Enumerable.Repeat('0', val.Length + 1).ToArray();
    }

    // Increment from the right; a position that wraps to '0' carries into the
    // position to its left.
    for (int i = val.Length - 1; ; i--)
    {
        val[i] = Increment(val[i]);
        if (val[i] != '0')
        {
            break;
        }
    }
    return val;
}

Applying Algorithm to multiple files

I'm currently working on a project where I have to choose a file and then sort it along with other files. The files are just filled with numbers but each file has linked data. So the first number in a file is linked to the first number in the second file and so on. I Currently have code that allows me to read a file and display the file unsorted and sorted using the bubble sort. I am not sure how I would be able to apply this same principle to multiple files at once. So that I could choose a file and then sort it in line with the same method I have for a single file.
Currently, the program loads and asks the user to choose between 1 and 2. 1 Shows the code unsorted and 2 shows the code sorted. I can read in the files but the problem is sorting and displaying in order. Basically How do I sort multiple files that are linked together. What steps do I need to take to do this?
An example of one file:
4
28
77
96
An example of the second file:
66.698
74.58
2.54
48.657
Any help would be appreciated.
Thanks
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
/// <summary>
/// Reads whole numbers from Data1/Day_1.txt into an array and offers a bubble
/// sort and a console dump for that array.
/// </summary>
public class Program
{
    public Program()
    {
    }

    /// <summary>
    /// Fills <paramref name="numbers"/> with one value per line of Data1/Day_1.txt.
    /// </summary>
    /// <param name="numbers">Array to fill; its length decides how many lines are read.</param>
    public void ReadFile(int[] numbers)
    {
        string path = "Data1/Day_1.txt";
        // "using" closes the file even when a line cannot be converted.
        using (StreamReader sr = new StreamReader(path))
        {
            for (int index = 0; index < numbers.Length; index++)
            {
                // ReadLine returns null past the end of the file, which
                // Convert.ToInt32 turns into 0.
                numbers[index] = Convert.ToInt32(sr.ReadLine());
            }
        }
    }

    /// <summary>Sorts <paramref name="numbers"/> in place, ascending, using bubble sort.</summary>
    public void SortArray(int[] numbers)
    {
        bool swap;
        do
        {
            swap = false;
            for (int index = 0; index < (numbers.Length - 1); index++)
            {
                if (numbers[index] > numbers[index + 1])
                {
                    int temp = numbers[index];
                    numbers[index] = numbers[index + 1];
                    numbers[index + 1] = temp;
                    swap = true;
                }
            }
        } while (swap); // a pass without swaps means the array is sorted
    }

    /// <summary>Writes every element of <paramref name="numbers"/> on its own console line.</summary>
    public void DisplayArray(int[] numbers)
    {
        foreach (int number in numbers)
        {
            Console.WriteLine("{0}", number);
        }
    }
}
The main is in another file to keep work organised:
using System;
/// <summary>
/// Console entry point: asks for 1 or 2, loads the numbers and shows them
/// either as read (1) or sorted (2).
/// </summary>
public class FileDemoTest
{
    public static void Main(string[] args)
    {
        Program obj = new Program();
        int[] numbers = new int[300];

        Console.WriteLine("1 or 2 ?");
        int operation = Convert.ToInt32(Console.ReadLine());

        // The file is read regardless of which operation was chosen.
        obj.ReadFile(numbers);

        switch (operation)
        {
            case 1:
                // Show the values in file order.
                Console.Write("Unsorted:");
                obj.DisplayArray(numbers);
                break;
            case 2:
                // Sort first, then show.
                obj.SortArray(numbers);
                Console.Write("Sorted: ");
                obj.DisplayArray(numbers);
                break;
        }
    }
}

What I would do is create a class that will hold the values from file 1 and file 2. Then you can populate a list of these classes by reading values from both files. After that, you can sort the list of classes on either field, and the relationships will be maintained (since the two values are stored in a single object).
Here's an example of the class that would hold the file data:
/// <summary>
/// Holds one pair of linked values: an integer from the first file and a
/// decimal from the second file.
/// </summary>
public class FileData
{
    /// <summary>Value taken from the first file.</summary>
    public int File1Value { get; set; }

    /// <summary>Value taken from the second file.</summary>
    public decimal File2Value { get; set; }

    /// <summary>
    /// Formats the pair for display.
    /// </summary>
    /// <returns>The two values separated by a colon and a space.</returns>
    public override string ToString()
    {
        return string.Format("{0}: {1}", File1Value, File2Value);
    }
}
Then you can create a method that reads both files and creates and returns a list of these objects:
/// <summary>
/// Reads integers from the first file and decimals from the second file and
/// pairs them up by position.
/// </summary>
/// <param name="firstFilePath">Path of the file holding one integer per line.</param>
/// <param name="secondFilePath">Path of the file holding one decimal per line.</param>
/// <returns>
/// One <c>FileData</c> per pair; when the files yield different numbers of
/// values, the surplus values of the longer one are ignored.
/// </returns>
public static FileData[] GetFileData(string firstFilePath, string secondFilePath)
{
    // The files are machine data ("66.698"), so parse independently of the
    // current UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Get the ints from the first file; lines that are not valid ints are skipped.
    int[] fileOneValues = File
        .ReadAllLines(firstFilePath)
        .Select(line =>
        {
            int parsed;
            return int.TryParse(line, System.Globalization.NumberStyles.Integer, invariant, out parsed)
                ? (int?)parsed
                : null;
        })
        .Where(value => value.HasValue)
        .Select(value => value.Value)
        .ToArray();

    // Get the decimals from the second file; lines that are not valid decimals are skipped.
    decimal[] fileTwoValues = File
        .ReadAllLines(secondFilePath)
        .Select(line =>
        {
            decimal parsed;
            return decimal.TryParse(line, System.Globalization.NumberStyles.Number, invariant, out parsed)
                ? (decimal?)parsed
                : null;
        })
        .Where(value => value.HasValue)
        .Select(value => value.Value)
        .ToArray();

    // NOTE(review): invalid lines are dropped per file, so a bad line in only
    // one file shifts the pairing of everything after it - confirm that the
    // input files never contain such lines.

    // The file lengths should match, but in case they don't, use the size of
    // the smaller one so every item has a partner.
    int numItems = Math.Min(fileOneValues.Length, fileTwoValues.Length);

    var fileData = new FileData[numItems];
    for (int index = 0; index < numItems; index++)
    {
        fileData[index] = new FileData
        {
            File1Value = fileOneValues[index],
            File2Value = fileTwoValues[index]
        };
    }
    return fileData;
}
Now, we need to modify your sorting method to work on a FileData array instead of an int array. I also added an argument that, if set to false, will sort on the File2Data field instead of File1Data:
/// <summary>
/// Bubble-sorts <paramref name="fileData"/> in place in ascending order.
/// </summary>
/// <param name="fileData">Pairs to sort; each pair moves as a unit.</param>
/// <param name="sortOnFile1Data">
/// True to order by <c>File1Value</c>, false to order by <c>File2Value</c>.
/// </param>
public static void SortArray(FileData[] fileData, bool sortOnFile1Data = true)
{
    bool swappedThisPass = true;
    while (swappedThisPass)
    {
        swappedThisPass = false;
        for (int i = 1; i < fileData.Length; i++)
        {
            FileData left = fileData[i - 1];
            FileData right = fileData[i];

            bool outOfOrder;
            if (sortOnFile1Data)
            {
                outOfOrder = left.File1Value > right.File1Value;
            }
            else
            {
                outOfOrder = left.File2Value > right.File2Value;
            }

            if (!outOfOrder)
            {
                continue;
            }

            // Swap the neighbours and remember that another pass is needed.
            fileData[i - 1] = right;
            fileData[i] = left;
            swappedThisPass = true;
        }
    }
}
And finally, you can display the non-sorted and sorted lists of data. Note I added a second question to the user if they choose "Sorted" where they can decide if it should be sorted by File1 or File2:
/// <summary>
/// Asks whether to show the paired file data unsorted (1) or sorted (2); for
/// sorted output it additionally asks which file's values to sort on.
/// </summary>
private static void Main()
{
    Console.WriteLine("1 or 2 ?");
    int operation = Convert.ToInt32(Console.ReadLine());
    // Verbatim strings (@"...") keep the backslashes of the paths literal.
    var fileData = GetFileData(@"f:\public\temp\temp1.txt", @"f:\public\temp\temp2.txt");
    if (operation == 1)
    {
        //Display unsorted values
        Console.WriteLine("Unsorted:");
        foreach (var data in fileData)
        {
            Console.WriteLine(data);
        }
    }
    if (operation == 2)
    {
        Console.WriteLine("Sort on File1 or File2 (1 or 2)?");
        operation = Convert.ToInt32(Console.ReadLine());
        //sort numbers and display; any answer other than 1 sorts on File2
        SortArray(fileData, operation == 1);
        Console.WriteLine("Sorted: ");
        foreach (var data in fileData)
        {
            Console.WriteLine(data);
        }
    }
    Console.Write("\nDone!\nPress any key to exit...");
    Console.ReadKey();
}
If you wanted to use an int to decide which field to sort on, you could do something like the following:
/// <summary>
/// Bubble-sorts <paramref name="fileData"/> in place in ascending order on the
/// field selected by <paramref name="sortFileNumber"/>.
/// </summary>
/// <param name="fileData">Records to sort; each record moves as a unit.</param>
/// <param name="sortFileNumber">
/// 1 sorts on <c>File1Value</c>, 2 on <c>File2Value</c>, any other value on
/// <c>File3Value</c>.
/// </param>
// NOTE(review): this variant needs FileData to expose a File3Value member - confirm it exists.
public static void SortArray(FileData[] fileData, int sortFileNumber = 1)
{
    bool swappedThisPass = true;
    while (swappedThisPass)
    {
        swappedThisPass = false;
        for (int i = 1; i < fileData.Length; i++)
        {
            FileData left = fileData[i - 1];
            FileData right = fileData[i];

            bool outOfOrder;
            switch (sortFileNumber)
            {
                case 1:
                    outOfOrder = left.File1Value > right.File1Value;
                    break;
                case 2:
                    outOfOrder = left.File2Value > right.File2Value;
                    break;
                default: // File3Value is used for anything else
                    outOfOrder = left.File3Value > right.File3Value;
                    break;
            }

            if (!outOfOrder)
            {
                continue;
            }

            // Swap the neighbours and remember that another pass is needed.
            fileData[i - 1] = right;
            fileData[i] = left;
            swappedThisPass = true;
        }
    }
}

Importing and removing duplicates from a massive amount of text files using C# and Redis

This is a bit of a doozy and it's been a while since I worked with C#, so bear with me:
I'm running a jruby script to iterate through 900 files (5 Mb - 1500 Mb in size) to figure out how many dupes STILL exist within these (already uniq'd) files. I had little luck with awk.
My latest idea was to insert them into a local MongoDB instance like so:
db.collection('hashes').update({ :_id => hash}, { $inc: { count: 1} }, { upsert: true)
... so that later I could just query it like db.collection.where({ count: { $gt: 1 } }) to get all the dupes.
This is working great except it's been over 24 hours and at the time of writing I'm at 72,532,927 Mongo entries and growing.
I think Ruby's .each_line is bottlenecking the IO hardcore:
So what I'm thinking now is compiling a C# program which fires up a thread PER EACH FILE and inserts the line (md5 hash) into a Redis list.
From there, I could have another compiled C# program simply pop the values off and ignore the save if the count is 1.
So the questions are:
Will using a compiled file reader and multithreading the file reads significantly improve performance?
Is using Redis even necessary? With a tremendous amount of AWS memory, could I not just use the threads to fill some sort of a list atomically and proceed from there?
Thanks in advance.

Updated
New solution. Old solution. The main idea is to calculate a dummy hash (just the sum of all characters) for each line and store it in Dictionary<ulong, List<LinePosition>> _hash2LinePositions. Several lines in the same stream can share the same hash, which is why the dictionary value is a List. When the hashes are the same, we read the lines back from the streams and compare them. LinePosition stores information about a line: its position in the stream and its length. I don't have files as huge as yours, but my tests show that it works. Here is the full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
public class Solution
{
// Location of one line inside a stream: the offset of its first byte and its
// length in bytes.
struct LinePosition
{
    public long Start;
    public long Length;

    public LinePosition(long start, long count)
    {
        Length = count;
        Start = start;
    }

    public override string ToString()
    {
        return "Start: " + Start + ", Length: " + Length;
    }
}
// Indexes the lines of a stream by a cheap "dummy" hash (the sum of the
// line's bytes) and reports lines that occur more than once. Only hashes and
// line positions are kept in memory; line contents are re-read from the
// stream when two candidates have to be compared.
class TextFileHasher : IDisposable
{
    // dummy hash -> positions of all lines with that hash
    readonly Dictionary<ulong, List<LinePosition>> _hash2LinePositions;
    readonly Stream _stream;
    bool _isDisposed;

    // All dummy hashes seen in the stream.
    public HashSet<ulong> Hashes { get; private set; }

    // Display name of the underlying file/stream.
    public string Name { get; private set; }

    // The stream is read byte by byte and later re-read via Seek, and it is
    // disposed together with this instance.
    public TextFileHasher(string name, Stream stream)
    {
        Name = name;
        _stream = stream;
        _hash2LinePositions = new Dictionary<ulong, List<LinePosition>>();
        Hashes = new HashSet<ulong>();
    }

    public override string ToString()
    {
        return Name;
    }

    // Reads the stream to its end and records hash and position of every line.
    // Lines are separated by '\r' and/or '\n'. A hash of 0 doubles as the
    // "no data read yet" marker, so empty lines are not recorded.
    public void CalculateFileHash()
    {
        int readByte;
        ulong dummyLineHash = 0;
        // Line start position in file
        long startPosition = 0;
        while ((readByte = _stream.ReadByte()) != -1)
        {
            if (readByte == '\r' || readByte == '\n')
            {
                // End of line: store it if there was data.
                if (dummyLineHash != 0)
                {
                    // Position is already past the separator, hence the "- 1".
                    AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - 1 - startPosition);
                    dummyLineHash = 0;
                }
            }
            else
            {
                // First byte of a new line: remember where the line starts.
                if (dummyLineHash == 0)
                {
                    startPosition = _stream.Position - 1;
                }
                dummyLineHash += (uint)readByte;
            }
        }

        // Last line without a trailing separator.
        if (dummyLineHash != 0)
        {
            AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - startPosition);
        }
    }

    // Positions of all lines with the given hash; throws KeyNotFoundException
    // for a hash that was never recorded.
    public List<LinePosition> GetLinePositions(ulong hash)
    {
        return _hash2LinePositions[hash];
    }

    // Returns the content of every repeated line within this stream, one entry
    // per repetition (a line occurring three times yields two entries).
    public List<string> GetDuplicates()
    {
        List<string> duplicates = new List<string>();
        foreach (List<LinePosition> linesPos in _hash2LinePositions.Values)
        {
            if (linesPos.Count > 1)
            {
                duplicates.AddRange(FindExactDuplicates(linesPos));
            }
        }
        return duplicates;
    }

    // Disposes the underlying stream; safe to call more than once.
    public void Dispose()
    {
        if (_isDisposed)
        {
            return;
        }
        _stream.Dispose();
        _isDisposed = true;
    }

    // Records one line under its hash.
    private void AddToDictAndHash(ulong hash, long start, long count)
    {
        List<LinePosition> linesPosition;
        if (!_hash2LinePositions.TryGetValue(hash, out linesPosition))
        {
            linesPosition = new List<LinePosition>();
            _hash2LinePositions.Add(hash, linesPosition);
        }
        linesPosition.Add(new LinePosition(start, count));
        Hashes.Add(hash);
    }

    // Re-reads the bytes of one line from the stream.
    // Throws EndOfStreamException when the stream ends before the line does.
    public byte[] GetLineAsByteArray(LinePosition prevPos)
    {
        int len = checked((int)prevPos.Length);
        byte[] lineBytes = new byte[len];
        _stream.Seek(prevPos.Start, SeekOrigin.Begin);

        // Stream.Read may deliver fewer bytes than requested, so loop until
        // the whole line has been read.
        int offset = 0;
        while (offset < len)
        {
            int read = _stream.Read(lineBytes, offset, len - offset);
            if (read == 0)
            {
                throw new EndOfStreamException("The stream ended before the whole line could be read.");
            }
            offset += read;
        }
        return lineBytes;
    }

    // Finds identical lines among positions that share one hash. The list is
    // sorted by length (in place) so that only lines of equal length are
    // compared byte by byte.
    private List<string> FindExactDuplicates(List<LinePosition> linesPos)
    {
        List<string> duplicates = new List<string>();
        linesPos.Sort((x, y) => x.Length.CompareTo(y.Length));

        // Distinct line contents seen so far for the current length. Every new
        // line is checked against all of them; comparing only against the
        // first line of a length group would miss e.g. the second "cba" in
        // "abc", "cba", "cba" (same hash, same length).
        List<byte[]> distinctLines = new List<byte[]>();
        long currentLength = -1;
        foreach (LinePosition pos in linesPos)
        {
            if (pos.Length != currentLength)
            {
                currentLength = pos.Length;
                distinctLines.Clear();
            }

            byte[] lineBytes = GetLineAsByteArray(pos);
            bool seenBefore = false;
            foreach (byte[] known in distinctLines)
            {
                if (known.SequenceEqual(lineBytes))
                {
                    seenBefore = true;
                    break;
                }
            }

            if (seenBefore)
            {
                duplicates.Add(System.Text.Encoding.Default.GetString(lineBytes));
            }
            else
            {
                distinctLines.Add(lineBytes);
            }
        }
        return duplicates;
    }
}
// Demo: hashes three in-memory texts and prints every duplicate line, both
// within one text and across different texts.
public static void Main(String[] args)
{
    string[] names = { "Text1", "Text2", "Text3" };
    string[] texts = { "abc\r\ncba\r\nabc", "def\r\ncba\r\nwet", "def\r\nbla\r\nwat" };

    List<TextFileHasher> textFileHashers = new List<TextFileHasher>();
    try
    {
        for (int t = 0; t < texts.Length; t++)
        {
            TextFileHasher hasher = new TextFileHasher(names[t], new MemoryStream(System.Text.Encoding.Default.GetBytes(texts[t])));
            textFileHashers.Add(hasher);
            hasher.CalculateFileHash();
        }

        List<string> totalDuplicates = new List<string>();

        // 1) Duplicates inside each text. This is done for every hasher, not
        //    only for those that happen to be listed first under some hash.
        foreach (TextFileHasher tfh in textFileHashers)
        {
            totalDuplicates.AddRange(tfh.GetDuplicates());
        }

        // 2) For every hash, collect the texts that contain it.
        Dictionary<ulong, List<TextFileHasher>> hashToHashers = new Dictionary<ulong, List<TextFileHasher>>();
        foreach (TextFileHasher tfh in textFileHashers)
        {
            foreach (ulong dummyHash in tfh.Hashes)
            {
                List<TextFileHasher> hashers;
                if (!hashToHashers.TryGetValue(dummyHash, out hashers))
                {
                    hashers = new List<TextFileHasher>();
                    hashToHashers.Add(dummyHash, hashers);
                }
                hashers.Add(tfh);
            }
        }

        // 3) Duplicates across texts: for each hash shared by several texts,
        //    compare the candidate lines of every pair of texts byte by byte.
        foreach (KeyValuePair<ulong, List<TextFileHasher>> entry in hashToHashers)
        {
            ulong hash = entry.Key;
            List<TextFileHasher> tfhs = entry.Value;
            if (tfhs.Count <= 1)
            {
                continue;
            }

            for (int i = 0; i < tfhs.Count; i++)
            {
                var tfh1Positions = tfhs[i].GetLinePositions(hash);
                for (int j = i + 1; j < tfhs.Count; j++)
                {
                    var tfh2Positions = tfhs[j].GetLinePositions(hash);
                    for (int k = 0; k < tfh1Positions.Count; k++)
                    {
                        var tfh1Pos = tfh1Positions[k];
                        var tfh1ByteArray = tfhs[i].GetLineAsByteArray(tfh1Pos);
                        for (int m = 0; m < tfh2Positions.Count; m++)
                        {
                            var tfh2Pos = tfh2Positions[m];
                            // Different lengths can never be equal; skip the read.
                            if (tfh1Pos.Length != tfh2Pos.Length)
                            {
                                continue;
                            }
                            var tfh2ByteArray = tfhs[j].GetLineAsByteArray(tfh2Pos);
                            if (tfh1ByteArray.SequenceEqual(tfh2ByteArray))
                            {
                                totalDuplicates.Add(System.Text.Encoding.Default.GetString(tfh1ByteArray));
                            }
                        }
                    }
                }
            }
        }

        Console.WriteLine();
        if (totalDuplicates.Count > 0)
        {
            Console.WriteLine("Total number of duplicates: {0}", totalDuplicates.Count);
            Console.WriteLine("#######################");
            totalDuplicates.ForEach(x => Console.WriteLine("{0}", x));
            Console.WriteLine("#######################");
        }
    }
    finally
    {
        // Free resources, also when hashing or comparing failed.
        foreach (var tfh in textFileHashers)
        {
            tfh.Dispose();
        }
    }
}
}

If you have tons of ram... You guys are overthinking it...
// Reads the whole file into memory and lazily filters out repeated lines.
var fileLines = File.ReadAllLines(@"c:\file.csv").Distinct();

How to delete a specific record (fixed length) includes data from a .txt file?

I am new in C# and I am making project but I can't make this delete part ..
I save my data in a .txt file on a single line that contains many fixed-length records with no delimiter. Each record has a fixed length and each field has a fixed length, and the data is saved in the file like this:
1ahly2zamalek
how do I delete, for example the record 2zamalek from line with entering to the program id=2?
// Sets the fixed field widths of a team record and allocates the character
// buffers for the id and name fields.
public team()
{
    // Field widths, in characters.
    Team_ID_Len = 5;
    Team_Name_Len = 10;
    // Total record width: 5 (id) + 10 (name).
    Team_Rec_Len = 15;

    // One buffer per field, sized to the field width.
    Team_ID = new char[Team_ID_Len];
    Team_Name = new char[Team_Name_Len];
}

Sounds like you're looking for Substring. Give it a start (length of record * how many), and the length (length of record).
Actually, you might want to create the string as string s = part1+part2 where part1 is the substring from 0 till the start of the record, and part2 is the start of the NEXT record, until the end.
Then just save it.

Your number is your delimiter, split with a char array
using System;
using System;
/// <summary>
/// Demonstrates deleting one record from a delimiter-less string in which
/// every record consists of a numeric id followed by non-digit data.
/// </summary>
public static class Program
{
    public static void Main()
    {
        string words = "1sdklfjlsdf2lksjdf3sfd4sfd5fsd6fsd7fsd8fsd9sfd10aslkdfj11jklh12hjk";
        int deleteRecordId = 11;
        Console.WriteLine(RemoveRecord(words, deleteRecordId));
    }

    /// <summary>
    /// Removes the record whose id equals <paramref name="recordId"/>.
    /// </summary>
    /// <param name="records">
    /// Concatenated records, each a run of digits (the id) followed by a run
    /// of non-digit characters (the data). The data must not contain digits.
    /// </param>
    /// <param name="recordId">Id of the record to remove.</param>
    /// <returns>
    /// The records without the matching one; all other records keep their
    /// ids. The input is returned unchanged when no record matches.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="records"/> is null.</exception>
    public static string RemoveRecord(string records, int recordId)
    {
        if (records == null)
        {
            throw new ArgumentNullException("records");
        }

        var result = new System.Text.StringBuilder(records.Length);
        int pos = 0;
        while (pos < records.Length)
        {
            // Id: the whole run of digits, so "10" is one id rather than "1" and "0".
            int idStart = pos;
            while (pos < records.Length && IsAsciiDigit(records[pos]))
            {
                pos++;
            }
            int dataStart = pos;

            // Data: everything up to the next digit.
            while (pos < records.Length && !IsAsciiDigit(records[pos]))
            {
                pos++;
            }

            int id;
            bool isTarget = int.TryParse(records.Substring(idStart, dataStart - idStart), out id)
                && id == recordId;
            if (!isTarget)
            {
                result.Append(records, idStart, pos - idStart);
            }
        }
        return result.ToString();
    }

    // True for '0'..'9' only (char.IsDigit would also accept other Unicode digits).
    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }
}
then WriteAll to the file

C# Create high score text file and reorder the results

I'm trying to create a text file containing the players' scores of a hangman game. The structure of the text file should follow the order: number. name score (e.g. 1. Helen 2500). I tried to split the line so that I can introduce the data into a specific array for name and score so that I can compare the results and reorder them (the numbers remain the same: 1, 2, 3, etc.) but it doesn't work. I don't get errors but the building stops at this function because of an incorrect use of the array v[]. What do you suggest me to do to make it work?
[code]
// Adds the player named in textBox1 with the given score to the high-score
// file and rewrites the file ordered from highest to lowest score.
// Each line of the file has the form "<position>. <name> <score>", e.g.
// "1. Helen 2500"; positions are renumbered on every write. Lines that do
// not match this form are dropped.
private void New_Score(int score)
{
    file_path = @"D:\Visual Studio 2005\Projects\WindowsApplication2\WindowsApplication2\Resources\HighScore.txt";

    // Parallel lists, kept sorted by descending score.
    System.Collections.Generic.List<string> names = new System.Collections.Generic.List<string>();
    System.Collections.Generic.List<int> results = new System.Collections.Generic.List<int>();

    if (File.Exists(file_path))
    {
        using (StreamReader f = new StreamReader(file_path))
        {
            string line;
            while ((line = f.ReadLine()) != null)
            {
                line = line.Trim();
                // The name sits between the first '.' and the last space,
                // the score after the last space.
                int dot = line.IndexOf('.');
                int lastSpace = line.LastIndexOf(' ');
                if (dot < 0 || lastSpace <= dot)
                {
                    continue;
                }

                int parsedScore;
                if (!int.TryParse(line.Substring(lastSpace + 1), out parsedScore))
                {
                    continue;
                }

                string parsedName = line.Substring(dot + 1, lastSpace - dot - 1).Trim();
                InsertScore(names, results, parsedName, parsedScore);
            }
        }
    }

    InsertScore(names, results, textBox1.Text, score);

    // Write all lines in one go; the reader above is already closed.
    using (StreamWriter writer = new StreamWriter(file_path, false))
    {
        for (int i = 0; i < names.Count; i++)
        {
            writer.WriteLine((i + 1) + ". " + names[i] + " " + results[i]);
        }
    }
}

// Inserts one entry so that the lists stay ordered by descending score;
// scores are compared numerically and equal scores keep their arrival order.
private static void InsertScore(System.Collections.Generic.List<string> names, System.Collections.Generic.List<int> results, string name, int value)
{
    int insertAt = 0;
    while (insertAt < results.Count && results[insertAt] >= value)
    {
        insertAt++;
    }
    names.Insert(insertAt, name);
    results.Insert(insertAt, value);
}
[/code]

I would personally abstract the code a bit more, but if you don't want to add any more classes or really do anything outside of the method, here's what I'd do:
Parse high scores in as a List<Tuple<int, string, int>>
Add the new score
Use either list.Sort() or LINQ to sort the list
Write the list out to the file.
It's a lot cleaner and way more readable than what you have in the question.

Despite the fact this goes entirely against the saying about fishing and eating which I strongly support, I took the liberty of making a few improvements by completely rewriting your code.
First of all, I got rid of storing the position of the player in the text file. This isn't efficient, since when you add a player with the highest score (rendering him #1), you'll have to re-number everyone else that's present in the file at that point.
So the resulting file looks like this:
Foo 123
Qux 714
Bar 456
Baz 999
The main() method looks like this:
var scores = ReadScoresFromFile("Highscores.txt");
scores.ForEach(s => Console.WriteLine(s));
Console.ReadKey();
Then there's the Highscore class:
/// <summary>
/// One high-score entry parsed from a "Name Score" line; the position is
/// assigned later, once all entries have been ranked.
/// </summary>
class Highscore
{
    /// <summary>Player name (first space-separated field of the line).</summary>
    public String Name { get; set; }

    /// <summary>Rank of the entry; 0 until a rank has been assigned.</summary>
    public int Position { get; set; }

    /// <summary>Score (second space-separated field of the line).</summary>
    public int Score { get; set; }

    /// <summary>Parses a "Name Score" line.</summary>
    /// <param name="data">Line of the high-score file.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="data"/> is null or empty, has fewer than two
    /// space-separated fields, or its second field is not an integer.
    /// </exception>
    public Highscore(String data)
    {
        // Validate before splitting: calling Split on null would throw a
        // NullReferenceException instead of the documented ArgumentException.
        if (String.IsNullOrEmpty(data))
            throw new ArgumentException("Invalid high score string", "data");

        var d = data.Split(' ');
        if (d.Length < 2)
            throw new ArgumentException("Invalid high score string", "data");

        this.Name = d[0];
        int num;
        if (int.TryParse(d[1], out num))
        {
            this.Score = num;
        }
        else
        {
            throw new ArgumentException("Invalid score", "data");
        }
    }

    /// <summary>Formats the entry as "Position. Name: Score".</summary>
    public override string ToString()
    {
        return String.Format("{0}. {1}: {2}", this.Position, this.Name, this.Score);
    }
}
You see a Highscore populates itself based on the line from the Highscore file it's fed. I populate the list of scores using this method:
/// <summary>
/// Reads every line of the file at <paramref name="path"/>, turns each parsable
/// line into a Highscore, and returns the entries sorted and ranked.
/// Lines rejected by the Highscore constructor are reported on the console and skipped.
/// </summary>
static List<Highscore> ReadScoresFromFile(String path)
{
    List<Highscore> parsed = new List<Highscore>();
    using (var input = new StreamReader(path))
    {
        // ReadLine returns null once the stream is exhausted.
        for (String current = input.ReadLine(); current != null; current = input.ReadLine())
        {
            try
            {
                parsed.Add(new Highscore(current));
            }
            catch (ArgumentException error)
            {
                Console.WriteLine("Invalid score at line \"{0}\": {1}", current, error);
            }
        }
    }
    return SortAndPositionHighscores(parsed);
}
And finally some sorting and position assignment:
/// <summary>
/// Orders the entries by Score, highest first, and numbers them from 1 via Position.
/// The input list itself is not reordered; a new list is returned.
/// </summary>
static List<Highscore> SortAndPositionHighscores(List<Highscore> scores)
{
    List<Highscore> ranked = scores.OrderByDescending(entry => entry.Score).ToList();
    for (int index = 0; index < ranked.Count; index++)
    {
        // Positions are 1-based.
        ranked[index].Position = index + 1;
    }
    return new List<Highscore>(ranked);
}
Resulting in:
1. Baz: 999
2. Qux: 714
3. Bar: 456
4. Foo: 123

There is no reason to store the position number; the line position can serve that purpose. Even better would be to serialize a List of score objects to, for example, XML (if you want to keep a human-readable score file), to avoid line parsing. But if you want to store plain text, here is a simple example:
/// <summary>
/// Appends "name score" to the entries stored in scores.txt and rewrites the file
/// with all entries ordered by score, highest first.
/// </summary>
/// <param name="score">Score to record.</param>
/// <param name="name">Player name to record.</param>
private void New_Score(int score, string name)
{
    const string filename = "scores.txt";

    // Start from the existing entries when the file is present, otherwise from nothing.
    List<string> scoreList = File.Exists(filename)
        ? new List<string>(File.ReadAllLines(filename))
        : new List<string>();

    scoreList.Add(name + " " + score.ToString());

    // The score is whatever follows the last space on each line.
    string[] ordered = scoreList
        .OrderByDescending(entry => int.Parse(entry.Substring(entry.LastIndexOf(" ") + 1)))
        .ToArray();

    File.WriteAllLines(filename, ordered);
}
And later when displaying results add order number in front, something like this:
// Print every stored line prefixed with its 1-based rank ("1. ", "2. ", ...).
List<string> scoreList = File.ReadAllLines(filename).ToList();
int xx = 1;
for (int index = 0; index < scoreList.Count; index++, xx++)
{
    Console.WriteLine(xx.ToString() + ". " + scoreList[index]);
}

Seems like a complicated way to store a simple list of high scores. Why don't you try the following.
Define a simple object to hold a player's score.
/// <summary>
/// A single player's score entry. Marked [Serializable] so that instances
/// can be written and read by a serialization formatter.
/// </summary>
[Serializable]
public class HighScore
{
/// <summary>Name of the player who achieved the score.</summary>
public string PlayerName { get; set; }
/// <summary>The score value.</summary>
public int Score { get; set; }
}
Make sure you mark it with the [Serializable] attribute.
Let's quickly create a list of high scores for a couple of players.
// Sample data: five players with scores in ascending order.
var highScores = new List<HighScore>();
highScores.Add(new HighScore { PlayerName = "Helen", Score = 1000 });
highScores.Add(new HighScore { PlayerName = "Christophe", Score = 2000 });
highScores.Add(new HighScore { PlayerName = "Ruben", Score = 3000 });
highScores.Add(new HighScore { PlayerName = "John", Score = 4000 });
highScores.Add(new HighScore { PlayerName = "The Last Starfighter", Score = 5000 });
Now you can use the BinaryFormatter to serialize the scores and save them to a local file.
// Writes the whole highScores list to C:\temp\scores.dat, replacing any existing file.
// The path is a verbatim string literal (@"..."), so the backslashes are taken literally.
// NOTE(review): BinaryFormatter is considered unsafe and has been removed from
// recent .NET versions; prefer a format such as System.Text.Json for new code.
using (var fileStream = new FileStream(@"C:\temp\scores.dat", FileMode.Create, FileAccess.Write))
{
    var formatter = new BinaryFormatter();
    formatter.Serialize(fileStream, highScores);
}
Later you can load the high scores from this file in a similar manner.
// Reads the list previously saved to C:\temp\scores.dat back into highScores.
// The path is a verbatim string literal (@"..."), so the backslashes are taken literally.
// NOTE(review): BinaryFormatter.Deserialize must never be run on untrusted data;
// BinaryFormatter has been removed from recent .NET versions. Only load files
// this application wrote itself, or switch to a safer serializer.
using (var fileStream = new FileStream(@"C:\temp\scores.dat", FileMode.Open, FileAccess.Read))
{
    var formatter = new BinaryFormatter();
    highScores = (List<HighScore>) formatter.Deserialize(fileStream);
}
If you want to sort them you can implement the IComparable interface on the HighScore type.
/// <summary>
/// High-score entry that orders itself by Score, highest first, so that a
/// default sort yields a descending leaderboard.
/// </summary>
[Serializable]
public class HighScore : IComparable
{
    //...

    /// <summary>
    /// Compares this entry with another for descending-by-score ordering.
    /// </summary>
    /// <param name="obj">The other HighScore, or null.</param>
    /// <returns>
    /// 0 when the scores are equal, 1 when this score is lower than the other
    /// (or <paramref name="obj"/> is null), and -1 when this score is higher.
    /// </returns>
    /// <exception cref="InvalidCastException"><paramref name="obj"/> is not a HighScore.</exception>
    public int CompareTo(object obj)
    {
        // IComparable convention: any instance sorts after null. Without this
        // guard the property access below would throw NullReferenceException.
        if (obj == null)
        {
            return 1;
        }

        var otherScore = (HighScore) obj;
        if (Score == otherScore.Score)
        {
            return 0;
        }

        // Deliberately inverted: a lower score sorts later (descending order).
        return Score < otherScore.Score ? 1 : -1;
    }
}
Now you can just call the Sort(...) on your generic List collection.
// Sorts the list in place; no comparer is passed, so the ordering is expected to come
// from HighScore's own CompareTo (IComparable) implementation.
highScores.Sort();
And voila the scores are sorted in a descending order.
// Print each entry as "<player>: <score> points".
foreach (HighScore entry in highScores)
{
    string line = String.Format("{0}: {1} points", entry.PlayerName, entry.Score);
    Console.WriteLine(line);
}
Or, even simpler, just use LINQ to sort the high scores.
// Build a new list ordered by Score, highest first; highScores itself is left untouched.
var sortedScores = (from s in highScores
                    orderby s.Score descending
                    select s).ToList();

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

getting the best record from a file - c#

This question asks how to find a maximal element by a certain criterion. Combine that with Marc's LINQ part and you're away.

Related

How to auto-increment number and letter to generate a string sequence wise in c#

Applying Algorithm to multiple files

Importing and removing duplicates from a massive amount of text files using C# and Redis

How to delete a specific record (fixed length) includes data from a .txt file?

C# Create high score text file and reorder the results

Categories

Resources