c# csv file with unwanted CRLFs - c#

I have a CSV file, and I want to get all the data from index position 1 (the Company Name in the sample data) and compare the values to each other.
I am currently using this line of code to read in the CSV file line by line,
// Verbatim string (@"...") so the backslashes in the path are not treated as escapes.
string[] csvData = System.IO.File.ReadAllLines(@"C:\Path");
Then I would split them by rows and try to run a code to grab the wanted data like this
// Collect the second comma-separated field (index 1) of every raw line.
var comNames = new List<string>();
foreach (string rawLine in csvData)
{
    string[] fields = rawLine.Split(',');
    comNames.Add(fields[1]);
}
But as you all know that won't work for lines 4 and 5 even though it is still the same column. Is there a way for me to delete the CRLF's that are causing this issue so I can make this code work or is there another workaround?
Sample data
Serial Number,Company Name,Employee Markme,Description,Leave
9788189999599,TALES OF SHIVA,Mark,mark,0
9780099578079,1Q84
THE
COMPLETE
TRILOGY,HARUKI MURAKAMI,Mark,0
9780198082897,MY KUMAN,Mark,Mark,0

The code below will work if the following assumptions hold true:
There is always a serial #
There is always a company name
There is always a comma before and after the company name
The serial # is always exactly 13 digits
#1-3 are required for this solution. You can tweak the RegEx pattern to deal with #4.
/// <summary>
/// Reads the CSV file and returns the company name that follows each 13-digit serial number.
/// Line breaks embedded in a company name are turned into single spaces.
/// </summary>
/// <returns>One company name per serial number found, in file order.</returns>
public List<string> GetListOfCompanies() {
    string data = File.ReadAllText(@"C:\Users\adam\Documents\test.csv");
    var companies = new List<string>();
    var pattern = @"\d{13}";
    const string lineBreakMarker = "#thisisreallyunique#";
    //replace every flavour of line ending (CRLF, lone LF, lone CR) with something unique;
    //Environment.NewLine alone would miss files written on another platform
    data = data.Replace("\r\n", "\n").Replace('\r', '\n').Replace("\n", lineBreakMarker);
    //find each serial number, and grab the item after it
    foreach (Match match in Regex.Matches(data, pattern)) {
        //the company name sits between the first and the second comma after the serial #
        int nameStart = data.IndexOf(',', match.Index) + 1;
        if (nameStart == 0) {
            continue; //no comma after the digits: not a complete record
        }
        int nameEnd = data.IndexOf(',', nameStart);
        if (nameEnd < 0) {
            continue; //nothing follows the name: not a complete record
        }
        //oh, and put the spaces back into the company
        var company = data.Substring(nameStart, nameEnd - nameStart).Replace(lineBreakMarker, " ");
        companies.Add(company);
    }
    return companies;
}

Here is a solution using the Superpower parser library; you can customize it further for your use case. It is a nice, very expressive library, but there are faster solutions: you could write the parser by hand using Span to avoid unnecessary memory allocations.
using Superpower;
using Superpower.Parsers;
using Superpower.Tokenizers;
// Sample input as a verbatim string (@"...") so the embedded line breaks are part of the value.
var csv = @"Serial Number,Company Name,Employee Markme,Description,Leave
9788189999599,TALES OF SHIVA,Mark,mark,0
9780099578079,1Q84
THE
COMPLETE
TRILOGY,HARUKI MURAKAMI,Mark,0
9780198082897,MY KUMAN,Mark,Mark,0";
// Splits the raw text into Comma / NewLine / Numeric / String tokens.
// The two-character CRLF sequence is listed before the single-character line
// breaks so that a Windows line ending becomes one NewLine token, not two.
var tokenizer = new TokenizerBuilder<CsvToken>()
    .Match(Character.EqualTo(','), CsvToken.Comma)
    .Match(Span.EqualTo("\r\n"), CsvToken.NewLine)
    .Match(Character.EqualTo('\n'), CsvToken.NewLine)
    .Match(Character.EqualTo('\r'), CsvToken.NewLine)
    .Match(Numerics.Integer, CsvToken.Numeric)
    // Anything that is not a line break, a comma or a digit is plain text.
    .Match(Span.WithoutAny((char c) => c == '\n' || c == '\r' || c == ',' || (c >= '0' && c <= '9')), CsvToken.String)
    .Build();
// Header parser: expects the five column names in this exact order, each but the
// last followed by a Comma token. Parsing fails if the header differs.
var Header = from columnName1 in Token.EqualToValue(CsvToken.String, "Serial Number").Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from columnName2 in Token.EqualToValue(CsvToken.String, "Company Name").Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from columnName3 in Token.EqualToValue(CsvToken.String, "Employee Markme").Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from columnName4 in Token.EqualToValue(CsvToken.String, "Description").Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from columnName5 in Token.EqualToValue(CsvToken.String, "Leave")
select new[]
{
columnName1.ToStringValue(),
columnName2.ToStringValue(),
columnName3.ToStringValue(),
columnName4.ToStringValue(),
columnName5.ToStringValue()
};
// A serial number: a Numeric token whose text is exactly 13 characters long.
var SerialNumber = from number in Token.EqualTo(CsvToken.Numeric)
where number.Span.Length == 13
select number.ToStringValue();
// Any Numeric token, returned as text.
var Number = from number in Token.EqualTo(CsvToken.Numeric)
select number.ToStringValue();
// Text that may span lines: any run of String, Numeric and NewLine tokens,
// concatenated back into one string.
var RichText = from values in Token.EqualTo(CsvToken.String)
.Or(Token.EqualTo(CsvToken.Numeric))
.Or(Token.EqualTo(CsvToken.NewLine)).Many()
select string.Join("", values.Select(v => v.ToStringValue()));
// Single-line text: any run of String and Numeric tokens (no NewLine).
var Text = from values in Token.EqualTo(CsvToken.String)
.Or(Token.EqualTo(CsvToken.Numeric)).Many()
select string.Join("", values.Select(v => v.ToStringValue()));
// One data row in the fixed column order; only the company name (RichText)
// is allowed to contain line breaks.
var Row = from serialNumber in SerialNumber.Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from companyName in RichText.Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from employeeMarkme in Text.Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from description in Text.Then(s => Token.EqualTo(CsvToken.Comma).Value(s))
from level in Number
select new Info
{
SerialNumber = serialNumber,
CompanyName = companyName,
EmployeeMarkme = employeeMarkme,
Description = description,
Level = level
};
// The complete parser: the header, one or more NewLine tokens, then the rows,
// each pair of rows separated by one or more NewLine tokens.
var parser = from header in Header.Then(s => Token.EqualTo(CsvToken.NewLine).AtLeastOnce().Value(s))
from rows in Row.ManyDelimitedBy(Token.EqualTo(CsvToken.NewLine).AtLeastOnce())
select rows;
// Tokenize the sample, parse it and print one parsed row per line.
var tokens = tokenizer.Tokenize(csv);
var result = parser.Parse(tokens);
Console.WriteLine(string.Join(Environment.NewLine, result.AsEnumerable()));
// Keep the console window open until Enter is pressed.
Console.ReadLine();
/// <summary>Kinds of token produced by the CSV tokenizer.</summary>
public enum CsvToken
{
    /// <summary>A field separator.</summary>
    Comma,

    /// <summary>A run recognised as an integer.</summary>
    Numeric,

    /// <summary>Plain text.</summary>
    String,

    /// <summary>A line break.</summary>
    NewLine
}
/// <summary>One parsed CSV row; every column is kept as text.</summary>
public record Info()
{
    /// <summary>Value of the first column.</summary>
    public string SerialNumber { get; set; }

    /// <summary>Value of the second column.</summary>
    public string CompanyName { get; set; }

    /// <summary>Value of the third column.</summary>
    public string EmployeeMarkme { get; set; }

    /// <summary>Value of the fourth column.</summary>
    public string Description { get; set; }

    /// <summary>Value of the fifth column.</summary>
    public string Level { get; set; }
}
Here's a dotnet fiddle https://dotnetfiddle.net/0GX3KN

Related

How to Split and Sum Members of a String Value

I have a database column that is a text field, and this text field contains values that look like
I=5212;A=97920;D=20181121|I=5176;A=77360;D=20181117|I=5087;A=43975;D=20181109
and can vary sometimes to look like:
I=29;A=20009.34;D=20190712;F=300|I=29;A=2259.34;D=20190714;F=300
Where 'I' represents the invoice Id, 'A' the invoice amount, 'D' the date in YYYYMMDD format and 'F' the original foreign currency value if the invoice was from a foreign supplier.
I am fetching that column and binding it to a datagrid which has a button labelled "Show Amount". On button click, it fetches the selected row and splits the string to extract "A"
I need to fetch all the sections with A= within the column result... i.e
A=97920
A=77360
A=43975
Then sum them all together and display the result on a label.
I have tried splitting using '|' first, extracting the substring 'A=' then splitting it using ';' to get the amount after "=".
string cAlloc;
string[] amount;
string InvoiceTotal;
string SupplierAmount;
string BalanceUnpaid;
DataRowView dv = invoicesDataGrid.SelectedItem as DataRowView;
if (dv != null)
{
cAlloc = dv.Row.ItemArray[7].ToString();
InvoiceTotal = dv.Row.ItemArray[6].ToString();
if (invoicesDataGrid.Columns[3].ToString() == "0")
{
lblAmount.Foreground = Brushes.Red;
lblAmount.Content = "No Amount Has Been Paid Out to the Supplier";
}
else
{
amount = cAlloc.Split('|');
foreach (string i in amount)
{
string toBeSearched = "A=";
string code = i.Substring(i.IndexOf(toBeSearched) + toBeSearched.Length);
string[] res = code.Split(';');
SupplierAmount = res[0];
float InvTotIncl = float.Parse(InvoiceTotal, CultureInfo.InvariantCulture.NumberFormat);
float AmountPaid = float.Parse(SupplierAmount, CultureInfo.InvariantCulture.NumberFormat);
float BalUnpaid = InvTotIncl - AmountPaid;
BalanceUnpaid = Convert.ToString(BalUnpaid);
if (BalUnpaid == 0)
{
lblAmount.Content = "Amount Paid = " + SupplierAmount + " No Balance Remaining, Supplier Invoice Paid in Full";
}
else if (BalUnpaid < 0)
{
lblAmount.Content = "Amount Paid = " + SupplierAmount + " Supplier Paid an Excess of " + BalanceUnpaid;
}
else
{
lblAmount.Content = "Amount Paid = " + SupplierAmount + " You Still Owe the Supplier a Total of " + BalanceUnpaid; ;
}
}
}
But I am only able to extract A=43975, the very last "A=". Instead of all three, plus I have not figured out how to sum the strings. Somebody help... please.
Regex is the preferred solution. Alternatively: split, split and split.
var cAlloc = "I=29;A=20009.34;D=20190712;F=300|I=29;A=2259.34;D=20190714;F=300";
// One entry per invoice.
var amount = cAlloc.Split('|');
decimal sum = 0;
foreach (string invoice in amount)
{
    foreach (var field in invoice.Split(';'))
    {
        var keyValue = field.Split('=');
        if (keyValue[0] != "A")
        {
            continue;
        }

        // Only the first "A=" field of an invoice is counted.
        sum += decimal.Parse(keyValue[1], CultureInfo.InvariantCulture);
        break;
    }
}
var in1 = "I=5212;A=97920;D=20181121|I=5176;A=77360;D=20181117|I=5087;A=43975;D=20181109";
var in2 = "I=29;A=20009.34;D=20190712;F=300|I=29;A=2259.34;D=20190714;F=300";
// Group 1 captures the amount: digits with an optional fractional part.
var reg = @"A=(\d+(\.\d+)?)";
// The data always uses '.' as the decimal separator, so parse with the invariant
// culture; the current culture may expect ',' and misread the value.
var sum1 = Regex.Matches(in1, reg).OfType<Match>()
    .Sum(m => double.Parse(m.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture));
var sum2 = Regex.Matches(in2, reg).OfType<Match>()
    .Sum(m => double.Parse(m.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture));
You're doing too much work for something like this. Here's a simpler solution using Regex.
If the invoice amount is always located as a second value in the set you can access it directly by index after split:
var str = "I=5212;A=97920;D=20181121|I=5176;A=77360;D=20181117|I=5087;A=43975;D=20181109";
var invoices = str.Trim().Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
var totalSum = 0M;
foreach (var invoice in invoices)
{
    var invoiceParts = invoice.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
    // The amount is the second field ("A=<value>"); drop the two-character "A=" prefix.
    // The invariant culture is used because the data always has '.' as the decimal separator.
    var invoiceAmount = decimal.Parse(
        invoiceParts[1].Trim().Substring(2),
        System.Globalization.CultureInfo.InvariantCulture);
    totalSum += invoiceAmount;
}
Otherwise, you can use a little more "flexible" solution like this:
var str = "I=5212;A=97920;D=20181121|I=5176;A=77360;D=20181117|I=5087;A=43975;D=20181109";
var invoices = str.Trim().Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries);
var totalSum = 0M;
foreach (var invoice in invoices)
{
    var invoiceParts = invoice.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
    // Trim BEFORE both the prefix test and the Substring, so that a field written
    // as " A=123" is matched and sliced consistently.
    var amountField = invoiceParts
        .Select(ip => ip.Trim())
        .First(ip => ip.StartsWith("a=", StringComparison.OrdinalIgnoreCase));
    // The data always has '.' as the decimal separator, hence the invariant culture.
    var invoiceAmount = decimal.Parse(
        amountField.Substring(2),
        System.Globalization.CultureInfo.InvariantCulture);
    totalSum += invoiceAmount;
}
Import the input: "Deserialisation"
With the following given input, we have a list of object with property name I,A, and D.
var input = "I=5212;A=97920;D=20181121|I=5176;A=77360;D=20181117|I=5087;A=43975;D=20181109";
Give this simple class:
/// <summary>One "I=…;A=…;D=…" record with each value held as a decimal.</summary>
public class inputClass
{
    /// <summary>Value of the "I" field.</summary>
    public decimal I { get; set; }

    /// <summary>Value of the "A" field.</summary>
    public decimal A { get; set; }

    /// <summary>Value of the "D" field.</summary>
    public decimal D { get; set; }
}
Parsing it will look like:
// Split into records on '|', turn each record into a name -> value dictionary,
// then copy the three known entries into an inputClass instance.
var inputItems = input
    .Split('|')
    .Select(record => record
        .Split(';')
        .Select(pair => pair.Split('='))
        .ToDictionary(keyValue => keyValue[0], keyValue => keyValue[1]))
    .Select(fields => new inputClass
    {
        // Manual mapping from the dictionary to inputClass.
        I = decimal.Parse(fields["I"], CultureInfo.InvariantCulture.NumberFormat),
        A = decimal.Parse(fields["A"], CultureInfo.InvariantCulture.NumberFormat),
        D = decimal.Parse(fields["D"], CultureInfo.InvariantCulture.NumberFormat),
    })
    .ToList();
Does it look complex? Let's give inputClass the responsibility of initialising itself from a string of the form
PropertyName=Value[; PropertyName=Value] :
/// <summary>Builds the instance from one "Name=Value;Name=Value" record.</summary>
/// <param name="input">The record text; it must contain the names I, A and D.</param>
/// <param name="numberFormat">Number format used to parse the three values.</param>
public inputClass(string input, NumberFormatInfo numberFormat)
{
    // Name -> value lookup for every pair in the record.
    var fields = input
        .Split(';')
        .Select(pair => pair.Split('='))
        .ToDictionary(keyValue => keyValue[0], keyValue => keyValue[1]);

    I = decimal.Parse(fields["I"], numberFormat);
    A = decimal.Parse(fields["A"], numberFormat);
    D = decimal.Parse(fields["D"], numberFormat);
}
Then the parsing is simple:
var inputItems = input.Split('|').Select(x => new inputClass(x, CultureInfo.InvariantCulture.NumberFormat));
Once we have a more usable structure (a list of objects), we can easily compute Sum, Avg, Max, Min:
var sumA = inputItems.Sum(x => x.A);
Producing the output: "Serialisation"
In order to process the input, we will define a class similar to the input one:
public class outputClass
{
public decimal I { get; set; }
public decimal A { get; set; }
public decimal D { get; set; }
public decimal F { get; set; }
The Class should be able to produce the String PropertyName=Value[; PropertyName=Value], :
/// <summary>
/// Serialises the instance as "I=…;A=…;D=…;F=…".
/// The values are formatted with the invariant culture so that the decimal
/// separator is always '.', whatever the current culture is.
/// </summary>
public override string ToString()
{
    return System.FormattableString.Invariant($"I={I};A={A};D={D};F={F}");
}
Then produce the "serialisation" string after computing the output list from the input list:
//process The input into the output.
// Turn every input item into an output item.
var outputItems = new List<outputClass>();
foreach (var source in inputItems)
{
    // Compute whatever is needed to create the next output item.
    source.A++;

    var converted = new outputClass { A = source.A, D = source.D, I = source.I, F = 42 };
    outputItems.Add(converted);
}

// "Serialisation": each item's ToString(), joined with '|'.
var outputString = String.Join("|", outputItems);
Online Demo. https://dotnetfiddle.net/VcEQmf
Long story short:
Define a class with the property you will use/display.
Add a constructor that take a string like "I=5212;A=97920;D=20181121"
nb: the String may contain property that will not be map to the object
Override the ToString(), so It can easly produce it's serialisation.
nb: Property and value that are not stored in the object will not be in the serialisation result.
Now You simply have to split on your line/object separator "|" and you are ready to go using real object, not having to care about that weird string anymore.
PS:
There was a small misunderstanding about your two types of input: I mentally saw them as input and output. Don't mind those names; it can be the same class. It doesn't change anything in this answer.

Linq return current rows where the next row has value

I need to process an invoice which is given to me as a single Excel column of strings, which I have converted to a List<string>. A simplified sample looks like this.
This is a nothing line
No001 FOO67 368.80
No001 FOO67 17.68
SHORT 12345
In this example, I might need to extract data from each line that begins with "No" - the Reference (e.g. FOO67) and amount (e.g. 368.80). However, if I encounter a line that starts with "SHORT", that means that the previous line amount was really an adjustment and the reference should be whatever I find on the "SHORT" line, with the sign of the amount reversed. In the case above, the data I hope to extract would be as follows (the first line is column headings):
Reference Amount
FOO67 368.80
12345 -17.68
I cannot find any way to achieve this using a linq query against the list. A mock-up of what I think a solution might look like is this (the below will not parse as I have added "nextline" as a pseudocode addition):
var inv = new List<string> { "This is a nothing line", "No001 FOO67 368.80", "No001 FOO67 17.68", "SHORT 123456" };
var myTable = new DataTable();
myTable.Columns.Add("Id", typeof(int));
myTable.Columns.Add("Ref", typeof(string));
myTable.Columns.Add("Amount", typeof(double));
var foo = from l in inv
where (l.Substring(0, 2) == "No" && double.TryParse(l.Split(' ')[2], out i))
select myTable.LoadDataRow(new object[]
{ inv.IndexOf(l)
,nextline.Contains("SHORT")? nextline.Split(' ')[1] : l.Split(' ')[1]
,nextline.Contains("SHORT")? -1:1 * double.Parse(l.Split(' ')[2].Replace(",", "").Replace("-", ""))
}, LoadOption.OverwriteChanges);
Is there a known way to get the value from the next line of a query and use it to decide which rows to return? If so, what is the way to do it?
You could use something like following.
var inv = new List<string>
{
    "This is a nothing line",
    "No001 FOO67 368.80",
    "No001 FOO67 17.68",
    "SHORT 123456"
};
// Keep only the lines that matter: invoice lines ("No…") and adjustment markers ("SHORT…").
var filterValidLines = inv
    .Where(c => c.StartsWith("No", StringComparison.Ordinal)
             || c.StartsWith("SHORT", StringComparison.Ordinal))
    .ToList();
var result = filterValidLines
    // Pair every line with its successor; the trailing empty string gives the
    // last line a partner too, so a final invoice line is not dropped.
    .Zip(filterValidLines.Skip(1).Concat(new[] { string.Empty }),
         (x, y) => new { Line = x, Next = y })
    // A "SHORT" line only modifies the line before it and carries no amount of its own.
    .Where(pair => pair.Line.StartsWith("No", StringComparison.Ordinal))
    .Select(pair =>
    {
        var subData = pair.Line.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        var multiplier = pair.Next.StartsWith("SHORT", StringComparison.Ordinal) ? -1 : 1;
        // Amounts are written with '.', so parse them independently of the current culture.
        var amount = double.Parse(subData[2], System.Globalization.CultureInfo.InvariantCulture);
        return new Data { Reference = subData[1], Amount = amount * multiplier };
    });
Output
Reference Amount
FOO67 368.8
FOO67 -17.68
A quick and dirty solution:
// Builds one InvoiceData per line that starts with "No", remembering the
// line that follows it (or an empty string for the last line).
void Main()
{
    var list = new List<string>
    {
        "This is a nothing line",
        "No001 FOO67 368.80",
        "No001 FOO67 17.68",
        "SHORT 12345"
    };

    var invoices = new List<InvoiceData>();
    for (var index = 0; index < list.Count; index++)
    {
        if (!list[index].StartsWith("No"))
        {
            continue;
        }

        var hasNext = index < list.Count - 1;
        invoices.Add(new InvoiceData
        {
            Line = list[index],
            NextLine = hasNext ? list[index + 1] : string.Empty
        });
    }
}
/// <summary>
/// An invoice line together with the line that follows it. When the following
/// line starts with "SHORT", the invoice line is an adjustment.
/// </summary>
public class InvoiceData
{
    /// <summary>The invoice line, e.g. "No001 FOO67 17.68".</summary>
    public string Line { get; set; }

    /// <summary>The line after <see cref="Line"/>; may be empty or null.</summary>
    public string NextLine { get; set; }

    /// <summary>True when the next line starts with "SHORT" (ordinal comparison).</summary>
    public bool IsAdjustment =>
        NextLine != null && NextLine.StartsWith("SHORT", System.StringComparison.Ordinal);

    /// <summary>
    /// Third space-separated field of <see cref="Line"/>, negated for adjustments.
    /// Parsed with the invariant culture because the data always uses '.'.
    /// </summary>
    public decimal Amount
    {
        get
        {
            var amount = decimal.Parse(
                Line.Split(' ')[2],
                System.Globalization.CultureInfo.InvariantCulture);
            return IsAdjustment ? -amount : amount;
        }
    }

    /// <summary>
    /// Second space-separated field, taken from the "SHORT" line for adjustments
    /// and from the invoice line otherwise.
    /// </summary>
    public string Reference =>
        (IsAdjustment ? NextLine : Line).Split(' ')[1];
}

What is the easiest way to split columns from a txt file

I've been looking around a bit but haven't really found a good example with what I'm struggling right now.
I have a .txt file with a couple of columns as follows:
# ID,YYYYMMDD, COLD,WATER, OD, OP,
52,20120406, 112, 91, 20, 130,
53,20130601, 332, 11, 33, 120,
And I'm reading these from the file into a string[] array.
I'd like to split them into a list
for example
List results, and [0] index will be the first index of the columns
results[0].ID
results[0].COLD
etc..
Now I've been looking around, and came up with the "\\\s+" split
but I'm not sure how to go about it since each entry is under another one.
string[] lines = File.ReadAllLines(path);
List<Bus> results = new List<Bus>();
//Bus = class with all the vars in it
//such as Bus.ID, Bus.COLD, Bus.YYYYMMDD
foreach (line in lines) {
var val = line.Split("\\s+");
//not sure where to go from here
}
Would greatly appreciate any help!
Kind regards, Venomous.
I suggest using Linq, something like this:
List<Bus> results = File
    .ReadLines(@"C:\MyFile.txt") // we have no need to read All lines in one go
    .Skip(1) // skip file's title
    .Where(line => !string.IsNullOrWhiteSpace(line)) // tolerate blank lines
    .Select(line => line.Split(','))
    // Column order in the file: ID, YYYYMMDD, COLD, WATER, OD, OP.
    // int.Parse accepts the leading padding spaces; ParseExact does not, hence the Trim.
    .Select(items => new Bus(
        int.Parse(items[0], CultureInfo.InvariantCulture),
        DateTime.ParseExact(items[1].Trim(), "yyyyMMdd", CultureInfo.InvariantCulture),
        int.Parse(items[2], CultureInfo.InvariantCulture),
        int.Parse(items[3], CultureInfo.InvariantCulture),
        int.Parse(items[4], CultureInfo.InvariantCulture),
        int.Parse(items[5], CultureInfo.InvariantCulture)))
    .ToList();
I would do
/// <summary>One data row of the text file.</summary>
public class Foo
{
    /// <summary>Value of the ID column.</summary>
    public int Id { get; set; }

    /// <summary>Value of the date column, kept as text.</summary>
    public string Date { get; set; }

    /// <summary>Value of the COLD column.</summary>
    public double Cold { get; set; }

    //...more
}
Then read the file
var l = new List<Foo>();
foreach (var line in lines)
{
    // Skip blank lines and the "# ID,YYYYMMDD,…" header line.
    if (string.IsNullOrWhiteSpace(line)
        || line.TrimStart().StartsWith("#", System.StringComparison.Ordinal))
    {
        continue;
    }

    var sp = line.Split(',');
    var foo = new Foo
    {
        // Invariant culture: the file format does not depend on the machine's locale.
        Id = int.Parse(sp[0].Trim(), System.Globalization.CultureInfo.InvariantCulture),
        Date = sp[1].Trim(), // or parse the date into a DateTime struct
        Cold = double.Parse(sp[2].Trim(), System.Globalization.CultureInfo.InvariantCulture)
    };
    l.Add(foo);
}
//now l contains a list filled with Foo objects
//now l contains a list filled with Foo objects
I would probably keep a List of properties and use reflection to populate the object, something like this :
// Property names in the same order as the columns of the file.
var columnMap = new[]{"ID","YYYYMMDD","COLD","WATER","OD","OP"};
var properties = columnMap.Select(name => typeof(Bus).GetProperty(name)).ToList();
var resultList = new List<Bus>();
foreach(var line in lines)
{
    var val = line.Split(',');
    var adding = new Bus();
    // A trailing comma produces one more value than there are columns, so stop
    // at whichever list is shorter.
    var count = System.Math.Min(val.Length, properties.Count);
    for(int i=0;i<count;i++)
    {
        // Column i goes into property i only (not into every property).
        properties[i].SetValue(adding, val[i].Trim());
    }
    resultList.Add(adding);
}
This is assuming that all of your properties are strings however
Something like this perhaps...
results.Add(new Bus
{
ID = val[0],
YYYYMMDD = val[1],
COLD = val[2],
WATER = val[3],
OD = val[4],
OP = val[5]
});
Keep in mind that all of the values in the val array are still strings at this point. If the properties of Bus are typed, you will need to parse them into the correct types e.g. assume ID is typed as an int...
ID = string.IsNullOrEmpty(val[0]) ? default(int) : int.Parse(val[0]),
Also, if the column headers are actually present in the file in the first line, you'll need to skip/disregard that line and process the rest.
Given that we have the Bus class with all the variables from your textfile:
/// <summary>One row of the text file: an id, a date and four integer readings.</summary>
class Bus
{
    public int id;
    public DateTime date;
    public int cold;
    public int water;
    public int od;
    public int op;

    /// <summary>Stores each argument in the field of the same name.</summary>
    public Bus(int _id, DateTime _date, int _cold, int _water, int _od, int _op)
    {
        (id, date) = (_id, _date);
        (cold, water, od, op) = (_cold, _water, _od, _op);
    }
}
Then we can list them all in the results list like this:
List<Bus> results = new List<Bus>();
foreach (string line in File.ReadAllLines(path))
{
    // Remove all spaces first so that blank lines and an indented "#" header
    // are recognised as well.
    string compact = line.Replace(" ", "");
    if (compact.Length == 0 || compact[0] == '#')
    {
        continue;
    }

    string[] parts = compact.Split(','); // split at commas
    results.Add(new Bus(
        int.Parse(parts[0], CultureInfo.InvariantCulture),
        DateTime.ParseExact(parts[1], "yyyyMMdd", CultureInfo.InvariantCulture),
        int.Parse(parts[2], CultureInfo.InvariantCulture),
        int.Parse(parts[3], CultureInfo.InvariantCulture),
        int.Parse(parts[4], CultureInfo.InvariantCulture),
        int.Parse(parts[5], CultureInfo.InvariantCulture)
    ));
}
And access the values as you wish:
results[0].id;
results[0].cold;
//etc.
I hope this helps.

C# Search Textfile after multiple Datas and fill them into a Datagrid View

I get the datas from an textfile. The File itself is already inserted by ReadAllLines and converted into a string - this works fine for me and I checked the content with a MessageBox.
The Textfile looks like this (This is just 1 line from about thousand):
3016XY1234567891111111ABCDEFGHIJKabcdef+0000001029916XY1111111123456789ABCDEFGHIJKabcdef+00000003801
Now these are 2 records and I need 2 datas from every record.
The "XY Number" - these are the first 16 digits AFTER "16XY" (16XY is always the same value)
Value from the example: XY1234567891111111
The "Price" - that is the 11 digits value after the plus. The last 2 digits specify the amount of Cent.
Value from the example: 102,99$
I Need both of this datas to be in the same row in my Datagrid View and also for all other Datas in this textfile.
All I can imagine is to write a code, which searchs the string after "16XY" and counts the next 16 digits - the same with the Price which searchs for a "plus" and counts the next 11 digits. Just in this case I would need to ignore the first line of the file because there are about 10x"+".
I tried several possibilities to search and count for that values but without any success right now. Im also not sure how to get the datas into the specific Datagrid View.
This is all I have to show at the moment:
List<List<string>> groups = new List<List<string>>();
List<string> current = null;
foreach (var line in File.ReadAllLines(path))
{
if (line.Contains("") && current == null)
current = new List<string>();
else if (line.Contains("") && current != null)
{
groups.Add(current);
current = null;
}
if (current != null)
current.Add(line);
}
//array
string output = string.Join(Environment.NewLine, current.ToArray());
//string
string final = string.Join("", output.ToCharArray());
MessageBox.Show(output);
Thanks in advance!
Create a class or struct to hold data
/// <summary>One record extracted from the text file.</summary>
public class Data
{
    // The properties must be public: without a modifier they are private and
    // cannot be set from an object initializer outside the class.

    /// <summary>The XY number of the record.</summary>
    public String XYValue { set; get; }

    /// <summary>The price of the record.</summary>
    public Decimal Price { set; get; }
}
Then the reading logic (You might need to add some more checks):
const int XyLength = 18;    // "XY" followed by 16 digits
const int PriceLength = 11; // digits after '+', the last two being cents
List<Data> results = new List<Data>();
foreach (string line in File.ReadAllLines(path).Skip(1))
{
    int searchFrom = 0;
    while (searchFrom < line.Length)
    {
        // Stop as soon as a marker is missing or the line is too short for a full
        // record, instead of computing offsets from a -1 "not found" result.
        int marker = line.IndexOf("16XY", searchFrom, StringComparison.Ordinal);
        if (marker < 0)
        {
            break;
        }

        int xyStart = marker + 2; // skip "16", keep the "XY" part
        int xyEnd = xyStart + XyLength;
        if (xyEnd > line.Length)
        {
            break;
        }

        int plus = line.IndexOf('+', xyEnd);
        if (plus < 0 || plus + 1 + PriceLength > line.Length)
        {
            break;
        }

        string digits = line.Substring(plus + 1, PriceLength);
        decimal cents;
        if (decimal.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out cents))
        {
            results.Add(new Data() { XYValue = line.Substring(xyStart, XyLength), Price = cents / 100m });
        }

        searchFrom = plus + 1 + PriceLength;
    }
}
// Group 1: "XY" + 16 digits right after the "16" prefix.
// Group 2: the 11 price digits after the '+' of the SAME record.
var regex = new Regex(@"16(XY\d{16})[^+]*\+(\d{11})");
var q =
    from e in File.ReadLines("123.txt")
    from Match find in regex.Matches(e) // a line can hold several records
    select new
    {
        // The last two digits are cents.
        price = decimal.Parse(find.Groups[2].Value, System.Globalization.CultureInfo.InvariantCulture) / 100m,
        value = find.Groups[1].Value
    };
dataGridView1.DataSource = q.ToList();
If you need the whole text file as string, you can manipulate it with .Split method.
The action will look something like this:
// Every chunk after a "16XY" marker is a candidate record.
var values = final.Split(new string[] { "16XY" }, StringSplitOptions.RemoveEmptyEntries).ToList();
List <YourModel> models = new List<YourModel>();
foreach (var chunk in values)
{
    // A record needs a '+' that is not the very first character.
    if (item_HasNoPlus(chunk))
    {
        continue;
    }

    var halves = chunk.Split('+');
    var numberPart = halves[0];
    var pricePart = halves[1];
    // Both parts must be long enough for the fixed-width slices below.
    if (numberPart.Length <= 15 || pricePart.Length <= 10)
    {
        continue;
    }

    models.Add(new YourModel(numberPart.Substring(0, 16), pricePart.Substring(0, 11)));
}

bool item_HasNoPlus(string text)
{
    return text.IndexOf('+') <= 0;
}
And you will need some model
/// <summary>An XY number together with its price.</summary>
public class YourModel
{
    /// <summary>Stores the XY number and converts the digit string into a price.</summary>
    /// <param name="xy">The XY number, stored as given.</param>
    /// <param name="price">Digits of the price; the last two digits are the cents.</param>
    public YourModel(string xy, string price)
    {
        XYNUMBER = xy;

        // At least the two cent digits are needed; otherwise Price stays 0.
        if (price == null || price.Length < 2)
        {
            return;
        }

        float forTest = 0;
        string addForParse = string.Format("{0}.{1}", price.Substring(0, price.Length - 2), price.Substring(price.Length - 2, 2));
        // The text built above always uses '.', so it must be parsed with the
        // invariant culture; with a ',' culture the parse would fail or misread it.
        if (float.TryParse(
                addForParse,
                System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture,
                out forTest))
        {
            Price = forTest;
        }
    }

    /// <summary>The XY number passed to the constructor.</summary>
    public string XYNUMBER { get; set; }

    /// <summary>The parsed price, or 0 when the digit string could not be parsed.</summary>
    public float Price { get; set; }
}
After that you can bind it to your gridview.
Given that the "data pairs" are variable each line (and can get truncated to the next line), it is best to use File.ReadAllText() instead. This will give you a single string to work on, eliminating the truncation issue.
var data = File.ReadAllText(path);
Define a model to contain your data:
/// <summary>An XY number together with its price.</summary>
public class Item
{
    /// <summary>The XY number.</summary>
    public string XYNumber { get; set; }

    /// <summary>The price.</summary>
    public double Price { get; set; }
}
You can then use regular expressions to find matches and store them in a list:
var list = new List<Item>();
// Group 1: "XY" + 16 digits. Group 2: the 11 price digits after '+'.
// (Group 0 is always the whole match.)
var regex = new Regex(@"(XY\d{16})\w+\+(\d{11})");
foreach (Match match in regex.Matches(data))
{
    // The last two digits are cents; parsing the digits as an integer avoids
    // any dependency on the current culture's decimal separator.
    long cents = long.Parse(match.Groups[2].Value, System.Globalization.CultureInfo.InvariantCulture);
    list.Add(new Item {
        XYNumber = match.Groups[1].Value,
        Price = cents / 100.0
    });
}
The list can also be used as a data source to a grid view:
gridView.DataSource = list;
Consider employing the Split method. From the example data, I notice there is "16XY" between each value. So something like this:
var data = "3016XY1234567891111111ABCDEFGHIJKabcdef+0000001029916XY1111111123456789ABCDEFGHIJKabcdef+00000003801";
var records = data.Split(new string[] { "16XY" }, StringSplitOptions.RemoveEmptyEntries);
Given the example data this will return the following array:
[0]: "30"
[1]: "1234567891111111ABCDEFGHIJKabcdef+00000010299"
[2]: "1111111123456789ABCDEFGHIJKabcdef+00000003801"
Now it will be easier to count characters in each string and give them meaning in your code.
So we know valuable data is separated by +. Lets split it further and fill a Dictionary<string, double>.
var parsed = new Dictionary<string, double>(records.Length);
foreach (var pairX in records)
{
    var fields = pairX.Split('+');
    // Chunks without a '+' (such as the leading "30") are not records.
    if (fields.Length < 2)
    {
        continue;
    }

    // Digits only, but parse culture-independently all the same.
    var cents = double.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture);
    parsed.Add(fields[0], cents / 100);
}
// Now you bind to the GridView; DataBind() is what actually renders the rows.
gv.DataSource = parsed;
gv.DataBind();
And your 'GridView` declaration should look like this:
<asp:GridView ID="gv" runat="server" AutoGenerateColumns="false">
<Columns>
<asp:BoundField DataField="Key" HeaderText="ID" />
<asp:BoundField DataField="Value" HeaderText="Price" />
</Columns>
</asp:GridView>

How to make the custom parser for text file

I have set up four columns in a DataTable and I want these columns to be filled with values from a text file. I used a regex to remove one particular line from the text file.
My objective is that I want to show text file on the grid using data table so first I am trying to create data table and remove the line (show at the program) using regex.
Here I post my full code.
// "class" is a keyword, so it needs the @ prefix to be usable as an identifier.
namespace @class
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Reads the text file line by line into a four-column DataTable, replacing
        /// the recommendation separator text with a single space.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("PTC");
            dt.Columns.Add("CUR");
            dt.Columns.Add("TAX");
            dt.Columns.Add("FARE BASIS");

            // The text contains regex metacharacters ('(', ')', '.'), so it must be
            // escaped to be matched literally. Built once, outside the loop.
            var pattern = Regex.Escape("---------- RECOMMENDATION 1 OF 3 IN GROUP 1 (USD 168.90)----------");

            // "using" closes the file even if an exception is thrown.
            using (StreamReader sreader = File.OpenText(@"C:\FareSearchRegex.txt"))
            {
                string line;
                while ((line = sreader.ReadLine()) != null)
                {
                    var result = Regex.Replace(line, pattern, " ");
                    // Add the processed text, not the original line.
                    dt.Rows.Add(result);
                }
            }
        }
    }

    /// <summary>Holder for the PTC, CUR and TAX values.</summary>
    class Class1
    {
        string PTC;
        string CUR;
        float TAX;

        /// <summary>Gets or sets the PTC value.</summary>
        public string gsPTC
        {
            get{ return PTC; }
            set{ PTC = value; }
        }

        /// <summary>Gets or sets the CUR value.</summary>
        public string gsCUR
        {
            get{ return CUR; }
            set{ CUR = value; }
        }

        /// <summary>Gets or sets the TAX value.</summary>
        public float gsTAX
        {
            get{ return TAX; }
            set{ TAX = value; }
        }
    }
}
If your format is strict(e.g. always 4 columns) and you want to remove only this complete line i don't see any reason to use regex:
const string separatorLine = "---------- RECOMMENDATION 1 OF 3 IN GROUP 1 (USD 168.90)----------";
foreach (var l in File.ReadLines(@"C:\FareSearchRegex.txt"))
{
    if (l == separatorLine)
    {
        continue; // drop the separator line completely
    }

    // Create and fill the row here rather than as a side effect inside a lazy
    // LINQ query, so exactly one row is added per line.
    dt.Rows.Add().ItemArray = l.Split(',');
}
(assumed that the fields are separated by comma)
Edit: This works with your pastebin:
string header = " PTC CUR TAX FARE BASIS";
// Set when the header has just been seen: the line after it holds the data.
bool takeNextLine = false;
foreach (string line in File.ReadLines(@"C:\FareSearchRegex.txt"))
{
    if (line.StartsWith(header, StringComparison.Ordinal))
        takeNextLine = true;
    else if (takeNextLine)
    {
        var tokens = line.Split(new[] { @" " }, StringSplitOptions.RemoveEmptyEntries);
        // The token at index 2 belongs to a column that is not wanted in the table.
        dt.Rows.Add().ItemArray = tokens.Where((t, i) => i != 2).ToArray();
        takeNextLine = false;
    }
}
(since you have an empty column which you want to exclude from the result i've used the clumsy and possibly error-prone(?) query Where((t, i) => i != 2))
To parse the file you'll need to:
Split the text of the file into data chunks. A chunk, in your case can be identified by the header PTC CUR TAX FARE BASIS and by the TOTAL line. To split the text you'll need to tokenize the input as follows> (i) define a regular expression to match the headers, (ii) define a regular expression to match the Total lines (footers); Using (i) and (ii) you can join them by the order of appearance index and determine the total size of each chunk (see the line with (x,y)=>new{StartIndex = x.Match.Index, EndIndex = y.Match.Index + y.Match.Length}) below). Use String.Substring method to separate the chunks.
Extract the data from each individual chunk. Knowing that data is split by lines you just have to iterate through all lines in a chunk (ignoring header and footer) and process each line.
This code should help:
string file = @"C:\FareSearchRegex.txt";
string text = File.ReadAllText(file);
// With RegexOptions.Multiline, '$' matches before '\n' only, so the optional
// "\r?" is needed for the header to match in files with CRLF line endings.
var headerRegex = new Regex(@"^(\)>)?\s+PTC\s+CUR\s+TAX\s+FARE BASIS\r?$", RegexOptions.IgnoreCase | RegexOptions.Multiline);
var totalRegex = new Regex(@"^\s+TOTAL[\w\s.]+?$",RegexOptions.IgnoreCase | RegexOptions.Multiline);
var lineRegex = new Regex(@"^(?<Num>\d+)?\s+(?<PTC>[A-Z]+)\s+\d+\s(?<Cur>[A-Z]{3})\s+[\d.]+\s+(?<Tax>[\d.]+)",RegexOptions.IgnoreCase | RegexOptions.Multiline);
// Pair the n-th header with the n-th TOTAL line: a chunk runs from the start of
// the header to the end of the footer.
var dataIndices =
    headerRegex.Matches(text).Cast<Match>()
    .Zip(totalRegex.Matches(text).Cast<Match>(),
        (x, y) => new{ StartIndex = x.Index, EndIndex = y.Index + y.Length });
// Cut each chunk out of the text.
var items = dataIndices
    .Select(x => text.Substring(x.StartIndex, x.EndIndex - x.StartIndex))
    .ToList();
var result = items.SelectMany(x =>
{
    var lines = x.Split(new string[]{"\r\n", "\r", "\n"}, StringSplitOptions.RemoveEmptyEntries);
    return lines.Skip(1) //Skip header
        .Take(lines.Length - 2) // Ignore footer
        .Select(line => lineRegex.Match(line))
        // Lines that do not look like a data row would yield empty groups and
        // make the number conversion throw, so leave them out.
        .Where(match => match.Success)
        .Select(match => new
        {
            Ptc = match.Groups["PTC"].Value,
            Cur = match.Groups["Cur"].Value,
            // The file uses '.' as the decimal separator whatever the machine's culture is.
            Tax = Convert.ToDouble(match.Groups["Tax"].Value, System.Globalization.CultureInfo.InvariantCulture)
        });
});

Categories