Related
I have a CSV file in which some records contain a line break inside a quoted field, but each such record must still be treated as a single row.
Here is a sample:
• strbarcode,strdescription,strsize,decprice,intcaseqty,deccost,vat,departmentId
• 5015397310361,Yellow/Black Post Complete with Base ,1,0.01,2,0.01,20,18
• 5015397615305,"Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm)
• Complete with fixings",1,0.01,0,0.01,20,18
• ,"Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm)
• Complete with fixings",1,0.01,0,0.01,20,18
• ,"Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm)
• Complete with fixings",1,0.01,0,0.01,20,18
• ,"Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm)
• Complete with fixings",1,0.01,0,0.01,20,18
• 5015397310354,Red/White Post Complete with Base ,1,0.01,2,0.01,20,18
Here is my current code:
/// <summary>
/// Imports a product CSV file: parses it into a staging table, removes blank rows and the
/// departmentId column, keeps one row per barcode (the row with the highest decprice),
/// converts the numeric columns and passes the result to the data-access layer.
/// </summary>
/// <param name="fileName">Path of the CSV file to import.</param>
private void ImportProductDataFile(string fileName)
{
    try
    {
        pictureBox_loading.Visible = true;
        pictureBox_loading.Refresh();
        Cursor.Current = Cursors.WaitCursor;

        DataTable dt = new DataTable();
        DataTable dtnew = new DataTable();

        // "using" guarantees the file handle is released even when parsing throws.
        using (StreamReader sr = new StreamReader(fileName))
        {
            string[] headers = sr.ReadLine().Split(',');
            foreach (string header in headers)
            {
                dt.Columns.Add(header);
                dtnew.Columns.Add(header);
            }

            // Read logical records rather than physical lines, so a quoted field that
            // contains a line break stays inside a single row.
            string record;
            while ((record = ReadProductCsvRecord(sr)) != null)
            {
                // Split on commas that are followed by an even number of quotes,
                // i.e. commas that are not inside a quoted field.
                string[] fields = Regex.Split(record, ",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
                DataRow dr = dt.NewRow();
                for (int i = 0; i < headers.Length; i++)
                {
                    // Records shorter than the header are padded with empty strings.
                    dr[i] = i < fields.Length ? UnquoteProductCsvField(fields[i]) : "";
                }
                dt.Rows.Add(dr);
            }
        }
        dt.AcceptChanges();

        // Drop rows in which every column is empty.
        var emptyRows = dt.Select().Where(
            row => dt.Columns.Cast<DataColumn>().All(column => string.IsNullOrEmpty(row[column].ToString()))).ToArray();
        Array.ForEach(emptyRows, x => x.Delete());
        dt.AcceptChanges();

        if (dt.Columns.Contains("departmentId"))
        {
            dt.Columns.Remove("departmentId");
            dt.AcceptChanges();
        }
        if (dtnew.Columns.Contains("departmentId"))
        {
            dtnew.Columns.Remove("departmentId");
            dtnew.AcceptChanges();
        }

        // Keep one row per (trimmed) barcode: the one with the highest price.
        // Prices that cannot be parsed are ranked as zero.
        dt = dt.AsEnumerable()
            .GroupBy(r => r.Field<string>("strbarcode").Trim(),
                (key, g) => g.OrderByDescending(
                    y =>
                    {
                        decimal price;
                        return decimal.TryParse(y.Field<string>("decprice"), out price) ? price : Decimal.Zero;
                    }
                ).First()).CopyToDataTable();
        dt.AcceptChanges();

        // Copy dt (source) into dtnew (destination), converting the numeric columns.
        foreach (DataRow row in dt.Rows)
        {
            DataRow Rownew = dtnew.NewRow();
            Rownew["strbarcode"] = row["strbarcode"].ToString().Trim() != "" ? row["strbarcode"].ToString() : "";
            Rownew["strdescription"] = row["strdescription"].ToString().Trim() != "" ? row["strdescription"].ToString().Replace(",", "") : "";
            Rownew["strsize"] = row["strsize"].ToString() != "" ? row["strsize"].ToString() : "";
            Rownew["decprice"] = row["decprice"].ToString().Trim() != "" ? objUtility.ToDecimal(row["decprice"].ToString()) : 0;
            Rownew["intcaseqty"] = row["intcaseqty"].ToString().Trim() != "" ? objUtility.ToInt(row["intcaseqty"].ToString()) : 0;
            Rownew["deccost"] = row["deccost"].ToString().Trim() != "" ? objUtility.ToDecimal(row["deccost"].ToString()) : 0;
            Rownew["vat"] = row["vat"].ToString().Trim() != "" ? objUtility.ToDecimal(row["vat"].ToString()) : 0;
            dtnew.Rows.Add(Rownew);
        }
        dtnew.AcceptChanges();

        // Create the user-defined table type and stored procedure, then insert/update the data.
        string strStatus = OjbDataAccess.UpdateDBForImportProductSP();
        string strMsg = strStatus == "success"
            ? OjbDataAccess.ImportProdcttosuperplofInsertUpdate(dtnew)
            : strStatus;

        pictureBox_loading.Visible = false;
        Cursor.Current = Cursors.Default;
        MessageBox.Show(strMsg, "Data Import Status", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception ex)
    {
        pictureBox_loading.Visible = false;
        Cursor.Current = Cursors.Default;
        txtFile.Text = string.Empty;
        btnImportFile.Enabled = true;
        MessageBox.Show("Please enter valid CSV file");
        WriteErrorLog("=============================================================================" + Environment.NewLine);
        WriteErrorLog("Product CSV file is not imported due to some error in import file." + Environment.NewLine);
        WriteErrorLog(ex.ToString() + Environment.NewLine);
        WriteErrorLog("=============================================================================" + Environment.NewLine);
        return;
    }
    this.Dispose();
}

// Reads one logical CSV record: keeps appending physical lines while the number of
// double quotes read so far is odd (a quoted field is still open). Returns null at end of input.
private static string ReadProductCsvRecord(TextReader reader)
{
    string line = reader.ReadLine();
    if (line == null)
    {
        return null;
    }

    string record = line;
    int quotes = CountProductCsvQuotes(line);
    while (quotes % 2 != 0 && (line = reader.ReadLine()) != null)
    {
        record += Environment.NewLine + line;
        quotes += CountProductCsvQuotes(line);
    }
    return record;
}

// Counts the double-quote characters in a piece of text.
private static int CountProductCsvQuotes(string text)
{
    int count = 0;
    foreach (char c in text)
    {
        if (c == '"')
        {
            count++;
        }
    }
    return count;
}

// Removes the enclosing quotes of a quoted CSV field and collapses doubled quotes ("" -> ").
// Unquoted fields are returned unchanged.
private static string UnquoteProductCsvField(string field)
{
    if (field.Length >= 2 && field[0] == '"' && field[field.Length - 1] == '"')
    {
        return field.Substring(1, field.Length - 2).Replace("\"\"", "\"");
    }
    return field;
}
If the structure of your text file is always the same, a single regular expression can parse the file:
// One match per logical CSV record. The description is either an unquoted run without
// commas/quotes, or a quoted run (which may contain commas and line breaks).
Regex lineExp = new Regex(@"(?<strbarcode>[0-9]+)?,((?<strdescription>[^,""]+)|""(?<strdescription>[^""]+)""),(?<strsize>[0-9]+),(?<decprice>[0-9.]+),(?<intcaseqty>[0-9]+),(?<deccost>[0-9.]+),(?<vat>[0-9]+),(?<departmentId>[0-9]+)([\r\n]+|$)", RegexOptions.Singleline | RegexOptions.Compiled);
// Matches a run of CR/LF characters so an embedded line break becomes a single space.
// (The quantifier belongs outside the character class; inside it, '+' would be matched literally.)
Regex lineBreakExp = new Regex(@"[\r\n]+", RegexOptions.Singleline | RegexOptions.Compiled);
string data = @"strbarcode,strdescription,strsize,decprice,intcaseqty,deccost,vat,departmentId
5015397310361,Yellow/Black Post Complete with Base ,1,0.01,2,0.01,20,18
5015397615305,""Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
5015397310354,Red/White Post Complete with Base ,1,0.01,2,0.01,20,18
5015397310361,""YProduct with delimiter, so take care"",1,0.01,2,0.01,20,18
5015397310361,Product with incalid strsize will be ignored,a,0.01,2,0.01,20,18
5015397310361,Last line ends not with CRLF,3,0.01,2,0.01,20,18";
var matches = lineExp.Matches(data);
int i = 1;
foreach (Match m in matches)
{
    // Print the record number followed by the eight captured columns, tab separated.
    Console.Write($"Item {i++}\t");
    Console.Write($"{m.Groups["strbarcode"].Value}\t");
    Console.Write($"{lineBreakExp.Replace(m.Groups["strdescription"].Value, " ")}\t");
    Console.Write($"{m.Groups["strsize"].Value}\t");
    Console.Write($"{m.Groups["decprice"].Value}\t");
    Console.Write($"{m.Groups["intcaseqty"].Value}\t");
    Console.Write($"{m.Groups["deccost"].Value}\t");
    Console.Write($"{m.Groups["vat"].Value}\t");
    Console.WriteLine(m.Groups["departmentId"].Value);
}
Output:
Item 1 5015397310361 Yellow/Black Post Complete with Base 1 0.01 2 0.01 20 18
Item 2 5015397615305 Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm) Complete with fixings 1 0.01 0 0.01 20 18
Item 3 Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm) Complete with fixings 1 0.01 0 0.01 20 18
Item 4 Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm) Complete with fixings 1 0.01 0 0.01 20 18
Item 5 Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm) Complete with fixings 1 0.01 0 0.01 20 18
Item 6 5015397310354 Red/White Post Complete with Base 1 0.01 2 0.01 20 18
Item 7 5015397310361 YProduct with delimiter, so take care 1 0.01 2 0.01 20 18
Item 8 5015397310361 Last line ends not with CRLF 3 0.01 2 0.01 20 18
I maintain a couple libraries that might help you: Sylvan.Data.Csv and Sylvan.Data. The Csv library provides CsvDataReader which handles parsing CSV data with quoted fields with delimiters and new lines such as in your file. The Sylvan.Data library (currently prerelease, but available on nuget) provides the Schema type which can be used to apply a strongly typed schema to your CSV data. Together, these libraries make it very easy to load a DataTable with strongly-typed data from a CSV file.
// define a schema for your csv.
// Describe the CSV columns and their types, wrapped for use by the CSV reader.
var csvSchema = new CsvSchema(Schema.Parse("strbarcode,strdescription,strsize,decprice:decimal,intcaseqty:int,deccost:decimal,vat:int,departmentId:int"));
// The schema is supplied to the reader through its options object.
var readerOptions = new CsvDataReaderOptions { Schema = csvSchema };
var dt = new DataTable();
// Fill the table directly from a reader opened over the CSV file.
dt.Load(CsvDataReader.Create("data.csv", readerOptions));
I don't have any documentation for the Schema.Parse syntax, but all of the .NET primitive types are supported. You can also remap column names via the schema if you desire using ">" notation:
"strbarcode>BarCode,strdescription>Description,strsize>Size,decprice>Price:decimal,intcaseqty>CaseQuantity:int,deccost>Cost:decimal,vat>Vat:int,departmentId>DepartmentId:int"
Sylvan.Data.Csv also happens to be the fastest CSV parser in the .NET ecosystem, so if you have huge files to deal with you won't find a faster solution.
I have hundreds of text files in a directory. In many of them the Code column is blank, and I need to iterate over all the files and fill it in. I can write code that adds the value on a new line, but I cannot work out how to write it under the Code column. The value to enter is "STRINGTOENTER". I only want it entered in the first line after the header; the last line should be left alone.
Id Code File_Number Suffix Check_Number Check_Date
047 7699 01 99999 11/11/2012
1 -6.15
Below is my code snippet, which adds the value on a new line. I think I need a regular expression or a tab-delimited approach here.
/// <summary>
/// Appends the text "STRINGTOENTER" followed by a line break to the end of every file
/// in the hard-coded test directory.
/// </summary>
public static void AddAStringtoAllTextFiles()
{
    string path = @"C:\Users\ur\Desktop\TestFiles\";
    // No try/catch: the previous "catch (Exception e) { throw e; }" only reset the
    // stack trace. Exceptions now propagate to the caller unchanged.
    foreach (string file in Directory.GetFiles(path))
    {
        File.AppendAllText(file, "STRINGTOENTER" + Environment.NewLine);
    }
}
EDITED
Please try this, on the assumption that the file is delimited by one or more spaces.
It works for me in VS2017. Add the following using directive at the top of the file:
using System.Text.RegularExpressions;
/// <summary>
/// For every file in the hard-coded test directory, inserts "STRINGTOENTER" as the second
/// column of each non-header line that splits into exactly five whitespace-separated
/// fields, and rewrites the file with those lines tab-delimited.
/// </summary>
public static void AddAStringtoAllTextFiles()
{
    string path = @"C:\Users\ur\Desktop\TestFiles\";
    const int indexPosition2InsertData = 1;

    // No try/catch: the previous "catch (Exception e) { throw e; }" only reset the
    // stack trace. Exceptions now propagate to the caller unchanged.
    foreach (var entry in Directory.GetFiles(path))
    {
        var lines = File.ReadAllLines(entry);
        // Start at index 1 to leave the header line untouched.
        for (var index = 1; index < lines.Length; index++)
        {
            // Split on runs of whitespace.
            var split = Regex.Split(lines[index].Trim(), @"\s{1,}");
            // Only lines with exactly five fields are treated as rows that lack the code value.
            if (split.Length == 5)
            {
                var list = split.ToList();
                list.Insert(indexPosition2InsertData, "STRINGTOENTER");
                lines[index] = string.Join("\t", list);
            }
        }
        File.WriteAllLines(entry, lines);
    }
}
I am getting this after running the code.
Id Code File_Number Suffix Check_Number Check_Date
047 STRINGTOENTER 7699 01 99999 11/11/2012
1 -6.15
Please let me know if this helps.
Assuming each file has the right tab delimitation (and that's a big assumption given the question quality)
// Get the files
// Get the files
var fileEntries = Directory.GetFiles(path);
// Process each file in turn
foreach (var entry in fileEntries)
{
    // Load the file into the lines array
    var lines = File.ReadAllLines(entry);
    // Only files that have a line after the header are touched
    if (lines.Length > 1)
    {
        // Split the first data line by tab
        var split = lines[1].Split('\t');
        // The code column is at array index 1; skip the file when the line has no
        // second column, rather than failing with IndexOutOfRangeException
        if (split.Length > 1)
        {
            split[1] = "STRINGTOENTER";
            // Rebuild the line and write the whole file back
            lines[1] = string.Join("\t", split);
            File.WriteAllLines(entry, lines);
        }
    }
}
Note : you should probably do this with a CSV parser, this was only for academic purposes and totally untested
I want to show my desired solution based on your input. Amazing how a simple piece of code can contribute to solving a larger and a complex problem. Thanks again!
/// <summary>
/// Writes the client code into the second tab-delimited column of every line except the
/// header (first) and the last line, for each file in the given directory.
/// </summary>
/// <param name="update_batch_with_clientcode">Client code to write; it is trimmed before use.</param>
/// <param name="batchfilepathtobeupdated">Directory containing the batch files; it is trimmed before use.</param>
public static void AddClientCodetoAllTextFiles(string update_batch_with_clientcode, string batchfilepathtobeupdated)
{
    // No try/catch: the previous "catch (Exception e) { throw e; }" only reset the
    // stack trace. Exceptions now propagate to the caller unchanged.
    var fileEntries = Directory.GetFiles(batchfilepathtobeupdated.Trim());
    foreach (var entry in fileEntries)
    {
        var lines = File.ReadAllLines(entry);
        bool changed = false;

        // Index 0 is the header and the final line is left alone.
        for (int i = 1; i < lines.Length - 1; i++)
        {
            var split = lines[i].Split('\t');
            if (split.Length < 2)
            {
                // No second column on this line; nothing to fill in.
                continue;
            }
            split[1] = update_batch_with_clientcode.Trim();
            lines[i] = string.Join("\t", split);
            changed = true;
        }

        // Write each file once, after all of its lines are updated, instead of
        // rewriting the whole file on every loop iteration.
        if (changed)
        {
            File.WriteAllLines(entry, lines);
        }
    }
}
I'm attempting to parse a multi-line email so I can get at the data which is on its own newline under the heading in the body of the email.
It looks like this:
EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
When I read the body with StringReader.ReadLine, every line of the email ends up in a message box, whereas all I want is the data under each ------ row, as shown above.
This is my code:
// Walks every item in the public folder, shows the full body of each mail, and then
// shows the body line by line when the body contains the text "Marketing ID".
// The loop variable is typed MailItem, so each folder item is cast to MailItem on iteration.
foreach (MailItem mail in publicFolder.Items)
{
if (mail != null)
{
if (mail is MailItem)
{
MessageBox.Show(mail.Body, "MailItem body");
// Wrap the body in a StringReader (System.IO) so it can be read one line at a time.
using (StringReader reader = new StringReader(mail.Body))
{
string line;
while ((line = reader.ReadLine()) !=null)
// The while has no braces, so its body is the single if statement below.
// NOTE: the test is made against the whole mail.Body, not against the current line,
// so when the body contains "Marketing ID" every line of the mail is shown.
if (mail.Body.Contains("Marketing ID"))
{
// Earlier attempts, left commented out:
// var localno = mail.Body.Substring(247,15);//not correct approach
// MessageBox.Show(localrefno);
//MessageBox.Show("found");
//var conexid = mail.Body.Replace(Environment.NewLine);
// The regex is constructed but never used.
var regex = new Regex("<br/>", RegexOptions.Singleline);
MessageBox.Show(line.ToString());
}
}
// Another earlier attempt, left commented out: split the body on spaces and rebuild it line by line.
//var stringBuilder = new StringBuilder();
//foreach (var s in mail.Body.Split(' '))
//{
// stringBuilder.Append(s).AppendLine();
//}
//MessageBox.Show(stringBuilder.ToString());
}
else
{
MessageBox.Show("Nothing found for MailItem");
}
}
}
You can see I had numerous attempts with it, even using substring position and using regex. Please help me get the data from each line under the ---.
It is not a very good idea to do that with Regex because it is quite easy to forget the edge cases, not easy to understand, and not easy to debug. It's quite easy to get into a situation that the Regex hangs your CPU and times out. (I cannot make any comment to other answers yet. So, please check at least my other two cases before you pick your final solution.)
In your cases, the following Regex solution works for your provided example. However, some additional limitations are there: You need to make sure there are no empty values in the non-starting or non-ending column. Or, let's say if there are more than two columns and any one of them in the middle is empty will make the names and values of that line mismatched.
Unfortunately, I cannot give you a non-Regex solution because I don't know the spec, e.g.: Will there be empty spaces? Will there be TABs? Does each field have a fixed number of characters, or is the width flexible? If it is flexible and fields can be empty, what rules determine which columns are empty? I assume it is quite possible that the columns are defined by the length of the column names and use only spaces as a delimiter. If that's the case, there are two ways to solve it: a two-pass Regex, or writing your own parser. If all the fields have a fixed length, it is even easier: just use Substring to cut the lines and then trim the pieces.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
/// <summary>
/// Demo: extracts name/value pairs from an email body in which each section is a line of
/// column names, a line of dashes and a line of values.
/// </summary>
public class Program
{
    /// <summary>One column name together with the value found beneath it.</summary>
    public class Record
    {
        public string Name { get; set; }
        public string Value { get; set; }
    }

    public static void Main()
    {
        // One match covers a whole section: the "name" captures on the first line,
        // the "splitters" (dash runs) on the second and the "value" captures on the third.
        var regex = new Regex(@"(?<name>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<name>((?!-)[\w]+[ ]?)+)?)+(?:\r\n|\r|\n)(?>(?<splitters>(-+))(?>[ \t]+)?)+(?:\r\n|\r|\n)(?<value>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<value>((?!-)[\w]+[ ]?)+)?)+", RegexOptions.Compiled);
        var testingValue =
@"EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144";
        var matches = regex.Matches(testingValue);

        // For each match, index the captures of every group by group name, then keep only
        // matches that have exactly one name per dash run and no more values than dash runs.
        var rows = (
            from match in matches.OfType<Match>()
            let row = (
                from grp in match.Groups.OfType<Group>()
                select new { grp.Name, Captures = grp.Captures.OfType<Capture>().ToList() }
            ).ToDictionary(item => item.Name, item => item.Captures)
            let names = row.ContainsKey("name") ? row["name"] : null
            let splitters = row.ContainsKey("splitters") ? row["splitters"] : null
            let values = row.ContainsKey("value") ? row["value"] : null
            where names != null && splitters != null &&
                  names.Count == splitters.Count &&
                  (values == null || values.Count <= splitters.Count)
            select new { Names = names, Values = values }
        );

        var records = new List<Record>();
        foreach (var row in rows)
        {
            for (int i = 0; i < row.Names.Count; i++)
            {
                // Names without a corresponding value capture get an empty value.
                records.Add(new Record { Name = row.Names[i].Value, Value = i < row.Values.Count ? row.Values[i].Value : "" });
            }
        }

        foreach (var record in records)
        {
            Console.WriteLine(record.Name + " = " + record.Value);
        }
    }
}
output:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID = 3411144
Destination details =
Please note that this also works for this kind of message:
EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
output:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
Or this:
EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
output:
Marketing ID =
Local Number =
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
// Builds a key/value dictionary from an email laid out in sections of three lines:
// column names, a row of dash runs, and the values. The column extents are taken from
// the position and length of each dash run.
var dict = new Dictionary<string, string>();
try
{
    var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    int starts = 0, end = 0, length = 0;
    // Advance until the line after "starts" is the first dash row.
    while (!lines[starts + 1].StartsWith("-")) starts++;
    // i always points at a dash row; names are on i - 1 and values on i + 1.
    for (int i = starts + 1; i < lines.Length; i += 3)
    {
        // Find the first dash of every dash run (at line start or after a space).
        var mc = Regex.Matches(lines[i], @"(?:^| )-");
        foreach (Match m in mc)
        {
            int start = m.Value.StartsWith(" ") ? m.Index + 1 : m.Index;

            // Measure the dash run to get the column width, then cut the key out of the name line.
            end = start;
            while (lines[i][end++] == '-' && end < lines[i].Length - 1) { }
            length = Math.Min(end - start, lines[i - 1].Length - start);
            string key = length > 0 ? lines[i - 1].Substring(start, length).Trim() : "";

            // Measure again (this time up to the end of the dash line) and cut the value out.
            end = start;
            while (lines[i][end++] == '-' && end < lines[i].Length) { }
            length = Math.Min(end - start, lines[i + 1].Length - start);
            string value = length > 0 ? lines[i + 1].Substring(start, length).Trim() : "";

            dict.Add(key, value);
        }
    }
}
catch (Exception ex)
{
    // Keep the original failure as the inner exception so the cause is not lost.
    throw new Exception("Email is not in correct format", ex);
}
Live Demo
Using Regular Expressions:
// Builds a key/value dictionary from an email laid out in sections of three lines:
// column names, a row of dashes, and the values. Names and values are picked out with a
// regular expression instead of by column position.
var dict = new Dictionary<string, string>();
try
{
    var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    int starts = 0;
    // Advance until the line after "starts" is the first dash row.
    while (!lines[starts + 1].StartsWith("-")) starts++;
    // i always points at a dash row; names are on i - 1 and values on i + 1.
    for (int i = starts + 1; i < lines.Length; i += 3)
    {
        var keys = Regex.Matches(lines[i - 1], @"(?:^| )(\w+\s?)+");
        var values = Regex.Matches(lines[i + 1], @"(?:^| )(\w+\s?)+");
        if (keys.Count == values.Count)
        {
            for (int j = 0; j < keys.Count; j++)
                dict.Add(keys[j].Value.Trim(), values[j].Value.Trim());
        }
        else
        {
            // One of the two columns has no value. A value line that starts with a space
            // means the first column is the empty one.
            if (lines[i + 1].StartsWith(" "))
            {
                dict.Add(keys[0].Value.Trim(), "");
                dict.Add(keys[1].Value.Trim(), values[0].Value.Trim());
            }
            else
            {
                // The key is trimmed here as in every other branch.
                dict.Add(keys[0].Value.Trim(), values[0].Value.Trim());
                dict.Add(keys[1].Value.Trim(), "");
            }
        }
    }
}
catch (Exception ex)
{
    // Keep the original failure as the inner exception so the cause is not lost.
    throw new Exception("Email is not in correct format", ex);
}
Live Demo
Here is my attempt. I don't know if the email format can change (rows, columns, etc).
I can't think of an easy way to separate the columns besides checking for a double space (my solution).
/// <summary>
/// Demo: parses an email body made of four-line sections (header row, dash row, value row,
/// blank line) with two columns separated by at least two spaces, and prints
/// "header1,value1,header2,value2" for each section.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var emailBody = GetEmail();
        using (var reader = new StringReader(emailBody))
        {
            var lines = new List<string>();
            const int startingRow = 2; // Starting line to read from (start at Marketing ID line)
            const int sectionItems = 4; // Header row (ex. Marketing ID & Local Number Line) + Dash Row + Value Row + New Line

            // Add all lines to a list, removing any leading or trailing spaces
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                lines.Add(line.Trim());
            }

            // Stop when a section has no value row (i + 2) instead of indexing past the end.
            for (var i = startingRow; i + 2 < lines.Count; i += sectionItems)
            {
                string header1, header2, value1, value2;
                SplitColumns(lines[i], out header1, out header2);
                SplitColumns(lines[i + 2], out value1, out value2); // i + 1 is the dash line
                Console.WriteLine(string.Format("{0},{1},{2},{3}", header1, value1, header2, value2));
            }
        }
    }

    // Splits a line into two columns at the first run of two spaces. A line without
    // such a gap is treated as having only the first column.
    private static void SplitColumns(string line, out string first, out string second)
    {
        // The first double space is used as the column delimiter; not the best solution but should work
        int gap = line.IndexOf("  ");
        if (gap == -1)
        {
            first = line.Trim();
            second = "";
            return;
        }
        first = line.Substring(0, gap);
        second = line.Substring(gap).Trim();
    }

    // Sample email in the layout Main expects: a title, a blank line, then sections of
    // header row / dash row / value row separated by blank lines.
    static string GetEmail()
    {
        return @"EMAIL STARTING IN APRIL

Marketing ID          Local Number
-------------------   ----------------------
GR332230              0000232323

Dispatch Code         Logic code
-----------------     -------------------
GX3472                1

Destination ID        Destination details
-----------------     -------------------
3411144";
    }
}
Output looks something like this:
Marketing ID,GR332230,Local Number,0000232323
Dispatch Code,GX3472,Logic code,1
Destination ID,3411144,Destination details,
Here is an approach that assumes you don't need the headers and that every value is mandatory and arrives in order.
It won't work for data that contains spaces or for optional fields.
foreach (MailItem mail in publicFolder.Items)
{
    MessageBox.Show(mail.Body, "MailItem body");
    // Split by line, remove dash lines.
    var data = Regex.Split(mail.Body, @"\r?\n|\r")
        .Where(l => !l.StartsWith('-'))
        .ToList();
    // Remove headers: walk backwards from the second-to-last entry, removing every other line.
    // NOTE(review): with an odd number of remaining lines (e.g. a title line at index 0)
    // the entry at index 0 is not removed - confirm against the real mail layout.
    for (var i = data.Count - 2; i >= 0; i -= 2)
    {
        data.RemoveAt(i);
    }
    // now data contains only the info you want in the order it was presented.
    // Assuming info doesn't have spaces.
    var result = data.SelectMany(d => d.Split(' '));
    // WARNING: Missing info will not be present.
    // {"GR332230", "0000232323", "GX3472", "1", "3411144"}
}
I have a CSV file and would like to count how many times the second column contains 111.
The file has 46 columns separated by ; (semicolon).
"first col" "second col" "....."
abc 111 a
abc 112 b
abc 113 c
abc 111 d
abc 112 e
abc 113 f
I would like to count the occurrences of 111.
First I fill the DataGridView from a DataTable:
// Bind the grid to the table, then fill the table from the semicolon-separated file:
// the first line supplies the column names, every later line becomes a data row.
dgv.DataSource = dgv_table;
string[] raw_text = File.ReadAllLines("d:\\"+lb_csv.Text);
string[] data_col = null;
int x = 0;
foreach (string text_line in raw_text)
{
    // MessageBox.Show(text_line);
    data_col = text_line.Split(';');
    if (x == 0)
    {
        // header: one table column per field (Length, since data_col is an array)
        for (int i = 0; i < data_col.Length; i++)
        {
            dgv_table.Columns.Add(data_col[i]);
        }
        x++;
    }
    else
    {
        // data
        dgv_table.Rows.Add(data_col);
    }
}
I found lots of solutions for counting a specific value (111) in the second column,
but every time I ran into problems.
// Count the grid rows whose "second col" cell holds the text 111.
// The comparison is made against the cell value itself; a bare Equals(111) would test
// the enclosing object instead and never match.
int xCount = dgv.Rows.Cast<DataGridViewRow>()
    .Select(row => row.Cells["second col"].Value)
    .Count(s => s != null && s.ToString() == "111");
this.lb_qty.Text = xCount.ToString();
But it gives error for row.Cells["second col"].Value
An unhandled exception of type 'System.ArgumentException' occurred in System.Windows.Forms.dll
Additional information: Column named second col cannot be found.
Can someone help me how to solve this problem and get the needed result?
I would suggest you to skip using DataGridView and use counter variable in your loop, like Arkadiusz suggested.
If you still want to work with DataTable, count values like this:
// Count the table rows whose "second col" value reads "111".
int xCount = dgv_table.Rows.Cast<DataRow>()
    .Where(r => r["second col"] != null)
    .Count(r => r["second col"].ToString() == "111");
I would try to read the file into a DataTable and use it as DataSource for the DataGridView.
// Table that will back the grid.
var d_Table = new DataTable();
// ... fill the DataTable here ...
this.dgv_table.DataSource = d_Table;
To count the rows which contain 111 in the second column, you can query the DataTable like this:
var d_Table = new DataTable();
// ... fill the DataTable here ...
// Select returns the rows matching the filter expression; only their number is needed.
int matchingRows = d_Table.Select("secondCol = '111'").Length;
this.lb_qty.Text = matchingRows.ToString();
Or you can do it in a foreach-loop:
// Count the grid rows whose "secondCol" cell reads "111".
int count = 0;
foreach (DataGridViewRow dgr in this.dgv_table.Rows)
{
    // A cell's Value can be null (for example on the grid's empty placeholder row for
    // new records), so use ?. rather than calling ToString() on it directly.
    if (dgr.Cells["secondCol"].Value?.ToString() == "111")
    {
        count++;
    }
}
this.lb_qty.Text = count.ToString();
You can use this method to read the CSV into a list of string arrays (List<string[]>):
/// <summary>
/// Reads a comma-separated file into a list that holds one string array per line.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>
/// The fields of every line, in file order. If reading fails, the error is written to the
/// console and the list is returned empty.
/// </returns>
public static List<string[]> readCSV(String filename)
{
    List<string[]> result = new List<string[]>();
    try
    {
        foreach (string line in File.ReadAllLines(filename))
        {
            // Split the current line (the loop variable) into its fields.
            result.Add(line.Split(','));
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("Error: '{0}'", e);
    }
    return result;
}
Each array holds the fields of one line of the file, so you can find the frequency of any value using LINQ or a plain loop:
// tmp is presumably the List<string[]> returned by readCSV, i.e. one array per line.
// Take field 1 (the second column) of every row that has one, then count how often each
// distinct value occurs. (tmp[1] on its own would be the second row, not the second column.)
foreach (var item in tmp.Where(row => row.Length > 1).Select(row => row[1]).GroupBy(c => c))
{
    Console.WriteLine("{0} : {1}", item.Key, item.Count());
}
/// <summary>
/// Counts the lines of a semicolon-separated file whose field at the given index equals
/// the searched value exactly.
/// </summary>
/// <param name="input">Path of the file to read.</param>
/// <param name="searchedValue">Value to look for.</param>
/// <param name="ColumnNumber">Zero-based index of the column to inspect.</param>
/// <param name="skipFirstLine">True to ignore the first line (e.g. a header).</param>
/// <returns>The number of matching lines.</returns>
int CountValues(string input, string searchedValue, int ColumnNumber, bool skipFirstLine = false)
{
    int numberOfSearchedValue = 0;
    using (StreamReader reader = new StreamReader(input))
    {
        if (skipFirstLine)
        {
            reader.ReadLine();
        }

        string line;
        // ReadLine returns null once the end of the file is reached.
        while ((line = reader.ReadLine()) != null)
        {
            string[] fields = line.Split(';');
            // Blank or short lines have no such column; skip them instead of throwing.
            if (ColumnNumber < fields.Length && fields[ColumnNumber] == searchedValue)
            {
                numberOfSearchedValue++;
            }
        }
    }
    return numberOfSearchedValue;
}
Edit:
StreamReader.ReadLine() reads a line and advances to the next one, which is why calling it once skips the header. When there are no more lines it returns null, which is our loop's ending condition. The rest of the code should be self-explanatory, I think
:)
Didn't test that so be careful :)
It might be necessary to use Trim() or ToUpper() in some places (as is usual when you are searching).
This question already has answers here:
Reading CSV files using C#
(12 answers)
Closed 7 years ago.
User input is a csv file which has data like below:
SiteID,Format,Title,Category,Quantity,StartPrice,BuyItNowPrice,Duration,Description,PicURL
US ,FixedPriceItem,PLease Do Not Bid. TISSOT STAINLESS STEEL Women's Quartz Watch 1,14111,1,,341,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid. TISSOT STAINLESS STEEL Women's Quartz Watch 2,14111,1,,342,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid. TISSOT STAINLESS STEEL Women's Quartz Watch 3,14111,1,,343,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
US ,FixedPriceItem,PLease Do Not Bid. TISSOT STAINLESS STEEL Women's Quartz Watch 4,14111,1,,344,30,"
Brand: TISSOT
Gender: Women's
Style: Fashion
Features: Water Resistant
Band Material: Stainless Steel
Movement: Quartz : Battery
Display: Analog
Model: STYLIST
Country/Region of Manufacture: Switzerland
Case Width (mm): 25mm
Wrist (inches cm): 6.7 inches / 17 cm
Serial Number: R452","http://img2.jpegbay.com/gallery/004791260/1_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/2_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/3_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/4_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/5_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/6_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/7_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/8_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/9_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/10_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/11_f.jpg?1334,
http://img2.jpegbay.com/gallery/004791260/33_f.jpg?1334"
This is exactly a csv file data.
to read this data, I have tried this code :
/// <summary>
/// Reads a CSV file whose first line is a header and turns every following record into a
/// TempBulkCSVItems object. Quoted fields may contain commas and line breaks.
/// </summary>
/// <param name="fileName">Path of the CSV file.</param>
/// <returns>One item per non-empty record; an empty list when the file has no header line.</returns>
private static List<TempBulkCSVItems> ProcessCSV(string fileName)
{
    List<TempBulkCSVItems> tbcil = new List<TempBulkCSVItems>();
    DataTable dt = new DataTable();

    // Splits on commas that are followed by an even number of quotes,
    // i.e. commas that are not inside a quoted field.
    Regex r = new Regex(",(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))");

    // "using" releases the file even when a record fails to parse.
    using (StreamReader sr = new StreamReader(fileName))
    {
        // The header only determines how many columns the staging table gets.
        string line = ReadLogicalCsvLine(sr);
        if (line == null)
        {
            return tbcil;
        }
        Array.ForEach(r.Split(line), s => dt.Columns.Add(new DataColumn()));

        // Read logical records rather than physical lines, so Description and PicURL
        // values that span several lines stay within one record.
        while ((line = ReadLogicalCsvLine(sr)) != null)
        {
            if (line.Length == 0)
            {
                continue;
            }

            // Split first, then strip the quotes, so commas inside quoted fields survive.
            string[] fields = r.Split(line);
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = StripCsvQuotes(fields[i]);
            }

            DataRow row = dt.NewRow();
            row.ItemArray = fields;
            dt.Rows.Add(row);

            TempBulkCSVItems tbci = new TempBulkCSVItems();
            tbci.SiteID = row[0].ToString();
            tbci.Format = row[1].ToString();
            tbci.Title = row[2].ToString();
            tbci.Category = row[3].ToString();
            tbci.Quantity = row[4].ToString();
            tbci.StartPrice = row[5].ToString();
            tbci.BuyItNowPrice = row[6].ToString();
            tbci.Duration = row[7].ToString();
            tbci.Description = row[8].ToString();
            tbci.PicURL = row[9].ToString();
            tbcil.Add(tbci);
        }
    }

    return tbcil;
}

// Reads one logical CSV record: keeps appending physical lines while the number of
// double quotes read so far is odd (a quoted field is still open). Returns null at end of input.
private static string ReadLogicalCsvLine(TextReader reader)
{
    string line = reader.ReadLine();
    if (line == null)
    {
        return null;
    }

    string record = line;
    int quotes = CountCsvQuotes(line);
    while (quotes % 2 != 0 && (line = reader.ReadLine()) != null)
    {
        record += Environment.NewLine + line;
        quotes += CountCsvQuotes(line);
    }
    return record;
}

// Counts the double-quote characters in a piece of text.
private static int CountCsvQuotes(string text)
{
    int count = 0;
    foreach (char c in text)
    {
        if (c == '"')
        {
            count++;
        }
    }
    return count;
}

// Removes the enclosing quotes of a quoted CSV field and collapses doubled quotes ("" -> ").
// Unquoted fields are returned unchanged.
private static string StripCsvQuotes(string field)
{
    if (field.Length >= 2 && field[0] == '"' && field[field.Length - 1] == '"')
    {
        return field.Substring(1, field.Length - 2).Replace("\"\"", "\"");
    }
    return field;
}
The problem I found is that I am reading one row at a time with ReadLine(), but the Description and PicURL fields span multiple lines and contain multiple commas (,). That is why my code breaks. What would be the solution in this case?
You can read the values form a CSV file like this:
// Read a comma-delimited file record by record with TextFieldParser
// (Microsoft.VisualBasic.FileIO).
using (TextFieldParser parser = new TextFieldParser(@"c:\temp\test.csv"))
{
    parser.TextFieldType = FieldType.Delimited;
    parser.SetDelimiters(",");
    while (!parser.EndOfData)
    {
        // Each call returns the fields of the next record
        string[] fields = parser.ReadFields();
        foreach (string field in fields)
        {
            //TODO: Process field
        }
    }
}
See: Reading CSV files using C#
Here's something you can use for doing this type of parsing.
/// <summary>
/// Lazily splits a sequence of text lines into rows of fields.
/// </summary>
/// <param name="lines">Physical lines of the input.</param>
/// <param name="delimiter">Character that separates fields.</param>
/// <param name="singleEscape">
/// Character that makes the next character literal; the escape character itself is dropped.
/// At the end of a line it continues the row on the next line.
/// </param>
/// <param name="beginEndEscape">
/// Character that opens and closes a section in which delimiters and line ends are
/// literal; the character itself is kept in the field.
/// </param>
/// <returns>One list of fields per logical row.</returns>
public static IEnumerable<IList<string>> ParseDelimitedLines(
    this IEnumerable<string> lines,
    char delimiter,
    char? singleEscape,
    char? beginEndEscape)
{
    var fields = new List<string>();
    var buffer = new StringBuilder();
    bool escaped = false;     // the previous character was an active single escape
    bool quoted = false;      // currently inside a begin/end escaped section
    bool continuing = false;  // the previous line left the row unfinished

    foreach (var text in lines)
    {
        escaped = false;
        if (continuing)
        {
            // The line break between the two physical lines belongs to the field.
            buffer.AppendLine();
            continuing = false;
        }

        foreach (char ch in text)
        {
            bool literal = escaped;

            if (!literal && ch == beginEndEscape)
            {
                quoted = !quoted;
            }

            if (!literal && !quoted && ch == delimiter)
            {
                fields.Add(buffer.ToString());
                buffer.Clear();
                continue;
            }

            // An escape character only counts when it is not itself escaped.
            escaped = !literal && ch == singleEscape;
            if (!escaped)
            {
                buffer.Append(ch);
            }
        }

        continuing = quoted || escaped;
        if (continuing)
        {
            continue;
        }

        fields.Add(buffer.ToString());
        yield return fields;
        fields = new List<string>();
        buffer.Clear();
    }

    // Input ended inside an escaped section or after a trailing escape: emit what is buffered.
    if (quoted || escaped)
    {
        fields.Add(buffer.ToString());
        yield return fields;
    }
}
With that the following
// Sample input covering plain fields, quoted delimiters, backslash escapes and both
// ways of continuing a row on the next line.
string text = @"This,is,simple,stuff
Now,""it,gets"",harder
But,wait\,there,\""is,more\""
And,this\
way,to,do,newline
And,""another
way"",fin";
int r = 0;
// Split on either line-ending style so the demo does not depend on how this source file
// was saved.
foreach (
    var row in text.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None).ParseDelimitedLines(',', '\\', '"'))
{
    Console.WriteLine("Row " + ++r);
    int c = 0;
    foreach (var item in row)
    {
        Console.WriteLine("Column " + ++c + ": <<" + item + ">>");
    }
}
Will output
Row 1
Column 1: <<This>>
Column 2: <<is>>
Column 3: <<simple>>
Column 4: <<stuff>>
Row 2
Column 1: <<Now>>
Column 2: <<"it,gets">>
Column 3: <<harder>>
Row 3
Column 1: <<But>>
Column 2: <<wait,there>>
Column 3: <<"is>>
Column 4: <<more">>
Row 4
Column 1: <<And>>
Column 2: <<this
way>>
Column 3: <<to>>
Column 4: <<do>>
Column 5: <<newline>>
Row 5
Column 1: <<And>>
Column 2: <<"another
way">>
Column 3: <<fin>>
And you can use it like this
// Parse the file with ',' as the delimiter, '\' as the single-character escape and '"'
// as the begin/end escape.
var rows = File.ReadLines("yourFile.txt").ParseDelimitedLines(',', '\\', '"');
foreach(var row in rows)
{
// Each row is a list of fields; index 0 is the first column.
string column1 = row[0];
// Placeholder: the remaining columns are read the same way.
...
}