I am using the CsvHelper library to read a CSV file, and I can read the file successfully with it. However, I cannot apply SQL-style conditions to filter the values. How can I do that without using SQL Server? I am really stuck on this.
It was very easy with the Pandas and pandasql libraries in Python, but it is proving much harder in C#.
My Code:
public static void Main(string[] args)
{
    var fileInfo = new FileInfo(@"filePath");
    using (TextReader reader = fileInfo.OpenText())
    using (var csvReader = new CsvReader(reader))
    {
        csvReader.Configuration.Delimiter = ",";
        csvReader.Configuration.HasHeaderRecord = false;
        csvReader.Configuration.IgnoreQuotes = true;
        csvReader.Configuration.TrimFields = true;
        csvReader.Configuration.WillThrowOnMissingField = false;

        while (csvReader.Read())
        {
            var myStringVar = csvReader.GetField<string>(0);
            Console.Write(myStringVar); //SELECT * FROM table...
        }
    }
}
I would suggest using LINQ to filter your results.
https://msdn.microsoft.com/en-us/library/bb397906.aspx
Say you have some class MyClass that you can deserialize the lines in your file into.
For example:
public class MyClass
{
public int ID { get; set; }
}
var records = csv.GetRecords<MyClass>().ToList();
var filtered = records.Where(r => r.ID >= 10);
That example is a bit contrived but you can use any boolean expression you like in the where clause.
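To put that together, here is a minimal, hedged sketch (the file path, the Name property, the header row, and the ID >= 10 filter are assumptions; older CsvHelper versions accept new CsvReader(reader) as in the question, newer ones also require a CultureInfo):

using System;
using System.IO;
using System.Linq;
using CsvHelper;

public class MyClass
{
    public int ID { get; set; }
    public string Name { get; set; } // assumed second column
}

public static class Program
{
    public static void Main(string[] args)
    {
        using (TextReader reader = File.OpenText(@"filePath"))
        using (var csv = new CsvReader(reader))
        {
            csv.Configuration.Delimiter = ",";

            // Equivalent of: SELECT * FROM table WHERE ID >= 10
            var filtered = csv.GetRecords<MyClass>()
                              .Where(r => r.ID >= 10)
                              .ToList();

            foreach (var record in filtered)
            {
                Console.WriteLine($"{record.ID}\t{record.Name}");
            }
        }
    }
}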
I know this is too late for the OP, but the issue with the accepted answer is that you have to read the entire result set into memory, which may not be tenable for large files. You can also extend the code below to get the top N rows without having to read the entire CSV if you find matches early in the file.
public static void Main(string[] args)
{
    var fileInfo = new FileInfo(@"filePath");
    var where = ""; //Code to set up the where clause part of the query goes here
    using (TextReader reader = fileInfo.OpenText())
    using (var csvReader = new CsvReader(reader))
    {
        csvReader.Configuration.Delimiter = ",";
        csvReader.Configuration.HasHeaderRecord = false;
        csvReader.Configuration.IgnoreQuotes = true;
        csvReader.Configuration.TrimFields = true;
        csvReader.Configuration.WillThrowOnMissingField = false;

        DataTable dt = null;
        while (csvReader.Read())
        {
            //Use the first row to initialize the columns.
            if (dt == null)
            {
                dt = new DataTable();
                for (var i = 0; i < csvReader.FieldCount; i++)
                {
                    var fieldType = csvReader.GetFieldType(i);
                    DataColumn dc;
                    if (fieldType.IsNullableType())
                    {
                        dc = new DataColumn(csvReader.GetName(i), Nullable.GetUnderlyingType(fieldType));
                        dc.AllowDBNull = true;
                    }
                    else
                    {
                        dc = new DataColumn(csvReader.GetName(i), fieldType);
                    }
                    dt.Columns.Add(dc);
                }
            }

            //Map the reader's current record to a DataRow.
            var newRow = dt.Rows.Add();
            foreach (DataColumn col in dt.Columns)
            {
                newRow[col.ColumnName] = csvReader[col.ColumnName];
            }

            //Create a temporary DataView and filter it with the where clause.
            DataView dv = new DataView(dt);
            dv.RowFilter = where;
            var data = dv.Count > 0 ? dv[0] : null;
            if (data != null)
            {
                //The row here matches your where clause.
                //Code to read this row or do something with it.
            }

            //Empty the temporary data table.
            dt.Rows.Clear();
        }
    }
}
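As a rough illustration of the top-N idea, here is a hedged sketch (not drop-in code: the two-column layout and the Field1/Field2 names are assumptions) that stops reading as soon as enough matching rows have been collected:

using System.Data;
using System.IO;
using CsvHelper;

public static class TopNExample
{
    // Hedged sketch: return up to topN rows that match the where clause,
    // stopping as soon as the limit is reached instead of reading the whole file.
    public static DataTable ReadTopN(string filePath, string where, int topN)
    {
        var results = new DataTable();
        results.Columns.Add("Field1", typeof(string)); // assumed column layout
        results.Columns.Add("Field2", typeof(string)); // assumed column layout

        using (TextReader reader = File.OpenText(filePath))
        using (var csvReader = new CsvReader(reader))
        {
            csvReader.Configuration.HasHeaderRecord = false;

            var buffer = results.Clone(); // one-row staging table to apply the filter to
            while (csvReader.Read() && results.Rows.Count < topN)
            {
                buffer.Rows.Add(csvReader.GetField(0), csvReader.GetField(1));

                var dv = new DataView(buffer) { RowFilter = where };
                if (dv.Count > 0)
                {
                    results.ImportRow(dv[0].Row); // keep the matching row
                }

                buffer.Rows.Clear();
            }
        }

        return results;
    }
}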
I am trying to perform data-driven testing by loading client codes from an Excel file to log in and perform other operations. I am trying to iterate my test for all clients in the User_Name column. I only need to read data from a given sheet; right now I have a constant sheet name (i.e. Sheet1) and want to add a SheetName parameter.
Any help with this would be much appreciated, thank you.
I am using ExcelDataReader v3.4.0, ExcelDataReader.DataSet v3.4.0, and Selenium WebDriver v3.11.0.
My Excel Generic Code is Below:
using ExcelDataReader;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Runtime.Remoting.Messaging;
using System.Text;
using System.Threading.Tasks;
namespace MyDemoAutomation
{
public class ExcelUtil
{
public DataTable ExcelToDataTable(string fileName)
{
// open file and returns as stream
FileStream stream = File.Open(fileName, FileMode.Open, FileAccess.Read);
// create openXmlReader via ExcelReaderFactory
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream);
//Set the first row as the column names and return the file as a DataSet
DataSet result = excelReader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
{
UseHeaderRow = true
}
});
// Get all tables
DataTableCollection table = result.Tables;
// Get the sheet we want
DataTable resultTable = table["Sheet1"];
// Here, instead of "Sheet1", I want to pass the sheet name as a parameter.
// How can I add that to the existing code?
// return
return resultTable;
}
List<DataCollection> dataCol = new List<DataCollection>();
public void PopulateInCollection(string fileName)
{
DataTable table = ExcelToDataTable(fileName);
for (int row = 1; row <= table.Rows.Count; row++)
{
for (int col = 0; col < table.Columns.Count; col++)
{
DataCollection dtTable = new DataCollection()
{
rowNumber = row,
colName = table.Columns[col].ColumnName,
colValue = table.Rows[row - 1][col].ToString()
};
dataCol.Add(dtTable);
}
}
}
public string ReadData(int rowNumber, string columnName)
{
try
{
// Retrieving data using LINQ to reduce iterations
string data = (from colData in dataCol
where colData.colName == columnName && colData.rowNumber == rowNumber
select colData.colValue).SingleOrDefault();
return data.ToString();
}
catch (Exception e)
{
return null;
}
}
internal class DataCollection
{
public int rowNumber { get; internal set; }
public string colName { get; internal set; }
public string colValue { get; internal set; }
}
}
}
and the TestClass:
[Test]
public void DataDrivenTest_FromExcel()
{
Driver = new ChromeDriver();
ExcelUtil util = new ExcelUtil();
util.PopulateInCollection(@"C:\dan\AutomationTest\TestData\test.xlsx");
Driver.FindElement(By.Id("contentPlaceholder_txtClientCode"))
.SendKeys(util.ReadData(i));
Driver.FindElement(By.XPath("//*[@id='btnLogin']")).Click();
Driver.FindElement(By.XPath("//*[@id='tabContent0']/table/tbody/tr[2]/td[1]")).Click();
Driver.FindElement(By.Id("contentPlaceholder_txtcloseButton")).Click();
Driver.Quit();
}
List<DataCollection> dataCol = new List<DataCollection>();
public void PopulateInCollection(string fileName, string sheetName)
{
DataTable table = ExcelToDataTable(fileName, sheetName);
//Iterate through the rows and columns of the Table
for (int row = 1; row <= table.Rows.Count; row++)
{
for (int col = 0; col < table.Columns.Count; col++)
{
DataCollection dtTable = new DataCollection()
{
rowNumber = row,
colName = table.Columns[col].ColumnName,
colValue = table.Rows[row - 1][col].ToString()
};
//Add all the details for each row
dataCol.Add(dtTable);
}
}
}
public DataTable ExcelToDataTable(string fileName, string sheetName)
{
// open file and returns as stream
FileStream stream = File.Open(fileName, FileMode.Open, FileAccess.Read);
// create openXmlReader via ExcelReaderFactory
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream);
//Set the first row as the column names and return the file as a DataSet
DataSet result = excelReader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (_) => new ExcelDataTableConfiguration()
{
UseHeaderRow = true
}
});
// Get all tables
DataTableCollection table = result.Tables;
// Get the requested sheet
DataTable resultTable = table[sheetName];
return resultTable;
}
//Define this first
util.PopulateInCollection("fileName", "sheetName");
//Reading from the excel file
util.ReadData(rowNumber, columnName);
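For example, a hedged sketch of how the test could then consume it (the sheet name "Sheet1", the row index, and the "User_Name" column are assumptions based on the question):

ExcelUtil util = new ExcelUtil();
util.PopulateInCollection(@"C:\dan\AutomationTest\TestData\test.xlsx", "Sheet1");

// Read the client code from row 1 of the User_Name column and use it to log in.
string clientCode = util.ReadData(1, "User_Name");
Driver.FindElement(By.Id("contentPlaceholder_txtClientCode")).SendKeys(clientCode);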
I have a .csv file structured like so (the first row is the header row). For each SomeID I need to add the NetCharges together (or subtract if the code calls for it) and put each item into its own column based on the Code column.
Here's the file I receive:
SomeID,OrderNumber,Code,NetCharge,Total
23473,30388,LI 126.0000, 132.00
96021, 000111, LI, 130.00, 126.00
23473,30388,FU 6.0000, 132.00
4571A,10452,LI,4100.0000, 4325.0000
4571A,10452,FU,150.00,4325.0000
4571A,10452,DT,75.00,4325.0000
I need to insert the data into my SQL table, which is structured like this. This is what I'm aiming for:
ID     OrderNumber  LICode  LICodeValue  FUCode  FUCodeValue  DTCode  DTCodeValue  Total
23473  30388        LI      126.0000     FU      6.0000       NULL    NULL         132.0000
4571A  10452        LI      4100.0000    FU      150.0000     DT      75.00        4325.0000
My SomeIDs will not always be grouped together the way the 4571A rows are. I basically need to iterate over this file and create one record for each SomeID. I cannot seem to find a way to do this with CsvHelper. I'm using C# and CsvHelper. I have tried this so far, but I cannot get back to a SomeID after passing on to the next one:
using (var reader = new StreamReader(@"C:\testFiles\some.csv"))
using (var csv = new CsvReader( reader, CultureInfo.InvariantCulture ))
{
var badRecords = new List<string>();
var isRecordBad = false;
csv.Configuration.HasHeaderRecord = true;
csv.Configuration.HeaderValidated = null;
csv.Configuration.IgnoreBlankLines = true;
csv.Configuration.Delimiter = ",";
csv.Configuration.BadDataFound = context =>
{
isRecordBad = true;
badRecords.Add( context.RawRecord );
};
csv.Configuration.MissingFieldFound = ( s, i, context ) =>
{
isRecordBad = true;
badRecords.Add( context.RawRecord );
};
List<DataFile> dataFile = csv.GetRecords<DataFile>().ToList();
//initialize variable
string lastSomeId = "";
if (!isRecordBad)
{
foreach (var item in dataFile)
{
// check if its same record
if (lastSomeId != item.SomeID)
{
MyClass someClass = new MyClass();
lastSomeId = item.SomeID;
//decimal? LI = 0;//was going to use these as vars for calculations not sure I need them???
//decimal? DSC = 0;
//decimal? FU = 0;
someClass.Id = lastSomeId;
someClass.OrdNum = item.OrderNumber;
if (item.Code == "LI")
{
someClass.LICode = item.Code;
someClass.LICodeValue = item.NetCharge;
}
if (item.Code == "DT")
{
someClass.DTCode = item.Code;
someClass.DTCodeValue = item.NetCharge;
}
if (item.Code == "FU")
{
someClass.FUCode = item.Code;
someClass.FUCodeValue = item.NetCharge;
}
someClass.Total = (someClass.LICodeValue + someClass.FUCodeValue);
//check for other values to calculate
//insert record to DB
}
else
{
//Insert into db after maipulation of values
}
}
}
isRecordBad = false;
}//END Using
Any clues would be greatly appreciated. Thank you in advance.
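One hedged way to sketch this (the DataFile class below is an assumption matching the GetRecords<DataFile>() call above, and it assumes using System.Linq;): since the rows for a SomeID are not contiguous, group the parsed records by SomeID first and then pivot each group's codes into the columns of MyClass, so there is exactly one record per SomeID before anything is inserted.

// Assumed shape of the parsed CSV rows.
public class DataFile
{
    public string SomeID { get; set; }
    public string OrderNumber { get; set; }
    public string Code { get; set; }
    public decimal? NetCharge { get; set; }
    public decimal? Total { get; set; }
}

// Group all rows by SomeID, then fold each group's codes into one record.
var pivoted = dataFile
    .GroupBy(r => r.SomeID)
    .Select(g =>
    {
        var result = new MyClass
        {
            Id = g.Key,
            OrdNum = g.First().OrderNumber,
            Total = g.First().Total
        };

        foreach (var item in g)
        {
            switch (item.Code)
            {
                case "LI": result.LICode = "LI"; result.LICodeValue = item.NetCharge; break;
                case "FU": result.FUCode = "FU"; result.FUCodeValue = item.NetCharge; break;
                case "DT": result.DTCode = "DT"; result.DTCodeValue = item.NetCharge; break;
            }
        }
        return result;
    })
    .ToList();

// One insert per SomeID can then be done from the pivoted list.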
I have a working solution for uploading a CSV file. Currently, I use the IFormCollection for a user to upload multiple CSV files from a view.
The CSV files are saved as a temp file as follows:
List<string> fileLocations = new List<string>();
foreach (var formFile in files)
{
filePath = Path.GetTempFileName();
if (formFile.Length > 0)
{
using (var stream = new FileStream(filePath, FileMode.Create))
{
await formFile.CopyToAsync(stream);
}
}
fileLocations.Add(filePath);
}
I send the list of file locations to another method (just below), loop through the file locations, and stream the data from the temp files. I then use a DataTable and SqlBulkCopy to insert the data. I currently upload between 50 and 200 files at a time, and each file is around 330 KB. Inserting a hundred of them (around 30-35 MB of data) takes around 6 minutes.
public void SplitCsvData(string fileLocation, Guid uid)
{
MetaDataModel MetaDatas;
List<RawDataModel> RawDatas;
var reader = new StreamReader(File.OpenRead(fileLocation));
List<string> listRows = new List<string>();
while (!reader.EndOfStream)
{
listRows.Add(reader.ReadLine());
}
var metaData = new List<string>();
var rawData = new List<string>();
foreach (var row in listRows)
{
var rowName = row.Split(',')[0];
bool parsed = int.TryParse(rowName, out int result);
if (parsed == false)
{
metaData.Add(row);
}
else
{
rawData.Add(row);
}
}
//Assigns the vertical header name and value to the object by splitting string
RawDatas = GetRawData.SplitRawData(rawData);
SaveRawData(RawDatas);
MetaDatas = GetMetaData.SplitRawData(rawData);
SaveRawData(RawDatas);
}
This code then passes the object on to create the DataTable and insert the data.
private DataTable CreateRawDataTable
{
get
{
var dt = new DataTable();
dt.Columns.Add("Id", typeof(int));
dt.Columns.Add("SerialNumber", typeof(string));
dt.Columns.Add("ReadingNumber", typeof(int));
dt.Columns.Add("ReadingDate", typeof(string));
dt.Columns.Add("ReadingTime", typeof(string));
dt.Columns.Add("RunTime", typeof(string));
dt.Columns.Add("Temperature", typeof(double));
dt.Columns.Add("ProjectGuid", typeof(Guid));
dt.Columns.Add("CombineDateTime", typeof(string));
return dt;
}
}
public void SaveRawData(List<RawDataModel> data)
{
DataTable dt = CreateRawDataTable;
var count = data.Count;
for (var i = 1; i < count; i++)
{
DataRow row = dt.NewRow();
row["Id"] = data[i].Id;
row["ProjectGuid"] = data[i].ProjectGuid;
row["SerialNumber"] = data[i].SerialNumber;
row["ReadingNumber"] = data[i].ReadingNumber;
row["ReadingDate"] = data[i].ReadingDate;
row["ReadingTime"] = data[i].ReadingTime;
row["CombineDateTime"] = data[i].CombineDateTime;
row["RunTime"] = data[i].RunTime;
row["Temperature"] = data[i].Temperature;
dt.Rows.Add(row);
}
using (var conn = new SqlConnection(connectionString))
{
conn.Open();
using (SqlTransaction tr = conn.BeginTransaction())
{
using (var sqlBulk = new SqlBulkCopy(conn, SqlBulkCopyOptions.Default, tr))
{
sqlBulk.BatchSize = 1000;
sqlBulk.DestinationTableName = "RawData";
sqlBulk.WriteToServer(dt);
}
tr.Commit();
}
}
}
Is there another way to do this, or a better way to improve performance, so that the upload time is reduced? It can take a long time, and I am seeing ever-increasing memory use of around 500 MB.
TIA
You can improve performance by removing the DataTable and reading from the input stream directly.
SqlBulkCopy has a WriteToServer overload that accepts an IDataReader instead of an entire DataTable.
CsvHelper can read CSV files using a StreamReader as input. It provides CsvDataReader as an IDataReader implementation on top of the CSV data, which allows reading directly from the input stream and writing to SqlBulkCopy.
The following method reads from an IFormFile, parses the stream using CsvHelper, and uses the CSV's fields to configure a SqlBulkCopy instance:
public async Task ToTable(IFormFile file, string table)
{
using (var stream = file.OpenReadStream())
using (var tx = new StreamReader(stream))
using (var reader = new CsvReader(tx))
using (var rd = new CsvDataReader(reader))
{
var headers = reader.Context.HeaderRecord;
var bcp = new SqlBulkCopy(_connection)
{
DestinationTableName = table
};
//Assume the file headers and table fields have the same names
foreach(var header in headers)
{
bcp.ColumnMappings.Add(header, header);
}
await bcp.WriteToServerAsync(rd);
}
}
This way nothing is ever written to a temp table or cached in memory. The uploaded files are parsed and written to the database directly.
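For example, a hedged sketch of calling this from the upload action (the action signature and the "RawData" table name are assumptions):

[HttpPost]
public async Task<IActionResult> UploadCsvFiles(ICollection<IFormFile> files)
{
    foreach (var file in files)
    {
        // Stream each uploaded CSV straight into its destination table.
        await ToTable(file, "RawData");
    }
    return Ok();
}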
In addition to @Panagiotis's answer, why don't you interleave your file processing with the file upload? Wrap your file processing logic in an async method, change the loop to a Parallel.ForEach, and process each file as it arrives instead of waiting for all of them?
private static readonly object listLock = new Object(); // only once at class level
List<string> fileLocations = new List<string>();
Parallel.ForEach(files, async (formFile) =>
{
var filePath = Path.GetTempFileName();
if (formFile.Length > 0)
{
using (var stream = new FileStream(filePath, FileMode.Create))
{
await formFile.CopyToAsync(stream);
}
await ProcessFileInToDbAsync(filePath);
}
// Added lock for thread safety of the List
lock (listLock)
{
fileLocations.Add(filePath);
}
});
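One caveat: an async lambda inside Parallel.ForEach is effectively fire-and-forget. A hedged Task.WhenAll-based sketch of the same interleaving idea (with ProcessFileInToDbAsync being the wrapper method suggested above) would be:

var fileLocations = new List<string>();
var tasks = files.Select(async formFile =>
{
    var filePath = Path.GetTempFileName();
    if (formFile.Length > 0)
    {
        using (var stream = new FileStream(filePath, FileMode.Create))
        {
            await formFile.CopyToAsync(stream);
        }
        // Process each file as soon as its copy completes.
        await ProcessFileInToDbAsync(filePath);
    }
    lock (fileLocations)
    {
        fileLocations.Add(filePath);
    }
});
// Await all copy-and-process pipelines together.
await Task.WhenAll(tasks);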
Thanks to @Panagiotis Kanavos, I was able to work out what to do. Firstly, the way I was calling the methods was leaving them in memory. The CSV file I have is in two parts: vertical metadata and then the usual horizontal information, so I needed to split them in two. Saving them as temp files was also causing overhead. It has gone from taking 5-6 minutes to now taking a minute, which for 100 files containing 8,500 rows isn't bad, I suppose.
Calling the method:
public async Task<IActionResult> UploadCsvFiles(ICollection<IFormFile> files, IFormCollection fc)
{
foreach (var f in files)
{
var getData = new GetData(_configuration);
await getData.SplitCsvData(f, uid);
}
return whatever;
}
This is the method doing the splitting:
public async Task SplitCsvData(IFormFile file, string uid)
{
var data = string.Empty;
var m = new List<string>();
var r = new List<string>();
var records = new List<string>();
using (var stream = file.OpenReadStream())
using (var reader = new StreamReader(stream))
{
while (!reader.EndOfStream)
{
var line = reader.ReadLine();
var header = line.Split(',')[0].ToString();
bool parsed = int.TryParse(header, out int result);
if (!parsed)
{
m.Add(line);
}
else
{
r.Add(line);
}
}
}
//TODO: Validation
//This splits the list into the Meta data model. This is just a single object, with static fields.
var metaData = SplitCsvMetaData.SplitMetaData(m, uid);
DataTable dtm = CreateMetaData(metaData);
var serialNumber = metaData.LoggerId;
await SaveMetaData("MetaData", dtm);
//
var lrd = new List<RawDataModel>();
foreach (string row in r)
{
lrd.Add(new RawDataModel
{
Id = 0,
SerialNumber = serialNumber,
ReadingNumber = Convert.ToInt32(row.Split(',')[0]),
ReadingDate = Convert.ToDateTime(row.Split(',')[1]).ToString("yyyy-MM-dd"),
ReadingTime = Convert.ToDateTime(row.Split(',')[2]).ToString("HH:mm:ss"),
RunTime = row.Split(',')[3].ToString(),
Temperature = Convert.ToDouble(row.Split(',')[4]),
ProjectGuid = uid.ToString(),
CombineDateTime = Convert.ToDateTime(row.Split(',')[1] + " " + row.Split(',')[2]).ToString("yyyy-MM-dd HH:mm:ss")
});
}
await SaveRawData("RawData", lrd);
}
I then use a DataTable for the metadata (which takes 20 seconds for 100 files), as I map the field names to the columns.
public async Task SaveMetaData(string table, DataTable dt)
{
using (SqlBulkCopy sqlBulk = new SqlBulkCopy(_configuration.GetConnectionString("DefaultConnection"), SqlBulkCopyOptions.Default))
{
sqlBulk.DestinationTableName = table;
await sqlBulk.WriteToServerAsync(dt);
}
}
I then use FastMember for the large data parts for the raw data, which is more like a traditional CSV.
public async Task SaveRawData(string table, IEnumerable<RawDataModel> lrd)
{
using (SqlBulkCopy sqlBulk = new SqlBulkCopy(_configuration.GetConnectionString("DefaultConnection"), SqlBulkCopyOptions.Default))
using (var reader = ObjectReader.Create(lrd, "Id","SerialNumber", "ReadingNumber", "ReadingDate", "ReadingTime", "RunTime", "Temperature", "ProjectGuid", "CombineDateTime"))
{
sqlBulk.DestinationTableName = table;
await sqlBulk.WriteToServerAsync(reader);
}
}
I am sure this can be improved on, but for now, this works really well.
I have a CSV file delimited with a pipe (|). I am reading it using the following line of code:
IEnumerable<string[]> lineFields = File.ReadAllLines(FilePath).Select(line => line.Split('|'));
Now, I need to bind this to a GridView. So I am creating a dynamic DataTable as follows:
DataTable dt = new DataTable();
int i = 0;
foreach (string[] order in lineFields)
{
if (i == 0)
{
foreach (string column in order)
{
DataColumn _Column = new DataColumn();
_Column.ColumnName = column;
dt.Columns.Add(_Column);
i++;
//Response.Write(column);
//Response.Write("\t");
}
}
else
{
int j = 0;
DataRow row = dt.NewRow();
foreach (string value in order)
{
row[j] = value;
j++;
//Response.Write(column);
//Response.Write("\t");
}
dt.Rows.Add(row);
}
//Response.Write("\n");
}
This works fine. But I want to know if there is a better way to convert IEnumerable<string[]> to a DataTable. I need to read many CSVs like this, so I think the above code might have performance issues.
Starting from .Net 4:
use ReadLines.
DataTable FileToDataTable(string FilePath)
{
var dt = new DataTable();
IEnumerable<string[]> lineFields = File.ReadLines(FilePath).Select(line => line.Split('|'));
dt.Columns.AddRange(lineFields.First().Select(i => new DataColumn(i)).ToArray());
foreach (var order in lineFields.Skip(1))
dt.Rows.Add(order);
return dt;
}
(Edit: instead of this code, use the code in @Jodrell's answer; it avoids enumerating the file twice.)
Before .Net 4:
use streaming:
DataTable FileToDataTable1(string FilePath)
{
var dt = new DataTable();
using (var st = new StreamReader(FilePath))
{
// process the first line
if (st.Peek() >= 0)
{
var order = st.ReadLine().Split('|');
dt.Columns.AddRange(order.Select(i => new DataColumn(i)).ToArray());
}
while (st.Peek() >= 0)
dt.Rows.Add(st.ReadLine().Split('|'));
}
return dt;
}
Since, in your linked example, the file has a header row:
const char Delimiter = '|';
var dt = new DataTable();
using (var m = File.ReadLines(filePath).GetEnumerator())
{
m.MoveNext();
foreach (var name in m.Current.Split(Delimiter))
{
dt.Columns.Add(name);
}
while (m.MoveNext())
{
dt.Rows.Add(m.Current.Split(Delimiter));
}
}
This reads the file in one pass.
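Once dt has been built by either approach, binding it to the GridView mentioned in the question is just (GridView1 is an assumed control name):

// Assumed ASP.NET Web Forms control named GridView1.
GridView1.DataSource = dt;
GridView1.DataBind();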
Here is my code:
using (System.Net.WebResponse tmpRes = tmpReq.GetResponse())
{
using (System.IO.Stream tmpStream = tmpRes.GetResponseStream())
{
using (System.IO.TextReader tmpReader = new System.IO.StreamReader(tmpStream))
{
string fileContents = tmpReader.ReadToEnd();
for (int i = 0; i < fileContents.Length; i++)
{
if (fileContents[i] == "")
{
fileContents[i] = "null";
}
}
using (Stream s = GenerateStreamFromString(fileContents))
{}
}
}
}
This shows the error "Cannot implicitly convert type 'string' to 'char'". Is there any other way to set "NULL" in empty fields with CsvReader?
You are not using the CsvReader at all. You are also not splitting the string by your delimiter to get "cells". However, you can load a DataTable from the CsvReader and modify that.
Here's an example presuming tab as delimiter:
var tblCSV = new DataTable();
using (System.Net.WebResponse tmpRes = tmpReq.GetResponse())
using (System.IO.Stream tmpStream = tmpRes.GetResponseStream())
using (System.IO.TextReader tmpReader = new System.IO.StreamReader(tmpStream))
using (var csv = new CsvReader(tmpReader, true, '\t', '"', '\0', '\0', ValueTrimmingOptions.All))
{
csv.MissingFieldAction = MissingFieldAction.ParseError;
csv.DefaultParseErrorAction = ParseErrorAction.RaiseEvent;
csv.ParseError += csv_ParseError;
csv.SkipEmptyLines = true;
// load into DataTable
tblCSV.Load(csv, LoadOption.OverwriteChanges, csvTable_FillError);
}
Now loop the rows and columns and modify them accordingly:
foreach(DataRow row in tblCSV.Rows)
{
foreach(DataColumn col in tblCSV.Columns)
{
if(string.IsNullOrWhiteSpace(row.Field<string>(col)))
row.SetField(col, "null");
}
}
Update related to your comment:
I want to add a NULL value in empty cells in the SQL database when the CSV data is saved to the database. Is there any other way?
You could simply use the loop above to update your table instead:
using (var con = new SqlConnection("ConnectionString"))
{
con.Open();
foreach (DataRow row in tblCSV.Rows)
{
using (var cmd = new SqlCommand("INSERT INTO table(Col1,Col2) VALUES (@Col1,@Col2);", con))
{
string col1 = row.Field<string>("Col1");
if (string.IsNullOrWhiteSpace(col1))
col1 = null;
string col2 = row.Field<string>("Col2");
if (string.IsNullOrWhiteSpace(col2))
col2 = null;
// Pass DBNull.Value (not a C# null) so the database column is set to NULL.
cmd.Parameters.AddWithValue("@Col1", (object)col1 ?? DBNull.Value);
cmd.Parameters.AddWithValue("@Col2", (object)col2 ?? DBNull.Value);
int inserted = cmd.ExecuteNonQuery();
}
}
}