I have put together the code below to read a particular set of CSV files. It works, but it is very much a work in progress. One section of the code (populating the DataTable row; see the snippet below) is taking about as long to run as the SqlBulkCopy operation, so I'm asking for advice/recommendations on how to improve its performance.
With the code below, processing a ~15M-row file in 50K-row batches took just under 11.5 minutes. Breaking that down by section: SqlBulkCopy took ~236K ms (~4 min), the reader only needed ~105K ms (~1.5 min), and the section populating the DataTable took ~200K ms (~3.33 min).
csvTableTimer.Start();
// Process row and populate datatable
DataRow dr = dt.NewRow();
foreach (DataColumn dc in dt.Columns)
{
    if (row.GetType().GetProperty(dc.ToString()).GetValue(row) != null)
    {
        dr[dc.ToString()] = row.GetType().GetProperty(dc.ToString()).GetValue(row);
    }
}
dt.Rows.Add(dr);
csvTableTimer.Stop();
The CSV files are very large (10+ GB) and do not have headers. I'm using the class to build the DataTable structure and would like to continue with that approach when populating the DataTable rows, as I'll need to expand this to work with multiple CSV types.
The DataTable reflects the column names from the class, which line up with the SQL DB table. I had wanted to use GetField (converted, not raw), walking each column in the DataTable with row[column.ColumnName] = csv.GetField(column.DataType, column.ColumnName);, but I kept getting an error about there not being any headers. I found an open issue relating to HasHeaderRecord = false that matches what I was trying to do, which added to my desire to seek advice from those more skilled at this. Appreciate the help!
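For reference, a by-ordinal variant of that per-column GetField approach would avoid the header lookup entirely. This is only a sketch, assuming the DataTable column order matches the CSV field order and reusing the dt and csv variables from the code below:
// Sketch only: read each field by ordinal instead of by name, so no headers are required.
DataRow dr = dt.NewRow();
for (int i = 0; i < dt.Columns.Count; i++)
{
    var value = csv.GetField(dt.Columns[i].DataType, i);
    dr[i] = value ?? DBNull.Value;
}
dt.Rows.Add(dr);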
Expanding on the code block:
var rconfig = new CsvHelper.Configuration.CsvConfiguration(CultureInfo.InvariantCulture)
{
BufferSize = 1024,
Delimiter = ",",
AllowComments = true,
HasHeaderRecord = false,
HeaderValidated = null,
IgnoreBlankLines = true,
MissingFieldFound = null,
Comment = '#',
Escape = '"',
TrimOptions = TrimOptions.Trim,
BadDataFound = x =>
{
isBadRecord = true;
ErrRecords.Add(x.RawRecord);
++badCount;
}
};
var loadFType = "BAL"; // e.g. selects the B/BMap file type in the switch below
// Create datatable using class as definition.
PropertyDescriptorCollection props1 = TypeDescriptor.GetProperties(loaderFileType);
DataTable dt = new DataTable();
dt = UtilExtensions.CreateDataTable(props1);
using (var reader = new StreamReader(rFile))
{
reader.ReadLine();
using (var csv = new CsvReader(reader, rconfig))
{
switch (loadFType)
{
case "ALL":
csv.Context.RegisterClassMap<CSVLoader.AMap>();
var allRecords = new List<CSVLoader.A>();
break;
case "BAL":
csv.Context.RegisterClassMap<CSVLoader.BMap>();
var balRecords = new List<CSVLoader.B>();
break;
case "CIF":
csv.Context.RegisterClassMap<CSVLoader.CMap>();
var cifRecords = new List<CSVLoader.C>();
break;
}
dt.BeginLoadData();
while (csv.Read())
{
csvReadTimer.Start();
var row = csv.GetRecord(loaderFileType);
csvReadTimer.Stop();
runningCount++;
if (!isBadRecord)
{
csvTableTimer.Start();
// Process row and populate datatable
DataRow dr = dt.NewRow();
foreach (DataColumn dc in dt.Columns)
{
if (row.GetType().GetProperty(dc.ToString()).GetValue(row) != null)
{
dr[dc.ToString()] = row.GetType().GetProperty(dc.ToString()).GetValue(row);
}
}
dt.Rows.Add(dr);
csvTableTimer.Stop();
++goodCount;
if (batchCount >= dtbatchSize || runningCount >= fileRecCount)
{
try
{
// Write from the source to the destination.
bcpLoadTimer.Start();
bulkCopy.WriteToServer(dt);
bcpLoadTimer.Stop();
bcpLoadBatchCount++;
}
catch (Exception ex)
{
}
dt.Clear();
batchCount = 0;
}
batchCount++;
}
isBadRecord = false;
}
dt.EndLoadData();
reader.Close();
dt.Clear();
transaction.Commit();
// B
public class B
{
[Index(0)]
public string A { get; set; }
[Index(1)]
public string BString { get; set; }
[Index(2)]
public int? C { get; set; }
[Index(3)]
public string D { get; set; }
[Index(4)]
public string E { get; set; }
[Index(5)]
public DateTime? F { get; set; }
[Index(6)]
public decimal? G { get; set; }
[Index(7)]
public decimal? H { get; set; }
[Index(8)]
public decimal? I { get; set; }
[Index(9)]
public decimal? J { get; set; }
[Index(10)]
public int? K { get; set; }
[Index(11)]
public string L { get; set; }
[Index(12)]
public DateTime? M { get; set; }
}
// B
public sealed class BMap : ClassMap<B>
{
public BMap()
{
// AutoMap(CultureInfo.InvariantCulture);
Map(m => m.A).Index(0);
Map(m => m.BString).Index(1);
Map(m => m.C).Index(2);
Map(m => m.D).Index(3);
Map(m => m.E).Index(4);
Map(m => m.F).Index(5).TypeConverterOption.Format("yyyyMMdd");
Map(m => m.G).Index(6);
Map(m => m.H).Index(7);
Map(m => m.I).Index(8);
Map(m => m.J).Index(9);
Map(m => m.K).Index(10);
Map(m => m.L).Index(11);
Map(m => m.M).Index(12).TypeConverterOption.Format("yyyy-MM-dd-hh.mm.ss.ffffff");
}
}
Your question doesn't really include a minimal reproducible example, so I simplified your code to create the following FileLoader class, which times how long it takes to populate the DataTable from instances of some class TClass (here B) that have been read from CSV rows using CsvReader:
public class FileLoader
{
public System.Diagnostics.Stopwatch csvTableTimer { get; } = new();
public long Load<TClass, TClassMap>(string rFile, int dtbatchSize) where TClassMap : ClassMap<TClass>, new()
{
bool isBadRecord = false;
long badCount = 0;
long runningCount = 0;
long goodCount = 0;
long batchCount = 0;
var rconfig = CreateCsvConfiguration(
x =>
{
isBadRecord = true;
//ErrRecords.Add(x.RawRecord);
++badCount;
});
// Create datatable using class as definition.
var dt = UtilExtensions.CreateDataTable(typeof(TClass));
using (var reader = new StreamReader(rFile))
{
//reader.ReadLine(); FIXED - THIS SKIPPED THE FIRST LINE AND CAUSED A RECORD TO BE OMITTED.
using (var csv = new CsvReader(reader, rconfig))
{
csv.Context.RegisterClassMap<TClassMap>();
dt.BeginLoadData();
while (csv.Read())
{
isBadRecord = false;
//csvReadTimer.Start();
var record = csv.GetRecord<TClass>();
//csvReadTimer.Stop();
runningCount++;
if (!isBadRecord)
{
csvTableTimer.Start();
// Process row and populate datatable
DataRow dr = dt.NewRow();
foreach (DataColumn dc in dt.Columns)
{
if (record.GetType().GetProperty(dc.ToString()).GetValue(record) != null)
{
dr[dc.ToString()] = record.GetType().GetProperty(dc.ToString()).GetValue(record);
}
}
dt.Rows.Add(dr);
csvTableTimer.Stop();
goodCount++;
if (++batchCount >= dtbatchSize)
{
// Flush the data table
FlushTable(dt);
batchCount = 0;
}
}
}
dt.EndLoadData();
FlushTable(dt);
Commit();
}
}
return goodCount;
}
protected virtual void FlushTable(DataTable dt) => dt.Clear(); // Replace with SqlBulkCopy
protected virtual void Commit() {} // Replace with transaction.Commit();
public static CsvConfiguration CreateCsvConfiguration(BadDataFound badDataFound) =>
new CsvHelper.Configuration.CsvConfiguration(CultureInfo.InvariantCulture)
{
BufferSize = 1024,
Delimiter = ",",
AllowComments = true,
HasHeaderRecord = false,
HeaderValidated = null,
IgnoreBlankLines = true,
MissingFieldFound = null,
Comment = '#',
Escape = '"',
TrimOptions = TrimOptions.Trim,
BadDataFound = badDataFound,
};
}
public static partial class UtilExtensions
{
static IEnumerable<PropertyInfo> GetSerializableProperties(this Type type) =>
type.GetProperties().Where(p => p.GetIndexParameters().Length == 0 && p.CanRead && p.CanWrite && p.GetGetMethod() != null && p.GetSetMethod() != null);
public static DataTable CreateDataTable(Type type)
{
var dt = new DataTable();
foreach (var p in type.GetSerializableProperties())
dt.Columns.Add(p.Name, Nullable.GetUnderlyingType(p.PropertyType) ?? p.PropertyType);
return dt;
}
}
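In your real code, the FlushTable()/Commit() hooks would be overridden to call SqlBulkCopy and commit the transaction. The following is only a hypothetical sketch of such a subclass; the SqlFileLoader name, the connection handling and the destination table are my own assumptions, not taken from the question (requires using System.Data; and using System.Data.SqlClient;):
public class SqlFileLoader : FileLoader, IDisposable
{
    readonly SqlConnection connection;
    readonly SqlTransaction transaction;
    readonly SqlBulkCopy bulkCopy;

    public SqlFileLoader(string connectionString, string destinationTable)
    {
        connection = new SqlConnection(connectionString);
        connection.Open();
        transaction = connection.BeginTransaction();
        bulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction)
        {
            DestinationTableName = destinationTable,
            BatchSize = 50000
        };
    }

    // Push the current batch to SQL Server, then empty the table for the next batch.
    protected override void FlushTable(DataTable dt)
    {
        if (dt.Rows.Count > 0)
            bulkCopy.WriteToServer(dt);
        dt.Clear();
    }

    protected override void Commit() => transaction.Commit();

    public void Dispose()
    {
        bulkCopy.Close();
        transaction.Dispose();
        connection.Dispose();
    }
}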
Then, if I use the file loader and call loader.Load<B, BMap>(rFile, 1000) to read a CSV file with 5555 rows 20 times, it takes roughly 1049 ms on dotnetfiddle. See demo #1 here.
One problem you are encountering is that reflection in C# can be very slow. You are calling record.GetType().GetProperty(dc.ToString()).GetValue(record) twice per column, and if I simply reduce the number of calls by one, the time is reduced to around 706 ms:
foreach (DataColumn dc in dt.Columns)
{
    var value = record.GetType().GetProperty(dc.ToString()).GetValue(record);
    if (value != null)
    {
        dr[dc.ToString()] = value;
    }
}
Demo #2 here.
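An intermediate step (not benchmarked in the demo fiddles) would be to resolve the PropertyInfo for each column once per file, so GetProperty() is no longer called for every cell. A rough sketch, reusing TClass, dt and record from the Load method:
// Once per file: map column names to PropertyInfo (assumes property names match column names).
var propertyByColumn = dt.Columns
    .Cast<DataColumn>()
    .ToDictionary(dc => dc.ColumnName, dc => typeof(TClass).GetProperty(dc.ColumnName));
// Per row: only GetValue() is paid per cell.
DataRow dr = dt.NewRow();
foreach (var pair in propertyByColumn)
{
    var value = pair.Value.GetValue(record);
    if (value != null)
        dr[pair.Key] = value;
}
dt.Rows.Add(dr);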
However, we can do better by manufacturing a delegate at runtime. First, add the following utility methods that make use of the System.Linq.Expressions namespace:
public static partial class UtilExtensions
{
public static Func<TSource, object> CreatePropertyGetter<TSource>(PropertyInfo propertyInfo)
{
var parameter = Expression.Parameter(typeof(TSource), "obj");
var property = Expression.Property(parameter, propertyInfo);
var convert = Expression.Convert(property, typeof(object));
var lambda = Expression.Lambda(typeof(Func<TSource, object>), convert, parameter);
return (Func<TSource, object>)lambda.Compile();
}
public static ReadOnlyDictionary<string, Func<TSource, object>> PropertyGetters<TSource>() => PropertyExpressionsCache<TSource>.PropertyGetters;
static ReadOnlyDictionary<string, Func<TSource, object>> CreatePropertyGetters<TSource>() =>
typeof(TSource)
.GetSerializableProperties()
.ToDictionary(p => p.Name,
p => CreatePropertyGetter<TSource>(p))
.ToReadOnly();
static class PropertyExpressionsCache<TSource>
{
public static ReadOnlyDictionary<string, Func<TSource, object>> PropertyGetters { get; } = UtilExtensions.CreatePropertyGetters<TSource>();
}
public static ReadOnlyDictionary<TKey, TValue> ToReadOnly<TKey, TValue>(this IDictionary<TKey, TValue> dictionary) =>
new ReadOnlyDictionary<TKey, TValue>(dictionary ?? throw new ArgumentNullException());
}
And modify Load<TClass, TClassMap>() as follows:
public long Load<TClass, TClassMap>(string rFile, int dtbatchSize) where TClassMap : ClassMap<TClass>, new()
{
bool isBadRecord = false;
long badCount = 0;
long runningCount = 0;
long goodCount = 0;
long batchCount = 0;
var rconfig = CreateCsvConfiguration(
x =>
{
isBadRecord = true;
//ErrRecords.Add(x.RawRecord);
++badCount;
});
var loaderFileType = typeof(TClass);
// Create datatable using class as definition.
var dt = UtilExtensions.CreateDataTable(loaderFileType);
var properties = UtilExtensions.PropertyGetters<TClass>();
using (var reader = new StreamReader(rFile))
{
//reader.ReadLine(); FIXED - THIS SKIPPED THE FIRST LINE AND CAUSED A RECORD TO BE OMITTED.
using (var csv = new CsvReader(reader, rconfig))
{
csv.Context.RegisterClassMap<TClassMap>();
dt.BeginLoadData();
while (csv.Read())
{
isBadRecord = false;
//csvReadTimer.Start();
var record = csv.GetRecord<TClass>();
//csvReadTimer.Stop();
runningCount++;
if (!isBadRecord)
{
csvTableTimer.Start();
// Process row and populate datatable
DataRow dr = dt.NewRow();
foreach (var p in properties)
{
var value = p.Value(record);
if (value != null)
dr[p.Key] = value;
}
dt.Rows.Add(dr);
csvTableTimer.Stop();
goodCount++;
if (++batchCount >= dtbatchSize)
{
// Flush the data table
FlushTable(dt);
batchCount = 0;
}
}
}
dt.EndLoadData();
FlushTable(dt);
}
}
return goodCount;
}
The time will be further reduced, to roughly 404 ms. Demo fiddle #3 here.
I also tried using Delegate.CreateDelegate() instead of Expression:
public static partial class UtilExtensions
{
static Func<TSource, object> CreateTypedPropertyGetter<TSource, TValue>(PropertyInfo propertyInfo)
{
var typedFunc = (Func<TSource, TValue>)Delegate.CreateDelegate(typeof(Func<TSource, TValue>), propertyInfo.GetGetMethod());
return i => (object)typedFunc(i);
}
public static Func<TSource, object> CreatePropertyGetter<TSource>(PropertyInfo propertyInfo)
{
var typedCreator = typeof(UtilExtensions).GetMethod(nameof(CreateTypedPropertyGetter), BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);
var concreteTypedCreator = typedCreator.MakeGenericMethod(typeof(TSource), propertyInfo.PropertyType);
return (Func<TSource, object>)concreteTypedCreator.Invoke(null, new object[] { propertyInfo });
}
public static ReadOnlyDictionary<string, Func<TSource, object>> PropertyGetters<TSource>() => PropertyExpressionsCache<TSource>.PropertyGetters;
static ReadOnlyDictionary<string, Func<TSource, object>> CreatePropertyGetters<TSource>() =>
typeof(TSource)
.GetSerializableProperties()
.ToDictionary(p => p.Name,
p => CreatePropertyGetter<TSource>(p))
.ToReadOnly();
static class PropertyExpressionsCache<TSource>
{
public static ReadOnlyDictionary<string, Func<TSource, object>> PropertyGetters { get; } = UtilExtensions.CreatePropertyGetters<TSource>();
}
public static ReadOnlyDictionary<TKey, TValue> ToReadOnly<TKey, TValue>(this IDictionary<TKey, TValue> dictionary) =>
new ReadOnlyDictionary<TKey, TValue>(dictionary ?? throw new ArgumentNullException());
}
And got roughly the same time, of 410 ms. Demo fiddle #4 here.
Notes:
The code in your question skips the first line of the CSV file by calling reader.ReadLine(). In my test harness this caused an incorrect number of records to be read, so I removed this line.
Rather than having a non-generic method that has a switch on the record type, I extracted a generic method that takes the record type and class map type as generic parameters. This makes delegate creation a little easier as it is no longer necessary to do runtime casting to the record type.
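If you still need to choose the record type from a string such as loadFType at runtime, that switch can live outside the loader and simply dispatch to the generic method. A sketch, assuming the A/AMap, B/BMap and C/CMap types from your question, a FileLoader instance named loader, and the rFile/dtbatchSize variables from your code:
// Dispatch from the file-type string to the generic Load<TClass, TClassMap>() method.
long goodCount = loadFType switch
{
    "ALL" => loader.Load<CSVLoader.A, CSVLoader.AMap>(rFile, dtbatchSize),
    "BAL" => loader.Load<CSVLoader.B, CSVLoader.BMap>(rFile, dtbatchSize),
    "CIF" => loader.Load<CSVLoader.C, CSVLoader.CMap>(rFile, dtbatchSize),
    _ => throw new ArgumentOutOfRangeException(nameof(loadFType))
};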
DataTable tbl = new DataTable();
tbl.Columns.Add("Column");
for (int i = 0; i < 61; i++)
tbl.Rows.Add(i.ToString());
DataTable[] splittedtables = tbl.AsEnumerable()
.Select((row, index) => new { row, index })
.GroupBy(x => x.index / 12) // integer division, the fractional part is truncated
.Select(g => g.Select(x => x.row).CopyToDataTable())
.ToArray();
DataTable dtarr1 = splittedtables[0];
This is my code. It is working fine, but I want the most efficient way to do this.
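For comparison, a plain loop-based splitter (a sketch, not taken from the original answers) avoids allocating an anonymous object per row and skips the GroupBy step:
// Sketch: split a DataTable into fixed-size chunks using Clone()/ImportRow().
static IEnumerable<DataTable> SplitTable(DataTable source, int chunkSize)
{
    DataTable chunk = source.Clone();      // same schema, no rows
    foreach (DataRow row in source.Rows)
    {
        chunk.ImportRow(row);
        if (chunk.Rows.Count == chunkSize)
        {
            yield return chunk;
            chunk = source.Clone();
        }
    }
    if (chunk.Rows.Count > 0)
        yield return chunk;                // remainder
}
// Usage: DataTable[] splittedtables = SplitTable(tbl, 12).ToArray();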
From your comments, you want to convert a List to a DataTable. Here is the common method I am using, ConvertToDataTable:
public static DataTable CopyToDataTable<T>(this IEnumerable<T> source)
{
return new ObjectShredder<T>().Shred(source, null, null);
}
public class ObjectShredder<T>
{
private FieldInfo[] _fi;
private PropertyInfo[] _pi;
private Dictionary<string, int> _ordinalMap;
private Type _type;
public ObjectShredder()
{
_type = typeof(T);
_fi = _type.GetFields();
_pi = _type.GetProperties();
_ordinalMap = new Dictionary<string, int>();
}
public DataTable Shred(IEnumerable<T> source, DataTable table, LoadOption? options)
{
if (typeof(T).IsPrimitive)
{
return ShredPrimitive(source, table, options);
}
if (table == null)
{
table = new DataTable(typeof(T).Name);
}
// now see if we need to extend the datatable based on the type T + build the ordinal map
table = ExtendTable(table, typeof(T));
table.BeginLoadData();
using (IEnumerator<T> e = source.GetEnumerator())
{
while (e.MoveNext())
{
if (options != null)
{
table.LoadDataRow(ShredObject(table, e.Current), (LoadOption)options);
}
else
{
table.LoadDataRow(ShredObject(table, e.Current), true);
}
}
}
table.EndLoadData();
return table;
}
public DataTable ShredPrimitive(IEnumerable<T> source, DataTable table, LoadOption? options)
{
if (table == null)
{
table = new DataTable(typeof(T).Name);
}
if (!table.Columns.Contains("Value"))
{
table.Columns.Add("Value", typeof(T));
}
table.BeginLoadData();
using (IEnumerator<T> e = source.GetEnumerator())
{
Object[] values = new object[table.Columns.Count];
while (e.MoveNext())
{
values[table.Columns["Value"].Ordinal] = e.Current;
if (options != null)
{
table.LoadDataRow(values, (LoadOption)options);
}
else
{
table.LoadDataRow(values, true);
}
}
}
table.EndLoadData();
return table;
}
public DataTable ExtendTable(DataTable table, Type type)
{
// value is type derived from T, may need to extend table.
foreach (FieldInfo f in type.GetFields())
{
if (!_ordinalMap.ContainsKey(f.Name))
{
DataColumn dc = table.Columns.Contains(f.Name) ? table.Columns[f.Name]
: table.Columns.Add(f.Name);
_ordinalMap.Add(f.Name, dc.Ordinal);
}
}
foreach (PropertyInfo p in type.GetProperties())
{
if (!_ordinalMap.ContainsKey(p.Name))
{
DataColumn dc = table.Columns.Contains(p.Name) ? table.Columns[p.Name]
: table.Columns.Add(p.Name);
_ordinalMap.Add(p.Name, dc.Ordinal);
}
}
return table;
}
public object[] ShredObject(DataTable table, T instance)
{
FieldInfo[] fi = _fi;
PropertyInfo[] pi = _pi;
if (instance.GetType() != typeof(T))
{
ExtendTable(table, instance.GetType());
fi = instance.GetType().GetFields();
pi = instance.GetType().GetProperties();
}
Object[] values = new object[table.Columns.Count];
foreach (FieldInfo f in fi)
{
values[_ordinalMap[f.Name]] = f.GetValue(instance);
}
foreach (PropertyInfo p in pi)
{
values[_ordinalMap[p.Name]] = p.GetValue(instance, null);
}
return values;
}
}
public static DataTable create_DataTable_From_Generic_Class(Type t)
{
DataTable d = new DataTable();
FieldInfo[] fI = t.GetFields();
for(int i = 0; i < fI.Length; i++)
{
DataColumn dC = new DataColumn(fI[i].Name, fI[i].FieldType);
d.Columns.Add(dC);
}
return d;
}
public static object[] Create_Datatable_Row_From_Generic_Class(Type t, object instance,DataTable dt)
{
FieldInfo[] f = t.GetFields();
object[] ret = new object[f.Length];
for (int i = 0; i < dt.Columns.Count; i++)
{
ret[i] = t.GetField(dt.Columns[i].ColumnName).GetValue(instance);
}
return ret;
}
This is for fields. Repeat for properties and whatever other types you want converted.
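For example, the property-based variants could look roughly like this (a sketch following the same pattern; the method names are mine):
public static DataTable create_DataTable_From_Class_Properties(Type t)
{
    DataTable d = new DataTable();
    PropertyInfo[] pI = t.GetProperties();
    for (int i = 0; i < pI.Length; i++)
    {
        // Use the underlying type for nullable properties so the DataColumn accepts the values.
        DataColumn dC = new DataColumn(pI[i].Name,
            Nullable.GetUnderlyingType(pI[i].PropertyType) ?? pI[i].PropertyType);
        d.Columns.Add(dC);
    }
    return d;
}

public static object[] Create_Datatable_Row_From_Class_Properties(Type t, object instance, DataTable dt)
{
    object[] ret = new object[dt.Columns.Count];
    for (int i = 0; i < dt.Columns.Count; i++)
    {
        ret[i] = t.GetProperty(dt.Columns[i].ColumnName).GetValue(instance) ?? DBNull.Value;
    }
    return ret;
}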
Currently, I'm using:
DataTable dt = CreateDataTableInSomeWay();
List<DataRow> list = new List<DataRow>();
foreach (DataRow dr in dt.Rows)
{
list.Add(dr);
}
Is there a better/magic way?
If you're using .NET 3.5, you can use DataTableExtensions.AsEnumerable (an extension method) and then if you really need a List<DataRow> instead of just IEnumerable<DataRow> you can call Enumerable.ToList:
IEnumerable<DataRow> sequence = dt.AsEnumerable();
or
using System.Linq;
...
List<DataRow> list = dt.AsEnumerable().ToList();
List<Employee> emp = new List<Employee>();
//Maintaining DataTable on ViewState
//For Demo only
DataTable dt = ViewState["CurrentEmp"] as DataTable;
//read data from DataTable
//using a LINQ expression
emp = (from DataRow row in dt.Rows
select new Employee
{
_FirstName = row["FirstName"].ToString(),
_LastName = row["Last_Name"].ToString()
}).ToList();
With C# 3.0 and System.Data.DataSetExtensions.dll,
List<DataRow> rows = table.Rows.Cast<DataRow>().ToList();
You could use
List<DataRow> list = new List<DataRow>(dt.Select());
dt.Select() will return all rows in your table, as an array of datarows, and the List constructor accepts that array of objects as an argument to initially fill your list with.
If you just want a list of values from the "ID" int field returned, you could use...
List<int> ids = (from row in dt.AsEnumerable() select Convert.ToInt32(row["ID"])).ToList();
You can create an extension function such as:
public static List<T> ToListof<T>(this DataTable dt)
{
const BindingFlags flags = BindingFlags.Public | BindingFlags.Instance;
var columnNames = dt.Columns.Cast<DataColumn>()
.Select(c => c.ColumnName)
.ToList();
var objectProperties = typeof(T).GetProperties(flags);
var targetList = dt.AsEnumerable().Select(dataRow =>
{
var instanceOfT = Activator.CreateInstance<T>();
foreach (var properties in objectProperties.Where(properties => columnNames.Contains(properties.Name) && dataRow[properties.Name] != DBNull.Value))
{
properties.SetValue(instanceOfT, dataRow[properties.Name], null);
}
return instanceOfT;
}).ToList();
return targetList;
}
var output = yourDataInstance.ToListof<targetModelType>();
I have added some modifications to the code from this answer (https://stackoverflow.com/a/24588210/4489664) because for nullable types it would throw an exception:
public static List<T> DataTableToList<T>(this DataTable table) where T: new()
{
List<T> list = new List<T>();
var typeProperties = typeof(T).GetProperties().Select(propertyInfo => new
{
PropertyInfo = propertyInfo,
Type = Nullable.GetUnderlyingType(propertyInfo.PropertyType) ?? propertyInfo.PropertyType
}).ToList();
foreach (var row in table.Rows.Cast<DataRow>())
{
T obj = new T();
foreach (var typeProperty in typeProperties)
{
object value = row[typeProperty.PropertyInfo.Name];
object safeValue = value == null || DBNull.Value.Equals(value)
? null
: Convert.ChangeType(value, typeProperty.Type);
typeProperty.PropertyInfo.SetValue(obj, safeValue, null);
}
list.Add(obj);
}
return list;
}
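Usage is then just the following, with MyClass as a placeholder POCO whose property names match the column names and dataTable as your table:
List<MyClass> items = dataTable.DataTableToList<MyClass>();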
using System.Data;
var myEnumerable = myDataTable.AsEnumerable();
List<MyClass> myClassList =
(from item in myEnumerable
select new MyClass{
MyClassProperty1 = item.Field<string>("DataTableColumnName1"),
MyClassProperty2 = item.Field<string>("DataTableColumnName2")
}).ToList();
Again, using .NET 3.5 you may do it like:
dt.Select().ToList()
// this is better suited for expensive object creation/initialization
IEnumerable<Employee> ParseEmployeeTable(DataTable dtEmployees)
{
var employees = new ConcurrentBag<Employee>();
Parallel.ForEach(dtEmployees.AsEnumerable(), (dr) =>
{
employees.Add(new Employee()
{
_FirstName = dr["FirstName"].ToString(),
_LastName = dr["Last_Name"].ToString()
});
});
return employees;
}
A more 'magic' way, and doesn't need .NET 3.5.
If, for example, DBDatatable was returning a single column of Guids (uniqueidentifier in SQL) then you could use:
Dim gList As New List(Of Guid)
gList.AddRange(DirectCast(DBDataTable.Select(), IEnumerable(Of Guid)))
DataTable dt; // DataTable should contain DataColumns Id and Name
List<Employee> employeeList = new List<Employee>(); // Employee should contain EmployeeId, EmployeeName as properties
foreach (DataRow dr in dt.Rows)
{
    employeeList.Add(new Employee { EmployeeId = Convert.ToInt32(dr["Id"]), EmployeeName = dr["Name"].ToString() });
}
The easiest way of converting the DataTable into a generic list of a class:
using Newtonsoft.Json;
var json = JsonConvert.SerializeObject(dataTable);
var model = JsonConvert.DeserializeObject<List<ClassName>>(json);
DataTable.Select() doesn't give the rows in the order they were present in the DataTable.
If order is important, I feel iterating over the DataRow collection and forming a List is the right way to go, or you could also use the overload DataTable.Select(string filterExpression, string sort).
But this overload may not handle all the ordering (like ORDER BY CASE ...) that SQL provides.
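For example, to keep a deterministic order with the sort overload (assuming the table has an ID column to sort on):
// Empty filter, explicit sort; Select() returns DataRow[], ToList() needs System.Linq.
List<DataRow> ordered = dt.Select("", "ID ASC").ToList();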
/* This is a generic method that will convert any type of DataTable to a List
*
*
* Example : List< Student > studentDetails = new List< Student >();
* studentDetails = ConvertDataTable< Student >(dt);
*
* Warning : In this case the DataTable column's name and class property name
* should be the same otherwise this function will not work properly
*/
The following are the two functions: if we pass a DataTable and a user-defined class, they will return the List of that class populated with the DataTable data.
public static List<T> ConvertDataTable<T>(DataTable dt)
{
List<T> data = new List<T>();
foreach (DataRow row in dt.Rows)
{
T item = GetItem<T>(row);
data.Add(item);
}
return data;
}
private static T GetItem<T>(DataRow dr)
{
Type temp = typeof(T);
T obj = Activator.CreateInstance<T>();
foreach (DataColumn column in dr.Table.Columns)
{
foreach (PropertyInfo pro in temp.GetProperties())
{
//in case you have an enum/GUID data type in your model,
//we will check the field's data type and convert the value into it
if (pro.Name == column.ColumnName){
try
{
var convertedValue = GetValueByDataType(pro.PropertyType, dr[column.ColumnName]);
pro.SetValue(obj, convertedValue, null);
}
catch (Exception e)
{
//ex handle code
throw;
}
//pro.SetValue(obj, dr[column.ColumnName], null);
}
else
continue;
}
}
return obj;
}
This method will check the data type of the field and convert the DataTable value into that data type.
private static object GetValueByDataType(Type propertyType, object o)
{
if (o.ToString() == "null")
{
return null;
}
if (propertyType == (typeof(Guid)) || propertyType == typeof(Guid?))
{
return Guid.Parse(o.ToString());
}
else if (propertyType == typeof(int) || propertyType.IsEnum)
{
return Convert.ToInt32(o);
}
else if (propertyType == typeof(decimal) )
{
return Convert.ToDecimal(o);
}
else if (propertyType == typeof(long))
{
return Convert.ToInt64(o);
}
else if (propertyType == typeof(bool) || propertyType == typeof(bool?))
{
return Convert.ToBoolean(o);
}
else if (propertyType == typeof(DateTime) || propertyType == typeof(DateTime?))
{
return Convert.ToDateTime(o);
}
return o.ToString();
}
To call the preceding method, use the following syntax:
List< Student > studentDetails = new List< Student >();
studentDetails = ConvertDataTable< Student >(dt);
Change the Student class name and the dt value based on your requirements. In this case, the DataTable column names and the class property names should be the same; otherwise this function will not work properly.
lPerson = dt.AsEnumerable().Select(s => new Person()
{
Name = s.Field<string>("Name"),
SurName = s.Field<string>("SurName"),
Age = s.Field<int>("Age"),
InsertDate = s.Field<DateTime>("InsertDate")
}).ToList();
Link to working DotNetFiddle Example
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
// Requires a reference to System.Data.DataSetExtensions.dll on .NET Framework (the extension methods live in the System.Data namespace).
public class Program
{
public static void Main()
{
DataTable dt = new DataTable();
dt.Columns.Add("Name", typeof(string));
dt.Columns.Add("SurName", typeof(string));
dt.Columns.Add("Age", typeof(int));
dt.Columns.Add("InsertDate", typeof(DateTime));
var row1= dt.NewRow();
row1["Name"] = "Adam";
row1["SurName"] = "Adam";
row1["Age"] = 20;
row1["InsertDate"] = new DateTime(2020, 1, 1);
dt.Rows.Add(row1);
var row2 = dt.NewRow();
row2["Name"] = "John";
row2["SurName"] = "Smith";
row2["Age"] = 25;
row2["InsertDate"] = new DateTime(2020, 3, 12);
dt.Rows.Add(row2);
var row3 = dt.NewRow();
row3["Name"] = "Jack";
row3["SurName"] = "Strong";
row3["Age"] = 32;
row3["InsertDate"] = new DateTime(2020, 5, 20);
dt.Rows.Add(row3);
List<Person> lPerson = new List<Person>();
lPerson = dt.AsEnumerable().Select(s => new Person()
{
Name = s.Field<string>("Name"),
SurName = s.Field<string>("SurName"),
Age = s.Field<int>("Age"),
InsertDate = s.Field<DateTime>("InsertDate")
}).ToList();
foreach(Person pers in lPerson)
{
Console.WriteLine("{0} {1} {2} {3}", pers.Name, pers.SurName, pers.Age, pers.InsertDate);
}
}
public class Person
{
public string Name { get; set; }
public string SurName { get; set; }
public int Age { get; set; }
public DateTime InsertDate { get; set; }
}
}
Use the System.Data namespace (and, on .NET Framework, a reference to System.Data.DataSetExtensions.dll) and you will get .AsEnumerable().
This worked for me:
Needs at least .NET Framework 3.5.
The code below shows DataRows turned into a generic IEnumerable; comboBox1 has been used for better illustration.
using System.Linq;
DataTable dt = new DataTable();
dt = myClass.myMethod();
List<object> list = (from row in dt.AsEnumerable() select (row["name"])).ToList();
comboBox1.DataSource = list;
public class ModelUser
{
#region Model
private string _username;
private string _userpassword;
private string _useremail;
private int _userid;
/// <summary>
///
/// </summary>
public int userid
{
set { _userid = value; }
get { return _userid; }
}
/// <summary>
///
/// </summary>
public string username
{
set { _username = value; }
get { return _username; }
}
/// <summary>
///
/// </summary>
public string useremail
{
set { _useremail = value; }
get { return _useremail; }
}
/// <summary>
///
/// </summary>
public string userpassword
{
set { _userpassword = value; }
get { return _userpassword; }
}
#endregion Model
}
public List<ModelUser> DataTableToList(DataTable dt)
{
List<ModelUser> modelList = new List<ModelUser>();
int rowsCount = dt.Rows.Count;
if (rowsCount > 0)
{
ModelUser model;
for (int n = 0; n < rowsCount; n++)
{
model = new ModelUser();
model.userid = (int)dt.Rows[n]["userid"];
model.username = dt.Rows[n]["username"].ToString();
model.useremail = dt.Rows[n]["useremail"].ToString();
model.userpassword = dt.Rows[n]["userpassword"].ToString();
modelList.Add(model);
}
}
return modelList;
}
static DataTable GetTable()
{
// Here we create a DataTable with four columns.
DataTable table = new DataTable();
table.Columns.Add("userid", typeof(int));
table.Columns.Add("username", typeof(string));
table.Columns.Add("useremail", typeof(string));
table.Columns.Add("userpassword", typeof(string));
// Here we add five DataRows.
table.Rows.Add(25, "Jame", "Jame#hotmail.com", DateTime.Now.ToString());
table.Rows.Add(50, "luci", "luci#hotmail.com", DateTime.Now.ToString());
table.Rows.Add(10, "Andrey", "Andrey#hotmail.com", DateTime.Now.ToString());
table.Rows.Add(21, "Michael", "Michael#hotmail.com", DateTime.Now.ToString());
table.Rows.Add(100, "Steven", "Steven#hotmail.com", DateTime.Now.ToString());
return table;
}
protected void Page_Load(object sender, EventArgs e)
{
List<ModelUser> userList = new List<ModelUser>();
DataTable dt = GetTable();
userList = DataTableToList(dt);
gv.DataSource = userList;
gv.DataBind();
}
We can use a generic method for converting a DataTable to a List instead of manually converting it.
Note: The DataTable's column names and the type's property names should be the same.
Call the method below:
List<Student> listStudent;
long result = Utilities.ConvertTo<Student>(dt, out listStudent);
// Generic Method
public class Utilities
{
public static long ConvertTo<T>(DataTable table, out List<T> entity)
{
long returnCode = -1;
entity = null;
if (table == null)
{
return -1;
}
try
{
entity = ConvertTo<T>(table.Rows);
returnCode = 0;
}
catch (Exception ex)
{
returnCode = 1000;
}
return returnCode;
}
static List<T> ConvertTo<T>(DataRowCollection rows)
{
List<T> list = null;
if (rows != null)
{
list = new List<T>();
foreach (DataRow row in rows)
{
T item = CreateItem<T>(row);
list.Add(item);
}
}
return list;
}
static T CreateItem<T>(DataRow row)
{
string str = string.Empty;
string strObj = string.Empty;
T obj = default(T);
if (row != null)
{
obj = Activator.CreateInstance<T>();
strObj = obj.ToString();
NameValueCollection objDictionary = new NameValueCollection();
foreach (DataColumn column in row.Table.Columns)
{
PropertyInfo prop = obj.GetType().GetProperty(column.ColumnName);
if (prop != null)
{
str = column.ColumnName;
try
{
objDictionary.Add(str, row[str].ToString());
object value = row[column.ColumnName];
Type vType = prop.PropertyType; // compare the property's type (not the object's) when substituting defaults for DBNull
if (value == DBNull.Value)
{
if (vType == typeof(int) || vType == typeof(Int16)
|| vType == typeof(Int32)
|| vType == typeof(Int64)
|| vType == typeof(decimal)
|| vType == typeof(float)
|| vType == typeof(double))
{
value = 0;
}
else if (vType == typeof(bool))
{
value = false;
}
else if (vType == typeof(DateTime))
{
value = DateTime.MaxValue;
}
else
{
value = null;
}
prop.SetValue(obj, value, null);
}
else
{
prop.SetValue(obj, value, null);
}
}
catch(Exception ex)
{
}
}
}
PropertyInfo ActionProp = obj.GetType().GetProperty("ActionTemplateValue");
if (ActionProp != null)
{
object ActionValue = objDictionary;
ActionProp.SetValue(obj, ActionValue, null);
}
}
return obj;
}
}
You can use a generic method like this to convert a DataTable to a generic list:
public static List<T> DataTableToList<T>(this DataTable table) where T : class, new()
{
try
{
List<T> list = new List<T>();
foreach (var row in table.AsEnumerable())
{
T obj = new T();
foreach (var prop in obj.GetType().GetProperties())
{
try
{
PropertyInfo propertyInfo = obj.GetType().GetProperty(prop.Name);
if (propertyInfo.PropertyType.IsEnum)
{
propertyInfo.SetValue(obj, Enum.Parse(propertyInfo.PropertyType, row[prop.Name].ToString()));
}
else
{
propertyInfo.SetValue(obj, Convert.ChangeType(row[prop.Name], propertyInfo.PropertyType), null);
}
}
catch
{
continue;
}
}
list.Add(obj);
}
return list;
}
catch
{
return null;
}
}
Converting DataTable to Generic Dictionary
public static Dictionary<object,IList<dynamic>> DataTable2Dictionary(DataTable dt)
{
Dictionary<object, IList<dynamic>> dict = new Dictionary<dynamic, IList<dynamic>>();
foreach(DataColumn column in dt.Columns)
{
IList<dynamic> ts = dt.AsEnumerable()
.Select(r => r.Field<dynamic>(column.ToString()))
.ToList();
dict.Add(column, ts);
}
return dict;
}
Use an extension:
public static class Extensions
{
#region Convert Datatable To List
public static IList<T> ToList<T>(this DataTable table) where T : new()
{
IList<PropertyInfo> properties = typeof(T).GetProperties().ToList();
IList<T> result = new List<T>();
foreach (var row in table.Rows)
{
var item = CreateItemFromRow<T>((DataRow)row, properties);
result.Add(item);
}
return result;
}
private static T CreateItemFromRow<T>(DataRow row, IList<PropertyInfo> properties) where T : new()
{
T item = new T();
foreach (var property in properties)
{
property.SetValue(item, row[property.Name], null);
}
return item;
}
#endregion
}
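Usage would then be, for example (assuming a Person POCO whose property names match the column names and rows that contain no DBNull values):
IList<Person> people = dt.ToList<Person>();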
To assign the DataTable rows to a generic List of a class:
List<Candidate> temp = new List<Candidate>();//List that holds the Candidate Class,
//Note:The Candidate class contains RollNo,Name and Department
//tb is DataTable
temp = (from DataRow dr in tb.Rows
select new Candidate()
{
RollNO = Convert.ToInt32(dr["RollNO"]),
Name = dr["Name"].ToString(),
Department = dr["Department"].ToString(),
}).ToList();
You can use the following two generic functions:
private static List<T> ConvertDataTable<T>(DataTable dt)
{
List<T> data = new List<T>();
foreach (DataRow row in dt.Rows)
{
T item = GetItem<T>(row);
data.Add(item);
}
return data;
}
private static T GetItem<T>(DataRow dr)
{
Type temp = typeof(T);
T obj = Activator.CreateInstance<T>();
foreach (DataColumn column in dr.Table.Columns)
{
foreach (PropertyInfo pro in temp.GetProperties())
{
if (pro.Name == column.ColumnName)
pro.SetValue(obj, dr[column.ColumnName].ToString(), null);
else
continue;
}
}
return obj;
}
and use them as follows:
List<StudentScanExamsDTO> studentDetails = ConvertDataTable<StudentScanExamsDTO>(dt);
If anyone wants to create a custom function to convert a DataTable to a list:
class Program
{
static void Main(string[] args)
{
DataTable table = GetDataTable();
var sw = new Stopwatch();
sw.Start();
LinqMethod(table);
sw.Stop();
Console.WriteLine("Elapsed time for Linq Method={0}", sw.ElapsedMilliseconds);
sw.Reset();
sw.Start();
ForEachMethod(table);
sw.Stop();
Console.WriteLine("Elapsed time for Foreach method={0}", sw.ElapsedMilliseconds);
Console.ReadKey();
}
private static DataTable GetDataTable()
{
var table = new DataTable();
table.Columns.Add("ID", typeof(double));
table.Columns.Add("CategoryName", typeof(string));
table.Columns.Add("Active", typeof(double));
var rand = new Random();
for (int i = 0; i < 100000; i++)
{
table.Rows.Add(i, "name" + i, rand.Next(0, 2));
}
return table;
}
private static void LinqMethod(DataTable table)
{
var list = table.AsEnumerable()
.Skip(1)
.Select(dr =>
new Category
{
Id = Convert.ToInt32(dr.Field<double>("ID")),
CategoryName = dr.Field<string>("CategoryName"),
IsActive =
dr.Field<double>("Active") == 1 ? true : false
}).ToList();
}
private static void ForEachMethod(DataTable table)
{
var categoryList = new List<Category>(table.Rows.Count);
foreach (DataRow row in table.Rows)
{
var values = row.ItemArray;
var category = new Category()
{
Id = Convert.ToInt32(values[0]),
CategoryName = Convert.ToString(values[1]),
IsActive = (double)values[2] == 1 ? true : false
};
categoryList.Add(category);
}
}
private class Category
{
public int Id { get; set; }
public string CategoryName { get; set; }
public bool IsActive { get; set; }
}
}
If we execute the above code, the foreach method finishes in 56 ms while the LINQ one takes 101 ms (for 1,000 records).
So the foreach method is better to use here.
Source: Ways to Convert Datatable to List in C# (with performance test example)
Try this using Newtonsoft.Json:
var json = JsonConvert.SerializeObject(dataTable);
var YourConvertedDataType = JsonConvert.DeserializeObject<YourDataType>(json);
To get a List of values instead of an ItemArray, do this:
List<string> s = dt.AsEnumerable().Select(x => x[0].ToString()).ToList();
The above assumes that you want a list of the string values from column 0.
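To select by column name with typed access instead of ordinal 0 (assuming a string column called "Name"):
List<string> names = dt.AsEnumerable().Select(r => r.Field<string>("Name")).ToList();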