I am looking to export an excel sheet using NPOI library. Is there any way to insert the datatable into the sheet without losing the data format?
Previously, I used the GemBox Spreadsheet library, which worked well for me. The code for that looked like:
/// <summary>
/// Writes every table of <paramref name="ds"/> to its own worksheet of a GemBox
/// <c>ExcelFile</c> and saves the workbook to the current HTTP response.
/// </summary>
/// <param name="ds">Data set whose tables become worksheets named after <c>TableName</c>.</param>
public void ExportTest(DataSet ds)
{
    SpreadsheetInfo.SetLicense("FREE-LIMITED-KEY");
    ExcelFile ef = new ExcelFile();
    // File name is today's date followed by a fixed suffix.
    var filename = DateTime.Now.ToString("yyyyMMdd") + "BSI_MEMBERAmendment" + ".xls";
    foreach (DataTable dt in ds.Tables)
    {
        ExcelWorksheet ws = ef.Worksheets.Add(dt.TableName);
        // Insert the table at the top-left corner, with the column names as a header row.
        ws.InsertDataTable(dt,
            new InsertDataTableOptions(0, 0)
            {
                ColumnHeaders = true,
                StartRow = 0,
            });
    }
    ef.Save(this.Response, filename);
}
I had to stop using the GemBox library because I need to create Excel files with more than 5 sheets. Unfortunately, GemBox does not allow that in its free version. As such, I've switched to NPOI.
Now that I'm using the NPOI library, I've changed my code to:
/// <summary>
/// Builds an .xls workbook with one sheet per table of <paramref name="dataSet"/> and
/// sends it to the HTTP response as the attachment "tpms_dict.xls".
/// </summary>
/// <param name="extension">Requested format; only "xls" is accepted.</param>
/// <param name="dataSet">Tables to export; each table name becomes a sheet name.</param>
/// <exception cref="NotSupportedException">
/// <paramref name="extension"/> is anything other than "xls".
/// </exception>
public void WriteExcelWithNPOI(String extension, DataSet dataSet)
{
    // Reject unsupported formats before allocating anything.
    if (extension != "xls")
    {
        throw new NotSupportedException("This format is not supported");
    }

    HSSFWorkbook workbook = new HSSFWorkbook();
    foreach (DataTable dt in dataSet.Tables)
    {
        var sheet1 = workbook.CreateSheet(dt.TableName);
        // How can i insert the data's from dataTable in this sheet
    }

    using (var exportData = new MemoryStream())
    {
        Response.Clear();
        workbook.Write(exportData);
        Response.ContentType = "application/vnd.ms-excel";
        Response.AddHeader("Content-Disposition", string.Format("attachment;filename={0}", "tpms_dict.xls"));
        // ToArray() returns exactly the bytes written. GetBuffer() returns the whole
        // internal buffer, including unused capacity, which would append garbage
        // bytes to the downloaded file.
        Response.BinaryWrite(exportData.ToArray());
        Response.End();
    }
}
The problem I'm encountering with the NPOI library is that all the cells in the exported Excel file are formatted as text. I'd like to retain the format that's used in the data table.
Looking for help. Thanks in advance!!!
To insert data from a data table, you could perhaps use this code in place of the comment "// How can i insert the data's from dataTable in this sheet".
// 1. make a header row from the column names
IRow row1 = sheet1.CreateRow(0);
for (int j = 0; j < dt.Columns.Count; j++)
{
    ICell cell = row1.CreateCell(j);
    // Use ColumnName rather than ToString(): for expression columns ToString()
    // also appends the expression, so it is not a reliable column key.
    cell.SetCellValue(dt.Columns[j].ColumnName);
}
// 2. loop through data (every value is written as text in this first version)
for (int i = 0; i < dt.Rows.Count; i++)
{
    // Sheet row 0 holds the header, so data row i goes to sheet row i + 1.
    IRow row = sheet1.CreateRow(i + 1);
    for (int j = 0; j < dt.Columns.Count; j++)
    {
        ICell cell = row.CreateCell(j);
        // Index by position; no need to look the column up by name.
        cell.SetCellValue(dt.Rows[i][j].ToString());
    }
}
// 3. Auto size columns (a single pass is enough)
for (int j = 0; j < dt.Columns.Count; j++)
{
    sheet1.AutoSizeColumn(j);
}
For data types, you could use the function cell.SetCellType(CellType.[TYPE HERE]);. The type entered in the function SetCellType must match the data type in cell.SetCellValue() afterwards.
This will modify the data loop to look as follows:
// 2. loop through data
for (int i = 0; i < dt.Rows.Count; i++)
{
    IRow row = sheet1.CreateRow(i + 1);
    for (int j = 0; j < dt.Columns.Count; j++)
    {
        ICell cell = row.CreateCell(j);
        object value = dt.Rows[i][j];
        // A database NULL has no type to map; leave the freshly created cell empty.
        if (value == null || value == DBNull.Value)
        {
            continue;
        }

        CellType cellType = GetCorrectCellType(value.GetType());
        // Set the cell type
        if (cellType != CellType.Unknown)
        {
            cell.SetCellType(cellType);
        }
        // Set the cell value. SetCellValue has no overload taking object, so pick the
        // overload that matches the cell type chosen above.
        if (cellType == CellType.String)
        {
            cell.SetCellValue((string)value);
        }
        else if (cellType == CellType.Numeric)
        {
            cell.SetCellValue(Convert.ToDouble(value));
        }
        else if (cellType == CellType.Boolean)
        {
            cell.SetCellValue((bool)value);
        }
        else if (value is DateTime)
        {
            // NOTE(review): the cell also needs a date data-format style to be shown
            // as a date rather than a serial number - confirm against the NPOI docs.
            cell.SetCellValue((DateTime)value);
        }
        else
        {
            // Any other type falls back to its text representation.
            cell.SetCellValue(value.ToString());
        }
    }
}
// Function to return the correct cell type
/// <summary>Maps a .NET value type to the NPOI cell type used to store it.</summary>
/// <param name="dataType">Runtime type of the value read from the data table.</param>
/// <returns>
/// String, Numeric or Boolean for the matching .NET types; Unknown for everything
/// else (including dates).
/// </returns>
public CellType GetCorrectCellType(Type dataType)
{
    if (dataType == typeof(string))
        return CellType.String;
    else if (dataType == typeof(int) || dataType == typeof(double)
          || dataType == typeof(long) || dataType == typeof(short)
          || dataType == typeof(float) || dataType == typeof(decimal))
        return CellType.Numeric;
    else if (dataType == typeof(bool))
        return CellType.Boolean;
    else
        return CellType.Unknown; // Not sure how to set Date Type --> Unknown
}
EDIT
I found out how to set Date values in a human-readable format in this answer.
I know I am a little late here but I think it may help others, yes, there is a way to convert datatable directly to export excel without losing data format.
I have developed an excel utility with the use of the NPOI package, which can
Simply takes your data table or the collection
And Returns you excel while maintaining all the data table/list data type intact in the excel.
Github Code repo.: https://github.com/ansaridawood/.NET-Generic-Excel-Export-Sample/tree/master/GenericExcelExport/ExcelExport
Looking for a code explanation, you can find it here:
https://www.codeproject.com/Articles/1241654/Export-to-Excel-using-NPOI-Csharp-and-WEB-API
It uses NPOI DLL and it has 2 cs files to include and then you are good to go
Below is the first file for reference AbstractDataExport.cs:
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
namespace GenericExcelExport.ExcelExport
{
    /// <summary>Contract for exporters that turn a list of objects into an Excel download.</summary>
    public interface IAbstractDataExport
    {
        HttpResponseMessage Export<T>(List<T> exportData, string fileName, string sheetName);
    }

    /// <summary>
    /// Template for Excel export: creates the workbook and sheet, delegates the data rows to
    /// <see cref="WriteData{T}"/>, then writes the bold header row, auto-sizes the columns and
    /// wraps the workbook bytes in an HTTP response.
    /// </summary>
    public abstract class AbstractDataExport : IAbstractDataExport
    {
        protected string _sheetName;
        protected string _fileName;
        // Column captions; WriteData is expected to fill this before the header row is written.
        protected List<string> _headers;
        // Type name recorded for each column.
        protected List<string> _type;
        protected IWorkbook _workbook;
        protected ISheet _sheet;
        private const string DefaultSheetName = "Sheet1";

        /// <summary>Exports <paramref name="exportData"/> as an .xlsx attachment.</summary>
        /// <param name="exportData">Items to write, one sheet row per item.</param>
        /// <param name="fileName">File name prefix; a timestamp and ".xlsx" are appended.</param>
        /// <param name="sheetName">Name of the single worksheet.</param>
        /// <returns>A 200 response whose content is the workbook.</returns>
        public HttpResponseMessage Export<T>
            (List<T> exportData, string fileName, string sheetName = DefaultSheetName)
        {
            _fileName = fileName;
            _sheetName = sheetName;
            _workbook = new XSSFWorkbook(); //Creating New Excel object
            _sheet = _workbook.CreateSheet(_sheetName); //Creating New Excel Sheet object
            var headerStyle = _workbook.CreateCellStyle(); //Formatting
            var headerFont = _workbook.CreateFont();
            headerFont.IsBold = true;
            headerStyle.SetFont(headerFont);
            // Data first: WriteData also fills _headers, which the header loop below reads.
            WriteData(exportData); //your list object to NPOI excel conversion happens here
            //Header
            var header = _sheet.CreateRow(0);
            for (var i = 0; i < _headers.Count; i++)
            {
                var cell = header.CreateCell(i);
                cell.SetCellValue(_headers[i]);
                cell.CellStyle = headerStyle;
            }
            for (var i = 0; i < _headers.Count; i++)
            {
                _sheet.AutoSizeColumn(i);
            }
            using (var memoryStream = new MemoryStream()) //creating memoryStream
            {
                _workbook.Write(memoryStream);
                var response = new HttpResponseMessage(HttpStatusCode.OK)
                {
                    Content = new ByteArrayContent(memoryStream.ToArray())
                };
                response.Content.Headers.ContentType = new MediaTypeHeaderValue
                    ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
                response.Content.Headers.ContentDisposition =
                    new ContentDispositionHeaderValue("attachment")
                    {
                        FileName = $"{_fileName}_{DateTime.Now.ToString("yyyyMMddHHmmss")}.xlsx"
                    };
                return response;
            }
        }

        //Generic Definition to handle all types of List
        /// <summary>Writes the data rows (from sheet row 1 on) and fills the header captions.</summary>
        public abstract void WriteData<T>(List<T> exportData);
    }
}
and this the second and final file AbstractDataExportBridge.cs:
using NPOI.SS.UserModel;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Text.RegularExpressions;
namespace GenericExcelExport.ExcelExport
{
    /// <summary>
    /// Concrete exporter: reflects over the public properties of <c>T</c>, copies the items
    /// into a typed <see cref="DataTable"/> and writes each value to the sheet with a cell
    /// type that matches the property type.
    /// </summary>
    public class AbstractDataExportBridge : AbstractDataExport
    {
        public AbstractDataExportBridge()
        {
            _headers = new List<string>();
            _type = new List<string>();
        }

        /// <summary>
        /// Writes one sheet row per item, starting at sheet row 1 (row 0 is left for the
        /// header), and fills <c>_headers</c> / <c>_type</c> from the properties of <c>T</c>.
        /// </summary>
        /// <param name="exportData">Items to export.</param>
        public override void WriteData<T>(List<T> exportData)
        {
            PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(T));
            DataTable table = new DataTable();
            foreach (PropertyDescriptor prop in properties)
            {
                // DataTable columns cannot be Nullable<T>; use the underlying type instead.
                var type = Nullable.GetUnderlyingType(prop.PropertyType) ?? prop.PropertyType;
                _type.Add(type.Name);
                table.Columns.Add(prop.Name, type);
                string name = Regex.Replace(prop.Name, "([A-Z])", " $1").Trim(); //space separated
                                                                                  //name by caps for header
                _headers.Add(name);
            }
            foreach (T item in exportData)
            {
                DataRow row = table.NewRow();
                foreach (PropertyDescriptor prop in properties)
                    row[prop.Name] = prop.GetValue(item) ?? DBNull.Value;
                table.Rows.Add(row);
            }

            // Created on first use and shared by every date cell.
            ICellStyle dateStyle = null;
            for (int i = 0; i < table.Rows.Count; i++)
            {
                IRow sheetRow = _sheet.CreateRow(i + 1);
                for (int j = 0; j < table.Columns.Count; j++)
                {
                    ICell cell = sheetRow.CreateCell(j);
                    object currentCellValue = table.Rows[i][j];
                    if (currentCellValue == null ||
                        string.IsNullOrEmpty(Convert.ToString(currentCellValue)))
                    {
                        cell.SetCellValue(string.Empty);
                        continue;
                    }

                    Type columnType = table.Columns[j].DataType;
                    // Enums report their underlying numeric type code; export their names instead.
                    TypeCode code = columnType.IsEnum ? TypeCode.Object : Type.GetTypeCode(columnType);
                    switch (code)
                    {
                        case TypeCode.String:
                            cell.SetCellValue(Convert.ToString(currentCellValue));
                            break;
                        case TypeCode.Int32:
                            cell.SetCellValue(Convert.ToInt32(currentCellValue));
                            break;
                        // Previously only Int32 and Double were written; every other numeric
                        // type silently produced an empty cell.
                        case TypeCode.Byte:
                        case TypeCode.SByte:
                        case TypeCode.Int16:
                        case TypeCode.UInt16:
                        case TypeCode.UInt32:
                        case TypeCode.Int64:
                        case TypeCode.UInt64:
                        case TypeCode.Single:
                        case TypeCode.Double:
                        case TypeCode.Decimal:
                            cell.SetCellValue(Convert.ToDouble(currentCellValue));
                            break;
                        case TypeCode.Boolean:
                            cell.SetCellValue(Convert.ToBoolean(currentCellValue));
                            break;
                        case TypeCode.DateTime:
                            if (dateStyle == null)
                            {
                                // Without a date format the cell would show a serial number.
                                dateStyle = _workbook.CreateCellStyle();
                                dateStyle.DataFormat =
                                    _workbook.CreateDataFormat().GetFormat("yyyy-mm-dd hh:mm:ss");
                            }
                            cell.SetCellValue(Convert.ToDateTime(currentCellValue));
                            cell.CellStyle = dateStyle;
                            break;
                        default:
                            // Unknown types are exported as text rather than dropped.
                            cell.SetCellValue(Convert.ToString(currentCellValue));
                            break;
                    }
                }
            }
        }
    }
}
For a detailed explanation, refer link provided in the beginning.
Related
The following code works fine from a .NET Core app running on a PC. The code loads an excel file and reads it using the NPOI library.
/// <summary>
/// Reads the first sheet of "Test.xlsx" into a <see cref="DataTable"/> (first row = column
/// names) and returns the table serialized as JSON.
/// </summary>
/// <returns>JSON produced by <c>JsonConvert.SerializeObject</c> for the table.</returns>
public string ReadExcel()
{
    DataTable dtTable = new DataTable();
    // Read access is enough; the default (read/write) fails on read-only files.
    using (var stream = new FileStream("Test.xlsx", FileMode.Open, FileAccess.Read))
    {
        XSSFWorkbook xssWorkbook = new XSSFWorkbook(stream);
        ISheet sheet = xssWorkbook.GetSheetAt(0);
        IRow headerRow = sheet.GetRow(0);
        if (headerRow == null)
        {
            // Sheet without a header row: nothing to read.
            return JsonConvert.SerializeObject(dtTable);
        }

        int cellCount = headerRow.LastCellNum;
        // Sheet column index of every table column, so values stay under their own header
        // even when header cells or data cells are blank.
        List<int> sheetColumns = new List<int>();
        for (int j = 0; j < cellCount; j++)
        {
            ICell cell = headerRow.GetCell(j);
            if (cell == null || string.IsNullOrWhiteSpace(cell.ToString())) continue;
            dtTable.Columns.Add(cell.ToString());
            sheetColumns.Add(j);
        }

        for (int i = (sheet.FirstRowNum + 1); i <= sheet.LastRowNum; i++)
        {
            IRow row = sheet.GetRow(i);
            if (row == null) continue;
            if (row.Cells.All(d => d.CellType == CellType.Blank)) continue;

            object[] values = new object[sheetColumns.Count];
            bool hasValue = false;
            for (int k = 0; k < sheetColumns.Count; k++)
            {
                ICell cell = row.GetCell(sheetColumns[k]);
                string text = cell == null ? null : cell.ToString();
                if (!string.IsNullOrWhiteSpace(text))
                {
                    values[k] = text;
                    hasValue = true;
                }
                // Blank cells stay null: the slot is kept so later values do not shift left.
            }
            if (hasValue)
                dtTable.Rows.Add(values);
        }
    }
    return JsonConvert.SerializeObject(dtTable);
}
I want to use this code in my Blazor app to be able to read an Excel file from a browser. I can use the InputFile component to get the file:
<InputFile OnChange="GetFile"/>
The question is how to get the uploaded file as a stream that I can pass to the ReadExcel function. It should be something like this:
// Sketch from the question: change handler wired to the InputFile component.
// `stream` is not declared inside this snippet, and ReadExcel() is called without
// arguments, so the stream opened here is not handed to it.
public async Task GetFile(InputFileChangeEventArgs e) //get excel file
{
stream = e.File.OpenReadStream(); //need a stream here that ReadExcel() can use!
ReadExcel();
}
If I use the above stream in the ReadExcel function instead of the one it has, the code doesnt work. What is the correct way of forming this stream so that ReadExcel can use that instead of the one it has now?
Thanks,
Amjad.
I think the major problem is that a ReadStream is not Seekable (CanSeek == false).
You can copy it to MemoryStream but do keep an eye on the size limits.
/// <summary>
/// Copies the uploaded file into a seekable <see cref="MemoryStream"/> and passes it to
/// <c>ReadExcel</c>.
/// </summary>
/// <param name="e">Change event raised by the InputFile component.</param>
public async Task GetFile(InputFileChangeEventArgs e) //get excel file
{
    using (var stream2 = new MemoryStream())
    {
        // The browser stream is forward-only, so buffer it first.
        // NOTE(review): OpenReadStream() enforces a maximum size (documented default
        // about 500 KB); pass maxAllowedSize for larger workbooks.
        using (var stream1 = e.File.OpenReadStream()) //need a stream here that ReadExcel() can use!
        {
            await stream1.CopyToAsync(stream2);
        }
        // CopyToAsync leaves the position at the end; rewind so the reader starts at byte 0.
        stream2.Position = 0;
        ReadExcel(stream2);
    }
}
I have a requirement where-in I have to fill dataTable from a sheet of Microsoft excel.
The sheet may contain a lot of data, so the requirement is that the data table holding the Excel data is filled on demand while a foreach loop iterates over it, rather than being loaded all at once.
Meaning if there are 1000000 records in the sheet the data table should fetch data in batches of 100 depending on the current position of the foreach current item in the loop.
Any pointer or suggestion will be appreciated.
I would suggest you to use OpenXML to parse and read your excel data from file.
This will also allow you to read out specific sections/regions from your workbook.
You will find more information and also an example at this link:
Microsoft Docs - Parse and read a large spreadsheet document (Open XML SDK)
This will be more efficient and easier to develop than using the official Microsoft Office Excel interop.
**I am not near a PC with Visual Studio, so this code is untested and may have syntax errors until I can test it later.
It will still give you the main idea of what needs to be done.
/// <summary>
/// Opens the workbook through Excel interop and walks <paramref name="numberOfRecords"/>
/// rows of the sheet, starting at row <paramref name="firstRecord"/>.
/// </summary>
/// <param name="firstRecord">Row number of the first row to read.</param>
/// <param name="numberOfRecords">How many consecutive rows to read.</param>
private void ExcelDataPages(int firstRecord, int numberOfRecords)
{
    Excel.Application dataApp = new Excel.Application();
    // A workbook cannot be created with `new`; it is returned by Workbooks.Open below.
    Excel.Workbook dataWorkbook = null;
    try
    {
        // These are application-level settings, not workbook settings.
        dataApp.DisplayAlerts = false;
        dataApp.Visible = false;
        // NOTE(review): "Low" lets macros in the opened workbook run - confirm the file is trusted.
        dataApp.AutomationSecurity = Microsoft.Office.Core.MsoAutomationSecurity.msoAutomationSecurityLow;
        dataWorkbook = dataApp.Workbooks.Open(@"C:\Test\YourWorkbook.xlsx");

        Excel.Worksheet dataSheet = (Excel.Worksheet)dataWorkbook.Sheets["Name of Sheet"];
        for (int x = 0; x < numberOfRecords; x++)
        {
            Excel.Range currentRange = (Excel.Range)dataSheet.Rows[firstRecord + x]; //For all columns in row
            foreach (Excel.Range r in currentRange.Cells) //currentRange represents all the columns in the row
            {
                // do what you need to with the Data here.
            }
        }
    }
    catch (Exception ex)
    {
        //Enter in Error handling
    }
    finally
    {
        // Always release Excel, otherwise a hidden EXCEL.EXE stays alive after a failure.
        if (dataWorkbook != null)
        {
            dataWorkbook.Close(false); //Depending on how quick you will access the next batch of data, you may not want to close the Workbook, reducing load time each time. This may also mean you need to move the open of the workbook to a higher level in your class, or if this is the main process of the app, make it static, stopping the garbage collector from destroying the connection.
        }
        dataApp.Quit();
    }
}
Give the following a try. It uses the NuGet package DocumentFormat.OpenXml. The code is from Using OpenXmlReader; however, I modified it to add data to a DataTable. Since you're reading data from the same Excel file multiple times, it's faster to open the Excel file once using an instance of SpreadsheetDocument and dispose of it when finished. Since the instance of SpreadsheetDocument needs to be disposed of before your application exits, IDisposable is used.
Where it says "ToDo", you'll need to replace the code that creates the DataTable columns with your own code to create the correct columns for your project.
I tested the code below with an Excel file containing approximately 15,000 rows. When reading 100 rows at a time, the first read took approximately 500 ms - 800 ms, whereas subsequent reads took approximately 100 ms - 400 ms.
Create a class (name: HelperOpenXml)
HelperOpenXml.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.Data;
using System.Diagnostics;
namespace ExcelReadSpecifiedRowsUsingOpenXml
{
    /// <summary>
    /// Reads a range of rows from an .xlsx file with the forward-only <see cref="OpenXmlReader"/>.
    /// The document is opened on the first call and stays open until <see cref="Dispose"/>,
    /// so repeated batch reads do not reopen the file.
    /// </summary>
    public class HelperOpenXml : IDisposable
    {
        public string Filename { get; private set; } = string.Empty;
        public int RowCount { get; private set; } = 0;
        private SpreadsheetDocument spreadsheetDocument = null;
        private DataTable dt = null;
        // Shared-string items, materialized once so each lookup is an O(1) index access.
        private List<SharedStringItem> sharedStrings = null;

        public HelperOpenXml(string filename)
        {
            this.Filename = filename;
        }

        /// <summary>Closes the document and clears the table. Safe to call more than once.</summary>
        public void Dispose()
        {
            if (spreadsheetDocument != null)
            {
                spreadsheetDocument.Dispose();
                // Forget the closed document so a later GetRowsSax call reopens the file.
                spreadsheetDocument = null;
                sharedStrings = null;
            }
            if (dt != null)
            {
                dt.Clear();
            }
        }

        /// <summary>Returns the rows whose zero-based position lies in the requested range.</summary>
        /// <param name="startRow">First data row to return.</param>
        /// <param name="endRow">Last data row to return (inclusive).</param>
        /// <param name="firstRowIsHeader">
        /// When true, both bounds are shifted by one so the first sheet row is skipped.
        /// </param>
        /// <returns>The helper's own table; it is cleared and refilled on every call.</returns>
        public DataTable GetRowsSax(int startRow, int endRow, bool firstRowIsHeader = false)
        {
            int startIndex = startRow;
            int endIndex = endRow;
            if (firstRowIsHeader)
            {
                //if first row is header, increment by 1
                startIndex = startRow + 1;
                endIndex = endRow + 1;
            }
            if (spreadsheetDocument == null)
            {
                //create new instance
                spreadsheetDocument = SpreadsheetDocument.Open(Filename, false);
                //create new instance
                dt = new DataTable();
                //ToDo: replace 'dt.Columns.Add(...)' below with your code to create the DataTable columns
                //add columns to DataTable
                dt.Columns.Add("A");
                dt.Columns.Add("B");
                dt.Columns.Add("C");
                dt.Columns.Add("D");
                dt.Columns.Add("E");
                dt.Columns.Add("F");
                dt.Columns.Add("G");
                dt.Columns.Add("H");
                dt.Columns.Add("I");
                dt.Columns.Add("J");
                dt.Columns.Add("K");
            }
            else
            {
                //remove existing data from DataTable
                dt.Rows.Clear();
            }

            WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
            foreach (WorksheetPart worksheetPart in workbookPart.WorksheetParts)
            {
                using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
                {
                    // Position of the current <row> element within this worksheet part.
                    int rowIndex = 0;
                    //use the reader to read the XML
                    while (reader.Read())
                    {
                        if (reader.ElementType != typeof(Row))
                        {
                            continue;
                        }

                        //only get data from desired rows
                        bool wanted = rowIndex >= startIndex && rowIndex <= endIndex
                                      && (rowIndex > 0 || !firstRowIsHeader);
                        List<string> cValues = new List<string>();

                        // A row without child elements must not enter the sibling loop:
                        // from there ReadNextSibling would walk over the following rows.
                        if (reader.ReadFirstChild())
                        {
                            do
                            {
                                if (wanted && reader.ElementType == typeof(Cell))
                                {
                                    Cell c = (Cell)reader.LoadCurrentElement();
                                    //add value to List which is used to add a row to the DataTable
                                    cValues.Add(GetCellText(c, workbookPart));
                                }
                            } while (reader.ReadNextSibling());
                        }

                        if (cValues.Count > 0)
                        {
                            //if List contains data, use it to add row to DataTable
                            dt.Rows.Add(cValues.ToArray());
                        }
                        rowIndex += 1; //increment
                        if (rowIndex > endIndex)
                        {
                            break; //exit loop
                        }
                    }
                }
            }
            DisplayDataTableData(dt); //display data in DataTable
            return dt;
        }

        // Returns the text of one cell: the shared string for string cells, otherwise the raw value.
        private string GetCellText(Cell c, WorkbookPart workbookPart)
        {
            if (c.CellValue == null)
            {
                // No value element: an empty cell, or an inline string.
                return c.InlineString != null ? c.InlineString.InnerText : string.Empty;
            }

            string raw = c.CellValue.InnerText;
            //string/text data is stored in SharedString
            if (c.DataType != null && c.DataType == CellValues.SharedString)
            {
                if (sharedStrings == null)
                {
                    sharedStrings = workbookPart.SharedStringTablePart.SharedStringTable
                        .Elements<SharedStringItem>().ToList();
                }
                SharedStringItem ssi = sharedStrings[int.Parse(raw)];
                // Items made of formatted runs have no direct Text child; fall back to InnerText.
                return ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
            }
            return raw;
        }

        // Debug helper: writes the column names and the first two values of every row.
        private void DisplayDataTableData(DataTable dt)
        {
            foreach (DataColumn dc in dt.Columns)
            {
                Debug.WriteLine("colName: " + dc.ColumnName);
            }
            foreach (DataRow r in dt.Rows)
            {
                Debug.WriteLine(r[0].ToString() + " " + r[1].ToString());
            }
        }
    }
}
Usage:
// Verbatim string literal (@"...") so the backslashes in the path are not escape sequences.
private string excelFilename = @"C:\Temp\Test.xlsx";
private HelperOpenXml helperOpenXml = null;
...
/// <summary>Reads one batch of rows through the shared helper.</summary>
private void GetData(int startIndex, int endIndex, bool firstRowIsHeader)
{
    // Create the helper on first use so this method never runs against a null reference.
    if (helperOpenXml == null)
    {
        helperOpenXml = new HelperOpenXml(excelFilename);
    }
    helperOpenXml.GetRowsSax(startIndex, endIndex, firstRowIsHeader);
}
Note: Make sure to call Dispose() (ex: helperOpenXml.Dispose();) before your application exits.
Update:
OpenXML stores dates as the number of days since 01 Jan 1900. For dates prior to 01 Jan 1900, they are stored in SharedString. For more info see Reading a date from xlsx using open xml sdk
Here's a code snippet:
Cell c = (Cell)reader.LoadCurrentElement();
...
string cellValue = string.Empty;
...
cellValue = c.CellValue.InnerText;
double dateCellValue;
// Convert only when the raw value really is a number; otherwise keep the text as it is
// instead of turning it into the date that corresponds to 0.
// The value comes from the file's XML, so parse it with the invariant culture.
if (Double.TryParse(cellValue, System.Globalization.NumberStyles.Float,
        System.Globalization.CultureInfo.InvariantCulture, out dateCellValue))
{
    DateTime dateValue = DateTime.FromOADate(dateCellValue);
    // Invariant culture keeps the literal '/' separators on every machine.
    cellValue = dateValue.ToString("yyyy/MM/dd", System.Globalization.CultureInfo.InvariantCulture);
}
Another simple alternative is this: Take a look at the NUGET package ExcelDataReader, with additional information on
https://github.com/ExcelDataReader/ExcelDataReader
Usage example:
/// <summary>
/// Loads TestExcel.xlsx from the script folder with ExcelDataReader and checks the first
/// two cells of the first worksheet.
/// </summary>
[Fact]
void Test_ExcelDataReader()
{
    // Must be registered before the reader is created.
    System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
    var scriptPath = Path.GetDirectoryName(Util.CurrentQueryPath); // LinqPad script path
    // Interpolated verbatim string: $@"..." (the '#' in the scraped text was a damaged '@').
    var filePath = $@"{scriptPath}\TestExcel.xlsx";
    using (var stream = File.Open(filePath, FileMode.Open, FileAccess.Read))
    {
        // Auto-detect format, supports:
        // - Binary Excel files (2.0-2003 format; *.xls)
        // - OpenXml Excel files (2007 format; *.xlsx, *.xlsb)
        using (var reader = ExcelDataReader.ExcelReaderFactory.CreateReader(stream))
        {
            var result = reader.AsDataSet();
            // The result of each spreadsheet is in result.Tables
            var t0 = result.Tables[0];
            Assert.True(t0.Rows[0][0].Dump("R0C0").ToString()=="Hello", "Expected 'Hello'");
            Assert.True(t0.Rows[0][1].Dump("R0C1").ToString()=="World!", "Expected 'World!'");
        } // using
    } // using
} // fact
Before you start reading, you need to set an encoding provider as follows:
// Registers the code-pages encoding provider; run this before creating the reader.
System.Text.Encoding.RegisterProvider(
System.Text.CodePagesEncodingProvider.Instance);
The cells are addressed the following way:
// Both indexes are zero-based: Tables[n] selects a worksheet, Rows[r][c] selects a cell.
var t0 = result.Tables[0]; // table 0 is the first worksheet
var cell = t0.Rows[0][0]; // on table t0, read cell row 0 column 0
And you can easily loop through the rows and columns in a for loop as follows:
// Visit every row of the worksheet table and dump each of its values in column order.
foreach (DataRow row in t0.Rows)
{
    foreach (var cell in row.ItemArray)
    {
        cell.Dump();
    }
}
I use this code with the EPPlus DLL; don't forget to add a reference to it. You should check that it matches your requirements.
/// <summary>Loads the first worksheet of the file at <c>_fullPath</c> into a DataTable.</summary>
/// <param name="hasHeader">
/// True to take the column names from the first row; false to name them "Column N" and
/// treat the first row as data.
/// </param>
/// <returns>The table; empty (no columns) when the worksheet has no cells.</returns>
public DataTable ReadExcelDatatable(bool hasHeader = true)
{
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        using (var stream = File.OpenRead(this._fullPath))
        {
            pck.Load(stream);
        }
        var ws = pck.Workbook.Worksheets.First();
        DataTable tbl = new DataTable();
        // Dimension is null for a worksheet without any cells.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        foreach (var firstRowCell in ws.Cells[1, 1, 1, ws.Dimension.End.Column])
        {
            //table head
            // Exactly one table column per sheet column: rows are filled by sheet column
            // index below, so a second column per header cell would misalign every value.
            tbl.Columns.Add(hasHeader ? firstRowCell.Text : string.Format("Column {0}", firstRowCell.Start.Column));
        }
        var startRow = hasHeader ? 2 : 1;
        for (int rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
        {
            var wsRow = ws.Cells[rowNum, 1, rowNum, ws.Dimension.End.Column];
            DataRow row = tbl.Rows.Add();
            foreach (var cell in wsRow)
            {
                // Sheet columns are 1-based, table columns 0-based.
                row[cell.Start.Column - 1] = cell.Text;
            }
        }
        return tbl;
    }
}
I'm going to give you a different answer. If the performance is bad loading a million rows into a DataTable resort to using a Driver to load the data: How to open a huge excel file efficiently
DataSet excelDataSet = new DataSet();
// Verbatim string literal (@"...") so the backslashes in the path are kept as written.
string filePath = @"c:\temp\BigBook.xlsx";
// For .XLSXs we use =Microsoft.ACE.OLEDB.12.0;, for .XLS we'd use Microsoft.Jet.OLEDB.4.0; with "';Extended Properties=\"Excel 8.0;HDR=YES;\"";
string connectionString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source='" + filePath + "';Extended Properties=\"Excel 12.0;HDR=YES;\"";
using (OleDbConnection conn = new OleDbConnection(connectionString))
// The adapter is disposable too; release it together with the connection.
using (OleDbDataAdapter objDA = new System.Data.OleDb.OleDbDataAdapter
    ("select * from [Sheet1$]", conn))
{
    conn.Open();
    objDA.Fill(excelDataSet);
    //dataGridView1.DataSource = excelDataSet.Tables[0];
}
Next filter the DataSet's DataTable using a DataView. Using a DataView's RowFilter property you can specify subsets of rows based on their column values.
// DataView over the first loaded table. Constructor arguments in order:
// table, row filter expression, sort columns, row state filter.
DataView prodView = new DataView(excelDataSet.Tables[0],
"UnitsInStock <= ReorderLevel",
"SupplierID, ProductName",
DataViewRowState.CurrentRows);
Ref: https://www.c-sharpcorner.com/article/dataview-in-C-Sharp/
Or you could use the DataTables' DefaultView RowFilter directly:
// Sets the filter expression on the table's DefaultView; no separate DataView is created.
excelDataSet.Tables[0].DefaultView.RowFilter = "Amount >= 5000 and Amount <= 5999 and Name = 'StackOverflow'";
I have an .xlsx file stored on my desktop that my C# program reads from, then it loads each worksheet into datatables that my program uses. When a modification is made to a dataTable, I save to the .xlsx file by first loading the datatable back into the worksheet, and then saving the excelPackage with the modified dataTable information.
The problem is, I sometimes need to overwrite a cell with a blank string, and after saving, the previous value is still there.
It will let me update a value to a space (" "), but I want to save the cell as empty, "", or null.
This is loading my dataTables from the .xlsx
// Load every worksheet of dataSet.xlsx into a DataTable (first row = column names) and
// keep the two tables this program works with.
using (var pck = new OfficeOpenXml.ExcelPackage())
{
    try
    {
        using (var stream = File.OpenRead(filePath + "/dataSet.xlsx"))
        {
            pck.Load(stream);
        }
        // Enumerate instead of indexing: the index loop `i = 1; i < Count` skips one
        // worksheet whichever index base the collection uses.
        foreach (var ws in pck.Workbook.Worksheets)
        {
            DataTable tbl = new DataTable();
            // Dimension is null for a worksheet without cells; leave its table empty.
            if (ws.Dimension != null)
            {
                foreach (var firstRowCell in ws.Cells[1, 1, 1, ws.Dimension.End.Column])
                {
                    tbl.Columns.Add(firstRowCell.Text);
                }
                // Data starts on row 2; row 1 holds the column names.
                for (int rowNum = 2; rowNum <= ws.Dimension.End.Row; rowNum++)
                {
                    var wsRow = ws.Cells[rowNum, 1, rowNum, ws.Dimension.End.Column];
                    DataRow row = tbl.Rows.Add();
                    foreach (var cell in wsRow)
                    {
                        row[cell.Start.Column - 1] = cell.Text;
                    }
                }
            }
            if (ws.Name == "customerDataTable") { _customerDataTable = tbl; }
            else if (ws.Name == "vehicleDataTable") { _vehicleDataTable = tbl; }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
This is saving the dataTable information into worksheet, then to .xlsx
// Pick the worksheet that belongs to the table being saved ("" when it is neither table).
string tableName = table == _customerDataTable
    ? "customerDataTable"
    : (table == _vehicleDataTable ? "vehicleDataTable" : "");

// Open the workbook, write the table (with its header row) into that worksheet, and save.
var file = new FileInfo(filePath + "/dataSet.xlsx");
using (var excelPackage = new ExcelPackage(file))
{
    ExcelWorksheet excelWorksheet = excelPackage.Workbook.Worksheets[tableName];
    excelWorksheet.Cells.LoadFromDataTable(table, true);
    excelPackage.Save();
}
I got it to work following VDWWDs post, using the following code:
// Write every table value into the worksheet cell by cell.
// Sheet row 1 is the header, so table row r goes to sheet row r + 2; sheet columns are 1-based.
for (int r = 0; r < table.Rows.Count; r++)
{
    DataRow source = table.Rows[r];
    for (int c = 0; c < table.Columns.Count; c++)
    {
        excelWorksheet.Cells[r + 2, c + 1].Value = source[c];
    }
}
//excelWorksheet.Cells.LoadFromDataTable(table, true);
I replaced the LoadFromTable() line with a nested For loop. Although it functionally works, I'm going to be using this method a lot throughout my program, and this solution seems a bit bulky compared to EPPlus LoadFromTable method. I'm thinking there has to be a better way...
using .net 4.5
I'm trying to read .xls/.xlsx file using EPPlus (v4.0.4), but get an error. SO has questions on the same error but none relate or solve my problem.
/// <summary>Page load handler: reads the test workbook.</summary>
protected void Page_Load(object sender, EventArgs e)
{
    // Verbatim string literal (@"...") so the backslash in the path is kept as written.
    GetDataTableFromExcel(@"D:\test.xlsx");
}
/// <summary>Loads worksheet 1 of the workbook at <paramref name="path"/> into a DataTable.</summary>
/// <param name="path">Path of the workbook to read.</param>
/// <param name="hasHeader">
/// True to take the column names from the first row; false to name them "Column N" and
/// treat the first row as data.
/// </param>
/// <returns>The table; empty (no columns) when the worksheet has no cells.</returns>
private DataTable GetDataTableFromExcel(string path, bool hasHeader = true)
{
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }
        var ws = pck.Workbook.Worksheets[1];
        DataTable tbl = new DataTable();
        // Dimension is null for a worksheet without any cells.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        int lastColumn = ws.Dimension.End.Column;
        // One table column per sheet column, addressed by index: enumerating the header
        // range skips blank header cells, which leaves too few columns for the rows below.
        for (int col = 1; col <= lastColumn; col++)
        {
            tbl.Columns.Add(hasHeader ? ws.Cells[1, col].Text : string.Format("Column {0}", col));
        }
        var startRow = hasHeader ? 2 : 1;
        for (int rowNum = startRow; rowNum <= ws.Dimension.End.Row; rowNum++)
        {
            var wsRow = ws.Cells[rowNum, 1, rowNum, lastColumn];
            DataRow row = tbl.Rows.Add();
            foreach (var cell in wsRow)
            {
                // Sheet columns are 1-based, table columns 0-based.
                row[cell.Start.Column - 1] = cell.Text;
            }
        }
        return tbl;
    }
}
The error occurs at pck.Load(stream);
A disk error occurred during a write operation. (Exception from
HRESULT: 0x8003001D (STG_E_WRITEFAULT)
A simple example how you can use EPPlus to read excel file:
Reff : http://sforsuresh.in/reading-excel-file-using-epplus-package/
/// <summary>Prints every cell of worksheet 1 of the workbook to the console.</summary>
/// <param name="FilePath">Path of the workbook to read.</param>
public void readXLS(string FilePath)
{
    FileInfo existingFile = new FileInfo(FilePath);
    using (ExcelPackage package = new ExcelPackage(existingFile))
    {
        //get the first worksheet in the workbook
        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];
        // Dimension is null for a worksheet without any cells.
        if (worksheet.Dimension == null)
        {
            return;
        }
        int colCount = worksheet.Dimension.End.Column; //get Column Count
        int rowCount = worksheet.Dimension.End.Row; //get row count
        for (int row = 1; row <= rowCount; row++)
        {
            for (int col = 1; col <= colCount; col++)
            {
                // Empty cells have a null Value; print them as an empty string.
                object value = worksheet.Cells[row, col].Value;
                string text = value == null ? string.Empty : value.ToString().Trim();
                Console.WriteLine(" Row:" + row + " column:" + col + " Value:" + text);
            }
        }
    }
}
We can first convert the .xls file to .xlsx format using Microsoft.Office.Interop.Excel; after the conversion, read the newly formatted file with EPPlus.
/// <summary>
/// Converts the workbook at <paramref name="filePath"/> to Open XML format with Excel
/// interop (saved next to it as <c>filePath + ".x"</c>), then reads the first worksheet of
/// the converted file with EPPlus.
/// </summary>
/// <param name="filePath">Path of the workbook to convert and read.</param>
/// <param name="isFirstRowHeader">
/// True to take the column names from the first row; false to name them "Column N".
/// </param>
/// <returns>The table; errors while reading are written to the console.</returns>
public static DataTable ReadExcelFileToDataTable(string filePath, bool isFirstRowHeader = true)
{
    var newFileName = filePath + ".x";

    #region Convert xls file to xlsx file
    // Convert xls file to xlsx file -- Microsoft Excel must be installed on the system on which this code is running
    var app = new Microsoft.Office.Interop.Excel.Application();
    try
    {
        // Open the workbook that was passed in (the original opened an empty path).
        var web = app.Workbooks.Open(filePath);
        try
        {
            web.SaveAs(newFileName, FileFormat: Microsoft.Office.Interop.Excel.XlFileFormat.xlOpenXMLWorkbook);
        }
        finally
        {
            web.Close();
        }
    }
    finally
    {
        // Quit even when the conversion fails, so no hidden Excel process is left behind.
        app.Quit();
    }
    #endregion

    DataTable tbl = new DataTable();
    using (Excel.ExcelPackage xlsPackage = new Excel.ExcelPackage(new FileInfo(newFileName))) //using Excel = OfficeOpenXml; <--EPPLUS
    {
        Excel.ExcelWorkbook workBook = xlsPackage.Workbook;
        try
        {
            // Take the first sheet by enumeration so the result does not depend on whether
            // the worksheet collection is 0- or 1-based.
            Excel.ExcelWorksheet wsworkSheet = null;
            foreach (Excel.ExcelWorksheet candidate in workBook.Worksheets)
            {
                wsworkSheet = candidate;
                break;
            }
            // Dimension is null for a worksheet without any cells.
            if (wsworkSheet != null && wsworkSheet.Dimension != null)
            {
                foreach (var firstRowCell in wsworkSheet.Cells[1, 1, 1, wsworkSheet.Dimension.End.Column])
                {
                    var colName = firstRowCell.Text;
                    tbl.Columns.Add(isFirstRowHeader ? colName : string.Format("Column {0}", firstRowCell.Start.Column));
                }
                var startRow = isFirstRowHeader ? 2 : 1;
                for (int rowNum = startRow; rowNum <= wsworkSheet.Dimension.End.Row; rowNum++)
                {
                    var wsRow = wsworkSheet.Cells[rowNum, 1, rowNum, wsworkSheet.Dimension.End.Column];
                    DataRow row = tbl.Rows.Add();
                    foreach (var cell in wsRow)
                    {
                        row[cell.Start.Column - 1] = cell.Text;
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }
    }
    return tbl;
}
With this code you won't get an error because a cell is null. It will also cast each value to the data type of the matching property in your class!
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.OleDb;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using OfficeOpenXml;
/// <summary>EPPlus helper that maps worksheet rows onto objects.</summary>
public static class ReadExcel
{
    /// <summary>
    /// Builds one <typeparamref name="T"/> per worksheet row, starting at row 2. Columns are
    /// matched to the public properties of <typeparamref name="T"/> by position: sheet
    /// column 1 goes to the first property, column 2 to the second, and so on.
    /// </summary>
    /// <param name="worksheet">Worksheet to read; row 1 is treated as the header.</param>
    /// <returns>
    /// The mapped items, or an empty list when the sheet has no cells or any error occurs.
    /// </returns>
    public static List<T> ReadExcelToList<T>(this ExcelWorksheet worksheet) where T : new()
    {
        List<T> collection = new List<T>();
        try
        {
            if (worksheet == null || worksheet.Dimension == null)
            {
                return collection;
            }

            //Get the properties of T
            PropertyInfo[] properties = typeof(T).GetProperties();
            // Ignore sheet columns that have no matching property instead of overflowing.
            int lastColumn = Math.Min(worksheet.Dimension.End.Column, properties.Length);

            List<T> items = new List<T>();
            for (int rowNum = 2; rowNum <= worksheet.Dimension.End.Row; rowNum++)
            {
                T item = new T();
                for (int col = 1; col <= lastColumn; col++)
                {
                    PropertyInfo property = properties[col - 1];
                    if (!property.CanWrite)
                    {
                        continue;
                    }
                    string text = worksheet.Cells[rowNum, col].Text;
                    // A blank cell keeps the property's default value; converting "" to a
                    // number or date would throw and discard the whole result.
                    if (string.IsNullOrEmpty(text))
                    {
                        continue;
                    }
                    // Convert to the underlying type when the property is Nullable<X>.
                    Type targetType = Nullable.GetUnderlyingType(property.PropertyType) ?? property.PropertyType;
                    property.SetValue(item, Convert.ChangeType(text, targetType));
                }
                items.Add(item);
            }
            // Publish the result only when every row was converted.
            collection = items;
        }
        catch (Exception ex)
        {
            //Save error log
            // Best effort is kept (an empty list is returned), but the failure is traced.
            System.Diagnostics.Trace.TraceError("ReadExcelToList failed: " + ex);
        }
        return collection;
    }
}
How to call this function? please view below code.
/// <summary>
/// Reads the first worksheet of an uploaded workbook into a list of <c>Users</c>.
/// Returns an empty list when no file was posted, the workbook has no worksheet, or
/// reading fails.
/// </summary>
/// <param name="file">Posted workbook; may be null.</param>
public List<Users> GetStudentsFromExcel(HttpPostedFileBase file)
{
    var list = new List<Users>();
    if (file == null)
    {
        return list;
    }

    try
    {
        using (var package = new ExcelPackage(file.InputStream))
        {
            ExcelWorkbook workbook = package.Workbook;
            ExcelWorksheet worksheet = workbook == null ? null : workbook.Worksheets.FirstOrDefault();
            if (worksheet != null)
            {
                list = worksheet.ReadExcelToList<Users>();
                //Your code
            }
        }
    }
    catch (Exception ex)
    {
        //Save error log
    }
    return list;
}
/// <summary>
/// Row model used with ReadExcelToList in the example above.
/// Five string properties followed by one DateTime property.
/// </summary>
public class Users
{
public string Code { get; set; }
public string Name { get; set; }
public string Email { get; set; }
public string Phone { get; set; }
public string Address { get; set; }
// The only non-string property in this model.
public DateTime CreatedAt { get; set; }
}
Hope to help someone!
I know that there are different ways to read an Excel file:
Interop
Oledb
Open Xml SDK
Compatibility is not a question because the program will be executed in a controlled environment.
My Requirement :
Read a file into a DataTable / custom entities (I don't know how to add dynamic properties/fields to an object; the column names will vary between Excel files)
Use DataTable/Custom Entities to perform some operations using its data.
Update DataTable with the results of the operations
Write it back to excel file.
Which would be simpler.
Also, if possible, advise me on custom entities (adding properties/fields to an object dynamically)
Take a look at Linq-to-Excel. It's pretty neat.
// Verbatim string literal (@"..."); the '#' in the scraped text was a damaged '@'.
var book = new LinqToExcel.ExcelQueryFactory(@"File.xlsx");
// Project each row of the "Stock Entry" worksheet into an anonymous item, then keep only
// the items whose supplier is Walmart.
var query =
    from row in book.Worksheet("Stock Entry")
    let item = new
    {
        Code = row["Code"].Cast<string>(),
        Supplier = row["Supplier"].Cast<string>(),
        Ref = row["Ref"].Cast<string>(),
    }
    where item.Supplier == "Walmart"
    select item;
It also allows for strongly-typed row access too.
I realize this question was asked nearly 7 years ago but it's still a top Google search result for certain keywords regarding importing excel data with C#, so I wanted to provide an alternative based on some recent tech developments.
Importing Excel data has become such a common task to my everyday duties, that I've streamlined the process and documented the method on my blog: best way to read excel file in c#.
I use NPOI because it can read/write Excel files without Microsoft Office installed and it doesn't use COM+ or any interops. That means it can work in the cloud!
But the real magic comes from pairing up with NPOI Mapper from Donny Tian because it allows me to map the Excel columns to properties in my C# classes without writing any code. It's beautiful.
Here is the basic idea:
I create a .net class that matches/maps the Excel columns I'm interested in:
/// <summary>
/// Row model for the import: each [Column] attribute names the Excel column
/// that is bound to the property it decorates.
/// </summary>
class CustomExcelFormat
{
    [Column("District")]
    public int District { get; set; }

    [Column("DM")]
    public string FullName { get; set; }

    [Column("Email Address")]
    public string EmailAddress { get; set; }

    [Column("Username")]
    public string Username { get; set; }

    /// <summary>The piece of <see cref="Username"/> before its first '.'.</summary>
    public string FirstName => UsernamePart(0);

    /// <summary>The piece of <see cref="Username"/> between its first and second '.'.</summary>
    public string LastName => UsernamePart(1);

    // Splits Username on '.' and returns the requested piece; the index is not range-checked.
    private string UsernamePart(int index) => Username.Split('.')[index];
}
Notice, it allows me to map based on column name if I want to!
Then when I process the excel file all I need to do is something like this:
/// <summary>
/// Loads the workbook at <paramref name="localPath"/>, maps the rows of the sheet at
/// <paramref name="sheetIndex"/> to <c>CustomExcelFormat</c> objects, passes every row that
/// has an e-mail address to <c>UpdateUser</c>, and finally calls <c>DataContext.SaveChanges()</c>.
/// </summary>
/// <param name="localPath">Path of the Excel file to read.</param>
/// <param name="sheetIndex">Index of the sheet handed to the mapper.</param>
public void Execute(string localPath, int sheetIndex)
{
    IWorkbook workbook;
    // The file stream is only needed while the workbook is being created.
    using (var input = new FileStream(localPath, FileMode.Open, FileAccess.Read))
    {
        workbook = WorkbookFactory.Create(input);
    }

    var mapper = new Mapper(workbook);
    foreach (var rowInfo in mapper.Take<CustomExcelFormat>(sheetIndex))
    {
        var record = rowInfo.Value;
        // Rows without an e-mail address are skipped.
        if (!string.IsNullOrEmpty(record.EmailAddress))
        {
            UpdateUser(record);
        }
    }

    DataContext.SaveChanges();
}
Now, admittedly, my code does not modify the Excel file itself. I am instead saving the data to a database using Entity Framework (that's why you see "UpdateUser" and "SaveChanges" in my example). But there is already a good discussion on SO about how to save/modify a file using NPOI.
Using OLE Query, it's quite simple (e.g. sheetName is Sheet1):
/// <summary>
/// Loads every row of one worksheet into a new <see cref="DataTable"/> through OLE DB.
/// </summary>
/// <param name="fileName">Path of the Excel file; passed to <c>returnConnection</c>.</param>
/// <param name="sheetName">Worksheet name without the trailing '$' (e.g. "Sheet1").</param>
/// <returns>A DataTable filled with the result of <c>select * from [sheetName$]</c>.</returns>
DataTable LoadWorksheetInDataTable(string fileName, string sheetName)
{
    DataTable sheetData = new DataTable();
    // NOTE(review): sheetName is concatenated into the query text, so it must come
    // from a trusted source.
    using (OleDbConnection conn = this.returnConnection(fileName))
    using (OleDbDataAdapter sheetAdapter = new OleDbDataAdapter("select * from [" + sheetName + "$]", conn))
    {
        conn.Open();
        // retrieve the data using data adapter
        sheetAdapter.Fill(sheetData);
        // No explicit Close(): disposing the connection at the end of the using block closes it,
        // and the adapter is now disposed as well.
    }
    return sheetData;
}
/// <summary>
/// Creates (but does not open) an OLE DB connection for the given Excel file, using the
/// Microsoft.Jet.OLEDB.4.0 provider with Extended Properties "Excel 8.0;".
/// </summary>
/// <param name="fileName">Path inserted verbatim as the connection string's Data Source.</param>
private OleDbConnection returnConnection(string fileName)
{
    const string beforeFileName = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=";
    const string afterFileName = "; Jet OLEDB:Engine Type=5;Extended Properties=\"Excel 8.0;\"";

    string connectionString = beforeFileName + fileName + afterFileName;
    return new OleDbConnection(connectionString);
}
For newer Excel versions:
// Alternative return statement for returnConnection: same pattern, but with the
// Microsoft.ACE.OLEDB.12.0 provider and Extended Properties=Excel 12.0.
return new OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + fileName + ";Extended Properties=Excel 12.0;");
You can also use Excel Data Reader, an open-source project on CodePlex. It works really well for getting data out of Excel sheets.
The sample code given on the link specified:
// Sample walk-through of ExcelDataReader. Steps 1 and 2 are alternatives, and so are
// steps 3 and 4: keep only one of each pair, because as written the snippet declares
// excelReader and result twice.
FileStream stream = File.Open(filePath, FileMode.Open, FileAccess.Read);
//1. Reading from a binary Excel file ('97-2003 format; *.xls)
IExcelDataReader excelReader = ExcelReaderFactory.CreateBinaryReader(stream);
//...
//2. Reading from an OpenXml Excel file (2007 format; *.xlsx)
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream);
//...
//3. DataSet - The result of each spreadsheet will be created in result.Tables
DataSet result = excelReader.AsDataSet();
//...
//4. DataSet - Create column names from the first row (set the flag before calling AsDataSet)
excelReader.IsFirstRowAsColumnNames = true;
DataSet result = excelReader.AsDataSet();
//5. Data Reader methods: advance with Read() and pull typed values per column
while (excelReader.Read())
{
// e.g. excelReader.GetInt32(0);
}
//6. Free resources (IExcelDataReader is IDisposable)
excelReader.Close();
Reference: How do I import from Excel to a DataSet using Microsoft.Office.Interop.Excel?
Try this free library: https://freenetexcel.codeplex.com
// Load the workbook from disk, stating that it is in the Excel 97-2003 format.
// The path is a verbatim string literal, whose prefix is '@'.
Workbook workbook = new Workbook();
workbook.LoadFromFile(@"..\..\parts.xls", ExcelVersion.Version97to2003);
// Initialize worksheet: take the first sheet of the workbook.
Worksheet sheet = workbook.Worksheets[0];
// Export the sheet's contents into a DataTable.
DataTable dataTable = sheet.ExportDataTable();
If you can restrict it to just *.xlsx files (Office Open XML format), then probably the most popular library would be EPPlus.
Bonus is, there are no other dependencies. Just install using nuget:
Install-Package EPPlus
Try the Aspose.Cells library (not free, but the trial is enough for reading); it is quite good.
Install-package Aspose.cells
There is sample code:
using Aspose.Cells;
using System;
namespace ExcelReader
{
    /// <summary>
    /// Console sample that prints every non-empty cell of the first worksheet of an Excel file.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Replace path for your file (verbatim string literal, so the prefix is '@').
            readXLS(@"C:\MyExcelFile.xls"); // or "*.xlsx"
            Console.ReadKey();
        }

        /// <summary>
        /// Opens the workbook and writes the text of each non-empty cell of its first
        /// worksheet to the console, row by row.
        /// </summary>
        /// <param name="PathToMyExcel">Path of the workbook to open.</param>
        public static void readXLS(string PathToMyExcel)
        {
            // Open your template file.
            Workbook wb = new Workbook(PathToMyExcel);
            // Get the first worksheet.
            Worksheet worksheet = wb.Worksheets[0];
            // Get cells
            Cells cells = worksheet.Cells;

            // MaxDataRow / MaxDataColumn are used below as inclusive, zero-based upper bounds.
            int rowCount = cells.MaxDataRow;
            int columnCount = cells.MaxDataColumn;
            Console.WriteLine(String.Format("rowCount={0}, columnCount={1}", rowCount, columnCount));

            for (int row = 0; row <= rowCount; row++) // Numeration starts from 0 to MaxDataRow
            {
                for (int column = 0; column <= columnCount; column++) // Numeration starts from 0 to MaxDataColumn
                {
                    // Current cell value as text; empty cells are skipped.
                    string strCell = Convert.ToString(cells[row, column].Value);
                    if (String.IsNullOrEmpty(strCell))
                    {
                        continue;
                    }

                    // Do your stuff here
                    Console.WriteLine(strCell);
                }
            }
        }
    }
}
Read from excel, modify and write back
/// <summary>
/// Reads an Excel file and converts it into a DataSet, with each worksheet becoming one
/// DataTable of the DataSet.
/// </summary>
/// <param name="filename">Path of the workbook to open.</param>
/// <param name="headers">If set to true the first row will be considered as headers</param>
/// <returns>The populated DataSet, or null when the workbook exposes no sheets.</returns>
public DataSet Import(string filename, bool headers = true)
{
    var excelApp = new Excel.Application();
    Excel.Workbook wb = null;
    try
    {
        wb = excelApp.Workbooks.Open(filename);
        var sheets = wb.Sheets;
        if (sheets == null || sheets.Count == 0)
        {
            return null;
        }

        var dataSet = new DataSet();
        foreach (var item in sheets)
        {
            // Sheets may also contain non-worksheet entries (e.g. chart sheets); skip those
            // instead of failing on the cast or adding a null table.
            var sheet = item as Excel.Worksheet;
            if (sheet == null)
            {
                continue;
            }

            var dt = new DataTable();
            var columnCount = ((Excel.Range)sheet.UsedRange.Rows[1, Type.Missing]).Columns.Count;
            var rowCount = ((Excel.Range)sheet.UsedRange.Columns[1, Type.Missing]).Rows.Count;

            for (int j = 0; j < columnCount; j++)
            {
                var cell = (Excel.Range)sheet.Cells[1, j + 1];
                // Header cells may hold numbers or dates; convert explicitly so the
                // DataColumn(string) constructor is always the one that is called.
                string columnName = headers ? Convert.ToString((object)cell.Value) : string.Empty;
                dt.Columns.Add(new DataColumn(columnName));
            }

            // When the first row is the header it is not a data row, so there is one
            // fewer row to copy and the copy starts at sheet row 2.
            int firstDataRow = headers ? 2 : 1;
            int dataRowCount = rowCount - (headers ? 1 : 0);
            for (int i = 0; i < dataRowCount; i++)
            {
                var r = dt.NewRow();
                for (int j = 0; j < columnCount; j++)
                {
                    var cell = (Excel.Range)sheet.Cells[firstDataRow + i, j + 1];
                    r[j] = cell.Value;
                }
                dt.Rows.Add(r);
            }

            dataSet.Tables.Add(dt);
        }

        return dataSet;
    }
    finally
    {
        // Always close the workbook (discarding changes) and quit Excel, even on failure.
        try
        {
            if (wb != null)
            {
                wb.Close(false);
            }
        }
        finally
        {
            excelApp.Quit();
        }
    }
}
/// <summary>
/// Writes <paramref name="dt"/> to the active sheet of a new workbook: column names in bold
/// on row 1, data from row 2 onwards, each column auto-fitted. A copy of the workbook is then
/// saved and Excel is quit.
/// </summary>
/// <param name="dt">Table whose columns and rows are written to the sheet.</param>
/// <param name="headers">
/// NOTE(review): currently not consulted; the header row is always written. Left unchanged
/// so existing callers keep getting the same output.
/// </param>
/// <returns>The file name the workbook copy was saved under.</returns>
public string Export(DataTable dt, bool headers = false)
{
    var wb = _xl.Workbooks.Add();
    try
    {
        var sheet = (Excel.Worksheet)wb.ActiveSheet;
        //process columns
        for (int i = 0; i < dt.Columns.Count; i++)
        {
            var col = dt.Columns[i];
            //added columns to the top of sheet
            var currentCell = (Excel.Range)sheet.Cells[1, i + 1];
            currentCell.Value = col.ToString();
            currentCell.Font.Bold = true;
            //process rows
            for (int j = 0; j < dt.Rows.Count; j++)
            {
                var row = dt.Rows[j];
                //added rows to sheet (row 1 holds the header, so data starts at row 2)
                var cell = (Excel.Range)sheet.Cells[j + 1 + 1, i + 1];
                cell.Value = row[i];
            }
            currentCell.EntireColumn.AutoFit();
        }
        var fileName="{somepath/somefile.xlsx}";
        wb.SaveCopyAs(fileName);
        return fileName;
    }
    finally
    {
        // SaveCopyAs leaves the workbook itself unsaved; close it without saving so that
        // Quit has nothing left to ask about, and quit Excel even when an error occurred.
        try
        {
            wb.Close(false);
        }
        finally
        {
            _xl.Quit();
        }
    }
}
I used Office's NuGet Package: DocumentFormat.OpenXml and pieced together the code from that component's doc site.
With the helper code below, it was similar in complexity to the CSV file parsing I do elsewhere in that project...
/// <summary>
/// Copies <paramref name="stream"/> into memory, opens it read-only as a spreadsheet, and
/// walks the named sheet from row 11 downwards until the cell in column A is blank,
/// reading columns A, B, D and J of each row.
/// </summary>
/// <param name="stream">Source stream containing the .xlsx file.</param>
/// <param name="sheetName">Name of the sheet to read.</param>
/// <exception cref="ArgumentException">No sheet with the given name exists.</exception>
public static async Task ImportXLSX(Stream stream, string sheetName) {
    // This was necessary for my Blazor project, which used a BrowserFileStream object
    using (var ms = new MemoryStream()) {
        await stream.CopyToAsync(ms);
        using (var document = SpreadsheetDocument.Open(ms, false)) {
            // Retrieve a reference to the workbook part.
            WorkbookPart wbPart = document.WorkbookPart;
            // Find the sheet with the supplied name, and then use that
            // Sheet object to retrieve a reference to the first worksheet.
            Sheet theSheet = wbPart?.Workbook.Descendants<Sheet>().Where(s => s?.Name == sheetName).FirstOrDefault();
            // Throw an exception if there is no sheet.
            if (theSheet == null) {
                throw new ArgumentException($"No sheet named '{sheetName}' was found in the workbook.", nameof(sheetName));
            }
            WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(theSheet.Id));
            // For shared strings, look up the value in the
            // shared strings table.
            var stringTable =
                wbPart.GetPartsOfType<SharedStringTablePart>()
                      .FirstOrDefault();
            // I needed to grab 4 cells from each row
            // Starting at row 11, until the cell in column A is blank
            int row = 11;
            while (true) {
                var accountNameCell = GetCell(wsPart, "A" + row.ToString());
                var accountName = GetValue(accountNameCell, stringTable);
                if (string.IsNullOrEmpty(accountName)) {
                    break;
                }
                var investmentNameCell = GetCell(wsPart, "B" + row.ToString());
                var investmentName = GetValue(investmentNameCell, stringTable);
                var symbolCell = GetCell(wsPart, "D" + row.ToString());
                var symbol = GetValue(symbolCell, stringTable);
                // GetCell returns null when the row has no cell in column J.
                var marketValue = GetCell(wsPart, "J" + row.ToString())?.InnerText;
                // DO STUFF with data
                row++;
            }
        }
    }
}
/// <summary>
/// Returns the text of a cell. Cells typed as shared strings are resolved through the
/// shared string table; every other cell yields its own inner text.
/// </summary>
/// <param name="cell">The cell to read; may be null when the cell does not exist.</param>
/// <param name="stringTable">The workbook's shared string table part; may be null.</param>
/// <returns>
/// The cell text, or null when the cell is missing or its shared-string index cannot be resolved.
/// </returns>
private static string? GetValue(Cell cell, SharedStringTablePart stringTable) {
    if (cell == null) {
        return null;
    }
    // Only cells whose data type is SharedString store an index into the table;
    // treating any other cell's content as an index would return an unrelated string.
    bool isSharedString = cell.DataType != null && cell.DataType.Value == CellValues.SharedString;
    if (!isSharedString) {
        return cell.InnerText;
    }
    if (stringTable == null || !int.TryParse(cell.InnerText, out int index)) {
        return null;
    }
    return stringTable.SharedStringTable.ElementAtOrDefault(index)?.InnerText;
}
/// <summary>
/// Finds the first cell in the worksheet whose reference (e.g. "A11") equals
/// <paramref name="cellReference"/>.
/// </summary>
/// <returns>The matching cell, or null when the worksheet has no such cell.</returns>
private static Cell GetCell(WorksheetPart wsPart, string cellReference) {
    // Walk the Cell descendants in document order and stop at the first match.
    foreach (Cell candidate in wsPart.Worksheet.Descendants<Cell>()) {
        if (candidate.CellReference.Value == cellReference) {
            return candidate;
        }
    }
    return null;
}