I have a problem when saving and opening a file with OpenXml library. Here is my code:
public static void SaveExcel(List<Dictionary<string, object>> listData, List<string> entityTypes, string appName)
{
using (var ms = new MemoryStream())
using (var excel = SpreadsheetDocument.Create(ms, SpreadsheetDocumentType.Workbook))
{
var workBookPart = excel.AddWorkbookPart();
workBookPart.Workbook = new Workbook();
var workSheetPart = workBookPart.AddNewPart<WorksheetPart>();
var workSheetData = new SheetData();
workSheetPart.Worksheet = new Worksheet(workSheetData);
var sheets = workBookPart.Workbook.AppendChild(new Sheets());
var index = 1;
foreach (var entityType in entityTypes)
{
var sheet = new Sheet
{
Id = excel.WorkbookPart.GetIdOfPart(workSheetPart),
SheetId = 1U,
Name = entityType
};
sheets.AppendChild(sheet);
}
workBookPart.Workbook.Save(ms);
File.WriteAllBytes("D:/nothing123.xlsx", ms.ToArray());
}
}
I am pretty sure I did the right thing though I have this error when opening the file:
Excel cannot be opened the file 'nothing123.xlsx' because the file format or file extension is not valid. Verify that the file has not been corrupted and the file extension matches the format of the file.
Any idea whats going on with my code?
I don't know if this is relevant to the tag but I made up my mind to use ClosedXml library since it's much easier to use than OpenXml. I can easily create a DataTable and create an Excel file out of the DataTable which is very convenient. Here is my quick sample code:
Sample Data Table
public DataTable getData() {
DataTable dt = new DataTable();
dt.TableName = "SheetName1";
dt.Columns.Add("FirstName");
dt.Columns.Add("LastName");
var row = dt.NewRow();
row["FirstName"] = "Alvin";
row["LastName"] = "Quezon";
dt.Rows.Add(row);
return dt;
}
Sample Code to Excel to Byte Array
public static byte[] GetExcelBytes(DataTable dataTable)
{
using (var ms = new MemoryStream())
using (var workBook = new XLWorkbook())
{
workBook.Worksheets.Add(dataTable);
workBook.SaveAs(ms);
return ms.ToArray();
}
}
I didn't get any issue when opening the file and with superb minimal code usage.
Hopefully this will help anyone would like to use this in the future.
I'm exporting a DataSet to Excel (.xlsx) and for whatever reason, I'm getting an additional (unwanted) worksheet added to my workbook.
My dataTables are setup like so:
DataTable table1 = new DataTable();
table1 = CallReport("ReportName");
DataTable table2 = new DataTable();
table2 = CallReport("ReportName");
DataTable table3 = new DataTable();
table3 = CallReport("ReportName");
//add to DataSet
DataSet exportSet = new DataSet();
table3.TableName = "Combined";
exportSet.Tables.Add(table3);
table2.TableName = "Non-Services";
exportSet.Tables.Add(table2);
table1.TableName = "Services";
exportSet.Tables.Add(table1);
I'm then creating an Excel instance, then using foreach to loop through my DataSet and write the tables to the Excel workbook:
MSClass.CreateExcelFile();
foreach (DataTable table in exportSet.Tables)
{
MSClass.WriteToExcel(table, table.TableName);
}
MSClass.SaveExcelFile(exportPath);
Here is how I'm creating the Excel file, writing to it and saving it within my MSClass:
class MSClass
{
static Microsoft.Office.Interop.Excel.Application xl;
public static void CreateExcelFile()
{
//Creates new instance of Excel.Application()
xl = new Excel.Application();
//Adds new workbook to the application instance
xl.Workbooks.Add();
}
public static void SaveExcelFile(string filePath)
{
//Check to make sure the filePath isn't empty, and if it is, just show the file instance
if (filePath != null && filePath != "")
{
//in case there's an error
try
{
//Makes sure the sheet is in the active session
Excel._Worksheet sheet = xl.ActiveSheet;
//Stops from asking to overwrite the file.
//If I didn't want to overwrite I would be changing the file name everytime.
xl.DisplayAlerts = false;
//Saves the sheet to the file path
sheet.SaveAs(filePath);
//shows the final product
xl.Visible = true;
}
catch (Exception ex)
{
//throw up an Exception for any error and show the error message
throw new Exception("Export To Excel: Excel file could not be saved! Check filePath.\n" + ex.Message);
}
}
else //no file path is given
{
//Just show the current session of the Excel Application
xl.Visible = true;
}
}
public static void WriteToExcel(DataTable table, string sheetName)
{
//Adds new worksheet to workbook
Excel._Worksheet sheet = xl.ActiveWorkbook.Sheets[1];
//Gets count of columns within supplied DataTable
int colCount = table.Columns.Count;
//Creates object array for headers for each column
object[] header = new object[colCount];
//loop to add column names to header object array
for (int i = 0; i < colCount; i++)
{
//Adds column names to header object array
header[i] = table.Columns[i].ColumnName;
}
//Get range of headers
Excel.Range headerRange = sheet.get_Range((Excel.Range)(sheet.Cells[1, 1]), (Excel.Range)(sheet.Cells[1, colCount]));
//Applies the header to Excel file
headerRange.Value = header;
//Adds color to header to make more distinct
headerRange.Interior.Color = ColorTranslator.ToOle(Color.LightGray);
//Bolds the header
headerRange.Font.Bold = true;
//gets count of rows from DataTable
int rowCount = table.Rows.Count;
//Object multidimensional array for the cells within the dataTable
object[,] cells = new object[rowCount, colCount];
//Adds the cells from DataTable to cell Object array
for (int j = 0; j < rowCount; j++)
{
for (int i = 0; i < colCount; i++)
{
//Sets cell values to DataTable values
cells[j, i] = table.Rows[j][i];
}
}
//prints the cell values to Excel cell(s)
sheet.get_Range((Excel.Range)(sheet.Cells[2, 1]), (Excel.Range)(sheet.Cells[rowCount + 1, colCount])).Value = cells;
sheet.Name = sheetName;
sheet.Activate();
xl.Worksheets.Add(sheet);
}
}
I'm unsure what is currently causing the extra sheet to be exported. the sheets export in the order of: Sheet4 | Services | Non-Services | Combined
Where Sheet4 is completely blank, and the other sheets are exported in reversed order.
What is causing my extra (blank) sheet to be added to the workbook?
//Need to update Excel sheet columns while retrieving data from data table enter code here
foreach (DataRow datarow in dt.Rows)
{
int[] colNumber = new int[] { 9,5,13,24,111,17,76,34,38 };
rowcount += 1;
for (int i = 0; i < colNumber.Length; i++)
{ string value = datarow[i].ToString();
ws.Cells[rowcount, colNumber[i]] = value;
}
}
The problem will not be solved by using EntityFramework as I'm afraid you are focusing on the wrong area. The main reason for this delay is that you are trying to access the worksheet object for each record in dt.Rows.
I recommend using GemBox.Spreadsheet for this purpose. You can directly attach the datatable to the worksheet. Also, you don't need to explicitly convert values to string.
----------
// Code sample.
private void Download()
{
DataTable datatable=yourDatatable;// callStore Procedure to fetch DataTable
ExcelFile csvFile = new ExcelFile();//GemBox.Spreadsheet ExcelFile
ExcelWorksheet ws = csvFile.Worksheets.Add("YourWorkSheetName");
if (ws != null)
{
ws.InsertDataTable(datatable, 0, 0, true);
// Use MemoryStream to save or to send it to client as response.
}
}
----------
I am using the following code to get data from excel sheet:-
private static DataTable GetDataFromCSVFile(string csv_file_path)
{
DataTable csvData = new DataTable();
using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
string[] colFields = csvReader.ReadFields();
foreach (string column in colFields)
{
DataColumn datecolumn = new DataColumn(column);
datecolumn.AllowDBNull = true;
csvData.Columns.Add(datecolumn);
}
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvData.Rows.Add(fieldData);
}
return csvData;
}
}
but I need to retrieve same from sheets within same workbook. Any idea how can I do this using datatable in c#.
Thanks
"As users input data in Excel sheets" doesn't mean input into xls or xlsx file. It may mean that users use MS Excel to edit CSV. You can create "true excel" file with data from 4 source csv files and read a new single file, but you need another API, not TextFieldParser. To read new xlsx files i use DocumentFormat.OpenXml assembly. Quick sample is
using (var doc = SpreadsheetDocument.Open(_docPath, false))
{
var sheet = doc.WorkbookPart.Workbook.Sheets.Elements<Sheet>().First()
//Or iterate over all sheets, get sheet name, read it's data etc.
//i'm sure you can find a lot of samples
var worksheetPart = (WorksheetPart)doc.WorkbookPart.GetPartById(sheet.Id);
var sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();
foreach (var row in sheetData.Elements<Row>())
{
//some code here
}
}
I know that there are different ways to read an Excel file:
Iterop
Oledb
Open Xml SDK
Compatibility is not a question because the program will be executed in a controlled environment.
My Requirement :
Read a file to a DataTable / CUstom Entities (I don't know how to make dynamic properties/fields to an object[column names will be variating in an Excel file])
Use DataTable/Custom Entities to perform some operations using its data.
Update DataTable with the results of the operations
Write it back to excel file.
Which would be simpler.
Also if possible advice me on custom Entities (adding properties/fields to an object dynamically)
Take a look at Linq-to-Excel. It's pretty neat.
var book = new LinqToExcel.ExcelQueryFactory(#"File.xlsx");
var query =
from row in book.Worksheet("Stock Entry")
let item = new
{
Code = row["Code"].Cast<string>(),
Supplier = row["Supplier"].Cast<string>(),
Ref = row["Ref"].Cast<string>(),
}
where item.Supplier == "Walmart"
select item;
It also allows for strongly-typed row access too.
I realize this question was asked nearly 7 years ago but it's still a top Google search result for certain keywords regarding importing excel data with C#, so I wanted to provide an alternative based on some recent tech developments.
Importing Excel data has become such a common task to my everyday duties, that I've streamlined the process and documented the method on my blog: best way to read excel file in c#.
I use NPOI because it can read/write Excel files without Microsoft Office installed and it doesn't use COM+ or any interops. That means it can work in the cloud!
But the real magic comes from pairing up with NPOI Mapper from Donny Tian because it allows me to map the Excel columns to properties in my C# classes without writing any code. It's beautiful.
Here is the basic idea:
I create a .net class that matches/maps the Excel columns I'm interested in:
class CustomExcelFormat
{
[Column("District")]
public int District { get; set; }
[Column("DM")]
public string FullName { get; set; }
[Column("Email Address")]
public string EmailAddress { get; set; }
[Column("Username")]
public string Username { get; set; }
public string FirstName
{
get
{
return Username.Split('.')[0];
}
}
public string LastName
{
get
{
return Username.Split('.')[1];
}
}
}
Notice, it allows me to map based on column name if I want to!
Then when I process the excel file all I need to do is something like this:
public void Execute(string localPath, int sheetIndex)
{
IWorkbook workbook;
using (FileStream file = new FileStream(localPath, FileMode.Open, FileAccess.Read))
{
workbook = WorkbookFactory.Create(file);
}
var importer = new Mapper(workbook);
var items = importer.Take<CustomExcelFormat>(sheetIndex);
foreach(var item in items)
{
var row = item.Value;
if (string.IsNullOrEmpty(row.EmailAddress))
continue;
UpdateUser(row);
}
DataContext.SaveChanges();
}
Now, admittedly, my code does not modify the Excel file itself. I am instead saving the data to a database using Entity Framework (that's why you see "UpdateUser" and "SaveChanges" in my example). But there is already a good discussion on SO about how to save/modify a file using NPOI.
Using OLE Query, it's quite simple (e.g. sheetName is Sheet1):
DataTable LoadWorksheetInDataTable(string fileName, string sheetName)
{
DataTable sheetData = new DataTable();
using (OleDbConnection conn = this.returnConnection(fileName))
{
conn.Open();
// retrieve the data using data adapter
OleDbDataAdapter sheetAdapter = new OleDbDataAdapter("select * from [" + sheetName + "$]", conn);
sheetAdapter.Fill(sheetData);
conn.Close();
}
return sheetData;
}
private OleDbConnection returnConnection(string fileName)
{
return new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + fileName + "; Jet OLEDB:Engine Type=5;Extended Properties=\"Excel 8.0;\"");
}
For newer Excel versions:
return new OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + fileName + ";Extended Properties=Excel 12.0;");
You can also use Excel Data Reader an open source project on CodePlex. Its works really well to export data from Excel sheets.
The sample code given on the link specified:
FileStream stream = File.Open(filePath, FileMode.Open, FileAccess.Read);
//1. Reading from a binary Excel file ('97-2003 format; *.xls)
IExcelDataReader excelReader = ExcelReaderFactory.CreateBinaryReader(stream);
//...
//2. Reading from a OpenXml Excel file (2007 format; *.xlsx)
IExcelDataReader excelReader = ExcelReaderFactory.CreateOpenXmlReader(stream);
//...
//3. DataSet - The result of each spreadsheet will be created in the result.Tables
DataSet result = excelReader.AsDataSet();
//...
//4. DataSet - Create column names from first row
excelReader.IsFirstRowAsColumnNames = true;
DataSet result = excelReader.AsDataSet();
//5. Data Reader methods
while (excelReader.Read())
{
//excelReader.GetInt32(0);
}
//6. Free resources (IExcelDataReader is IDisposable)
excelReader.Close();
Reference: How do I import from Excel to a DataSet using Microsoft.Office.Interop.Excel?
Try to use this free way to this, https://freenetexcel.codeplex.com
Workbook workbook = new Workbook();
workbook.LoadFromFile(#"..\..\parts.xls",ExcelVersion.Version97to2003);
//Initialize worksheet
Worksheet sheet = workbook.Worksheets[0];
DataTable dataTable = sheet.ExportDataTable();
If you can restrict it to just (Open Office XML format) *.xlsx files, then probably the most popular library would be EPPLus.
Bonus is, there are no other dependencies. Just install using nuget:
Install-Package EPPlus
Try to use Aspose.cells library (not free, but trial is enough to read), it is quite good
Install-package Aspose.cells
There is sample code:
using Aspose.Cells;
using System;
namespace ExcelReader
{
class Program
{
static void Main(string[] args)
{
// Replace path for your file
readXLS(#"C:\MyExcelFile.xls"); // or "*.xlsx"
Console.ReadKey();
}
public static void readXLS(string PathToMyExcel)
{
//Open your template file.
Workbook wb = new Workbook(PathToMyExcel);
//Get the first worksheet.
Worksheet worksheet = wb.Worksheets[0];
//Get cells
Cells cells = worksheet.Cells;
// Get row and column count
int rowCount = cells.MaxDataRow;
int columnCount = cells.MaxDataColumn;
// Current cell value
string strCell = "";
Console.WriteLine(String.Format("rowCount={0}, columnCount={1}", rowCount, columnCount));
for (int row = 0; row <= rowCount; row++) // Numeration starts from 0 to MaxDataRow
{
for (int column = 0; column <= columnCount; column++) // Numeration starts from 0 to MaxDataColumn
{
strCell = "";
strCell = Convert.ToString(cells[row, column].Value);
if (String.IsNullOrEmpty(strCell))
{
continue;
}
else
{
// Do your staff here
Console.WriteLine(strCell);
}
}
}
}
}
}
Read from excel, modify and write back
/// <summary>
/// /Reads an excel file and converts it into dataset with each sheet as each table of the dataset
/// </summary>
/// <param name="filename"></param>
/// <param name="headers">If set to true the first row will be considered as headers</param>
/// <returns></returns>
public DataSet Import(string filename, bool headers = true)
{
var _xl = new Excel.Application();
var wb = _xl.Workbooks.Open(filename);
var sheets = wb.Sheets;
DataSet dataSet = null;
if (sheets != null && sheets.Count != 0)
{
dataSet = new DataSet();
foreach (var item in sheets)
{
var sheet = (Excel.Worksheet)item;
DataTable dt = null;
if (sheet != null)
{
dt = new DataTable();
var ColumnCount = ((Excel.Range)sheet.UsedRange.Rows[1, Type.Missing]).Columns.Count;
var rowCount = ((Excel.Range)sheet.UsedRange.Columns[1, Type.Missing]).Rows.Count;
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[1, j + 1];
var column = new DataColumn(headers ? cell.Value : string.Empty);
dt.Columns.Add(column);
}
for (int i = 0; i < rowCount; i++)
{
var r = dt.NewRow();
for (int j = 0; j < ColumnCount; j++)
{
var cell = (Excel.Range)sheet.Cells[i + 1 + (headers ? 1 : 0), j + 1];
r[j] = cell.Value;
}
dt.Rows.Add(r);
}
}
dataSet.Tables.Add(dt);
}
}
_xl.Quit();
return dataSet;
}
public string Export(DataTable dt, bool headers = false)
{
var wb = _xl.Workbooks.Add();
var sheet = (Excel.Worksheet)wb.ActiveSheet;
//process columns
for (int i = 0; i < dt.Columns.Count; i++)
{
var col = dt.Columns[i];
//added columns to the top of sheet
var currentCell = (Excel.Range)sheet.Cells[1, i + 1];
currentCell.Value = col.ToString();
currentCell.Font.Bold = true;
//process rows
for (int j = 0; j < dt.Rows.Count; j++)
{
var row = dt.Rows[j];
//added rows to sheet
var cell = (Excel.Range)sheet.Cells[j + 1 + 1, i + 1];
cell.Value = row[i];
}
currentCell.EntireColumn.AutoFit();
}
var fileName="{somepath/somefile.xlsx}";
wb.SaveCopyAs(fileName);
_xl.Quit();
return fileName;
}
I used Office's NuGet Package: DocumentFormat.OpenXml and pieced together the code from that component's doc site.
With the below helper code, was similar in complexity to my other CSV file format parsing in that project...
public static async Task ImportXLSX(Stream stream, string sheetName) {
{
// This was necessary for my Blazor project, which used a BrowserFileStream object
MemoryStream ms = new MemoryStream();
await stream.CopyToAsync(ms);
using (var document = SpreadsheetDocument.Open(ms, false))
{
// Retrieve a reference to the workbook part.
WorkbookPart wbPart = document.WorkbookPart;
// Find the sheet with the supplied name, and then use that
// Sheet object to retrieve a reference to the first worksheet.
Sheet theSheet = wbPart?.Workbook.Descendants<Sheet>().Where(s => s?.Name == sheetName).FirstOrDefault();
// Throw an exception if there is no sheet.
if (theSheet == null)
{
throw new ArgumentException("sheetName");
}
WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(theSheet.Id));
// For shared strings, look up the value in the
// shared strings table.
var stringTable =
wbPart.GetPartsOfType<SharedStringTablePart>()
.FirstOrDefault();
// I needed to grab 4 cells from each row
// Starting at row 11, until the cell in column A is blank
int row = 11;
while (true) {
var accountNameCell = GetCell(wsPart, "A" + row.ToString());
var accountName = GetValue(accountNameCell, stringTable);
if (string.IsNullOrEmpty(accountName)) {
break;
}
var investmentNameCell = GetCell(wsPart, "B" + row.ToString());
var investmentName = GetValue(investmentNameCell, stringTable);
var symbolCell = GetCell(wsPart, "D" + row.ToString());
var symbol = GetValue(symbolCell, stringTable);
var marketValue = GetCell(wsPart, "J" + row.ToString()).InnerText;
// DO STUFF with data
row++;
}
}
}
private static string? GetValue(Cell cell, SharedStringTablePart stringTable) {
try {
return stringTable.SharedStringTable.ElementAt(int.Parse(cell.InnerText)).InnerText;
} catch (Exception) {
return null;
}
}
private static Cell GetCell(WorksheetPart wsPart, string cellReference) {
return wsPart.Worksheet.Descendants<Cell>().Where(c => c.CellReference.Value == cellReference)?.FirstOrDefault();
}