Reading Excel data into DataTable - c#

Hi all I have my excel with the following Rows and columns
SKU Quantity Line Price Unit Price Line Discount
XYZ 2 4 2 1
case 2
SKU Quantity Line Price Unit Price Line Discount
XYZ 2 1
In case2 after excecuting Line Discount value is showing in Line Price
public static DataSet LoadExcelData(string pUserID, string pFilePath)
{
DataSet lDSExcel = new DataSet();
DataTable lDTExcel = new DataTable();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(pFilePath, false))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
lDTExcel.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows)
{
DataRow tempRow = lDTExcel.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
}
lDTExcel.Rows.Add(tempRow);
}
lDSExcel.Tables.Add(lDTExcel);
}
lDTExcel.Rows.RemoveAt(0);
return lDSExcel;
}
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
string value = string.Empty;
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
//if (cell.CellValue != null)
// value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}
}
But this is giving an exception can some one help me

In the function GetCellValue(), change the shared string return portion to:
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(cell.CellValue.InnerText)].InnerText;
You were parsing an empty string (the variable "value", which was initialised with an empty string). I'll leave you with error checking on cell.CellValue in case that's not initialised...

Related

How to convert the data that contains 9 header rows from excel to XML in C#?

I'm new in C#. I'm trying to read the data from excel file to convert to XML in C#.
However, I encounter issue to read the two separate data (columns and rows) from the table.
Refer to the picture below, I would like to read the data from the first two columns then only read the data from the fifth rows onwards. After that, combine the data and display in XML format. But the data mess up after convert to XML.
It would be appreciated if anyone could help.
Requirement:
excelData
I have tried to skip the first 4 rows then read the data starting from fifth rows with 9 header rows, but it failed to get the correct value from the cell.
private void btnConvert_Click(object sender, EventArgs e)
{
try
{
if (string.IsNullOrEmpty(txtXMLPath.Text) || string.IsNullOrEmpty(txtNodeName.Text))
{
throw new Exception("The input cannot be empty. Please fill in.");
}
string XMLpath = #"C:\temp\" + txtXMLPath.Text + ".xml";
// Read the Excel file with skip of 4 rows and 9 header rows
(int headerRowCount, int skipRowCount) = (9, 4);
(DataTable dataTable, List<string> headerRow) = ReadExcelFile(txtFilePath.Text, skipRowCount, headerRowCount);
WriteDataTableToXmlFile_Attribute(dataTable, XMLpath, txtNodeName.Text);
MessageBox.Show("Conversion completed successfully! \nXML file saved in " + XMLpath, "Success", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
}
static (DataTable, List<string>) ReadExcelFile(string filePath, int skipRowCount, int headerRowCount)
{
DataTable dataTable = new DataTable();
List<string> headerRow = new List<string>();
using (SpreadsheetDocument document = SpreadsheetDocument.Open(filePath, false))
{
WorkbookPart workbookPart = document.WorkbookPart;
Sheet sheet = workbookPart.Workbook.Descendants<Sheet>().First();
WorksheetPart worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
int columnCount = 0;
foreach (Row row in rows)
{
if (row.RowIndex <= skipRowCount)
{
// Skip this row
continue;
}
else if (row.RowIndex <= headerRowCount)
{
foreach (Cell cell in row.Descendants<Cell>())
{
string cellValue = GetCellValue(workbookPart, cell);
if (!string.IsNullOrEmpty(cellValue))
{
headerRow.Add(cellValue);
if (!dataTable.Columns.Contains(cellValue))
{
dataTable.Columns.Add(cellValue);
columnCount++;
}
}
}
}
else
{
DataRow dataRow = dataTable.NewRow();
int columnIndex = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
while (columnIndex >= columnCount)
{
columnIndex--;
}
string cellValue = GetCellValue(workbookPart, cell);
dataRow[columnIndex++] = cellValue;
}
dataTable.Rows.Add(dataRow);
}
}
}
return (dataTable, headerRow);
}
Expected Output in XML:
<Root>
<Data Lot="#01" Walson Lot No.="8A0M008888" SPEC="Return Loss" Frequency[ GHz ]="2.405" Specification="AAA11" Specification="<-12" UNIT="dB" MAX="-16.01" MIN="-34.70" AVG="-21.67" SD="3.46" 1="-16.49" /></Root>

Is there anyway to upload a large Excel into a database quickly using C#?

I am trying to upload a large Ms Excel file into a relational database using C#.
Using following code:
But the process takes a very long time to read the excel (Approximated 45 hours to read an excel file with 185,000 record).
public static DataTable GetSpreadsheetWorkbookSheet(string filepath)
{
DataTable dataTable = new DataTable();
List<string> sheetList = new List<string>();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(filepath, true))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
int sheetCount = workbookPart.Workbook.Descendants<Sheet>().Count();
string relationshipId = "";
dataTable.Columns.Add("Sheet");
dataTable.Columns.Add("Path");
for (int i = 0; i < sheetCount; i++)
{
DataRow dataRow = dataTable.NewRow();
string sheetName = workbookPart.Workbook.Descendants<Sheet>().ElementAt(i).Name;
relationshipId = workbookPart.Workbook.Descendants<Sheet>().ElementAt(i).Id;
dataTable.Rows.Add(sheetName, filepath);
}
}
return dataTable;
}
public static DataTable CreateSpreadsheetWorkbook(string filepath, string sheetName)
{
DataTable dataTable = new DataTable();
List<string> sheetList = new List<string>();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(filepath, true))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = "";
int x = sheets.ToList().Count;
if (sheets.ToList().Count > 1)
{
relationshipId = sheets.ToList().Find(io => io.Name.ToString().Equals(sheetName)).Id;
}
else
{
relationshipId = sheets.ToList().First().Id.Value;
}
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
dataTable.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows)
{
int t = row.Descendants<Cell>().Count();
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
Thread.Sleep(1);
dataRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
Thread.Sleep(1);
}
dataTable.Rows.Add(dataRow);
Thread.Sleep(1);
}
}
dataTable.Rows.RemoveAt(0);
return dataTable;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.InnerText;
if (cell.DataType != null && (cell.DataType.Value == CellValues.SharedString))
{
string txt = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText.ToString();
Thread.Sleep(1);
return txt;
}
else if (cell.DataType != null && (cell.DataType.Value == CellValues.String))
{
string txt = value;
Thread.Sleep(1);
return txt;
}
else if (cell.DataType != null && (cell.DataType.Value == CellValues.InlineString))
{
string txt = stringTablePart.ToString();
Thread.Sleep(1);
return txt;
}
else if (cell.DataType == null)
{
string txt = value;
Thread.Sleep(1);
return txt;
}
else
{
if (String.IsNullOrEmpty(value) || String.IsNullOrWhiteSpace(value))
{
value = "0";
}
return Convert.ToDecimal(value).ToString("N4");
}
}
I need to upload the file quickly, Is there anyway to doing it quickly, Please help?

OpenXML read performance too slow while parsing excel file

I am reading excel files using C# and OpenXML (SAX). The file size ranges between 5-10 mb and has 5-6 sheets. Number of rows per sheet vary between 25-100K.
I am using the following code to fetch the data. It's reading about 100 rows per second. In comparison, Apache POI is able to read a thousand rows in the same time.
I expected better results as both products are from Microsoft. Am I doing something wrong?
using (SpreadsheetDocument excelDoc = SpreadsheetDocument.Open(filename, true))
{
WorkbookPart workbookPart = excelDoc.WorkbookPart;
WorksheetPart mainSheet = (WorksheetPart)workbookPart.GetPartById(sheetIds[0].ToString());
OpenXmlReader reader = OpenXmlReader.Create(mainSheet);
//CellType c;
SharedStringTable t = workbookPart.SharedStringTablePart.SharedStringTable;
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
reader.ReadFirstChild();
do
{
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
string cellValue;
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
int index = int.Parse(c.CellValue.InnerText);
SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
}
else
{
cellValue = c.CellValue.InnerText;
}
//Console.Out.Write("{0}: {1} ", c.CellReference, cellValue);
}
}
while (reader.ReadNextSibling());
}
}
}
Please try below code.
using (SpreadsheetDocument excelDoc = SpreadsheetDocument.Open(filename, true))
{
WorkbookPart workbookPart = excelDoc.WorkbookPart;
WorksheetPart mainSheet = (WorksheetPart)workbookPart.GetPartById(sheetIds[0].ToString());
OpenXmlReader reader = OpenXmlReader.Create(mainSheet);
//CellType c;
SharedStringTable t = workbookPart.SharedStringTablePart.SharedStringTable;
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
reader.ReadFirstChild();
do
{
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
string cellValue;
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
int index = int.Parse(c.CellValue.InnerText);
SharedStringItem ssi = t.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
}
else
{
cellValue = c.CellValue.InnerText;
}
//Console.Out.Write("{0}: {1} ", c.CellReference, cellValue);
}
}
while (reader.ReadNextSibling());
}
}
}

OpenXML Add Cell to WorkSheet

I am currently doing my head in working over the OpenXML 2.5 Framework on the MSDN site here, https://msdn.microsoft.com/en-us/library/office/cc861607.aspx
All methods I have tried to add a cell to an existing worksheet corrupt the workbook as the MSDN site only outlines creating the worksheet and not modifying it.
Everytime I add a cell the system wants a whole new worksheet and will not allow the addition of a cell to an existing worksheet. I have been banging my head for hours going over MSDN and Googling this with no luck.
The problem is I need a class that can receiving strings and update the excel file. Has anyone been able to add a cell to an existing worksheet? My issue seems to be due to a string by string solution.
Working input (PowerShell) only works if a new Worksheet is created for the Cell,
[CmdletBinding(SupportsShouldProcess=$true, ConfirmImpact='Medium')]
$cSharpData = (
[Reflection.Assembly]::LoadWithPartialName("DocumentFormat.OpenXml"),
[Reflection.Assembly]::LoadWithPartialName("WindowsBase"),
[Reflection.Assembly]::LoadWithPartialName("System.Linq")
)
[String]$cSharpClass = Get-Content .\method.cs
$cSharpType = Add-Type -ReferencedAssemblies $cSharpData -TypeDefinition $cSharpClass
$testData = Get-WmiObject Win32_QuickFixEngineering
[DoExcelMethod]::CreateXLSX('.\test.xlsx')
$locNo = 1
[DoExcelMethod]::AddSheetData('.\test.xlsx', $testData, 'TestWS', 'A', $locNo)
The file this is point at has the following,
using System;
using System.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
public class DoExcelMethod {
private static int SharedDataItem(string sData, SharedStringTablePart ssPart) {
if (ssPart.SharedStringTable == null) {
ssPart.SharedStringTable = new SharedStringTable();
}
int cnt = 0;
foreach (SharedStringItem sspItem in ssPart.SharedStringTable.Elements<SharedStringItem>()) {
if (sspItem.InnerText == sData) {
return cnt;
}
cnt++;
}
ssPart.SharedStringTable.AppendChild(new SharedStringItem(new DocumentFormat.OpenXml.Spreadsheet.Text(sData)));
ssPart.SharedStringTable.Save();
return cnt;
}
private static WorksheetPart InsertWorksheet(string wsName, WorkbookPart wbPart) {
WorksheetPart newWsPart = wbPart.AddNewPart<WorksheetPart>();
newWsPart.Worksheet = new Worksheet(new SheetData());
newWsPart.Worksheet.Save();
Sheets sheets = wbPart.Workbook.GetFirstChild<Sheets>();
string relId = wbPart.GetIdOfPart(newWsPart);
uint sheetId = 1;
if (sheets.Elements<Sheet>().Count() > 0) {
sheetId = sheets.Elements<Sheet>().Select(s => s.SheetId.Value).Max() + 1;
}
Sheet sheet = new Sheet() { Id = relId, SheetId = sheetId, Name = wsName };
sheets.Append(sheet);
wbPart.Workbook.Save();
return newWsPart;
}
private static Cell InsertCellInWorksheet(string columnName, uint rowIndex, WorksheetPart worksheetPart) {
Worksheet worksheet = worksheetPart.Worksheet;
SheetData sheetData = worksheet.GetFirstChild<SheetData>();
string cellReference = columnName + rowIndex;
Row row;
if (sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).Count() != 0) {
row = sheetData.Elements<Row>().Where(r => r.RowIndex == rowIndex).First();
} else {
row = new Row() { RowIndex = rowIndex };
sheetData.Append(row);
}
if (row.Elements<Cell>().Where(c => c.CellReference.Value == columnName + rowIndex).Count() > 0) {
return row.Elements<Cell>().Where(c => c.CellReference.Value == cellReference).First();
} else {
Cell refCell = null;
foreach (Cell cell in row.Elements<Cell>()) {
if (string.Compare(cell.CellReference.Value, cellReference, true) > 0) {
refCell = cell;
break;
}
}
Cell newCell = new Cell() { CellReference = cellReference };
row.InsertBefore(newCell, refCell);
worksheet.Save();
return newCell;
}
}
public static void CreateXLSX(string xlsxFile) {
SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Create(xlsxFile, SpreadsheetDocumentType.Workbook);
WorkbookPart workbookpart = spreadsheetDocument.AddWorkbookPart();
workbookpart.Workbook = new Workbook();
WorksheetPart worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
worksheetPart.Worksheet = new Worksheet(new SheetData());
Sheets sheets = spreadsheetDocument.WorkbookPart.Workbook.AppendChild<Sheets>(new Sheets());
Sheet sheet = new Sheet() { Id = spreadsheetDocument.WorkbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = "Default" };
sheets.Append(sheet);
workbookpart.Workbook.Save();
spreadsheetDocument.Close();
}
public static void AddSheetData(string xlsxFile, string psData, string wsName, string psCol, uint psRow) {
using (SpreadsheetDocument sSheet = SpreadsheetDocument.Open(xlsxFile, true)) {
SharedStringTablePart ssPart;
if (sSheet.WorkbookPart.GetPartsOfType<SharedStringTablePart>().Count() > 0) {
ssPart = sSheet.WorkbookPart.GetPartsOfType<SharedStringTablePart>().First();
} else {
ssPart = sSheet.WorkbookPart.AddNewPart<SharedStringTablePart>();
}
int ssIns = SharedDataItem(psData, ssPart);
WorksheetPart wsPart = InsertWorksheet(wsName, sSheet.WorkbookPart);
Cell cell = InsertCellInWorksheet(psCol, psRow, wsPart);
cell.CellValue = new CellValue(ssIns.ToString());
cell.DataType = new EnumValue<CellValues>(CellValues.SharedString);
wsPart.Worksheet.Save();
}
}
}
So despite this working I cannot get a cell into an existing worksheet, can anyone help as I am going insane :(
Thanks all
The issue you have is in the call to InsertWorksheet in AddSheetData. You are calling the InsertWorksheet method irrespective of whether or not the worksheet already exists. Instead of doing that, you can first search for the worksheet then if it exists you can use it and if it doesn't you can create a new one.
Firstly, you can search for a WorksheetPart by its name using a method such as this one (taken from my answer here):
private static WorksheetPart GetWorksheetPartBySheetName(WorkbookPart workbookPart, string sheetName)
{
WorksheetPart worksheetPart = null;
//find the sheet (note this is case-sensitive)
IEnumerable<Sheet> sheets = workbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>().Where(s => s.Name == sheetName);
if (sheets.Count() > 0)
{
string relationshipId = sheets.First().Id.Value;
worksheetPart = (WorksheetPart)workbookPart.GetPartById(relationshipId);
}
return worksheetPart;
}
If that method finds the WorksheetPart then it will return it, if not it will return null.
Once you have that you just need a small tweak to AddSheetData to call GetWorksheetPartBySheetName then only call InsertWorksheet if that method returns null. To do that you can replace this line
WorksheetPart wsPart = InsertWorksheet(wsName, sSheet.WorkbookPart);
with this
WorksheetPart wsPart = GetWorksheetPartBySheetName(sSheet.WorkbookPart, wsName);
if (wsPart == null)
wsPart = InsertWorksheet(wsName, sSheet.WorkbookPart);

open xml reading from excel file

I want to implement openXml sdk 2.5 into my project. I do everything in this link
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.IO.Packaging;
static void Main(string[] args)
{
String fileName = #"C:\OPENXML\BigData.xlsx";
// Comment one of the following lines to test the method separately.
ReadExcelFileDOM(fileName); // DOM
//ReadExcelFileSAX(fileName); // SAX
}
// The DOM approach.
// Note that the code below works only for cells that contain numeric values.
//
static void ReadExcelFileDOM(string fileName)
{
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
string text;
int rowCount= sheetData.Elements<Row>().Count();
foreach (Row r in sheetData.Elements<Row>())
{
foreach (Cell c in r.Elements<Cell>())
{
text = c.CellValue.Text;
Console.Write(text + " ");
}
}
Console.WriteLine();
Console.ReadKey();
}
}
But i am not getting any row. It hasn't entered loop. Note: I also set up openXml sdk 2.5 my computer
And I find below code this is work for numeric value.For string value it writes 0 1 2 ...
private static void Main(string[] args)
{
var filePath = #"C:/OPENXML/BigData.xlsx";
using (var document = SpreadsheetDocument.Open(filePath, false))
{
var workbookPart = document.WorkbookPart;
var workbook = workbookPart.Workbook;
var sheets = workbook.Descendants<Sheet>();
foreach (var sheet in sheets)
{
var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);
var sharedStringPart = workbookPart.SharedStringTablePart;
//var values = sharedStringPart.SharedStringTable.Elements<SharedStringItem>().ToArray();
string text;
var rows = worksheetPart.Worksheet.Descendants<Row>();
foreach (var row in rows)
{
Console.WriteLine();
int count = row.Elements<Cell>().Count();
foreach (Cell c in row.Elements<Cell>())
{
text = c.CellValue.InnerText;
Console.Write(text + " ");
}
}
}
}
Console.ReadLine();
}
Your approach seemed to work ok for me - in that it did "enter the loop".
Nevertheless you could also try something like the following:
void Main()
{
string fileName = #"c:\path\to\my\file.xlsx";
using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false))
{
WorkbookPart workbookPart = doc.WorkbookPart;
SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First();
SharedStringTable sst = sstpart.SharedStringTable;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
Worksheet sheet = worksheetPart.Worksheet;
var cells = sheet.Descendants<Cell>();
var rows = sheet.Descendants<Row>();
Console.WriteLine("Row count = {0}", rows.LongCount());
Console.WriteLine("Cell count = {0}", cells.LongCount());
// One way: go through each cell in the sheet
foreach (Cell cell in cells)
{
if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
{
int ssid = int.Parse(cell.CellValue.Text);
string str = sst.ChildElements[ssid].InnerText;
Console.WriteLine("Shared string {0}: {1}", ssid, str);
}
else if (cell.CellValue != null)
{
Console.WriteLine("Cell contents: {0}", cell.CellValue.Text);
}
}
// Or... via each row
foreach (Row row in rows)
{
foreach (Cell c in row.Elements<Cell>())
{
if ((c.DataType != null) && (c.DataType == CellValues.SharedString))
{
int ssid = int.Parse(c.CellValue.Text);
string str = sst.ChildElements[ssid].InnerText;
Console.WriteLine("Shared string {0}: {1}", ssid, str);
}
else if (c.CellValue != null)
{
Console.WriteLine("Cell contents: {0}", c.CellValue.Text);
}
}
}
}
}
}
I used the filestream approach to open the workbook because this allows you to open it with shared access - so that you can have the workbook open in Excel at the same time. The Spreadsheet.Open(... method won't work if the workbook is open elsewhere.
Perhaps that is why your code didn't work.
Note, also, the use of the SharedStringTable to get the cell text where appropriate.
EDIT 2018-07-11:
Since this post is still getting votes I should also point out that in many cases it may be a lot easier to use ClosedXML to manipulate/read/edit your workbooks. The documentation examples are pretty user friendly and the coding is, in my limited experience, much more straight forward. Just be aware that it does not (yet) implement all the Excel functions (for example INDEX and MATCH) which may or may not be an issue. [Not that I would want to be trying to deal with INDEX and MATCH in OpenXML anyway.]
I had the same issue as the OP, and the answer above did not work for me.
I think this is the issue: when you create a document in Excel (not programmatically), you have 3 sheets by default and the WorksheetParts that has the row data for Sheet1 is the last WorksheetParts element, not the first.
I figured this out by putting a watch for document.WorkbookPart.WorksheetParts in Visual Studio, expanding Results, then looking at all of the sub elements until I found a SheetData object where HasChildren = true.
Try this:
// open the document read-only
SpreadSheetDocument document = SpreadsheetDocument.Open(filePath, false);
SharedStringTable sharedStringTable = document.WorkbookPart.SharedStringTablePart.SharedStringTable;
string cellValue = null;
foreach (WorksheetPart worksheetPart in document.WorkbookPart.WorksheetParts)
{
foreach (SheetData sheetData in worksheetPart.Worksheet.Elements<SheetData>())
{
if (sheetData.HasChildren)
{
foreach (Row row in sheetData.Elements<Row>())
{
foreach (Cell cell in row.Elements<Cell>())
{
cellValue = cell.InnerText;
if (cell.DataType == CellValues.SharedString)
{
Console.WriteLine("cell val: " + sharedStringTable.ElementAt(Int32.Parse(cellValue)).InnerText);
}
else
{
Console.WriteLine("cell val: " + cellValue);
}
}
}
}
}
}
document.Close();
Read Large Excel :
openxml has two approaches of DOM and SAX to read an excel. the DOM one consume more RAM resource since it loads the whole xml content(Excel file) in Memory but its strong typed approach.
SAX in other hand is event base parse. more here
so if you are facing large excel file its better to use SAX.
the below code sample uses SAX approach and also handle two important scenario in excel file reading.
open xml skips the empty cells so your dataset faces displacement and wrong index.
you need to skip the empty rows also.
this function returns the exact actual index of the cell at the time and handle the first scenario.
from here
private static int CellReferenceToIndex(Cell cell)
{
int index = 0;
string reference = cell.CellReference.ToString().ToUpper();
foreach (char ch in reference)
{
if (Char.IsLetter(ch))
{
int value = (int)ch - (int)'A';
index = (index == 0) ? value : ((index + 1) * 26) + value;
}
else
return index;
}
return index;
}
code to read excel sax approach.
//i want to import excel to data table
dt = new DataTable();
using (SpreadsheetDocument document = SpreadsheetDocument.Open(path, false))
{
WorkbookPart workbookPart = document.WorkbookPart;
WorksheetPart worksheetPart = workbookPart.WorksheetParts.First();
OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
//row counter
int rcnt = 0;
while (reader.Read())
{
//find xml row element type
//to understand the element type you can change your excel file eg : test.xlsx to test.zip
//and inside that you may observe the elements in xl/worksheets/sheet.xml
//that helps to understand openxml better
if (reader.ElementType == typeof(Row))
{
//create data table row type to be populated by cells of this row
DataRow tempRow = dt.NewRow();
//***** HANDLE THE SECOND SENARIO*****
//if row has attribute means it is not a empty row
if (reader.HasAttributes)
{
//read the child of row element which is cells
//here first element
reader.ReadFirstChild();
do
{
//find xml cell element type
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
string cellValue;
int actualCellIndex = CellReferenceToIndex(c);
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
}
else
{
cellValue = c.CellValue.InnerText;
}
//if row index is 0 its header so columns headers are added & also can do some headers check incase
if (rcnt == 0)
{
dt.Columns.Add(cellValue);
}
else
{
// instead of tempRow[c.CellReference] = cellValue;
tempRow[actualCellIndex] = cellValue;
}
}
}
while (reader.ReadNextSibling());
//if its not the header row so append rowdata to the datatable
if (rcnt != 0)
{
dt.Rows.Add(tempRow);
}
rcnt++;
}
}
}
}
Everything is explained in the accepted answer.
Here is just an extension method to solve the problem
public static string GetCellText(this Cell cell, in SharedStringTable sst)
{
if (cell.CellValue is null)
return string.Empty;
if ((cell.DataType is not null) &&
(cell.DataType == CellValues.SharedString))
{
int ssid = int.Parse(cell.CellValue.Text);
return sst.ChildElements[ssid].InnerText;
}
return cell.CellValue.Text;
}

Categories