I need to read (and parse) large spreadsheet files (20-50 MB) using the Open XML libraries, and there doesn't seem to be a way to stream the rows one at a time for parsing.
I'm consistently getting out-of-memory exceptions: as soon as I attempt to access a row (or iterate), the contents of all the rows (100K+ rows) seem to be loaded.
Every call I have tried, whether Elements.Where( with query )
or Descendants ( ), seems to load the entire row set.
Is there a way to stream the rows, or to read just one row at a time?
Thanks.
I found an answer: if you use an OpenXmlReader on the worksheet part, you can iterate through the part and effectively lazy-load the elements you come across.
OpenXmlReader oxr = OpenXmlReader.Create(worksheetPart);
look for
ElementType == typeof(SheetData)
and load the row (lazy)
Row row = (Row)oxr.LoadCurrentElement();
Do the Open XML libraries use a DOM or a SAX model? With DOM you usually have to hold the entire document in memory at once, but with SAX you can process the events as they are streamed.
Here is code that reads a large Excel file with multiple sheets using the SAX approach:
/// <summary>
/// Reads every visible worksheet of an Excel workbook into the 20-column table built by
/// <c>CreateProviderRvenueSharingTable</c>, using the forward-only <c>OpenXmlReader</c>.
/// </summary>
/// <param name="newFilePath">Path of the workbook to read.</param>
/// <returns>The populated table; only rows for which all 20 columns were assigned are added.</returns>
/// <remarks>
/// Exceptions propagate unchanged. The previous <c>catch (Exception ex) { throw ex; }</c> did nothing
/// except reset the stack trace, so it was removed.
/// </remarks>
public static DataTable ReadIntoDatatableFromExcel(string newFilePath)
{
    /*Creating a table with 20 columns*/
    var dt = CreateProviderRvenueSharingTable();
    /*using stream so that if excel file is in another process then it can read without error*/
    using (Stream stream = new FileStream(newFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(stream, false))
    {
        var workbookPart = spreadsheetDocument.WorkbookPart;
        var workbook = workbookPart.Workbook;
        /*get only unhide tabs*/
        var sheets = workbook.Descendants<Sheet>().Where(e => e.State == null);
        foreach (var sheet in sheets)
        {
            var worksheetPart = (WorksheetPart)workbookPart.GetPartById(sheet.Id);
            /*Remove empty sheets*/
            if (!HasMoreThanOneNonEmptyRow(worksheetPart))
            {
                continue;
            }

            /* The using block closes the reader even when a cell fails to parse. */
            using (OpenXmlReader reader = OpenXmlReader.Create(worksheetPart))
            {
                int i = 0;
                int BTR = 0;/*Break the reader while empty rows are found*/
                while (reader.Read())
                {
                    if (reader.ElementType == typeof(Row))
                    {
                        /*ignoring first row with headers and check if data is there after header*/
                        /* NOTE(review): two Row notifications are skipped - presumably the start and the
                           end element of the header row; confirm against OpenXmlReader behaviour. */
                        if (i < 2)
                        {
                            i++;
                            continue;
                        }
                        reader.ReadFirstChild();
                        DataRow row = dt.NewRow();
                        int CN = 0;
                        if (reader.ElementType == typeof(Cell))
                        {
                            do
                            {
                                Cell c = (Cell)reader.LoadCurrentElement();
                                /*reader skipping blank cells so data is getting wrong in datatable's rows according to header*/
                                if (CN != 0)
                                {
                                    int cellColumnIndex =
                                        ExcelHelper.GetColumnIndexFromName(
                                            ExcelHelper.GetColumnName(c.CellReference));
                                    if (cellColumnIndex < 20 && CN < cellColumnIndex - 1)
                                    {
                                        do
                                        {
                                            row[CN] = string.Empty;
                                            CN++;
                                        } while (CN < cellColumnIndex - 1);
                                    }
                                }
                                /*stopping execution if first cell does not have any value which means empty row*/
                                if (CN == 0 && c.DataType == null && c.CellValue == null)
                                {
                                    BTR++;
                                    break;
                                }
                                string cellValue = GetCellValue(c, workbookPart);
                                row[CN] = cellValue;
                                CN++;
                                /*if any text exists after T column (index 20) then skip the reader*/
                                if (CN == 20)
                                {
                                    break;
                                }
                            } while (reader.ReadNextSibling());
                        }
                        /*reader skipping blank cells so fill the array upto 19 index*/
                        while (CN != 0 && CN < 20)
                        {
                            row[CN] = string.Empty;
                            CN++;
                        }
                        if (CN == 20)
                        {
                            dt.Rows.Add(row);
                        }
                    }
                    /*escaping empty rows below data filled rows after checking 5 times */
                    if (BTR > 5)
                        break;
                }
            }
        }
    }
    return dt;
}

/// <summary>
/// Returns true when the worksheet holds at least two rows whose inner text is not empty
/// (a header plus some data).
/// </summary>
/// <remarks>
/// Rows are loaded one at a time and the scan stops at the second non-empty row, so the
/// worksheet is never materialised as a whole. The previous check built a list of every row.
/// </remarks>
private static bool HasMoreThanOneNonEmptyRow(WorksheetPart worksheetPart)
{
    using (OpenXmlReader probe = OpenXmlReader.Create(worksheetPart))
    {
        int nonEmptyRows = 0;
        while (probe.Read())
        {
            if (probe.ElementType != typeof(Row) || !probe.IsStartElement)
            {
                continue;
            }
            Row candidate = (Row)probe.LoadCurrentElement();
            if (candidate.InnerText != string.Empty && ++nonEmptyRows > 1)
            {
                return true;
            }
        }
    }
    return false;
}
/// <summary>
/// Returns the text of a cell, resolving shared-string cells through the workbook's shared string table.
/// </summary>
/// <param name="c">The cell to read.</param>
/// <param name="workbookPart">Workbook part that owns the shared string table.</param>
/// <returns>
/// The cell text, or an empty string when the cell has no value or the shared string cannot be resolved
/// because the workbook has no shared string table.
/// </returns>
private static string GetCellValue(Cell c, WorkbookPart workbookPart)
{
    // A cell without a value element has nothing to return. Previously a shared-string cell
    // without a value caused a NullReferenceException.
    if (c == null || c.CellValue == null)
    {
        return string.Empty;
    }

    string rawValue = c.CellValue.InnerText;
    bool isSharedString = c.DataType != null && c.DataType == CellValues.SharedString;
    if (!isSharedString)
    {
        return rawValue;
    }

    SharedStringTablePart tablePart = workbookPart.SharedStringTablePart;
    if (tablePart == null || tablePart.SharedStringTable == null)
    {
        return string.Empty;
    }

    // The stored value is the index of the entry in the shared string table; parse it
    // culture-independently.
    int sharedIndex = int.Parse(rawValue, System.Globalization.CultureInfo.InvariantCulture);
    SharedStringItem ssi = tablePart.SharedStringTable
        .Elements<SharedStringItem>()
        .ElementAt(sharedIndex);

    // Entries without a direct text child (e.g. formatted runs) previously came back as an empty
    // string; fall back to the concatenated inner text instead.
    return ssi.Text != null ? ssi.Text.Text : ssi.InnerText;
}
/// <summary>
/// Converts a column name ("A", "AB") or a cell reference ("AB12", "$C$7") to its 1-based column index.
/// </summary>
/// <param name="columnNameOrCellReference">Column name or cell reference; characters other than A-Z/a-z are ignored.</param>
/// <returns>The 1-based column index (A = 1, Z = 26, AA = 27), or 0 when the text contains no column letters.</returns>
/// <exception cref="ArgumentNullException">The argument is null.</exception>
public static int GetColumnIndexFromName(string columnNameOrCellReference)
{
    if (columnNameOrCellReference == null)
    {
        throw new ArgumentNullException("columnNameOrCellReference");
    }

    int columnIndex = 0;
    foreach (char ch in columnNameOrCellReference)
    {
        // Normalise case first: 'a' - 'A' is 32, so lower-case input used to produce wrong indices.
        char letter = char.ToUpperInvariant(ch);
        if (letter < 'A' || letter > 'Z')
        {
            // Digits, '$' and non-ASCII letters do not contribute to the column number.
            continue;
        }
        // Base-26 accumulation, most significant letter first.
        columnIndex = (columnIndex * 26) + (letter - 'A' + 1);
    }
    return columnIndex;
}
/// <summary>
/// Extracts the column part of a cell reference, e.g. "AB" from "AB12".
/// </summary>
/// <param name="cellReference">Cell reference made of column letters followed by a row number.</param>
/// <returns>The characters that precede the first numeric character.</returns>
/// <exception cref="ArgumentNullException"><paramref name="cellReference"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// No numeric character occurs within the first four characters of the reference.
/// </exception>
public static string GetColumnName(string cellReference)
{
    if (cellReference == null)
    {
        throw new ArgumentNullException("cellReference");
    }

    // Only the first four positions are inspected, as before. The scan is now also bounded by the
    // string length, so a short reference without a row number ("AB") raises the documented
    // ArgumentOutOfRangeException instead of IndexOutOfRangeException.
    const int lastPositionToInspect = 3;
    int lastPosition = Math.Min(lastPositionToInspect, cellReference.Length - 1);

    /* Advance from L to R until a number, then return 0 through previous position*/
    for (int lastCharPos = 0; lastCharPos <= lastPosition; lastCharPos++)
    {
        if (Char.IsNumber(cellReference[lastCharPos]))
        {
            return cellReference.Substring(0, lastCharPos);
        }
    }
    throw new ArgumentOutOfRangeException("cellReference");
}
/// <summary>
/// Builds the empty "RevenueSharingTransaction" table: 20 string columns, in the order listed below.
/// </summary>
/// <returns>A new table with no rows.</returns>
private static DataTable CreateProviderRvenueSharingTable()
{
    // Column names in their final ordinal order (0..19).
    string[] fieldNames =
    {
        "IMId",
        "InternalPlanId",
        "PaymentReceivedDate",
        "PaymentAmount",
        "BPS",
        "Asset",
        "PaymentType",
        "InvestmentManager",
        "Frequency",
        "StartDateForPayment",
        "EndDateForPayment",
        "Participant",
        "SSN",
        "JEDate",
        "GL",
        "JEDescription",
        "CRAccount",
        "ReportName",
        "ReportLocation",
        "Division"
    };

    DataTable table = new DataTable("RevenueSharingTransaction");
    foreach (string fieldName in fieldNames)
    {
        // Every field is kept as text.
        table.Columns.Add(fieldName, typeof(string));
    }
    return table;
}
What the code does:
1. It reads the sheets in order, starting from the first one.
2. It can read the Excel file even while another process has it open.
3. It fills in blank cells, which the reader would otherwise skip.
4. It stops reading a sheet once it runs into empty rows after the data.
5. It reads 5,000 rows in about 4 seconds.
Related
My Excel (XLSM) file starts with an empty first column, followed by columns that contain values. When the file is read, each empty column is replaced by the next column that has a value, so the data shifts to the left.
XLSM file: before uploading
XLSM file: after uploading, the values have shifted into the empty column
How can I find the used range, or the total number of columns, without the values shifting?
For example, when I count the columns the result should be 3 (A2, B2, C2),
but the conversion only gives me the number of columns that contain values.
Below is the code:
/// <summary>
/// Copies the rows of the first worksheet of the .xlsm file at <c>FilePath</c> into <paramref name="dt"/>.
/// </summary>
/// <param name="dt">
/// Target table. When its name is "specific table " its columns are rebuilt as Column1..ColumnN
/// (N = number of cells in the first row) and the first row is added as data; otherwise the
/// existing columns are kept and reading starts at the second row.
/// </param>
/// <remarks>
/// Nothing is read when the file extension is not .xlsm or the sheet has no rows.
/// Values are assigned to columns in the order the cells appear in the file; cells missing from a
/// row are not padded, so later values move left.
/// </remarks>
private void Get_XLSM_Data(ref DataTable dt)
{
    string strPath = Path.GetExtension(this.FilePath);
    if (strPath == null || !string.Equals(strPath, ".XLSM", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    // The file is only read, so it is opened read-only rather than editable.
    using (SpreadsheetDocument spreadSheetDocument =
        SpreadsheetDocument.Open(this.FilePath, false))
    {
        IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook
            .GetFirstChild<Sheets>().Elements<Sheet>();
        string relationshipId = sheets.First().Id.Value;
        WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument
            .WorkbookPart.GetPartById(relationshipId);
        SheetData sheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();
        if (sheetData == null)
        {
            return;
        }

        // Materialise the rows once. ElementAt(i) on the lazy sequence re-walked the sheet for
        // every row.
        var rows = sheetData.Elements<Row>().ToList();
        if (rows.Count == 0)
        {
            return;
        }

        if (dt.TableName == "specific table ")
        {
            int column = rows[0].ChildElements.Count;
            dt.Columns.Clear();
            for (int col = 1; col <= column; col++)
            {
                dt.Columns.Add("Column" + col);
            }
            //// To add Headers (First row) in data table
            dt.Rows.Add(ReadRowValues(spreadSheetDocument, rows[0], dt.Columns.Count));
        }

        // The original wrapped this loop in a "try" with no catch/finally, which does not compile.
        for (int i = 1; i < rows.Count; i++)
        {
            dt.Rows.Add(ReadRowValues(spreadSheetDocument, rows[i], dt.Columns.Count));
        }
    }
}

/// <summary>
/// Reads the cells of one row, in file order, into an array of <paramref name="width"/> values.
/// Cells beyond <paramref name="width"/> are ignored instead of overflowing the array.
/// </summary>
private static string[] ReadRowValues(SpreadsheetDocument document, Row row, int width)
{
    string[] rowData = new string[width];
    int colIndex = 0;
    foreach (Cell cell in row.Elements<Cell>())
    {
        if (colIndex >= width)
        {
            break;
        }
        rowData[colIndex] = GetCellValue(document, cell);
        colIndex++;
    }
    return rowData;
}
/// <summary>
/// Returns the display text of a cell: the shared string for shared-string cells, the number
/// re-formatted through <c>double.ToString()</c> for numeric values, and the raw text otherwise.
/// </summary>
/// <param name="document">Document that owns the shared string table.</param>
/// <param name="cell">Cell to read; may be null.</param>
/// <returns>
/// The cell text, or an empty string for a null cell, a cell without a value, or a shared-string
/// cell in a workbook that has no shared string table.
/// </returns>
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
    if (cell == null || cell.CellValue == null)
    {
        return String.Empty;
    }

    // InnerText instead of InnerXml: InnerXml is markup, so characters such as '&' would come back
    // as XML entities.
    string cellValue = cell.CellValue.InnerText;

    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
    {
        SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
        if (stringTablePart == null || stringTablePart.SharedStringTable == null)
        {
            return String.Empty;
        }
        // The stored value is the index into the shared string table.
        int sharedIndex = Int32.Parse(cellValue, CultureInfo.InvariantCulture);
        return stringTablePart.SharedStringTable.ChildElements[sharedIndex].InnerText;
    }

    if (string.IsNullOrEmpty(cellValue))
    {
        return cellValue;
    }

    // Same number styles as double.Parse(string, IFormatProvider). TryParse keeps non-numeric
    // values from raising FormatException; they are returned unchanged.
    double number;
    if (double.TryParse(cellValue, NumberStyles.Float | NumberStyles.AllowThousands,
        CultureInfo.InvariantCulture, out number))
    {
        // Formatted with the current culture, as before.
        return number.ToString();
    }
    return cellValue;
}
// Derives the used column range from the "spans" attribute of the first row.
// The attribute is read as a space-separated list of "first:last" pairs (e.g. "1:3 5:8"); the
// lowest "first" and the highest "last" are kept. The previous code parsed only "a:b" and threw
// FormatException when more than one pair was present.
// startcolumnInuse and endColumnInUse are declared outside this fragment and are left untouched
// when the row has no usable spans.
SheetData firstSheetData = worksheetPart.Worksheet.GetFirstChild<SheetData>();
Row row = firstSheetData != null ? firstSheetData.Elements<Row>().FirstOrDefault() : null;
var totalnumberOfColumns = 0;
if (row != null)
{
    var spans = row.Spans != null ? row.Spans.InnerText : "";
    bool anySpan = false;
    foreach (string span in spans.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] columns = span.Split(':');
        int spanStart;
        int spanEnd;
        if (columns.Length != 2
            || !int.TryParse(columns[0], System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture, out spanStart)
            || !int.TryParse(columns[1], System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture, out spanEnd))
        {
            // Skip malformed entries rather than failing the whole read.
            continue;
        }
        if (!anySpan || spanStart < startcolumnInuse)
        {
            startcolumnInuse = spanStart;
        }
        if (!anySpan || spanEnd > endColumnInUse)
        {
            endColumnInUse = spanEnd;
        }
        anySpan = true;
    }
    if (anySpan)
    {
        totalnumberOfColumns = endColumnInUse;
    }
}
Below is the screen shot to find the maximum column present through span with above code i have shared
Here i have used different excel file(XLSM)
Below is the screen shot to find the maximum column present through span
with above code i have shared
Here i have used different excel file(XLSM)
I have the following piece of code in a much larger OpenXML Excel reader. This reader grabs the information assigns to a dataset and then is displayed in a datagridview:
/// <summary>
/// Reads a worksheet into a <see cref="DataTable"/>. Columns are created from the cells of the first
/// row (named "position: value"); data rows are read by row index from <paramref name="startingRow"/>.
/// </summary>
/// <param name="xlsxFilePath">Path of the workbook.</param>
/// <param name="sheetName">Name of the sheet to read; null or empty selects the first worksheet part.</param>
/// <param name="startingRow">1-based row index of the first data row.</param>
/// <returns>The table; empty when the workbook has no worksheet or the worksheet has no rows.</returns>
/// <exception cref="ArgumentException">A sheet named <paramref name="sheetName"/> does not exist.</exception>
/// <remarks>
/// Reading stops at the first existing row whose cells are all empty, or at row 150000.
/// Row indexes that do not exist in the file are skipped.
/// </remarks>
public static DataTable ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName, int startingRow) {
    const uint maxRowIndex = 150000;
    DataTable dt = new DataTable();
    // Opened read-only: nothing is written back to the workbook.
    using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, false)) {
        //Access the main Workbook part, which contains data
        WorkbookPart workbookPart = myWorkbook.WorkbookPart;
        WorksheetPart worksheetPart = null;
        if (!string.IsNullOrEmpty(sheetName)) {
            Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
            if (ss == null) {
                // Previously surfaced as a NullReferenceException on ss.Id.
                throw new ArgumentException("Sheet '" + sheetName + "' was not found in the workbook.", "sheetName");
            }
            worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
        } else {
            worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
        }
        if (worksheetPart == null) {
            return dt;
        }
        SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;

        // One pass over the rows: remember the first row and index the rest by RowIndex.
        // The previous code re-scanned every row of the sheet for each of up to 150000 iterations.
        var rowsByIndex = new System.Collections.Generic.Dictionary<uint, Row>();
        Row firstRow = null;
        uint highestRowIndex = 0;
        foreach (Row candidate in worksheetPart.Worksheet.Descendants<Row>()) {
            if (firstRow == null) {
                firstRow = candidate;
            }
            if (object.ReferenceEquals(candidate.RowIndex, null)) {
                continue;
            }
            uint rowIndex = candidate.RowIndex.Value;
            if (!rowsByIndex.ContainsKey(rowIndex)) {
                // Keep the first row for an index, matching the former FirstOrDefault().
                rowsByIndex.Add(rowIndex, candidate);
            }
            if (rowIndex > highestRowIndex) {
                highestRowIndex = rowIndex;
            }
        }
        if (firstRow == null) {
            // Empty sheet: previously a NullReferenceException.
            return dt;
        }

        #region ColumnCreation
        //Returns the columns - come back to this later - may be able to modify this to have
        //A checkbox "Column names in first row"
        int columnInt = 0;
        foreach (Cell c in firstRow.Elements<Cell>()) {
            string value = GetValue(c, stringTablePart);
            dt.Columns.Add(columnInt + ": " + value);
            columnInt++;
        }
        #endregion

        #region Create Rows
        // Indexes above the highest stored row cannot match anything, so the scan stops there.
        int lastIndex = (int)Math.Min(highestRowIndex, maxRowIndex);
        for (int i = Math.Max(startingRow, 0); i <= lastIndex; i++) {
            Row row;
            if (!rowsByIndex.TryGetValue((uint)i, out row)) {
                continue;
            }
            DataRow dr = dt.NewRow();
            bool empty = true;
            int j = 0;
            foreach (Cell c in row.Elements<Cell>()) {
                // Stop before writing past the last column (also covers a table without columns).
                if (j == dt.Columns.Count) {
                    break;
                }
                //Get cell value
                string value = GetValue(c, stringTablePart);
                if (!string.IsNullOrEmpty(value)) {
                    empty = false;
                }
                dr[j] = value;
                j++;
            }
            // The first completely empty row marks the end of the data.
            if (empty) {
                break;
            }
            dt.Rows.Add(dr);
        }
        #endregion
    }
    return dt;
}
/// <summary>
/// Returns the text of a cell, resolving shared-string cells through the shared string table.
/// </summary>
/// <param name="cell">Cell to read.</param>
/// <param name="stringTablePart">Shared string table part of the workbook.</param>
/// <returns>The cell text, or null when the cell is null or has no child elements.</returns>
public static string GetValue(Cell cell, SharedStringTablePart stringTablePart) {
    if (cell == null || cell.ChildElements.Count == 0) return null;
    //get cell value
    // Prefer the value element: taking child 0 unconditionally returned the formula text for cells
    // whose first child is a formula. Inline strings and other content keep the old fallback.
    string value;
    if (cell.CellValue != null) {
        value = cell.CellValue.InnerText;
    } else if (cell.InlineString != null) {
        value = cell.InlineString.InnerText;
    } else {
        value = cell.ElementAt(0).InnerText;
    }
    //Look up real value from shared string table
    if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString)) {
        // The stored value is the index into the shared string table; parse it culture-independently.
        int sharedIndex = Int32.Parse(value, System.Globalization.CultureInfo.InvariantCulture);
        value = stringTablePart.SharedStringTable.ChildElements[sharedIndex].InnerText;
    }
    return value;
}
/// <summary>
/// Adds the name of every sheet in the workbook to <c>cmbSheetSelect</c>.
/// </summary>
/// <param name="fileName">Path of the workbook, opened read-only.</param>
public void GetSheetInfo(string fileName)
{
    // Open file as read-only.
    using (SpreadsheetDocument mySpreadsheet = SpreadsheetDocument.Open(fileName, false))
    {
        // The stray "S sheets = ..." declaration was removed: "S" is not a type and the variable
        // was never used.
        WorkbookPart wbPart = mySpreadsheet.WorkbookPart;
        Sheets theSheets = wbPart.Workbook.Sheets;
        if (theSheets == null)
        {
            return;
        }
        // Elements<Sheet>() skips children that are not Sheet elements, which the implicit cast in
        // "foreach (Sheet item in theSheets)" would have failed on.
        foreach (Sheet item in theSheets.Elements<Sheet>())
        {
            cmbSheetSelect.Items.Add(item.Name);
        }
    }
}
This has worked for basic spreadsheets, but as I try to read more advanced ones I run into a problem or two.
Firstly, I have a worksheet that has 5 columns: see here
However, when I run my program it only returns the first 4 columns; column E and all its data are missing.
My second question: is it possible, using this code (or a variation of it), to specify which row the program should use for the DataGridView column headings?
In case anyone needs this I found that changing:
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
To
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().ElementAtOrDefault(columnIndex)
Worked. With columnIndex being a variable I can change based on the sheet selected.
I am writing a C# console application that will read values from an excel spreadsheet using OpenXML and create a DataTable. The app is able to read all values except those cells which contain a dropdown list. Is there a way for OpenXML to read these cells and determine which value is selected? Any suggestions are greatly appreciated. Thanks in advance.
Current Code:
/// <summary>
/// Returns the text of a cell, resolving shared-string cells through the shared string table.
/// </summary>
/// <param name="cell">Cell to read.</param>
/// <param name="stringTablePart">Shared string table part of the workbook.</param>
/// <returns>The cell text, or null when the cell is null or has no child elements.</returns>
public static string GetValue(Cell cell, SharedStringTablePart stringTablePart)
{
    if (cell == null || cell.ChildElements.Count == 0) return null;
    //get cell value
    // Prefer the value element: taking child 0 unconditionally returned the formula text for cells
    // whose first child is a formula. Inline strings and other content keep the old fallback.
    string value;
    if (cell.CellValue != null)
    {
        value = cell.CellValue.InnerText;
    }
    else if (cell.InlineString != null)
    {
        value = cell.InlineString.InnerText;
    }
    else
    {
        value = cell.ElementAt(0).InnerText;
    }
    //Look up real value from shared string table
    if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
    {
        // The stored value is the index into the shared string table; parse it culture-independently.
        int sharedIndex = Int32.Parse(value, System.Globalization.CultureInfo.InvariantCulture);
        value = stringTablePart.SharedStringTable.ChildElements[sharedIndex].InnerText;
    }
    return value;
}
/// <summary>
/// Reads a worksheet into a local <see cref="DataTable"/> and echoes every value to the console,
/// tab-separated, one line per row.
/// </summary>
/// <param name="xlsxFilePath">Path of the workbook.</param>
/// <param name="sheetName">Name of the sheet to read; null or empty selects the first worksheet part.</param>
/// <exception cref="ArgumentException">A sheet named <paramref name="sheetName"/> does not exist.</exception>
/// <remarks>
/// Column names come from the 11th row element of the sheet; data is read from row index 11 up to
/// the index of the last row element.
/// NOTE(review): when the sheet has no gaps in its row numbering, the header row is also read as
/// the first data row - confirm whether that is intended.
/// </remarks>
public static void ReadData(string xlsxFilePath, string sheetName)
{
    const int headerRowPosition = 10;  // zero-based position of the header among the row elements
    const int firstDataRowIndex = 11;  // row index of the first row that is read as data

    DataTable dt = new DataTable();
    // Opened read-only: nothing is written back to the workbook.
    using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, false))
    {
        //Access the main Workbook part, which contains data
        WorkbookPart workbookPart = myWorkbook.WorkbookPart;
        WorksheetPart worksheetPart = null;
        if (!string.IsNullOrEmpty(sheetName))
        {
            Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
            if (ss == null)
            {
                // Previously surfaced as a NullReferenceException on ss.Id.
                throw new ArgumentException("Sheet '" + sheetName + "' was not found in the workbook.", "sheetName");
            }
            worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
        }
        else
        {
            worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
        }
        if (worksheetPart == null)
        {
            return;
        }
        SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;

        // Materialise the rows once instead of re-scanning the sheet for every row index.
        var allRows = worksheetPart.Worksheet.Descendants<Row>().ToList();
        if (allRows.Count == 0)
        {
            return;
        }

        int numColumns = 0;
        if (allRows.Count > headerRowPosition)
        {
            foreach (Cell c in allRows[headerRowPosition].Elements<Cell>())
            {
                string value = GetValue(c, stringTablePart);
                dt.Columns.Add(value);
                numColumns++;
            }
        }

        var rowsByIndex = new System.Collections.Generic.Dictionary<uint, Row>();
        foreach (Row candidate in allRows)
        {
            if (object.ReferenceEquals(candidate.RowIndex, null))
            {
                continue;
            }
            uint rowIndex = candidate.RowIndex.Value;
            if (!rowsByIndex.ContainsKey(rowIndex))
            {
                // Keep the first row for an index, matching the former FirstOrDefault().
                rowsByIndex.Add(rowIndex, candidate);
            }
        }

        Row lastRow = allRows[allRows.Count - 1];
        uint lastIndex = object.ReferenceEquals(lastRow.RowIndex, null) ? 0u : lastRow.RowIndex.Value;
        for (int i = firstDataRowIndex; i <= lastIndex; i++)
        {
            Row row;
            if (!rowsByIndex.TryGetValue((uint)i, out row))
            {
                continue;
            }
            DataRow dr = dt.NewRow();
            int j = 0;
            foreach (Cell c in row.Elements<Cell>())
            {
                // Stop once every column is filled. The former "j == numColumns-1" check ran after
                // the increment, so the last column was never read.
                if (j == numColumns)
                {
                    break;
                }
                //Get cell value
                string value = GetValue(c, stringTablePart);
                // "||" rather than "&&": a value cannot be null/empty and equal to " " at once, so
                // the blank branch was unreachable.
                if (string.IsNullOrEmpty(value) || value == " ")
                    dr[j] = "";
                else
                    dr[j] = value;
                Console.Write(dr[j] + "\t");
                j++;
            }
            // End the console line for every row, including rows shorter than the header.
            Console.Write("\n");
            dt.Rows.Add(dr);
        }
    }
}
I am using C#. I have two data tables, and I want to find where the rows of the first data table occur in the second data table.
Example.
First data table's data:
1 inam
2 sohan
Second data table's data:
3 ranjan
1 inam
2 sohan
Now I want to know the index, in the second data table, of the two rows of the first data table.
Please help guys.
Any answer or advice
Best Regards
You can use following extension method which returns the first index of a "sub-sequence":
// Each row is flattened into one comma-joined string built from its ItemArray (the
// object array of all field values), so that whole rows can be compared as strings.
IEnumerable<string> first = table1.AsEnumerable()
.Select(r => string.Join(",",r.ItemArray)); // one string per row of table1
IEnumerable<string> second = table2.AsEnumerable()
.Select(r => string.Join(",", r.ItemArray));
// Passing null as the comparer selects the default equality comparer.
int index = second.IndexOfSequence(first, null); // 1
Here the extension:
/// <summary>
/// Finds the first position at which <paramref name="sequence"/> occurs as a contiguous run inside
/// <paramref name="input"/>.
/// </summary>
/// <typeparam name="TSource">Element type of both sequences.</typeparam>
/// <param name="input">Sequence to search; it is enumerated once.</param>
/// <param name="sequence">Non-empty run of elements to look for.</param>
/// <param name="comparer">Element comparer; null selects <see cref="EqualityComparer{T}.Default"/>.</param>
/// <returns>Zero-based index in <paramref name="input"/> where the run starts, or -1 when it does not occur.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> or <paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="sequence"/> is empty.</exception>
public static int IndexOfSequence<TSource>(this IEnumerable<TSource> input, IEnumerable<TSource> sequence, IEqualityComparer<TSource> comparer)
{
    if (input == null) throw new ArgumentNullException("input");
    if (sequence == null) throw new ArgumentNullException("sequence");

    // Materialise the pattern once so it can be compared repeatedly.
    List<TSource> pattern = sequence.ToList();
    if (pattern.Count == 0) throw new ArgumentException("Sequence must not be empty", "sequence");
    if (comparer == null)
    {
        comparer = EqualityComparer<TSource>.Default;
    }

    // Sliding window over the last pattern.Count elements of the input. The earlier version kept
    // its partial match after a mismatch, so it accepted non-contiguous matches (e.g. [1,3] in
    // [1,2,3]) and could report a start index that was not the real start.
    Queue<TSource> window = new Queue<TSource>(pattern.Count);
    int index = -1;
    foreach (TSource item in input)
    {
        index++;
        if (window.Count == pattern.Count)
        {
            window.Dequeue();
        }
        window.Enqueue(item);

        if (window.Count == pattern.Count && window.SequenceEqual(pattern, comparer))
        {
            return index - pattern.Count + 1;
        }
    }
    return -1;
}
tested with this sample data:
// Sample data: two tables with the same schema (ID as int, Name with the default column type).
// table2 starts as an empty structural clone of table1.
var table1 = new DataTable();
table1.Columns.Add(new DataColumn("ID", typeof(int)));
table1.Columns.Add(new DataColumn("Name"));
var table2 = table1.Clone();

// Rows of the first table.
foreach (var fields in new[] { new object[] { 1, "inam" }, new object[] { 2, "Sohan" } })
{
    table1.Rows.Add(fields);
}

// Rows of the second table: one extra row, then the same two rows.
foreach (var fields in new[] { new object[] { 3, "ranjan" }, new object[] { 1, "inam" }, new object[] { 2, "Sohan" } })
{
    table2.Rows.Add(fields);
}
If you don't have much volume this might work....
// Compares every row of tableOne with every row of tableTwo on the "name" column, tracking the
// zero-based position of the current row in each table.
var tableOneIndex = -1;
var tableTwoIndex = -1;
// DataTable.Rows is a non-generic collection: with "var" the loop variable is typed object and
// the ["name"] indexer does not compile, so the element type is stated explicitly.
foreach (DataRow tableOneRow in tableOne.Rows)
{
    tableOneIndex++;
    // Restart the inner position for each outer row; otherwise it keeps counting past the end of
    // tableTwo.
    tableTwoIndex = -1;
    foreach (DataRow tableTwoRow in tableTwo.Rows)
    {
        tableTwoIndex++;
        if (tableOneRow["name"].ToString() == tableTwoRow["name"].ToString())
        {
            // Do whatever you wanted to do with the index values
        }
    }
}
As a simple solution, this should suffice:
// Create and populate data tables
DataTable dataTable1 = new DataTable();
dataTable1.Columns.Add("Name", typeof(string));
DataRow row1 = dataTable1.NewRow();
row1["Name"] = "Inam";
DataRow row2 = dataTable1.NewRow();
row2["Name"] = "Sohan";
dataTable1.Rows.Add(row1);
dataTable1.Rows.Add(row2);
DataTable dataTable2 = new DataTable();
dataTable2.Columns.Add("Name", typeof(string));
DataRow row3 = dataTable2.NewRow();
row3["Name"] = "Ranjan";
DataRow row4 = dataTable2.NewRow();
row4["Name"] = "Inam";
DataRow row5 = dataTable2.NewRow();
row5["Name"] = "Sohan";
dataTable2.Rows.Add(row3);
dataTable2.Rows.Add(row4);
dataTable2.Rows.Add(row5);
// Loop through rows in first table
foreach (DataRow row in dataTable1.Rows)
{
    // -1 means "not found". Starting at 0 made a missing row look like a match at index 0.
    int rowIndexInSecondTable = -1;
    // Loop through rows in second table
    for (int i = 0; i < dataTable2.Rows.Count; i++)
    {
        // The indexer returns object, so "==" compares references, not string contents.
        // object.Equals compares the values.
        if (object.Equals(row["Name"], dataTable2.Rows[i]["Name"]))
        {
            // Set the current index and break to stop further processing
            rowIndexInSecondTable = i;
            break;
        }
    }
    // The index of the row in the second table is now stored in the rowIndexInSecondTable variable, use it as needed, for example, writing to the console
    if (rowIndexInSecondTable >= 0)
    {
        Console.WriteLine("Row with name '" + row["Name"] + "' found at index " + rowIndexInSecondTable.ToString());
    }
    else
    {
        Console.WriteLine("Row with name '" + row["Name"] + "' was not found in the second table");
    }
}
I have a problem with reading .xlsx files in asp.net mvc2.0 application, using c#. Problem occurs when reading empty cell from .xlsx file. My code simply skips this cell and reads the next one.
For example, if the contents of .xlsx file are:
FirstName LastName Age
John 36
They will be read as:
FirstName LastName Age
John 36
Here's the code that does the reading.
/// <summary>
/// Returns the text of a cell, resolving shared-string cells through the shared string table.
/// </summary>
/// <param name="cell">Cell to read.</param>
/// <param name="stringTablePart">Shared string table part of the workbook.</param>
/// <returns>The cell text, or an empty string when the cell is null or has no child elements.</returns>
private string GetValue(Cell cell, SharedStringTablePart stringTablePart)
{
    if (cell == null || cell.ChildElements.Count == 0)
        return string.Empty;
    //get cell value
    // Prefer the value element: taking child 0 unconditionally returned the formula text for cells
    // whose first child is a formula. Inline strings and other content keep the old fallback.
    string value;
    if (cell.CellValue != null)
    {
        value = cell.CellValue.InnerText;
    }
    else if (cell.InlineString != null)
    {
        value = cell.InlineString.InnerText;
    }
    else
    {
        value = cell.ElementAt(0).InnerText;
    }
    //Look up real value from shared string table
    if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
    {
        // The stored value is the index into the shared string table; parse it culture-independently.
        int sharedIndex = Int32.Parse(value, System.Globalization.CultureInfo.InvariantCulture);
        value = stringTablePart.SharedStringTable.ChildElements[sharedIndex].InnerText;
    }
    return value;
}
/// <summary>
/// Reads a worksheet into a <see cref="DataTable"/>, using the first row as column names and placing
/// every value in the column named by its cell reference.
/// </summary>
/// <param name="xlsxFilePath">Path of the workbook.</param>
/// <param name="sheetName">Name of the sheet to read; null or empty selects the first worksheet part.</param>
/// <returns>The table; empty when the workbook has no worksheet or the worksheet has no rows.</returns>
/// <exception cref="ArgumentException">A sheet named <paramref name="sheetName"/> does not exist.</exception>
/// <remarks>
/// Empty cells can be absent from the file. Values used to be written to consecutive columns, so
/// every value after a missing cell moved one column to the left. Columns are now derived from each
/// cell's reference ("B2" is column 1); skipped columns receive an empty string.
/// Data is read from row index 2 up to the index of the last row; reading stops at the first
/// existing row whose cells are all empty.
/// </remarks>
private DataTable ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName)
{
    DataTable dt = new DataTable();
    // Opened read-only: nothing is written back to the workbook.
    using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, false))
    {
        //Access the main Workbook part, which contains data
        WorkbookPart workbookPart = myWorkbook.WorkbookPart;
        WorksheetPart worksheetPart = null;
        if (!string.IsNullOrEmpty(sheetName))
        {
            Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
            if (ss == null)
            {
                // Previously surfaced as a NullReferenceException on ss.Id.
                throw new ArgumentException("Sheet '" + sheetName + "' was not found in the workbook.", "sheetName");
            }
            worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
        }
        else
        {
            worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
        }
        if (worksheetPart == null)
        {
            return dt;
        }
        SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;

        // Materialise the rows once instead of re-scanning the sheet for every row index.
        var allRows = worksheetPart.Worksheet.Descendants<Row>().ToList();
        if (allRows.Count == 0)
        {
            return dt;
        }

        // Header: one column per cell of the first row, padded with default-named columns where
        // the header row itself skips a column.
        foreach (Cell c in allRows[0].Elements<Cell>())
        {
            int headerIndex = ResolveColumnIndex(c, dt.Columns.Count);
            while (dt.Columns.Count < headerIndex)
            {
                dt.Columns.Add();
            }
            string value = GetValue(c, stringTablePart);
            dt.Columns.Add(value);
        }

        var rowsByIndex = new System.Collections.Generic.Dictionary<uint, Row>();
        foreach (Row candidate in allRows)
        {
            if (object.ReferenceEquals(candidate.RowIndex, null))
            {
                continue;
            }
            uint rowIndex = candidate.RowIndex.Value;
            if (!rowsByIndex.ContainsKey(rowIndex))
            {
                // Keep the first row for an index, matching the former FirstOrDefault().
                rowsByIndex.Add(rowIndex, candidate);
            }
        }

        Row lastRow = allRows[allRows.Count - 1];
        uint lastIndex = object.ReferenceEquals(lastRow.RowIndex, null) ? 0u : lastRow.RowIndex.Value;
        for (int i = 2; i <= lastIndex; i++)
        {
            Row row;
            if (!rowsByIndex.TryGetValue((uint)i, out row))
            {
                continue;
            }
            DataRow dr = dt.NewRow();
            bool empty = true;
            int nextColumn = 0;
            foreach (Cell c in row.Elements<Cell>())
            {
                int columnIndex = ResolveColumnIndex(c, nextColumn);
                if (columnIndex >= dt.Columns.Count)
                {
                    // Cells to the right of the last header column are ignored, as before.
                    break;
                }
                // Columns that have no cell in the file get an explicit empty value.
                for (int gap = nextColumn; gap < columnIndex; gap++)
                {
                    dr[gap] = string.Empty;
                }
                //Get cell value
                string value = GetValue(c, stringTablePart);
                if (!string.IsNullOrEmpty(value))
                {
                    empty = false;
                }
                dr[columnIndex] = value;
                nextColumn = columnIndex + 1;
            }
            // The first completely empty row marks the end of the data.
            if (empty)
            {
                break;
            }
            dt.Rows.Add(dr);
        }
    }
    return dt;
}

/// <summary>
/// Returns the zero-based column index encoded in the cell's reference ("A1" is 0, "AA7" is 26).
/// Falls back to <paramref name="fallbackIndex"/> when the cell has no usable reference or the
/// reference points to a column before the fallback.
/// </summary>
private static int ResolveColumnIndex(Cell cell, int fallbackIndex)
{
    var reference = cell.CellReference;
    string text = object.ReferenceEquals(reference, null) ? null : reference.Value;
    if (string.IsNullOrEmpty(text))
    {
        return fallbackIndex;
    }

    int oneBased = 0;
    foreach (char ch in text)
    {
        char letter = char.ToUpperInvariant(ch);
        if (letter < 'A' || letter > 'Z')
        {
            // The column letters end where the row number starts.
            break;
        }
        oneBased = (oneBased * 26) + (letter - 'A' + 1);
    }

    int zeroBased = oneBased - 1;
    return zeroBased >= fallbackIndex ? zeroBased : fallbackIndex;
}
I had the same problem.
This is my workaround:
// Column distance between the previously written cell and the current cell.
int offset = GetColDiff(lastCol, cell.CellReference);
// Fill the columns that have no cell in the file: one DBNull per column skipped
// between the previous cell and this one.
while (offset-- > 1)
dt.Rows[rowCounter][cnt++] = DBNull.Value;
// Write the current cell's value into the next column.
dt.Rows[rowCounter][cnt++] = value;
// Remember this cell's reference for the next distance calculation.
lastCol = cell.CellReference;
******************
// Calculates the column distance between two cell references.
// prev: reference of the previously processed cell, or "0" before the first cell of a row.
// curr: reference of the current cell.
// Returns the 1-based column number of curr minus that of prev. The "0" start marker counts as
// column 0, so a first cell in column A yields 1 and a first cell in column B yields 2.
// The earlier formula (index += 26 * index + letter) mapped "AA" to the same number as "A" and
// treated the start marker like column A, so a row starting in column B got no filler.
int GetColDiff(string prev, string curr)
{
    int prevColumn = 0;
    // "0" has no leading letters, so the start marker stays at column 0.
    for (int i = 0; i < prev.Length && Char.IsLetter(prev[i]); i++)
    {
        prevColumn = (prevColumn * 26) + (char.ToUpperInvariant(prev[i]) - 'A' + 1);
    }

    int currColumn = 0;
    for (int i = 0; i < curr.Length && char.IsLetter(curr[i]); i++)
    {
        currColumn = (currColumn * 26) + (char.ToUpperInvariant(curr[i]) - 'A' + 1);
    }

    return currColumn - prevColumn;
}
My solution to this problem isn't quite as elegant as some might use.
First, I map the columns to a char (A, B, C, D, etc), so I can know that FirstName = A, LastName = B, and Age = C.
Next, I look through the dataCells to see if there is a cell that has the Age reference. If there is a Age cell referenced, I will check the cell's DataType.
ex: dataCells.Where(x => x.CellReference.Value.Contains(cellIndex)).First().DataType == CellValues.SharedString)
In this case, cellIndex would = 'C'.
If the previous linq query is true, then you'll go to the sharedString table and find the value for the age by CellReference.
var age = sharedStrings.ChildElements[int.Parse(dataCells.Where(x => x.CellReference.Value.Contains(cellIndex)).FirstOrDefault().InnerText)].InnerText;
Your problem with accidentally setting the LastName (Column B) to whatever the Age (Column C) should be avoided if you work off of cell reference for each DataRow.
Side note: One thing I just ran into is that blank cells in Excel are stored two different ways. Sometimes there's a reference to a SharedStringTable index (cell.DataType = "s" and cell.InnerText = "37"), and sometimes the cell is just empty (cell.DataType = null and cell.InnerText = "").