I am looking for some assistance in obtaining the Columns from a specific worksheet using C#. I am currently able to connect to the Excel file and obtain a read of the Columns, but it is giving me the Columns for every worksheet in my Excel, not a specific one.
What can I do to my code to obtain only the Columns from the desired worksheet? Here is my code which currently fills my Checkbox List with all of the columns.
// Build the ACE OLE DB connection string for the workbook at strFullPath.
string connectionString = String.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0\"", strFullPath);
OleDbConnection excelConnection = new OleDbConnection(connectionString);
using (OleDbCommand cmd = new OleDbCommand("SELECT * FROM [LogFile$]", excelConnection))
{
    excelConnection.Open();

    // NOTE: the schema is requested from the connection, not from cmd, so the
    // [LogFile$] query above plays no part in what is returned here.
    DataTable columnSchema = excelConnection.GetSchema("Columns");

    // Bind the schema rows to the checkbox list; text and value both use the
    // "Column_name" field.
    const string columnNameField = "Column_name";
    cbColumnList.DataSource = columnSchema;
    cbColumnList.DataTextField = columnNameField;
    cbColumnList.DataValueField = columnNameField;
    cbColumnList.DataBind();
}
I am fairly sure my issue has something to do with where I am creating the DataTable, as I'm pulling the schema from excelConnection and not cmd, thus it's most likely bypassing my query where I have defined the worksheet to get the columns from. If this is the case, how would I fix it?
First Solution
using System;
using System.Data;
using System.Data.OleDb;
namespace ConsoleApp35
{
    /// <summary>
    /// Console sample that loads a single worksheet of an Excel workbook
    /// through OLE DB and prints its column names followed by its rows.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads every row of [Sheet1$] from D:\Coverage.xlsx into a DataTable,
        /// writes the column names and then each row (comma separated) to the
        /// console, and waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string connectionString = String.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0\"", @"D:\Coverage.xlsx");

            // The connection, command and adapter are all IDisposable; the stacked
            // using statements release them even when reading fails.
            using (OleDbConnection excelConnection = new OleDbConnection(connectionString))
            using (OleDbCommand command = new OleDbCommand("SELECT * FROM [Sheet1$]", excelConnection))
            using (OleDbDataAdapter adapter = new OleDbDataAdapter(command))
            {
                var dt = new DataTable();
                try
                {
                    excelConnection.Open();

                    // Selecting from one sheet means dt.Columns holds only that
                    // sheet's columns.
                    adapter.Fill(dt);

                    // printing columns names
                    foreach (DataColumn column in dt.Columns)
                    {
                        Console.WriteLine(column.ColumnName);
                    }

                    // dt has all data from Sheet1
                    foreach (DataRow row in dt.Rows)
                    {
                        Console.WriteLine(String.Join(", ", row.ItemArray));
                    }
                }
                catch (Exception e)
                {
                    // Report the failure instead of swallowing it silently.
                    Console.Error.WriteLine("Failed to read [Sheet1$]: " + e.Message);
                }

                Console.ReadLine();
            }
        }
    }
}
Second solution
This solution is for those who have the same requirements but use Open XML.
Install both Open XML packages from Microsoft:
https://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=5124
In your solution, add references to the DocumentFormat.OpenXml and WindowsBase assemblies. Add the following using directives:
using DocumentFormat.OpenXml.Spreadsheet;
using DocumentFormat.OpenXml.Packaging;
using System.Text.RegularExpressions;
Add the following methods to your class that processes your Excel file.
/// <summary>
/// Returns the first run of ASCII letters found in <paramref name="cellName"/>
/// (for example "AB" for "AB12"), or an empty string when there is none.
/// </summary>
/// <param name="cellName">A cell reference such as "A1".</param>
/// <returns>The leading letter run of the reference, i.e. its column part.</returns>
private static string GetColumnName(string cellName)
{
    const string lettersPattern = "[a-zA-Z]+";
    Match firstLetters = Regex.Match(cellName, lettersPattern);
    return firstLetters.Value;
}
/// <summary>
/// Looks up an entry of the workbook's shared string table by position.
/// </summary>
/// <param name="workbookPart">Workbook part whose shared string table is read.</param>
/// <param name="id">Zero-based position of the item within the table.</param>
/// <returns>The <see cref="SharedStringItem"/> at position <paramref name="id"/>.</returns>
public static SharedStringItem GetSharedStringItemById(WorkbookPart workbookPart, int id)
{
    var sharedStrings = workbookPart.SharedStringTablePart.SharedStringTable;
    var items = sharedStrings.Elements<SharedStringItem>();
    return items.ElementAt(id);
}
Use the following code to get columns names from the specified sheet. Also, additionally, you can process here all the data you need from the file. I have tested reading data from a simple Excel sheet that contains 2 columns and several rows where cells have integer and string values. For processing other data types you need to add additional blocks of code checking cell.DataType.
// Distinct column letters (A, B, ...) seen on the sheet, and the cell text of
// every row, in document order.
var columnsNames = new HashSet<string>();
var data = new List<List<string>>();

// Open read-only; the using block closes the document, so no explicit Close()
// call is needed.
using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(@"D:\FileName.xlsx", false))
{
    foreach (Sheet s in myDoc.WorkbookPart.Workbook.Sheets)
    {
        if (s.Name == "Sheet1")
        {
            // The sheet entry only carries a relationship id; resolve it to the
            // part that holds the actual cells.
            string relationshipId = s.Id.Value;
            WorksheetPart worksheetPart = (WorksheetPart)myDoc.WorkbookPart.GetPartById(relationshipId);
            var sd = worksheetPart.Worksheet.Elements<SheetData>().First();

            foreach (Row row in sd.Elements<Row>())
            {
                var rowList = new List<string>();
                foreach (Cell cell in row.Elements<Cell>())
                {
                    // get columns names (the reference attribute may be absent)
                    if (cell.CellReference != null)
                    {
                        columnsNames.Add(GetColumnName(cell.CellReference.Value));
                    }

                    // process data
                    string cellValue = string.Empty;
                    if (cell.DataType != null && cell.DataType == CellValues.SharedString)
                    {
                        // Shared-string cells store an index into the shared
                        // string table rather than the text itself.
                        int id;
                        if (Int32.TryParse(cell.InnerText, out id))
                        {
                            SharedStringItem item = GetSharedStringItemById(myDoc.WorkbookPart, id);
                            if (item.Text != null)
                            {
                                cellValue = item.Text.Text;
                            }
                            else if (item.InnerText != null)
                            {
                                cellValue = item.InnerText;
                            }
                            else if (item.InnerXml != null)
                            {
                                cellValue = item.InnerXml;
                            }
                        }
                    }
                    else if (cell.CellValue != null)
                    {
                        // Untyped cells and cells with any other explicit data type:
                        // take the stored value text as-is. Cells without a value
                        // element stay empty.
                        cellValue = cell.CellValue.Text;
                    }

                    rowList.Add(cellValue);
                }
                data.Add(rowList);
            }
        }
    }
}
Related
I'm trying to get the name of the first sheet of an excel workbook.
Instead of getting the sheets name in the order as it appears in the Excel workbook it appears sorted alphabetically.
Does anyone have an idea to get the names not sorted??
/// <summary>
/// Returns the TABLE_NAME of every entry in the workbook's OLE DB "Tables"
/// schema, in the order the provider reports them.
/// </summary>
/// <param name="excelFile">Path of the Excel file to inspect.</param>
/// <returns>
/// The table (sheet) names, or null when the schema table is null or any
/// exception occurs.
/// </returns>
private String[] GetExcelSheetNames(string excelFile)
{
    try
    {
        // Connection string. Change the excel file to the file you will search.
        String connString = "Provider=Microsoft.Jet.OLEDB.4.0;" +
            "Data Source=" + excelFile + ";Extended Properties=Excel 8.0;";

        // The using blocks close and dispose the connection and the schema table
        // on every exit path.
        using (OleDbConnection objConn = new OleDbConnection(connString))
        {
            objConn.Open();

            // Get the data table containing the schema guid.
            using (System.Data.DataTable schema = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
            {
                if (schema == null)
                {
                    return null;
                }

                // Copy the sheet names into the result array.
                String[] sheetNames = new String[schema.Rows.Count];
                for (int index = 0; index < sheetNames.Length; index++)
                {
                    sheetNames[index] = schema.Rows[index]["TABLE_NAME"].ToString();
                }

                return sheetNames;
            }
        }
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as null.
        return null;
    }
}
I have already found a solution to my question.
// Get the name of the first worksheet (sheet number 1).
// Open the workbook through its absolute path so the result does not depend on
// how Excel resolves a relative path.
var excelFile = Path.GetFullPath(llFileName);
var excel = new Excel.Application();
string sheetName;
try
{
    var workbook = excel.Workbooks.Open(excelFile);
    var sheet = (Excel.Worksheet)workbook.Worksheets.Item[1]; // 1 is the first item, this is NOT a zero-based collection
    sheetName = sheet.Name;

    // Nothing was modified, so close without saving.
    workbook.Close(false);
}
finally
{
    // Always shut the Excel instance down so it is not left running.
    excel.Quit();
}
I hope it helps others.
I am creating a winform application where every day, a user will select a xlsx file with the day's shipping information to be merged with our invoicing data.
The challenge I am having is when the user does not download the xlsx file with the specification that the winform data requires. (I wish I could eliminate this step with an API connection but sadly I cannot)
My first step is checking whether the xlsx file has headers, so that I know the selected file is valid.
Example
// HDR=YES tells the provider to treat the first row as column headers.
string connString = "provider=Microsoft.ACE.OLEDB.12.0;Data Source='" + path + "';Extended Properties='Excel 12.0;HDR=YES;';";
Where path is returned from an OpenFileDialog box
If the file that was chosen wasn't downloaded with headers, the statement above throws an exception.
If I change HDR=YES; to HDR=NO; then I have trouble identifying the columns I need and whether the user bothered to include the correct ones.
My code then tries to load the data into a DataTable
private void loadRows()
{
for (int i = 0; i < deliveryTable.Rows.Count; i++)
{
DataRow dr = deliveryTable.Rows[i];
int deliveryId = 0;
bool result = int.TryParse(dr[0].ToString(), out deliveryId);
if (deliveryId > 1 && !Deliveries.ContainsKey(deliveryId))
{
var delivery = new Delivery(deliveryId)
{
SalesOrg = Convert.ToInt32(dr[8]),
SoldTo = Convert.ToInt32(dr[9]),
SoldName = dr[10].ToString(),
ShipTo = Convert.ToInt32(dr[11]),
ShipName = dr[12].ToString(),
};
Which all works only if the columns are in the right place.
If they are not in the right place my thought is to display a message to the user to get the right information
Does anyone have any suggestions?
(Sorry, first time posting a question and still learning to think through it)
I guess you're loading the spreadsheet into a Datatable? Hard to tell with one line of code. I would use the columns collection in the datatable and check to see if all the columns you want are there. Sample code to enumerate the columns below.
/// <summary>
/// Writes every cell of <paramref name="table"/> to the console, one value per
/// line, row by row and column by column.
/// </summary>
/// <param name="table">The table whose values are printed.</param>
private void PrintValues(DataTable table)
{
    for (int rowIndex = 0; rowIndex < table.Rows.Count; rowIndex++)
    {
        DataRow current = table.Rows[rowIndex];
        for (int columnIndex = 0; columnIndex < table.Columns.Count; columnIndex++)
        {
            object cell = current[columnIndex];
            Console.WriteLine(cell);
        }
    }
}
/// <summary>
/// Loads the first table reported by the workbook's OLE DB schema into
/// grdExcel and shows the resulting row count in lblTotalRec.
/// </summary>
/// <param name="PathName">Path of the Excel file, used as the connection's Data Source.</param>
/// <param name="UploadExcelName">Not used by this method; kept for existing callers.</param>
/// <remarks>
/// Returns without touching the grid when the provider returns no schema table.
/// On any exception the grid is cleared and ViewState["Fuletypeidlist"] is set to "0".
/// </remarks>
private void GetExcelSheetForUpload(string PathName, string UploadExcelName)
{
    try
    {
        DataSet dss = new DataSet();
        String connString = "Provider=Microsoft.ACE.OLEDB.12.0;Persist Security Info=True;Extended Properties=Excel 12.0 Xml;Data Source=" + PathName;

        // The using blocks release the connection and schema table on every path.
        using (OleDbConnection objConn = new OleDbConnection(connString))
        {
            objConn.Open();
            using (System.Data.DataTable dt = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
            {
                if (dt == null)
                {
                    return;
                }

                // Only the first schema entry is loaded, so there is no need to
                // walk the remaining rows.
                if (dt.Rows.Count > 0)
                {
                    string firstSheet = dt.Rows[0]["TABLE_NAME"].ToString();
                    using (OleDbCommand cmd = new OleDbCommand("SELECT * FROM [" + firstSheet + "]", objConn))
                    using (OleDbDataAdapter oleda = new OleDbDataAdapter(cmd))
                    {
                        oleda.Fill(dss, "TABLE");
                    }
                }
            }
        }

        // When nothing was loaded, Tables[0] throws and the catch below resets
        // the grid.
        grdExcel.DataSource = dss.Tables[0].DefaultView;
        grdExcel.DataBind();
        lblTotalRec.InnerText = Convert.ToString(grdExcel.Rows.Count);
    }
    catch (Exception)
    {
        // Best-effort UI fallback: flag the failure and show an empty grid.
        ViewState["Fuletypeidlist"] = "0";
        grdExcel.DataSource = null;
        grdExcel.DataBind();
    }
}
// The upload is accepted only when the first header cell of the grid reads "CODE".
string firstHeaderText = grdExcel.HeaderRow.Cells[0].Text.ToString();
bool hasExpectedHeader = firstHeaderText == "CODE";

if (!hasExpectedHeader)
{
    // Make the status box visible and style it as a dismissable error alert.
    divStatusMsg.Style.Add("display", "");
    divStatusMsg.Attributes.Add("class", "alert alert-danger alert-dismissable");
    divStatusMsg.InnerText = "ERROR !!... Upload Excel Sheet in header Defined Format ";
}
else
{
    GetExcelSheetForEmpl(PathName);
}
I have a huge Excel file with around 50k rows. I am reading it using the following connection string
string.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties='Excel 12.0 Xml;HDR=No;IMEX=1'", MyFilePath);
In this huge Excel file, the rows are grouped in nesting pattern. Meaning, lets say first 500 rows are grouped under Group A there is a sub group in that group comprising of rows from 300-400 as Group B and then again from 350-400
in Group C. Now when I read the excel file in my program, I get all the rows, but I cannot distinguish between the row grouping I mentioned above. Is there any smart way to identify and group them accordingly?
Here's a sample of my code.
/// <summary>
/// Runs <paramref name="_query"/> against the workbook and returns every
/// non-blank row as a list of cell strings.
/// </summary>
/// <param name="_query">SQL text executed through OLE DB.</param>
/// <param name="_HasHeaders">
/// When false, an HDR=No connection string built from MyFilePath is used;
/// otherwise the instance's existing connection string (this.conn) is used.
/// </param>
/// <returns>
/// One list per row with one string per field; rows whose cells are all empty
/// strings are removed. If reading fails part-way, the rows read so far are
/// returned.
/// </returns>
private List<List<string>> ReadSheetData(string _query, bool _HasHeaders = true)
{
    string conn = "";
    if (!_HasHeaders)
        conn = string.Format(@"Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties='Excel 12.0 Xml;HDR=No;IMEX=1'", MyFilePath);
    else
        conn = this.conn;

    List<List<string>> ret = new List<List<string>>();
    using (OleDbConnection connection = new OleDbConnection(conn))
    {
        connection.Open();
        try
        {
            using (OleDbCommand command = new OleDbCommand(_query, connection))
            using (OleDbDataReader dr = command.ExecuteReader())
            {
                while (dr.Read())
                {
                    List<string> rowVals = new List<string>(dr.FieldCount);
                    ret.Add(rowVals);
                    for (int i = 0; i < dr.FieldCount; i++)
                    {
                        object cell = dr[i];
                        string value = cell != null ? cell.ToString() : "";
                        rowVals.Add(value);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Still best-effort (partial results are returned), but leave a trace
            // so a failed query is not completely invisible.
            System.Diagnostics.Trace.TraceError("ReadSheetData failed: " + ex);
        }
    }

    // Drop rows in which every cell is an empty string.
    ret.RemoveAll(a => a.All(b => b == ""));
    return ret;
}
Using OleDb, is it possible to get all the named ranges of a particular sheet in Excel?
I have written following code which gives me NamedRanges but I am not able to figure out to which sheet does the NamedRange refer to.
/// <summary>
/// Returns the TABLE_NAME of every row in the workbook's OLE DB "Tables_Info"
/// schema.
/// </summary>
/// <param name="excelFilePath">Path of the Excel file to inspect.</param>
/// <returns>
/// The reported table names, or null when the schema table is null or any
/// exception occurs.
/// </returns>
private String[] GetExcelSheetNames(string excelFilePath)
{
    try
    {
        // Earlier variant, kept for reference:
        //String connString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + excelFile + ";Extended Properties=Excel 12.0;";
        string connectionString = string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=Excel 12.0", excelFilePath);

        // Both disposables are released by their using blocks on every exit path.
        using (OleDbConnection objConn = new OleDbConnection(connectionString))
        {
            objConn.Open();

            // Get the data table containing the schema guid.
            using (System.Data.DataTable schemaTable = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables_Info, null))
            {
                if (schemaTable == null)
                {
                    return null;
                }

                // Collect the names in schema order.
                String[] names = new String[schemaTable.Rows.Count];
                int next = 0;
                foreach (DataRow schemaRow in schemaTable.Rows)
                {
                    names[next] = schemaRow["TABLE_NAME"].ToString();
                    next++;
                }

                return names;
            }
        }
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as null.
        return null;
    }
}
I am a Open XML SDK fan. The solution is straightforward. This returns both workbook and sheet scoped named ranges, on the left there's the Excel name manager definitions, 2 sheets with 2 named ranges in each sheet, on the right a sample run.
MSDN reference.
/// <summary>
/// Opens the given workbook read-only and collects its defined names.
/// When the workbook has a defined-names element, each entry's name and
/// text are added to the returned dictionary; otherwise the dictionary
/// is returned empty.
/// </summary>
/// <param name="fileName">Path of the workbook to examine.</param>
/// <returns>A dictionary mapping each defined name to its text.</returns>
public static IDictionary<String, String> XLGetDefinedNames(String fileName)
{
    var namesToText = new Dictionary<String, String>();

    using (SpreadsheetDocument document = SpreadsheetDocument.Open(fileName, false))
    {
        DefinedNames definedNames = document.WorkbookPart.Workbook.DefinedNames;
        if (definedNames == null)
        {
            return namesToText;
        }

        foreach (DefinedName dn in definedNames)
        {
            // Dictionary.Add throws if the same name occurs more than once.
            namesToText.Add(dn.Name.Value, dn.Text);
        }
    }

    return namesToText;
}
Can anyone suggest how to read Excel files in C# using ADO.NET in disconnected mode? My Excel file is quite large and can't be kept in memory. Please suggest a method of loading the data into a DataSet.
for now i am reading them by using Excel = Microsoft.Office.Interop.Excel and adding the excel reference(com) and then using objects like range etc.
May be this is what you are looking for
http://exceldatareader.codeplex.com/
I'd say connect to it with ADO and treat it like a database:
http://www.connectionstrings.com/excel
Here is an example of what I am using that might be handy for anyone else wanting to get data from an Excel spreadsheet.
It loads each worksheet into a DataTable within a DataSet.
It assumes that you have your headers going from A1 to x1.
using System;
using System.Data;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
/// <summary>
/// Thin wrapper around an Excel Interop application/workbook pair that can load
/// every worksheet of a workbook into a <see cref="DataSet"/>.
/// Dispose the instance to close the workbook and quit Excel.
/// </summary>
public class clsExcelWriter : IDisposable
{
    private Excel.Application oExcel;
    private Excel._Workbook oBook;
    private Excel._Worksheet oSheet;

    /// <summary>Name of the file this instance was created for.</summary>
    public string FileName
    {
        get;
        private set;
    }

    /// <summary>
    /// Starts Excel and, when <paramref name="filename"/> exists, opens it and
    /// remembers its active sheet. When the file does not exist, no workbook is
    /// opened.
    /// </summary>
    /// <param name="filename">Path of the workbook.</param>
    public clsExcelWriter(string filename)
    {
        // Initialize Excel
        oExcel = new Excel.Application();
        try
        {
            if (!File.Exists(filename))
            {
                // Create a new one?
            }
            else
            {
                oBook = (Excel._Workbook)oExcel.Workbooks.Open(filename);
                oSheet = (Excel._Worksheet)oBook.ActiveSheet;
            }

            this.FileName = filename;

            // Suppress any alerts
            oExcel.DisplayAlerts = false;
        }
        catch
        {
            // The caller never receives the instance when the constructor throws,
            // so release the Excel process here before propagating the error.
            Dispose();
            throw;
        }
    }

    /// <summary>
    /// Converts a 1-based column number to its Excel letter name
    /// (1 -> "A", 26 -> "Z", 27 -> "AA"). Returns an empty string for values
    /// below 1.
    /// </summary>
    private string GetExcelColumnName(int columnNumber)
    {
        int dividend = columnNumber;
        string columnName = String.Empty;

        while (dividend > 0)
        {
            // Bijective base-26: shift to 0..25 before taking the remainder.
            int modulo = (dividend - 1) % 26;
            columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
            dividend = (dividend - modulo) / 26;
        }

        return columnName;
    }

    /// <summary>
    /// Wraps a range value in a 1-based two-dimensional array. Multi-cell range
    /// values are returned unchanged; a single-cell range yields a bare value
    /// rather than an array, so it is wrapped in a 1x1 array.
    /// </summary>
    private static object[,] ToGrid(object value)
    {
        object[,] grid = value as object[,];
        if (grid != null)
        {
            return grid;
        }

        // Lower bounds of 1 keep indexing consistent with multi-cell range arrays.
        grid = (object[,])Array.CreateInstance(typeof(object), new[] { 1, 1 }, new[] { 1, 1 });
        grid[1, 1] = value;
        return grid;
    }

    /// <summary>
    /// Closes the workbook, quits Excel and releases the COM objects. Safe to
    /// call when no workbook was opened and safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        // Lets make sure we release those COM objects!
        if (oSheet != null)
        {
            Marshal.FinalReleaseComObject(oSheet);
            oSheet = null;
        }

        if (oBook != null)
        {
            oBook.Close();
            Marshal.FinalReleaseComObject(oBook);
            oBook = null;
        }

        if (oExcel != null)
        {
            oExcel.Quit();
            Marshal.FinalReleaseComObject(oExcel);
            oExcel = null;

            // Two collect/finalize passes, as in the original implementation.
            GC.Collect();
            GC.WaitForPendingFinalizers();
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }

    /// <summary>
    /// Loads every worksheet of <paramref name="filename"/> into a DataSet, one
    /// DataTable per sheet named after the sheet. Row 1 (A1 to the last used
    /// column) supplies the column names; the remaining used rows become data
    /// rows. Blank header cells get the DataTable's default column name.
    /// </summary>
    /// <param name="filename">Path of the workbook to read.</param>
    /// <returns>The populated DataSet.</returns>
    /// <exception cref="FileNotFoundException">The workbook does not exist.</exception>
    public static DataSet OpenFile(string filename)
    {
        if (!File.Exists(filename))
        {
            throw new FileNotFoundException("Excel workbook not found.", filename);
        }

        DataSet ds = new DataSet();
        using (clsExcelWriter xl = new clsExcelWriter(filename))
        {
            // Iterate through each worksheet
            foreach (Excel._Worksheet sheet in xl.oBook.Worksheets)
            {
                // Create a new table using the sheets name
                DataTable dt = new DataTable(sheet.Name);

                int columnCount = sheet.UsedRange.Columns.Count;
                int rowCount = sheet.UsedRange.Rows.Count;
                string lastColumn = xl.GetExcelColumnName(columnCount);

                // Get the first row (where the headers should be located)
                object[,] xlValues = ToGrid((object)sheet.get_Range("A1", lastColumn + 1).Value);

                // Iterate through the values to add new DataColumns to the DataTable
                for (int i = 0; i < xlValues.GetLength(1); i++)
                {
                    // Convert.ToString maps a blank (null) header to "", which the
                    // DataTable replaces with a default column name.
                    dt.Columns.Add(new DataColumn(Convert.ToString(xlValues[1, i + 1])));
                }

                // Now get the rest of the rows. With only a header row, "A2".."x1"
                // would be an inverted range that includes the header again.
                if (rowCount > 1)
                {
                    xlValues = ToGrid((object)sheet.get_Range("A2", lastColumn + rowCount).Value);
                    for (int row = 0; row < xlValues.GetLength(0); row++)
                    {
                        DataRow dr = dt.NewRow();
                        for (int col = 0; col < xlValues.GetLength(1); col++)
                        {
                            // xlValues array starts from 1, NOT 0 (just to confuse yee)
                            dr[col] = xlValues[row + 1, col + 1];
                        }
                        dt.Rows.Add(dr);
                    }
                }

                ds.Tables.Add(dt);
            }
        }

        // Your DataSet should now be filled! :)
        return ds;
    }
}
}
Usage
using System.Data;
using ExcelWriter;
namespace Test
{
    /// <summary>Minimal usage example for clsExcelWriter.OpenFile.</summary>
    class Program
    {
        /// <summary>
        /// Loads C:\Results.xls into a DataSet and then waits for Enter so the
        /// console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataSet ds = clsExcelWriter.OpenFile(@"C:\Results.xls");

            // Do some fancy stuff with the DataSet! xD

            // Block on input rather than spinning in an empty loop, which would
            // keep a CPU core busy.
            System.Console.ReadLine();
        }
    }
}
Here is what I use to read data from an Excel sheet:
/// <summary>
/// Opens an OLE DB connection to the given Excel file and returns a reader over
/// the select query built for <paramref name="sheet"/>.
/// </summary>
/// <param name="file">Path of the Excel file, used as the connection's Data Source.</param>
/// <param name="sheet">Sheet name passed to BuildSelectQuery.</param>
/// <returns>
/// An open reader. The underlying connection is closed when the reader is closed
/// or disposed, so the caller must dispose the reader.
/// </returns>
private DbDataReader ReadExcelSheet(string file, string sheet)
{
    string connStr = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + file + ";Extended Properties=Excel 8.0;";
    DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");
    DbConnection connection = factory.CreateConnection();
    try
    {
        connection.ConnectionString = connStr;
        DbCommand command = connection.CreateCommand();
        string query = BuildSelectQuery(sheet, names_mapping);//you need column names here
        command.CommandText = query;
        connection.Open();

        // CloseConnection ties the connection's lifetime to the returned reader;
        // without it the connection could never be closed by the caller.
        return command.ExecuteReader(System.Data.CommandBehavior.CloseConnection);
    }
    catch
    {
        // No reader was handed out, so nothing else can close the connection.
        connection.Dispose();
        throw;
    }
}