Can anyone suggest how to read Excel files in C# using ADO.NET in disconnected mode? My Excel file is quite large and can't be kept in memory. Please suggest a method of loading the data into a DataSet.
For now I am reading them by using Excel = Microsoft.Office.Interop.Excel, adding the Excel (COM) reference, and then using objects like Range etc.
Maybe this is what you are looking for:
http://exceldatareader.codeplex.com/
I'd say connect to it with ADO and treat it like a database:
http://www.connectionstrings.com/excel
Here is an example of what I am using that might be handy for anyone else wanting to get data from an Excel spreadsheet.
It loads each worksheet into a DataTable within a DataSet.
It assumes that you have your headers going from A1 to x1.
using System;
using System.Data;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
/// <summary>
/// Wraps an Excel COM automation session for a single workbook and offers
/// <see cref="OpenFile"/> to load every worksheet into a <see cref="DataSet"/>.
/// Requires Excel to be installed; the whole used range of each sheet is held in memory.
/// </summary>
public class clsExcelWriter : IDisposable
{
    private Excel.Application oExcel;
    private Excel._Workbook oBook;
    private Excel._Worksheet oSheet;

    // Used to store the name of the current file
    public string FileName
    {
        get;
        private set;
    }

    /// <summary>
    /// Starts Excel and, when <paramref name="filename"/> exists, opens it.
    /// When the file does not exist no workbook is opened (creating one is not implemented).
    /// </summary>
    /// <param name="filename">Path of the workbook to open.</param>
    public clsExcelWriter(string filename)
    {
        this.FileName = filename;

        // Initialize Excel
        oExcel = new Excel.Application();
        try
        {
            // Suppress any alerts (done before opening so Open cannot block on a dialog)
            oExcel.DisplayAlerts = false;

            if (File.Exists(filename))
            {
                // Excel resolves relative paths against its own working directory,
                // so always hand it the absolute path.
                oBook = (Excel._Workbook)oExcel.Workbooks.Open(Path.GetFullPath(filename));
                oSheet = (Excel._Worksheet)oBook.ActiveSheet;
            }
            // else: create a new one? (not implemented; oBook and oSheet stay null)
        }
        catch
        {
            // The caller never receives the instance, so nobody else can quit Excel.
            Dispose();
            throw;
        }
    }

    /// <summary>
    /// Converts a 1-based column number to its Excel letter name (1 -> A, 27 -> AA).
    /// </summary>
    private static string GetExcelColumnName(int columnNumber)
    {
        int dividend = columnNumber;
        string columnName = String.Empty;
        while (dividend > 0)
        {
            int modulo = (dividend - 1) % 26;
            columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
            dividend = (dividend - modulo) / 26;
        }
        return columnName;
    }

    /// <summary>
    /// Reads the values of a range as a 1-based two-dimensional array.
    /// Excel returns a bare scalar (not an array) for a single-cell range, so that
    /// case is wrapped in a 1x1 array with the same 1-based bounds.
    /// </summary>
    private static object[,] ReadValues(Excel.Range range)
    {
        object value = range.Value;
        object[,] values = value as object[,];
        if (values != null)
        {
            return values;
        }

        object[,] single = (object[,])Array.CreateInstance(typeof(object), new int[] { 1, 1 }, new int[] { 1, 1 });
        single[1, 1] = value;
        return single;
    }

    /// <summary>
    /// Closes the workbook (if one was opened), quits Excel and releases the COM objects.
    /// Safe to call more than once and safe when no workbook was opened.
    /// </summary>
    public void Dispose()
    {
        if (oExcel == null)
        {
            return;
        }

        try
        {
            if (oSheet != null)
            {
                Marshal.FinalReleaseComObject(oSheet);
                oSheet = null;
            }
            if (oBook != null)
            {
                try
                {
                    oBook.Close();
                }
                finally
                {
                    Marshal.FinalReleaseComObject(oBook);
                    oBook = null;
                }
            }
        }
        finally
        {
            try
            {
                oExcel.Quit();
            }
            finally
            {
                Marshal.FinalReleaseComObject(oExcel);
                oExcel = null;

                // Intermediate COM wrappers (ranges, sheets from enumerators) are only
                // released when their RCWs are finalized; force that so EXCEL.EXE exits.
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
                GC.WaitForPendingFinalizers();
            }
        }
    }

    /// <summary>
    /// Loads every worksheet of <paramref name="filename"/> into a DataTable named after
    /// the sheet. Row 1 (starting at A1) supplies the column names; the remaining used
    /// rows become data rows.
    /// </summary>
    /// <param name="filename">Path of the workbook to read.</param>
    /// <returns>A DataSet with one DataTable per worksheet.</returns>
    /// <exception cref="FileNotFoundException">The workbook does not exist.</exception>
    public static DataSet OpenFile(string filename)
    {
        DataSet ds = new DataSet();
        using (clsExcelWriter xl = new clsExcelWriter(filename))
        {
            if (xl.oBook == null)
            {
                throw new FileNotFoundException("The Excel file could not be found.", filename);
            }

            // Iterate through each worksheet
            foreach (Excel._Worksheet sheet in xl.oBook.Worksheets)
            {
                // Create a new table using the sheet's name
                DataTable dt = new DataTable(sheet.Name);

                // UsedRange need not start at A1, so derive the absolute last row/column.
                Excel.Range used = sheet.UsedRange;
                int lastColumn = used.Column + used.Columns.Count - 1;
                int lastRow = used.Row + used.Rows.Count - 1;
                string lastColumnName = GetExcelColumnName(lastColumn);

                // Get the first row (where the headers should be located)
                object[,] headerValues = ReadValues(sheet.get_Range("A1", lastColumnName + "1"));
                for (int col = 1; col <= headerValues.GetLength(1); col++)
                {
                    // Blank headers get a positional name; duplicates get a numeric suffix,
                    // because DataTable rejects repeated column names.
                    string baseName = Convert.ToString(headerValues[1, col]);
                    if (String.IsNullOrEmpty(baseName))
                    {
                        baseName = "Column" + col;
                    }
                    string uniqueName = baseName;
                    int suffix = 1;
                    while (dt.Columns.Contains(uniqueName))
                    {
                        uniqueName = baseName + "_" + suffix;
                        suffix++;
                    }
                    dt.Columns.Add(new DataColumn(uniqueName));
                }

                // Now get the rest of the rows (only when there is something below the header;
                // asking for "A2:X1" would make Excel return the header row again)
                if (lastRow >= 2)
                {
                    object[,] xlValues = ReadValues(sheet.get_Range("A2", lastColumnName + lastRow));
                    for (int row = 0; row < xlValues.GetLength(0); row++)
                    {
                        DataRow dr = dt.NewRow();
                        for (int col = 0; col < xlValues.GetLength(1); col++)
                        {
                            // xlValues array starts from 1, NOT 0; empty cells come back as null
                            dr[col] = xlValues[row + 1, col + 1] ?? DBNull.Value;
                        }
                        dt.Rows.Add(dr);
                    }
                }

                ds.Tables.Add(dt);
            }
        }
        // Your DataSet should now be filled! :)
        return ds;
    }
}
}
Usage
using System.Data;
using ExcelWriter;
namespace Test
{
    /// <summary>
    /// Minimal console example that loads a workbook into a DataSet via clsExcelWriter.OpenFile.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Verbatim string literal: the prefix is '@', not '#'.
            DataSet ds = clsExcelWriter.OpenFile(@"C:\Results.xls");
            // Do some fancy stuff with the DataSet! xD

            // Keep the console window open without spinning a CPU core.
            System.Console.ReadLine();
        }
    }
}
Here what I use to read data from an Excel sheet:
/// <summary>
/// Opens an OLE DB (Jet) connection to an .xls workbook and returns a reader over the
/// query built by BuildSelectQuery for the given sheet.
/// </summary>
/// <param name="file">Path of the Excel workbook.</param>
/// <param name="sheet">Sheet name passed to BuildSelectQuery.</param>
/// <returns>
/// An open reader. Closing or disposing the reader also closes the underlying connection.
/// </returns>
private DbDataReader ReadExcelSheet(string file, string sheet)
{
    string connStr = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + file + ";Extended Properties=Excel 8.0;";
    DbProviderFactory factory = DbProviderFactories.GetFactory("System.Data.OleDb");
    DbConnection connection = factory.CreateConnection();
    try
    {
        connection.ConnectionString = connStr;
        DbCommand command = connection.CreateCommand();
        command.CommandText = BuildSelectQuery(sheet, names_mapping);//you need column names here
        connection.Open();

        // The caller only receives the reader, so tie the connection's lifetime to it;
        // otherwise the connection (and the file lock) can never be released.
        return command.ExecuteReader(System.Data.CommandBehavior.CloseConnection);
    }
    catch
    {
        // No reader was handed out, so nobody else can close the connection.
        connection.Dispose();
        throw;
    }
}
Related
I'm trying to get the name of the first sheet of an Excel workbook.
Instead of getting the sheet names in the order in which they appear in the Excel workbook, I get them sorted alphabetically.
Does anyone have an idea how to get the names unsorted?
/// <summary>
/// Returns the TABLE_NAME values reported by the OLE DB "Tables" schema of an Excel
/// workbook, in the order the provider returns them.
/// </summary>
/// <param name="excelFile">Path of the workbook to inspect.</param>
/// <returns>The table names, or null when the schema is unavailable or any error occurs.</returns>
private String[] GetExcelSheetNames(string excelFile)
{
    // Connection string. Change the excel file to the file you will search.
    String connString = "Provider=Microsoft.Jet.OLEDB.4.0;" +
        "Data Source=" + excelFile + ";Extended Properties=Excel 8.0;";

    try
    {
        using (OleDbConnection connection = new OleDbConnection(connString))
        {
            connection.Open();

            // Schema table describing every table (sheet) the provider exposes.
            using (System.Data.DataTable schema = connection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
            {
                if (schema == null)
                {
                    return null;
                }

                String[] sheetNames = new String[schema.Rows.Count];
                for (int index = 0; index < sheetNames.Length; index++)
                {
                    sheetNames[index] = schema.Rows[index]["TABLE_NAME"].ToString();
                }
                return sheetNames;
            }
        }
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as "no sheet names".
        return null;
    }
}
I have already found a solution to my question.
// Get the name of sheet number 1.
string sheetName;
// Excel resolves relative paths against its own working directory, so open the absolute path.
var excelFile = Path.GetFullPath(llFileName);
var excel = new Excel.Application();
try
{
    var workbook = excel.Workbooks.Open(excelFile);
    try
    {
        var sheet = (Excel.Worksheet)workbook.Worksheets.Item[1]; // 1 is the first item, this is NOT a zero-based collection
        sheetName = sheet.Name;
    }
    finally
    {
        // Nothing was modified; close without saving.
        workbook.Close(false);
    }
}
finally
{
    // Without Quit a hidden EXCEL.EXE process keeps running and keeps the file locked.
    excel.Quit();
}
Hope it helps others.
I am looking for some assistance in obtaining the Columns from a specific worksheet using C#. I am currently able to connect to the Excel file and obtain a read of the Columns, but it is giving me the Columns for every worksheet in my Excel, not a specific one.
What can I do to my code to obtain only the Columns from the desired worksheet? Here is my code which currently fills my Checkbox List with all of the columns.
// Bind the column names of the [LogFile$] worksheet only.
using (OleDbConnection excelConnection = new OleDbConnection(String.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0\"", strFullPath)))
{
    excelConnection.Open();

    // GetSchema ignores any command text; the sheet has to be selected through the
    // restriction array. For the "Columns" collection the restrictions are, in order:
    // TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME (null = no restriction).
    string[] restrictions = new string[] { null, null, "LogFile$", null };
    DataTable dt = excelConnection.GetSchema("Columns", restrictions);

    cbColumnList.DataSource = dt;
    cbColumnList.DataTextField = "Column_name";
    cbColumnList.DataValueField = "Column_name";
    cbColumnList.DataBind();
}
I am fairly sure my issue has something to do with where I am creating the DataTable, as I'm pulling the schema from excelConnection and not cmd, thus it's most likely bypassing my query where I have defined the worksheet to get the columns from. If this is the case, how would I fix it?
First Solution
using System;
using System.Data;
using System.Data.OleDb;
namespace ConsoleApp35
{
    /// <summary>
    /// Console example: loads [Sheet1$] of a workbook into a DataTable through OLE DB and
    /// prints the column names followed by every row.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Verbatim string literal: the prefix is '@', not '#'.
            string connectionString = String.Format(
                "Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0};Extended Properties=\"Excel 12.0\"",
                @"D:\Coverage.xlsx");

            using (OleDbConnection excelConnection = new OleDbConnection(connectionString))
            using (OleDbCommand command = new OleDbCommand("SELECT * FROM [Sheet1$]", excelConnection))
            using (OleDbDataAdapter adapter = new OleDbDataAdapter(command))
            {
                excelConnection.Open();
                var dt = new DataTable();
                try
                {
                    adapter.Fill(dt);

                    // printing columns names
                    foreach (DataColumn d in dt.Columns)
                    {
                        Console.WriteLine(d.ColumnName);
                    }

                    // dt has all data from Sheet1
                    foreach (DataRow r in dt.Rows)
                    {
                        Console.WriteLine(String.Join(", ", r.ItemArray));
                    }
                }
                catch (Exception e)
                {
                    // Report the failure instead of silently printing nothing.
                    Console.Error.WriteLine("Reading [Sheet1$] failed: " + e.Message);
                }
                Console.ReadLine();
            }
        }
    }
}
Second solution
This solution is for those who have the same requirements but use Open XML.
Install both Open XML packages from Microsoft:
https://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=5124
In your solution add a references to DocumentFormat.OpenXml and WindowsBase assemblies. Add the following using directives:
using DocumentFormat.OpenXml.Spreadsheet;
using DocumentFormat.OpenXml.Packaging;
using System.Text.RegularExpressions;
Add the following methods to your class that processes your Excel file.
// Built once and reused: GetColumnName runs for every cell, and constructing a Regex
// per call repeats the pattern parsing each time.
private static readonly Regex ColumnLettersRegex = new Regex("[a-zA-Z]+");

/// <summary>
/// Extracts the column letters from a cell reference, e.g. "AB12" -> "AB".
/// </summary>
/// <param name="cellName">Cell reference such as "A1".</param>
/// <returns>The first run of ASCII letters in the reference, or an empty string if it has none.</returns>
private static string GetColumnName(string cellName)
{
    return ColumnLettersRegex.Match(cellName).Value;
}
/// <summary>
/// Returns the entry at position <paramref name="id"/> of the workbook's shared string table.
/// </summary>
/// <param name="workbookPart">Workbook part whose SharedStringTablePart is read.</param>
/// <param name="id">Zero-based index of the shared string item.</param>
/// <returns>The shared string item at that index.</returns>
public static SharedStringItem GetSharedStringItemById(WorkbookPart workbookPart, int id)
{
    var sharedStrings = workbookPart.SharedStringTablePart.SharedStringTable;
    var items = sharedStrings.Elements<SharedStringItem>();
    return items.ElementAt(id);
}
Use the following code to get columns names from the specified sheet. Also, additionally, you can process here all the data you need from the file. I have tested reading data from a simple Excel sheet that contains 2 columns and several rows where cells have integer and string values. For processing other data types you need to add additional blocks of code checking cell.DataType.
// Collects the column letters used on "Sheet1" and the text of every stored cell, row by row.
// NOTE: SpreadsheetML only stores cells that have content, so a row list holds the cells
// present in that row and is not padded for empty cells in between.
var columnsNames = new HashSet<string>();
var data = new List<List<string>>();
// Verbatim string literal: the prefix is '@', not '#'. Opened read-only (isEditable = false).
using (SpreadsheetDocument myDoc =
    DocumentFormat.OpenXml.Packaging.SpreadsheetDocument.Open(@"D:\FileName.xlsx", false))
{
    foreach (Sheet s in myDoc.WorkbookPart.Workbook.Sheets)
    {
        if (s.Name == "Sheet1")
        {
            string relationshipId = s.Id.Value;
            WorksheetPart worksheetPart = (WorksheetPart)myDoc.WorkbookPart.GetPartById(relationshipId);
            var sd = worksheetPart.Worksheet.Elements<SheetData>().First();
            foreach (Row row in sd.Elements<Row>())
            {
                var rowList = new List<string>();
                foreach (Cell cell in row.Elements<Cell>())
                {
                    // get columns names (the r="A1" attribute is optional in the file format)
                    if (cell.CellReference != null)
                    {
                        columnsNames.Add(GetColumnName(cell.CellReference.Value));
                    }

                    // process data
                    string cellValue;
                    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
                    {
                        // The cell stores an index into the shared string table.
                        cellValue = string.Empty;
                        int id;
                        if (Int32.TryParse(cell.InnerText, out id))
                        {
                            SharedStringItem item = GetSharedStringItemById(myDoc.WorkbookPart, id);
                            // Plain strings live in Text; rich-text items have no Text element,
                            // so fall back to the concatenated inner text of their runs.
                            cellValue = item.Text != null ? item.Text.Text : item.InnerText;
                        }
                    }
                    else if (cell.CellValue != null)
                    {
                        // Numbers, booleans, dates and formula results keep their raw stored text.
                        cellValue = cell.CellValue.Text;
                    }
                    else
                    {
                        // No value element: inline strings keep their text in child elements,
                        // and a styled-but-empty cell yields an empty string.
                        cellValue = cell.InnerText;
                    }
                    rowList.Add(cellValue);
                }
                data.Add(rowList);
            }
        }
    }
    // No explicit Close(): leaving the using block disposes (and closes) the document.
}
I have the following code :
// Select every row of the source named by spreadSheetName over the existing connection
// and open a forward-only reader on the result.
OleDbCommand selectAllCommand = new OleDbCommand("Select * from [" + spreadSheetName + "]", xlsFileConnection);
OleDbDataReader xlsReader = selectAllCommand.ExecuteReader();
In the spreadSheetName parameter i keep my file name.
The connection string for xlsFileConnection was set as
"Provider=Microsoft.Jet.OLEDB.4.0;
Data Source='<directory path>';
Extended Properties='text; HDR=No; FMT=Delimited'"
When i start to execute while (xlsReader.Read()) i take a row #2 but not #1 from data source.
The first suggestion was that HDR parameter has invalid value but it seems it's ok.
There are better and easier ways to reading xlsx files, if I were you I would grab closedXML from nuget and this code to read your excel file into a data table
/// <summary>
/// Imports the given workbook with ImportSheet, stores the resulting table in _dt and
/// assigns that table's DefaultView to dgContacts.ItemsSource.
/// </summary>
/// <param name="fileName">Path of the workbook; passed unchanged to ImportSheet.</param>
public void ProcessExcel(string fileName)
{
_dt = ImportSheet(fileName);
dgContacts.ItemsSource = _dt.DefaultView;
}
/// <summary>
/// Reads the used range of the first worksheet of a workbook into a DataTable.
/// The first row of the used range supplies the column names; every following row
/// becomes a data row.
/// </summary>
/// <param name="fileName">Path of the .xlsx workbook to open.</param>
/// <returns>The populated table; an empty table (no columns) when the sheet has no used cells.</returns>
public static DataTable ImportSheet(string fileName)
{
    var datatable = new DataTable();

    // XLWorkbook is disposable; dispose it once the values have been copied out.
    using (var workbook = new XLWorkbook(fileName))
    {
        var xlWorksheet = workbook.Worksheet(1);
        var firstCell = xlWorksheet.FirstCellUsed();
        var lastCell = xlWorksheet.LastCellUsed();
        if (firstCell == null || lastCell == null)
        {
            // Nothing on the sheet: there is no range to build.
            return datatable;
        }

        var range = xlWorksheet.Range(firstCell, lastCell);
        var col = range.ColumnCount();

        // Header names come from the first row OF THE RANGE (range-relative addressing),
        // so they stay aligned with the data when the used range does not start at A1.
        for (var i = 1; i <= col; i++)
        {
            var column = range.Cell(1, i);
            datatable.Columns.Add(column.Value.ToString());
        }

        var isHeaderRow = true;
        foreach (var item in range.Rows())
        {
            if (isHeaderRow)
            {
                // The first row was consumed as column names above.
                isHeaderRow = false;
                continue;
            }

            var array = new object[col];
            for (var y = 1; y <= col; y++)
            {
                array[y - 1] = item.Cell(y).Value;
            }
            datatable.Rows.Add(array);
        }
    }

    return datatable;
}
Then grab the data out of your DataTable as you need.
This is live, working code, so you just need to copy and paste it.
I am looking for a way to create, modify, read .xlsx files in C# without installing Excel or creating files on the server before giving to the user to download.
I found NPOI http://npoi.codeplex.com/ which looks great but supports .xls not .xlsx
I found ExcelPackage http://excelpackage.codeplex.com/ which looks great but has the additional overhead of creating the file on the server before it can be sent to the user.
Does anyone know of a way around this?
I found EPPlus http://epplus.codeplex.com but I am not not certain if this requires creation of a file on the server before it can be sent to the user?
I am pretty new to this so any guidance/examples etc., would be very much appreciated.
With EPPlus it's not required to create file, you can do all with streams, here is an example of ASP.NET ashx handler that will export datatable into excel file and serve it back to the client :
/// <summary>
/// ASP.NET handler that exports a DataTable as an .xlsx download, building the workbook
/// entirely in memory with EPPlus (no file is created on the server).
/// </summary>
public class GetExcel : IHttpHandler
{
    /// <summary>
    /// Runs the query, converts the result to an .xlsx workbook and streams it to the
    /// client as an attachment.
    /// </summary>
    public void ProcessRequest(HttpContext context)
    {
        var dt = DBServer.GetDataTable("select * from table");
        using (var ms = GetExcel.DataTableToExcelXlsx(dt, "Sheet1"))
        {
            // Set status and headers BEFORE writing the body: once any output has been
            // flushed to the client the headers can no longer be changed.
            context.Response.StatusCode = 200;
            context.Response.ContentType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            context.Response.AddHeader("Content-Disposition", "attachment;filename=EasyEditCmsGridData.xlsx");
            ms.WriteTo(context.Response.OutputStream);
        }
        context.Response.End();
    }

    public bool IsReusable
    {
        get
        {
            return false;
        }
    }

    /// <summary>
    /// Writes the rows of <paramref name="table"/> to a new worksheet, starting at A1,
    /// and returns the saved workbook as a stream. Values are written as their string
    /// form; DBNull cells are left empty. No header row is written.
    /// </summary>
    /// <param name="table">Source data.</param>
    /// <param name="sheetName">Name of the worksheet to create.</param>
    /// <returns>A MemoryStream holding the .xlsx bytes, positioned at the start.</returns>
    public static MemoryStream DataTableToExcelXlsx(DataTable table, string sheetName)
    {
        var result = new MemoryStream();
        using (var pack = new ExcelPackage())
        {
            var ws = pack.Workbook.Worksheets.Add(sheetName);
            int row = 1;
            foreach (DataRow rw in table.Rows)
            {
                for (int col = 0; col < table.Columns.Count; col++)
                {
                    object value = rw[col];
                    if (value != DBNull.Value)
                    {
                        // Worksheet cells are addressed 1-based.
                        ws.Cells[row, col + 1].Value = value.ToString();
                    }
                }
                row++;
            }
            pack.SaveAs(result);
        }

        // Rewind so callers that read the stream (rather than calling WriteTo/ToArray) see the data.
        result.Position = 0;
        return result;
    }
}
http://msdn.microsoft.com/en-us/library/cc850837.aspx
Try using this code to export the data to Excel; it may help.
/// <summary>
/// Exports every table of a DataSet to an Excel workbook through the ACE OLE DB provider:
/// one worksheet per table (CREATE TABLE) followed by one INSERT per row. All values are
/// written as text, and the literal "&lt;br/&gt;" inside a value is turned into a line break.
/// Failures are written to the trace output and are not rethrown; tables processed before
/// the failure remain in the file.
/// </summary>
/// <param name="dataSet">Tables to export; tables without columns are skipped.</param>
/// <param name="filepath">Path of the workbook to create or extend.</param>
public static void DataSetsToExcel(DataSet dataSet, string filepath)
{
    try
    {
        string connString = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + filepath + ";Extended Properties=Excel 12.0 Xml;";
        foreach (DataTable dataTable in dataSet.Tables)
        {
            if (dataTable.Columns.Count == 0)
            {
                // A worksheet cannot be created without at least one column.
                continue;
            }

            using (OleDbConnection con = new OleDbConnection(connString))
            {
                con.Open();
                CreateWorksheet(con, dataTable);
                InsertRows(con, dataTable);
            }
        }
    }
    catch (Exception ex)
    {
        // Callers have never seen exceptions from this method; keep that contract but
        // make the failure visible instead of discarding it.
        System.Diagnostics.Trace.TraceError("DataSetsToExcel failed for '" + filepath + "': " + ex);
    }
}

// Issues the CREATE TABLE statement that adds a worksheet with one text column per DataColumn.
private static void CreateWorksheet(OleDbConnection con, DataTable table)
{
    StringBuilder ddl = new StringBuilder();
    ddl.Append("CREATE TABLE ").Append("[" + table.TableName + "]").Append("(");
    for (int i = 0; i < table.Columns.Count; i++)
    {
        if (i > 0)
        {
            ddl.Append(",");
        }
        ddl.Append("[" + table.Columns[i].ColumnName + "] text");
    }
    ddl.Append(")");

    using (OleDbCommand cmd = new OleDbCommand(ddl.ToString(), con))
    {
        cmd.ExecuteNonQuery();
    }
}

// Inserts every row of the table using one parameterized INSERT statement.
private static void InsertRows(OleDbConnection con, DataTable table)
{
    StringBuilder fields = new StringBuilder();
    StringBuilder placeholders = new StringBuilder();
    for (int j = 0; j < table.Columns.Count; j++)
    {
        if (j > 0)
        {
            fields.Append(",");
            placeholders.Append(",");
        }
        fields.Append("[" + table.Columns[j].ColumnName + "]");
        // OLE DB parameters are positional and written as '?'.
        placeholders.Append("?");
    }

    string insertSql = " insert into [" + table.TableName + "]( " + fields + ") values (" + placeholders + ")";
    using (OleDbCommand cmd = new OleDbCommand(insertSql, con))
    {
        foreach (DataRow row in table.Rows)
        {
            cmd.Parameters.Clear();
            for (int j = 0; j < table.Columns.Count; j++)
            {
                // Values travel as parameters, so quotes in the data need no escaping.
                string text = row[j].ToString().Replace("<br/>", Environment.NewLine);
                cmd.Parameters.AddWithValue("@p" + j, text);
            }
            cmd.ExecuteNonQuery();
        }
    }
}
I am using IExcelDataReader to read an Excel sheet with the following code:
/// <summary>
/// Creates the reader that matches the uploaded file's extension: the binary reader
/// for ".xls" and the Open XML reader for ".xlsx". The extension is matched
/// case-insensitively, so "REPORT.XLSX" is accepted as well.
/// </summary>
/// <param name="file">The uploaded file; may be null.</param>
/// <returns>A reader over the upload's input stream, or null when the file is null or has another extension.</returns>
private static IExcelDataReader FetchDataReaderForExcel(HttpPostedFile file)
{
    if (null == file)
    {
        return null;
    }

    string fileExtension = Path.GetExtension(file.FileName);
    if (string.Equals(fileExtension, ".xls", System.StringComparison.OrdinalIgnoreCase))
    {
        return ExcelReaderFactory.CreateBinaryReader(file.InputStream);
    }
    if (string.Equals(fileExtension, ".xlsx", System.StringComparison.OrdinalIgnoreCase))
    {
        return ExcelReaderFactory.CreateOpenXmlReader(file.InputStream);
    }
    return null;
}
When I read the Excel sheet using this method, sometimes I am not able to read the data correctly. Sometimes it is not able to read a column; other times it is not able to read the data at all. If I format each column as normal text and then upload again, it works. The Excel file contains integer, string, datetime and hyperlink data. Can anyone tell me what the problem could be, or suggest an alternative?
I'm using oledb and it works perfect for me. Here is my example:
// Load the whole "Sheet1" worksheet into memory through OLE DB, then walk its rows.
using (OleDbConnection con = new OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + Filename + ";Extended Properties=\"Excel 12.0 Xml;HDR=YES\""))
{
    // Name of the worksheet to read (the "$" suffix is appended in the query).
    string listName = "Sheet1";
    con.Open();
    try
    {
        DataSet sheetData = new DataSet();
        string selectAll = "select * from [" + listName + "$]";
        OleDbDataAdapter adapter = new OleDbDataAdapter(selectAll, con);
        adapter.Fill(sheetData);

        // Everything is in memory now; the connection is no longer needed.
        con.Close();

        DataRowCollection sheetRows = sheetData.Tables[0].Rows;
        for (int r = 0; r < sheetRows.Count; r++)
        {
            Object[] cells = sheetRows[r].ItemArray;

            // Skip rows whose first three cells are all empty.
            bool firstThreeEmpty = cells[0].ToString().Length == 0
                && cells[1].ToString().Length == 0
                && cells[2].ToString().Length == 0;
            if (firstThreeEmpty)
            {
                continue;
            }

            /*
            cells[0]
            cells[1]
            cells[2]
            are getting values
            */
        }
    }
    catch (Exception)
    {
        // Any failure while reading is reported as "no result".
        return null;
    }
}
OLEDB.12.0 works with both .xls and .xlsx
If you are uploading the file, the file has many sheets in it, and you want to read all of them, you can follow this method. First write the code for the file upload and save the uploaded file to a path; using that path you can then read the file.
#region GetsAllTheSheetNames of An Excel File
/// <summary>
/// Retrieves the table names that the ACE OLE DB provider reports for an Excel
/// workbook (worksheet names carry a trailing "$").
/// </summary>
/// <param name="excelFile">The excel file.</param>
/// <returns>
/// The names in the order the provider returns them, or null when no schema table is
/// available. Names the provider wrapped in single quotes because they start with a
/// digit are returned without the quotes.
/// </returns>
public static string[] ExcelSheetNames(String excelFile)
{
    // Verbatim string literal: the prefix is '@', not '#'.
    string connString = @"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + excelFile + ";Extended Properties='Excel 12.0;HDR=Yes'";
    using (OleDbConnection objConn = new OleDbConnection(connString))
    {
        objConn.Open();
        using (DataTable dt = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null))
        {
            if (dt == null)
            {
                return null;
            }

            string[] res = new string[dt.Rows.Count];
            for (int i = 0; i < res.Length; i++)
            {
                string name = dt.Rows[i]["TABLE_NAME"].ToString();

                // numeric sheetnames get single quotes around
                // remove them here (length check guards against an empty name)
                if (name.Length > 0 && name[0] == '\'' && Regex.IsMatch(name, @"^'\d\w+\$'$"))
                {
                    name = name.Substring(1, name.Length - 2);
                }
                res[i] = name;
            }
            return res;
        }
    }
}
#endregion
// You can read a worksheet and keep its data in a DataTable for later use.
/// <summary>
/// Loads one worksheet of an Excel workbook into a DataTable through the ACE OLE DB provider.
/// </summary>
/// <param name="excelFile">Path of the workbook.</param>
/// <param name="worksheetName">Worksheet name without the trailing "$".</param>
/// <returns>The worksheet's rows; with HDR=Yes the first row supplies the column names.</returns>
public static DataTable GetWorksheet(string excelFile, string worksheetName)
{
    // Verbatim string literal: the prefix is '@', not '#'.
    string connString = @"Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + excelFile + ";Extended Properties='Excel 12.0;HDR=Yes'";

    // using blocks release the connection (and the file lock) even when Fill throws.
    using (OleDbConnection con = new System.Data.OleDb.OleDbConnection(connString))
    using (OleDbDataAdapter adapter = new System.Data.OleDb.OleDbDataAdapter("select * from [" + worksheetName + "$]", con))
    {
        con.Open();
        System.Data.DataSet excelDataSet = new DataSet();
        adapter.Fill(excelDataSet);
        return excelDataSet.Tables[0];
    }
}
Alternatively, you can use this method to read the Excel file.
Just add the reference:
click "Add Reference" in Solution Explorer, open the COM tab and add this reference:
Microsoft.Office.Interop.Excel
Then add these namespaces in your code-behind:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Excel = Microsoft.Office.Interop.Excel;
using System.IO;
using System.Data;
/// <summary>
/// Reads columns 1, 5 and 4 of the active worksheet row by row, starting at row 1 and
/// stopping at the first row whose column-1 cell is empty. Each row is printed to the
/// console and the collected lines are passed to Writetofile.
/// </summary>
static void Main(string[] args)
{
    string workbookPath = @"C:\samples\SEP DUMPS.xls";

    // Columns of interest (1-based).
    const int colIndex1 = 1;
    const int colIndex2 = 5;
    const int colIndex3 = 4;

    // initialize the Excel Application class
    Excel.Application app = new Excel.Application();
    Excel.Workbook workBook = null;
    try
    {
        // create the workbook object by opening the excel file (read-only).
        workBook = app.Workbooks.Open(workbookPath, 0, true, 5, "", "", true, Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
        // Get The Active Worksheet Using Sheet Name Or Active Sheet
        Excel.Worksheet workSheet = (Excel.Worksheet)workBook.ActiveSheet;

        System.Text.StringBuilder sb = new StringBuilder();

        // Test and read the SAME row in each iteration; checking the previous row
        // would emit one extra, empty line after the last data row.
        int rowIndex = 1;
        while (((Excel.Range)workSheet.Cells[rowIndex, colIndex1]).Value2 != null)
        {
            string firstName = Convert.ToString(((Excel.Range)workSheet.Cells[rowIndex, colIndex1]).Value2);
            string lastName = Convert.ToString(((Excel.Range)workSheet.Cells[rowIndex, colIndex2]).Value2);
            string Name = Convert.ToString(((Excel.Range)workSheet.Cells[rowIndex, colIndex3]).Value2);
            string line = firstName + "," + lastName + "," + Name;
            sb.Append(line); sb.Append(Environment.NewLine);
            Console.WriteLine(" {0},{1},{2} ", firstName, lastName, Name);
            rowIndex++;
        }
        Writetofile(sb.ToString());
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.ReadLine();
    }
    finally
    {
        // Always shut Excel down, not only on failure; otherwise a hidden EXCEL.EXE
        // keeps running and keeps the file locked.
        if (workBook != null)
        {
            workBook.Close(false);
        }
        app.Quit();
    }
}
Hope this helps. If you have any doubts, ask me.