How do I merge multiple Excel documents into a single one with NPOI? - c#

I am trying to merge several Excel documents into a single document using NPOI. Here is the code I wrote:
/// <summary>
/// Entry point: loads every workbook listed in <c>files</c> into one DataTable via MergeData
/// and writes the combined table to <c>finalImagePathReport3full</c> via ExportEasy.
/// </summary>
static void Main(string[] args)
{
    DataTable dt = new DataTable();
    // Verbatim (@"...") literals so the backslashes in the Windows paths are not read as escape sequences.
    string[] files = new string[]
    {
        @"C:\Users\Ionut\source\repos\ExcelMergeDocument\ExcelMergeDocument\bin\Debug\TAMUExport\Project1\Report3Item.xls",
        @"C:\Users\Ionut\source\repos\ExcelMergeDocument\ExcelMergeDocument\bin\Debug\TAMUExport\Project2\Report3Item.xls"
    };
    foreach (string file in files)
    {
        MergeData(file, dt);
    }
    ExportEasy(dt, finalImagePathReport3full);
}
// File name of the merged report, with a leading separator so it can be appended to a directory.
public static string imagePathReport3full = @"\ResultReport3Item.xls";
// Full output path: the executing assembly's directory followed by the report file name.
public static string finalImagePathReport3full = AssemblyDirectory + imagePathReport3full;
/// <summary>
/// Directory that contains the executing assembly, derived from the assembly's CodeBase URI.
/// </summary>
public static string AssemblyDirectory
{
    get
    {
        string codeBase = Assembly.GetExecutingAssembly().CodeBase;
        UriBuilder uri = new UriBuilder(codeBase);
        // The URI path is percent-encoded; decode it before treating it as a file-system path.
        string path = Uri.UnescapeDataString(uri.Path);
        return System.IO.Path.GetDirectoryName(path);
    }
}
// Appends the rows of the first sheet of the workbook at `path` to `dt`.
// When `dt` has no rows yet, one column is first created per header cell (sheet row 0).
private static void MergeData(string path, DataTable dt)
{
// NOTE(review): the ZipException quoted below is raised on this line, and the inputs passed by Main
// end in .xls — presumably XSSFWorkbook only handles zip-based .xlsx and HSSFWorkbook is needed here; confirm.
XSSFWorkbook workbook = new XSSFWorkbook(path);
XSSFSheet sheet = (XSSFSheet)workbook.GetSheetAt(0);
XSSFRow headerRow = (XSSFRow)sheet.GetRow(0);
// The header row's LastCellNum is the upper column bound for every data row below.
int cellCount = headerRow.LastCellNum;
// Columns are only created while the table is still empty, i.e. for the first merged file.
if (dt.Rows.Count == 0)
{
for (int i = headerRow.FirstCellNum; i < cellCount; i++)
{
DataColumn column = new DataColumn(headerRow.GetCell(i).StringCellValue);
dt.Columns.Add(column);
}
}
else
{
}
int rowCount = sheet.LastRowNum + 1;
// Start one row after the first row so the header is not copied as data.
for (int i = (sheet.FirstRowNum + 1); i < rowCount; i++)
{
// NOTE(review): the result of GetRow(i) is dereferenced without a null check — confirm it cannot be null here.
XSSFRow row = (XSSFRow)sheet.GetRow(i);
DataRow dataRow = dt.NewRow();
for (int j = row.FirstCellNum; j < cellCount; j++)
{
// Cells that are absent stay unset in the DataRow; present cells are stored as their ToString() text.
if (row.GetCell(j) != null)
dataRow[j] = row.GetCell(j).ToString();
}
dt.Rows.Add(dataRow);
}
// Only the local references are dropped; nothing is explicitly closed or disposed here.
workbook = null;
sheet = null;
}
/// <summary>
/// Writes <paramref name="dtSource"/> to a new .xls workbook at <paramref name="strFileName"/>:
/// column names go into sheet row 0, then one sheet row per DataRow, every value stored as text.
/// </summary>
/// <param name="dtSource">Table whose columns and rows are exported.</param>
/// <param name="strFileName">Target file path; an existing file is overwritten (FileMode.Create).</param>
public static void ExportEasy(DataTable dtSource, string strFileName)
{
    HSSFWorkbook workbook = new HSSFWorkbook();
    HSSFSheet sheet = (HSSFSheet)workbook.CreateSheet();

    // Header row: one cell per column, placed at the column's ordinal position.
    HSSFRow dataRow = (HSSFRow)sheet.CreateRow(0);
    foreach (DataColumn column in dtSource.Columns)
    {
        dataRow.CreateCell(column.Ordinal).SetCellValue(column.ColumnName);
    }

    // Data rows start at sheet row 1, directly below the header.
    for (int i = 0; i < dtSource.Rows.Count; i++)
    {
        dataRow = (HSSFRow)sheet.CreateRow(i + 1);
        for (int j = 0; j < dtSource.Columns.Count; j++)
        {
            dataRow.CreateCell(j).SetCellValue(dtSource.Rows[i][j].ToString());
        }
    }

    // Write straight to the target file; no intermediate in-memory buffer is needed.
    using (FileStream fs = new FileStream(strFileName, FileMode.Create, FileAccess.Write))
    {
        workbook.Write(fs);
    }
}
}
When I run it, I get the following error:
ICSharpCode.SharpZipLib.Zip.ZipException: 'Cannot find central
directory'
What am I supposed to do?
Does NPOI work only with the .xlsx extension? I only have Excel documents with the .xls extension. But when I run the program with a .xlsx file, I get a different error:
System.IO.InvalidDataException: 'Zip File is closed'
Both errors appear on this line of code:
XSSFWorkbook workbook = new XSSFWorkbook(path);

This is the modification that I made to the code:
// HSSF variant of MergeData: appends the rows of the first sheet of the workbook at `path` to `dt`,
// creating the columns from the header row when `dt` has no rows yet.
private static void MergeData(string path, DataTable dt)
{
// HSSFWorkbook workbook = new HSSFWorkbook(path);
HSSFWorkbook workbook;
using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
{
// NOTE(review): this constructs an empty workbook and never reads `file`. An empty workbook has no
// sheet 0, which matches the ArgumentOutOfRangeException reported for GetSheetAt(0) below.
// Presumably `new HSSFWorkbook(file)` was intended — confirm.
workbook = new HSSFWorkbook();
}
HSSFSheet sheet = (HSSFSheet)workbook.GetSheetAt(0);
HSSFRow headerRow = (HSSFRow)sheet.GetRow(0);
// The header row's LastCellNum is the upper column bound for every data row below.
int cellCount = headerRow.LastCellNum;
// Columns are only created while the table is still empty, i.e. for the first merged file.
if (dt.Rows.Count == 0)
{
for (int i = headerRow.FirstCellNum; i < cellCount; i++)
{
DataColumn column = new DataColumn(headerRow.GetCell(i).StringCellValue);
dt.Columns.Add(column);
}
}
else
{
}
int rowCount = sheet.LastRowNum + 1;
// Start one row after the first row so the header is not copied as data.
for (int i = (sheet.FirstRowNum + 1); i < rowCount; i++)
{
// NOTE(review): the result of GetRow(i) is dereferenced without a null check — confirm it cannot be null here.
HSSFRow row = (HSSFRow)sheet.GetRow(i);
DataRow dataRow = dt.NewRow();
for (int j = row.FirstCellNum; j < cellCount; j++)
{
// Cells that are absent stay unset in the DataRow; present cells are stored as their ToString() text.
if (row.GetCell(j) != null)
dataRow[j] = row.GetCell(j).ToString();
}
dt.Rows.Add(dataRow);
}
// Only the local references are dropped; nothing is explicitly closed or disposed here.
workbook = null;
sheet = null;
}
Now, at this line of code:
HSSFSheet sheet = (HSSFSheet)workbook.GetSheetAt(0);
it throws System.ArgumentOutOfRangeException:
'Index was out of range. Must be non-negative and less than the size of the collection.
Parameter name: index'.
I read about this error and I know I have to initialize the index or create one; I tried that too, but it had no effect. What am I doing wrong, or where am I supposed to initialize the index?
The rest of the code is the same as in the previous listing; I only modified this method to merge .xls documents.

Related

I need to change IDs in a .txt file based on an excel document

I got a large.txt file where I need to change IDs based on an excel file. The excel table is build like this:
Old ID
new ID
1A1
160
1A10
207
1A11
174
I have in total 354 IDs to be changed.
The problem is that my algorithm changes 1A10 to 1600 instead of 207, and 1A11 to 1601 instead of 174. It just sees 1A1 in the .txt file, directly changes it to 160, and the trailing 0 or 1 is left appended after that.
Any suggestions how to change that? See code below
(...)
// Load the ID translation table from the matching workbook through Excel interop.
ExcelApp.Application excelApp = new ExcelApp.Application();
DataRow myNewRow;
DataTable myTable;
//create book,pages and range variables
// Verbatim (@"...") literals are used for all paths so backslashes are not read as escape sequences.
ExcelApp.Workbook excelBook = excelApp.Workbooks.Open(@"matching.xlsx");
ExcelApp._Worksheet excelSheet = excelBook.Sheets[1];
ExcelApp.Range excelRange = excelSheet.UsedRange;
//calculate rows and columns
int rows = excelRange.Rows.Count;
int cols = excelRange.Columns.Count;
//define DataTable Name and Column Name
myTable = new DataTable("TranslationTable");
myTable.Columns.Add("Plasma", typeof(string));
myTable.Columns.Add("Thrombus", typeof(string));
//reading columns and rows into DataTable
// Starts at row 2, skipping row 1 of the used range.
// NOTE(review): with "i < rows" the row at index `rows` is never read — confirm whether that is intended.
for (int i = 2; i < rows; i++)
{
    myNewRow = myTable.NewRow();
    myNewRow["Plasma"] = excelRange.Cells[i, 1].Value2.ToString();
    myNewRow["Thrombus"] = excelRange.Cells[i, 3].Value2.ToString();
    myTable.Rows.Add(myNewRow);
}
//rewrite Plasma file
// The whole input file is read at once and split on CRLF into lines.
StreamReader sr = new StreamReader(@"C:\Users\wviegener\Desktop\Stroke\dataExchange\proteinGroups_Plasma.txt");
String[] row = Regex.Split(sr.ReadToEnd(), "\r\n");
sr.Close();
String old_ID;
String new_ID;
StreamWriter sw = new StreamWriter(@"C:\Users\wviegener\Desktop\Stroke\dataExchange\proteinGroups_Plasma_new.txt");
for(int i = 0; i < row.Length; i++)
{
for (int j = 0; j < myTable.Rows.Count - 1; j++)
{
old_ID = myTable.Rows[j][0].ToString();
new_ID = myTable.Rows[j][1].ToString();
row[i] = row[i].Replace(old_ID,"Thr" + new_ID);
row[i] = row[i].Replace("WDH", "" );
}
sw.WriteLine(row[i]);
the txt files look like this:
proteinGroups_Plasma.txt:
LFQ intensity 1A11_20220429 LFQ intensity 1A12_20220429
proteinGroups_Plasma_new.txt:
LFQ intensity Thr1672_20220429 LFQ intensity Thr312_20220429
If you did something like,
/// <summary>
/// Lazily reads the replacement pairs from matching.xlsx: for each row read, the key is the
/// column-1 value and the value is "Thr" followed by the column-3 value. A final pair maps
/// "WDH" to the empty string.
/// </summary>
/// <returns>The key/value pairs in sheet order, followed by the "WDH" pair.</returns>
public static IEnumerable<KeyValuePair<string, string>> ReadReplacements()
{
    ExcelApp.Application excelApp = new ExcelApp.Application();
    //create book,pages and range variables
    ExcelApp.Workbook excelBook = excelApp.Workbooks.Open(@"matching.xlsx");
    ExcelApp._Worksheet excelSheet = excelBook.Sheets[1];
    ExcelApp.Range excelRange = excelSheet.UsedRange;
    int rows = excelRange.Rows.Count;
    // Starts at row 2, skipping row 1 of the used range.
    // NOTE(review): with "i < rows" the row at index `rows` is never read — confirm whether that is intended.
    for (int i = 2; i < rows; i++)
    {
        yield return new KeyValuePair<string, string>(
            excelRange.Cells[i, 1].Value2.ToString(),
            $"Thr{excelRange.Cells[i, 3].Value2.ToString()}");
    }
    yield return new KeyValuePair<string, string>("WDH", "");
}
// Longest keys first, so that e.g. "1A10" is replaced before its prefix "1A1" can match inside it.
// NOTE(review): Dictionary<TKey,TValue> does not document a stable enumeration order — if the
// replacement loop depends on this order, keep the sorted pairs in a list instead; confirm.
IReadOnlyDictionary<string, string> replacements = new Dictionary<string, string>(
    ReadReplacements()
        .OrderByDescending(pair => pair.Key.Length));
and then deleted your datatable, and used replacements instead, you could be confident that the order of replacements would not corrupt your data.
This won't address the collision when a Thrombus replacement value contains a subsequent Plasma replacement key. This may not be possible with your business case.
That's the code now. It finally works.
using System;
using System.Data;
using System.IO;
using System.Text.RegularExpressions;
using ExcelApp = Microsoft.Office.Interop.Excel;
namespace Stroke
{
    class Program
    {
        /// <summary>
        /// Reads the ID translation table from matching.xlsx, sorts it by the "Plasma" column in
        /// descending order, and rewrites proteinGroups_Plasma.txt line by line, replacing each
        /// Plasma ID with "Thr" + the matching Thrombus ID and removing every "WDH".
        /// </summary>
        static void Main(string[] args)
        {
            DataTable myTable = new DataTable("TranslationTable");
            myTable.Columns.Add("Plasma", typeof(string));
            myTable.Columns.Add("Thrombus", typeof(string));

            ExcelApp.Application excelApp = new ExcelApp.Application();
            try
            {
                // Verbatim (@"...") literals keep the path backslashes from being read as escapes.
                ExcelApp.Workbook excelBook = excelApp.Workbooks.Open(@"C:\Users\wviegener\Desktop\Stroke\dataExchange\matching.xlsx");
                ExcelApp._Worksheet excelSheet = excelBook.Sheets[1];
                ExcelApp.Range excelRange = excelSheet.UsedRange;
                int rows = excelRange.Rows.Count;

                // Starts at row 2, skipping row 1 of the used range.
                // NOTE(review): with "i < rows" the row at index `rows` is never read — confirm whether that is intended.
                for (int i = 2; i < rows; i++)
                {
                    DataRow myNewRow = myTable.NewRow();
                    myNewRow["Plasma"] = excelRange.Cells[i, 1].Value2.ToString();
                    myNewRow["Thrombus"] = excelRange.Cells[i, 3].Value2.ToString();
                    myTable.Rows.Add(myNewRow);
                }

                // The workbook is only read, so close it without saving.
                excelBook.Close(false);
            }
            finally
            {
                // Always shut down the Excel instance this program started, even if reading failed.
                excelApp.Quit();
            }

            // Descending string sort puts longer IDs such as "1A10" ahead of their prefix "1A1",
            // so the longer ID is replaced before the shorter one can match inside it.
            DataView dv = myTable.DefaultView;
            dv.Sort = ("Plasma desc");
            DataTable sortedTable = dv.ToTable();

            //rewrite Plasma file
            String[] row;
            using (StreamReader sr = new StreamReader(@"C:\Users\wviegener\Desktop\Stroke\dataExchange\proteinGroups_Plasma.txt"))
            {
                row = Regex.Split(sr.ReadToEnd(), "\r\n");
            }

            // TODO (translated from the original German note): the first element was reported as not
            // being replaced — verify. The last one is now covered by the corrected loop bound below.
            using (StreamWriter sw = new StreamWriter(@"C:\Users\wviegener\Desktop\Stroke\dataExchange\proteinGroups_Plasma_new.txt"))
            {
                for (int i = 0; i < row.Length; i++)
                {
                    // Iterate over ALL translation rows; "Count - 1" skipped the last one.
                    for (int j = 0; j < sortedTable.Rows.Count; j++)
                    {
                        String old_ID = sortedTable.Rows[j][0].ToString();
                        String new_ID = sortedTable.Rows[j][1].ToString();
                        row[i] = row[i].Replace(old_ID, "Thr" + new_ID);
                        row[i] = row[i].Replace("WDH", "");
                    }
                    sw.WriteLine(row[i]);
                }
            }
        }
    }
}

Import from large Excel file a Excel.Range to DataTabe(MS.Interop or ExcelDataReader)

One condition is that I can't use File.Open or something like it, because the Excel file is already open.
So I am trying to use the AsDataSet() method like this, but it throws an exception (access denied, even though I am using FileShare.ReadWrite):
//ExcelDataReader - Config with filter range
// Counter captured by the FilterRow lambda below.
var i = 0;
// ExcelDataReader configuration that filters rows and columns relative to rangeExcel.
ExcelDataSetConfiguration excelconfig = new ExcelDataSetConfiguration
{
ConfigureDataTable = _ => new ExcelDataTableConfiguration
{
UseHeaderRow = addHeaders,
// NOTE(review): `i` is incremented inside the predicate, so its result depends on how many times
// the predicate has been invoked so far — confirm this counts rows as intended.
FilterRow = rowreader => rangeExcel.Row <= ++i - 1,
// Accepts a column when rangeExcel.Column <= colindex; no upper bound is applied here.
FilterColumn = (rowreader, colindex) => rangeExcel.Column <= colindex,
},
UseColumnDataType = true,
};
//ExcelDataReader using AsDataSet() below
// Opens the workbook's file with ExcelDataReader and returns the table named `sheetName`
// from the resulting DataSet. The `range` parameter is not used in this body.
private static DataTable CreateDataReader(Excel.Workbook book, Excel.Range range, ExcelDataSetConfiguration configuration, string sheetName)
{
// This DataSet is created but never used; the returned table comes from reader.AsDataSet below.
var dataSet = new DataSet(sheetName);
// NOTE(review): presumably book.Path is the containing folder rather than the file itself (FullName would
// include the file name); opening a folder path could explain the "access denied" exception — confirm.
using (var stream = File.Open(book.Path, FileMode.OpenOrCreate, FileAccess.Read, FileShare.ReadWrite))
{
using (IExcelDataReader reader = ExcelReaderFactory.CreateReader(stream))
{
return reader.AsDataSet(configuration).Tables[sheetName];
}
}
}
so second variant its =>
/// <summary>
/// Copies the values of an Excel range into a new DataTable, one DataRow per range row.
/// </summary>
/// <param name="range">Range whose cell values are read in a single bulk call.</param>
/// <param name="addHeaders">When true, the first range row is treated as a header and skipped as data.</param>
/// <returns>The filled table.</returns>
private static DataTable CreateDT(Excel.Range range, bool addHeaders)
{
    var dt = new DataTable();
    int i = 1;
    // Cache the dimensions once instead of querying the range again on every loop iteration.
    int row = range.Rows.Count;
    int col = range.Columns.Count;
    if (addHeaders)
    {
        //Add headers to dt
        i++;
    }
    else
    {
        //just add columns to dt
    }
    // 1st variant (the one working in production): read the whole range in one call.
    // Reported to throw an "out of range" exception when the Excel file is large (about 40 MB).
    // The array is indexed from 1 here, matching the row counter `i` and `iterator + 1` below.
    var arrayRowValue = (object[,])range.Cells.Value;
    for (; i <= row; i++)
    {
        object[] arrayToValue = new object[dt.Columns.Count];
        for (int iterator = 0; iterator < arrayToValue.Length; iterator++)
        {
            arrayToValue[iterator] = arrayRowValue[i, iterator + 1];
        }
        dt.Rows.Add(arrayToValue);
    }
    // The method is declared to return the table, so hand it back to the caller.
    return dt;
}
//2nd variant I want smth like this
// 2nd variant: fill each DataRow cell by cell straight from the range.
// Assumes dt already has `col` columns and that i, row and col are initialized as in CreateDT — TODO confirm.
// Each cell read is a separate interop call, so this is not expected to be faster than the bulk array read.
DataRow dataRow;
for (; i < row + 1; i++)
{
    dataRow = dt.NewRow();
    for (int iterator = 0; iterator < col; iterator++)
    {
        // Cells[row, column] addresses a single cell; assign it to the matching DataRow column
        // (ItemArray expects a whole object[] and cannot take one cell value).
        dataRow[iterator] = range.Cells[i, iterator + 1].Value2 ?? DBNull.Value;
    }
    dt.Rows.Add(dataRow);
}
What can help me import a large Excel range into a DataTable better and, most importantly, faster?
I have already read the thread "Using ExcelDataReader to read Excel data starting from a particular cell".
It does not help, because every example uses File.Open, which I can't do in my case :) I already have the Excel Application object, but it is a useless property for me :(

How to read Excel to Datatable using NPOI C#

I'm trying to read an Excel file into a DataTable using NPOI. Everything works fine; the only issue is that if a cell in a row is empty, it is not read. In Excel I have 4 rows (each row has some empty cells).
Excel File Image : enter image description here
After Reading That Excel To data table :enter image description here
I want like this in data table
/// <summary>
/// Loads the first sheet of the .xlsx file at <paramref name="Path"/> into a DataTable of string
/// columns. Rows are read from index 0 until the first missing row; per row, as many cells are
/// copied as the row's Cells collection contains.
/// </summary>
private DataTable GetDataTableFromExcel(String Path)
{
    XSSFWorkbook book;
    String firstSheetName;
    using (var input = new FileStream(Path, FileMode.Open, FileAccess.Read))
    {
        book = new XSSFWorkbook(input);
        firstSheetName = book.GetSheetAt(0).SheetName; //get first sheet name
    }

    // Look the sheet up again by the name captured above.
    var sheet = (XSSFSheet)book.GetSheet(firstSheetName);
    DataTable table = new DataTable();

    for (int rowIndex = 0; sheet.GetRow(rowIndex) != null; rowIndex++)
    {
        var sheetRow = sheet.GetRow(rowIndex);
        int storedCells = sheetRow.Cells.Count;

        // Add columns whenever this row holds more cells than the table has columns.
        if (table.Columns.Count < storedCells)
        {
            for (int added = 0; added < storedCells; added++)
            {
                table.Columns.Add("", typeof(string));
            }
        }

        // Append an empty row, then copy the cells at indexes 0..storedCells-1 into it.
        DataRow target = table.Rows.Add();
        for (int cellIndex = 0; cellIndex < storedCells; cellIndex++)
        {
            target[cellIndex] = sheetRow.GetCell(cellIndex);
        }
    }

    return table;
}
Please help me.
You may have to try something along these lines. It is working code for reading an Excel file using NPOI.
// read the current row data
// Header row of the sheet; its LastCellNum is used as the column bound for every data row.
XSSFRow headerRow = (XSSFRow)sheet.GetRow(0);
int cellCount = headerRow.LastCellNum;
// LastRowNum is the index of the last row, so add one to get an exclusive loop bound.
int rowCount = sheet.LastRowNum + 1;
bool isBlanKRow = false;
//Start reading data after first row(header row) of excel sheet.
// Assumes dt already has a column for every index below cellCount — TODO confirm.
for (int i = (sheet.FirstRowNum + 1); i < rowCount; i++)
{
    XSSFRow row = (XSSFRow)sheet.GetRow(i);
    // Skip rows that do not exist or contain no cells explicitly, instead of relying on a
    // catch-all handler to swallow the resulting exception.
    if (row == null || row.FirstCellNum < 0)
    {
        continue;
    }

    DataRow dataRow = dt.NewRow();
    isBlanKRow = true;
    for (int j = row.FirstCellNum; j < cellCount; j++)
    {
        // Fetch and convert each cell once; absent cells and whitespace-only text are left unset.
        var cell = row.GetCell(j);
        string text = cell == null ? null : cell.ToString();
        if (!string.IsNullOrWhiteSpace(text))
        {
            dataRow[j] = text;
            isBlanKRow = false;
        }
    }

    // Only rows with at least one non-blank cell are added to the table.
    if (!isBlanKRow)
    {
        dt.Rows.Add(dataRow);
    }
}

Interop - Speed of inserting into open excel instance

I am finding a running instance of excel and inserting data into one of the worksheets. Everything works, however inserting data is very slow. ~0.25 seconds for each cell.
Is there any way I can make this faster?
I have been looking for an approach that doesn't go cell by cell, but haven't found anything.
My code:
using System;
using System.Data;
using Microsoft.Office.Interop.Excel;
using Application = Microsoft.Office.Interop.Excel.Application;
using DataTable = System.Data.DataTable;
/// <summary>
/// Attaches to a running Excel instance and writes a 20-row, 2-column sample table
/// (header in row 1, data from row 2) into the worksheet named "SomeSheet".
/// Returns silently when Excel is not running or the sheet cannot be obtained.
/// </summary>
[STAThread]
static void Main()
{
    Application xlApp;
    try
    {
        xlApp = (Application)System.Runtime.InteropServices.Marshal.GetActiveObject("Excel.Application");
    }
    catch (Exception)//Excel not open
    {
        return;
    }
    xlApp.Visible = true;
    var wb = xlApp.ActiveWorkbook;

    Worksheet ws;
    try
    {
        ws = (Worksheet)wb.Worksheets["SomeSheet"];
    }
    catch (Exception)
    {
        return;
    }
    if (ws == null)
    {
        return;
    }

    // Build the sample data; a freshly created table needs no second construction or Clear().
    var dt = new DataTable();
    dt.Columns.Add("Col1");
    dt.Columns.Add("Col2");
    for (int ii = 0; ii < 20; ii++)
    {
        DataRow row = dt.NewRow();
        row["Col1"] = "xxxx";
        row["Col2"] = "yyyy";
        dt.Rows.Add(row);
    }

    //Header
    for (int i = 0; i < dt.Columns.Count; i++)
    {
        ws.Cells[1, i + 1] = dt.Columns[i].ColumnName;
    }

    //Data
    // Cell-by-cell write: one worksheet assignment per value (the slow path the question is about).
    for (int i = 0; i < dt.Rows.Count; i++)
    {
        for (int j = 0; j < dt.Columns.Count; j++)
        {
            ws.Cells[i + 2, j + 1] = dt.Rows[i][j];
        }
    }
}
Have you tried using a Range? Assigning an array to the range should work, as long as the Range and the array have the same size.
https://msdn.microsoft.com/en-us/library/microsoft.office.tools.excel.worksheet.range.aspx?cs-save-lang=1&cs-lang=csharp#code-snippet-1

NPOI with ASP.NET (C#)

I am trying to get NPOI to work with ASP.NET (C#) and I want to read an excel file and put it in a DataSet. Here is the code I attempted:
// Reads the sheet named `strSheetName` from the .xls file `FileName` into a DataTable.
// The first enumerated row supplies the column names; every later row becomes a DataRow of cell text.
public static DataTable getExcelData(string FileName, string strSheetName)
{
DataTable dt = new DataTable();
HSSFWorkbook hssfworkbook;
using (FileStream file = new FileStream(FileName, FileMode.Open, FileAccess.Read))
{
hssfworkbook = new HSSFWorkbook(file);
}
// NOTE(review): the result of GetSheet is used without a null check; a sheet name that is not found
// would presumably surface as the NullReferenceException on the next line — confirm.
ISheet sheet = hssfworkbook.GetSheet(strSheetName);
System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
while (rows.MoveNext())
{
IRow row = (HSSFRow)rows.Current;
// The table has no columns yet, so this row is treated as the header row.
if (dt.Columns.Count == 0)
{
for (int j = 0; j < row.LastCellNum; j++)
{
// NOTE(review): GetCell(j) is dereferenced without a null check, unlike the data-row loop below — confirm
// that header cells can never be missing.
dt.Columns.Add(row.GetCell(j).ToString());
}
continue;
}
DataRow dr = dt.NewRow();
for (int i = 0; i < row.LastCellNum; i++)
{
ICell cell = row.GetCell(i);
// Missing cells are stored as null; present cells as their ToString() text.
if (cell == null)
{
dr[i] = null;
}
else
{
dr[i] = cell.ToString();
}
}
dt.Rows.Add(dr);
}
return dt;
}
The Error that I get is
+ $exception {"Object reference not set to an instance of an object."} System.Exception {System.NullReferenceException}
The odd thing is that this actually works with 2 excel files that I have, but when I put in a third one it crashes with that error.
This returns null if strSheetName isn't found:
ISheet sheet = hssfworkbook.GetSheet(strSheetName);
try:
// Visit every sheet by index and capture the name it actually has in the workbook.
int iSheet = 0;
while (iSheet < hssfworkbook.NumberOfSheets)
{
    ISheet sheet = hssfworkbook.GetSheetAt(iSheet); // could cast to HSSFSheet
    String strSheetNameActual = sheet.SheetName;
    ++iSheet;
}
Then figure out how you want to compare strSheetName to strSheetNameActual or which sheets you want to process and how.
Try using this:
for (int j = row.FirstCellNum; j < row.LastCellNum; j++)
and
for (int i = row.FirstCellNum; i < row.LastCellNum; i++)
Instead of:
for (int j = 0; j < row.LastCellNum; j++)
and
for (int i = 0; i < row.LastCellNum; i++)
Also, make sure that you manage the case when the cells on the first row are null:
if (dt.Columns.Count == 0)
{
    // Header row: name each column after its cell text, or after a numbered placeholder
    // when the header cell is missing.
    int placeholderCount = 0;
    int headerIndex = row.FirstCellNum;
    while (headerIndex < row.LastCellNum)
    {
        ICell headerCell = row.GetCell(headerIndex);
        string columnName = headerCell != null
            ? headerCell.ToString()
            : String.Format("emptyColumnName_{0}", placeholderCount++);
        dt.Columns.Add(columnName);
        headerIndex++;
    }
    continue;
}
If you always want to read from the first sheet (probably, to get rid of the second method parameter, the sheet name, which is also the cause of your error), you may use:
// rest of the method's code
// A workbook without sheets makes GetSheetAt(0) throw rather than return null,
// so check the sheet count before asking for the first sheet.
if (hssfworkbook.NumberOfSheets == 0)
{
    return dt;
}
ISheet sheet = hssfworkbook.GetSheetAt(0);
var rows = sheet.GetRowEnumerator();
// rest of the method's code

Categories