As part of my documentation procedures automation project I use Spire.Office to convert .xlsx document containing multiple worksheets to .doc document.
Overall, auto copy paste works well, I've modified this code to jump over few worksheets:
https://www.e-iceblue.com/Tutorials/Spire.XLS/Spire.XLS-Program-Guide/Document-Operation/How-to-Export-Excel-Data-to-Word-Table-Maintaining-Formatting-in-C.html
The problem I'm trying to fix is that merged cell are ignored and copied as individual cells.
Here is the code:
class ToWord
{
public void CopyToWord(string fpath, int qnumber)
{
Workbook spirewb = new Workbook();
spirewb.LoadFromFile(fpath);
Document doc = new Document();
for (int i = 1; i <= qnumber; i++)
{
Spire.Xls.Worksheet sheet = spirewb.Worksheets[i];
Table table = doc.AddSection().AddTable(true);
table.ResetCells(sheet.LastRow, sheet.LastColumn);
for (int r = 1; r <= sheet.LastRow; r++)
{
for (int c = 1; c <= sheet.LastColumn; c++)
{
CellRange xCell = sheet.Range[r, c];
TableCell wCell = table.Rows[r - 1].Cells[c - 1];
//fill data to word table
TextRange textRange = wCell.AddParagraph().AppendText(xCell.NumberText);
//copy font and cell style from excel to word
CopyStyle(textRange, xCell, wCell);
}
}
}
doc.SaveToFile("result.doc", Spire.Doc.FileFormat.Doc);
System.Diagnostics.Process.Start("result.doc");
}
private static void CopyStyle(TextRange wTextRange, CellRange xCell, TableCell wCell)
{
//copy font stlye
wTextRange.CharacterFormat.TextColor = xCell.Style.Font.Color;
wTextRange.CharacterFormat.FontSize = (float)xCell.Style.Font.Size;
wTextRange.CharacterFormat.FontName = xCell.Style.Font.FontName;
wTextRange.CharacterFormat.Bold = xCell.Style.Font.IsBold;
wTextRange.CharacterFormat.Italic = xCell.Style.Font.IsItalic;
//copy backcolor
wCell.CellFormat.BackColor = xCell.Style.Color;
//copy text alignment
switch (xCell.HorizontalAlignment)
{
case HorizontalAlignType.Left:
wTextRange.OwnerParagraph.Format.HorizontalAlignment = HorizontalAlignment.Left;
break;
case HorizontalAlignType.Center:
wTextRange.OwnerParagraph.Format.HorizontalAlignment = HorizontalAlignment.Center;
break;
case HorizontalAlignType.Right:
wTextRange.OwnerParagraph.Format.HorizontalAlignment = HorizontalAlignment.Right;
break;
}
}
}
Working solution has been posted here:
https://www.e-iceblue.com/forum/post37260.html
I am fairly new to C# coding, and recently I read that it is better to use Try Catch than if/else for file I/O related matters. I also read in other threads that try catch block should be avoided in loops because they significantly lower performances.
In my case, I am reading multiple (often over 1,000) excel files using for loop. Currently, I am using if/else if to handle really basic exceptions (i.e. file exist or not), but I was thinking of implementing try catch for the reason I explained above.
Here's some of my code
public void ReadFile(string path)
{
Excel.Application xlApp;
Excel.Workbooks xlBooks;
Excel.Workbook xlBook;
Excel.Worksheet xlSheet;
xlApp = new Excel.Application();
xlBooks = xlApp.Workbooks;
xlBook = xlBooks.Open(path);
xlSheet = xlBook.Sheets[1];
int rowCount = FindLastRow(xlSheet);
int colCount = FindLastColumn(xlSheet);
string header = "";
string glNum = Path.GetFileNameWithoutExtension(path).Trim();
for (int row = 1; row <= rowCount; row++)
{
bool processDone = false;
string curationInfo = "";
List<string> data = new List<string>();
// If empty line, skip
if (IsEmpty(xlSheet, row, colCount)) continue;
// If header line, capture
if (IsHeader(xlSheet, row, colCount) != "")
{
header = IsHeader(xlSheet, row, colCount);
continue;
}
// If coloured line, capture
if (IsColoured(xlSheet, row, colCount))
{
curationInfo = ConstructCurationInfo(xlSheet, row, colCount);
data.Add(curationInfo);
// If last row, end
if (row == rowCount) processDone = true;
while (!processDone)
{
// If last row
if (row == rowCount)
{
// If last row is empty, one section is done
if (IsEmpty(xlSheet, row, colCount)) processDone = true;
// If last row is header, one section is done
else if (IsHeader(xlSheet, row, colCount) != "") processDone = true;
// Every other case
else
{
// if coloured, capture
if (IsColoured(xlSheet, row, colCount))
{
string newCurationInfo = ConstructCurationInfo(xlSheet, row, colCount);
data.Add(newCurationInfo);
}
processDone = true;
}
}
// If not last row
else
{
int nextRow = row + 1;
// If next row is last row, end
if (nextRow == rowCount) processDone = true;
// If next row is empty, one section is done
if (IsEmpty(xlSheet, nextRow, colCount)) processDone = true;
// If next row is header, one section is done
else if (IsHeader(xlSheet, nextRow, colCount) != "") processDone = true;
// Every other case
else
{
// if coloured, capture
if (IsColoured(xlSheet, nextRow, colCount))
{
string newCurationInfo = ConstructCurationInfo(xlSheet, nextRow, colCount);
data.Add(newCurationInfo);
}
row++;
}
}
}
}
if (processDone && data.Count != 0)
{
Curation cur = new Curation(header, data, glNum);
curationList.Add(cur);
}
}
// Terminate background Excel Workers
xlBook.Close(false, Missing.Value, Missing.Value);
xlBooks.Close();
xlApp.Quit();
xlApp.DisplayAlerts = false;
Marshal.ReleaseComObject(xlSheet);
Marshal.ReleaseComObject(xlBook);
Marshal.ReleaseComObject(xlBooks);
Marshal.ReleaseComObject(xlApp);
GC.WaitForPendingFinalizers();
GC.Collect();
}
And the Form method that uses ReadFile() method
// Background workers
private void mergeNew_bgw_DoWork(object sender, DoWorkEventArgs e)
{
string input_path = db_input_tb.Text;
string output_dir_path = output_tb.Text;
// Status message to be reported to the UI
string status = "";
status = "Collecting files to be read.....";
mergeNew_bgw.ReportProgress(0, status);
List<string> excelPaths = GetPathToExcel(input_path);
if (IsEmpty(excelPaths))
{
status = "No File to be Processed!";
mergeNew_bgw.ReportProgress(0, status);
fileToProcess = false;
}
// If not empty list, process
else
{
ExcelInfo info = new ExcelInfo();
bool doesExist = false;
string path = "";
int row = 1;
for (int i = 0; i < excelPaths.Count; i++)
{
status = "Processing....." + (i + 1).ToString() + "/" + excelPaths.Count.ToString();
mergeNew_bgw.ReportProgress(0, status);
info.ReadFile(excelPaths[i]);
// If last file, write excel file
if (i + 1 == excelPaths.Count)
{
status = "Writing Complete Merged File";
mergeNew_bgw.ReportProgress(0, status);
info.WriteNew(ref doesExist, ref row, ref path, output_dir_path);
info.Clear();
}
else if ((i + 1) % cutoff == 0)
{
status = "Writing Partial Merged File";
mergeNew_bgw.ReportProgress(0, status);
info.WriteNew(ref doesExist, ref row, ref path, output_dir_path);
info.Clear();
}
}
}
}
Should I implement try catch block inside the for loop in mergeNew_bgw_DoWork method like this?
for (int i = 0; i < excelPaths.Count; i++)
{
status = "Processing....." + (i + 1).ToString() + "/" + excelPaths.Count.ToString();
mergeNew_bgw.ReportProgress(0, status);
try
{
info.ReadFile(excelPaths[i]);
}
catch(Exception e)
{
throw new Exception(e.ToString());
}
finally
{
// If last file, write excel file
if (i + 1 == excelPaths.Count)
{
status = "Writing Complete Merged File";
mergeNew_bgw.ReportProgress(0, status);
info.WriteNew(ref doesExist, ref row, ref path, output_dir_path);
info.Clear();
}
else if ((i + 1) % cutoff == 0)
{
status = "Writing Partial Merged File";
mergeNew_bgw.ReportProgress(0, status);
info.WriteNew(ref doesExist, ref row, ref path, output_dir_path);
info.Clear();
}
}
}
Thank you for your help!
EDIT
Apparently, performance will not change that much as pointed by one of the comments. However, where should I insert try catch block in order to get meaningful error messages? ReadFile() method is big, so I think putting the entire method in try block may not give users meaning error messages. Would it be better to insert try catch somewhere inside ReadFile() method?
I try to read excel file using NPOI library.
Here is the code:
public void ReadDataFromXL()
{
try
{
for (int i = 1; i <= sheet.LastRowNum; i++)
{
IRow row = sheet.GetRow(i);
for (int j = 0; j < row.Cells.Count(); j++)
{
var columnIndex = row.GetCell(j).ColumnIndex;
var cell = row.GetCell(j);
if (cell != null)
{
switch (cell.CellType)
{
case CellType.Numeric:
var val = cell.NumericCellValue; ;
break;
case CellType.String:
var str = cell.StringCellValue;
break;
}
}
}
}
}
catch (Exception)
{
throw;
}
}
Here the content of .xlsx file that I try to read:
As you can see column X and column Y are numeric columns.
But when I start to read this columns using the code above some of the numeric values in X and Y column have been recognizes by code as string values.
For example in picture above the cell B4 is numeric type but, on cell.CellType it shows String and the value of the string is 31.724732480727\n. '\n' is appended to the value.
Any idea why some numeric values appeared as string and why '\n' appended to the value?
It looks like the datatype of the column is of String, so if you wanted to check for the double datatype (assuming its going to be in the num+'\n' format, you could try the following snippet of code.
String number = "1512421.512\n";
double res;
if (double.TryParse(number.Substring(0, number.Length - 1), out res))
{
Console.WriteLine("It's a number! " + res);
}
I'm reading an xlsx file using NPOI lib, with C#. I need to extract some of the excel columns and save the extracted values into some kind of data structure.
I can successfully read the file and get all the values from the 2nd (the first one contains only headers) to the last row with the following code:
...
workbook = new XSSFWorkbook(fs);
sheet = (XSSFSheet)workbook.GetSheetAt(0);
....
int rowIndex = 1; //--- SKIP FIRST ROW (index == 0) AS IT CONTAINS TEXT HEADERS
while (sheet.GetRow(rowIndex) != null) {
for (int i = 0; i < this.columns.Count; i++){
int colIndex = this.columns[i].colIndex;
ICell cell = sheet.GetRow(rowIndex).GetCell(colIndex);
cell.SetCellType(CellType.String);
String cellValue = cell.StringCellValue;
this.columns[i].values.Add(cellValue); //--- Here I'm adding the value to a custom data structure
}
rowIndex++;
}
What I'd like to do now is check if the excel file is empty or if it has only 1 row in order to properly handle the issue and display a message
If I run my code against an excel file with only 1 row (headers), it breaks on
cell.SetCellType(CellType.String); //--- here cell is null
with the following error:
Object reference not set to an instance of an object.
I also tried to get the row count with
sheet.LastRowNum
but it does not return the right number of rows. For example, I have created an excel with 5 rows (1xHEADER + 4xDATA), the code reads successfully the excel values. On the same excel I have removed the 4 data rows and then I have launched again the code on the excel file. sheet.LastRowNum keeps returning 4 as result instead of 1.... I think this is related to some property bound to the manually-cleaned sheet cells.
Do you have any hint to solve this issue?
I think it would be wise to use sheet.LastRowNum which should return the amount of rows on the current sheet
Am I oversimplifying?
bool hasContent = false;
while (sheet.GetRow(rowIndex) != null)
{
var row = rows.Current as XSSFRow;
//all cells are empty, so is a 'blank row'
if (row.Cells.All(d => d.CellType == CellType.Blank)) continue;
hasContent = true;
}
You can retrieve the number of rows using this code:
public int GetTotalRowCount(bool warrant = false)
{
IRow headerRow = activeSheet.GetRow(0);
if (headerRow != null)
{
int rowCount = activeSheet.LastRowNum + 1;
return rowCount;
}
return 0;
}
Here is a way to get both the actual last row index and the number of physically existing rows:
public static int LastRowIndex(this ISheet aExcelSheet)
{
IEnumerator rowIter = aExcelSheet.GetRowEnumerator();
return rowIter.MoveNext()
? aExcelSheet.LastRowNum
: -1;
}
public static int RowsSpanCount(this ISheet aExcelSheet)
{
return aExcelSheet.LastRowIndex() + 1;
}
public static int PhysicalRowsCount(this ISheet aExcelSheet )
{
if (aExcelSheet == null)
{
return 0;
}
int rowsCount = 0;
IEnumerator rowEnumerator = aExcelSheet.GetRowEnumerator();
while (rowEnumerator.MoveNext())
{
++rowsCount;
}
return rowsCount;
}
I'm implementing a custom control based on the WPF DataGrid.
One of the things I implemented is pasting from clipboard.
For some reason the approach I'm taking is performing really slow.
I did some performance evaluation and it seems that BeginEdit() is taking around 80% of exclusive samples.
Can anyone provide any insights on what I maybe doing wrong or have a different approach to pasting data into WPF DatagGrid?
Here's my method doing the work:
private void OnExecutedPaste(object sender, ExecutedRoutedEventArgs e)
{
// get clipboard content
List<object[]> rowData = ClipboardHelper.ParseClipboardDataToTypes();
var selectedCellContent = SelectedCells[0].Column.GetCellContent(SelectedCells[0].Item);
if (selectedCellContent != null)
{
var firstCell = SelectedCells.Count > 0 ? selectedCellContent.Parent as DataGridCell : null;
// Get the start & end rows/columns indexes
int minRowIndex = firstCell != null ? firstCell.GetParentRow().GetIndex() : Items.Count - 1;
int maxRowIndex = minRowIndex + rowData.Count;
int minColumnDisplayIndex = (SelectionUnit != DataGridSelectionUnit.FullRow) && firstCell != null
? firstCell.Column.DisplayIndex
: 0;
int maxColumnDisplayIndex = Columns.Count - 1;
// Go through rows
int rowDataIndex = 0;
for (int i = minRowIndex; i <= maxRowIndex && rowDataIndex < rowData.Count; i++, rowDataIndex++)
{
int columnDataIndex = 0;
// Get row view model bound to the row
var rowVM = Items[i];
// Go through columns
for (int j = minColumnDisplayIndex;
j <= maxColumnDisplayIndex && columnDataIndex < rowData[rowDataIndex].Length;
j++, columnDataIndex++)
{
// Get the column
var column = ColumnFromDisplayIndex(j);
// Get the value to be pasted at the cell
object value = rowData[rowDataIndex][columnDataIndex];
CurrentCell = new DataGridCellInfo(rowVM, column);
BeginEdit();
// If first cell in the row we need to refresh item in case is the newitemplaceholder
// BeginEdit() may have triggered NewItemInitializer
if (j == minColumnDisplayIndex) rowVM = Items[i];
// Paste the value in the cell
column.OnPastingCellClipboardContent(rowVM, value);
//CommitEdit(DataGridEditingUnit.Cell, true);
}
if (!CommitEdit())
{
MessageBox.Show(string.Format("Cannot paste clipboard content at row {0}. Make sure the data is valid.", i), "Can't paste row",
MessageBoxButton.OK, MessageBoxImage.Exclamation);
return;
}
if (i >= Items.Count - 1)
{
if (NewItemInitializer == null)
MessageBox.Show("Cannot add new rows for additional items.", "Can't paste row",
MessageBoxButton.OK, MessageBoxImage.Exclamation);
else
OnInitializingNewItem(new InitializingNewItemEventArgs(NewItemInitializer.Invoke()));
}
}
}
}