With OpenXML SDK i have created a docx file which i'm using as a template. It needs to replace the words inside the document. Well if i use a document with paragraphs it works. But for text within a tablecell and within a paragraph like a break it's not working. Below my code =>
protected void btnMail_Click(object sender, EventArgs e)
{
string templateDocumentPath = string.Format("{0}\\document.docx", Server.MapPath("~/App_Data"));
byte[] result = null;
byte[] templateBytes = System.IO.File.ReadAllBytes(templateDocumentPath);
using (MemoryStream templateStream = new MemoryStream())
{
templateStream.Write(templateBytes, 0, (int)templateBytes.Length);
using (WordprocessingDocument doc = WordprocessingDocument.Open(templateStream, true))
{
MainDocumentPart mainPart = doc.MainDocumentPart;
var body = doc.MainDocumentPart.Document.Body;
var paras = body.Elements<DocumentFormat.OpenXml.Wordprocessing.Paragraph>();
var breaks = body.Elements<DocumentFormat.OpenXml.Wordprocessing.Break>();
foreach (var br in breaks)
{
foreach (var run in br.Elements<Run>())
{
foreach (var text in run.Elements<Text>())
{
if (text.Text.Contains("#bNaam#"))
{
text.Text = text.Text.Replace("#bNaam#", Parameters.Naam);
run.AppendChild(new Break());
}
}
}
}
foreach (var para in paras)
{
foreach (var run in para.Elements<Run>())
{
foreach (var text in run.Elements<Text>())
{
if (text.Text.Contains("bNaam"))
{
text.Text = text.Text.Replace("bNaam", Parameters.Naam);
run.AppendChild(new Break());
}
if (text.Text.Contains("bAdres"))
{
text.Text = text.Text.Replace("bAdres", Parameters.Adres);
run.AppendChild(new Break());
}
if (text.Text.Contains("#bPostcode#") && text.Text.Contains("#bGemeente#"))
{
text.Text = text.Text.Replace("#bPostcode#", Parameters.Postcode);
text.Text = text.Text.Replace("#bGemeente#", Parameters.Plaats);
run.AppendChild(new Break());
}
if (text.Text.Contains("#docBuitenland#"))
{
text.Text = text.Text.Replace("#docBuitenland#", Parameters.Naam);
run.AppendChild(new Break());
}
}
}
}
mainPart.Document.Save();
templateStream.Position = 0;
using (MemoryStream memoryStream = new MemoryStream())
{
templateStream.CopyTo(memoryStream);
result = memoryStream.ToArray();
}
}
byte[] fileContent = templateStream.ToArray();
templateStream.Close();
// Response.Buffer = true;
Response.ContentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
Response.AddHeader("Content-Disposition", "filename=document.docx");
Response.BinaryWrite(fileContent);
Response.End();
}
}
If you need to replace any text, you could try regex as per MSDN sample
using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
{
docText = sr.ReadToEnd();
}
Regex regexText = new Regex("Hello world!");
docText = regexText.Replace(docText, "Hi Everyone!");
To replace in particular container such as table, you would need to enumerate tables and cells (in the same way as you do for paragraphs)
var tables = mainPart.Document.Descendants<Table>().ToList();
foreach (Table t in tables)
{
var rows = t.Elements<TableRow>();
foreach (TableRow row in rows)
{
var cells = row.Elements<TableCell>();
foreach (TableCell cell in cells)
...
}
}
See MSDN for more details.
This makes the document incorrect to open =>
var tables = mainPart.Document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Table>().ToList();
foreach (DocumentFormat.OpenXml.Wordprocessing.Table t in tables)
{
var rows = t.Elements<DocumentFormat.OpenXml.Wordprocessing.TableRow>();
foreach (DocumentFormat.OpenXml.Wordprocessing.TableRow row in rows)
{
var cells = row.Elements<DocumentFormat.OpenXml.Wordprocessing.TableCell>();
foreach (DocumentFormat.OpenXml.Wordprocessing.TableCell cell in cells)
{
if (cell.InnerText.Contains("#bNaam#"))
{
//paragraph.InnerText will be empty
Run newRun = new Run();
newRun.AppendChild(new Text(cell.InnerText.Replace("#bNaam#", Parameters.Naam)));
//remove any child runs
cell.RemoveAllChildren<Run>();
//add the newly created run
cell.AppendChild(newRun);
}
}
}
}
Related
I want to generate a DocX file with footer from HTML.
Using the following lib: DocumentFormat.OpenXml
I manage to generate the DocX file, BUT without Footer.
The code that I use is the following:
class HtmlToDoc
{
public static byte[] GenerateDocX(string html)
{
MemoryStream ms;
MainDocumentPart mainPart;
Body b;
Document d;
AlternativeFormatImportPart chunk;
AltChunk altChunk;
string altChunkID = "AltChunkId1";
ms = new MemoryStream();
using(var myDoc = WordprocessingDocument.Create(ms, WordprocessingDocumentType.Document))
{
mainPart = myDoc.MainDocumentPart;
if (mainPart == null)
{
mainPart = myDoc.AddMainDocumentPart();
b = new Body();
d = new Document(b);
d.Save(mainPart);
}
chunk = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.Xhtml, altChunkID);
using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
{
using (StreamWriter stringStream = new StreamWriter(chunkStream))
{
stringStream.Write("<html><head></head><body>" + html + "</body></html>");
}
}
altChunk = new AltChunk();
altChunk.Id = altChunkID;
mainPart.Document.Body.InsertAt(altChunk, 0);
AddFooter(myDoc);
mainPart.Document.Save();
}
return ms.ToArray();
}
private static void AddFooter(WordprocessingDocument doc)
{
string newFooterText = "New footer via Open XML Format SDK 2.0 classes";
MainDocumentPart mainDocPart = doc.MainDocumentPart;
FooterPart newFooterPart = mainDocPart.AddNewPart<FooterPart>();
string rId = mainDocPart.GetIdOfPart(newFooterPart);
GeneratePageFooterPart(newFooterText).Save(newFooterPart);
foreach (SectionProperties sectProperties in
mainDocPart.Document.Descendants<SectionProperties>())
{
foreach (FooterReference footerReference in
sectProperties.Descendants<FooterReference>())
sectProperties.RemoveChild(footerReference);
FooterReference newFooterReference =
new FooterReference() { Id = rId, Type = HeaderFooterValues.Default };
sectProperties.Append(newFooterReference);
}
mainDocPart.Document.Save();
}
private static Footer GeneratePageFooterPart(string FooterText)
{
PositionalTab pTab = new PositionalTab()
{
Alignment = AbsolutePositionTabAlignmentValues.Center,
RelativeTo = AbsolutePositionTabPositioningBaseValues.Margin,
Leader = AbsolutePositionTabLeaderCharValues.None
};
var elment =
new Footer(
new Paragraph(
new ParagraphProperties(
new ParagraphStyleId() { Val = "Footer" }),
new Run(pTab,
new Text(FooterText))
)
);
return elment;
}
}
I tried some other examples too for generating the footer, but the results were the same: generated but WITHOUT footer.
What could be the problem ?
This is how you can add footer to a docx file:
static void Main(string[] args)
{
using (WordprocessingDocument document =
WordprocessingDocument.Open("Document.docx", true))
{
MainDocumentPart mainDocumentPart = document.MainDocumentPart;
// Delete the existing footer parts
mainDocumentPart.DeleteParts(mainDocumentPart.FooterParts);
// Create a new footer part
FooterPart footerPart = mainDocumentPart.AddNewPart<FooterPart>();
// Get Id of footer part
string footerPartId = mainDocumentPart.GetIdOfPart(footerPart);
GenerateFooterPartContent(footerPart);
// Get SectionProperties and Replace FooterReference with new Id
IEnumerable<SectionProperties> sections =
mainDocumentPart.Document.Body.Elements<SectionProperties>();
foreach (var section in sections)
{
// Delete existing references to headers and footers
section.RemoveAllChildren<FooterReference>();
// Create the new header and footer reference node
section.PrependChild<FooterReference>(
new FooterReference() { Id = footerPartId });
}
}
}
static void GenerateFooterPartContent(FooterPart part)
{
Footer footer1 = new Footer();
Paragraph paragraph1 = new Paragraph();
ParagraphProperties paragraphProperties1 = new ParagraphProperties();
ParagraphStyleId paragraphStyleId1 = new ParagraphStyleId() { Val = "Footer" };
paragraphProperties1.Append(paragraphStyleId1);
Run run1 = new Run();
Text text1 = new Text();
text1.Text = "Footer";
run1.Append(text1);
paragraph1.Append(paragraphProperties1);
paragraph1.Append(run1);
footer1.Append(paragraph1);
part.Footer = footer1;
}
I'm trying to write verification code for our PDF generating routines, and I'm having difficulty getting PDFsharp to extract text from files created with MigraDoc. The ExtractText code works with other PDFs, but not with the PDFs that I generate with MigraDoc (see code below.)
Any tips on what I'm doing wrong?
//Create the Doc
var doc = new MigraDoc.DocumentObjectModel.Document();
doc.Info.Title = "VerifyReadWrite";
var section = doc.AddSection();
section.AddParagraph("ABCDEF abcdef");
//Render the PDF
var renderer = new PdfDocumentRenderer(true);
var pdf = new PdfDocument();
renderer.PdfDocument = pdf;
renderer.Document = doc;
renderer.RenderDocument();
var msOut = new MemoryStream();
pdf.Save(msOut, true);
var pdfBytes = msOut.ToArray();
//Read the PDF into PdfSharp
var ms = new MemoryStream(pdfBytes);
var pdfRead = PdfSharp.Pdf.IO.PdfReader.Open(ms, PdfDocumentOpenMode.ReadOnly);
var segments = pdfRead.Pages[0].ExtractText().ToList();
Results in the following:
segments[0] = "\0$\0%\0&\0'\0(\0)"
segments[1] = "\0D\0E\0F\0G\0H\0I"
I'd expect to see:
segments[0] = "ABCDEF"
segments[1] = "abcdef"
I'm using the ExtractText code from here:
C# Extract text from PDF using PdfSharp
and it works very well for all but PDFs generated with MigraDoc.
public static IEnumerable<string> ExtractText(this PdfPage page)
{
var content = ContentReader.ReadContent(page);
var text = content.ExtractText();
return text.Select(x => x.Trim());
}
public static IEnumerable<string> ExtractText(this CObject cObject)
{
if (cObject is COperator)
{
var cOperator = (COperator) cObject;
if (cOperator.OpCode.Name == OpCodeName.Tj.ToString() ||
cOperator.OpCode.Name == OpCodeName.TJ.ToString())
{
foreach (var cOperand in cOperator.Operands)
foreach (var txt in ExtractText(cOperand))
yield return txt;
}
}
else
{
var sequence = cObject as CSequence;
if (sequence != null)
{
var cSequence = sequence;
foreach (var element in cSequence)
foreach (var txt in ExtractText(element))
yield return txt;
}
else if (cObject is CString)
{
var cString = (CString) cObject;
yield return cString.Value;
}
}
}
It seems the code used to extract text does not support all cases.
Try new PdfDocumentRenderer(false) (instead of 'true'). AFAIK this will lead to a different encoding and the text extraction might work.
I'm having a question about writing data to a CSV file.
I have a file named test.csv in which are 2 fields > accountnumber and relation ID.
Now I want to add another field next to it: IBAN.
The IBAN is the data from the first row which is validated by the SOAP function BBANtoIBAN.
How can I keep the 2 rows of data accountnumbers and relation IDs in the CSV and add the IBAN in the 3rd row?
This is my code so far:
using (var client = new WebService.BANBICSoapClient("IBANBICSoap"))
{
List<List<string>> dataList = new List<List<string>>();
TextFieldParser parser = new TextFieldParser(#"C:\CSV\test.csv");
parser.TextFieldType = FieldType.Delimited;
parser.SetDelimiters(";");
while (!parser.EndOfData)
{
List<string> data = new List<string>();
string row = parser.ReadLine();
try
{
string resultIBAN = client.BBANtoIBAN(row);
if (resultIBAN != string.Empty)
data.Add(resultIBAN);
else
data.Add("Accountnumber is not correct.");
}
catch (Exception msg)
{
Console.WriteLine(msg);
}
dataList.Add(data);
}
}
I see it as:
StreamReader sr = new StreamReader(#"C:\CSV\test.csv")
StreamWriter sw = new StreamWriter(#"C:\CSV\testOut.csv")
while (sr.Peek() >= 0)
{
string line = sr.ReadLine();
try
{
string[] rowsArray = line.Split(';');
string row = rowsArray[0];
string resultIBAN = client.BBANtoIBAN(row);
if (resultIBAN != string.Empty)
{
line +=";"+ resultIBAN;
}
else
{
line +=";"+"Accountnumber is not correct.";
}
}
catch (Exception msg)
{
Console.WriteLine(msg);
}
sw.WriteLine(line)
}
sr.Close();
sw.Close();
I would do something like this to parse the csv file, and add an extra item to the data list:
List<List<string>> dataList = new List<List<string>>();
string filename = #"C:\CSV\test.csv";
using (StreamReader sr = new StreamReader(filename))
{
string fileContent = sr.ReadToEnd();
foreach (string line in fileContent.Split(new string[] {Environment.NewLine},StringSplitOptions.RemoveEmptyEntries))
{
List<string> data = new List<string>();
foreach (string field in line.Split(';'))
{
data.Add(field);
}
try
{
string resultIBAN = client.BBANtoIBAN(data[0]);
if (resultIBAN != string.Empty)
{
data.Add(resultIBAN);
}
else
{
data.Add("Accountnumber is not correct.");
}
}
catch (Exception msg)
{
Console.WriteLine(msg);
}
dataList.Add(data);
}
Hi all, I have CSV files which are in this format:
**CSV Format1**
||OrderGUID||OrderItemID||Qty||SKUID||TrackingNumber||TotalWeight||DateShipped||DateDelivered||ShippingStatusId||OrderShippingAddressId
||5 ||3 ||2 ||12312||aasdasd ||24 ||2012-12-2010|| || 10025 ||10028
||5 ||4 ||3 ||113123||adadasdasd ||22 ||2012-12-2012|| ||10026 ||10028
**CSV Format2**
||"OrderGUID"||"OrderItemID"||"Qty"||"SKUID"||"TrackingNumber"||"TotalWeight"||"DateShipped"||"DateDelivered"||"ShippingStatusId"||"OrderShippingAddressId"||
||"5" ||"3" ||"2" ||"12312"||"aasdasd" ||"24" ||"2012-12-2010"||"" || "10025" ||"10028"||
||"5" ||"4" ||"3" ||"113123"||"adadasdasd" ||"22" ||"2012-12-2012"|| "2012-12-2010" ||"10026" ||"10028"||
I have to read these files without saving them on the server. Can anyone help me? How can I read this files and insert in my db? How can I trim the special characters from the files?
This is what I am trying to do for the file upload:
[AcceptVerbs(HttpVerbs.Post)]
public ActionResult ImportTrackingNumber(FormCollection form,HttpPostedFileBase UploadedFile,TrackingNumbersModel Trackingnumbers)
{
if (UploadedFile != null)
{
var allowedExtensions = new[] {".xlsx", ".csv"};
if (UploadedFile.ContentLength > 0)
{
var extension = Path.GetExtension(UploadedFile.FileName);
if (extension == ".xlsx")
{
//Need To code For Excel Files Reading
}
else if (extension == ".csv")
{
//string filename = Path.GetFileName(UploadedFile.PostedFile.InputStream);
StreamReader csvreader = new StreamReader(UploadedFile.FileName);
DataTable dt;
}
}
}
return View();
}
Just an example on how you can read the uploaded file without saving it on the server:
// Use the InputStream to get the actual stream sent.
using (StreamReader csvReader = new StreamReader(UploadedFile.InputStream))
{
while (!csvReader.EndOfStream)
{
var line = csvReader.ReadLine();
var values = line.Split(';');
}
}
This is my code:
public static DataTable GetDataTabletFromCSVFile(HttpPostedFileBase file)
{
DataTable csvDataTable = new DataTable();
// Read bytes from http input stream
var csvBody = string.Empty;
using (BinaryReader b = new BinaryReader(file.InputStream))
{
byte[] binData = b.ReadBytes(file.ContentLength);
csvBody = Encoding.UTF8.GetString(binData);
}
var memoryStream = new MemoryStream();
var streamWriter = new StreamWriter(memoryStream);
streamWriter.Write(csvBody);
streamWriter.Flush();
memoryStream.Position = 0;
using (TextFieldParser csvReader = new TextFieldParser(memoryStream))
{
csvReader.SetDelimiters(new string[] { "," });
csvReader.HasFieldsEnclosedInQuotes = true;
string[] colFields = csvReader.ReadFields();
foreach (string column in colFields)
{
DataColumn datecolumn = new DataColumn(column);
datecolumn.AllowDBNull = true;
csvDataTable.Columns.Add(datecolumn);
}
while (!csvReader.EndOfData)
{
string[] fieldData = csvReader.ReadFields();
//Making empty value as null
for (int i = 0; i < fieldData.Length; i++)
{
if (fieldData[i] == "")
{
fieldData[i] = null;
}
}
csvDataTable.Rows.Add(fieldData);
}
}
return csvDataTable;
}
I have a cell that contains the placeholder "$$value" in the Excel sheet, the thing is that I need to replace the placeholder's actual value using Open XML and save it as separate workbook.
Here is the code that I tried...it is not replacing the actual value and also I'm unable to save the workbook. I need to sort out this issue.
WorksheetPart worksheetPart = (WorksheetPart)myWorkbook.WorkbookPart.GetPartById(sheet.Id);
DocumentFormat.OpenXml.Spreadsheet.Worksheet worksheet = worksheetPart.Worksheet;
string _txt1 = "$$value";
if (_txt1.Contains("$$"))
{
worksheet.InnerText.Replace(_txt1, "test");
}
by default Excel stores strings in the global (1 per workbook) SharedStringTablePart. So, this is the one you need to target. However, the OpenXML format also allows inline text inside the WorksheetParts. hence, a complete solution needs to look there as well.
Here's a sample app (with some inline comments):
using DocumentFormat.OpenXml.Packaging;
using x = DocumentFormat.OpenXml.Spreadsheet;
class Program
{
private static readonly string placeHolder = "$$value";
static void Main()
{
var templatePath = #"C:\Temp\template.xlsx";
var resultPath = #"C:\Temp\result.xlsx";
string replacementText = "test";
using (Stream xlsxStream = new MemoryStream())
{
// Read template from disk
using (var fileStream = File.OpenRead(templatePath))
fileStream.CopyTo(xlsxStream);
// Do replacements
ProcessTemplate(xlsxStream, replacementText);
// Reset stream to beginning
xlsxStream.Seek(0L, SeekOrigin.Begin);
// Write results back to disk
using (var resultFile = File.Create(resultPath))
xlsxStream.CopyTo(resultFile);
}
}
private static void ProcessTemplate(Stream template, string replacementText)
{
using (var workbook = SpreadsheetDocument.Open(template, true, new OpenSettings { AutoSave = true }))
{
// Replace shared strings
SharedStringTablePart sharedStringsPart = workbook.WorkbookPart.SharedStringTablePart;
IEnumerable<x.Text> sharedStringTextElements = sharedStringsPart.SharedStringTable.Descendants<x.Text>();
DoReplace(sharedStringTextElements, replacementText);
// Replace inline strings
IEnumerable<WorksheetPart> worksheetParts = workbook.GetPartsOfType<WorksheetPart>();
foreach (var worksheet in worksheetParts)
{
var allTextElements = worksheet.Worksheet.Descendants<x.Text>();
DoReplace(allTextElements, replacementText);
}
} // AutoSave enabled
}
private static void DoReplace(IEnumerable<x.Text> textElements, string replacementText)
{
foreach (var text in textElements)
{
if (text.Text.Contains(placeHolder))
text.Text = text.Text.Replace(placeHolder, replacementText);
}
}
Solution:
private static void ProcessTemplate(Stream template, Dictionary<string,string> toReplace)
{
using (var workbook = SpreadsheetDocument.Open(template, true, new OpenSettings { AutoSave = true }))
{
workbook.WorkbookPart.Workbook.CalculationProperties.ForceFullCalculation = true;
workbook.WorkbookPart.Workbook.CalculationProperties.FullCalculationOnLoad = true;
//Replace SheetNames
foreach (Sheet sheet in workbook.WorkbookPart.Workbook.Sheets)
foreach (var key in toReplace.Keys)
sheet.Name.Value = sheet.Name.Value.Replace(key, toReplace[key]);
foreach (WorksheetPart wsheetpart in workbook.WorkbookPart.WorksheetParts)
foreach (SheetData sheetd in wsheetpart.Worksheet.Descendants<x.SheetData>())
foreach (Row r in wsheetpart.Worksheet.Descendants<x.Row>())
foreach (Cell c in r.Descendants<x.Cell>())
if (c.CellFormula != null)
{
foreach (var key in toReplace.Keys)
c.CellFormula.Text = c.CellFormula.Text.Replace(key, toReplace[key]);
}
// Replace shared strings
SharedStringTablePart sharedStringsPart = workbook.WorkbookPart.SharedStringTablePart;
IEnumerable<x.Text> sharedStringTextElements = sharedStringsPart.SharedStringTable.Descendants<x.Text>();
for(int i =0;i<toReplace.Keys.Count; i++)
DoReplace(sharedStringTextElements, toReplace);
IEnumerable<x.Formula> sharedStringTextElementsF = sharedStringsPart.SharedStringTable.Descendants<x.Formula>();
for (int i = 0; i < toReplace.Keys.Count; i++)
DoReplaceFormula(sharedStringTextElementsF, toReplace);
// Replace inline strings
IEnumerable<WorksheetPart> worksheetParts = workbook.GetPartsOfType<WorksheetPart>();
foreach (var worksheet in worksheetParts)
{
var allTextElements = worksheet.Worksheet.Descendants<x.Text>();
DoReplace(allTextElements, toReplace);
var allTextElements2 = worksheet.Worksheet.Descendants<x.Formula>();
DoReplaceFormula(allTextElements2, toReplace);
}
} // AutoSave enabled
}