How to get ordered list into pdf using itext? - c#

I have to get an ordered list into pdf.The data stored is in html format.When exporting to pdf using itextsharp,the ol-li tags should be replaced by an ordered list.

You'll want to use iTextSharp's iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList() method. Below is a full working sample WinForms app targeting iTextSharp 5.1.1.0 that does what you're looking for. See the inline comments for what's going on.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text.pdf;
using iTextSharp.text;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
//File to export to
string exportFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "HTML.pdf");
//Create our PDF document
using (Document doc = new Document(PageSize.LETTER)){
using (FileStream fs = new FileStream(exportFile, FileMode.Create, FileAccess.Write, FileShare.Read)){
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs)){
//Open the doc for writing
doc.Open();
//Insert a page
doc.NewPage();
//This is our sample HTML
String HTML = "<ol><li>Row 1</li><li>Row 2</li></ol>";
//Create a StringReader to parse our text
using (StringReader sr = new StringReader(HTML))
{
//Pass our StringReader into iTextSharp's HTML parser, get back a list of iTextSharp elements
List<IElement> ies = iTextSharp.text.html.simpleparser.HTMLWorker.ParseToList(sr, null);
//Loop through each element and add to the document
foreach (IElement ie in ies)
{
doc.Add(ie);
}
}
//Close our document
doc.Close();
}
}
}
}
}
}

Related

PDF Merge using iTextSharp in winforms

I have the code for my pdf merge application but I need help on how to make this all work. I need help on the winforms Design. Obviously its just a blank screen right now without the design.
Below you will see my form code which is labeled form1.cs and program.cs which is below
Program.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace WindowsFormsApplication1
{
static class Program
{
/// <summary>
/// The main entry point for the application.
/// </summary>
[STAThread]
static void Main()
{
Application.EnableVisualStyles();
Application.SetCompatibleTextRenderingDefault(false);
Application.Run(new Form1());
}
}
}
form1.cs
using System;
using System.IO;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
//Folder that we'll work from
string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
string pdf1 = Path.Combine(workingFolder, "pdf1.pdf");//PDF with solid red background color
string pdf2 = Path.Combine(workingFolder, "pdf2.pdf");//PDF with text
string pdf3 = Path.Combine(workingFolder, "pdf3.pdf");//Merged PDF
//Create a basic PDF filled with red, nothing special
using (FileStream fs = new FileStream(pdf1, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document doc = new Document(PageSize.LETTER))
{
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
{
doc.Open();
PdfContentByte cb = writer.DirectContent;
cb.SetColorFill(BaseColor.RED);
cb.Rectangle(0, 0, doc.PageSize.Width, doc.PageSize.Height);
cb.Fill();
doc.Close();
}
}
}
//Create a basic PDF with a single line of text, nothing special
using (FileStream fs = new FileStream(pdf2, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document doc = new Document(PageSize.LETTER))
{
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
{
doc.Open();
doc.Add(new Paragraph("This is a test"));
doc.Close();
}
}
}
//Create a basic PDF
using (FileStream fs = new FileStream(pdf3, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document doc = new Document(PageSize.LETTER))
{
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
{
doc.Open();
//Get page 1 of the first file
PdfImportedPage imp1 = writer.GetImportedPage(new PdfReader(pdf1), 1);
//Get page 2 of the second file
PdfImportedPage imp2 = writer.GetImportedPage(new PdfReader(pdf2), 1);
//Add the first file to coordinates 0,0
writer.DirectContent.AddTemplate(imp1, 0, 0);
//Since we don't call NewPage the next call will operate on the same page
writer.DirectContent.AddTemplate(imp2, 0, 0);
doc.Close();
}
}
}
this.Close();
}
private void Form1_Load_1(object sender, EventArgs e)
{
}
}
}
I expect the output to be a working Pdf Merge/combine to where it has two buttons asking which file to upload for the parent pdf and then another button for child pdf and then something that shows the child pdf merged into the parent pdf.

Reading very large .xml.bz2 files

I'd like to parse Wikimedia's .xml.bzip2 dumps without extracting the entire file or performing any XML validation:
var filename = "enwiki-20160820-pages-articles.xml.bz2";
var settings = new XmlReaderSettings()
{
ValidationType = ValidationType.None,
ConformanceLevel = ConformanceLevel.Auto // Fragment ?
};
using (var stream = File.Open(filename, FileMode.Open))
using (var bz2 = new BZip2InputStream(stream))
using (var xml = XmlTextReader.Create(bz2, settings))
{
xml.ReadToFollowing("page");
// ...
}
The BZip2InputStream works - if I use a StreamReader, I can read XML line by line. But when I use XmlTextReader, it fails when I try to perform the read:
System.Xml.XmlException: 'Unexpected end of file has occurred. The following elements are not closed: mediawiki. Line 58, position 1.'
The bzip stream is not at EOF. Is it possible to open an XmlTextReader on top of a BZip2 stream? Or is there some other means to do this?
This should work. I used combination of XmlReader and Xml Linq. You can parse the XElement doc as needed.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication29
{
class Program
{
const string URL = #"https://dumps.wikimedia.org/enwiki/20160820/enwiki-20160820-abstract26.xml";
static void Main(string[] args)
{
XmlReader reader = XmlReader.Create(URL);
while (!reader.EOF)
{
if (reader.Name != "doc")
{
reader.ReadToFollowing("doc");
}
if (!reader.EOF)
{
XElement doc = (XElement)XElement.ReadFrom(reader);
}
}
}
}
}

Save modified WordprocessingDocument to new file

I'm attempting to open a Word document, change some text and then save the changes to a new document. I can get the first bit done using the code below but I can't figure out how to save the changes to a NEW document (specifying the path and file name).
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using DocumentFormat.OpenXml.Packaging;
using System.IO;
namespace WordTest
{
class Program
{
static void Main(string[] args)
{
string template = #"c:\data\hello.docx";
string documentText;
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(template, true))
{
using (StreamReader reader = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
{
documentText = reader.ReadToEnd();
}
documentText = documentText.Replace("##Name##", "Paul");
documentText = documentText.Replace("##Make##", "Samsung");
using (StreamWriter writer = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
{
writer.Write(documentText);
}
}
}
}
}
I'm a complete beginner at this, so forgive the basic question!
If you use a MemoryStream you can save the changes to a new file like this:
byte[] byteArray = File.ReadAllBytes("c:\\data\\hello.docx");
using (MemoryStream stream = new MemoryStream())
{
stream.Write(byteArray, 0, (int)byteArray.Length);
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(stream, true))
{
// Do work here
}
// Save the file with the new name
File.WriteAllBytes("C:\\data\\newFileName.docx", stream.ToArray());
}
In Open XML SDK 2.5:
File.Copy(originalFilePath, modifiedFilePath);
using (var wordprocessingDocument = WordprocessingDocument.Open(modifiedFilePath, isEditable: true))
{
// Do changes here...
}
wordprocessingDocument.AutoSave is true by default so Close and Dispose will save changes.
wordprocessingDocument.Close is not needed explicitly because the using block will call it.
This approach doesn't require entire file content to be loaded into memory like in accepted answer. It isn't a problem for small files, but in my case I have to process more docx files with embedded xlsx and pdf content at the same time so the memory usage would be quite high.
Simply copy the source file to the destination and make changes from there.
File.copy(source,destination);
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(destination, true))
{
\\Make changes to the document and save it.
wordDoc.MainDocumentPart.Document.Save();
wordDoc.Close();
}
Hope this works.
This approach allows you to buffer the "template" file without batching the whole thing into a byte[], perhaps allowing it to be less resource intensive.
var templatePath = #"c:\data\hello.docx";
var documentPath = #"c:\data\newFilename.docx";
using (var template = File.OpenRead(templatePath))
using (var documentStream = File.Open(documentPath, FileMode.OpenOrCreate))
{
template.CopyTo(documentStream);
using (var document = WordprocessingDocument.Open(documentStream, true))
{
//do your work here
document.MainDocumentPart.Document.Save();
}
}
For me this worked fine:
// To search and replace content in a document part.
public static void SearchAndReplace(string document)
{
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(document, true))
{
string docText = null;
using (StreamReader sr = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
{
docText = sr.ReadToEnd();
}
Regex regexText = new Regex("Hello world!");
docText = regexText.Replace(docText, "Hi Everyone!");
using (StreamWriter sw = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
{
sw.Write(docText);
}
}
}

How can i set an image to a pdf field in existing pdf file?

How can i set an image to a pdf field in existing pdf file?
I'm using the iTextSharp object.
Setting the text field is working fine. No problem in it.
pdfFormFields.SetField("Firstname", "Mujeeb");
Please help.
Remove the Text field and replace it with a Pushbutton field of the same size and position. If you set the Pushbutton to READ_ONLY then it can't be pressed and it will look like a static image. This keeps the image you're trying to add as a field annotation instead of adding it to the page content.
void ConvertTextFieldToImage(string inputFile, string fieldName, string imageFile, string outputFile)
{
using (PdfStamper stamper = new PdfStamper(new PdfReader(inputFile), File.Create(outputFile)))
{
AcroFields.FieldPosition fieldPosition = stamper.AcroFields.GetFieldPositions(fieldName)[0];
PushbuttonField imageField = new PushbuttonField(stamper.Writer, fieldPosition.position, fieldName);
imageField.Layout = PushbuttonField.LAYOUT_ICON_ONLY;
imageField.Image = iTextSharp.text.Image.GetInstance(imageFile);
imageField.ScaleIcon = PushbuttonField.SCALE_ICON_ALWAYS;
imageField.ProportionalIcon = false;
imageField.Options = BaseField.READ_ONLY;
stamper.AcroFields.RemoveField(fieldName);
stamper.AddAnnotation(imageField.Field, fieldPosition.page);
stamper.Close();
}
}
To the best of my knowledge you can't technically set a standard PDF field as an image (although you might be able to do this with XFA).
The workaround, however, is to just create a standard iTextSharp image and scale it to the form field's dimensions and place it where the field is.
Below is a full working C# 2010 WinForms app targeting iTextSharp 5.1.1.0 that shows how to do this. It starts by creating a very simple PDF with a single form field on it called "firstName". The second part of the program then gets the position and dimensions of that field and places an image there scaled appropriately. See the comments in the code for further details.
using System;
using System.ComponentModel;
using System.Text;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
string baseFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "StartFile.pdf");
string secondFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "SecondFile.pdf");
string TestImage = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Test.jpg");
//Create a very simple PDF with a single form field called "firstName"
using (FileStream fs = new FileStream(baseFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document doc = new Document(PageSize.LETTER))
{
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
{
doc.Open();
writer.AddAnnotation(new TextField(writer, new iTextSharp.text.Rectangle(0, 0, 100, 100), "firstName").GetTextField());
doc.Close();
}
}
}
//Create a second file "filling out" the form above
using (FileStream fs = new FileStream(secondFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (PdfStamper stamper = new PdfStamper(new PdfReader(baseFile), fs))
{
//GetFieldPositions returns an array of field positions if you are using 5.0 or greater
//This line does a lot and should really be broken up for null-checking
iTextSharp.text.Rectangle rect = stamper.AcroFields.GetFieldPositions("firstName")[0].position;
//Create an image
iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(TestImage);
//Scale it
img.ScaleAbsolute(rect.Width, rect.Height);
//Position it
img.SetAbsolutePosition(rect.Left, rect.Bottom);
//Add it to page 1 of the document
stamper.GetOverContent(1).AddImage(img);
stamper.Close();
}
}
this.Close();
}
}
}
This is the answer that works for placing an image in a specific location. `
using (PdfStamper stamper = new PdfStamper(new PdfReader(fromFilePath), File.Create("toFilePath")))
{
AcroFields.FieldPosition fieldPosition = stamper.AcroFields.GetFieldPositions("btn1")[0];
PushbuttonField imageField = new PushbuttonField(stamper.Writer, fieldPosition.position, "btn1Replaced");
imageField.Layout = PushbuttonField.LAYOUT_ICON_ONLY;
imageField.Image = iTextSharp.text.Image.GetInstance(ImageLocationPath);
imageField.ScaleIcon = PushbuttonField.SCALE_ICON_ALWAYS;
imageField.ProportionalIcon = false;
imageField.Options = BaseField.READ_ONLY;
stamper.AcroFields.RemoveField("btn1");
stamper.AddAnnotation(imageField.Field, fieldPosition.page);
stamper.Close();
}

How to display text in a pdf file?

I am using c#.net and need for some data to be displayed in a .pdf file. I am able to do so by using a PdfTable but then it is displayed in a tabular format.
I want a simple format like:
I dont want the exact tabular format.I am using iTextSharp dll.
Is it possible to display data in the above metioned format?
Any suggestions are welcome..
You can use PdfParagraph class to render genreal text in PDF
So when you say that you don't want exact tabular format are you talking about the borders? If so, I'd recommend using a PdfPTable and turning the borders off then. Cells in a PdfPTable are all based on the table's DefaultCell, so to turn the borders off for a table called t you'd do this:
t.DefaultCell.Border = 0;
Below is a full working WinForms app targetting iTextSharp 5.1.1.0 that does exactly that:
//PdfPTable Example
using System;
using System.ComponentModel;
using System.IO;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication2
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
//PDF file to output
string outputFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Output.pdf");
//Create a basic stream to write to
using (FileStream fs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
//Create a new PDF document
using (Document doc = new Document())
{
//Bind a the document to the stream
using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
{
//Open our PDF for writing
doc.Open();
//Insert a new page into our output PDF
doc.NewPage();
//Create a three column table
PdfPTable t = new PdfPTable(3);
//Remove the border from the table
t.DefaultCell.Border = 0;
//For the first row we some extra padding
t.DefaultCell.PaddingBottom = 10;
t.AddCell("ColumnA");
t.AddCell("ColumnB");
t.AddCell("ColumnC");
//Reset the padding for the remaining cells
t.DefaultCell.PaddingBottom = 0;
t.AddCell("Value1");
t.AddCell("Value1");
t.AddCell("Value1");
t.AddCell("Value2");
t.AddCell("Value2");
t.AddCell("Value2");
//Add the table to the document
doc.Add(t);
//Close our output PDF
doc.Close();
}
}
}
this.Close();
}
}
}
If for some reason you don't want to do this with a PdfPTable you can try hacking it together with a Paragraph but unless alignment isn't important you are going to give yourself a headache. The sample below uses paragraphs and formats each "column" within the paragraph to be 20 characters each, padding spaces as necessary. NOTE: This is 20 characters of a variable-width font so things don't actually align. If you want that, either use the PdfPTable above or you are going to have to measure strings which is not fun.
//Paragraph Example
using System;
using System.ComponentModel;
using System.IO;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication2
{
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
//PDF file to output
string outputFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Output.pdf");
//Create a basic stream to write to
using (FileStream fs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
//Create a new PDF document
using (Document doc = new Document())
{
//Bind a the document to the stream
using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
{
//Open our PDF for writing
doc.Open();
//Insert a new page into our output PDF
doc.NewPage();
//Right-pad each "column" for a total of 20 characters
string FormatString = "{0,-20}{1,-20}{2,-20}";
//Add the first row
doc.Add(new Paragraph(String.Format(FormatString, "ColumnA", "ColumnB", "ColumnC")));
//Add a blank line
doc.Add(new Paragraph(""));
//Add the two data rows
doc.Add(new Paragraph(String.Format(FormatString, "Value1", "Value1", "Value1")));
doc.Add(new Paragraph(String.Format(FormatString, "Value2", "Value2", "Value2")));
//Close our output PDF
doc.Close();
}
}
}
this.Close();
}
}
}
In iTextSharp dll, you just pass your data by creating table/tr/td structure in string like this :
string strData = string.Emrty;
strData = "<table border='0' cellpadding='0' cellspacing='0' width='100%'>";
strData += "<tr>";
strData += "<td>";
strData += "</td>";
strData += "</tr>";
strData += "</table>";
by creating table/tr/td structure of your data, just pass this variable data to pdf, then you will get your resulted output in pdf.

Categories