I am trying to create a program that merges several pdfs and has a bookmark to each of them.
My starting method is :
/// <summary>
/// Merges every PDF in <paramref name="files"/> into a single in-memory PDF.
/// </summary>
/// <param name="files">Source PDFs; the value of each entry is read as a PDF stream. The key is not used yet.</param>
/// <returns>The bytes of the merged PDF.</returns>
private Byte[] MergePdfForms(Dictionary<string, Stream> files)
{
    var dest = new MemoryStream();
    PdfDocument pdf = new PdfDocument(new PdfWriter(dest));
    PdfMerger merger = new PdfMerger(pdf);
    foreach (var keyValuePair in files)
    {
        //TODO add bookmark to this page
        PdfDocument sourcePdf = new PdfDocument(new PdfReader(keyValuePair.Value));
        try
        {
            merger.Merge(sourcePdf, 1, sourcePdf.GetNumberOfPages());
        }
        finally
        {
            // Close the source even when the merge throws so it is not left open.
            sourcePdf.Close();
        }
    }
    // The destination must be closed before its bytes are read from the stream.
    pdf.Close();
    return dest.ToArray();
}
I have tried things like
// Attempt described in the question: register a named destination on the last page.
pdf.InitializeOutlines();
var lastPage = pdf.GetLastPage();
pdf.AddNamedDestination("Test Bookmark", lastPage.GetPdfObject());
Which doesn't create any bookmarks.
Take a look at the example here: http://developers.itextpdf.com/examples/merging-pdf-documents/clone-merging-documents-bookmarks
/// <summary>
/// Merges every PDF in <paramref name="files"/> into one in-memory PDF and adds one
/// outline entry per source file, titled with the dictionary key, under a "Root" outline.
/// </summary>
/// <param name="files">Outline title (key) mapped to the source PDF stream (value).</param>
/// <returns>The bytes of the merged PDF.</returns>
private byte[] MergePdfForms(Dictionary<string, Stream> files)
{
    var dest = new MemoryStream();
    PdfDocument pdf = new PdfDocument(new PdfWriter(dest));
    PdfMerger merger = new PdfMerger(pdf);
    PdfOutline rootOutline = pdf.GetOutlines(false);
    PdfOutline parentOutline = rootOutline.AddOutline("Root");

    // 1-based number of the page in the merged document where the next source starts.
    int firstPage = 1;
    foreach (var keyValuePair in files)
    {
        var sourcePdf = new PdfDocument(new PdfReader(keyValuePair.Value));
        int subPages;
        try
        {
            subPages = sourcePdf.GetNumberOfPages();
            merger.Merge(sourcePdf, 1, subPages);
        }
        finally
        {
            // Close the source even when the merge throws so it is not left open.
            sourcePdf.Close();
        }

        // Point the bookmark at the first merged page that came from this source.
        var bookmark = parentOutline.AddOutline(keyValuePair.Key);
        bookmark.AddDestination(PdfExplicitDestination.CreateFit(pdf.GetPage(firstPage)));
        firstPage += subPages;
    }
    pdf.Close();
    return dest.ToArray();
}
Related
I'm using iText7 to convert from HTML to PDF which runs perfectly
however this call:
// Per the surrounding description, `document` is closed by the time this call returns.
HtmlConverter.ConvertToPdf(htmlStream, document);
will close the document after it's called but I don't want to close the document yet for the following reason
I wrote this function to write pages I'll be calling it in a loop
/// <summary>
/// Adds a new page to the shared <c>document</c> and converts <paramref name="htmlbody"/> into it.
/// </summary>
/// <param name="htmlbody">HTML markup, encoded as UTF-8 before conversion.</param>
public static void WritePage(string htmlbody)
{
    document.AddNewPage();
    var utf8Html = new MemoryStream(Encoding.UTF8.GetBytes(htmlbody));
    HtmlConverter.ConvertToPdf(utf8Html, document);
}
and after the loop is over I'll close the document my self
/// <summary>Closes the shared <c>document</c>.</summary>
public static void CloseDocument() => document.Close();
this worked when I was using iText5 but now the convertToPdf will close the document.
I can read from that document and add it with the new page to a new document but I don't want to do that
I want to utilize the built in document.AddNewPage and after the document is fully constructed I'll close it myself
Thank you in advance
answer#1 is using PdfMerger and it is my preferred answer
/**
 * Converts each HTML file in {@code src} to an in-memory PDF and merges all of
 * them, in order, into the PDF written to {@code dest}.
 *
 * @param baseUri base URI set on the converter properties
 * @param src     paths of the HTML files to convert
 * @param dest    path of the merged PDF to create
 * @throws IOException if a file cannot be read or written
 */
public void createPdf(String baseUri, String[] src, String dest) throws IOException {
    ConverterProperties properties = new ConverterProperties();
    properties.setBaseUri(baseUri);
    PdfWriter writer = new PdfWriter(dest);
    PdfDocument pdf = new PdfDocument(writer);
    PdfMerger merger = new PdfMerger(pdf);
    for (String html : src) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // try-with-resources: the HTML input stream was never closed before.
        try (FileInputStream htmlStream = new FileInputStream(html)) {
            HtmlConverter.convertToPdf(htmlStream, new PdfDocument(new PdfWriter(baos)), properties);
        }
        // Re-open the converted bytes for reading so their pages can be merged.
        PdfDocument converted = new PdfDocument(
                new PdfReader(new ByteArrayInputStream(baos.toByteArray())));
        try {
            merger.merge(converted, 1, converted.getNumberOfPages());
        } finally {
            converted.close();
        }
    }
    pdf.close();
}
answer#2 converts the HTML to a list of IElement objects and adds them to the document, as in the following code:
/**
 * Converts each HTML file in {@code src} to layout elements and adds them, in
 * order, to a single PDF written to {@code dest}.
 *
 * @param baseUri base URI set on the converter properties
 * @param src     paths of the HTML files to convert
 * @param dest    path of the PDF to create
 * @throws IOException if a file cannot be read or written
 */
public void createPdf(String baseUri, String[] src, String dest) throws IOException {
    ConverterProperties properties = new ConverterProperties();
    properties.setBaseUri(baseUri);
    PdfWriter writer = new PdfWriter(dest);
    PdfDocument pdf = new PdfDocument(writer);
    Document document = new Document(pdf);
    for (String html : src) {
        List<IElement> elements;
        // try-with-resources: the HTML input stream was never closed before.
        try (FileInputStream htmlStream = new FileInputStream(html)) {
            elements = HtmlConverter.convertToElements(htmlStream, properties);
        }
        for (IElement element : elements) {
            // NOTE(review): the cast assumes every converted element is a block element.
            document.add((IBlockElement) element);
        }
    }
    document.close();
}
Reference: https://developers.itextpdf.com/content/itext-7-converting-html-pdf-pdfhtml/chapter-7-frequently-asked-questions-about-pdfhtml/how-parse-multiple-html-files-one-pdf
I have two fillable pdf files and did the code to merge those pdfs into one single pdf. Below is my code for that.
/// <summary>
/// Merges the two sample template PDFs into <c>~/Template/sample_pdf_123.pdf</c>.
/// </summary>
public void PDFSplit()
{
    var inputPaths = new List<string>
    {
        Server.MapPath("~/Template/sample_pdf.pdf"),
        Server.MapPath("~/Template/temp/sample_pdf.pdf")
    };

    // Resolve the output path and hand both inputs to the merger.
    Merge(inputPaths, Server.MapPath("~/Template/sample_pdf_123.pdf"));
}
/// <summary>
/// Copies every page of each input PDF, in order, into a new PDF at <paramref name="OutFile"/>.
/// </summary>
/// <param name="InFiles">Paths of the PDFs to merge.</param>
/// <param name="OutFile">Path of the merged PDF; created or overwritten.</param>
public void Merge(List<String> InFiles, String OutFile)
{
    using (FileStream stream = new FileStream(OutFile, FileMode.Create))
    using (iTextSharp.text.Document doc = new iTextSharp.text.Document())
    using (PdfCopy pdf = new PdfCopy(doc, stream))
    {
        doc.Open();
        foreach (String file in InFiles)
        {
            PdfReader reader = new PdfReader(file);
            // Page numbers passed to GetImportedPage start at 1.
            for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
            {
                PdfImportedPage page = pdf.GetImportedPage(reader, pageNumber);
                pdf.AddPage(page);
            }
            pdf.FreeReader(reader);
            reader.Close();
        }
    }
}
The code is working fine, but the problem is when I am trying to read that new generated merged file, it's not showing fields using AcroFields.
// Open the merged PDF and read its form fields.
PdfReader reader = new PdfReader(Server.MapPath("~/Template/sample_pdf_123.pdf"));
AcroFields pdfFormFields = reader.AcroFields;
You are unable to merge fillable PDF files because you are using an old version of iText. Please upgrade to iText 7 for .NET and read the iText 7 jump-start tutorial, more specifically chapter 6 where it says:
Merging forms
This is how it's done:
// Destination document that receives the pages of every source.
PdfDocument destPdfDocument = new PdfDocument(new PdfWriter(dest));
PdfDocument[] sources =
{
    new PdfDocument(new PdfReader(SRC1)),
    new PdfDocument(new PdfReader(SRC2))
};
// The same copier instance is passed for every source.
PdfPageFormCopier formCopier = new PdfPageFormCopier();
for (int s = 0; s < sources.Length; s++)
{
    PdfDocument sourcePdfDocument = sources[s];
    int lastPage = sourcePdfDocument.GetNumberOfPages();
    sourcePdfDocument.CopyPagesTo(1, lastPage, destPdfDocument, formCopier);
    sourcePdfDocument.Close();
}
destPdfDocument.Close();
I have been trying to get a grip of iText7 c# with little luck.
My goal is:
Load a pdf (one page) with a form (multiple fields) as a template
Fill out the form, Flatten and copy the filled forms page to a new document
repeat #2 x number of times with different data
Save to Memory Stream
I have the different parts working , but i cant get it to work together
// Fill the "address" field of the template, flatten the form, and save the result.
var memoryStream = new MemoryStream();
PdfReader reader = new PdfReader("untitled-1.pdf"); // input
PdfWriter writer = new PdfWriter(memoryStream); // output
PdfDocument pdfDoc = new PdfDocument(reader, writer);
PdfAcroForm form = PdfAcroForm.GetAcroForm(pdfDoc, true);
var fields = form.GetFormFields();
if (fields.ContainsKey("address")) {
    fields["address"].SetValue("first\nlast");
}
form.FlattenFields();
// The document is closed before the stream's bytes are read.
pdfDoc.Close();
byte[] b = memoryStream.ToArray();
// Verbatim strings are prefixed with '@'; the '#' prefix was not valid C#.
File.WriteAllBytes(@"t.pdf", b);
clone page:
// Copy the pages of the template into the output document five times.
PdfDocument pdfDoc = new PdfDocument(new PdfWriter("output.pdf").SetSmartMode(true));
pdfDoc.InitializeOutlines();
PdfDocument srcDoc;
for (int i = 0; i < 5; i++) {
    srcDoc = new PdfDocument(new PdfReader("untitled-1.pdf"));
    try {
        // copy content to the resulting PDF
        srcDoc.CopyPagesTo(1, srcDoc.GetNumberOfPages(), pdfDoc);
    }
    finally {
        // Each iteration opens the template again, so release it once its pages are copied.
        srcDoc.Close();
    }
}
pdfDoc.Close();
got an idea just after writing this question. Here is one solution to this problem
Create a PDF file with a form and a text field named address to use as a template, and save it as untitled-1.pdf.
This code will create an empty document and then, for each user in users, load the template and fill the address field with the user.
The filled form will then be flattened and copied into the new document.
When all is done, the document will be saved as result.pdf
//b.
/// <summary>
/// Builds one filled page per user from the template and writes the result to result.pdf.
/// </summary>
static void Main(string[] args)
{
    List<string> users = new List<string> { "Peter", "john", "Carl" };
    byte[] result = createPdf(users, "untitled-1.pdf");
    // Verbatim strings are prefixed with '@'; the '#' prefix was not valid C#.
    File.WriteAllBytes(@"result.pdf", result);
}
/// <summary>
/// Creates a PDF that contains, for every entry in <paramref name="users"/>, the pages
/// returned by <c>fillForm</c> for that user.
/// </summary>
/// <param name="users">Values passed one by one to <c>fillForm</c>.</param>
/// <param name="templateFile">Path of the PDF form template.</param>
/// <returns>The bytes of the combined PDF.</returns>
public static byte[] createPdf(List<string> users, string templateFile)
{
    using (MemoryStream memoryStream = new MemoryStream())
    {
        PdfDocument pdfDoc = new PdfDocument(new PdfWriter(memoryStream).SetSmartMode(true));
        pdfDoc.InitializeOutlines();
        foreach (var u in users)
        {
            MemoryStream m = new MemoryStream(fillForm(u, templateFile));
            PdfDocument srcDoc = new PdfDocument(new PdfReader(m));
            try
            {
                // copy content to the resulting PDF
                srcDoc.CopyPagesTo(1, srcDoc.GetNumberOfPages(), pdfDoc);
            }
            finally
            {
                // One source document is opened per user; close it once its pages are copied.
                srcDoc.Close();
            }
        }
        // Close before reading the bytes so the whole document has been written.
        pdfDoc.Close();
        return memoryStream.ToArray();
    }
}
/// <summary>
/// Opens the template, sets its "address" field (when present) to <paramref name="user"/>,
/// flattens the form and returns the resulting PDF.
/// </summary>
/// <param name="user">Value written into the "address" field.</param>
/// <param name="templateFile">Path of the PDF form template.</param>
/// <returns>The bytes of the filled, flattened PDF.</returns>
public static byte[] fillForm(string user, string templateFile)
{
    using (var output = new MemoryStream())
    {
        // Reader is the input template, writer targets the in-memory output.
        PdfDocument filled = new PdfDocument(new PdfReader(templateFile), new PdfWriter(output));
        PdfAcroForm acroForm = PdfAcroForm.GetAcroForm(filled, true);
        var formFields = acroForm.GetFormFields();
        if (formFields.ContainsKey("address"))
        {
            formFields["address"].SetValue(user);
        }
        acroForm.FlattenFields();
        filled.Close();
        return output.ToArray();
    }
}
I'm using iTextSharp 5.x. I'm trying to merge two pdfs and preserve the isTagged flag. When I remove copy.SetTagged(); the result pdf contains both pdfs, which is great. When I add copy.SetTagged(), I get an exception:
Exception -->System.ObjectDisposedException: Cannot access a closed file.
at System.IO.__Error.FileNotOpen()
at System.IO.FileStream.get_Position()
Here is the code
List<string> filesToMerge = new List<string> { "C:/dev/dcs/wp-cla-dcs/Hex/Docs/metadata/coverPage.pdf", "C:/dev/dcs/wp-cla-dcs/Hex/Docs/metadata/49W7a.pdf" };
string outputFileName = "C:/dev/dcs/wp-cla-dcs/Hex/Docs/metadata/results.pdf";
// Readers are collected here and closed only after the copy has been closed.
// Disposing each reader inside the loop is the likely cause of the reported
// "Cannot access a closed file" error: in tagged mode the copy appears to read
// from its sources again when it is closed.
List<PdfReader> openReaders = new List<PdfReader>();
try
{
    using (FileStream outFS = new FileStream(outputFileName, FileMode.Create))
    using (Document document = new Document())
    using (PdfCopy copy = new PdfSmartCopy(document, outFS))
    {
        copy.SetTagged();
        // Set up the iTextSharp document
        document.Open();
        foreach (string pdfFile in filesToMerge)
        {
            PdfReader reader = new PdfReader(pdfFile);
            openReaders.Add(reader);
            // FreeReader is not called here: the reader has to stay usable until the copy is closed.
            copy.AddDocument(reader);
        }
    }
}
finally
{
    foreach (PdfReader openReader in openReaders)
    {
        openReader.Close();
    }
}
Despite @bruno-lowagie's comment, I have had better results doing this with iText5.
Using iText7, PdfMerger left several contents untagged (all were tagged in the source document). PdfCopy in iText5, however, worked just fine; I only needed to manually add XMP metadata, title, lang, etc.:
/// <summary>
/// Merges the given PDFs, in order, into a single tagged PDF with a title, a language
/// and XMP metadata.
/// </summary>
/// <param name="fileNames">Paths of the PDFs to merge.</param>
/// <param name="outFile">Path of the merged PDF; created or overwritten.</param>
public static void CombineMultiplePDFs(string[] fileNames, string outFile)
{
    var lang = "en";
    var title = "My new title";
    // step 1: creation of a document-object
    Document document = new Document();
    // step 2: we create a writer that listens to the document
    FileStream newFileStream = new FileStream(outFile, FileMode.Create);
    PdfCopy writer = new PdfCopy(document, newFileStream);
    writer.SetTagged();
    writer.PdfVersion = PdfWriter.VERSION_1_7;
    writer.AddViewerPreference(PdfName.DISPLAYDOCTITLE, new PdfBoolean(true));
    writer.Info.Put(PdfName.TITLE, new PdfString(title));
    writer.CreateXmpMetadata();
    // step 3: we open the document
    document.Open();
    // set meta data
    document.AddLanguage(lang);
    document.AddTitle(title);
    // keep an array of all open readers so they can be closed again.
    var readers = new PdfReader[fileNames.Length];
    for (var fi = 0; fi < fileNames.Length; fi++)
    {
        // we create a reader for the current document
        // (indexing with 0 here merged the first file once per input instead of each file once)
        var fileName = fileNames[fi];
        PdfReader reader = new PdfReader(fileName);
        readers[fi] = reader;
        reader.ConsolidateNamedDestinations();
        // step 4: we add content
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            // IMPORTANT: the third param is "KeepTaggedPdfStructure"
            PdfImportedPage page = writer.GetImportedPage(reader, i, true);
            writer.AddPage(page);
        }
    }
    // step 5: we close the document and writer
    writer.Close();
    document.Close();
    // close readers only after the document is closed
    foreach (var r in readers)
    {
        r.Close();
    }
}
How would I merge several pdf pages into one with iTextSharp which also supports merging pages having form elements like textboxes, checkboxes, etc.
I have tried so many by googling, but nothing has worked well.
See my answer here Merging Memory Streams. I give an example of how to merge PDFs with itextsharp.
For updating form field names add this code that uses the stamper to change the form field names.
/// <summary>
/// Merges pdf files from a byte list. Form fields of file <c>k</c> are renamed by
/// appending <c>"_file" + k</c> so that equally named fields from different files stay distinct.
/// </summary>
/// <param name="files">list of files to merge</param>
/// <returns>
/// memory stream containing combined pdf; a stream over the single input when only one
/// file is given; <c>null</c> when the list is empty
/// </returns>
public MemoryStream MergePdfForms(List<byte[]> files)
{
    if (files.Count > 1)
    {
        MemoryStream msOutput = new MemoryStream();
        Document doc = new Document();
        PdfSmartCopy pCopy = new PdfSmartCopy(doc, msOutput);
        pCopy.PdfVersion = PdfWriter.VERSION_1_7;
        doc.Open();
        for (int k = 0; k < files.Count; k++)
        {
            // Rename the fields once per file; the previous version re-parsed and
            // re-stamped the whole file for every single page it copied.
            PdfReader pdfFile = new PdfReader(RenameFormFields(files[k], "_file" + k.ToString()));
            try
            {
                for (int i = 1; i <= pdfFile.NumberOfPages; i++)
                {
                    pCopy.AddPage(pCopy.GetImportedPage(pdfFile, i));
                }
                pCopy.FreeReader(pdfFile);
            }
            finally
            {
                pdfFile.Close();
            }
        }
        pCopy.Close();
        doc.Close();
        // NOTE(review): closing the copy may also close msOutput; callers presumably use ToArray() - confirm.
        return msOutput;
    }
    else if (files.Count == 1)
    {
        return new MemoryStream(files[0]);
    }
    return null;
}

/// <summary>
/// Returns a copy of <paramref name="pdf"/> in which <paramref name="suffix"/> is appended
/// to the name of every form field.
/// </summary>
private static byte[] RenameFormFields(byte[] pdf, string suffix)
{
    using (MemoryStream stamped = new MemoryStream())
    {
        PdfStamper stamper = new PdfStamper(new PdfReader(pdf), stamped);
        // Snapshot the names first: the field collection is modified while renaming.
        string[] names = new string[stamper.AcroFields.Fields.Keys.Count];
        stamper.AcroFields.Fields.Keys.CopyTo(names, 0);
        foreach (string name in names)
        {
            stamper.AcroFields.RenameField(name, name + suffix);
        }
        // The stamped bytes are read only after the stamper has been closed.
        stamper.Close();
        return stamped.ToArray();
    }
}
Here is my simplified version of Jonathan's Merge code with namespaces added, and stamping removed.
/// <summary>
/// Merges the given PDFs, in order, into a single PDF (version 1.7).
/// </summary>
/// <param name="files">PDF files to merge, each as a byte array.</param>
/// <returns>
/// A readable stream positioned at the start of the merged PDF; a stream over the single
/// input when only one file is given; <c>null</c> when the list is empty.
/// </returns>
public System.IO.MemoryStream MergePdfForms(System.Collections.Generic.List<byte[]> files)
{
    if (files.Count > 1) {
        byte[] merged;
        using (System.IO.MemoryStream msOutput = new System.IO.MemoryStream()) {
            using (iTextSharp.text.Document doc = new iTextSharp.text.Document()) {
                using (iTextSharp.text.pdf.PdfSmartCopy pCopy = new iTextSharp.text.pdf.PdfSmartCopy(doc, msOutput) { PdfVersion = iTextSharp.text.pdf.PdfWriter.VERSION_1_7 }) {
                    doc.Open();
                    foreach (byte[] oFile in files) {
                        using (iTextSharp.text.pdf.PdfReader pdfFile = new iTextSharp.text.pdf.PdfReader(oFile)) {
                            for (int i = 1; i <= pdfFile.NumberOfPages; i++) {
                                pCopy.AddPage(pCopy.GetImportedPage(pdfFile, i));
                            }
                            // Free the reader once per file, after all of its pages are imported.
                            pCopy.FreeReader(pdfFile);
                        }
                    }
                }
            }
            // Read the bytes only after the copy and the document have been disposed.
            merged = msOutput.ToArray();
        }
        // Hand back a fresh stream: msOutput is disposed when its using block ends.
        return new System.IO.MemoryStream(merged);
    } else if (files.Count == 1) {
        return new System.IO.MemoryStream(files[0]);
    }
    return null;
}
to merge PDF see "Merging two pdf pages into one using itextsharp"
Below is my code for pdf merging. Thanks to Jonathan for the suggestion about renaming fields, which resolved the issues when merging pdf pages with form fields.
/// <summary>
/// Merges the given PDF files, in order, into one PDF at <paramref name="savePath"/>.
/// Form fields of the file at index <c>n</c> are renamed by appending <c>"_file" + n</c>
/// so that equally named fields from different files stay distinct.
/// </summary>
/// <param name="savePath">Path of the merged PDF; created or overwritten.</param>
/// <param name="lstPdfFiles">Paths of the PDFs to merge.</param>
private static void CombineAndSavePdf(string savePath, List<string> lstPdfFiles)
{
    using (Stream outputPdfStream = new FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        Document document = new Document();
        PdfSmartCopy copy = new PdfSmartCopy(document, outputPdfStream);
        document.Open();
        for (int fileIndex = 0; fileIndex < lstPdfFiles.Count; fileIndex++)
        {
            // Stamp into a private buffer. The stamper used to be attached to the same
            // output stream as the copy and was never closed.
            byte[] renamedPdf;
            using (MemoryStream stampedStream = new MemoryStream())
            {
                PdfStamper stamper = new PdfStamper(new PdfReader(lstPdfFiles[fileIndex]), stampedStream);
                // Snapshot the names first: the field collection is modified while renaming.
                string[] fieldNames = new string[stamper.AcroFields.Fields.Keys.Count];
                stamper.AcroFields.Fields.Keys.CopyTo(fieldNames, 0);
                foreach (string name in fieldNames)
                {
                    // Suffix by file index; a page-index suffix repeats across files.
                    stamper.AcroFields.RenameField(name, name + "_file" + fileIndex.ToString());
                }
                stamper.Close();
                renamedPdf = stampedStream.ToArray();
            }

            PdfReader reader = new PdfReader(renamedPdf);
            try
            {
                for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    copy.AddPage(copy.GetImportedPage(reader, pageNumber));
                }
                copy.FreeReader(reader);
            }
            finally
            {
                reader.Close();
            }
        }
        document.Close();
    }
}