How can I create named destinations with iTextSharp? - c#

I am trying to convert PDF bookmarks into named destinations with C# and iTextSharp 5 library. Unfortunately iTextSharp seems not to write named destinations into the target PDF file.
using System;
using System.Collections.Generic;
using iTextSharp.text.pdf;
using iTextSharp.text;
using System.IO;
namespace PDFConvert
{
    /// <summary>
    /// Reads the top-level bookmarks of a PDF with iTextSharp 5 and asks the stamper's
    /// writer to register one named destination per bookmark while copying the
    /// document to a new file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: stamps <c>test.pdf</c> to <c>out.pdf</c>, calling
        /// <c>AddNamedDestination</c> for every bookmark whose "Page" entry has the
        /// form <c>page XYZ left top zoom</c>.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            String InputPdf = @"test.pdf";
            String OutputPdf = "out.pdf";
            // Bookmark coordinates are written with '.' as decimal separator, so parse
            // them culture-invariantly instead of with the current UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            PdfReader reader = new PdfReader(InputPdf);
            try
            {
                using (var fileStream = new FileStream(OutputPdf, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    var list = SimpleBookmark.GetBookmark(reader);
                    PdfStamper stamper = new PdfStamper(reader, fileStream);
                    try
                    {
                        // NOTE(review): GetBookmark presumably returns null for a PDF
                        // without outlines; guard so foreach does not dereference null.
                        if (list != null)
                        {
                            foreach (Dictionary<string, object> entry in list)
                            {
                                object titleValue;
                                object pageValue;
                                // Skip bookmarks that lack a title or a page target
                                // (for example bookmarks that only carry an action).
                                if (!entry.TryGetValue("Title", out titleValue) || titleValue == null)
                                    continue;
                                if (!entry.TryGetValue("Page", out pageValue) || pageValue == null)
                                    continue;

                                String title = titleValue.ToString();
                                String[] aLoc = pageValue.ToString().Split(' ');

                                // Only "page XYZ left top zoom" carries the three
                                // coordinates this code needs; other fit types are skipped.
                                int page;
                                float left, top, zoom;
                                if (aLoc.Length < 5 || aLoc[1] != "XYZ"
                                    || !int.TryParse(aLoc[0], System.Globalization.NumberStyles.Integer, invariant, out page)
                                    || !float.TryParse(aLoc[2], System.Globalization.NumberStyles.Float, invariant, out left)
                                    || !float.TryParse(aLoc[3], System.Globalization.NumberStyles.Float, invariant, out top)
                                    || !float.TryParse(aLoc[4], System.Globalization.NumberStyles.Float, invariant, out zoom))
                                {
                                    continue;
                                }

                                PdfDestination dest = new PdfDestination(PdfDestination.XYZ, left, top, zoom);
                                stamper.Writer.AddNamedDestination(title, page, dest);
                                // stamper.Writer.AddNamedDestinations(SimpleNamedDestination.GetNamedDestination(reader, false), reader.NumberOfPages);
                            }
                        }
                    }
                    finally
                    {
                        stamper.Close();
                    }
                }
            }
            finally
            {
                reader.Close();
            }
        }
    }
}
I already tried using PdfWriter instead of PdfStamper, with the same result. My code definitely calls stamper.Writer.AddNamedDestination(title, page, dest); but there is no sign of named destinations in my target file.

I have found a solution using iText 7 instead of 5. Unfortunately the syntax is completely different. In my code below I only consider the second level Bookmarks ("Outline") of my PDF.
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Navigation;
using System;
namespace PDFConvert
{
    /// <summary>
    /// iText 7 variant: turns every second-level outline (bookmark) of a PDF into a
    /// named destination and writes the result to a new file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: copies <c>test.pdf</c> to <c>out.pdf</c>, adding one named
        /// destination per second-level outline, keyed by the outline title.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            String InputPdf = @"test.pdf";
            String OutputPdf = "out.pdf";
            PdfDocument pdfDoc = new PdfDocument(new PdfReader(InputPdf), new PdfWriter(OutputPdf));
            try
            {
                PdfOutline outlines = pdfDoc.GetOutlines(false);
                // NOTE(review): presumably null when the document has no outline tree.
                if (outlines != null)
                {
                    // first level
                    foreach (var outline in outlines.GetAllChildren())
                    {
                        // second level
                        foreach (var second in outline.GetAllChildren())
                        {
                            String title = second.GetTitle();
                            PdfDestination dest = second.GetDestination();
                            // An outline may have no destination (e.g. it triggers an
                            // action instead); there is nothing to register then.
                            if (dest == null)
                            {
                                continue;
                            }
                            pdfDoc.AddNamedDestination(title, dest.GetPdfObject());
                        }
                    }
                }
            }
            finally
            {
                // Closing the document is what flushes the output file.
                pdfDoc.Close();
            }
        }
    }
}

Related

Grab all of the pages of a PDF using textsharp

I am reading a PDF using an older version of iTextSharp with this code:
// Imports page 1 of an existing PDF into a freshly created document (iTextSharp 5).
string Oldfile = @"C:/test.pdf"; // Gets the Template
string NewFile = "C:/test.pdf";
// NOTE(review): Oldfile and NewFile point at the same path here; presumably these
// are placeholder paths. Confirm the real input and output differ.
new FileInfo(NewFile).Directory.Create(); // Go create this folder if it's not there
PdfReader reader = new PdfReader(Oldfile);
// Size the new document like the (rotation-adjusted) first source page.
iTextSharp.text.Rectangle Size = reader.GetPageSizeWithRotation(1);
Document document = new Document(Size);
// MemoryStream memory_stream = new MemoryStream();
FileStream fs = new FileStream(NewFile, FileMode.Create, FileAccess.Write);
PdfWriter weiter = PdfWriter.GetInstance(document, fs);
document.Open();
PdfContentByte cb = weiter.DirectContent;
// Only page 1 is imported, so the output has a single page.
PdfImportedPage page = weiter.GetImportedPage(reader, 1);
//PdfImportedPage page2 = weiter.GetImportedPage(reader, 2);
cb.AddTemplate(page, 0, 0);
The problem I am having is that the source PDF has 2 pages, but this code only gets the 1st page, adds lines to it, and saves only that 1st page. I want to be able to grab both pages, or is there a way to merge them afterwards?
I bet you need to iterate all pages.
using System;
using System.IO;
using System.Collections.Generic;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace TestAnything
{
    /// <summary>
    /// Merges several PDF files into one by copying every page of every source
    /// file with iTextSharp's <c>PdfCopy</c>.
    /// </summary>
    class Program
    {
        /// <summary>Merges two sample files into <c>c:\temp\merge.pdf</c>.</summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            List<string> filesToMerge = new List<string> { @"c:\temp\1.pdf", @"c:\temp\2.pdf" };
            FileInfo destinationFile = new FileInfo(@"c:\temp\merge.pdf");
            if (File.Exists(destinationFile.FullName))
                File.Delete(destinationFile.FullName);
            MergeFiles(filesToMerge, destinationFile);
        }

        /// <summary>
        /// Copies all pages of each source file, in list order, into
        /// <paramref name="destinationFile"/>.
        /// </summary>
        /// <param name="sourceFiles">Paths of the PDFs to merge; must contain at least one entry.</param>
        /// <param name="destinationFile">File that receives the merged PDF (overwritten).</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceFiles"/> is null or empty (exception type kept from the original).
        /// </exception>
        public static void MergeFiles(List<string> sourceFiles, FileInfo destinationFile)
        {
            if (sourceFiles == null || sourceFiles.Count == 0)
                throw new ArgumentNullException("sourceFiles", "At least one source file is required.");

            // The first page of the first file determines the Document's page size.
            Rectangle firstPageSize;
            PdfReader sizeProbe = new PdfReader(sourceFiles[0]);
            try
            {
                firstPageSize = sizeProbe.GetPageSizeWithRotation(1);
            }
            finally
            {
                sizeProbe.Close();
            }

            using (FileStream output = new FileStream(destinationFile.FullName, FileMode.Create))
            {
                Document document = new Document(firstPageSize);
                PdfCopy writer = new PdfCopy(document, output);
                document.Open();
                try
                {
                    foreach (string sourceFile in sourceFiles)
                    {
                        PdfReader reader = new PdfReader(sourceFile);
                        try
                        {
                            reader.ConsolidateNamedDestinations();
                            for (int x = 1; x <= reader.NumberOfPages; x++)
                                writer.AddPage(writer.GetImportedPage(reader, x));
                            PRAcroForm form = reader.AcroForm;
                            if (form != null)
                                writer.CopyAcroForm(reader);
                        }
                        finally
                        {
                            // Each source reader is released as soon as its pages are copied.
                            reader.Close();
                        }
                    }
                }
                finally
                {
                    if (document.IsOpen())
                        document.Close();
                }
            }
        }
    }
}

Editing a XFA PDF with iText (Editing only a field within a node)

Ok so I'm having to programmatically fill out an XFA PDF using C#. I've been able to successfully extract the XML structure of the PDF. However, I'm running into issues using the AcroFields.Xfa.FillXfaForm(sourceXML) calls.
Essentially what is happening is this: I am taking the ENTIRE XML tree, editing the fields within the XML and then attempting to edit the form fields with the new XML. I end up with a PDF stripped of all AcroForm fields, without the new input added. HOWEVER when I parse this edited PDF and extract the XML tree I see that my edits have been preserved.
The security settings for this particular XFA PDF allow form fields to be edited however I am being forced to use PdfReader.unethicalreading = true; with my current set up (which is why I believe the form fields are being stripped out). I believe that the XFA PDF is taking my XML edits as a full on edit to the format of the document itself.
Here is my code so far:
namespace ConsoleApplication2
{
    /// <summary>
    /// Extracts the XFA XML of a PDF to a file and feeds that XML back into the
    /// form through <c>PdfStamper</c> (iTextSharp 5).
    /// </summary>
    class Program
    {
        /// <summary>
        /// Dumps the XFA XML to <c>outPutTest.xml</c>, then stamps the PDF in append
        /// mode using that XML as the new form data.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            // Dispose the writer even if the XFA extraction throws.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(@"E:\XMLOutPut\outPutTest.xml"))
            {
                file.WriteLine(ReadFileNames());
            }
            using (FileStream existingPdf = new FileStream(@"E:\ORIGINAL.pdf", FileMode.Open))
            {
                using (PdfReader pdfReader = new PdfReader(existingPdf))
                {
                    using (FileStream sourceXML = new FileStream(@"E:\XMLOutPut\outPutTest.xml", FileMode.Open))
                    {
                        // NOTE(review): this is the same path as existingPdf, which is
                        // still open above; presumably the real paths differ. Confirm.
                        using (FileStream targetPdf = new FileStream(@"E:\ORIGINAL.pdf", FileMode.Open))
                        {
                            PdfReader.unethicalreading = true;
                            // '\0' keeps the PDF version; true selects append mode.
                            PdfStamper stamper = new PdfStamper(pdfReader, targetPdf, '\0', true);
                            stamper.AcroFields.Xfa.FillXfaForm(sourceXML);
                            stamper.Close();
                        }
                    }
                }
            }
        }

        /// <summary>Opens <c>E:\ORIGINAL.pdf</c> and returns its XFA XML as text.</summary>
        /// <returns>The indented XML produced by <see cref="ReadXFA"/>.</returns>
        public static string ReadFileNames()
        {
            string SRC = @"E:\ORIGINAL.pdf";
            using (PdfReader reader = new PdfReader(SRC))
            {
                return ReadXFA(reader);
            }
        }

        /// <summary>
        /// Serialises the XFA DOM of <paramref name="reader"/> to an indented XML string.
        /// </summary>
        /// <param name="reader">Reader over the XFA PDF; it is closed by this method.</param>
        /// <returns>The XFA document as indented XML.</returns>
        public static string ReadXFA(PdfReader reader)
        {
            XfaForm xfa = new XfaForm(reader);
            XmlDocument document = xfa.DomDocument;
            reader.Close();
            // If the root element is in a namespace, blank the default namespace and
            // reparse so the serialised document reflects the changed attribute.
            if (!string.IsNullOrEmpty(document.DocumentElement.NamespaceURI))
            {
                document.DocumentElement.SetAttribute("xmlns", "");
                XmlDocument newDoc = new XmlDocument();
                newDoc.LoadXml(document.OuterXml);
                document = newDoc;
            }
            var sb = new StringBuilder(4000);
            var Xsettings = new XmlWriterSettings() { Indent = true };
            using (var writer = XmlWriter.Create(sb, Xsettings))
            {
                document.WriteTo(writer);
            }
            return sb.ToString();
        }
    }
}
I am starting to believe that I have to somehow iterate through the XML and pull out however many fields I'd like to edit and do it that way??
Any help would be greatly appreciated.
Kind regards.
This is currently not possible using iText. You'll need to extract the XFA from the file (you can use iText to do this) and then traverse the XFA structure to make the edits, which you'll have to do with another tool, and then re-insert the XFA into the PDF, which can be done using iText.
/// <summary>
/// Loads the XFA DOM of the source PDF, sets every form1/A1 element to a fixed
/// value with LINQ to XML, and pushes the xfa:datasets node back into the form
/// through a stamper in append mode.
/// </summary>
static void Main(string[] args)
{
    using (FileStream sourceStream = new FileStream(SRC, FileMode.Open))
    using (PdfReader pdfReader = new PdfReader(sourceStream))
    using (FileStream outputStream = new FileStream(Target, FileMode.Create))
    {
        PdfReader.unethicalreading = true;
        using (PdfStamper stamper = new PdfStamper(pdfReader, outputStream, '\0', true))
        {
            // Work on an XDocument copy so the nodes can be queried with LINQ.
            XDocument xdoc = new XfaForm(pdfReader).DomDocument.ToXDocument();

            // Descendants("A1") already restricts the sequence to elements named
            // "A1", so no further name check is needed inside the loop.
            foreach (XElement field in xdoc.Descendants("form1").Descendants("A1"))
            {
                field.Value = "LOLGG";
            }

            // FillXfaForm wants an XmlNode, so convert back and pick the datasets node.
            XmlDocument edited = xdoc.ToXmlDocument();
            XmlNamespaceManager nsManager = new XmlNamespaceManager(edited.NameTable);
            nsManager.AddNamespace("xfa", "http://www.xfa.org/schema/xfa-data/1.0/");
            XmlNode datasets = edited.SelectSingleNode("//xfa:datasets", nsManager);
            stamper.AcroFields.Xfa.FillXfaForm(datasets);
        }
    }
}
}
/// <summary>
/// Conversions between the two .NET XML object models
/// (<see cref="XmlDocument"/> and <see cref="XDocument"/>).
/// </summary>
public static class DocumentExtensions
{
    /// <summary>Builds an <see cref="XmlDocument"/> with the same content as <paramref name="xDocument"/>.</summary>
    /// <param name="xDocument">The LINQ to XML document to convert.</param>
    /// <returns>A new DOM document loaded from a reader over <paramref name="xDocument"/>.</returns>
    public static XmlDocument ToXmlDocument(this XDocument xDocument)
    {
        XmlDocument result = new XmlDocument();
        using (XmlReader source = xDocument.CreateReader())
        {
            result.Load(source);
        }
        return result;
    }

    /// <summary>Builds an <see cref="XDocument"/> with the same content as <paramref name="xmlDocument"/>.</summary>
    /// <param name="xmlDocument">The DOM document to convert.</param>
    /// <returns>A new LINQ to XML document loaded from a node reader over <paramref name="xmlDocument"/>.</returns>
    public static XDocument ToXDocument(this XmlDocument xmlDocument)
    {
        XDocument result;
        using (XmlNodeReader source = new XmlNodeReader(xmlDocument))
        {
            // Advance to the first content node before loading.
            source.MoveToContent();
            result = XDocument.Load(source);
        }
        return result;
    }
}
Alrighty folks so it is possible to do this with iText in addition to Linq and Xml.Linq, as the code example shows above.
In order to make this possible we had to take an XMLDocument and convert it to an XDocument and then use Linq to traverse the nodes. Once we were able to get the correct nodes we had to add a namespace to correctly identify the prefix. We then had to transfer the XDoc format back to an XMLDoc format in order to use the FillXfaForm from iText.

Save modified WordprocessingDocument to new file

I'm attempting to open a Word document, change some text and then save the changes to a new document. I can get the first bit done using the code below but I can't figure out how to save the changes to a NEW document (specifying the path and file name).
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using DocumentFormat.OpenXml.Packaging;
using System.IO;
namespace WordTest
{
    /// <summary>
    /// Opens a Word document for editing, replaces placeholder tokens in the main
    /// document part's XML, and writes the XML back into the same document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Replaces <c>##Name##</c> and <c>##Make##</c> in <c>c:\data\hello.docx</c> in place.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            string template = @"c:\data\hello.docx";
            string documentText;
            // The second argument (true) opens the package for editing.
            using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(template, true))
            {
                // Read the whole main part as text.
                using (StreamReader reader = new StreamReader(wordDoc.MainDocumentPart.GetStream()))
                {
                    documentText = reader.ReadToEnd();
                }
                documentText = documentText.Replace("##Name##", "Paul");
                documentText = documentText.Replace("##Make##", "Samsung");
                // FileMode.Create truncates the part so no stale bytes remain after the new text.
                using (StreamWriter writer = new StreamWriter(wordDoc.MainDocumentPart.GetStream(FileMode.Create)))
                {
                    writer.Write(documentText);
                }
            }
        }
    }
}
I'm a complete beginner at this, so forgive the basic question!
If you use a MemoryStream you can save the changes to a new file like this:
// Load the template into memory, edit it there, then write the edited bytes
// to a new file so the template itself is never modified.
byte[] templateBytes = File.ReadAllBytes("c:\\data\\hello.docx");
using (MemoryStream buffer = new MemoryStream())
{
    // Writing into an empty MemoryStream (rather than wrapping the array)
    // yields a stream that can grow while the document is edited.
    buffer.Write(templateBytes, 0, templateBytes.Length);
    using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(buffer, true))
    {
        // Do work here
    }
    // The document is disposed at this point; persist the buffer under the new name.
    byte[] editedBytes = buffer.ToArray();
    File.WriteAllBytes("C:\\data\\newFileName.docx", editedBytes);
}
In Open XML SDK 2.5:
// Work on a copy so the original file stays untouched.
File.Copy(originalFilePath, modifiedFilePath);
// The second argument (isEditable) is true: open the copy for editing.
using (var editableCopy = WordprocessingDocument.Open(modifiedFilePath, true))
{
    // Do changes here...
}
wordprocessingDocument.AutoSave is true by default so Close and Dispose will save changes.
wordprocessingDocument.Close is not needed explicitly because the using block will call it.
This approach doesn't require entire file content to be loaded into memory like in accepted answer. It isn't a problem for small files, but in my case I have to process more docx files with embedded xlsx and pdf content at the same time so the memory usage would be quite high.
Simply copy the source file to the destination and make changes from there.
// Copy the source document to the destination, then edit the copy in place.
File.Copy(source, destination);
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(destination, true))
{
    // Make changes to the document and save it.
    wordDoc.MainDocumentPart.Document.Save();
    // No explicit Close(): leaving the using block disposes the document.
}
Hope this works.
This approach allows you to buffer the "template" file without batching the whole thing into a byte[], perhaps allowing it to be less resource intensive.
// Stream the template into the new file and edit the new file's stream directly,
// without first loading the whole template into a byte[].
var templatePath = @"c:\data\hello.docx";
var documentPath = @"c:\data\newFilename.docx";
using (var template = File.OpenRead(templatePath))
// FileMode.Create truncates an existing destination. OpenOrCreate would keep the old
// bytes past the end of the copy and corrupt the package when the old file was longer.
using (var documentStream = File.Open(documentPath, FileMode.Create))
{
    template.CopyTo(documentStream);
    using (var document = WordprocessingDocument.Open(documentStream, true))
    {
        //do your work here
        document.MainDocumentPart.Document.Save();
    }
}
For me this worked fine:
/// <summary>
/// Replaces every match of the pattern "Hello world!" in the main document part
/// of the given Word file with "Hi Everyone!", editing the file in place.
/// </summary>
/// <param name="document">Path of the .docx file to modify.</param>
public static void SearchAndReplace(string document)
{
    using (WordprocessingDocument package = WordprocessingDocument.Open(document, true))
    {
        // Pull the raw XML of the main part into a string.
        string originalText;
        using (StreamReader partReader = new StreamReader(package.MainDocumentPart.GetStream()))
        {
            originalText = partReader.ReadToEnd();
        }

        string updatedText = Regex.Replace(originalText, "Hello world!", "Hi Everyone!");

        // FileMode.Create truncates the part before the new text is written.
        using (StreamWriter partWriter = new StreamWriter(package.MainDocumentPart.GetStream(FileMode.Create)))
        {
            partWriter.Write(updatedText);
        }
    }
}

Set Metadata in iTextSharp

I am developing an application and i use the iTextSharp library.
I am also reading the iText in action from Manning so i can get references.
In Chapter 12 it has the following code to change the metadata in Java.
// Read the existing info dictionary, overwrite selected entries, and hand the
// map to the stamper so it is written when the stamper closes.
PdfReader reader = new PdfReader(src);
FileOutputStream output = new FileOutputStream(dest);
PdfStamper stamper = new PdfStamper(reader, output);
HashMap<String, String> metadata = reader.getInfo();
metadata.put("Title", "Hello World stamped");
metadata.put("Subject", "Hello World with changed metadata");
metadata.put("Keywords", "iText in Action, PdfStamper");
metadata.put("Creator", "Silly standalone example");
metadata.put("Author", "Also Bruno Lowagie");
stamper.setMoreInfo(metadata);
stamper.close();
How can i do the same in C#?
Conversion from Java to C# is usually pretty straightforward. By convention, Java properties use get and set prefixes so to convert to C# you just need to drop the prefix and turn it into a .Net getter/setter call. getInfo() becomes Info and setMoreInfo(info) becomes MoreInfo = info. Then you just need to convert the native Java types to their equivalent C# types. In this case the Java FileOutputStream becomes a .Net FileStream and the HashMap<String, String> becomes a Dictionary<String, String>.
Lastly, I've updated the code to reflect recent changes to iTextSharp, which (as of 5.1.1.0) now implements IDisposable.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Form that, on load, stamps <c>Input.pdf</c> from the desktop to
    /// <c>Output.pdf</c> with updated document metadata and then closes itself.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Copies the input PDF to the output path, overriding Title, Subject,
        /// Keywords, Creator and Author in the info dictionary.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            string inputFile = Path.Combine(workingFolder, "Input.pdf");
            string outputFile = Path.Combine(workingFolder, "Output.pdf");
            PdfReader reader = new PdfReader(inputFile);
            try
            {
                using (FileStream fs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None))
                {
                    using (PdfStamper stamper = new PdfStamper(reader, fs))
                    {
                        Dictionary<String, String> info = reader.Info;
                        // Use the indexer, not Add(): the source PDF may already define
                        // these keys, and Add() throws on an existing key, whereas the
                        // Java original's put() overwrites.
                        info["Title"] = "Hello World stamped";
                        info["Subject"] = "Hello World with changed metadata";
                        info["Keywords"] = "iText in Action, PdfStamper";
                        info["Creator"] = "Silly standalone example";
                        info["Author"] = "Also Bruno Lowagie";
                        stamper.MoreInfo = info;
                        // No explicit Close(): disposing the stamper closes it.
                    }
                }
            }
            finally
            {
                reader.Close();
            }
            this.Close();
        }
    }
}
I just made this one after searching the right place in the watch window of the PdfWriter object, it changes the "PDF Creator" in the PDF as it is not accessible by default:
/// <summary>
/// Overwrites the "Producer" entry of the writer's info dictionary by reaching
/// the non-public PdfDocument property and its "info" field through reflection.
/// </summary>
/// <param name="writer">Writer whose document info is modified.</param>
private static void ReplacePDFCreator(PdfWriter writer)
{
    const BindingFlags hidden = BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance;

    // First non-public property whose type is PdfDocument.
    PropertyInfo documentProperty = writer.GetType()
        .GetProperties(hidden)
        .FirstOrDefault(p => p.PropertyType == typeof(PdfDocument));
    PdfDocument pdfDocument = (PdfDocument)documentProperty.GetValue(writer);

    // The non-public field named "info" on that document.
    FieldInfo infoField = pdfDocument.GetType()
        .GetFields(hidden)
        .FirstOrDefault(f => f.Name == "info");
    PdfDocument.PdfInfo info = (PdfDocument.PdfInfo)infoField.GetValue(pdfDocument);

    PdfString producer = new PdfString("YOUR NEW PDF CREATOR HERE");
    info.Remove(new PdfName("Producer"));
    info.Put(new PdfName("Producer"), producer);
}
I got a suggestion from "@yannic-donot-text" and it is much cleaner:
/// <summary>
/// Sets the "Producer" entry of the writer's info dictionary through the
/// writer's Info property.
/// </summary>
/// <param name="writer">Writer whose info dictionary is updated.</param>
private static void ReplacePDFCreator(PdfWriter writer)
{
    PdfName producerKey = new PdfName("Producer");
    PdfString producerValue = new PdfString("YOUR NEW PDF CREATOR HERE");
    writer.Info.Put(producerKey, producerValue);
}
I thought it was only achievable by reflection, but I appreciate the collaboration of more educated persons :)
Thx!
/// <summary>
/// Copies every page of <c>D:\1.pdf</c> into <c>D:\48.pdf</c> with PdfCopy and
/// sets Title, Subject, Keywords, Creator, Author and Producer on the new file.
/// </summary>
public void pdfproperties()
{
    string inputFile = @"D:\1.pdf";
    string outputFile = @"D:\48.pdf";
    PdfReader reader = new PdfReader(inputFile);
    try
    {
        // The original removed keys from reader.Info while enumerating reader.Info.
        // That is either a collection-modified error (if Info is a live dictionary)
        // or a no-op (if Info returns a copy). The output's metadata is set through
        // Doc.Add* and writer.Info below, so the loop is dropped.
        // NOTE(review): confirm no caller relied on reader.Info being cleared.
        using (FileStream FS = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            using (Document Doc = new Document())
            {
                using (PdfCopy writer = new PdfCopy(Doc, FS))
                {
                    Doc.Open();
                    Doc.AddTitle("Add Title");
                    Doc.AddSubject("Add Subject");
                    Doc.AddKeywords("Add Keywords");
                    Doc.AddCreator("Application Creator");
                    Doc.AddAuthor("Add Author");
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        writer.AddPage(writer.GetImportedPage(reader, i));
                    }
                    // Producer has no Doc.Add* helper here, so it is put into the
                    // writer's info dictionary directly before the document is closed.
                    writer.Info.Put(new PdfName("Producer"), new PdfString("Producer Name"));
                    Doc.Close();
                }
            }
        }
    }
    finally
    {
        reader.Close();
    }
}

Combine two (or more) PDF's

Background: I need to provide a weekly report package for my sales staff. This package contains several (5-10) crystal reports.
Problem:
I would like to allow a user to run all reports and also just run a single report. I was thinking I could do this by creating the reports and then doing:
// Build the list of weekly reports, then export each one to its own PDF file
// named after the report's ResourceName.
List<ReportClass> reports = new List<ReportClass>();
reports.Add(new WeeklyReport1());
reports.Add(new WeeklyReport2());
reports.Add(new WeeklyReport3());
// <snip> (further reports omitted)
foreach (ReportClass report in reports)
{
    report.ExportToDisk(ExportFormatType.PortableDocFormat, @"c:\reports\" + report.ResourceName + ".pdf");
}
This would provide me a folder full of the reports, but I would like to email everyone a single PDF with all the weekly reports. So I need to combine them.
Is there an easy way to do this without install any more third party controls? I already have DevExpress & CrystalReports and I'd prefer not to add too many more.
Would it be best to combine them in the foreach loop or in a separate loop (or some alternative way)?
I had to solve a similar problem and what I ended up doing was creating a small pdfmerge utility that uses the PDFSharp project which is essentially MIT licensed.
The code is dead simple, I needed a cmdline utility so I have more code dedicated to parsing the arguments than I do for the PDF merging:
// Open both inputs in import mode and append their pages, in order, to a new document.
using (PdfDocument first = PdfReader.Open("file1.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument second = PdfReader.Open("file2.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument merged = new PdfDocument())
{
    CopyPages(first, merged);
    CopyPages(second, merged);
    merged.Save("file1and2.pdf");
}

// Appends every page of `from` to the end of `to`, preserving page order.
void CopyPages(PdfDocument from, PdfDocument to)
{
    int pageIndex = 0;
    while (pageIndex < from.PageCount)
    {
        to.AddPage(from.Pages[pageIndex]);
        pageIndex++;
    }
}
Here is a single function that will merge X amount of PDFs using PDFSharp
using PdfSharp;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
/// <summary>
/// Merges any number of PDF files, in argument order, into a single file using PDFsharp.
/// </summary>
/// <param name="targetPath">Path the merged PDF is saved to.</param>
/// <param name="pdfs">Paths of the source PDFs.</param>
public static void MergePDFs(string targetPath, params string[] pdfs)
{
    using (var merged = new PdfDocument())
    {
        foreach (var sourcePath in pdfs)
        {
            // Import mode is what allows pages to be added to another document.
            using (var source = PdfReader.Open(sourcePath, PdfDocumentOpenMode.Import))
            {
                int pageIndex = 0;
                while (pageIndex < source.PageCount)
                {
                    merged.AddPage(source.Pages[pageIndex]);
                    pageIndex++;
                }
            }
        }
        merged.Save(targetPath);
    }
}
This is something that I figured out, and wanted to share with you, using PdfSharp.
Here you can join multiple Pdfs in one, without the need of an output directory (following the input list order)
/// <summary>
/// Merges in-memory PDFs, in list order, into one PDF and returns its bytes.
/// </summary>
/// <param name="pdfs">The source PDFs as byte arrays.</param>
/// <returns>The bytes of the merged PDF.</returns>
public static byte[] MergePdf(List<byte[]> pdfs)
{
    // Open every input first, exactly as the inputs are ordered.
    var sources = new List<PdfSharp.Pdf.PdfDocument>();
    foreach (byte[] pdfBytes in pdfs)
    {
        MemoryStream input = new MemoryStream(pdfBytes);
        sources.Add(PdfReader.Open(input, PdfDocumentOpenMode.Import));
    }

    using (var merged = new PdfSharp.Pdf.PdfDocument())
    {
        foreach (PdfSharp.Pdf.PdfDocument source in sources)
        {
            foreach (PdfSharp.Pdf.PdfPage page in source.Pages)
            {
                merged.AddPage(page);
            }
        }

        // Save with closeStream = false so the stream can still be read afterwards.
        MemoryStream output = new MemoryStream();
        merged.Save(output, false);
        return output.ToArray();
    }
}
I used iTextsharp with c# to combine pdf files. This is the code I used.
// Merge a fixed list of PDF files into one output file with iTextSharp's PdfCopy.
string[] lstFiles = new string[3];
lstFiles[0] = @"C:/pdf/1.pdf";
lstFiles[1] = @"C:/pdf/2.pdf";
lstFiles[2] = @"C:/pdf/3.pdf";
string outputPdfPath = @"C:/pdf/new.pdf";

Document sourceDocument = new Document();
// The using block releases the output file even when a source file fails to load.
using (System.IO.FileStream output = new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create))
{
    PdfCopy pdfCopyProvider = new PdfCopy(sourceDocument, output);
    //Open the output file
    sourceDocument.Open();

    //Loop through the files list. The original condition `f < lstFiles.Length-1`
    //stopped one short and silently dropped the last file.
    for (int f = 0; f < lstFiles.Length; f++)
    {
        PdfReader reader = new PdfReader(lstFiles[f]);
        try
        {
            // Ask the reader for the page count instead of counting "/Type /Page"
            // matches in the raw file text.
            int pages = reader.NumberOfPages;
            //Add pages of current file
            for (int i = 1; i <= pages; i++)
            {
                PdfImportedPage importedPage = pdfCopyProvider.GetImportedPage(reader, i);
                pdfCopyProvider.AddPage(importedPage);
            }
        }
        finally
        {
            reader.Close();
        }
    }
    //At the end save the output file. Exceptions propagate unchanged; the original
    //`catch (Exception ex) { throw ex; }` only discarded the stack trace.
    sourceDocument.Close();
}
/// <summary>
/// Estimates the page count of a PDF by counting textual occurrences of
/// "/Type /Page" (not followed by 's', so "/Pages" nodes are excluded) in the file.
/// </summary>
/// <param name="file">Path of the PDF file to scan.</param>
/// <returns>The number of regex matches found in the file's text.</returns>
/// <remarks>
/// NOTE(review): this is a text heuristic over the raw bytes; page objects that do
/// not appear as plain text in the file are presumably not counted.
/// </remarks>
private static int get_pageCcount(string file)
{
    using (StreamReader sr = new StreamReader(File.OpenRead(file)))
    {
        // Verbatim string so that \s reaches the regex engine unescaped.
        Regex regex = new Regex(@"/Type\s*/Page[^s]");
        MatchCollection matches = regex.Matches(sr.ReadToEnd());
        return matches.Count;
    }
}
Here is a example using iTextSharp
/// <summary>
/// Appends every PDF in <paramref name="pdfFilePaths"/>, in order, to
/// <paramref name="outputPdfStream"/> using PdfCopy. The output stream is left
/// open for the caller.
/// </summary>
/// <param name="outputPdfStream">Stream that receives the merged PDF.</param>
/// <param name="pdfFilePaths">Paths of the PDFs to merge.</param>
public static void MergePdf(Stream outputPdfStream, IEnumerable<string> pdfFilePaths)
{
    using (var mergedDocument = new Document())
    using (var copier = new PdfCopy(mergedDocument, outputPdfStream))
    {
        // The caller owns the stream, so the copier must not close it.
        copier.CloseStream = false;
        try
        {
            mergedDocument.Open();
            foreach (var sourcePath in pdfFilePaths)
            {
                using (var source = new PdfReader(sourcePath))
                {
                    copier.AddDocument(source);
                    source.Close();
                }
            }
        }
        finally
        {
            if (mergedDocument != null)
            {
                mergedDocument.Close();
            }
        }
    }
}
The PdfReader constructor has many overloads. It's possible to replace the parameter type IEnumerable<string> with IEnumerable<Stream> and it should work as well. Please notice that the method does not close the OutputStream, it delegates that task to the Stream creator.
PDFsharp seems to allow merging multiple PDF documents into one.
And the same is also possible with ITextSharp.
Combining two byte[] using iTextSharp up to version 5.x:
/// <summary>
/// Concatenates two in-memory PDFs with PdfCopy (iTextSharp up to 5.x).
/// </summary>
/// <param name="pdf1">Bytes of the first PDF.</param>
/// <param name="pdf2">Bytes of the second PDF, appended after the first.</param>
/// <returns>
/// An open, readable stream positioned at the start of the merged PDF.
/// <c>ToArray()</c> keeps working as before.
/// </returns>
internal static MemoryStream mergePdfs(byte[] pdf1, byte[] pdf2)
{
    MemoryStream outStream = new MemoryStream();
    using (Document document = new Document())
    using (PdfCopy copy = new PdfCopy(document, outStream))
    {
        // Keep the stream open after the copy is disposed (the same switch MergePdf
        // uses elsewhere in this file), otherwise the caller gets a closed stream
        // that cannot be read.
        copy.CloseStream = false;
        document.Open();
        foreach (byte[] pdf in new[] { pdf1, pdf2 })
        {
            PdfReader reader = new PdfReader(pdf);
            try
            {
                copy.AddDocument(reader);
            }
            finally
            {
                reader.Close();
            }
        }
    }
    // Rewind so callers can read the result from the beginning.
    outStream.Position = 0;
    return outStream;
}
Instead of byte arrays, it is also possible to pass Streams.
There's some good answers here already, but I thought I might mention that pdftk might be useful for this task. Instead of producing one PDF directly, you could produce each PDF you need and then combine them together as a post-process with pdftk. This could even be done from within your program using a system() or ShellExecute() call.
You could try pdf-shuffler gtk-apps.org
I know a lot of people have recommended PDF Sharp, however it doesn't look like that project has been updated since june of 2008. Further, source isn't available.
Personally, I've been playing with iTextSharp which has been pretty easy to work with.
I combined the two above, because I needed to merge 3 pdfbytes and return a byte
/// <summary>
/// Concatenates three in-memory PDFs, in argument order, and returns the merged
/// PDF as a byte array.
/// </summary>
/// <param name="pdf1">Bytes of the first PDF.</param>
/// <param name="pdf2">Bytes of the second PDF.</param>
/// <param name="pdf3">Bytes of the third PDF.</param>
/// <returns>The bytes of the merged PDF.</returns>
internal static byte[] mergePdfs(byte[] pdf1, byte[] pdf2, byte[] pdf3)
{
    MemoryStream merged = new MemoryStream();
    using (Document document = new Document())
    using (PdfCopy copier = new PdfCopy(document, merged))
    {
        document.Open();
        // Same three AddDocument calls as before, expressed as a loop.
        foreach (byte[] source in new[] { pdf1, pdf2, pdf3 })
        {
            copier.AddDocument(new PdfReader(source));
        }
    }
    return merged.ToArray();
}
Following method gets a List of byte array which is PDF byte array and then returns a byte array.
using ...;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
/// <summary>
/// PDFsharp helpers: concatenate in-memory PDFs and send a PDF to the browser as a download.
/// </summary>
public static class PdfHelper
{
    /// <summary>Concatenates the given PDFs in list order.</summary>
    /// <param name="lstPdfBytes">Source PDFs as byte arrays.</param>
    /// <returns>The bytes of the merged PDF.</returns>
    public static byte[] PdfConcat(List<byte[]> lstPdfBytes)
    {
        using (var merged = new PdfDocument())
        {
            foreach (var sourceBytes in lstPdfBytes)
            {
                using (var sourceStream = new MemoryStream(sourceBytes))
                using (var sourceDoc = PdfReader.Open(sourceStream, PdfDocumentOpenMode.Import))
                {
                    int pageIndex = 0;
                    while (pageIndex < sourceDoc.PageCount)
                    {
                        merged.AddPage(sourceDoc.Pages[pageIndex]);
                        pageIndex++;
                    }
                }
            }

            using (var output = new MemoryStream())
            {
                // closeStream = false keeps the stream usable for the read-back below.
                merged.Save(output, false);
                return Stream2Bytes(output);
            }
        }
    }

    /// <summary>
    /// Writes <paramref name="content"/> to the current HTTP response as a PDF
    /// attachment named <c>{fileName}.pdf</c> and ends the response.
    /// </summary>
    /// <param name="fileName">Download file name without the ".pdf" extension.</param>
    /// <param name="content">The PDF bytes to send.</param>
    public static void DownloadAsPdfFile(string fileName, byte[] content)
    {
        var payload = new MemoryStream(content);
        var response = HttpContext.Current.Response;
        response.Clear();
        response.ContentType = "application/pdf";
        response.AddHeader("content-disposition", $"attachment;filename={fileName}.pdf");
        response.Buffer = true;
        payload.WriteTo(response.OutputStream);
        response.End();
    }

    /// <summary>Reads <paramref name="input"/> from its current position to the end.</summary>
    /// <param name="input">Stream to drain.</param>
    /// <returns>The bytes read.</returns>
    private static byte[] Stream2Bytes(Stream input)
    {
        using (var collected = new MemoryStream())
        {
            input.CopyTo(collected);
            return collected.ToArray();
        }
    }
}
So, the result of PdfHelper.PdfConcat method is passed to PdfHelper.DownloadAsPdfFile method.
PS: A NuGet package named [PdfSharp][1] need to be installed. So in the Package Manage Console window type:
Install-Package PdfSharp
Following method merges two pdfs( f1 and f2) using iTextSharp. The second pdf is appended after a specific index of f1.
// Usage: insert all pages of Iso.pdf after page 3 of a.pdf and save as c.pdf.
string f1 = "D:\\a.pdf";
string f2 = "D:\\Iso.pdf";
string outfile = "D:\\c.pdf";
appendPagesFromPdf(f1, f2, outfile, 3);

/// <summary>
/// Writes a new PDF made of pages 1..startingindex of <paramref name="f1"/>, then
/// every page of <paramref name="f2"/>, then the remaining pages of <paramref name="f1"/>.
/// </summary>
/// <param name="f1">Path of the PDF that receives the insertion.</param>
/// <param name="f2">Path of the PDF whose pages are inserted.</param>
/// <param name="destinationFile">Path of the merged output file (overwritten).</param>
/// <param name="startingindex">
/// Number of leading pages of <paramref name="f1"/> placed before the inserted
/// pages; 0 puts <paramref name="f2"/> first, the page count of f1 puts it last.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startingindex"/> is negative or larger than the page count of f1.
/// </exception>
public static void appendPagesFromPdf(String f1, string f2, String destinationFile, int startingindex)
{
    PdfReader p1 = new PdfReader(f1);
    PdfReader p2 = null;
    try
    {
        p2 = new PdfReader(f2);
        int l1 = p1.NumberOfPages, l2 = p2.NumberOfPages;
        if (startingindex < 0 || startingindex > l1)
        {
            throw new ArgumentOutOfRangeException("startingindex",
                "startingindex must be between 0 and the number of pages in the first PDF.");
        }

        //Create our destination file
        using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
        {
            Document doc = new Document();
            // PdfCopy copies whole pages, so each page keeps its own size instead of
            // being drawn as a template onto a default-size page.
            PdfCopy copy = new PdfCopy(doc, fs);
            doc.Open();
            // copied pages from first pdf till startingIndex
            for (int page = 1; page <= startingindex; page++)
            {
                copy.AddPage(copy.GetImportedPage(p1, page));
            }
            // merges second pdf after startingIndex
            for (int i = 1; i <= l2; i++)
            {
                copy.AddPage(copy.GetImportedPage(p2, i));
            }
            // continuing from where we left in pdf1
            for (int i = startingindex + 1; i <= l1; i++)
            {
                copy.AddPage(copy.GetImportedPage(p1, i));
            }
            doc.Close();
        }
    }
    finally
    {
        // Readers are released even when opening or copying fails.
        p1.Close();
        if (p2 != null)
        {
            p2.Close();
        }
    }
}
To solve a similar problem i used iTextSharp like this:
//Create the document which will contain the combined PDF's
//Create the document which will contain the combined PDF's
Document document = new Document();
//The using block releases the output file even if one of the inputs cannot be read
using (FileStream output = new FileStream(OutPutFilePath, FileMode.Create))
{
    //Create a writer for the document (a constructor never returns null, so the
    //original null check was dead code)
    PdfCopy writer = new PdfCopy(document, output);
    //Open the document
    document.Open();
    //Get the files you want to combine
    string[] filePaths = Directory.GetFiles(DirectoryPathWhereYouHaveYourFiles);
    foreach (string filePath in filePaths)
    {
        //Read the PDF file (the original referenced an undefined `vls_FilePath`
        //instead of the loop variable)
        using (PdfReader reader = new PdfReader(filePath))
        {
            //Add the file to the combined one
            writer.AddDocument(reader);
        }
    }
    //Finally close the document and writer
    writer.Close();
    document.Close();
}
Here is a link to an example using PDFSharp and ConcatenateDocuments
Here is the solution: http://www.wacdesigns.com/2008/10/03/merge-pdf-files-using-c
It use free open source iTextSharp library http://sourceforge.net/projects/itextsharp
I've done this with PDFBox. I suppose it works similarly to iTextSharp.

Categories