getting error in Merge PDF's In C# using Itextsharpdll - c#

when i am trying the merge the PDF's in C# using itextsharp, i used below code It worked for 2 or 3 times. But after that Output file is creating with 0 kb, when i am trying open file it showing file is in use or already open by other. Please help if missed anything Thanks in Adavance.
iTextSharp.text.pdf.PdfReader reader = null;
PdfImportedPage page = null;
FileStream stream = null;
Document pdfDoc = null;
try
{
using(pdfDoc = new Document())
stream = new FileStream(targetPDF, FileMode.Create);
{
using (PdfCopy pdf = new PdfCopy(pdfDoc, stream))
{
pdfDoc.Open();
var files = Directory.GetFiles(sourceDir);
foreach (string file in files)
{
reader = new iTextSharp.text.pdf.PdfReader(file);
for (int i = 0; i < reader.NumberOfPages; i++)
{
page = pdf.GetImportedPage(reader, i + 1);
pdf.AddPage(page);
}
pdf.FreeReader(reader);
reader.Close();
}
}
}
}
catch (Exception ex)
{
if (reader != null)
{
reader.Close();
}
}
enter image description here

I don't know what happen with your code but I can give a function (I'm using in my project working 100%) that merge a list of file
private bool MergePDFs(IEnumerable<string> fileNames, string targetPdf)
{
bool merged = true;
using (FileStream stream = new FileStream(targetPdf, FileMode.Create))
{
Document document = new Document();
PdfCopy pdf = new PdfCopy(document, stream);
PdfReader reader = null;
try
{
document.Open();
foreach (string file in fileNames)
{
System.Threading.Thread.Sleep(1500);
reader = new PdfReader(file);
pdf.AddDocument(reader);
reader.Close();
}
}
catch (Exception)
{
merged = false;
if (reader != null)
{
reader.Close();
}
}
finally
{
if (document != null)
{
document.Close();
}
}
}
return merged;
}
so for fileNames : you give the list of your files (construct a list
from your folder by listing files)
targetPdf is the outPut file name

Related

How do I delete/replace an image in a PDF file without breaking the file, using iTextsharp and C#

I'm trying to insert an image with id into a PDF document and allow to replace it later with another image.
My process is as follows:
Get an image from the client (with a unique ID).
Try to find an existing image with the same ID, in the PDF document.
If I find an existing image, try to delete it and put the new image instead, or try to replace the existing image with the new one. (tried both).
If I don't find an existing image, insert the image in a position I choose.
I use code from Bruno Lowagie book:
Replacing
Adding image
The problem is that whenever I delete an existing image or replace it my document gets corrupted. What am I doing wrong?
This is the code:
public static bool PdfInsertSignature(string path, string fileName, string signatureName, byte[] imageBytes)
{
bool resultOk = true;
string tmpFilename = string.Concat("tmp_", Guid.NewGuid().ToString(), ".pdf");
// get file, copy to new file with signature
using (Stream inputPdfStream = new FileStream(path + fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
using (Stream outputPdfStream = new FileStream(path + tmpFilename, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (var reader = new PdfReader(inputPdfStream))
using (PdfStamper stamper = new PdfStamper(reader, outputPdfStream, '\0', false))
{
var img = System.Drawing.Image.FromStream(new MemoryStream(imageBytes));
Image image = Image.GetInstance(img, BaseColor.WHITE);
img.Dispose();
var positions = stamper.AcroFields.GetFieldPositions(signatureName)[0];
if (positions != null)
{
//DeleteExistingSignatureImage(reader, stamper, signatureName);
image.SetAbsolutePosition(positions.position.Left + 20, positions.position.Top - 15);
image.ScalePercent(0.2f * 100);
image.BorderWidth = 0;
PdfImage pdfImg = new PdfImage(image, "", null);
pdfImg.Put(new PdfName("ITXT_SigImageId"), new PdfName(signatureName + "_img"));
if (!ReplaceImage(reader, stamper, signatureName, pdfImg))
{
PdfIndirectObject objRef = stamper.Writer.AddToBody(pdfImg);
image.DirectReference = objRef.IndirectReference;
PdfContentByte pdfContentByte = stamper.GetOverContent(positions.page);
pdfContentByte.AddImage(image);
}
}
else
{
resultOk = false;
logger.Error($"No matching Signature found for signatureName: {signatureName} in fileName: {fileName}.");
}
}
}
if (resultOk)
{
// delete old file and rename new file to old file's name
File.Delete(path + fileName);
File.Move(path + tmpFilename, path + fileName);
}
else
{
File.Delete(path + tmpFilename);
}
return resultOk;
}
private static bool ReplaceImage(PdfReader reader, PdfStamper stamper, string signatureName, PdfStream newImgStream)
{
PdfName key = new PdfName("ITXT_SigImageId");
PdfName value = new PdfName(signatureName + "_img");
PdfObject obj;
PRStream stream;
for (int i = 1; i < reader.XrefSize; i++)
{
obj = reader.GetPdfObject(i);
if (obj == null || !obj.IsStream())
{
continue;
}
stream = (PRStream)obj;
PdfObject pdfSubtype = stream.Get(PdfName.SUBTYPE);
if (pdfSubtype != null && pdfSubtype.ToString().Equals(PdfName.IMAGE.ToString()))
{
var streamVal = stream.Get(key);
if (streamVal != null && value.Equals(streamVal))
{
stream.Clear();
var ms = new MemoryStream();
stream.WriteContent(ms);
stream.SetData(ms.ToArray(), false);
foreach (PdfName name in newImgStream.Keys)
{
stream.Put(name, stream.Get(name));
}
return true;
}
}
}
return false;
}
private static void DeleteExistingSignatureImage(PdfReader reader, PdfStamper stamper, string signatureName)
{
PdfName key = new PdfName("ITXT_SigImageId");
PdfName value = new PdfName(signatureName + "_img");
PdfObject obj;
PRStream stream;
for (int i = 1; i < reader.XrefSize; i++)
{
obj = reader.GetPdfObject(i);
if (obj == null || !obj.IsStream())
{
continue;
}
stream = (PRStream)obj;
PdfObject pdfSubtype = stream.Get(PdfName.SUBTYPE);
if (pdfSubtype != null && pdfSubtype.ToString().Equals(PdfName.IMAGE.ToString()))
{
var streamVal = stream.Get(key);
if (streamVal != null && value.Equals(streamVal))
{
stream.Clear();
PdfReader.KillIndirect(stream);
//PdfReader.KillIndirect(obj);
//reader.RemoveUnusedObjects();
}
}
}
}
The purpose of signing a PDF file is to prevent further changes without notice.
You need to sign the document after you swap the image, or it will be corrupted.
Just do make it easier to find:
This is Solution provided by amira.
This is the code i've used to replace a 'ButtonField' on my PDF template with a signature image :
string TempStampPath = Server.MapPath(TempPath + "BookingConfirmation.pdf");
PdfReader pdfReader = new PdfReader(TempStampPath);
PdfStamper pdfStamper = new PdfStamper(pdfReader, new FileStream(LocalFileName, FileMode.Create));
AcroFields pdfFormFields = pdfStamper.AcroFields;
try
{
pdfFormFields.SetField("NameSurname", NameSurname);
pdfFormFields.SetField("IdNumber", IDNumber);
pdfFormFields.SetField("CourseName", CourseName);
pdfFormFields.SetField("Location", Venue);
pdfFormFields.SetField("DateCompleted", CourseDate);
pdfFormFields.SetField("FacilitatorName", Facilitator);
try
{
iTextSharp.text.Image signature = iTextSharp.text.Image.GetInstance(image, System.Drawing.Imaging.ImageFormat.Png);
PushbuttonField ad = pdfStamper.AcroFields.GetNewPushbuttonFromField("btnFacilitatorSignature");
ad.Layout = PushbuttonField.LAYOUT_ICON_ONLY;
ad.ProportionalIcon = true;
ad.Image = signature;
ad.BackgroundColor = iTextSharp.text.BaseColor.WHITE;
pdfFormFields.ReplacePushbuttonField("btnFacilitatorSignature", ad.Field);
}
catch (Exception ex)
{ }
pdfStamper.FormFlattening = true;
pdfStamper.Close();
pdfStamper.Dispose();
pdfReader.Close();
}
catch (Exception ex)
{
pdfStamper.Close();
pdfStamper.Dispose();
pdfReader.Close();
}

How to create a PDF Portfolio using existing PDF using iTextSharp [duplicate]

How would I merge several pdf pages into one with iTextSharp which also supports merging pages having form elements like textboxes, checkboxes, etc.
I have tried so many by googling, but nothing has worked well.
See my answer here Merging Memory Streams. I give an example of how to merge PDFs with itextsharp.
For updating form field names add this code that uses the stamper to change the form field names.
/// <summary>
/// Merges pdf files from a byte list
/// </summary>
/// <param name="files">list of files to merge</param>
/// <returns>memory stream containing combined pdf</returns>
public MemoryStream MergePdfForms(List<byte[]> files)
{
if (files.Count > 1)
{
string[] names;
PdfStamper stamper;
MemoryStream msTemp = null;
PdfReader pdfTemplate = null;
PdfReader pdfFile;
Document doc;
PdfWriter pCopy;
MemoryStream msOutput = new MemoryStream();
pdfFile = new PdfReader(files[0]);
doc = new Document();
pCopy = new PdfSmartCopy(doc, msOutput);
pCopy.PdfVersion = PdfWriter.VERSION_1_7;
doc.Open();
for (int k = 0; k < files.Count; k++)
{
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
msTemp = new MemoryStream();
pdfTemplate = new PdfReader(files[k]);
stamper = new PdfStamper(pdfTemplate, msTemp);
names = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(names, 0);
foreach (string name in names)
{
stamper.AcroFields.RenameField(name, name + "_file" + k.ToString());
}
stamper.Close();
pdfFile = new PdfReader(msTemp.ToArray());
((PdfSmartCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
pdfFile.Close();
pCopy.Close();
doc.Close();
return msOutput;
}
else if (files.Count == 1)
{
return new MemoryStream(files[0]);
}
return null;
}
Here is my simplified version of Jonathan's Merge code with namespaces added, and stamping removed.
public IO.MemoryStream MergePdfForms(System.Collections.Generic.List<byte[]> files)
{
if (files.Count > 1) {
using (System.IO.MemoryStream msOutput = new System.IO.MemoryStream()) {
using (iTextSharp.text.Document doc = new iTextSharp.text.Document()) {
using (iTextSharp.text.pdf.PdfSmartCopy pCopy = new iTextSharp.text.pdf.PdfSmartCopy(doc, msOutput) { PdfVersion = iTextSharp.text.pdf.PdfWriter.VERSION_1_7 }) {
doc.Open();
foreach (byte[] oFile in files) {
using (iTextSharp.text.pdf.PdfReader pdfFile = new iTextSharp.text.pdf.PdfReader(oFile)) {
for (i = 1; i <= pdfFile.NumberOfPages; i++) {
pCopy.AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
}
}
}
return msOutput;
}
} else if (files.Count == 1) {
return new System.IO.MemoryStream(files[0]);
}
return null;
}
to merge PDF see "Merging two pdf pages into one using itextsharp"
Below is my code for pdf merging.Thanks Jonathan for giving suggestion abt renaming fields,which resolved the issues while merging pdf pages with form fields.
private static void CombineAndSavePdf(string savePath, List<string> lstPdfFiles)
{
using (Stream outputPdfStream = new FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document document = new Document();
PdfSmartCopy copy = new PdfSmartCopy(document, outputPdfStream);
document.Open();
PdfReader reader;
int totalPageCnt;
PdfStamper stamper;
string[] fieldNames;
foreach (string file in lstPdfFiles)
{
reader = new PdfReader(file);
totalPageCnt = reader.NumberOfPages;
for (int pageCnt = 0; pageCnt < totalPageCnt; )
{
//have to create a new reader for each page or PdfStamper will throw error
reader = new PdfReader(file);
stamper = new PdfStamper(reader, outputPdfStream);
fieldNames = new string[stamper.AcroFields.Fields.Keys.Count];
stamper.AcroFields.Fields.Keys.CopyTo(fieldNames, 0);
foreach (string name in fieldNames)
{
stamper.AcroFields.RenameField(name, name + "_file" + pageCnt.ToString());
}
copy.AddPage(copy.GetImportedPage(reader, ++pageCnt));
}
copy.FreeReader(reader);
}
document.Close();
}
}

Concatenating PDF using PDFsharp returns empty PDF

I have a list of PDF stored as list<byte[]>. I try to concatenate all these PDF files using PDFsharp, but after my operation I get a PDF with proper page count, but all pages are blank. Looks like I lose some header or something but I can't find where.
My code:
PdfDocument output = new PdfDocument();
try
{
foreach (var report in reports)
{
using (MemoryStream stream = new MemoryStream(report))
{
PdfDocument input = PdfReader.Open(stream, PdfDocumentOpenMode.Import);
foreach (PdfPage page in input.Pages)
{
output.AddPage(page);
}
}
}
if (output.Pages.Count <= 0)
{
throw new Exception("Empty Document");
}
MemoryStream final = new MemoryStream();
output.Save(final);
output.Close();
return final.ToArray();
}
catch (Exception e)
{
throw new Exception(e.ToString());
}
I want to return it as byte[] because I use them later:
return File(report, System.Net.Mime.MediaTypeNames.Application.Octet, "test.pdf");
This returns PDF with proper page count, but all blank.
You tell in a comment that the files come from SSRS.
Older versions of PDFsharp require a special SSRS setting:
For the DeviceSettings parameter for the Render method on the ReportExecutionService object, pass this value:
theDeviceSettings = "<DeviceInfo><HumanReadablePDF>True</HumanReadablePDF></DeviceInfo>";
Source:
http://forum.pdfsharp.net/viewtopic.php?p=1613#p1613
I use iTextSharp, look at this saple code (it works)
public static byte[] PdfJoin(List<String> pdfs)
{
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
using (iTextSharp.text.Document document = new iTextSharp.text.Document())
{
using (iTextSharp.text.pdf.PdfCopy copy = new iTextSharp.text.pdf.PdfCopy(document, ms))
{
document.Open();
for (int i = 0; i < pdfs.Count; ++i)
{
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(pdfs[i]);
// loop over the pages in that document
int n = reader.NumberOfPages;
for (int page = 0; page < n; )
{
copy.AddPage(copy.GetImportedPage(reader, ++page));
}
}
}
}
mergedPdf = ms.ToArray();
}
return mergedPdf;
}
public static byte[] PdfJoin(List<byte[]> pdfs)
{
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
using (iTextSharp.text.Document document = new iTextSharp.text.Document())
{
using (iTextSharp.text.pdf.PdfCopy copy = new iTextSharp.text.pdf.PdfCopy(document, ms))
{
document.Open();
for (int i = 0; i < pdfs.Count; ++i)
{
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(pdfs[i]);
// loop over the pages in that document
int n = reader.NumberOfPages;
for (int page = 0; page < n; )
{
copy.AddPage(copy.GetImportedPage(reader, ++page));
}
}
}
}
mergedPdf = ms.ToArray();
}
return mergedPdf;
}

iTextSharp Efficient Batch Pdf Generation From a 1 Page Template

I am generating a multiple page PDF using ITextSharp, each page having the sames template.
The problem is that the PDF grows in Phisical Size by the size of the template.
I HAVE to use ACROFIELDS.
How can I reduce the final file size ?
Here is a code snippet of the pdf handler:
public void ProcessRequest(HttpContext context)
{
Context = context;
Response = context.Response;
Request = context.Request;
try
{
LoadDataInternal();
}
catch (System.Threading.ThreadAbortException)
{
// no-op
}
catch (Exception ex)
{
Logger.LogError(ex);
Response.Write("Error");
Response.End();
}
Response.BufferOutput = true;
Response.ClearHeaders();
Response.ContentType = "application/pdf";
if (true)
{
Response.AddHeader("Content-Disposition", "attachment; filename=" +
(string.IsNullOrEmpty(DownloadFileName) ? context.Session.SessionID + ".pdf" : DownloadFileName));
}
PdfCopyFields copy
= new PdfCopyFields(Response.OutputStream);
// add a document
for (int i = 0; i < dataset.Tables["Model"].Rows.Count; i++)
{
copy.AddDocument(new PdfReader(renameFieldsIn(TemplatePath, i)));
// add a document
}
copy.SetFullCompression();
// Close the PdfCopyFields object
copy.Close();
}
private byte[] renameFieldsIn(String datasheet, int i)
{
MemoryStream baos = new MemoryStream();
// Create the stamper
PdfStamper stamper = new PdfStamper(new PdfReader(GetTemplateBytes()), baos);
// Get the fields
AcroFields form = stamper.AcroFields;
// Loop over the fields
List<String> keys = new List<String>(form.Fields.Keys);
foreach (var key in keys)
{
// rename the fields
form.RenameField(key, String.Format("{0}_{1}", key, i));
}
stamper.FormFlattening = true;
stamper.FreeTextFlattening = true;
stamper.SetFullCompression();
SetFieldsInternal(form, i);
// close the stamper
stamper.Close();
return baos.ToArray();
}
protected byte[] GetTemplateBytes()
{
var data = Context.Cache[PdfTemplateCacheKey] as byte[];
if (data == null)
{
data = File.ReadAllBytes(Context.Server.MapPath(TemplatePath));
Context.Cache.Insert(PdfTemplateCacheKey, data,
null, DateTime.Now.Add(PdfTemplateCacheDuration), Cache.NoSlidingExpiration);
}
return data;
}
I have done something a little similar before and found that after combining all the pages, running the entire generated document through the PDFStamper again resulted in a noticeable compression of the file size.
Ok, so a friend of mine came up with a solution. The only problem remaining is the amount of memory it takes to create a new PdfStamper. So here's the rewritten procedure.
Anyway. Hope this hepls anyone else. And if you have a better solution please don't be shy.
public void ProcessRequest (HttpContext context)
{
var watch = System.Diagnostics.Stopwatch.StartNew();
Context = context;
Response = context.Response;
Request = context.Request;
Response.BufferOutput = true;
Response.ClearHeaders();
Response.ContentType = "application/pdf";
Response.AddHeader("Content-Disposition", "attachment; filename=itextsharp_multiple.pdf");
var pageBytes = (byte[])null;
var pagesAll = new List<byte[]>();
try
{
for (int i = 0; i < GlobalHttpApplication.Model.TestData.Rows.Count; i++)
{
PdfStamper pst = null;
MemoryStream mstr = null;
using (mstr = new MemoryStream())
{
try
{
PdfReader reader = new PdfReader(GetTemplateBytes());
pst = new PdfStamper(reader, mstr);
var acroFields = pst.AcroFields;
SetFieldsInternal(acroFields, 0);
pst.FormFlattening = true;
pst.SetFullCompression();
} finally
{
if (pst != null)
pst.Close();
}
}
pageBytes = mstr.ToArray();
pagesAll.Add(pageBytes);
}
} finally
{
}
Document doc = new Document(PageSize.A4);
var writer = PdfWriter.GetInstance(doc, Response.OutputStream);
var copy2 = new PdfSmartCopy(doc, Response.OutputStream);
doc.Open();
doc.OpenDocument();
foreach (var b in pagesAll)
{
doc.NewPage();
copy2.AddPage(copy2.GetImportedPage(new PdfReader(b), 1));
}
copy2.Close();
watch.Stop();
File.AppendAllText(context.Server.MapPath("~/App_Data/log.txt"), watch.Elapsed + Environment.NewLine);
}

Combine two (or more) PDF's

Background: I need to provide a weekly report package for my sales staff. This package contains several (5-10) crystal reports.
Problem:
I would like to allow a user to run all reports and also just run a single report. I was thinking I could do this by creating the reports and then doing:
List<ReportClass> reports = new List<ReportClass>();
reports.Add(new WeeklyReport1());
reports.Add(new WeeklyReport2());
reports.Add(new WeeklyReport3());
<snip>
foreach (ReportClass report in reports)
{
report.ExportToDisk(ExportFormatType.PortableDocFormat, #"c:\reports\" + report.ResourceName + ".pdf");
}
This would provide me a folder full of the reports, but I would like to email everyone a single PDF with all the weekly reports. So I need to combine them.
Is there an easy way to do this without install any more third party controls? I already have DevExpress & CrystalReports and I'd prefer not to add too many more.
Would it be best to combine them in the foreach loop or in a seperate loop? (or an alternate way)
I had to solve a similar problem and what I ended up doing was creating a small pdfmerge utility that uses the PDFSharp project which is essentially MIT licensed.
The code is dead simple, I needed a cmdline utility so I have more code dedicated to parsing the arguments than I do for the PDF merging:
using (PdfDocument one = PdfReader.Open("file1.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument two = PdfReader.Open("file2.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument outPdf = new PdfDocument())
{
CopyPages(one, outPdf);
CopyPages(two, outPdf);
outPdf.Save("file1and2.pdf");
}
void CopyPages(PdfDocument from, PdfDocument to)
{
for (int i = 0; i < from.PageCount; i++)
{
to.AddPage(from.Pages[i]);
}
}
Here is a single function that will merge X amount of PDFs using PDFSharp
using PdfSharp;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static void MergePDFs(string targetPath, params string[] pdfs) {
using(var targetDoc = new PdfDocument()){
foreach (var pdf in pdfs) {
using (var pdfDoc = PdfReader.Open(pdf, PdfDocumentOpenMode.Import)) {
for (var i = 0; i < pdfDoc.PageCount; i++)
targetDoc.AddPage(pdfDoc.Pages[i]);
}
}
targetDoc.Save(targetPath);
}
}
This is something that I figured out, and wanted to share with you, using PdfSharp.
Here you can join multiple Pdfs in one, without the need of an output directory (following the input list order)
public static byte[] MergePdf(List<byte[]> pdfs)
{
List<PdfSharp.Pdf.PdfDocument> lstDocuments = new List<PdfSharp.Pdf.PdfDocument>();
foreach (var pdf in pdfs)
{
lstDocuments.Add(PdfReader.Open(new MemoryStream(pdf), PdfDocumentOpenMode.Import));
}
using (PdfSharp.Pdf.PdfDocument outPdf = new PdfSharp.Pdf.PdfDocument())
{
for(int i = 1; i<= lstDocuments.Count; i++)
{
foreach(PdfSharp.Pdf.PdfPage page in lstDocuments[i-1].Pages)
{
outPdf.AddPage(page);
}
}
MemoryStream stream = new MemoryStream();
outPdf.Save(stream, false);
byte[] bytes = stream.ToArray();
return bytes;
}
}
I used iTextsharp with c# to combine pdf files. This is the code I used.
string[] lstFiles=new string[3];
lstFiles[0]=#"C:/pdf/1.pdf";
lstFiles[1]=#"C:/pdf/2.pdf";
lstFiles[2]=#"C:/pdf/3.pdf";
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage;
string outputPdfPath=#"C:/pdf/new.pdf";
sourceDocument = new Document();
pdfCopyProvider = new PdfCopy(sourceDocument, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
//Open the output file
sourceDocument.Open();
try
{
//Loop through the files list
for (int f = 0; f < lstFiles.Length-1; f++)
{
int pages =get_pageCcount(lstFiles[f]);
reader = new PdfReader(lstFiles[f]);
//Add pages of current file
for (int i = 1; i <= pages; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
}
//At the end save the output file
sourceDocument.Close();
}
catch (Exception ex)
{
throw ex;
}
private int get_pageCcount(string file)
{
using (StreamReader sr = new StreamReader(File.OpenRead(file)))
{
Regex regex = new Regex(#"/Type\s*/Page[^s]");
MatchCollection matches = regex.Matches(sr.ReadToEnd());
return matches.Count;
}
}
Here is a example using iTextSharp
public static void MergePdf(Stream outputPdfStream, IEnumerable<string> pdfFilePaths)
{
using (var document = new Document())
using (var pdfCopy = new PdfCopy(document, outputPdfStream))
{
pdfCopy.CloseStream = false;
try
{
document.Open();
foreach (var pdfFilePath in pdfFilePaths)
{
using (var pdfReader = new PdfReader(pdfFilePath))
{
pdfCopy.AddDocument(pdfReader);
pdfReader.Close();
}
}
}
finally
{
document?.Close();
}
}
}
The PdfReader constructor has many overloads. It's possible to replace the parameter type IEnumerable<string> with IEnumerable<Stream> and it should work as well. Please notice that the method does not close the OutputStream, it delegates that task to the Stream creator.
PDFsharp seems to allow merging multiple PDF documents into one.
And the same is also possible with ITextSharp.
Combining two byte[] using iTextSharp up to version 5.x:
internal static MemoryStream mergePdfs(byte[] pdf1, byte[] pdf2)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
}
return outStream;
}
Instead of the byte[]'s it's possible to pass also Stream's
There's some good answers here already, but I thought I might mention that pdftk might be useful for this task. Instead of producing one PDF directly, you could produce each PDF you need and then combine them together as a post-process with pdftk. This could even be done from within your program using a system() or ShellExecute() call.
You could try pdf-shuffler gtk-apps.org
I know a lot of people have recommended PDF Sharp, however it doesn't look like that project has been updated since june of 2008. Further, source isn't available.
Personally, I've been playing with iTextSharp which has been pretty easy to work with.
I combined the two above, because I needed to merge 3 pdfbytes and return a byte
internal static byte[] mergePdfs(byte[] pdf1, byte[] pdf2,byte[] pdf3)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
copy.AddDocument(new PdfReader(pdf3));
}
return outStream.ToArray();
}
Following method gets a List of byte array which is PDF byte array and then returns a byte array.
using ...;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static class PdfHelper
{
public static byte[] PdfConcat(List<byte[]> lstPdfBytes)
{
byte[] res;
using (var outPdf = new PdfDocument())
{
foreach (var pdf in lstPdfBytes)
{
using (var pdfStream = new MemoryStream(pdf))
using (var pdfDoc = PdfReader.Open(pdfStream, PdfDocumentOpenMode.Import))
for (var i = 0; i < pdfDoc.PageCount; i++)
outPdf.AddPage(pdfDoc.Pages[i]);
}
using (var memoryStreamOut = new MemoryStream())
{
outPdf.Save(memoryStreamOut, false);
res = Stream2Bytes(memoryStreamOut);
}
}
return res;
}
public static void DownloadAsPdfFile(string fileName, byte[] content)
{
var ms = new MemoryStream(content);
HttpContext.Current.Response.Clear();
HttpContext.Current.Response.ContentType = "application/pdf";
HttpContext.Current.Response.AddHeader("content-disposition", $"attachment;filename={fileName}.pdf");
HttpContext.Current.Response.Buffer = true;
ms.WriteTo(HttpContext.Current.Response.OutputStream);
HttpContext.Current.Response.End();
}
private static byte[] Stream2Bytes(Stream input)
{
var buffer = new byte[input.Length];
using (var ms = new MemoryStream())
{
int read;
while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
ms.Write(buffer, 0, read);
return ms.ToArray();
}
}
}
So, the result of PdfHelper.PdfConcat method is passed to PdfHelper.DownloadAsPdfFile method.
PS: A NuGet package named [PdfSharp][1] need to be installed. So in the Package Manage Console window type:
Install-Package PdfSharp
Following method merges two pdfs( f1 and f2) using iTextSharp. The second pdf is appended after a specific index of f1.
string f1 = "D:\\a.pdf";
string f2 = "D:\\Iso.pdf";
string outfile = "D:\\c.pdf";
appendPagesFromPdf(f1, f2, outfile, 3);
public static void appendPagesFromPdf(String f1,string f2, String destinationFile, int startingindex)
{
PdfReader p1 = new PdfReader(f1);
PdfReader p2 = new PdfReader(f2);
int l1 = p1.NumberOfPages, l2 = p2.NumberOfPages;
//Create our destination file
using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document doc = new Document();
PdfWriter w = PdfWriter.GetInstance(doc, fs);
doc.Open();
for (int page = 1; page <= startingindex; page++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, page), 0, 0);
//Used to pull individual pages from our source
}// copied pages from first pdf till startingIndex
for (int i = 1; i <= l2;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p2, i), 0, 0);
}// merges second pdf after startingIndex
for (int i = startingindex+1; i <= l1;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, i), 0, 0);
}// continuing from where we left in pdf1
doc.Close();
p1.Close();
p2.Close();
}
}
To solve a similar problem i used iTextSharp like this:
//Create the document which will contain the combined PDF's
Document document = new Document();
//Create a writer for de document
PdfCopy writer = new PdfCopy(document, new FileStream(OutPutFilePath, FileMode.Create));
if (writer == null)
{
return;
}
//Open the document
document.Open();
//Get the files you want to combine
string[] filePaths = Directory.GetFiles(DirectoryPathWhereYouHaveYourFiles);
foreach (string filePath in filePaths)
{
//Read the PDF file
using (PdfReader reader = new PdfReader(vls_FilePath))
{
//Add the file to the combined one
writer.AddDocument(reader);
}
}
//Finally close the document and writer
writer.Close();
document.Close();
Here is a link to an example using PDFSharp and ConcatenateDocuments
Here the solution http://www.wacdesigns.com/2008/10/03/merge-pdf-files-using-c
It use free open source iTextSharp library http://sourceforge.net/projects/itextsharp
I've done this with PDFBox. I suppose it works similarly to iTextSharp.

Categories