Using iTextSharp, I was able to read a base64 string and convert certain pages into local files using a FileStream. I want to do the same without saving to the local filesystem, using MemoryStream and only returning the selected pages to the calling function as a base64 string.
// function takes reader and start and end page and destination file path as parameter.
public static void ExtractPages(PdfReader pdfReader, string sourcePdfPath,
string outputPdfPath, int startPage, int endPage)
{
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage = null;
try
{
reader = pdfReader;
sourceDocument = new Document(reader.GetPageSizeWithRotation(startPage));
// old code to save selected pdf into local file system.
FileStream fileStream = new FileStream(outputPdfPath, FileMode.Create);
// memory stream created but not been used yet!
MemoryStream memoryStream = new MemoryStream();
pdfCopyProvider = new PdfCopy(sourceDocument, fileStream);
sourceDocument.Open();
// save selected page into local file system
for (int i = startPage; i <= endPage; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
#region TestMemoryStream
// TODO: New code to be added and return selected page data as bas64 string
#endregion
sourceDocument.Close();
reader.Close();
}
catch (Exception ex)
{
throw ex;
}
}
I was able to sort out the issue, my modified code is below.
reader = pdfReader;
sourceDocument = new Document(reader.GetPageSizeWithRotation(startPage));
//FileStream fileStream = new FileStream(outputPdfPath, FileMode.Create);
MemoryStream memoryStream = new MemoryStream();
pdfCopyProvider = new PdfCopy(sourceDocument, memoryStream);
sourceDocument.Open();
for (int i = startPage; i <= endPage; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
sourceDocument.Close();
reader.Close();
byte[] content1 = memoryStream.ToArray();
// var bas64 = Convert.ToBase64String(content1);
string bas64New = Convert.ToBase64String(content1);
I have a list of PDF stored as list<byte[]>. I try to concatenate all these PDF files using PDFsharp, but after my operation I get a PDF with proper page count, but all pages are blank. Looks like I lose some header or something but I can't find where.
My code:
PdfDocument output = new PdfDocument();
try
{
foreach (var report in reports)
{
using (MemoryStream stream = new MemoryStream(report))
{
PdfDocument input = PdfReader.Open(stream, PdfDocumentOpenMode.Import);
foreach (PdfPage page in input.Pages)
{
output.AddPage(page);
}
}
}
if (output.Pages.Count <= 0)
{
throw new Exception("Empty Document");
}
MemoryStream final = new MemoryStream();
output.Save(final);
output.Close();
return final.ToArray();
}
catch (Exception e)
{
throw new Exception(e.ToString());
}
I want to return it as byte[] because I use them later:
return File(report, System.Net.Mime.MediaTypeNames.Application.Octet, "test.pdf");
This returns PDF with proper page count, but all blank.
You tell in a comment that the files come from SSRS.
Older versions of PDFsharp require a special SSRS setting:
For the DeviceSettings parameter for the Render method on the ReportExecutionService object, pass this value:
theDeviceSettings = "<DeviceInfo><HumanReadablePDF>True</HumanReadablePDF></DeviceInfo>";
Source:
http://forum.pdfsharp.net/viewtopic.php?p=1613#p1613
I use iTextSharp, look at this saple code (it works)
public static byte[] PdfJoin(List<String> pdfs)
{
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
using (iTextSharp.text.Document document = new iTextSharp.text.Document())
{
using (iTextSharp.text.pdf.PdfCopy copy = new iTextSharp.text.pdf.PdfCopy(document, ms))
{
document.Open();
for (int i = 0; i < pdfs.Count; ++i)
{
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(pdfs[i]);
// loop over the pages in that document
int n = reader.NumberOfPages;
for (int page = 0; page < n; )
{
copy.AddPage(copy.GetImportedPage(reader, ++page));
}
}
}
}
mergedPdf = ms.ToArray();
}
return mergedPdf;
}
public static byte[] PdfJoin(List<byte[]> pdfs)
{
byte[] mergedPdf = null;
using (MemoryStream ms = new MemoryStream())
{
using (iTextSharp.text.Document document = new iTextSharp.text.Document())
{
using (iTextSharp.text.pdf.PdfCopy copy = new iTextSharp.text.pdf.PdfCopy(document, ms))
{
document.Open();
for (int i = 0; i < pdfs.Count; ++i)
{
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(pdfs[i]);
// loop over the pages in that document
int n = reader.NumberOfPages;
for (int page = 0; page < n; )
{
copy.AddPage(copy.GetImportedPage(reader, ++page));
}
}
}
}
mergedPdf = ms.ToArray();
}
return mergedPdf;
}
None of the similar questions are quite what I'm looking for!
What's wrong with the following code?
files is a text array of file contents, and fileNames is the corresponding filename array.
This code always fails at the second-last line with the Save method, but I can't see why the stream would be closed!
result = new MemoryStream();
using (ZipFile zipFile = new ZipFile())
{
for (int i = 0; i < files.Count(); i++)
{
System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();
Byte[] bytes = encoding.GetBytes(files[i]);
using (MemoryStream fs = new MemoryStream(bytes))
{
zipFile.AddEntry(fileNames[i], fs);
}
}
zipFile.Save(result);
}
Thanks for any help - getting desperate here!
This is my solution based on #spender's first comment, although his solution posted below is possibly nicer.
try
{
result = new MemoryStream();
List<Stream> streams = new List<Stream>();
if (files.Count > 0)
{
using (ZipFile zipFile = new ZipFile())
{
for (int i = 0; i < files.Count(); i++)
{
System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();
Byte[] bytes = encoding.GetBytes(files[i]);
streams.Add(new MemoryStream(bytes));
zipFile.AddEntry(fileNames[i], streams[i]);
}
zipFile.Save(result);
}
}
}
catch (Exception ex)
{
throw;
}
It seems that calling Save is the point when the source streams are read. This means you have to keep them undisposed until after the save. Abandon using statement in this case as it is impossible to extend its scope beyond the loop. Instead, collect your IDisposables and dispose of them once the save is completed.
result = new MemoryStream();
using (ZipFile zipFile = new ZipFile())
{
List<IDisposable> memStreams = new List<IDisposable>();
try
{
for (int i = 0; i < files.Count(); i++)
{
System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();
Byte[] bytes = encoding.GetBytes(files[i]);
MemoryStream fs = new MemoryStream(bytes);
zipFile.AddEntry(fileNames[i], fs);
memStreams.Add(fs);
}
zipFile.Save(result);
}
finally
{
foreach(var x in memStreams)
{
x.Dispose();
}
}
}
#spender answer is spot on but as my 2 cents if you want to be more synonymous with using pattern you can do something like
class
public class DisposableBucket : IDisposable
{
readonly List<IDisposable> listOfDisposables = new List<IDisposable>();
public TClass Using<TClass>(TClass disposable) where TClass : IDisposable
{
listOfDisposables.Add(disposable);
return disposable;
}
public void Dispose()
{
foreach (var listOfDisposable in listOfDisposables)
{
listOfDisposable.Dispose();
}
}
}
How to use
result = new MemoryStream();
using(var bucket = new DisposableBucket())
{
using (var zipFile = new ZipFile())
{
List<IDisposable> memStreams = new List<IDisposable>();
for (int i = 0; i < files.Count(); i++)
{
System.Text.ASCIIEncoding encoding = new System.Text.ASCIIEncoding();
Byte[] bytes = encoding.GetBytes(files[i]);
var fs = bucket.Using(new MemoryStream(bytes));
zipFile.AddEntry(fileNames[i], fs);
}
zipFile.Save(result);
}
}
I have an application in ASP.NET where user can upload ZIP file. I'm trying to extract file using ICSharpZipLib (I also tried DotNetZip, but had same issue).
This zip file contains single xml document (9KB before compress).
When I open this file with other applications on my desktop (7zip, windows explorer) it seems to be ok.
My unzip method throws System.OutOfMemoryException and I have no idea why is that. When I debugged my unziping method I noticed that zipInputStreams' Length property throws Exception and is not available:
Stream UnZipSingleFile(Stream memoryStream)
{
var zipInputStream = new ZipInputStream(memoryStream);
memoryStream.Position = 0;
zipInputStream.GetNextEntry();
MemoryStream unzippedStream = new MemoryStream();
int len;
byte[] buf = new byte[4096];
while ((len = zipInputStream.Read(buf, 0, buf.Length)) > 0)
{
unzippedStream.Write(buf, 0, len);
}
unzippedStream.Position = 0;
memoryStream.Position = 0;
return unzippedStream;
}
and here's how I get string of unzippedStream:
string GetString()
{
var reader = new StreamReader(unzippedStream);
var result = reader.ReadToEnd();
unzippedStream.Position = 0;
return result;
}
From their wiki:
"Sharpzip supports Zip files using both stored and deflate compression methods and also supports old (PKZIP 2.0) style and AES encryption"
Are you sure the format of the uploaded zip file is acceptable for SharpZipLib?
While this post is quite old, I think it could be beneficial to illustrate how I did this for compression and decompression using ICSharpZipLib (C# package version 1.1.0). I put this together by looking into the examples shown here (see ie. these compression and decompression examples).
Assumption: The input to the compression and decompression below should be in bytes. If you have ie. an xml file you could load it to an XDocument, and convert it into an XmlDocument with .ToXmlDocument(). From there, you could access the string contents by calling .OuterXml, and converting the string to a byte array.
// Compression (inputBytes = ie. string-to-compress, as bytes)
using var dataStream = new MemoryStream(inputBytes);
var outputStream = new MemoryStream();
using (var zipStream = new ZipOutputStream(outputStream))
{
zipStream.SetLevel(3);
var newEntry = new ZipEntry("someFilename.someExtension");
newEntry.DateTime = DateTime.Now;
zipStream.PutNextEntry(newEntry);
StreamUtils.Copy(dataStream, zipStream, new byte[4096]);
zipStream.CloseEntry();
zipStream.IsStreamOwner = false;
}
outputStream.Position = 0;
var outputBytes = outputStream.ToArray();
// Decompression (inputBytes = ie. string-to-decompress, as bytes)
using var dataStream = new MemoryStream(inputBytes);
var outputStream = new MemoryStream();
using (var zipStream = new ZipInputStream(dataStream))
{
while (zipStream.GetNextEntry() is ZipEntry zipEntry)
{
var buffer = new byte[4096];
StreamUtils.Copy(zipStream, outputStream, buffer);
}
}
var outputBytes = outputStream.ToArray();
Background: I need to provide a weekly report package for my sales staff. This package contains several (5-10) crystal reports.
Problem:
I would like to allow a user to run all reports and also just run a single report. I was thinking I could do this by creating the reports and then doing:
List<ReportClass> reports = new List<ReportClass>();
reports.Add(new WeeklyReport1());
reports.Add(new WeeklyReport2());
reports.Add(new WeeklyReport3());
<snip>
foreach (ReportClass report in reports)
{
report.ExportToDisk(ExportFormatType.PortableDocFormat, #"c:\reports\" + report.ResourceName + ".pdf");
}
This would provide me a folder full of the reports, but I would like to email everyone a single PDF with all the weekly reports. So I need to combine them.
Is there an easy way to do this without install any more third party controls? I already have DevExpress & CrystalReports and I'd prefer not to add too many more.
Would it be best to combine them in the foreach loop or in a seperate loop? (or an alternate way)
I had to solve a similar problem and what I ended up doing was creating a small pdfmerge utility that uses the PDFSharp project which is essentially MIT licensed.
The code is dead simple, I needed a cmdline utility so I have more code dedicated to parsing the arguments than I do for the PDF merging:
using (PdfDocument one = PdfReader.Open("file1.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument two = PdfReader.Open("file2.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument outPdf = new PdfDocument())
{
CopyPages(one, outPdf);
CopyPages(two, outPdf);
outPdf.Save("file1and2.pdf");
}
void CopyPages(PdfDocument from, PdfDocument to)
{
for (int i = 0; i < from.PageCount; i++)
{
to.AddPage(from.Pages[i]);
}
}
Here is a single function that will merge X amount of PDFs using PDFSharp
using PdfSharp;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static void MergePDFs(string targetPath, params string[] pdfs) {
using(var targetDoc = new PdfDocument()){
foreach (var pdf in pdfs) {
using (var pdfDoc = PdfReader.Open(pdf, PdfDocumentOpenMode.Import)) {
for (var i = 0; i < pdfDoc.PageCount; i++)
targetDoc.AddPage(pdfDoc.Pages[i]);
}
}
targetDoc.Save(targetPath);
}
}
This is something that I figured out, and wanted to share with you, using PdfSharp.
Here you can join multiple Pdfs in one, without the need of an output directory (following the input list order)
public static byte[] MergePdf(List<byte[]> pdfs)
{
List<PdfSharp.Pdf.PdfDocument> lstDocuments = new List<PdfSharp.Pdf.PdfDocument>();
foreach (var pdf in pdfs)
{
lstDocuments.Add(PdfReader.Open(new MemoryStream(pdf), PdfDocumentOpenMode.Import));
}
using (PdfSharp.Pdf.PdfDocument outPdf = new PdfSharp.Pdf.PdfDocument())
{
for(int i = 1; i<= lstDocuments.Count; i++)
{
foreach(PdfSharp.Pdf.PdfPage page in lstDocuments[i-1].Pages)
{
outPdf.AddPage(page);
}
}
MemoryStream stream = new MemoryStream();
outPdf.Save(stream, false);
byte[] bytes = stream.ToArray();
return bytes;
}
}
I used iTextsharp with c# to combine pdf files. This is the code I used.
string[] lstFiles=new string[3];
lstFiles[0]=#"C:/pdf/1.pdf";
lstFiles[1]=#"C:/pdf/2.pdf";
lstFiles[2]=#"C:/pdf/3.pdf";
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage;
string outputPdfPath=#"C:/pdf/new.pdf";
sourceDocument = new Document();
pdfCopyProvider = new PdfCopy(sourceDocument, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
//Open the output file
sourceDocument.Open();
try
{
//Loop through the files list
for (int f = 0; f < lstFiles.Length-1; f++)
{
int pages =get_pageCcount(lstFiles[f]);
reader = new PdfReader(lstFiles[f]);
//Add pages of current file
for (int i = 1; i <= pages; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
}
//At the end save the output file
sourceDocument.Close();
}
catch (Exception ex)
{
throw ex;
}
private int get_pageCcount(string file)
{
using (StreamReader sr = new StreamReader(File.OpenRead(file)))
{
Regex regex = new Regex(#"/Type\s*/Page[^s]");
MatchCollection matches = regex.Matches(sr.ReadToEnd());
return matches.Count;
}
}
Here is a example using iTextSharp
public static void MergePdf(Stream outputPdfStream, IEnumerable<string> pdfFilePaths)
{
using (var document = new Document())
using (var pdfCopy = new PdfCopy(document, outputPdfStream))
{
pdfCopy.CloseStream = false;
try
{
document.Open();
foreach (var pdfFilePath in pdfFilePaths)
{
using (var pdfReader = new PdfReader(pdfFilePath))
{
pdfCopy.AddDocument(pdfReader);
pdfReader.Close();
}
}
}
finally
{
document?.Close();
}
}
}
The PdfReader constructor has many overloads. It's possible to replace the parameter type IEnumerable<string> with IEnumerable<Stream> and it should work as well. Please notice that the method does not close the OutputStream, it delegates that task to the Stream creator.
PDFsharp seems to allow merging multiple PDF documents into one.
And the same is also possible with ITextSharp.
Combining two byte[] using iTextSharp up to version 5.x:
internal static MemoryStream mergePdfs(byte[] pdf1, byte[] pdf2)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
}
return outStream;
}
Instead of the byte[]'s it's possible to pass also Stream's
There's some good answers here already, but I thought I might mention that pdftk might be useful for this task. Instead of producing one PDF directly, you could produce each PDF you need and then combine them together as a post-process with pdftk. This could even be done from within your program using a system() or ShellExecute() call.
You could try pdf-shuffler gtk-apps.org
I know a lot of people have recommended PDF Sharp, however it doesn't look like that project has been updated since june of 2008. Further, source isn't available.
Personally, I've been playing with iTextSharp which has been pretty easy to work with.
I combined the two above, because I needed to merge 3 pdfbytes and return a byte
internal static byte[] mergePdfs(byte[] pdf1, byte[] pdf2,byte[] pdf3)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
copy.AddDocument(new PdfReader(pdf3));
}
return outStream.ToArray();
}
Following method gets a List of byte array which is PDF byte array and then returns a byte array.
using ...;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static class PdfHelper
{
public static byte[] PdfConcat(List<byte[]> lstPdfBytes)
{
byte[] res;
using (var outPdf = new PdfDocument())
{
foreach (var pdf in lstPdfBytes)
{
using (var pdfStream = new MemoryStream(pdf))
using (var pdfDoc = PdfReader.Open(pdfStream, PdfDocumentOpenMode.Import))
for (var i = 0; i < pdfDoc.PageCount; i++)
outPdf.AddPage(pdfDoc.Pages[i]);
}
using (var memoryStreamOut = new MemoryStream())
{
outPdf.Save(memoryStreamOut, false);
res = Stream2Bytes(memoryStreamOut);
}
}
return res;
}
public static void DownloadAsPdfFile(string fileName, byte[] content)
{
var ms = new MemoryStream(content);
HttpContext.Current.Response.Clear();
HttpContext.Current.Response.ContentType = "application/pdf";
HttpContext.Current.Response.AddHeader("content-disposition", $"attachment;filename={fileName}.pdf");
HttpContext.Current.Response.Buffer = true;
ms.WriteTo(HttpContext.Current.Response.OutputStream);
HttpContext.Current.Response.End();
}
private static byte[] Stream2Bytes(Stream input)
{
var buffer = new byte[input.Length];
using (var ms = new MemoryStream())
{
int read;
while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
ms.Write(buffer, 0, read);
return ms.ToArray();
}
}
}
So, the result of PdfHelper.PdfConcat method is passed to PdfHelper.DownloadAsPdfFile method.
PS: A NuGet package named [PdfSharp][1] need to be installed. So in the Package Manage Console window type:
Install-Package PdfSharp
Following method merges two pdfs( f1 and f2) using iTextSharp. The second pdf is appended after a specific index of f1.
string f1 = "D:\\a.pdf";
string f2 = "D:\\Iso.pdf";
string outfile = "D:\\c.pdf";
appendPagesFromPdf(f1, f2, outfile, 3);
public static void appendPagesFromPdf(String f1,string f2, String destinationFile, int startingindex)
{
PdfReader p1 = new PdfReader(f1);
PdfReader p2 = new PdfReader(f2);
int l1 = p1.NumberOfPages, l2 = p2.NumberOfPages;
//Create our destination file
using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document doc = new Document();
PdfWriter w = PdfWriter.GetInstance(doc, fs);
doc.Open();
for (int page = 1; page <= startingindex; page++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, page), 0, 0);
//Used to pull individual pages from our source
}// copied pages from first pdf till startingIndex
for (int i = 1; i <= l2;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p2, i), 0, 0);
}// merges second pdf after startingIndex
for (int i = startingindex+1; i <= l1;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, i), 0, 0);
}// continuing from where we left in pdf1
doc.Close();
p1.Close();
p2.Close();
}
}
To solve a similar problem i used iTextSharp like this:
//Create the document which will contain the combined PDF's
Document document = new Document();
//Create a writer for de document
PdfCopy writer = new PdfCopy(document, new FileStream(OutPutFilePath, FileMode.Create));
if (writer == null)
{
return;
}
//Open the document
document.Open();
//Get the files you want to combine
string[] filePaths = Directory.GetFiles(DirectoryPathWhereYouHaveYourFiles);
foreach (string filePath in filePaths)
{
//Read the PDF file
using (PdfReader reader = new PdfReader(vls_FilePath))
{
//Add the file to the combined one
writer.AddDocument(reader);
}
}
//Finally close the document and writer
writer.Close();
document.Close();
Here is a link to an example using PDFSharp and ConcatenateDocuments
Here the solution http://www.wacdesigns.com/2008/10/03/merge-pdf-files-using-c
It use free open source iTextSharp library http://sourceforge.net/projects/itextsharp
I've done this with PDFBox. I suppose it works similarly to iTextSharp.