I'm working on a Blazor WASM App and I want my users to easily open pdf files on specific pages that contain additional information.
I cannot distribute those files myself or upload them to any kind of server. Each user has to provide them themselves.
Because the files are up to 60MB big I cannot convert the uploaded file to base64 and display them as described here.
However I don't have to display the whole file and could just load the needed page +- some pages around them.
For that I tried using iText7 ExtractPageRange(). This answer indicates, that I have to override the GetNextPdfWriter() Method and to store all streams in an collection.
class ByteArrayPdfSplitter : PdfSplitter {
public ByteArrayPdfSplitter(PdfDocument pdfDocument) : base(pdfDocument) {
}
protected override PdfWriter GetNextPdfWriter(PageRange documentPageRange) {
CurrentMemoryStream = new MemoryStream();
UsedStreams.Add(CurrentMemoryStream);
return new PdfWriter(CurrentMemoryStream);
}
public MemoryStream CurrentMemoryStream { get; private set; }
public List<MemoryStream> UsedStreams { get; set; } = new List<MemoryStream>();
Then I thought I could merge those streams and convert them to base64
var file = loadedFiles.First();
using (MemoryStream ms = new MemoryStream())
{
var rs = file.OpenReadStream(maxFileSize);
await rs.CopyToAsync(ms);
ms.Position = 0;
//rs needed to be converted to ms, because the PdfReader constructer uses a
//synchronious read that isn't supported by rs and throws an exception.
PdfReader pdfReader = new PdfReader(ms);
var document = new PdfDocument(pdfReader);
var splitter = new ByteArrayPdfSplitter(document);
var range = new PageRange();
range.AddPageSequence(1, 10);
var splitDoc = splitter.ExtractPageRange(range);
//Edit commented this out, shouldn't have been here at all leads to an exception
//splitDoc.Close();
var outputMs = new MemoryStream();
foreach (var usedMs in splitter.UsedStreams)
{
usedMs.Position = 0;
outputMs.Position = outputMs.Length;
await usedMs.CopyToAsync(outputMs);
}
var data = outputMs.ToArray();
currentPdfContent = "data:application/pdf;base64,";
currentPdfContent += Convert.ToBase64String(data);
pdfLoaded = true;
}
This however doesn't work.
Has anyone a suggestion how to get this working? Or maybe a simpler solution I could try.
Edit:
I took a closer look in debug and it seems like, the resulting stream outputMs is always empty. So it is probably a problem in how I split the pdf.
After at least partially clearing up my misconception of what it means to not being able to access the file system from blazor WASM I managed to find a working solution.
await using MemoryStream ms = new MemoryStream();
var rs = file.OpenReadStream(maxFileSize);
await using var fs = new FileStream("test.pdf", FileMode.Create)
fs.Position = 0;
await rs.CopyToAsync(fs);
fs.Close();
string path = "test.pdf";
string range = "10 - 15";
var pdfDocument = new PdfDocument(new PdfReader("test.pdf"));
var split = new MySplitter(pdfDocument);
var result = split.ExtractPageRange(new PageRange(range));
result.Close();
await using var splitFs = new FileStream("split.pdf", FileMode.Open))
await splitFs.CopyToAsync(ms);
var data = ms.ToArray();
var pdfContent = "data:application/pdf;base64,";
pdfContent += System.Convert.ToBase64String(data);
Console.WriteLine(pdfContent);
currentPdfContent = pdfContent;
With the MySplitter Class from this answer.
class MySplitter : PdfSplitter
{
public MySplitter(PdfDocument pdfDocument) : base(pdfDocument)
{
}
protected override PdfWriter GetNextPdfWriter(PageRange documentPageRange)
{
String toFile = "split.pdf";
return new PdfWriter(toFile);
}
}
I'm trying to generate PDF of my SharePoint page but keep getting an "Unable to evaluate expression because the code is optimized or a native frame is on top of the call stack." error. Am I going about doing this the correct way?
protected void btnDownload_Click(object sender, EventArgs e)
{
try
{
SPSite spBtnSite = new SPSite(SPContext.Current.Site.Url);
SPWeb btnSPWeb = SPContext.Current.Web;
string doclibURL = string.Empty;
using (MemoryStream ms = new MemoryStream())
{
using (var pdfWriter = new PdfWriter(ms))
{
ConverterProperties properties = new ConverterProperties();
pdfWriter.SetCloseStream(false);
PdfDocument pdfDocument = new PdfDocument(pdfWriter);
//For setting the PAGE SIZE
pdfDocument.SetDefaultPageSize(iText.Kernel.Geom.PageSize.A4);
Document document1 = new Document(pdfDocument, pdfDocument.GetDefaultPageSize(), false);
using (var document = HtmlConverter.ConvertToDocument(hidText.Text, pdfDocument, properties))
{
}
ms.Position = 0;
byte[] bytesInStream = ms.ToArray(); // simpler way of converting to array
ms.Close();
if (bytesInStream != null)
{
Page.Response.Clear();
Page.Response.ClearHeaders();
Page.Response.ClearContent();
Page.Response.ContentType = "application/pdf";
Page.Response.AddHeader("content-length", bytesInStream.Length.ToString());
Page.Response.BinaryWrite(bytesInStream);
Page.Response.End();
}
}
}
}
catch (Exception ex_btnSubmit_Click)
{
Exceptions.SaveException(UserID, ex_btnSubmit_Click.Message, ex_btnSubmit_Click.StackTrace, (ex_btnSubmit_Click.InnerException != null) ? ex_btnSubmit_Click.InnerException.Message : "");
}
}
You got ThreadAbortException on Response.End(). According to this, you should use HttpContext.Current.ApplicationInstance.CompleteRequest instead of Response.End().
public FileResult Download()
{
var doc = new EO.Pdf.PdfDocument();
EO.Pdf.HtmlToPdf.ConvertUrl("http://www.google.com/", doc);
var ms = new MemoryStream();
doc.Save(ms);
ms.Position = 0;
return new FileStreamResult(ms, "application/pdf")
{
FileDownloadName = "download.pdf"
};
}
Can you please show if possible, how to extend the code above to be able to convert several web pages into one pdf document?
The tricky part is that we don't know what pages a user is likely to attempt to convert.
So, hardcoding the webpages as the code above shows isn't helping us.
Any help is greatly appreciated.
//Create a new PdfDocument object
var doc = new EO.Pdf.PdfDocument();
//Convert two ore more different pages into the same PdfDocument
EO.Pdf.HtmlToPdf.ConvertUrl("c:\\1.html", doc);
EO.Pdf.HtmlToPdf.ConvertUrl("c:\\2.html", doc);
Latest code:
public FileResult Download()
{
var doc = new EO.Pdf.PdfDocument();
foreach(var url in passedUrls)
{
EO.Pdf.HtmlToPdf.ConvertUrl(url, doc);
doc.Save(ms);
}
ms.Position = 0;
return new FileStreamResult(ms, "application/pdf")
{
FileDownloadName = "download.pdf"
};
}
Latest from Adam (thank you sir)
public FileResult Download()
{
var documents = new List<EO.Pdf.PdfDocument>();
foreach(var url in passedUrls)
{
var doc = new EO.Pdf.PdfDocument();
EO.Pdf.HtmlToPdf.ConvertUrl(url, doc);
documents.Add(doc);
}
EO.Pdf.PdfDocument mergedDocument = EO.Pdf.PdfDocument.Merge(documents.ToArray());
}
Hopefully, others find these codes useful.
Based on the Help documentation I would recommend the following:
public FileResult Download()
{
var urls = new List<string>
{ // Populate list with urls
"C:\\1.html",
"C:\\2.html"
};
var documents = new List<EO.Pdf.PdfDocument>();
foreach(var url in urls)
{
var doc = new EO.Pdf.PdfDocument();
EO.Pdf.HtmlToPdf.ConvertUrl(url, doc);
documents.Add(doc);
}
EO.Pdf.PdfDocument mergedDocument = EO.Pdf.PdfDocument.Merge(documents.ToArray());
var ms = new MemoryStream();
mergedDocument.Save(ms);
ms.Position = 0;
return new FileStreamResult(ms, "application/pdf") { FileDownloadName = "download.pdf" };
}
Pass an array of url strings to the function
Then
foreach(var url in passedUrls)
{
EO.Pdf.HtmlToPdf.ConvertUrl(url, doc);
doc.Save(ms);
}
I am trying to build up a single PDF from a bunch of other PDFs that I am filling out some form values in. Essentially I am doing a PDF mail merge. My code is below:
byte[] completedDocument = null;
using (MemoryStream streamCompleted = new MemoryStream())
{
using (Document document = new Document())
{
document.Open();
PdfCopy copy = new PdfCopy(document, streamCompleted);
copy.Open();
foreach (var item in eventItems)
{
byte[] mergedDocument = null;
PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
using (MemoryStream streamTemplate = new MemoryStream())
{
using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
{
foreach (var token in item.DataTokens)
{
if (stamper.AcroFields.Fields.Any(fld => fld.Key == token.Key.ToString()))
{
stamper.AcroFields.SetField(token.Key.ToString(), token.Value);
}
}
stamper.FormFlattening = true;
stamper.Writer.CloseStream = false;
}
mergedDocument = new byte[streamTemplate.Length];
streamTemplate.Position = 0;
streamTemplate.Read(mergedDocument, 0, (int)streamTemplate.Length);
}
reader = new PdfReader(mergedDocument);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
document.SetPageSize(PageSize.A4);
copy.AddPage(copy.GetImportedPage(reader, i));
}
}
}
completedDocument = new byte[streamCompleted.Length];
streamCompleted.Position = 0;
streamCompleted.Read(completedDocument, 0, (int)streamCompleted.Length);
}
The problem I am having is that is throws a null reference exception when it exits the using (Document document = new Document()) block.
From debugging the iTextSharp source the problem is the below method in PdfAnnotationsimp
public bool HasUnusedAnnotations() {
return annotations.Count > 0;
}
annotations is null so this throws the null ref exception. Is there something I should be doing to instantiate this?
I changed:
document.Open();
PdfCopy copy = new PdfCopy(document, streamCompleted);
to
PdfCopy copy = new PdfCopy(document, streamCompleted);
document.Open();
And it fixed the problem. This library needs better exception handling. When you do something slightly wrong it falls over horribly and gives you no clue about what you did wrong. I have no idea how i could possibly have worked this out if I didn't have the source code.
What version of iTextSharp are you using? The Document class doesn't implement IDisposable so you can't wrap it in a using block.
Background: I need to provide a weekly report package for my sales staff. This package contains several (5-10) crystal reports.
Problem:
I would like to allow a user to run all reports and also just run a single report. I was thinking I could do this by creating the reports and then doing:
List<ReportClass> reports = new List<ReportClass>();
reports.Add(new WeeklyReport1());
reports.Add(new WeeklyReport2());
reports.Add(new WeeklyReport3());
<snip>
foreach (ReportClass report in reports)
{
report.ExportToDisk(ExportFormatType.PortableDocFormat, #"c:\reports\" + report.ResourceName + ".pdf");
}
This would provide me a folder full of the reports, but I would like to email everyone a single PDF with all the weekly reports. So I need to combine them.
Is there an easy way to do this without install any more third party controls? I already have DevExpress & CrystalReports and I'd prefer not to add too many more.
Would it be best to combine them in the foreach loop or in a seperate loop? (or an alternate way)
I had to solve a similar problem and what I ended up doing was creating a small pdfmerge utility that uses the PDFSharp project which is essentially MIT licensed.
The code is dead simple, I needed a cmdline utility so I have more code dedicated to parsing the arguments than I do for the PDF merging:
using (PdfDocument one = PdfReader.Open("file1.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument two = PdfReader.Open("file2.pdf", PdfDocumentOpenMode.Import))
using (PdfDocument outPdf = new PdfDocument())
{
CopyPages(one, outPdf);
CopyPages(two, outPdf);
outPdf.Save("file1and2.pdf");
}
void CopyPages(PdfDocument from, PdfDocument to)
{
for (int i = 0; i < from.PageCount; i++)
{
to.AddPage(from.Pages[i]);
}
}
Here is a single function that will merge X amount of PDFs using PDFSharp
using PdfSharp;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static void MergePDFs(string targetPath, params string[] pdfs) {
using(var targetDoc = new PdfDocument()){
foreach (var pdf in pdfs) {
using (var pdfDoc = PdfReader.Open(pdf, PdfDocumentOpenMode.Import)) {
for (var i = 0; i < pdfDoc.PageCount; i++)
targetDoc.AddPage(pdfDoc.Pages[i]);
}
}
targetDoc.Save(targetPath);
}
}
This is something that I figured out, and wanted to share with you, using PdfSharp.
Here you can join multiple Pdfs in one, without the need of an output directory (following the input list order)
public static byte[] MergePdf(List<byte[]> pdfs)
{
List<PdfSharp.Pdf.PdfDocument> lstDocuments = new List<PdfSharp.Pdf.PdfDocument>();
foreach (var pdf in pdfs)
{
lstDocuments.Add(PdfReader.Open(new MemoryStream(pdf), PdfDocumentOpenMode.Import));
}
using (PdfSharp.Pdf.PdfDocument outPdf = new PdfSharp.Pdf.PdfDocument())
{
for(int i = 1; i<= lstDocuments.Count; i++)
{
foreach(PdfSharp.Pdf.PdfPage page in lstDocuments[i-1].Pages)
{
outPdf.AddPage(page);
}
}
MemoryStream stream = new MemoryStream();
outPdf.Save(stream, false);
byte[] bytes = stream.ToArray();
return bytes;
}
}
I used iTextsharp with c# to combine pdf files. This is the code I used.
string[] lstFiles=new string[3];
lstFiles[0]=#"C:/pdf/1.pdf";
lstFiles[1]=#"C:/pdf/2.pdf";
lstFiles[2]=#"C:/pdf/3.pdf";
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage;
string outputPdfPath=#"C:/pdf/new.pdf";
sourceDocument = new Document();
pdfCopyProvider = new PdfCopy(sourceDocument, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
//Open the output file
sourceDocument.Open();
try
{
//Loop through the files list
for (int f = 0; f < lstFiles.Length-1; f++)
{
int pages =get_pageCcount(lstFiles[f]);
reader = new PdfReader(lstFiles[f]);
//Add pages of current file
for (int i = 1; i <= pages; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
}
//At the end save the output file
sourceDocument.Close();
}
catch (Exception ex)
{
throw ex;
}
private int get_pageCcount(string file)
{
using (StreamReader sr = new StreamReader(File.OpenRead(file)))
{
Regex regex = new Regex(#"/Type\s*/Page[^s]");
MatchCollection matches = regex.Matches(sr.ReadToEnd());
return matches.Count;
}
}
Here is a example using iTextSharp
public static void MergePdf(Stream outputPdfStream, IEnumerable<string> pdfFilePaths)
{
using (var document = new Document())
using (var pdfCopy = new PdfCopy(document, outputPdfStream))
{
pdfCopy.CloseStream = false;
try
{
document.Open();
foreach (var pdfFilePath in pdfFilePaths)
{
using (var pdfReader = new PdfReader(pdfFilePath))
{
pdfCopy.AddDocument(pdfReader);
pdfReader.Close();
}
}
}
finally
{
document?.Close();
}
}
}
The PdfReader constructor has many overloads. It's possible to replace the parameter type IEnumerable<string> with IEnumerable<Stream> and it should work as well. Please notice that the method does not close the OutputStream, it delegates that task to the Stream creator.
PDFsharp seems to allow merging multiple PDF documents into one.
And the same is also possible with ITextSharp.
Combining two byte[] using iTextSharp up to version 5.x:
internal static MemoryStream mergePdfs(byte[] pdf1, byte[] pdf2)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
}
return outStream;
}
Instead of the byte[]'s it's possible to pass also Stream's
There's some good answers here already, but I thought I might mention that pdftk might be useful for this task. Instead of producing one PDF directly, you could produce each PDF you need and then combine them together as a post-process with pdftk. This could even be done from within your program using a system() or ShellExecute() call.
You could try pdf-shuffler gtk-apps.org
I know a lot of people have recommended PDF Sharp, however it doesn't look like that project has been updated since june of 2008. Further, source isn't available.
Personally, I've been playing with iTextSharp which has been pretty easy to work with.
I combined the two above, because I needed to merge 3 pdfbytes and return a byte
internal static byte[] mergePdfs(byte[] pdf1, byte[] pdf2,byte[] pdf3)
{
MemoryStream outStream = new MemoryStream();
using (Document document = new Document())
using (PdfCopy copy = new PdfCopy(document, outStream))
{
document.Open();
copy.AddDocument(new PdfReader(pdf1));
copy.AddDocument(new PdfReader(pdf2));
copy.AddDocument(new PdfReader(pdf3));
}
return outStream.ToArray();
}
Following method gets a List of byte array which is PDF byte array and then returns a byte array.
using ...;
using PdfSharp.Pdf;
using PdfSharp.Pdf.IO;
public static class PdfHelper
{
public static byte[] PdfConcat(List<byte[]> lstPdfBytes)
{
byte[] res;
using (var outPdf = new PdfDocument())
{
foreach (var pdf in lstPdfBytes)
{
using (var pdfStream = new MemoryStream(pdf))
using (var pdfDoc = PdfReader.Open(pdfStream, PdfDocumentOpenMode.Import))
for (var i = 0; i < pdfDoc.PageCount; i++)
outPdf.AddPage(pdfDoc.Pages[i]);
}
using (var memoryStreamOut = new MemoryStream())
{
outPdf.Save(memoryStreamOut, false);
res = Stream2Bytes(memoryStreamOut);
}
}
return res;
}
public static void DownloadAsPdfFile(string fileName, byte[] content)
{
var ms = new MemoryStream(content);
HttpContext.Current.Response.Clear();
HttpContext.Current.Response.ContentType = "application/pdf";
HttpContext.Current.Response.AddHeader("content-disposition", $"attachment;filename={fileName}.pdf");
HttpContext.Current.Response.Buffer = true;
ms.WriteTo(HttpContext.Current.Response.OutputStream);
HttpContext.Current.Response.End();
}
private static byte[] Stream2Bytes(Stream input)
{
var buffer = new byte[input.Length];
using (var ms = new MemoryStream())
{
int read;
while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
ms.Write(buffer, 0, read);
return ms.ToArray();
}
}
}
So, the result of PdfHelper.PdfConcat method is passed to PdfHelper.DownloadAsPdfFile method.
PS: A NuGet package named [PdfSharp][1] need to be installed. So in the Package Manage Console window type:
Install-Package PdfSharp
Following method merges two pdfs( f1 and f2) using iTextSharp. The second pdf is appended after a specific index of f1.
string f1 = "D:\\a.pdf";
string f2 = "D:\\Iso.pdf";
string outfile = "D:\\c.pdf";
appendPagesFromPdf(f1, f2, outfile, 3);
public static void appendPagesFromPdf(String f1,string f2, String destinationFile, int startingindex)
{
PdfReader p1 = new PdfReader(f1);
PdfReader p2 = new PdfReader(f2);
int l1 = p1.NumberOfPages, l2 = p2.NumberOfPages;
//Create our destination file
using (FileStream fs = new FileStream(destinationFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
Document doc = new Document();
PdfWriter w = PdfWriter.GetInstance(doc, fs);
doc.Open();
for (int page = 1; page <= startingindex; page++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, page), 0, 0);
//Used to pull individual pages from our source
}// copied pages from first pdf till startingIndex
for (int i = 1; i <= l2;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p2, i), 0, 0);
}// merges second pdf after startingIndex
for (int i = startingindex+1; i <= l1;i++)
{
doc.NewPage();
w.DirectContent.AddTemplate(w.GetImportedPage(p1, i), 0, 0);
}// continuing from where we left in pdf1
doc.Close();
p1.Close();
p2.Close();
}
}
To solve a similar problem i used iTextSharp like this:
//Create the document which will contain the combined PDF's
Document document = new Document();
//Create a writer for de document
PdfCopy writer = new PdfCopy(document, new FileStream(OutPutFilePath, FileMode.Create));
if (writer == null)
{
return;
}
//Open the document
document.Open();
//Get the files you want to combine
string[] filePaths = Directory.GetFiles(DirectoryPathWhereYouHaveYourFiles);
foreach (string filePath in filePaths)
{
//Read the PDF file
using (PdfReader reader = new PdfReader(vls_FilePath))
{
//Add the file to the combined one
writer.AddDocument(reader);
}
}
//Finally close the document and writer
writer.Close();
document.Close();
Here is a link to an example using PDFSharp and ConcatenateDocuments
Here the solution http://www.wacdesigns.com/2008/10/03/merge-pdf-files-using-c
It use free open source iTextSharp library http://sourceforge.net/projects/itextsharp
I've done this with PDFBox. I suppose it works similarly to iTextSharp.