I have a web api that creates 2 temporary PDFs, merges them, then deletes them. Problem is, in my code, when it goes to clean out the temp files, it tells me they're being used by another process. Something in the below code is keeping the files open so that, after this function is called, they can't be deleted. For the life of me, I can't figure out what. I've tried disposing whatever resources I can find and deleting the files in all different places trying to figure out WHAT is leaving them open, and I think I've narrowed it down to the if statement after the f counter is being incremented. I think it doesn't like the way the reader is being reassigned.
public static bool MergeFiles(string destinationFile, string[] sourceFiles)
{
int f = 0;
var reader = new PdfReader(sourceFiles[f]);
int n = reader.NumberOfPages;
using (var fileStream = new FileStream(destinationFile, FileMode.Create, FileAccess.Write))
{
var document = new Document(reader.GetPageSizeWithRotation(1));
PdfWriter writer = PdfWriter.GetInstance(document, fileStream);
document.Open();
PdfContentByte cb = writer.DirectContent;
while (f < sourceFiles.Length)
{
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
PdfImportedPage page = writer.GetImportedPage(reader, i);
int rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0,reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
f++;
if (f < sourceFiles.Length)
{
reader = new PdfReader(sourceFiles[f]);
n = reader.NumberOfPages;
}
}
document.Close();
writer.Dispose();
}
reader.Close();
reader.Dispose();
Log.Info(string.Format("Documents merged into: {0}", destinationFile));
return true;
}
Later in the code, the files are being removed using the following command:
File.Delete(tempCoverLetterFile);
FWIW: I've found that, if you put this code above the using statement, it will delete the one file, no problem:
reader.Dispose();
File.Delete(sourceFiles[0]);
I also found that, if you add a reader.Dispose() in the if statement before you reassign the reader to the next pdf, it works. But, the end pdf that gets merged is corrupted and won't open.
Where you have if f < sourcefiles.length you open a new reader. However, you havent closed the old. This would leave you with open files.
I would expect before the new reader is opened you need to add reader.close and reader.dispose (this latter maybe optional)
Related
I am trying to merge two PDF in this method but after successfully adding, I am unable to convert iTextSharp.text.Document into byte array . Please help me.
protected byte[] MergePDFs(byte[] First, byte[] Second, string RptTitle)
{
iTextSharp.text.Document doc = new iTextSharp.text.Document();
iTextSharp.text.pdf.PdfReader reader = null;
int numberOfPages = 0;
int currentPageNumber = 0;
iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(doc, MemStream);
doc.Open();
iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
iTextSharp.text.pdf.PdfImportedPage page = null;
//Third, append all the PDFs--THIS IS THE MAGIC PART
if (First != null)
{
reader = new iTextSharp.text.pdf.PdfReader(First);
numberOfPages = reader.NumberOfPages;
currentPageNumber = 0;
while ((currentPageNumber < numberOfPages))
{
currentPageNumber += 1;
doc.SetPageSize(iTextSharp.text.PageSize.A4);
doc.NewPage();
page = writer.GetImportedPage(reader, currentPageNumber);
//we know that the page would always be in Potrait Format
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 1);
}
}
if (Second != null)
{
reader = new iTextSharp.text.pdf.PdfReader(Second);
numberOfPages = reader.NumberOfPages;
currentPageNumber = 0;
while ((currentPageNumber < numberOfPages))
{
currentPageNumber += 1;
doc.SetPageSize(iTextSharp.text.PageSize.A4.Rotate());
//.Rotate());
doc.NewPage();
page = writer.GetImportedPage(reader, currentPageNumber);
//we know that the page would always be in LandScape Format
// cb.AddTemplate(page,0, -1f, 1f, 0,0, reader.GetPageSizeWithRotation(currentPageNumber).Height+300);
cb.AddTemplate(page, 1, 0, 0, 1, 0, 0);
}
}
doc.Close();
byte[] cc = StringToByteArray(doc.ToString());
return cc;
I don't know where you people get this examples, they are completely wrong.
First, use PdfCopy, not PdfWriter. That way the correct page size and orientation will be correctly imported.
Second, if your MemStream is a MemoryStream you would use MemStream.ToArray() to get the PDF bytes.
PS: Where did that invention of doc.ToString() come from? It's completely rubbish.
I am using acrobat 8.0 professional for cropping text from a PDF.
One page of Original pdf is
After cropping pdf above page is
In mine project i am using cropped pdf and extract individual pages from it by following code
private void ExtractPages(string inputFile, string outputFile, int start, int end)
{
// get input document
PdfReader inputPdf = new PdfReader(inputFile);
// retrieve the total number of pages
int pageCount = inputPdf.NumberOfPages;
if (end < start || end > pageCount)
{
end = pageCount;
}
//var pgSize = new iTextSharp.text.Rectangle(myWidth, myHeight);
//var doc = new iTextSharp.text.Document(pgSize, leftMargin, rightMargin, topMargin, bottomMargin);
// load the input document
Document inputDoc = new Document(inputPdf.GetPageSizeWithRotation(1));
// create the filestream
using (FileStream fs = new FileStream(outputFile, FileMode.Create))
{
// create the output writer
PdfWriter outputWriter = PdfWriter.GetInstance(inputDoc, fs);
inputDoc.Open();
PdfContentByte cb1 = outputWriter.DirectContent;
// copy pages from input to output document
for (int i = start; i <= end; i++)
{
inputDoc.SetPageSize(inputPdf.GetPageSizeWithRotation(i));
inputDoc.NewPage();
PdfImportedPage page = outputWriter.GetImportedPage(inputPdf, i);
int rotation = inputPdf.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb1.AddTemplate(page, 0, -1f, 1f, 0, 0, inputPdf.GetPageSizeWithRotation(i).Height);
}
else
{
cb1.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
inputDoc.Close();
}
}
Problem is that after page extraction cropping information is not retained in extracted pdf. Extracted pdf is same as original pdf with extra text in it.
How to retained cropped information in extracted pdf ?
Here is known code that splits PDF document:
try
{
FileInfo file = new FileInfo(#"d:\С.pdf");
string name = file.Name.Substring(0, file.Name.LastIndexOf("."));
// we create a reader for a certain document
PdfReader reader = new PdfReader(#"d:\С.pdf");
// we retrieve the total number of pages
int n = reader.NumberOfPages;
int digits = 1 + (n / 10);
System.Console.WriteLine("There are " + n + " pages in the original file.");
Document document;
int pagenumber;
string filename;
for (int i = 0; i < n; i++)
{
pagenumber = i + 1;
filename = pagenumber.ToString();
while (filename.Length < digits) filename = "0" + filename;
filename = "_" + filename + ".pdf";
// step 1: creation of a document-object
document = new Document(reader.GetPageSizeWithRotation(pagenumber));
// step 2: we create a writer that listens to the document
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(name + filename, FileMode.Create));
// step 3: we open the document
document.Open();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page = writer.GetImportedPage(reader, pagenumber);
int rotation = reader.GetPageRotation(pagenumber);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(pagenumber).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
// step 5: we close the document
document.Close();
}
}
catch (DocumentException de)
{
System.Console.Error.WriteLine(de.Message);
}
catch (IOException ioe)
{
System.Console.Error.WriteLine(ioe.Message);
}
Here is left top corner of one splitted page:
You can see here (and in other corners) unexpected lines,rounds.. How can I avoid them?
As explained many times before (ITextSharp include all pages from the input file, Itext pdf Merge : Document overflow outside pdf (Text truncated) page and not displaying, and so on), you should read chapter 6 of my book iText in Action (you can find the C# version of the examples here).
You are using a combination of Document, PdfWriter and PdfImportedPage to split a PDF. Please tell me who made you do it this way, so that I can curse the person who inspired you (because I've answered this question hundreds of times before, and I'm getting tired of repeating myself). These classes aren't a good choice for that job:
you lose all interactivity,
you need to rotate the content yourself if the page is in landscape (you already discovered this),
you need to take the original page size into account,
...
Your problem is similar to this one Itext pdf Merge : Document overflow outside pdf (Text truncated) page and not displaying. Apparently the original document you're trying to split contains a MediaBox and a CropBox. When you look at your original document, only the content inside the CropBox is shown. When you look at your copy, the content inside the MediaBox is shown, unveiling "printer marks". These printer marks show where the page needs to be cut in a publishing environment. When printing books or magazines, the pages on which content is printed are usually bigger than the final page. The extra content is cut off before assembling the book or magazine.
Long story short: read the documentation, replace PdfWriter with PdfCopy, replace AddTemplate() with AddPage().
I have one PDF form which is filled by supervisor(or a user).
I want to merge generated PDFs.
I have done simple merging but in it i have to first generate multiple files and then merge them.
Is there any way that when user fills multiple forms and then at the time of final submission or printing i get only a single merged of all filled PDFs.
Try out this Merge function to merge PDF files:
public static void Merge(string[] sourceFiles, string destinationFile)
{
try
{
int f = 0;
// we create a reader for a certain document
PdfReader reader = new PdfReader(sourceFiles[f]);
// we retrieve the total number of pages
int n = reader.NumberOfPages;
//Debug.WriteLine("There are " + n + " pages in the original file.");
// step 1: creation of a document-object
Document document = new Document(reader.GetPageSizeWithRotation(1));
// step 2: we create a writer that listens to the document
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(destinationFile, FileMode.Create));
// step 3: we open the document
document.Open();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page;
int rotation;
// step 4: we add content
while (f < sourceFiles.Length)
{
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
page = writer.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
//Debug.WriteLine("Processed page " + i);
}
f++;
if (f < sourceFiles.Length)
{
reader = new PdfReader(sourceFiles[f]);
// we retrieve the total number of pages
n = reader.NumberOfPages;
//Debug.WriteLine("There are " + n + " pages in the original file.");
}
}
// step 5: we close the document
document.Close();
}
catch (Exception ex)
{
throw ex;
}
}
Reference: Merge PDF Files using iTextSharp
based off the merge code I sourced in my comments above..
I use this to write to the output stream of an http response. It returns a byte[], but you can use the original code and this to get the job done.
public static byte[] MergeFiles(List<byte[]> sourceFiles)
{
Document document = new Document();
MemoryStream output = new MemoryStream();
try
{
// Initialize pdf writer
PdfWriter writer = PdfWriter.GetInstance(document, output);
// Open document to write
document.Open();
PdfContentByte content = writer.DirectContent;
// Iterate through all pdf documents
for (int fileCounter = 0; fileCounter < sourceFiles.Count; fileCounter++)
{
// Create pdf reader
PdfReader reader = new PdfReader(sourceFiles[fileCounter]);
int numberOfPages = reader.NumberOfPages;
// Iterate through all pages
for (int currentPageIndex = 1; currentPageIndex <=
numberOfPages; currentPageIndex++)
{
// Determine page size for the current page
document.SetPageSize(
reader.GetPageSizeWithRotation(currentPageIndex));
// Create page
document.NewPage();
PdfImportedPage importedPage =
writer.GetImportedPage(reader, currentPageIndex);
// Determine page orientation
int pageOrientation = reader.GetPageRotation(currentPageIndex);
if ((pageOrientation == 90) || (pageOrientation == 270))
{
content.AddTemplate(importedPage, 0, -1f, 1f, 0, 0,
reader.GetPageSizeWithRotation(currentPageIndex).Height);
}
else
{
content.AddTemplate(importedPage, 1f, 0, 0, 1f, 0, 0);
}
}
}
}
catch (Exception exception)
{
throw new Exception("There has an unexpected exception" +
" occured during the pdf merging process.", exception);
}
finally
{
document.Close();
}
return output.GetBuffer();
}
//the func merges the src pdf with the memory stream, wherein the stream may contain
//few othr src pdf streams in previous calls to this func
//in first cal, ms would be null
public static void MergePdf(MemoryStream ms, string srcFile)
{
PdfReader reader = new PdfReader(srcFile);
Document document = null;
PdfWriter writer = null;
int n = reader.NumberOfPages;
if (document == null)
{
document = new Document(reader.GetPageSizeWithRotation(1));
writer = PdfWriter.GetInstance(document, ms);
document.Open();
}
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page;
int rotation;
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
page = writer.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0,
reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
}
I write the Memory stream, back to context.response.Outputstream; but the pdf doesnt load, the page 'Failed to load Pdf' results; Is there any problem in converting btwn memorystream and Pdf contents, or what may be the issue?
I had the same problem, and it turned out that the cause was not closing the document, and adding the following line of code:
document.Close();
should fix the problem.