Crop Pdf from each edge using itextshap - c#

I am trying to crop pdf 5 mm from every edge i.e top,bottom,right and left. I tried with below code
public void TrimPdf(string sourceFilePath, string outputFilePath)
{
PdfReader pdfReader = new PdfReader(sourceFilePath);
float widthTo_Trim = iTextSharp.text.Utilities.MillimetersToPoints(5);
using (FileStream output = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write))
using (PdfStamper pdfStamper = new PdfStamper(pdfReader, output))
{
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
Rectangle cropBox = pdfReader.GetCropBox(page);
cropBox.Left += widthTo_Trim;
cropBox.Right += widthTo_Trim;
cropBox.Top += widthTo_Trim;
cropBox.Bottom += widthTo_Trim;
pdfReader.GetPageN(page).Put(PdfName.CROPBOX, new PdfRectangle(cropBox));
}
}
}
By using this code i am Able to Crop only Left and Bottom part. unable to crop top and right side
How can i get desire result ?

This solved my problem by using Below code
public void TrimLeftandRightFoall(string sourceFilePath, string outputFilePath, float cropwidth)
{
PdfReader pdfReader = new PdfReader(sourceFilePath);
float width = (float)GetPDFwidth(sourceFilePath);
float height = (float)GetPDFHeight(sourceFilePath);
float widthTo_Trim = iTextSharp.text.Utilities.MillimetersToPoints(cropwidth);
PdfRectangle rectLeftside = new PdfRectangle(widthTo_Trim, widthTo_Trim, width-widthTo_Trim , height-widthTo_Trim);
using (var output = new FileStream(outputFilePath, FileMode.CreateNew, FileAccess.Write))
{
// Create a new document
Document doc = new Document();
// Make a copy of the document
PdfSmartCopy smartCopy = new PdfSmartCopy(doc, output);
// Open the newly created document
doc.Open();
// Loop through all pages of the source document
for (int i = 1; i <= pdfReader.NumberOfPages; i++)
{
// Get a page
var page = pdfReader.GetPageN(i);
page.Put(PdfName.MEDIABOX, rectLeftside);
var copiedPage = smartCopy.GetImportedPage(pdfReader, i);
smartCopy.AddPage(copiedPage);
}
doc.Close();
}
}

Related

Highlight texts in a pdf file with background colour

Need a solution to highlight specific text with a background color in PDF and final result to be in the new pdf file.. I tried few solutions based on itextSharp but no luck as yet. Could you please help me with the solution.
private static void PDFTextGetter(string pSearch, StringComparison sc, string sourceFile, string destinationFile)
{
PdfStamper stamper;
PdfContentByte cb;
var pReader = new PdfReader(sourceFile);
using (FileStream fs = new FileStream(destinationFile, FileMode.Create))
{
stamper = new PdfStamper(pReader, fs);
for (int page = 1; page <= pReader.NumberOfPages; page++)
{
var strategy = new myLocationTextExtractionStrategy();
cb = stamper.GetUnderContent(page);
strategy.UndercontentCharacterSpacing = cb.CharacterSpacing;
strategy.UndercontentHorizontalScaling = cb.HorizontalScaling;
string currentText = PdfTextExtractor.GetTextFromPage(pReader, page, strategy);
List<iTextSharp.text.Rectangle> matchesFound = strategy.GetTextLocations(pSearch, sc);
cb.SetColorFill(BaseColor.YELLOW);
foreach (iTextSharp.text.Rectangle rect in matchesFound)
cb.Rectangle(rect.Left, rect.Bottom, rect.Width, rect.Height);
cb.Fill();
}
stamper.Close();
pReader.Close();
}
}

Grab all of the pages of a PDF using textsharp

I am getting a pfd using the older version of itextsharp with this code
string Oldfile = #"C:/test.pdf"; // Gets the Template
(new FileInfo("C:/C:/test.pdf")).Directory.Create(); // Go create this folder if it's not there
string NewFile = "C:/test.pdf";
PdfReader reader = new PdfReader(Oldfile);
iTextSharp.text.Rectangle Size = reader.GetPageSizeWithRotation(1);
Document document = new Document(Size);
// MemoryStream memory_stream = new MemoryStream();
FileStream fs = new FileStream(NewFile, FileMode.Create, FileAccess.Write);
PdfWriter weiter = PdfWriter.GetInstance(document, fs);
document.Open();
PdfContentByte cb = weiter.DirectContent;
PdfImportedPage page = weiter.GetImportedPage(reader, 1);
//PdfImportedPage page2 = weiter.GetImportedPage(reader, 2);
cb.AddTemplate(page, 0, 0);
The problem I am having is when it gets that file it has 2 pages in that pdf but it only gets the 1st page and adds lines and saves the only 1st page of the pdf I want to be able to grab both of them or is there a way to merge them after wards
I bet you need to iterate all pages.
using System;
using System.IO;
using System.Collections.Generic;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace TestAnything
{
class Program
{
static void Main(string[] args)
{
List<string> filesToMerge = new List<string> { #"c:\temp\1.pdf", #"c:\temp\2.pdf" };
FileInfo destinationFile = new FileInfo(#"c:\temp\merge.pdf");
if (File.Exists(destinationFile.FullName))
File.Delete(destinationFile.FullName);
MergeFiles(filesToMerge, destinationFile);
}
public static void MergeFiles(List<string> sourceFiles, FileInfo destinationFile)
{
if (sourceFiles == null || sourceFiles.Count == 0)
throw new ArgumentNullException("blahhh.");
PdfReader reader = new PdfReader(sourceFiles[0]);
Document document = new Document(reader.GetPageSizeWithRotation(1));
PdfCopy writer = new PdfCopy(document, new FileStream(destinationFile.FullName, FileMode.Create));
document.Open();
try
{
foreach (string sourceFile in sourceFiles)
{
reader = new PdfReader(sourceFile);
reader.ConsolidateNamedDestinations();
for (int x = 1; x <= reader.NumberOfPages; x++)
writer.AddPage(writer.GetImportedPage(reader, x));
PRAcroForm form = reader.AcroForm;
if (form != null)
writer.CopyAcroForm(reader);
}
}
finally
{
if (document.IsOpen())
document.Close();
}
}
}
}

pdf stamper, pdfEventHandler not working when trying to add page number on the pdf document using itext

I am using itext to generate a pdf document but I am trying to use the existing solutions to add page number to the pdf document that is being generated but none of them seems to be working for me.
I tried using something like
using (MemoryStream stream = new MemoryStream())
{
PdfReader reader = new PdfReader(bytes);
using (PdfStamper stamper = new PdfStamper(reader, stream))
{
int pages = reader.NumberOfPages;
for (int i = 1; i <= pages; i++)
but my code doesnt recognize PdfStamper and asks me to create a class.
Similarly, I tried using
MemoryStream ms = new MemoryStream();
PdfReader reader = new PdfReader(pdf);
int n = reader.NumberOfPages;
Rectangle psize = reader.GetPageSize(1);
There is error on .NumberOfPages and .GetPageSize.
I also tried creating a separate PageEventHandler class but the problem remains same.
Right now, I am able to generate pdf but I want to add page number and I have a code like
private MemoryStream MakeDocument(Application application)
{
MemoryStream ms = new MemoryStream();
PdfWriter writer = new PdfWriter(ms);
PdfDocument pdfDocument = new PdfDocument(writer);
using (var document = new Document(pdfDocument))
{
var sections = new List<IDocumentSection>
{
new Header(),
new Projects(application.Projects),
//Footer
};
foreach (var section in sections)
section.AddTo(document);
Rectangle pageSize;
PdfCanvas canvas;
int n = pdfDocument.GetNumberOfPages();
for (int i = 1; i <= n; i++)
{
PdfPage page = pdfDocument.GetPage(i);
pageSize = page.GetPageSize();
canvas = new PdfCanvas(page);
canvas.BeginText()
.SetFontAndSize(PdfFontFactory.CreateFont(FontConstants.HELVETICA), 7)
.MoveText(pageSize.GetWidth() / 2 - 7, 10)
.ShowText(i.ToString())
.ShowText(" of ")
.ShowText(n.ToString())
.EndText();
}
}
return ms;
var outputStream = MakeDocument(application);
var response = new HttpResponseMessage
{
StatusCode = HttpStatusCode.OK,
Content = new ByteArrayContent(outputStream.ToArray())
};
response.Content.Headers.Add("Content-Type", "application/pdf");
return response;
But it is complaining at GetPageSize
Object reference not set to an instance of an object. Even if I set page size as A4, it starts to complain at canvas = new PdfCanvas(page);
It seems that you are using iText 7. Or rather, that's what I assume when I see:
PdfWriter writer = new PdfWriter(ms);
PdfDocument pdfDocument = new PdfDocument(writer);
Document document = new Document(pdfDocument);
The PdfStamper class is an iText 5 class. It doesn't exist in iText 7. See Chapter 5 of the jump-start tutorial. You probably want something like this:
MemoryStream stream = new MemoryStream();
PdfWriter writer = new PdfWriter(stream);
PdfReader reader = new PdfReader(bytes);
PdfDocument pdfDoc = new PdfDocument(reader, writer);
Document document = new Document(pdfDoc);
Rectangle pageSize;
PdfCanvas canvas;
int n = pdfDoc.GetNumberOfPages();
for (int i = 1; i <= n; i++) {
PdfPage page = pdfDoc.GetPage(i);
pageSize = page.GetPageSize();
canvas = new PdfCanvas(page);
// draw page numbers on the canvas
}
pdfDoc.close();
If you aren't using iText 7, then there's something wrong in your question. In that case you should clarify what you mean when you use the concepts PdfStamper and PdfEventHandler in the same sentence, because that doesn't make any sense.

Removing Watermark from a PDF

I had been used the first code create a water mark and removed and it work perfect
private void Form1_Load(object sender, EventArgs e) {
string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
string startFile = Path.Combine(workingFolder, "StartFile.pdf");
string watermarkedFile = Path.Combine(workingFolder, "Watermarked.pdf");
string unwatermarkedFile = Path.Combine(workingFolder, "Un-watermarked.pdf");
string watermarkText = "This is a test";
//SECTION 1
//Create a 5 page PDF, nothing special here
using (FileStream fs = new FileStream(startFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (Document doc = new Document(PageSize.LETTER)) {
using (PdfWriter witier = PdfWriter.GetInstance(doc, fs)) {
doc.Open();
for (int i = 1; i <= 5; i++) {
doc.NewPage();
doc.Add(new Paragraph(String.Format("This is page {0}", i)));
}
doc.Close();
}
}
}
//SECTION 2
//Create our watermark on a separate layer. The only different here is that we are adding the watermark to a PdfLayer which is an OCG or Optional Content Group
PdfReader reader1 = new PdfReader(startFile);
using (FileStream fs = new FileStream(watermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (PdfStamper stamper = new PdfStamper(reader1, fs)) {
int pageCount1 = reader1.NumberOfPages;
//Create a new layer
PdfLayer layer = new PdfLayer("WatermarkLayer", stamper.Writer);
for (int i = 1; i <= pageCount1; i++) {
iTextSharp.text.Rectangle rect = reader1.GetPageSize(i);
//Get the ContentByte object
PdfContentByte cb = stamper.GetUnderContent(i);
//Tell the CB that the next commands should be "bound" to this new layer
cb.BeginLayer(layer);
cb.SetFontAndSize(BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 50);
PdfGState gState = new PdfGState();
gState.FillOpacity = 0.25f;
cb.SetGState(gState);
cb.SetColorFill(BaseColor.BLACK);
cb.BeginText();
cb.ShowTextAligned(PdfContentByte.ALIGN_CENTER, watermarkText, rect.Width / 2, rect.Height / 2, 45f);
cb.EndText();
//"Close" the layer
cb.EndLayer();
}
}
}
//SECTION 3
//Remove the layer created above
//First we bind a reader to the watermarked file, then strip out a bunch of things, and finally use a simple stamper to write out the edited reader
PdfReader reader2 = new PdfReader(watermarkedFile);
//NOTE, This will destroy all layers in the document, only use if you don't have additional layers
//Remove the OCG group completely from the document.
//reader2.Catalog.Remove(PdfName.OCPROPERTIES);
//Clean up the reader, optional
reader2.RemoveUnusedObjects();
//Placeholder variables
PRStream stream;
String content;
PdfDictionary page;
PdfArray contentarray;
//Get the page count
int pageCount2 = reader2.NumberOfPages;
//Loop through each page
for (int i = 1; i <= pageCount2; i++) {
//Get the page
page = reader2.GetPageN(i);
//Get the raw content
contentarray = page.GetAsArray(PdfName.CONTENTS);
if (contentarray != null) {
//Loop through content
for (int j = 0; j < contentarray.Size; j++) {
//Get the raw byte stream
stream = (PRStream)contentarray.GetAsStream(j);
//Convert to a string. NOTE, you might need a different encoding here
content = System.Text.Encoding.ASCII.GetString(PdfReader.GetStreamBytes(stream));
//Look for the OCG token in the stream as well as our watermarked text
if (content.IndexOf("/OC") >= 0 && content.IndexOf(watermarkText) >= 0) {
//Remove it by giving it zero length and zero data
stream.Put(PdfName.LENGTH, new PdfNumber(0));
stream.SetData(new byte[0]);
}
}
}
}
//Write the content out
using (FileStream fs = new FileStream(unwatermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (PdfStamper stamper = new PdfStamper(reader2, fs)) {
}
}
this.Close();
}
}
}
but when I used digital sign some time when a I tried to remove the watermark some page lost the information , them I'm would like to used the PdfLayerRemover and I change to C# but when I made this
PdfDictionary ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
I had been used the first code create a water mark and removed and it work perfect but when I used digital sign some time when a I tried to remove the watermark some page lost the information , them I'm would like to used the PdfLayerRemover and I change to C# but when I made this
PdfDictionary ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
always get null value and the pdf has the watermark??? please if can help me and the pdf has ocpproperties because when I use this works but it remove additional information . but the way Itextshap version 5.5.2.0
if (content.IndexOf("/OC") >= 0 && content.IndexOf(watermarkText) >= 0) {
//Remove it by giving it zero length and zero data
stream.Put(PdfName.LENGTH, new PdfNumber(0));
stream.SetData(new byte[0]);
}

Copy pdf annotations via C#

I have a stream (PDF file with annotations) and another stream (the same PDF file without annotations). I use streams because I need to execute this operations in memory.
I need to copy annotations from first document to another. Annotations can be different: comments, highlighting and other. So it is better to copy annotations without parsing it.
Can you advice me some helpful PDF library for .NET? And some sample for this problem.
You can use this example for iTextSharp to approach your problem (this example copies a list of pdf files with annotations into a new pdf file):
var output = new MemoryStream();
using (var document = new Document(PageSize.A4, 70f, 70f, 20f, 20f))
{
var readers = new List<PdfReader>();
var writer = PdfWriter.GetInstance(document, output);
writer.CloseStream = false;
document.Open();
const Int32 requiredWidth = 500;
const Int32 zeroBottom = 647;
const Int32 left = 50;
Action<String, Action> inlcudePdfInDocument = (filename, e) =>
{
var reader = new PdfReader(filename);
readers.Add(reader);
var pageCount = reader.NumberOfPages;
for (var i = 0; i < pageCount; i++)
{
e?.Invoke();
var imp = writer.GetImportedPage(reader, (i + 1));
var scale = requiredWidth / imp.Width;
var height = imp.Height * scale;
writer.DirectContent.AddTemplate(imp, scale, 0, 0, scale, left, zeroBottom - height);
var annots = reader.GetPageN(i + 1).GetAsArray(PdfName.ANNOTS);
if (annots != null && annots.Size != 0)
{
foreach (var a in annots)
{
var newannot = new PdfAnnotation(writer, new Rectangle(0, 0));
var annotObj = (PdfDictionary) PdfReader.GetPdfObject(a);
newannot.PutAll(annotObj);
var rect = newannot.GetAsArray(PdfName.RECT);
rect[0] = new PdfNumber(((PdfNumber)rect[0]).DoubleValue * scale + left); // Left
rect[1] = new PdfNumber(((PdfNumber)rect[1]).DoubleValue * scale); // top
rect[2] = new PdfNumber(((PdfNumber)rect[2]).DoubleValue * scale + left); // right
rect[3] = new PdfNumber(((PdfNumber)rect[3]).DoubleValue * scale); // bottom
writer.AddAnnotation(newannot);
}
}
document.NewPage();
}
}
foreach (var apprPdf in pdfs)
{
document.NewPage();
inlcudePdfInDocument(apprPdf.Pdf, null);
}
document.Close();
readers.ForEach(x => x.Close());
}
output.Position = 0;
return output;
PdfReader has a constructor that takes an array of bytes so you can adapt it for MemoryStream.
I'm using ITextSharp which is forked from IText (a java implemenation fpr pdf editing).
http://sourceforge.net/projects/itextsharp/
http://itextpdf.com/
Edit - this is what you need to do (untested but shoul be close):
using System;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
// return processed stream (a new MemoryStream)
public Stream copyAnnotations(Stream sourcePdfStream, Stream destinationPdfStream)
{
// Create new document (IText)
Document outdoc = new Document(PageSize.A4);
// Seek to Stream start and create Reader for input PDF
m.Seek(0, SeekOrigin.Begin);
PdfReader inputPdfReader = new PdfReader(sourcePdfStream);
// Seek to Stream start and create Reader for destination PDF
m.Seek(0, SeekOrigin.Begin);
PdfReader destinationPdfReader = new PdfReader(destinationPdfStream);
// Create a PdfWriter from for new a pdf destination stream
// You should write into a new stream here!
Stream processedPdf = new MemoryStream();
PdfWriter pdfw = PdfWriter.GetInstance(outdoc, processedPdf);
// do not close stream if we've read everything
pdfw.CloseStream = false;
// Open document
outdoc.Open();
// get number of pages
int numPagesIn = inputPdfReader.NumberOfPages;
int numPagesOut = destinationPdfReader.NumberOfPages;
int max = numPagesIn;
// Process max number of pages
if (max<numPagesOut)
{
throw new Exception("Impossible - different number of pages");
}
int i = 0;
// Process Pdf pages
while (i < max)
{
// Import pages from corresponding reader
PdfImportedPage pageIn = writer.inputPdfReader(reader, i);
PdfImportedPage pageOut = writer.destinationPdfReader(reader, i);
// Get named destinations (annotations
List<Annotations> toBeAdded = ParseInAndOutAndGetAnnotations(pageIn, pageOut);
// add your annotations
foreach (Annotation anno in toBeAdded) pageOut.Add(anno);
// Add processed page to output PDFWriter
outdoc.Add(pageOut);
}
// PDF creation finished
outdoc.Close();
// your new destination stream is processedPdf
return processedPdf;
}
The implementation of ParseInAndOutAndGetAnnotations(pageIn, pageOut) needs to reflect your annotations.
Here is a good example with annotations: http://www.java2s.com/Open-Source/Java-Document/PDF/pdf-itext/com/lowagie/text/pdf/internal/PdfAnnotationsImp.java.htm

Categories