Removing Watermark from a PDF - c#

I had been used the first code create a water mark and removed and it work perfect
private void Form1_Load(object sender, EventArgs e) {
string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
string startFile = Path.Combine(workingFolder, "StartFile.pdf");
string watermarkedFile = Path.Combine(workingFolder, "Watermarked.pdf");
string unwatermarkedFile = Path.Combine(workingFolder, "Un-watermarked.pdf");
string watermarkText = "This is a test";
//SECTION 1
//Create a 5 page PDF, nothing special here
using (FileStream fs = new FileStream(startFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (Document doc = new Document(PageSize.LETTER)) {
using (PdfWriter witier = PdfWriter.GetInstance(doc, fs)) {
doc.Open();
for (int i = 1; i <= 5; i++) {
doc.NewPage();
doc.Add(new Paragraph(String.Format("This is page {0}", i)));
}
doc.Close();
}
}
}
//SECTION 2
//Create our watermark on a separate layer. The only different here is that we are adding the watermark to a PdfLayer which is an OCG or Optional Content Group
PdfReader reader1 = new PdfReader(startFile);
using (FileStream fs = new FileStream(watermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (PdfStamper stamper = new PdfStamper(reader1, fs)) {
int pageCount1 = reader1.NumberOfPages;
//Create a new layer
PdfLayer layer = new PdfLayer("WatermarkLayer", stamper.Writer);
for (int i = 1; i <= pageCount1; i++) {
iTextSharp.text.Rectangle rect = reader1.GetPageSize(i);
//Get the ContentByte object
PdfContentByte cb = stamper.GetUnderContent(i);
//Tell the CB that the next commands should be "bound" to this new layer
cb.BeginLayer(layer);
cb.SetFontAndSize(BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 50);
PdfGState gState = new PdfGState();
gState.FillOpacity = 0.25f;
cb.SetGState(gState);
cb.SetColorFill(BaseColor.BLACK);
cb.BeginText();
cb.ShowTextAligned(PdfContentByte.ALIGN_CENTER, watermarkText, rect.Width / 2, rect.Height / 2, 45f);
cb.EndText();
//"Close" the layer
cb.EndLayer();
}
}
}
//SECTION 3
//Remove the layer created above
//First we bind a reader to the watermarked file, then strip out a bunch of things, and finally use a simple stamper to write out the edited reader
PdfReader reader2 = new PdfReader(watermarkedFile);
//NOTE, This will destroy all layers in the document, only use if you don't have additional layers
//Remove the OCG group completely from the document.
//reader2.Catalog.Remove(PdfName.OCPROPERTIES);
//Clean up the reader, optional
reader2.RemoveUnusedObjects();
//Placeholder variables
PRStream stream;
String content;
PdfDictionary page;
PdfArray contentarray;
//Get the page count
int pageCount2 = reader2.NumberOfPages;
//Loop through each page
for (int i = 1; i <= pageCount2; i++) {
//Get the page
page = reader2.GetPageN(i);
//Get the raw content
contentarray = page.GetAsArray(PdfName.CONTENTS);
if (contentarray != null) {
//Loop through content
for (int j = 0; j < contentarray.Size; j++) {
//Get the raw byte stream
stream = (PRStream)contentarray.GetAsStream(j);
//Convert to a string. NOTE, you might need a different encoding here
content = System.Text.Encoding.ASCII.GetString(PdfReader.GetStreamBytes(stream));
//Look for the OCG token in the stream as well as our watermarked text
if (content.IndexOf("/OC") >= 0 && content.IndexOf(watermarkText) >= 0) {
//Remove it by giving it zero length and zero data
stream.Put(PdfName.LENGTH, new PdfNumber(0));
stream.SetData(new byte[0]);
}
}
}
}
//Write the content out
using (FileStream fs = new FileStream(unwatermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (PdfStamper stamper = new PdfStamper(reader2, fs)) {
}
}
this.Close();
}
}
}
but when I used digital sign some time when a I tried to remove the watermark some page lost the information , them I'm would like to used the PdfLayerRemover and I change to C# but when I made this
PdfDictionary ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
I had been used the first code create a water mark and removed and it work perfect but when I used digital sign some time when a I tried to remove the watermark some page lost the information , them I'm would like to used the PdfLayerRemover and I change to C# but when I made this
PdfDictionary ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES);
always get null value and the pdf has the watermark??? please if can help me and the pdf has ocpproperties because when I use this works but it remove additional information . but the way Itextshap version 5.5.2.0
if (content.IndexOf("/OC") >= 0 && content.IndexOf(watermarkText) >= 0) {
//Remove it by giving it zero length and zero data
stream.Put(PdfName.LENGTH, new PdfNumber(0));
stream.SetData(new byte[0]);
}

Related

Add footer to each page of existing pdf [duplicate]

I tried 3 different ways of displaying Page numbers, OnCloseDocument content is not displaying in the page, none of them worked.
My Intention is displaying Page numbers like this
1 of 10
2 0f 10
..............
............
10 of 10
on each page
I know how to display
1
2
3
4
....
10
but don`t know how to display total page number
I`m using OnCloseDocument to display No.of pages count,but the content in it is
not displaying.
public class MyPdfPageEventHelpPageNo : iTextSharp.text.pdf.PdfPageEventHelper
{
protected PdfTemplate total;
protected BaseFont helv;
private bool settingFont = false;
public override void OnOpenDocument(PdfWriter writer, Document document)
{
template= writer.DirectContent.CreateTemplate(100, 100);
bf = BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.WINANSI, BaseFont.NOT_EMBEDDED);
}
public override void OnCloseDocument(PdfWriter writer, Document document)
{
//See below
}
1ST WAY:
public override void OnCloseDocument(PdfWriter writer, Document document)
{
//I create a table with one column like below.
PdfPTable pageNumber2 = new PdfPTable(1);
pageNumber2.TotalWidth = 50;
pageNumber2.HorizontalAlignment = Element.ALIGN_RIGHT;
pageNumber2.AddCell(BuildTable2RightCells("Page " + writer.PageNumber));
pageNumber.AddCell(BuildTable2LeftCells(writer.PageCount));
pageNumber2.WriteSelectedRows(0, -1, 500,
(document.PageSize.GetBottom(140)), cb);
}
2ND WAY:
public override void OnCloseDocument(PdfWriter writer, Document document)
{
ColumnText.ShowTextAligned(template,Element.ALIGN_CENTER,new
Phrase(writer.PageNumber.ToString()), 500, 140, 0);
}
3RD WAY:
public override void OnCloseDocument(PdfWriter writer, Document document)
{
template.BeginText();
template.SetFontAndSize(bf, 8);
template.SetTextMatrix(500, 140);
template.ShowText(Convert.ToString((writer.PageNumber - 1)));
template.EndText();
}
Am I doing anything wrong?
Your second way is probably the simplest way. Below is a very, very slimmed down but working version:
public class MyPdfPageEventHelpPageNo : iTextSharp.text.pdf.PdfPageEventHelper {
public override void OnEndPage(PdfWriter writer, Document document) {
ColumnText.ShowTextAligned(writer.DirectContent, Element.ALIGN_CENTER, new Phrase(writer.PageNumber.ToString()), 500, 140, 0);
}
}
And to use it:
//Create a file on our desktop
string outputFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "OnCloseTest.pdf");
//Standard PDF creation, adjust as needed
using (FileStream fs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (Document doc = new Document(PageSize.LETTER)) {
using (PdfWriter writer = PdfWriter.GetInstance(doc, fs)) {
//Find our custom event handler
writer.PageEvent = new MyPdfPageEventHelpPageNo();
doc.Open();
//Add text the first page
doc.Add(new Paragraph("Test"));
//Add a new page with more text
doc.NewPage();
doc.Add(new Paragraph("Another Test"));
doc.Close();
}
}
}
EDIT
Sorry, I thought that you were having problems with the basic setup of events, my mistake.
I've only seen two ways to do what you are trying to do, either do two passes or use the PdfTemplate syntax which renders an image as far as I know.
I'd recommend just running two passes, the first just to create your PDF and the second to add your page numbers. You can run your first pass to a MemoryStream so you don't have to hit the disk twice if you want.
PdfReader reader = new PdfReader(outputFile);
using (FileStream fs = new FileStream(secondFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
using (PdfStamper stamper = new PdfStamper(reader, fs)) {
int PageCount = reader.NumberOfPages;
for (int i = 1; i <= PageCount; i++) {
ColumnText.ShowTextAligned(stamper.GetOverContent(i), Element.ALIGN_CENTER, new Phrase(String.Format("Page {0} of {1}", i, PageCount)), 500, 140, 0);
}
}
}
Here is sample code for for the "Chris Haas" explanation (the two pass method with out writing file into harddisk)
The "copy and paste code" using memorystream for page x of y output
protected void Button2_Click(object sender, EventArgs e)
{
byte[] b = CreatePDF2();
string ss = HttpContext.Current.Request.PhysicalApplicationPath;
string filenamefirst = ss + DateTime.Now.ToString("ddMMyyHHmmss");
string filenamefirstpdf = filenamefirst + ".pdf";
using (PdfReader reader = new PdfReader(b))
{
using (FileStream fs = new FileStream(filenamefirstpdf, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (PdfStamper stamper = new PdfStamper(reader, fs))
{
int PageCount = reader.NumberOfPages;
BaseFont bf = BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.WINANSI, BaseFont.EMBEDDED);
for (int i = 1; i <= PageCount; i++)
{
string sss = String.Format("Page {0} of {1}", i, PageCount);
PdfContentByte over= stamper.GetOverContent(i);
over.BeginText();
over.SetTextMatrix(500, 750);
over.SetFontAndSize(bf, 8);
over.ShowText(sss);
over.EndText();
}
}
}
}
}
private byte[] CreatePDF2()
{
Document doc = new Document(PageSize.LETTER, 50, 50, 50, 50);
using (MemoryStream output = new MemoryStream())
{
PdfWriter wri = PdfWriter.GetInstance(doc, output);
doc.Open();
Paragraph header = new Paragraph("My Document") { Alignment = Element.ALIGN_CENTER };
Paragraph paragraph = new Paragraph("Testing the iText pdf.");
Phrase phrase = new Phrase("This is a phrase but testing some formatting also. \nNew line here.");
Chunk chunk = new Chunk("This is a chunk.");
PdfPTable tab = new PdfPTable(3);
PdfPCell cell = new PdfPCell(new Phrase("Header", new Font(Font.FontFamily.HELVETICA, 24F)));
cell.Colspan = 3;
cell.HorizontalAlignment = 1; //0=Left, 1=Centre, 2=Right //Style
cell.BorderColor = new BaseColor(System.Drawing.Color.Red);
cell.Border = Rectangle.BOTTOM_BORDER; // | Rectangle.TOP_BORDER;
cell.BorderWidthBottom = 3f;
tab.AddCell(cell);
//row 1
for (int i = 1; i < 120; i++)
{
//row 1
tab.AddCell("R1C1");
tab.AddCell("R1C2");
tab.AddCell("R1C3");
//row 2
tab.AddCell("R2C1");
tab.AddCell("R2C2");
tab.AddCell("R2C3");
}
doc.Add(header);
doc.Add(paragraph);
doc.Add(phrase);
doc.Add(chunk);
doc.Add(tab);
doc.Close();
return output.ToArray();
}
}

Crop Pdf from each edge using itextshap

I am trying to crop pdf 5 mm from every edge i.e top,bottom,right and left. I tried with below code
public void TrimPdf(string sourceFilePath, string outputFilePath)
{
PdfReader pdfReader = new PdfReader(sourceFilePath);
float widthTo_Trim = iTextSharp.text.Utilities.MillimetersToPoints(5);
using (FileStream output = new FileStream(outputFilePath, FileMode.Create, FileAccess.Write))
using (PdfStamper pdfStamper = new PdfStamper(pdfReader, output))
{
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
Rectangle cropBox = pdfReader.GetCropBox(page);
cropBox.Left += widthTo_Trim;
cropBox.Right += widthTo_Trim;
cropBox.Top += widthTo_Trim;
cropBox.Bottom += widthTo_Trim;
pdfReader.GetPageN(page).Put(PdfName.CROPBOX, new PdfRectangle(cropBox));
}
}
}
By using this code i am Able to Crop only Left and Bottom part. unable to crop top and right side
How can i get desire result ?
This solved my problem by using Below code
public void TrimLeftandRightFoall(string sourceFilePath, string outputFilePath, float cropwidth)
{
PdfReader pdfReader = new PdfReader(sourceFilePath);
float width = (float)GetPDFwidth(sourceFilePath);
float height = (float)GetPDFHeight(sourceFilePath);
float widthTo_Trim = iTextSharp.text.Utilities.MillimetersToPoints(cropwidth);
PdfRectangle rectLeftside = new PdfRectangle(widthTo_Trim, widthTo_Trim, width-widthTo_Trim , height-widthTo_Trim);
using (var output = new FileStream(outputFilePath, FileMode.CreateNew, FileAccess.Write))
{
// Create a new document
Document doc = new Document();
// Make a copy of the document
PdfSmartCopy smartCopy = new PdfSmartCopy(doc, output);
// Open the newly created document
doc.Open();
// Loop through all pages of the source document
for (int i = 1; i <= pdfReader.NumberOfPages; i++)
{
// Get a page
var page = pdfReader.GetPageN(i);
page.Put(PdfName.MEDIABOX, rectLeftside);
var copiedPage = smartCopy.GetImportedPage(pdfReader, i);
smartCopy.AddPage(copiedPage);
}
doc.Close();
}
}

How do I merge 2 PDF’s and keep the resulting file editable?

We are using iText to populate data in a pre-existing (editable) PDF form. After stamping that data into the form we then merge a 2nd PDF document (a flattened scanned PDF typically) with the form, and would like the resulting document to remain editable. The stamp & merge process works fine if we flatten the resulting file. We thought it would be as simple as removing the .FormFlattening = true, but in the resulting file the data now doesn’t get stamped in the form, and the resulting document is also not editable.
Do we need to add a form field to the 2nd document or somehow change it (programmatically) to an editable document before the merge?
using (var fs = new FileStream(filePathOfPDF, FileMode.Open, FileAccess.Read))
{
var a = new List<byte[]>();
a.Add(ReadFully(workingStream));
a.Add(ReadFully(fs));
var test = PdfManager.MergePdfForms(a);
var asdf = test.DuplicateStream();
return asdf;
}
public static MemoryStream MergePdfForms(List<byte[]> files)
{
if (files.Count > 1)
{
PdfReader pdfFile;
Document doc;
PdfWriter pCopy;
MemoryStream msOutput = new MemoryStream();
pdfFile = new PdfReader(files[0]);
doc = new Document();
pCopy = new PdfSmartCopy(doc, msOutput);
doc.Open();
for (int k = 0; k < files.Count; k++)
{
pdfFile = new PdfReader(files[k]);
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
((PdfSmartCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
}
pCopy.FreeReader(pdfFile);
}
pdfFile.Close();
pCopy.Close();
doc.Close();
return msOutput;
}
else if (files.Count == 1)
{
return new MemoryStream(files[0]);
}
return null;
}

How to programmatically find position of character in PDF and place form field on top of it using iTextSharp

How could I find every SOH character, which looks like a box on the PDF, and place a checkbox form field on top of it. This question was close Extract text and text rectangle coordinates from a Pdf file using itextsharp but I can not get this to work. Below is some code of what I am trying to do. It would be best also, if I could not put a form if there is already one there.
StringBuilder text = new StringBuilder();
if (File.Exists(filePath))
{
using (PdfReader pdfReader = new PdfReader(filePath))
using (FileStream fileOut = new FileStream(#"C:\Projects\document.pdf", FileMode.Create, FileAccess.Write))
using (PdfStamper stamp = new PdfStamper(pdfReader, fileOut))
{
for (int page = 1; page <= pdfReader.NumberOfPages; page++)
{
ITextExtractionStrategy strategy = new PdfHelper.LocationTextExtractionStrategyEx();
string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
text.Append(currentText);
int count = 0;
foreach (var strat in ((PdfHelper.LocationTextExtractionStrategyEx)(strategy)).TextLocationInfo)
{
RadioCheckField checkbox = new RadioCheckField(stamp.Writer, new iTextSharp.text.Rectangle(strat.TopLeft[0], strat.BottomRight[1], (strat.TopLeft[0] + 5), (strat.BottomRight[1] - 5)), ("CheckBoxInserted" + count), "On");
checkbox.CheckType = RadioCheckField.TYPE_SQUARE;
stamp.AddAnnotation(checkbox.CheckField, page);
}
RadioCheckField checkField = new RadioCheckField(stamp.Writer, new iTextSharp.text.Rectangle(450, 690, 460, 680), "checkboxname", "On");
checkField.CheckType = RadioCheckField.TYPE_SQUARE;
stamp.AddAnnotation(checkField.CheckField, 1);
}
}
}
return text.ToString();

Copy pdf annotations via C#

I have a stream (PDF file with annotations) and another stream (the same PDF file without annotations). I use streams because I need to execute this operations in memory.
I need to copy annotations from first document to another. Annotations can be different: comments, highlighting and other. So it is better to copy annotations without parsing it.
Can you advice me some helpful PDF library for .NET? And some sample for this problem.
You can use this example for iTextSharp to approach your problem (this example copies a list of pdf files with annotations into a new pdf file):
var output = new MemoryStream();
using (var document = new Document(PageSize.A4, 70f, 70f, 20f, 20f))
{
var readers = new List<PdfReader>();
var writer = PdfWriter.GetInstance(document, output);
writer.CloseStream = false;
document.Open();
const Int32 requiredWidth = 500;
const Int32 zeroBottom = 647;
const Int32 left = 50;
Action<String, Action> inlcudePdfInDocument = (filename, e) =>
{
var reader = new PdfReader(filename);
readers.Add(reader);
var pageCount = reader.NumberOfPages;
for (var i = 0; i < pageCount; i++)
{
e?.Invoke();
var imp = writer.GetImportedPage(reader, (i + 1));
var scale = requiredWidth / imp.Width;
var height = imp.Height * scale;
writer.DirectContent.AddTemplate(imp, scale, 0, 0, scale, left, zeroBottom - height);
var annots = reader.GetPageN(i + 1).GetAsArray(PdfName.ANNOTS);
if (annots != null && annots.Size != 0)
{
foreach (var a in annots)
{
var newannot = new PdfAnnotation(writer, new Rectangle(0, 0));
var annotObj = (PdfDictionary) PdfReader.GetPdfObject(a);
newannot.PutAll(annotObj);
var rect = newannot.GetAsArray(PdfName.RECT);
rect[0] = new PdfNumber(((PdfNumber)rect[0]).DoubleValue * scale + left); // Left
rect[1] = new PdfNumber(((PdfNumber)rect[1]).DoubleValue * scale); // top
rect[2] = new PdfNumber(((PdfNumber)rect[2]).DoubleValue * scale + left); // right
rect[3] = new PdfNumber(((PdfNumber)rect[3]).DoubleValue * scale); // bottom
writer.AddAnnotation(newannot);
}
}
document.NewPage();
}
}
foreach (var apprPdf in pdfs)
{
document.NewPage();
inlcudePdfInDocument(apprPdf.Pdf, null);
}
document.Close();
readers.ForEach(x => x.Close());
}
output.Position = 0;
return output;
PdfReader has a constructor that takes an array of bytes so you can adapt it for MemoryStream.
I'm using ITextSharp which is forked from IText (a java implemenation fpr pdf editing).
http://sourceforge.net/projects/itextsharp/
http://itextpdf.com/
Edit - this is what you need to do (untested but shoul be close):
using System;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
// return processed stream (a new MemoryStream)
public Stream copyAnnotations(Stream sourcePdfStream, Stream destinationPdfStream)
{
// Create new document (IText)
Document outdoc = new Document(PageSize.A4);
// Seek to Stream start and create Reader for input PDF
m.Seek(0, SeekOrigin.Begin);
PdfReader inputPdfReader = new PdfReader(sourcePdfStream);
// Seek to Stream start and create Reader for destination PDF
m.Seek(0, SeekOrigin.Begin);
PdfReader destinationPdfReader = new PdfReader(destinationPdfStream);
// Create a PdfWriter from for new a pdf destination stream
// You should write into a new stream here!
Stream processedPdf = new MemoryStream();
PdfWriter pdfw = PdfWriter.GetInstance(outdoc, processedPdf);
// do not close stream if we've read everything
pdfw.CloseStream = false;
// Open document
outdoc.Open();
// get number of pages
int numPagesIn = inputPdfReader.NumberOfPages;
int numPagesOut = destinationPdfReader.NumberOfPages;
int max = numPagesIn;
// Process max number of pages
if (max<numPagesOut)
{
throw new Exception("Impossible - different number of pages");
}
int i = 0;
// Process Pdf pages
while (i < max)
{
// Import pages from corresponding reader
PdfImportedPage pageIn = writer.inputPdfReader(reader, i);
PdfImportedPage pageOut = writer.destinationPdfReader(reader, i);
// Get named destinations (annotations
List<Annotations> toBeAdded = ParseInAndOutAndGetAnnotations(pageIn, pageOut);
// add your annotations
foreach (Annotation anno in toBeAdded) pageOut.Add(anno);
// Add processed page to output PDFWriter
outdoc.Add(pageOut);
}
// PDF creation finished
outdoc.Close();
// your new destination stream is processedPdf
return processedPdf;
}
The implementation of ParseInAndOutAndGetAnnotations(pageIn, pageOut) needs to reflect your annotations.
Here is a good example with annotations: http://www.java2s.com/Open-Source/Java-Document/PDF/pdf-itext/com/lowagie/text/pdf/internal/PdfAnnotationsImp.java.htm

Categories