I need to convert a piece of code from VB to C#. What should I use in place of FileSystemObject and TextStream?
The code below reads a file that is already present in a directory and adds the contents of the file to the fields.
' Reads "abc.txt" line by line. A line starting with "[" selects the current
' section; every other non-blank line is added to the list control that
' belongs to the current section. Does nothing when the file does not exist.
Private Sub Read_abc_File()
    Dim FileSystem As FileSystemObject
    Dim abcFile As TextStream
    Dim abcLine As String, abcSection As String
    Dim abcFilename As String
    Const Read As Integer = 1 ' passed as the I/O mode argument of OpenTextFile

    abcFilename = "abc.txt"
    Set FileSystem = New FileSystemObject
    If Not FileSystem.FileExists(abcFilename) Then
        ' Object references are released with Set ... = Nothing
        ' (assigning Null to an object variable is not valid).
        Set FileSystem = Nothing
        Exit Sub
    End If

    Set abcFile = FileSystem.OpenTextFile(abcFilename, Read, False)
    Do While abcFile.AtEndOfStream <> True
        abcLine = abcFile.ReadLine
        If abcLine > " " Then
            If Left$(abcLine, 1) = "[" Then
                ' Section header: remember it for the lines that follow
                abcSection = abcLine
            Else
                Select Case abcSection
                    Case "[Datafiles]"
                        DataFilename.AddItem abcLine
                    Case "[Locations]"
                        Location.AddItem abcLine
                    Case "[Formats]"
                        Format.AddItem abcLine
                    Case "[Categories]"
                        Category.AddItem abcLine
                End Select
            End If
        End If
    Loop

    abcFile.Close
    Set abcFile = Nothing
    Set FileSystem = Nothing
End Sub
any suggestions/answers are appreciated.
Thanks!
Here's a code snippet to get you started; I think you should be able to complete the job from there.
using System;
using System.IO;
// Opens "abc.txt" (when it exists) and reads it one line at a time.
static void Main(string[] args)
{
    const string path = "abc.txt";
    if (File.Exists(path))
    {
        using (FileStream input = File.OpenRead(path))
        using (StreamReader lines = new StreamReader(input))
        {
            string current;
            // ReadLine returns null once the end of the stream is reached.
            while ((current = lines.ReadLine()) != null)
            {
                // 'current' holds the line that was just read.
            }
        }
    }
}
Related
I want to generate a PDF from a Windows Forms desktop application. I have a ready-made PDF design and I just want to fill in data from the database in the blank sections of that PDF for each user (a kind of receipt). Please guide me. I have searched, but most of the solutions I found are for ASP.NET web applications; I want to do this in a desktop app. Here is my code. I am able to fetch data from the database and print it to a PDF, but the main problem is that I have already designed the PDF and I want to place the data exactly at the matching fields (i.e. name, amount, date, etc.).
using System;
using System.Windows.Forms;
using System.Diagnostics;
using PdfSharp;
using PdfSharp.Drawing;
using PdfSharp.Pdf;
using System.Data.SqlClient;
using System.Data;
using System.Configuration;
namespace printPDF
{
    /// <summary>
    /// Form with a single button that exports the rows of <c>tblumaster</c>
    /// to a PDF file and opens the result.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Loads the query result into a <see cref="DataSet"/>, draws one text row
        /// per record (three columns) into a new PDF, saves it as "dbtopdf.pdf"
        /// and launches it. Any exception is shown in a message box.
        /// </summary>
        private void button1_Click_1(object sender, EventArgs e)
        {
            const int topMargin = 100;
            const int rowHeight = 40;

            try
            {
                // NOTE(review): credentials are hard-coded here; the commented-out line
                // below reads the connection string from configuration instead.
                string connetionString = "Data Source=EEVO-SALMAN\\MY_PC;Initial Catalog=;User ID=s***;Password=******";
                // var connectionString = ConfigurationManager.ConnectionStrings["CharityManagement"].ConnectionString;
                string sql = "select NAME,NAME,uid from tblumaster";
                DataSet ds = new DataSet();

                // 'using' guarantees the connection is released even when Fill throws.
                using (SqlConnection connection = new SqlConnection(connetionString))
                using (SqlCommand command = new SqlCommand(sql, connection))
                using (SqlDataAdapter adapter = new SqlDataAdapter(command))
                {
                    connection.Open();
                    adapter.Fill(ds);
                }

                PdfDocument pdf = new PdfDocument();
                pdf.Info.Title = "Database to PDF";
                PdfPage pdfPage = pdf.AddPage();
                XGraphics graph = XGraphics.FromPdfPage(pdfPage);
                XFont font = new XFont("Verdana", 20, XFontStyle.Regular);
                int yPoint = topMargin;

                try
                {
                    foreach (DataRow row in ds.Tables[0].Rows)
                    {
                        // Start a new page once the next row would fall below the page bottom;
                        // otherwise the remaining rows would be drawn off-page.
                        if (yPoint + rowHeight > pdfPage.Height.Point)
                        {
                            graph.Dispose();
                            pdfPage = pdf.AddPage();
                            graph = XGraphics.FromPdfPage(pdfPage);
                            yPoint = topMargin;
                        }

                        string pubname = row[0].ToString();
                        string city = row[1].ToString();
                        string state = row[2].ToString();

                        graph.DrawString(pubname, font, XBrushes.Black, new XRect(10, yPoint, pdfPage.Width.Point, pdfPage.Height.Point), XStringFormats.TopLeft);
                        graph.DrawString(city, font, XBrushes.Black, new XRect(200, yPoint, pdfPage.Width.Point, pdfPage.Height.Point), XStringFormats.TopLeft);
                        graph.DrawString(state, font, XBrushes.Black, new XRect(400, yPoint, pdfPage.Width.Point, pdfPage.Height.Point), XStringFormats.TopLeft);

                        yPoint = yPoint + rowHeight;
                    }
                }
                finally
                {
                    graph.Dispose();
                }

                string pdfFilename = "dbtopdf.pdf";
                pdf.Save(pdfFilename);
                Process.Start(pdfFilename);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }
    }
}
Instead of modifying the existing document, create a new document and copy the pages from the old document into it.
Sample code can be found here:
http://forum.pdfsharp.net/viewtopic.php?p=2637#p2637
This is because modifying an existing PDF is not recommended with the 'PdfSharp' library. If you still want to edit the document in place, you can use the 'ISharp' library, which needs a license.
Here is some VB.Net code I use to fill PDF forms. You need a PDF fillable form with form control names matching the SQL record field names.
It calls a routine Gen.GetDataTable() that just builds a typical DataTable. You could re-code to accept a pre-built Datatable as a parameter. Only the top row is processed. The code can be modified to work with a DataRow (.Table.Columns for column reference) or a DataReader.
''' <summary>
''' Fills the form fields of a PDF from the first row returned by a SQL query and
''' then opens or prints the result. Column names are used as form field names.
''' </summary>
''' <param name="pdfMasterPath">Path of the fillable PDF template.</param>
''' <param name="pdfFinalPath">Path of the output PDF; when empty, ".pdf" in the master path is replaced by "_Out.pdf".</param>
''' <param name="SQL">Query passed to Gen.GetDataTable; only the first row is used.</param>
''' <param name="FlattenForm">When True the form is flattened (unless Ctrl is held down).</param>
''' <param name="PrintPDF">When True the file is sent to the configured PDF program for printing instead of being opened.</param>
''' <param name="PrinterName">Optional printer name passed to the PDF program.</param>
''' <param name="AllowMissingFields">When True, columns without a matching form field are logged and skipped.</param>
''' <returns>True on success; False when an error occurred or the PDF program was not found.</returns>
Public Function FillPDFFormSQL(pdfMasterPath As String, pdfFinalPath As String, SQL As String, Optional FlattenForm As Boolean = True, Optional PrintPDF As Boolean = False, Optional PrinterName As String = "", Optional AllowMissingFields As Boolean = False) As Boolean
    ' case matters SQL <-> PDF Form Field Names
    Dim pdfFormFields As AcroFields
    Dim pdfReader As PdfReader = Nothing
    Dim pdfStamper As PdfStamper = Nothing
    Dim outStream As FileStream = Nothing
    Dim s As String = ""
    Try
        If pdfFinalPath = "" Then pdfFinalPath = pdfMasterPath.Replace(".pdf", "_Out.pdf")
        Dim newFile As String = pdfFinalPath

        ' Run the query before creating the output file so that a failing or empty
        ' query does not leave a half-written PDF behind.
        Dim dt As DataTable = Gen.GetDataTable(SQL)
        If dt.Rows.Count = 0 Then
            Throw New InvalidOperationException("The query returned no rows to fill the PDF form with.")
        End If

        pdfReader = New PdfReader(pdfMasterPath)
        outStream = New FileStream(newFile, FileMode.Create)
        pdfStamper = New PdfStamper(pdfReader, outStream)
        pdfReader.Close()
        pdfFormFields = pdfStamper.AcroFields

        For i As Integer = 0 To dt.Columns.Count - 1
            s = dt.Columns(i).ColumnName
            If AllowMissingFields Then
                If pdfFormFields.Fields.ContainsKey(s) Then
                    pdfFormFields.SetField(s, dt.Rows(0)(i).ToString.Trim)
                Else
                    Debug.WriteLine($"Missing PDF Field: {s}")
                End If
            Else
                pdfFormFields.SetField(s, dt.Rows(0)(i).ToString.Trim)
            End If
        Next

        ' flatten the form to remove editing options
        ' set it to false to leave the form open for subsequent manual edits
        If My.Computer.Keyboard.CtrlKeyDown Then
            pdfStamper.FormFlattening = False
        Else
            pdfStamper.FormFlattening = FlattenForm
        End If
        pdfStamper.Close()

        ' Quote the path so that it survives being passed on a command line
        If Not newFile.Contains("""") Then newFile = """" & newFile & """"
        If Not PrintPDF Then
            Process.Start(newFile)
        Else
            Dim sPDFProgramPath As String = INI.GetValue("OISForms", "PDFProgramPath", "C:\Program Files (x86)\Foxit Software\Foxit PhantomPDF\FoxitPhantomPDF.exe")
            If Not IO.File.Exists(sPDFProgramPath) Then
                MsgBox("PDF EXE not found:" & vbNewLine & sPDFProgramPath)
                Return False
            End If
            If PrinterName.Length > 0 Then
                Process.Start(sPDFProgramPath, "/t " & newFile & " " & PrinterName)
            Else
                Process.Start(sPDFProgramPath, "/p " & newFile)
            End If
        End If
        Return True
    Catch ex As Exception
        MsgBox(ex.Message)
        Return False
    Finally
        ' Disposing an already-closed stream is harmless; on a failure path this
        ' releases the handle on the output file.
        If outStream IsNot Nothing Then outStream.Dispose()
        pdfStamper = Nothing
        pdfReader = Nothing
    End Try
End Function
I want to convert my resulting txt file into a UTF8 formatted file so I can load it into my Azure SQL DW via Polybase. It is required the source file be in UTF8.
MSDN has an "IO Streaming example" HERE works perfectly for a single job. I am trying to architect an SSIS solution for around 30 tables though. I believe using this method would cause a race condition where the PS script will be locked by 1 SSIS package when another SSIS package needs it.
I am a sql dev, not a .NET dev so please forgive me. How would one convert the above to an SSIS C# Script task assuming I know how to pass parameters into the Script task?
PowerShell Code from MSDN
# Copies a text file line by line, re-encoding it from $src_enc to $tgt_enc.

#Static variables
$ascii = [System.Text.Encoding]::ASCII
$utf16le = [System.Text.Encoding]::Unicode
$utf8 = [System.Text.Encoding]::UTF8
$ansi = [System.Text.Encoding]::Default
$append = $False
#Set source file path and file name
$src = [System.IO.Path]::Combine("<MySrcFolder>","<MyUtf8stage>.txt")
#Set source file encoding (using list above)
$src_enc = $ascii
#Set target file path and file name
$tgt = [System.IO.Path]::Combine("<MyDestFolder>","<MyFinalstage>.txt")
#Set target file encoding (using list above)
$tgt_enc = $utf8

$read = $null
$write = $null
try
{
    $read = New-Object System.IO.StreamReader($src,$src_enc)
    $write = New-Object System.IO.StreamWriter($tgt,$append,$tgt_enc)
    while ($read.Peek() -ne -1)
    {
        $line = $read.ReadLine();
        $write.WriteLine($line);
    }
}
finally
{
    # Release both file handles even when the copy loop fails.
    # Dispose also closes the stream, so a separate Close() call is not needed.
    if ($null -ne $read) { $read.Dispose() }
    if ($null -ne $write) { $write.Dispose() }
}
Update
I found a similar post which I was able to tweak to my needs; I swear I searched high and low before posting. Anyway, here is what IS working for me. If you see any way to improve it, please share:
/// <summary>
/// SSIS script task entry point: copies "&lt;folder&gt;\&lt;table&gt;.csv" to a ".txt" file
/// next to it, reading the source as ASCII and writing the target as UTF-8.
/// </summary>
public void Main()
{
    //$Package::SourceSQLObject = tablename
    //$Package::StageFile_DestinationFolderPath = rootpath eg "C:\temp\"
    string path = (string)Dts.Variables["$Package::StageFile_DestinationFolderPath"].Value;
    string name = (string)Dts.Variables["$Package::SourceSQLObject"].Value;
    string from = Path.Combine(path, name) + ".csv";
    string to = Path.ChangeExtension(from, "txt");
    Dts.Log("Starting " + to.ToUpper(), 0, null);

    // Default buffer sizes are used; a 10-byte buffer only forces many tiny reads and writes.
    using (StreamReader reader = new StreamReader(from, Encoding.ASCII, false))
    using (StreamWriter writer = new StreamWriter(to, false, Encoding.UTF8))
    {
        string line;
        // ReadLine returns null at the end of the file.
        while ((line = reader.ReadLine()) != null)
        {
            writer.WriteLine(line);
        }
    }

    Dts.TaskResult = (int)ScriptResults.Success;
}
Your code indicates that you are trying to convert an ASCII file to UTF-8; however, that article also states the following:
As UTF-8 uses the same character encoding as ASCII PolyBase will also
support loading data that is ASCII encoded.
So my advice to you is to try the file first with Polybase, check for any conversion issues before you spend any time trying to convert the files.
// C# port of the PowerShell sample: copies a text file line by line,
// re-encoding it from src_enc to tgt_enc.
var mySrcFolder = ""; // something from user variables?
var myDestFolder = ""; // something from user variables?
var myUtf8stage = ""; // something from user variables?
var myFinalstage = ""; // something from user variables?
// Static variables
var ascii = System.Text.Encoding.ASCII;
var utf16le = System.Text.Encoding.Unicode;
var utf8 = System.Text.Encoding.UTF8;
var ansi = System.Text.Encoding.Default;
var append = false;
// Set source file path and file name
var src = System.IO.Path.Combine(
    mySrcFolder,
    String.Format("{0}.txt", myUtf8stage));
// Set source file encoding (using list above)
var src_enc = ascii;
// Set target file path and file name.
// The target lives in the destination folder, as in the PowerShell original
// (which combines "<MyDestFolder>" with the final-stage file name).
var tgt = System.IO.Path.Combine(
    myDestFolder,
    String.Format("{0}.txt", myFinalstage));
// Set target file encoding (using list above)
var tgt_enc = utf8;
using (var read = new System.IO.StreamReader(src, src_enc))
using (var write = new System.IO.StreamWriter(tgt, append, tgt_enc))
{
    string line;
    // ReadLine returns null at the end of the file.
    while ((line = read.ReadLine()) != null)
    {
        write.WriteLine(line);
    }
}
/// <summary>
/// SSIS script task entry point: re-encodes "&lt;folder&gt;\&lt;table&gt;.csv" (read as ASCII)
/// into a UTF-8 ".txt" file with the same base name.
/// </summary>
public void Main()
{
    //$Package::SourceSQLObject = tablename
    //$Package::StageFile_DestinationFolderPath = rootpath eg "C:\temp\"
    string path = (string)Dts.Variables["$Package::StageFile_DestinationFolderPath"].Value;
    string name = (string)Dts.Variables["$Package::SourceSQLObject"].Value;
    string from = Path.Combine(path, name) + ".csv";
    string to = Path.ChangeExtension(from, "txt");
    Dts.Log("Starting " + to.ToUpper(), 0, null);

    // No explicit buffer size: the framework defaults are far more efficient than 10 bytes.
    using (StreamReader reader = new StreamReader(from, Encoding.ASCII, false))
    using (StreamWriter writer = new StreamWriter(to, false, Encoding.UTF8))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            writer.WriteLine(line);
        }
    }

    Dts.TaskResult = (int)ScriptResults.Success;
}
' Reads comma-delimited records (first name, last name, favourite colour, age)
' from a text file and stores one PracticeExample object per record in
' colTestExample, keyed by first name.
Dim strTestExample As String
Private colTestExample As Collection
If FileExists(strFullFile) Then
    ' NOTE(review): the existence check uses strFullFile but the file that is
    ' opened is strTestExample - confirm which name is intended.
    Open strTestExample For Input As #intFILE
    ' EOF must test the same file number that was opened above
    Do While Not EOF(intFILE)
        Input #intFILE, strFirstName, strLastName, strFavColor, strAge
        Set objTestObject = New PracticeExample
        With objTestObject
            .FirstName = strFirstName
            .LastName = strLastName
            .FavColor = strFavColor
            .Age = strAge
            colTestExample.Add objTestObject, .FirstName
        End With
    Loop
    Close #intFILE
End If
After numerous attempts to recreate this in C#, it's time that I consult the almighty powers. I have tried using FileSystemObject to open and read the file while porting this over to C#. I believe (without having checked) that I am working in .NET 4.0.
If you would like some further elaboration, just ask below. The contents are being input to a .txt file.
C# Attempt:
// Reads People.txt from the application's base directory. Each line is expected
// to hold four comma-separated values (first name, last name, favourite colour,
// age), matching what the VB "Input #" statement reads. One PracticeExample is
// created per line and stored under its first name.
const string FileName = "People.txt";
string FullFile = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, FileName);

if (System.IO.File.Exists(FullFile))
{
    foreach (string record in System.IO.File.ReadAllLines(FullFile))
    {
        string[] fields = record.Split(',');
        if (fields.Length < 4)
        {
            // Blank or incomplete line: nothing to load
            continue;
        }

        // Trim surrounding whitespace and the double quotes VB writes around strings
        PracticeExample objTestObject = new PracticeExample();
        objTestObject.FirstName = fields[0].Trim().Trim('"');
        objTestObject.LastName = fields[1].Trim().Trim('"');
        objTestObject.FavColor = fields[2].Trim().Trim('"');
        objTestObject.Age = fields[3].Trim().Trim('"');

        // NOTE(review): assumes HashTableRouteDefinitions is a Hashtable/dictionary
        // (Add(key, value)); the VB collection is keyed by first name - confirm.
        HashTableRouteDefinitions.Add(objTestObject.FirstName, objTestObject);
    }
}
I have getters and setters on within PracticeExample.(WARNING, unfinished code.)
I have a database with image URLs. I have a stored procedure that gets the URLs (SP_GET_Image). I want to execute the stored procedure and then, for each of the URLs, get the image.
I want the actual image locally, not the URL.
Then I want to save each image locally. I have this code, but I want to know how to save the image for each DataRow.
I have started with the code.
' Runs the stored procedure and loads its result set into DS.
Dim DS As New DataSet()
Using connection As New SqlConnection(connectionString)
    Using CMD As New SqlCommand("StoredProcedureName", connection)
        CMD.CommandType = CommandType.StoredProcedure
        CMD.CommandTimeout = 300
        ' Parameters must be added to the collection before a value can be assigned
        CMD.Parameters.Add("@Parameter1", SqlDbType.Int).Value = Param_1_value
        Using adapter As New SqlDataAdapter(CMD)
            'Fill the dataset (Fill opens and closes the connection itself)'
            adapter.Fill(DS)
        End Using
    End Using
End Using
'Now, read through your data:'
For Each DR As DataRow In DS.Tables(0).Rows
    '<-- Im not sure here how to GET EACH images locally saved.
Next
c# or vb help is fine.
The url looks like this :
http://img.myCompany.net/p/1483/278227_20094171232290.jpg
You can use My.Computer.Network.DownloadFile to download a file and store it on the local machine or on a remote server, supplying a user name and password if required. As you need to specify the file name when downloading, you can extract it from the URL with URL.Substring(URL.LastIndexOf("/") + 1).
' Downloads the file behind each row's URL into the destination folder,
' keeping the file name that follows the last "/" of the URL.
For Each DR As DataRow In DS.Tables(0).Rows
    Dim URL As String = DR("Your_URL_Column_Name").ToString()
    Dim Destination As String = "\\SERVERNAME\FolderName\"
    ' Substring is an instance method and must be called on the URL string
    Dim FileName As String = URL.Substring(URL.LastIndexOf("/") + 1)
    My.Computer.Network.DownloadFile(URL, Destination & FileName, "name", "password")
Next
This function will help you download a list of images to a specified local path
/// <summary>
/// Downloads every URL in <paramref name="urls"/> into <paramref name="path"/>,
/// in parallel, naming each file after the last segment of its URL.
/// </summary>
/// <param name="urls">Absolute URLs of the files to download.</param>
/// <param name="path">Target directory; created when it does not exist.</param>
public void DownloadFiles(IEnumerable<string> urls, string path)
{
    // CreateDirectory does nothing when the directory already exists
    System.IO.Directory.CreateDirectory(path);

    System.Threading.Tasks.Parallel.ForEach(urls, url =>
    {
        // Drop any query string or fragment so that it does not end up in the
        // local file name ('?' is not a valid file-name character on Windows).
        int cut = url.IndexOfAny(new[] { '?', '#' });
        string resource = cut >= 0 ? url.Substring(0, cut) : url;
        string filePath = System.IO.Path.Combine(path, System.IO.Path.GetFileName(resource));

        using (var downloader = new WebClient())
        {
            downloader.DownloadFile(url, filePath);
        }
    });
}
You can use it similar to this:
// Project the URL column of every row and download the files.
var urlList = DS.Tables[0].Rows
    .Cast<DataRow>()
    .Select(x => x["YourColumnNameOfUrl"].ToString());
// Verbatim string: backslashes are taken literally, so the path needs no escaping
DownloadFiles(urlList, @"C:\Directory\Of\Ur\Choice\");
Here is a small utility function to help you with your task
''' <summary>
''' Downloads the image at <paramref name="remoteImageUrl"/> into "~/VB/Images/"
''' under a GUID file name whose extension is derived from the response content type.
''' </summary>
''' <param name="remoteImageUrl">URL of the image to download.</param>
''' <returns>
''' True when the image was saved, False when any exception occurred.
''' NOTE(review): the function is declared As Integer but returns Boolean values;
''' the declared type is kept so existing callers are unaffected.
''' </returns>
Function SaveRemoteImage(remoteImageUrl As String) As Integer
    Try
        Dim request = WebRequest.Create(remoteImageUrl)
        Dim folderName = Server.MapPath("~/VB/Images/")
        Using response As WebResponse = request.GetResponse()
            Using stream As Stream = response.GetResponseStream()
                Dim imageExtension = String.Empty
                Select Case response.ContentType.ToLower
                    Case "image/bmp",
                         "image/x-bmp",
                         "image/x-ms-bmp"
                        imageExtension = ".bmp"
                    Case "image/jpeg"
                        imageExtension = ".jpeg"
                    Case "image/gif"
                        imageExtension = ".gif"
                    Case "image/png"
                        imageExtension = ".png"
                    Case Else
                        ' Unknown content types fall back to .png
                        imageExtension = ".png"
                End Select

                'renaming image name as GUID to avoid conflicts
                Dim imageName = Guid.NewGuid().ToString()

                ' Download the file
                Dim destinationPath = String.Concat(
                    folderName,
                    imageName,
                    imageExtension)

                Using tempFile = File.OpenWrite(destinationPath)
                    ' Stream the body in chunks. A single Read call may return fewer
                    ' bytes than requested, and ContentLength is -1 when the server
                    ' does not send the header, so a one-shot buffer is not reliable.
                    stream.CopyTo(tempFile)
                End Using
            End Using
        End Using
        Return True
    Catch ex As Exception
        Return False
    End Try
End Function
I am not using the WebClient method as we need the correct Image Content-Type to get the local files extension.
Now, get all ImageUrls as an IEnumerable(Of String) from the DataTable and call it like this
' Collect the "ImageUrl" value of every row, then save each image in turn.
Dim images = From imageRow In table.AsEnumerable()
             Select imageRow.Field(Of String)("ImageUrl")

For Each imageUrl In images
    SaveRemoteImage(imageUrl)
Next
If you want some parallel programming magic, replace the For Each like this.
' Same downloads as the sequential loop, spread across worker threads.
System.Threading.Tasks.Parallel.ForEach(
    images,
    Sub(imageUrl)
        SaveRemoteImage(imageUrl)
    End Sub)
I added a watermark on pdf using Pdfstamper. Here is the code:
// Writes the watermark text, centred and rotated, underneath the existing
// content of every page.
for (int pageNo = 1; pageNo <= pageCount; ++pageNo)
{
    iTextSharp.text.Rectangle pageSize = reader.GetPageSizeWithRotation(pageNo);
    PdfContentByte under = stamper.GetUnderContent(pageNo);

    BaseFont helvetica = BaseFont.CreateFont(
        BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED);
    under.SetFontAndSize(helvetica, watermarkFontSize);

    // Fill opacity is applied through a graphics state object
    PdfGState opacityState = new PdfGState { FillOpacity = watermarkFontOpacity };
    under.SetGState(opacityState);
    under.SetColorFill(iTextSharp.text.BaseColor.BLACK);

    float centerX = pageSize.Width / 2;
    float centerY = pageSize.Height / 2;

    under.BeginText();
    under.ShowTextAligned(
        PdfContentByte.ALIGN_CENTER, "LipikaChatterjee", centerX, centerY, watermarkRotation);
    under.EndText();
}
This works fine. Now I want to remove this watermark from my pdf. I looked into iTextSharp but was not able to get any help. I even tried to add watermark as layer and then delete the layer but was not able to delete the content of layer from the pdf. I looked into iText for layer removal and found a class OCGRemover but I was not able to get an equivalent class in iTextsharp.
I'm going to give you the benefit of the doubt based on the statement "I even tried to add watermark as layer" and assume that you are working on content that you are creating and not trying to unwatermark someone else's content.
PDFs use Optional Content Groups (OCG) to store objects as layers. If you add your watermark text to a layer you can fairly easily remove it later.
The code below is a full working C# 2010 WinForms app targeting iTextSharp 5.1.1.0. It uses code based on Bruno's original Java code found here. The code is in three sections. Section 1 creates a sample PDF for us to work with. Section 2 creates a new PDF from the first and applies a watermark to each page on a separate layer. Section 3 creates a final PDF from the second but removes the layer with our watermark text. See the code comments for additional details.
When you create a PdfLayer object you can assign it a name to appear within a PDF reader. Unfortunately I can't find a way to access this name, so the code below looks for the actual watermark text within the layer. If you aren't using additional PDF layers, I would recommend only looking for /OC within the content stream and not wasting time looking for your actual watermark text. If you find a way to look for /OC groups by name, please let me know!
using System;
using System.Windows.Forms;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1 {
    /// <summary>
    /// Demo form: on load it creates a sample PDF, writes a watermarked copy with
    /// the watermark on its own layer (OCG), then writes a third copy with that
    /// watermark content removed. All files are created on the desktop.
    /// </summary>
    public partial class Form1 : Form {
        public Form1() {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e) {
            string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            string startFile = Path.Combine(workingFolder, "StartFile.pdf");
            string watermarkedFile = Path.Combine(workingFolder, "Watermarked.pdf");
            string unwatermarkedFile = Path.Combine(workingFolder, "Un-watermarked.pdf");
            string watermarkText = "This is a test";

            //SECTION 1
            //Create a 5 page PDF, nothing special here
            using (FileStream fs = new FileStream(startFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                using (Document doc = new Document(PageSize.LETTER)) {
                    using (PdfWriter writer = PdfWriter.GetInstance(doc, fs)) {
                        doc.Open();
                        for (int i = 1; i <= 5; i++) {
                            doc.NewPage();
                            doc.Add(new Paragraph(String.Format("This is page {0}", i)));
                        }
                        doc.Close();
                    }
                }
            }

            //SECTION 2
            //Create our watermark on a separate layer. The only difference here is that we are adding the watermark to a PdfLayer which is an OCG or Optional Content Group
            PdfReader reader1 = new PdfReader(startFile);
            try {
                using (FileStream fs = new FileStream(watermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    using (PdfStamper stamper = new PdfStamper(reader1, fs)) {
                        int pageCount1 = reader1.NumberOfPages;
                        //Create a new layer
                        PdfLayer layer = new PdfLayer("WatermarkLayer", stamper.Writer);
                        for (int i = 1; i <= pageCount1; i++) {
                            iTextSharp.text.Rectangle rect = reader1.GetPageSize(i);
                            //Get the ContentByte object
                            PdfContentByte cb = stamper.GetUnderContent(i);
                            //Tell the CB that the next commands should be "bound" to this new layer
                            cb.BeginLayer(layer);
                            cb.SetFontAndSize(BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 50);
                            PdfGState gState = new PdfGState();
                            gState.FillOpacity = 0.25f;
                            cb.SetGState(gState);
                            cb.SetColorFill(BaseColor.BLACK);
                            cb.BeginText();
                            cb.ShowTextAligned(PdfContentByte.ALIGN_CENTER, watermarkText, rect.Width / 2, rect.Height / 2, 45f);
                            cb.EndText();
                            //"Close" the layer
                            cb.EndLayer();
                        }
                    }
                }
            } finally {
                //Release the reader once the stamper has written its output
                reader1.Close();
            }

            //SECTION 3
            //Remove the layer created above
            //First we bind a reader to the watermarked file, then strip out a bunch of things, and finally use a simple stamper to write out the edited reader
            PdfReader reader2 = new PdfReader(watermarkedFile);
            try {
                //NOTE, This will destroy all layers in the document, only use if you don't have additional layers
                //Remove the OCG group completely from the document.
                //reader2.Catalog.Remove(PdfName.OCPROPERTIES);

                //Clean up the reader, optional
                reader2.RemoveUnusedObjects();

                //Get the page count
                int pageCount2 = reader2.NumberOfPages;
                //Loop through each page
                for (int i = 1; i <= pageCount2; i++) {
                    //Get the page
                    PdfDictionary page = reader2.GetPageN(i);
                    //Get the raw content
                    PdfArray contentarray = page.GetAsArray(PdfName.CONTENTS);
                    if (contentarray == null) {
                        continue;
                    }
                    //Loop through content
                    for (int j = 0; j < contentarray.Size; j++) {
                        //Get the raw byte stream; skip entries that are not streams
                        PRStream stream = (PRStream)contentarray.GetAsStream(j);
                        if (stream == null) {
                            continue;
                        }
                        //Convert to a string. NOTE, you might need a different encoding here
                        string content = System.Text.Encoding.ASCII.GetString(PdfReader.GetStreamBytes(stream));
                        //Look for the OCG token in the stream as well as our watermarked text.
                        //Ordinal comparison: these are PDF tokens, not culture-sensitive text.
                        if (content.IndexOf("/OC", StringComparison.Ordinal) >= 0 && content.IndexOf(watermarkText, StringComparison.Ordinal) >= 0) {
                            //Remove it by giving it zero length and zero data
                            stream.Put(PdfName.LENGTH, new PdfNumber(0));
                            stream.SetData(new byte[0]);
                        }
                    }
                }

                //Write the content out
                using (FileStream fs = new FileStream(unwatermarkedFile, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    using (PdfStamper stamper = new PdfStamper(reader2, fs)) {
                        //Nothing to add; disposing the stamper writes the edited reader out
                    }
                }
            } finally {
                reader2.Close();
            }

            this.Close();
        }
    }
}
As an extension to Chris's answer, a VB.Net class for removing a layer is included at the bottom of this post which should be a bit more precise.
It goes through the PDF's list of layers (stored in the OCGs array in the OCProperties dictionary in the file's catalog). This array contains indirect references to objects in the PDF file which contain the name
It goes through the properties of the page (also stored in a dictionary) to find the properties which point to the layer objects (via indirect references)
It does an actual parse of the content stream to find instances of the pattern /OC /{PagePropertyReference} BDC {Actual Content} EMC so it can remove just these segments as appropriate
The code then cleans up all the references as much as it can. Calling the code might work as shown:
''' <summary>
''' Writes a copy of the PDF at <paramref name="path"/> to <paramref name="savePath"/>
''' with the layer named "WatermarkLayer" removed.
''' </summary>
Public Shared Sub RemoveWatermark(path As String, savePath As String)
    ' Resources are disposed in reverse order of declaration: the remover first
    ' (which is when it strips the layer), then the stamper, the file stream and the reader.
    Using reader = New PdfReader(path),
          output As New FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None),
          stamper As New PdfStamper(reader, output),
          remover As New PdfLayerRemover(reader)
        remover.RemoveByName("WatermarkLayer")
    End Using
End Sub
Full class:
Imports iTextSharp.text
Imports iTextSharp.text.io
Imports iTextSharp.text.pdf
Imports iTextSharp.text.pdf.parser
''' <summary>
''' Removes named layers (optional content groups) from the PDF held by a PdfReader.
''' Layer names are queued with <see cref="RemoveByName"/>; the actual removal is
''' performed when the instance is disposed.
''' </summary>
Public Class PdfLayerRemover
    Implements IDisposable

    Private _reader As PdfReader
    Private _layerNames As New List(Of String)

    ''' <summary>Creates a remover that operates on the given reader.</summary>
    Public Sub New(reader As PdfReader)
        _reader = reader
    End Sub

    ''' <summary>Queues the layer with the given name for removal on Dispose.</summary>
    Public Sub RemoveByName(name As String)
        _layerNames.Add(name)
    End Sub

    ' Strips the queued layers: removes their marked content from every page's
    ' content streams, drops the page properties that point at them and removes
    ' the references to them from the catalog's OCProperties dictionary.
    Private Sub RemoveLayers()
        Dim ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES)
        If ocProps Is Nothing Then Return
        Dim ocgs = ocProps.GetAsArray(PdfName.OCGS)
        If ocgs Is Nothing Then Return

        'Get the object numbers of the indirect references to the layers to remove
        Dim layerRefNumbers As New List(Of Integer)
        For Each entry As PdfObject In ocgs
            Dim layerRef = TryCast(entry, PdfIndirectReference)
            Dim layerDict = TryCast(PdfReader.GetPdfObject(entry), PdfDictionary)
            If layerRef Is Nothing OrElse layerDict Is Nothing Then Continue For
            Dim layerName = layerDict.GetAsString(PdfName.NAME)
            If layerName IsNot Nothing AndAlso _layerNames.Contains(layerName.ToString()) Then
                layerRefNumbers.Add(layerRef.Number)
            End If
        Next

        'Loop through the pages
        For i As Integer = 1 To _reader.NumberOfPages
            'Get the page
            Dim page = _reader.GetPageN(i)

            'Get the page properties which reference the layers to remove.
            'A page without resources or a Properties dictionary cannot reference a layer.
            Dim resources = _reader.GetPageResources(i)
            If resources Is Nothing Then Continue For
            Dim props = resources.GetAsDict(PdfName.PROPERTIES)
            If props Is Nothing Then Continue For

            Dim propsToRemove As New List(Of PdfName)
            For Each key As PdfName In props.Keys
                Dim propRef = props.GetAsIndirectObject(key)
                If propRef IsNot Nothing AndAlso layerRefNumbers.Contains(propRef.Number) Then
                    propsToRemove.Add(key)
                End If
            Next

            'Property names as they appear in the tokenised stream (without the leading "/")
            Dim propNames = (From p In propsToRemove Select p.ToString().Substring(1)).ToList()

            'Get the raw content
            Dim contentarray = page.GetAsArray(PdfName.CONTENTS)
            If contentarray IsNot Nothing Then
                For j As Integer = 0 To contentarray.Size - 1
                    'Parse the stream data looking for references to a property pointing to the layer.
                    Dim stream = TryCast(contentarray.GetAsStream(j), PRStream)
                    If stream Is Nothing Then Continue For
                    Dim streamData = PdfReader.GetStreamBytes(stream)
                    Dim newData = GetNewStream(streamData, propNames)

                    'Store data without the stream references in the stream
                    If newData.Length <> streamData.Length Then
                        stream.SetData(newData)
                        stream.Put(PdfName.LENGTH, New PdfNumber(newData.Length))
                    End If
                Next
            End If

            'Remove the properties from the page data
            For Each prop In propsToRemove
                props.Remove(prop)
            Next
        Next

        'Remove references to the layer in the master catalog
        RemoveIndirectReferences(ocProps, layerRefNumbers)

        'Clean up unused objects
        _reader.RemoveUnusedObjects()
    End Sub

    ' Returns a copy of the content stream without the segments
    ' "/OC /{property} BDC ... EMC" whose property name is in propsToRemove.
    ' Returns the original array when nothing was removed.
    Private Shared Function GetNewStream(data As Byte(), propsToRemove As IEnumerable(Of String)) As Byte()
        'positions holds alternating [keepStart, keepEnd) offsets into data
        Dim positions As New List(Of Integer)
        positions.Add(0)

        Dim pos As Integer
        Dim inGroup As Boolean = False
        Dim tokenizer As New PRTokeniser(New RandomAccessFileOrArray(New RandomAccessSourceFactory().CreateSource(data)))

        While tokenizer.NextToken
            If tokenizer.TokenType = PRTokeniser.TokType.NAME AndAlso tokenizer.StringValue = "OC" Then
                'Step back over the three characters of the "/OC" token just read
                pos = CInt(tokenizer.FilePointer - 3)
                If tokenizer.NextToken() AndAlso tokenizer.TokenType = PRTokeniser.TokType.NAME Then
                    If Not inGroup AndAlso propsToRemove.Contains(tokenizer.StringValue) Then
                        inGroup = True
                        positions.Add(pos)
                    End If
                End If
            ElseIf tokenizer.TokenType = PRTokeniser.TokType.OTHER AndAlso tokenizer.StringValue = "EMC" AndAlso inGroup Then
                positions.Add(CInt(tokenizer.FilePointer))
                inGroup = False
            End If
        End While

        'A group that was opened but never closed by EMC is left in place: drop its
        'start offset so that the offsets stay paired.
        If inGroup Then positions.RemoveAt(positions.Count - 1)
        positions.Add(data.Length)

        If positions.Count > 2 Then
            Dim length As Integer = 0
            For i As Integer = 0 To positions.Count - 1 Step 2
                length += positions(i + 1) - positions(i)
            Next

            'VB array declarations take the upper bound, so "length - 1" yields
            'exactly "length" bytes.
            Dim newData(length - 1) As Byte
            length = 0
            For i As Integer = 0 To positions.Count - 1 Step 2
                Array.Copy(data, positions(i), newData, length, positions(i + 1) - positions(i))
                length += positions(i + 1) - positions(i)
            Next

            Return newData
        Else
            Return data
        End If
    End Function

    ' Recursively walks the dictionary and removes, from every array found in it,
    ' the indirect references whose object number is in refNumbers.
    Private Shared Sub RemoveIndirectReferences(dict As PdfDictionary, refNumbers As IEnumerable(Of Integer))
        Dim newDict As PdfDictionary
        Dim arrayData As PdfArray
        Dim indirect As PdfIndirectReference
        Dim i As Integer

        For Each key In dict.Keys
            newDict = dict.GetAsDict(key)
            arrayData = dict.GetAsArray(key)
            If newDict IsNot Nothing Then
                RemoveIndirectReferences(newDict, refNumbers)
            ElseIf arrayData IsNot Nothing Then
                i = 0
                While i < arrayData.Size
                    'Entries that are not indirect references are kept as they are
                    indirect = arrayData.GetAsIndirectObject(i)
                    If indirect IsNot Nothing AndAlso refNumbers.Contains(indirect.Number) Then
                        arrayData.Remove(i)
                    Else
                        i += 1
                    End If
                End While
            End If
        Next
    End Sub

#Region "IDisposable Support"
    Private disposedValue As Boolean ' To detect redundant calls

    ' IDisposable
    ' The queued layers are removed on the first disposing call.
    Protected Overridable Sub Dispose(disposing As Boolean)
        If Not Me.disposedValue Then
            If disposing Then
                RemoveLayers()
            End If

            ' TODO: free unmanaged resources (unmanaged objects) and override Finalize() below.
            ' TODO: set large fields to null.
        End If
        Me.disposedValue = True
    End Sub

    ' TODO: override Finalize() only if Dispose(ByVal disposing As Boolean) above has code to free unmanaged resources.
    'Protected Overrides Sub Finalize()
    '    ' Do not change this code.  Put cleanup code in Dispose(ByVal disposing As Boolean) above.
    '    Dispose(False)
    '    MyBase.Finalize()
    'End Sub

    ' This code added by Visual Basic to correctly implement the disposable pattern.
    Public Sub Dispose() Implements IDisposable.Dispose
        ' Do not change this code.  Put cleanup code in Dispose(ByVal disposing As Boolean) above.
        Dispose(True)
        GC.SuppressFinalize(Me)
    End Sub
#End Region

End Class