Unable to set content in SdtElement using WordprocessingDocument - c#

I have a template file in which I have placed two place holders. Both are Plain Text Content Controls. I have following code in which I am setting the values to the Place Holders in the file.
static void Main(string[] args)
{
string fileName = "C:\\xxx\\Template.docx";
byte[] fileContent = File.ReadAllBytes(fileName);
using (MemoryStream memStream = new MemoryStream())
{
memStream.Write(fileContent, 0, (int)fileContent.Length);
using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(memStream,true))
{
MainDocumentPart mainPart = wordDoc.MainDocumentPart;
var sdtElements = wordDoc.MainDocumentPart.Document.Descendants<SdtElement>();
foreach (SdtElement sdtElement in sdtElements)
{
Tag blockTag = sdtElement.SdtProperties.Descendants<Tag>().ElementAt(0);
Run nr = new Run();
Text txt = new Text();
txt.Text = "RKS";
nr.Append(txt);
Lock lckContent = new Lock();
bool lockControl = true;
if (lockControl)
{
lckContent.Val = LockingValues.SdtContentLocked;
}
else
{
lckContent.Val = LockingValues.Unlocked;
}
if (sdtElement is SdtBlock)
{
(((SdtBlock)sdtElement).SdtContentBlock.ElementAt(0)).RemoveAllChildren();
(((SdtBlock)sdtElement).SdtContentBlock.ElementAt(0)).AppendChild<Run>(nr);
((SdtBlock)sdtElement).SdtProperties.Append(lckContent);
}
if (sdtElement is SdtCell)
{
((SdtCell)sdtElement).SdtContentCell.ElementAt(0).Descendants<Paragraph>().ElementAt(0).RemoveAllChildren(); ((SdtCell)sdtElement).SdtContentCell.ElementAt(0).Descendants<Paragraph>().ElementAt(0).AppendChild<Run>(nr);
((SdtCell)sdtElement).SdtProperties.Append(lckContent);
}
if (sdtElement is SdtRun)
{
//SdtContentText text = sdtElement.SdtProperties.Elements<SdtContentText>().FirstOrDefault();
//((SdtRun)sdtElement).SdtContentRun.ElementAt(0).AppendChild<Text>(emptyTxt);
((SdtRun)sdtElement).SdtContentRun.ElementAt(0).RemoveAllChildren();
((SdtRun)sdtElement).SdtContentRun.ElementAt(0).AppendChild<Run>(nr);
((SdtRun)sdtElement).SdtProperties.Append(lckContent);
}
}
wordDoc.MainDocumentPart.Document.Save();
}
}
}
The code runs successfully but the changes are not reflected in the file.
What am I missing?

You are creating a WordprocessingDocument from a memory stream, so there is no way for the class to know which file to write to. It writes all changes to the memory stream, not the file.
You can create a WordprocessingDocument directly from a file by calling WordprocessingDocument.Open method and specifying the name of your file (see https://msdn.microsoft.com/en-us/library/office/documentformat.openxml.packaging.wordprocessingdocument.aspx) and then the changes should be reflected in the file.
If you need to load the document into a buffer for some reason, then you need to copy the data from the buffer back to the file manually.

After making some experiments, I could not understand how it worked but it is fine for me. I just save the file with other name.
After the code line: wordDoc.MainDocumentPart.Document.Save(); I added
File.WriteAllBytes("C:\\xxx\\Sample.docx", memStream.ToArray());

Related

How to copy data from HTML div in Selenium C#?

I want to copy alter table data from webpage div and paste it into a .txtfile, screenshot is attached below:
Below is the HTML for above screenshot:
Can i do this by storing this in a variable like below but how can i copy all data at once in a variable from div ?
string value = driver.FindElement(By.XPath("//td[#style='padding:0px;
white-space: nowrap;']")).Text;
Below is the code of my test case in which i am selecting a file to convert from a tool after conversion i want to store the alter table script in a separate .txt file for which i created a create function to create file :
public void TestMethod1()
{
try
{
string dir = #"D:\test\input"; //path
var directory = new DirectoryInfo(dir); //folder ko access
foreach (FileInfo file in directory.GetFiles()) //loop
IWebDriver driver = new ChromeDriver(); //driver object
driver.Navigate().GoToUrl("http:abcurl//convPLSQL.html");
//site url
driver.Manage().Window.Maximize(); //browser maximize
string param = dir.Replace("/", "\\"); // ye code file
param += "\\";
param += file.Name;
driver.FindElement(By.Id("fileuploader")).SendKeys(param);
driver.FindElement(By.Id("keyinput")).SendKeys("convUser001");//Key
driver.FindElement(By.Id("translatebutton")).Click();//Translate Button
driver.FindElement(By.LinkText("Download Results")).Click();//Download
// string data= driver.FindElement(By.XPath("//td[#style='padding:0px;
// white-space:nowrap;']")).Text;
create(); // call create function to create .txt file
}
public void create()
{
try
{
string fileName = #"D:\test\output\Mahesh.txt";
// Check if file already exists. If yes, delete it.
if (System.IO.File.Exists(fileName))
{
System.IO.File.Delete(fileName);
}
// Create a new file
using (FileStream fs = System.IO.File.Create(fileName))
{
// Add some text to file
Byte[] title = new UTF8Encoding(true).GetBytes("New Text File");
fs.Write(title, 0, title.Length);
byte[] author = new UTF8Encoding(true).GetBytes("Mahesh Chand");
fs.Write(author, 0, author.Length);
}
// Open the stream and read it back.
using (StreamReader sr = System.IO.File.OpenText(fileName))
{
string s = "";
while ((s = sr.ReadLine()) != null)
{
System.Console.WriteLine(s);
}
}
Get all child elements of div containing the spans having the needed text. Something like:
var spans = driver.FindElements(By.XPath("//td/div[2]/span"));
Then concatenate text from each span element. Replace special characters like "&nbsp" with space. Use string builder or add text to string generic collection and join later if the text is big.
Example:
var text = string.Empty;
foreach(var span in spans)
{
text += span.Text.Replace("&nbsp", " ");
}

Openxml merging docx with images

In short: I would like to insert the content of a docx that contains images and bullets in another docx.
My problem: I used two approaches:
Manual merge
Altchunk
With both of them I got a corrupted word document as result.
If I remove the images from the docx that I would like to insert in another one, the result docx is OK.
My code:
Manual merge (thanks to https://stackoverflow.com/a/48870385/10075827):
private static void ManualMerge(string firstPath, string secondPath, string resultPath)
{
if (!System.IO.Path.GetFileName(firstPath).StartsWith("~$"))
{
File.Copy(firstPath, resultPath, true);
using (WordprocessingDocument result = WordprocessingDocument.Open(resultPath, true))
{
using (WordprocessingDocument secondDoc = WordprocessingDocument.Open(secondPath, false))
{
OpenXmlElement p = result.MainDocumentPart.Document.Body.Descendants<Paragraph>().Last();
foreach (var e in secondDoc.MainDocumentPart.Document.Body.Elements())
{
var clonedElement = e.CloneNode(true);
clonedElement.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().ToList().ForEach(blip =>
{
var newRelation = result.CopyImage(blip.Embed, secondDoc);
blip.Embed = newRelation;
});
clonedElement.Descendants<DocumentFormat.OpenXml.Vml.ImageData>().ToList().ForEach(imageData =>
{
var newRelation = result.CopyImage(imageData.RelationshipId, secondDoc);
imageData.RelationshipId = newRelation;
});
result.MainDocumentPart.Document.Body.Descendants<Paragraph>().Last();
if (clonedElement is Paragraph)
{
p.InsertAfterSelf(clonedElement);
p = clonedElement;
}
}
}
}
}
}
public static string CopyImage(this WordprocessingDocument newDoc, string relId, WordprocessingDocument org)
{
var p = org.MainDocumentPart.GetPartById(relId) as ImagePart;
var newPart = newDoc.MainDocumentPart.AddPart(p);
newPart.FeedData(p.GetStream());
return newDoc.MainDocumentPart.GetIdOfPart(newPart);
}
Altchunk merge (from http://www.karthikscorner.com/sharepoint/use-altchunk-document-assembly/):
private static void AltchunkMerge(string firstPath, string secondPath, string resultPath)
{
WordprocessingDocument mainDocument = null;
MainDocumentPart mainPart = null;
var ms = new MemoryStream();
#region Prepare - consuming application
byte[] bytes = File.ReadAllBytes(firstPath);
ms.Write(bytes, 0, bytes.Length);
mainDocument = WordprocessingDocument.Open(ms, true);
mainPart = mainDocument.MainDocumentPart;
#endregion
#region Document to be imported
FileStream fileStream = new FileStream(secondPath, FileMode.Open);
#endregion
#region Merge
AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(AlternativeFormatImportPartType.WordprocessingML, "AltChunkId101");
chunk.FeedData(fileStream);
var altChunk = new AltChunk(new AltChunkProperties() { MatchSource = new MatchSource() { Val = new OnOffValue(true) } });
altChunk.Id = "AltChunkId101";
mainPart.Document.Body.InsertAfter(altChunk, mainPart.Document.Body.Elements<Paragraph>().Last());
mainPart.Document.Save();
#endregion
#region Mark dirty
var listOfFieldChar = mainPart.Document.Body.Descendants<FieldChar>();
foreach (FieldChar current in listOfFieldChar)
{
if (string.Compare(current.FieldCharType, "begin", true) == 0)
{
current.Dirty = new OnOffValue(true);
}
}
#endregion
#region Save Merged Document
mainPart.DocumentSettingsPart.Settings.PrependChild(new UpdateFieldsOnOpen() { Val = new OnOffValue(true) });
mainDocument.Close();
FileStream file = new FileStream(resultPath, FileMode.Create, FileAccess.Write);
ms.WriteTo(file);
file.Close();
ms.Close();
#endregion
}
I spent hours searching for a solution and the most common one I found was to use altchunk. So why is it not working in my case?
If you are able to use the Microsoft.Office.Interop.Word namespace, and able to put a bookmark in the file you want to merge into, you can take this approach:
using Microsoft.Office.Interop.Word;
...
// merge by putting second file into bookmark in first file
private static void NewMerge(string firstPath, string secondPath, string resultPath, string firstBookmark)
{
var app = new Application();
var firstDoc = app.Documents.Open(firstPath);
var bookmarkRange = firstDoc.Bookmarks[firstBookmark];
// Collapse the range to the end, as to not overwrite it. Unsure if you need this
bookmarkRange.Collapse(WdCollapseDirection.wdCollapseEnd);
// Insert into the selected range
// use if relative path
bookmarkRange.InsertFile(Environment.CurrentDirectory + secondPath);
// use if absolute path
//bookmarkRange.InsertFile(secondPath);
}
Related:
C#: Insert and indent bullet points at bookmark in word document using Office Interop libraries

Adding file to FileNet Object store but when I retrieve it, it is empty, 0 byte file

I am able to successfully send a file to Filenet using their API (FileNet.API.dll) v5.2.1 but when I try to retreive it, it is a 0 byte file with no content.
This is essentially using the methods found in their demo c# program that sends and receives files found here (http://www-01.ibm.com/support/docview.wss?uid=swg24028791)
The difference with my scenario is that I will have the source data already in Stream form so I don't to need convert a local file to a Stream like the demo does.
So the Following Methods are run: more or less in this order, some are repeated.
main create doc method executes
IDocument doc = CreateDocument(stream, fnDoc.mimeType, fnDoc.docName, fnRepo.os, "Common", paRequest);
IFolder folder = FetchFolder(fnRepo.os, "/");
doc.Save(RefreshMode.REFRESH);
IReferentialContainmentRelationship rcr = FileContainable(fnRepo.os, doc, folder);
rcr.Save(RefreshMode.REFRESH);
checkInDoc(doc);
The methods listed above are below
public static IDocument CreateDocument(Stream stream, String mimeType, String docName, IObjectStore os, String className)
{
IDocument doc = null;
if (className.Equals(""))
doc = Factory.Document.CreateInstance(os, null);
else
doc = Factory.Document.CreateInstance(os, className);
doc.Properties["DocumentTitle"] = docName;
doc.MimeType = mimeType;
IContentElementList cel = CreateContentElementList(stream, docName);
if (cel != null)
doc.ContentElements = cel;
return doc;
}
next one that is called is CreateContentElementList
public static IContentElementList CreateContentElementList(Stream stream, string docName)
{
IContentElementList cel = null;
if (CreateContentTransfer(stream, docName) != null)
{
cel = Factory.ContentElement.CreateList();
IContentTransfer ct = CreateContentTransfer(stream, docName);
cel.Add(ct);
}
return cel;
}
Then finally CreateContentTransfer
public static IContentTransfer CreateContentTransfer(Stream stream, string docName)
{
IContentTransfer ct = null;
ct = Factory.ContentTransfer.CreateInstance();
ct.SetCaptureSource(stream);
ct.RetrievalName = docName;
return ct;
}
At this point the length and size of ct is not there but these can be set manually so I am not sure if that matters. Also the variable stream still has content.
The doc is returned from CreateDocument
Then folder is fetched
public static IFolder FetchFolder(IObjectStore os, String fPath)
{
IFolder f = Factory.Folder.FetchInstance(os, fPath, null);
return f
}
then
public static IReferentialContainmentRelationship FileContainable(IObjectStore os, IContainable c, IFolder folder)
{
IReferentialContainmentRelationship rcr = null;
if (c is IDocument)
rcr = folder.File((IDocument)c, AutoUniqueName.AUTO_UNIQUE, ((IDocument)c).Name, DefineSecurityParentage.DO_NOT_DEFINE_SECURITY_PARENTAGE);
else
rcr = folder.File((ICustomObject)c, AutoUniqueName.AUTO_UNIQUE, ((ICustomObject)c).Name, DefineSecurityParentage.DO_NOT_DEFINE_SECURITY_PARENTAGE);
return rcr;
}
public static void checkInDoc(IDocument doc)
{
doc.Checkin(AutoClassify.AUTO_CLASSIFY, CheckinType.MINOR_VERSION);
doc.Save(RefreshMode.REFRESH);
doc.Refresh();
}
So am I missing something, some last part to actually put the content on the server or something?
When calling ct.SetCaptureSource(stream); the Stream should be unread (or perhaps the position of the Stream reset), so that the API can capture the desired file.
If the position of the Stream is at the end of the Stream, you will not end up sending anything to the IContentTransfer object, resulting in a ContentElement containing 0 bytes.

Data from newly created .text file is not readable to third party application

I have developed a windows application, which will read updated data from .jrn files(In an ATM Machine) and will be copy the text to a temporary text file "tempfile.txt".
There is another third party application called "POS Text Sender", which reads "tempfile.txt" and display the contents of it in a CCTV Camera.
The problem is that if I type directly something in the tempfile, the POS application will read it, but if my application writes text to "tempfile", I can see the same content as in the .jrn file in tempfile, but it is not reflected in the POS application when ever data is copied from newly generated file to tempfile.if restart the POS Text Sender after the first data copied to tempfile from newly generated file,POS Text sender will display the content til content from newly created file is written to tempfile
My application code is reading .jrn file using StreamReader and assigning it to a string variable and then writing it to a tempfile using StreamWriter. What is the difference between manually typing text on a file and .NET StreamWriter writing text to a file?
CODE:
DateTime LastChecked = DateTime.Now;
try
{
string[] files = System.IO.Directory.GetFiles(#"C:\Test", "*.jrn", System.IO.SearchOption.AllDirectories);
foreach (string file in files)
{
if (!fileList.Contains(file))
{
currentfilename = file;
fileList.Add(file);
copywarehouse(file);
//do_some_processing();
try
{
// Create an instance of StreamReader to read from a file.
// The using statement also closes the StreamReader.
using (StreamReader sr = new StreamReader(file))
{
currentcontent=sr.ReadToEnd();
// Read and display lines from the file until the end of
//// the file is reached.
//while ((currentcontent = sr.ReadLine()) != null)
//{
//}
sr.Close();
//sr.Dispose();
}
}
catch (Exception)
{
// Let the user know what went wrong.
}
}
}
//checking
try
{
using (StreamReader sr = new StreamReader(currentfilename))
{
string currentfilecontent = sr.ReadToEnd();
sr.Close();
//sr.Dispose();
if (currentfilecontent!=currentcontent)
{
if (currentfilecontent.Contains(currentcontent))
{
string originalcontent = currentfilecontent.Substring(currentcontent.Length);
System.IO.StreamWriter filenew = new System.IO.StreamWriter(#"C:\Test\tempfile.txt");
filenew.WriteLine(originalcontent);
filenew.Close();
currentcontent = currentfilecontent;
}
}
}
}
catch (Exception)
{
// Let the user know what went wrong.
}
copywarehouse method:
private void copywarehouse(string filename)
{
string sourcePath = #"C:\Test";
string targetPath = #"C:\Test";
try
{
string sourceFile = System.IO.Path.Combine(sourcePath, filename);
string destFile = System.IO.Path.Combine(targetPath, "tempfile.txt");
System.IO.File.Copy(sourceFile, destFile, true);
}
catch (Exception)
{
}
}
Can you check the following:
Is the generated file encoding same as the manually created file? (i.e. UTF-8/ANSI).
Are your constantly flushing the streamWriter's buffer? Or set the StreamWriter's AutoFlush property to true.
Is the StreamWriter opened with a WriteLock with no read allowed? In this case the other application may not be able to open your tempfile for read access.
EDIT:
Also, in the code you posted, you are comparing the tempFile data to current data, and if tempFile data is newer than current data, you are appending the temp file, which I think should be vice versa.
Main change:
using (StreamWriter filenew = new StreamWriter(fileDetail.TempFileName, true, Encoding.ASCII))
{
filenew.WriteLine(newContent);
}
To know the correct encoding, just create a new tempFile, write something in the editor and save it. Open the file in notepad and do a "save as". This will display the current encoding in the bottom. Set that encoding in .NET code.
If this does not work try (As recommended by shr):
using (StreamWriter filenew = new StreamWriter(fileDetail.TempFileName, true, Encoding.ASCII))
{
filenew.Write(newContent + "\r\n");
}
Long Version: (It may be a bit different than your code):
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
DateTime LastChecked = DateTime.Now;
IDictionary<string, FileDetails> fileDetails = new Dictionary<string, FileDetails>(StringComparer.OrdinalIgnoreCase);
IList<string> tempFileList = new List<string>();
try
{
string[] files = System.IO.Directory.GetFiles(#"C:\Test", "*.jrn", System.IO.SearchOption.AllDirectories);
foreach (string file in files)
{
string currentfilename = file;
string currentcontent = string.Empty;
if (!fileDetails.Keys.Contains(file))
{
fileDetails[file] = new FileDetails(copywarehouse(file));
//do_some_processing();
}
try
{
using (StreamReader sr = new StreamReader(file))
{
currentcontent = sr.ReadToEnd();
}
}
catch (Exception)
{
// Let the user know what went wrong.
}
fileDetails[file].AddContent(currentcontent);
}
//TODO: Check using the file modified time. Avoids unnecessary reading of file.
foreach (var fileDetail in fileDetails.Values)
{
//checking
try
{
string tempFileContent = string.Empty;
string currentcontent = fileDetail.GetContent();
using (StreamReader sr = new StreamReader(fileDetail.TempFileName))
{
tempFileContent = sr.ReadToEnd();
sr.Close();
}
if (!(0 == string.Compare(tempFileContent, currentcontent)))
{
if (currentcontent.Contains(tempFileContent))
{
string newContent = tempFileContent.Substring(currentcontent.Length);
using (StreamWriter filenew = new StreamWriter(fileDetail.TempFileName, true, Encoding.ASCII))
{
filenew.WriteLine(newContent);
}
}
}
}
catch (Exception)
{
// Let the user know what went wrong.
}
}
}
catch (Exception)
{
}
}
private static string copywarehouse(string filename)
{
string sourcePath = #"C:\Test";
string targetPath = #"C:\Test";
string sourceFile = System.IO.Path.Combine(sourcePath, filename);
string destFile = System.IO.Path.Combine(targetPath, filename+ "tempfile.txt");
try
{
System.IO.File.Copy(sourceFile, destFile, true);
}
catch (Exception)
{
}
return destFile;
}
internal class FileDetails
{
public string TempFileName { get; private set; }
private StringBuilder _content;
public FileDetails(string tempFileName)
{
TempFileName = tempFileName;
_content = new StringBuilder();
}
public void AddContent(string content)
{
_content.Append(content);
}
public string GetContent()
{
return _content.ToString();
}
}
}
}
EDIT 2:
Can you change the copywarehouse to this and see it the problem persists:
private void copywarehouse(string filename)
{
const string sourcePath = #"C:\Test";
const string targetPath = #"C:\Test";
try
{
string sourceFile = Path.Combine(sourcePath, filename);
string destFile = Path.Combine(targetPath, "tempfile.txt");
string currentcontent;
using (var sr = new StreamReader(sourceFile))
{
currentcontent = sr.ReadToEnd();
}
using (var wr = new StreamWriter(destFile, false, Encoding.ASCII))
{
wr.WriteLine(currentcontent);
}
}
catch (Exception)
{
}
}
Most likely this is a CR+LF issue.
The POS expects the file to have line endings with CR+LF (Carriage Return (0x0D) + New line (0x0A)) combination.
The filenew.WriteLine(originalcontent) appends only the new line character. When you type, I think, you editor must be creating the CR+LF combination for all line endings.
I suggest you try filenew.Write( originalcontent + "\r\n");
One difference is that your application does not write to tempfile.txt directly but to another file and then copies that file to tempfile.txt.

using ITextSharp to extract and update links in an existing PDF

I need to post several (read: a lot) PDF files to the web but many of them have hard coded file:// links and links to non-public locations. I need to read through these PDFs and update the links to the proper locations. I've started writing an app using itextsharp to read through the directories and files, find the PDFs and iterate through each page. What I need to do next is find the links and then update the incorrect ones.
string path = "c:\\html";
DirectoryInfo rootFolder = new DirectoryInfo(path);
foreach (DirectoryInfo di in rootFolder.GetDirectories())
{
// get pdf
foreach (FileInfo pdf in di.GetFiles("*.pdf"))
{
string contents = string.Empty;
Document doc = new Document();
PdfReader reader = new PdfReader(pdf.FullName);
using (MemoryStream ms = new MemoryStream())
{
PdfWriter writer = PdfWriter.GetInstance(doc, ms);
doc.Open();
for (int p = 1; p <= reader.NumberOfPages; p++)
{
byte[] bt = reader.GetPageContent(p);
}
}
}
}
Quite frankly, once I get the page content I'm rather lost on this when it comes to iTextSharp. I've read through the itextsharp examples on sourceforge, but really didn't find what I was looking for.
Any help would be greatly appreciated.
Thanks.
This one is a little complicated if you don't know the internals of the PDF format and iText/iTextSharp's abstraction/implementation of it. You need to understand how to use PdfDictionary objects and look things up by their PdfName key. Once you get that you can read through the official PDF spec and poke around a document pretty easily. If you do care I've included the relevant parts of the PDF spec in parenthesis where applicable.
Anyways, a link within a PDF is stored as an annotation (PDF Ref 12.5). Annotations are page-based so you need to first get each page's annotation array individually. There's a bunch of different possible types of annotations so you need to check each one's SUBTYPE and see if its set to LINK (12.5.6.5). Every link should have an ACTION dictionary associated with it (12.6.2) and you want to check the action's S key to see what type of action it is. There's a bunch of possible ones for this, link's specifically could be internal links or open file links or play sound links or something else (12.6.4.1). You are looking only for links that are of type URI (note the letter I and not the letter L). URI Actions (12.6.4.7) have a URI key that holds the actual address to navigate to. (There's also an IsMap property for image maps that I can't actually imagine anyone using.)
Whew. Still reading? Below is a full working VS 2010 C# WinForms app based on my post here targeting iTextSharp 5.1.1.0. This code does two main things: 1) Create a sample PDF with a link in it pointing to Google.com and 2) replaces that link with a link to bing.com. The code should be pretty well commented but feel free to ask any questions that you might have.
using System;
using System.Text;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
//Folder that we are working in
private static readonly string WorkingFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Hyperlinked PDFs");
//Sample PDF
private static readonly string BaseFile = Path.Combine(WorkingFolder, "OldFile.pdf");
//Final file
private static readonly string OutputFile = Path.Combine(WorkingFolder, "NewFile.pdf");
public Form1()
{
InitializeComponent();
}
private void Form1_Load(object sender, EventArgs e)
{
CreateSamplePdf();
UpdatePdfLinks();
this.Close();
}
private static void CreateSamplePdf()
{
//Create our output directory if it does not exist
Directory.CreateDirectory(WorkingFolder);
//Create our sample PDF
using (iTextSharp.text.Document Doc = new iTextSharp.text.Document(PageSize.LETTER))
{
using (FileStream FS = new FileStream(BaseFile, FileMode.Create, FileAccess.Write, FileShare.Read))
{
using (PdfWriter writer = PdfWriter.GetInstance(Doc, FS))
{
Doc.Open();
//Turn our hyperlink blue
iTextSharp.text.Font BlueFont = FontFactory.GetFont("Arial", 12, iTextSharp.text.Font.NORMAL, iTextSharp.text.BaseColor.BLUE);
Doc.Add(new Paragraph(new Chunk("Go to URL", BlueFont).SetAction(new PdfAction("http://www.google.com/", false))));
Doc.Close();
}
}
}
}
private static void UpdatePdfLinks()
{
//Setup some variables to be used later
PdfReader R = default(PdfReader);
int PageCount = 0;
PdfDictionary PageDictionary = default(PdfDictionary);
PdfArray Annots = default(PdfArray);
//Open our reader
R = new PdfReader(BaseFile);
//Get the page cont
PageCount = R.NumberOfPages;
//Loop through each page
for (int i = 1; i <= PageCount; i++)
{
//Get the current page
PageDictionary = R.GetPageN(i);
//Get all of the annotations for the current page
Annots = PageDictionary.GetAsArray(PdfName.ANNOTS);
//Make sure we have something
if ((Annots == null) || (Annots.Length == 0))
continue;
//Loop through each annotation
foreach (PdfObject A in Annots.ArrayList)
{
//Convert the itext-specific object as a generic PDF object
PdfDictionary AnnotationDictionary = (PdfDictionary)PdfReader.GetPdfObject(A);
//Make sure this annotation has a link
if (!AnnotationDictionary.Get(PdfName.SUBTYPE).Equals(PdfName.LINK))
continue;
//Make sure this annotation has an ACTION
if (AnnotationDictionary.Get(PdfName.A) == null)
continue;
//Get the ACTION for the current annotation
PdfDictionary AnnotationAction = (PdfDictionary)AnnotationDictionary.Get(PdfName.A);
//Test if it is a URI action
if (AnnotationAction.Get(PdfName.S).Equals(PdfName.URI))
{
//Change the URI to something else
AnnotationAction.Put(PdfName.URI, new PdfString("http://www.bing.com/"));
}
}
}
//Next we create a new document add import each page from the reader above
using (FileStream FS = new FileStream(OutputFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
using (Document Doc = new Document())
{
using (PdfCopy writer = new PdfCopy(Doc, FS))
{
Doc.Open();
for (int i = 1; i <= R.NumberOfPages; i++)
{
writer.AddPage(writer.GetImportedPage(R, i));
}
Doc.Close();
}
}
}
}
}
}
EDIT
I should note, this only changes the actual link. Any text within the document won't get updated. Annotations are drawn on top of text but aren't really tied to the text underneath in anyway. That's another topic completely.
Noted if the Action is indirect it will not return a dictionary and you will have an error:
PdfDictionary AnnotationAction = (PdfDictionary)AnnotationDictionary.Get(PdfName.A);
In cases of possible indirect dictionaries:
PdfDictionary Action = null;
//Get action directly or by indirect reference
PdfObject obj = Annotation.Get(PdfName.A);
if (obj.IsIndirect) {
Action = PdfReader.GetPdfObject(obj);
} else {
Action = (PdfDictionary)obj;
}
In that case you have to investigate the returned dictionary to figure out where the URI is found. As with an indirect /Launch dictionary the URI is located in the /F item being of type PRIndirectReference with the /Type being a /FileSpec and the URI located in the value of /F
Added code for dealing with indirect and launch actions and null annotation-dictionary:
PdfReader r = new PdfReader(#"d:\kb2\" + f);
for (int i = 1; i <= r.NumberOfPages; i++) {
//Get the current page
var PageDictionary = r.GetPageN(i);
//Get all of the annotations for the current page
var Annots = PageDictionary.GetAsArray(PdfName.ANNOTS);
//Make sure we have something
if ((Annots == null) || (Annots.Length == 0))
continue;
foreach (var A in Annots.ArrayList) {
var AnnotationDictionary = PdfReader.GetPdfObject(A) as PdfDictionary;
if (AnnotationDictionary == null)
continue;
//Make sure this annotation has a link
if (!AnnotationDictionary.Get(PdfName.SUBTYPE).Equals(PdfName.LINK))
continue;
//Make sure this annotation has an ACTION
if (AnnotationDictionary.Get(PdfName.A) == null)
continue;
var annotActionObject = AnnotationDictionary.Get(PdfName.A);
var AnnotationAction = (PdfDictionary)(annotActionObject.IsIndirect() ? PdfReader.GetPdfObject(annotActionObject) : annotActionObject);
var type = AnnotationAction.Get(PdfName.S);
//Test if it is a URI action
if (type.Equals(PdfName.URI)) {
//Change the URI to something else
string relativeRef = AnnotationAction.GetAsString(PdfName.URI).ToString();
AnnotationAction.Put(PdfName.URI, new PdfString(url));
} else if (type.Equals(PdfName.LAUNCH)) {
//Change the URI to something else
var filespec = AnnotationAction.GetAsDict(PdfName.F);
string url = filespec.GetAsString(PdfName.F).ToString();
AnnotationAction.Put(PdfName.F, new PdfString(url));
}
}
}
//Next we create a new document add import each page from the reader above
using (var output = File.OpenWrite(outputFile.FullName)) {
using (Document Doc = new Document()) {
using (PdfCopy writer = new PdfCopy(Doc, output)) {
Doc.Open();
for (int i = 1; i <= r.NumberOfPages; i++) {
writer.AddPage(writer.GetImportedPage(r, i));
}
Doc.Close();
}
}
}
r.Close();

Categories