Roslyn - get grouped single line comments - c#

I am writing a program in C# for extracting comments from code. I am using Roslyn compiler to do that. It's great, because I am just visiting the whole abstract syntax tree and fetching SingleLineComment trivia, MultiLineComment trivia and DocumentationComment trivia syntax from the file in solution. But there is a problem because programmers often write comments like that:
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
You can see that these are three single-line comments, but I want them to be fetched from the code as one comment. Can I achieve that with Roslyn, or is there another way? This is a frequent situation, because programmers often write multi-line comments using the single-line comment syntax.
My code for extracting comments looks like this:
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using System.Collections.Generic;
namespace RoslynPlay
{
/// <summary>
/// Collects the comments discovered while walking syntax trees.
/// </summary>
public class CommentStore
{
    /// <summary>Every comment recorded so far, in the order it was added.</summary>
    public List<Comment> Comments { get; } = new List<Comment>();

    /// <summary>
    /// Records a single-line or multi-line comment trivia.
    /// Trivia of any other kind is ignored.
    /// </summary>
    /// <param name="trivia">The trivia to record.</param>
    /// <param name="commentLocationstore">Passed through to the created comment object.</param>
    /// <param name="fileName">Stored in the comment's <c>FileName</c> property.</param>
    public void AddCommentTrivia(SyntaxTrivia trivia,
        LocationStore commentLocationstore, string fileName)
    {
        switch (trivia.Kind())
        {
            case SyntaxKind.SingleLineCommentTrivia:
            {
                string text = trivia.ToString();
                // The reported line index is shifted by one before it is stored.
                int line = trivia.GetLocation().GetLineSpan().EndLinePosition.Line + 1;

                var single = new SingleLineComment(text, line, commentLocationstore);
                single.FileName = fileName;
                Comments.Add(single);
                break;
            }

            case SyntaxKind.MultiLineCommentTrivia:
            {
                string text = trivia.ToString();
                int firstLine = trivia.GetLocation().GetLineSpan().StartLinePosition.Line + 1;
                int lastLine = trivia.GetLocation().GetLineSpan().EndLinePosition.Line + 1;

                var multi = new MultiLineComment(text, firstLine, lastLine, commentLocationstore);
                multi.FileName = fileName;
                Comments.Add(multi);
                break;
            }
        }
    }

    /// <summary>
    /// Records a documentation comment node.
    /// </summary>
    /// <param name="node">The documentation comment to record.</param>
    /// <param name="commentLocationstore">Passed through to the created comment object.</param>
    /// <param name="fileName">Stored in the comment's <c>FileName</c> property.</param>
    public void AddCommentNode(DocumentationCommentTriviaSyntax node,
        LocationStore commentLocationstore, string fileName)
    {
        string text = node.ToString();
        int firstLine = node.GetLocation().GetLineSpan().StartLinePosition.Line + 1;
        // NOTE(review): unlike the other comment kinds, the end line is stored without
        // the +1 shift - presumably because the node's span ends on the following line; confirm.
        int lastLine = node.GetLocation().GetLineSpan().EndLinePosition.Line;

        var doc = new DocComment(text, firstLine, lastLine, commentLocationstore);
        doc.FileName = fileName;
        Comments.Add(doc);
    }
}
}
and in the main file (Program.cs) I am launching the comment extraction like this:
// Working variables that are reassigned for every file in the loop below.
string fileContent;
SyntaxTree tree;
SyntaxNode root;
CommentsWalker commentWalker;
MethodsAndClassesWalker methodWalker;

// All C# files below the project directory, including sub-directories.
string[] files = Directory.GetFiles(projectPath, "*.cs", SearchOption.AllDirectories);
var commentStore = new CommentStore();

// Captures the part of a file path that follows "<projectPath>\".
// projectPath is escaped because a file-system path normally contains characters
// ('\', '.', '(' ...) that would otherwise be interpreted as regex syntax.
var relativePathPattern = new Regex($@"{Regex.Escape(projectPath)}\\(.*)$");

Console.WriteLine("Reading files...");
ProgressBar progressBar = new ProgressBar(files.Length);
foreach (var file in files)
{
    fileContent = File.ReadAllText(file);

    // Path of the file relative to the project directory (empty when the pattern does not match).
    string filePath = relativePathPattern.Match(file).Groups[1].ToString();

    tree = CSharpSyntaxTree.ParseText(fileContent);
    root = tree.GetRoot();

    // The walker pushes every comment it finds into the shared store.
    commentWalker = new CommentsWalker(filePath, commentStore);
    commentWalker.Visit(root);

    progressBar.UpdateAndDisplay();
}
and here is also the comment walker:
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
namespace RoslynPlay
{
/// <summary>
/// Syntax walker that forwards every comment it encounters to a <see cref="CommentStore"/>.
/// It walks down to structured trivia so that documentation comments are visited as well.
/// </summary>
public class CommentsWalker : CSharpSyntaxWalker
{
    private readonly string _fileName;
    private readonly CommentStore _commentStore;

    // This field was referenced by the visit methods but never declared,
    // which prevented the class from compiling.
    private readonly LocationStore _commentLocationStore;

    /// <summary>
    /// Creates a walker for one source file.
    /// </summary>
    /// <param name="fileName">File name passed along with every comment.</param>
    /// <param name="commentStore">Store that receives the comments.</param>
    /// <param name="commentLocationStore">
    /// Location store passed along with every comment. Optional so that existing
    /// two-argument callers keep compiling; when omitted, <c>null</c> is forwarded to the store.
    /// </param>
    public CommentsWalker(string fileName,
        CommentStore commentStore,
        LocationStore commentLocationStore = null)
        : base(SyntaxWalkerDepth.StructuredTrivia)
    {
        _fileName = fileName;
        _commentStore = commentStore;
        _commentLocationStore = commentLocationStore;
    }

    /// <summary>
    /// Records single-line and multi-line comment trivia, then continues the walk.
    /// </summary>
    public override void VisitTrivia(SyntaxTrivia trivia)
    {
        SyntaxKind kind = trivia.Kind();
        if (kind == SyntaxKind.SingleLineCommentTrivia
            || kind == SyntaxKind.MultiLineCommentTrivia)
        {
            _commentStore.AddCommentTrivia(trivia, _commentLocationStore, _fileName);
        }

        // The base call descends into structured trivia (documentation comments).
        base.VisitTrivia(trivia);
    }

    /// <summary>
    /// Records a documentation comment, then continues the walk inside it.
    /// </summary>
    public override void VisitDocumentationCommentTrivia(DocumentationCommentTriviaSyntax node)
    {
        _commentStore.AddCommentNode(node, _commentLocationStore, _fileName);
        base.VisitDocumentationCommentTrivia(node);
    }
}
}
The problem is that trivia.Kind() == SyntaxKind.SingleLineCommentTrivia matches only one comment line at a time, but I want to extract a block of consecutive single-line comments as one comment.

Related

Change files using Roslyn

I'm trying to write a command line tool that modifies some code using Roslyn. Everything seems to go well: the solution is opened, the solution is changed, the Workspace.TryApplyChanges method returns true. However no actual files are changed on disk. What's up? Below is the top level code I'm using.
// Entry point: the first command-line argument is the path of the solution to update.
static void Main(string[] args)
{
    // Block until the asynchronous update has completed.
    UpdateAnnotations(args[0]).Wait();
}
// Opens the solution, lets SolutionAttributeUpdater produce a modified solution and
// applies it to the workspace. Prints and returns the value reported by TryApplyChanges.
static async Task<bool> UpdateAnnotations(string solutionPath)
{
    using (var workspace = MSBuildWorkspace.Create())
    {
        var original = await workspace.OpenSolutionAsync(solutionPath);
        var updated = await SolutionAttributeUpdater.UpdateAttributes(original);

        bool applied = workspace.TryApplyChanges(updated);
        Console.WriteLine(applied);
        return applied;
    }
}
I constructed a short program using your code and received the results I expected - the problem appears to reside within the SolutionAttributeUpdater.UpdateAttributes method. I received these results using the following implementation with your base main and UpdateAnnotations-methods:
/// <summary>
/// Sample updater that replaces every attribute list in a solution with <c>[Cookies("Sample")]</c>.
/// </summary>
public class SolutionAttributeUpdater
{
    /// <summary>
    /// Returns a solution in which every attribute list of every document has been replaced.
    /// Documents without attribute lists (or without a syntax tree) are left untouched.
    /// </summary>
    /// <param name="solution">The solution to transform.</param>
    /// <returns>The transformed solution; the input itself when nothing had to change.</returns>
    public static async Task<Solution> UpdateAttributes(Solution solution)
    {
        var replacement = CreateCookiesAttributeList();

        // solution.Projects is evaluated once, so the loop enumerates the original snapshot
        // while the 'solution' variable accumulates the changes.
        foreach (var project in solution.Projects)
        {
            foreach (var document in project.Documents)
            {
                var syntaxTree = await document.GetSyntaxTreeAsync();
                if (syntaxTree == null)
                {
                    // The document does not provide a syntax tree; nothing to rewrite.
                    continue;
                }

                var root = syntaxTree.GetRoot();
                var attributeLists = root.DescendantNodes().OfType<AttributeListSyntax>().ToList();
                if (attributeLists.Count == 0)
                {
                    continue;
                }

                // ReplaceNodes swaps all lists in a single pass over the tree; the result
                // must be pushed back into the solution, otherwise the change is lost.
                root = root.ReplaceNodes(attributeLists, (original, rewritten) => replacement);
                solution = solution.WithDocumentSyntaxRoot(document.Id, root);
            }
        }

        return solution;
    }

    // Builds the syntax for: [Cookies("Sample")]
    private static AttributeListSyntax CreateCookiesAttributeList()
    {
        var argument = SyntaxFactory.AttributeArgument(
            SyntaxFactory.LiteralExpression(
                SyntaxKind.StringLiteralExpression,
                SyntaxFactory.Literal("Sample")));

        var attribute = SyntaxFactory.Attribute(
            SyntaxFactory.IdentifierName("Cookies"),
            SyntaxFactory.AttributeArgumentList(SyntaxFactory.SeparatedList(new[] { argument })));

        return SyntaxFactory.AttributeList(SyntaxFactory.SingletonSeparatedList(attribute));
    }
}
It was tested using the following class in the sample solution:
// Sample input used to exercise the attribute updater: a generic class whose
// property and method each carry a [DataMember("Id")] attribute list.
public class SampleClass<T>
{
[DataMember("Id")]
public int Property { get; set; }
[DataMember("Id")]
public void DoStuff()
{
// Calls itself unconditionally; the body only serves as sample syntax.
DoStuff();
}
}
And it resulted in the following Output:
public class SampleClass<T>
{
[Cookies("Sample")] public int Property { get; set; }
[Cookies("Sample")] public void DoStuff()
{
DoStuff();
}
}
If you take a look at the UpdateAttributes method I had to replace the nodes with ReplaceNodes and updated the solution by calling WithDocumentSyntaxRoot.
I would assume that either one of those two calls is missing or that nothing was changed at all - if you call workspace.TryApplyChanges(solution) you would still receive true as an Output.
Note that using multiple calls of root.ReplaceNode() instead of root.ReplaceNodes() can also result in an error since only the first update is actually used for the modified document - which might lead you to believe that nothing has changed at all, depending on the implementation.

Remove Extraneous Semicolons in C# Using Roslyn - (replace w empty trivia)

I've figured out how to open a solution and then iterate through the Projects and then Documents. I'm stuck with how to look for C# Classes, Enums, Structs, and Interfaces that may have an extraneous semicolon at the end of the declaration (C++ style). I'd like to remove those and save the .cs files back to disk. There are approximately 25 solutions written at my current company that I would run this against. Note: The reason we are doing this is to move forward with a better set of coding standards. (And I'd like to learn how to use Roslyn to do these 'simple' adjustments)
Example (UPDATED):
class Program
{
// Loads the solution, takes the first document of the first project and passes
// every class declaration found in it to ProcessClass.
static void Main(string[] args)
{
    // Verbatim string literal, so the backslashes of the path need no escaping.
    string solutionFile = @"S:\source\dotnet\SimpleApp\SimpleApp.sln";
    IWorkspace workspace = Workspace.LoadSolution(solutionFile);
    var proj = workspace.CurrentSolution.Projects.First();
    var doc = proj.Documents.First();
    var root = (CompilationUnitSyntax)doc.GetSyntaxRoot();
    var classes = root.DescendantNodes().OfType<ClassDeclarationSyntax>();
    foreach (var decl in classes)
    {
        // The node returned by ProcessClass is not used here.
        ProcessClass(decl);
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
// Attempt at replacing the end-of-line trailing trivia of a class declaration.
// The snippet is intentionally incomplete: it shows where the asker got stuck.
private static SyntaxNode ProcessClass(ClassDeclarationSyntax node)
{
// Declared for the transformed node, but never assigned in this snippet.
ClassDeclarationSyntax newNode;
if (node.HasTrailingTrivia)
{
foreach (var t in node.GetTrailingTrivia())
{
var es = new SyntaxTrivia();
es.Kind = SyntaxKind.EmptyStatement;
// kind is readonly - what is the right way to create
// the right SyntaxTrivia?
if (t.Kind == SyntaxKind.EndOfLineTrivia)
{
// The value returned by ReplaceTrivia is discarded here.
node.ReplaceTrivia(t, es);
}
}
return // unsure how to do transform and return it
}
}
Example Code I Want to Transform
using System;
// Sample input for the transformation: a class declaration that is followed by an
// extraneous semicolon after its closing brace.
public class Person
{
public string FirstName { get; set; }
public string LastName { get; set; }
};
// note: the semicolon at the end of the Person class
Here is a little program that removes the optional semicolon after all class, struct, interface and enum declarations within a solution. The program loops through the documents within the solution and uses a SyntaxRewriter to rewrite each syntax tree. If any changes were made, the original code files are overwritten with the new source.
using System;
using System.IO;
using System.Linq;
using Roslyn.Compilers.CSharp;
using Roslyn.Services;
namespace TrailingSemicolon
{
    /// <summary>
    /// Removes the optional semicolon after class, interface, struct and enum
    /// declarations in every document of a solution and writes changed files back to disk.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Verbatim string literal, so the backslashes of the path need no escaping.
            string solutionfile = @"c:\temp\mysolution.sln";
            var workspace = Workspace.LoadSolution(solutionfile);
            var solution = workspace.CurrentSolution;
            var rewriter = new TrailingSemicolonRewriter();

            foreach (var project in solution.Projects)
            {
                foreach (var document in project.Documents)
                {
                    SyntaxTree tree = (SyntaxTree)document.GetSyntaxTree();
                    var newSource = rewriter.Visit(tree.GetRoot());

                    // The rewriter hands back the very same root when nothing changed,
                    // so untouched files are not rewritten.
                    if (newSource != tree.GetRoot())
                    {
                        File.WriteAllText(tree.FilePath, newSource.GetText().ToString());
                    }
                }
            }
        }

        /// <summary>
        /// Rewriter that strips the trailing semicolon token from type declarations.
        /// Each override first lets the base class visit the children, so that
        /// declarations nested inside other types are processed as well.
        /// </summary>
        class TrailingSemicolonRewriter : SyntaxRewriter
        {
            public override SyntaxNode VisitClassDeclaration(ClassDeclarationSyntax node)
            {
                var visited = (ClassDeclarationSyntax)base.VisitClassDeclaration(node);
                return RemoveSemicolon(visited, visited.SemicolonToken, t => visited.WithSemicolonToken(t));
            }

            public override SyntaxNode VisitInterfaceDeclaration(InterfaceDeclarationSyntax node)
            {
                var visited = (InterfaceDeclarationSyntax)base.VisitInterfaceDeclaration(node);
                return RemoveSemicolon(visited, visited.SemicolonToken, t => visited.WithSemicolonToken(t));
            }

            public override SyntaxNode VisitStructDeclaration(StructDeclarationSyntax node)
            {
                var visited = (StructDeclarationSyntax)base.VisitStructDeclaration(node);
                return RemoveSemicolon(visited, visited.SemicolonToken, t => visited.WithSemicolonToken(t));
            }

            public override SyntaxNode VisitEnumDeclaration(EnumDeclarationSyntax node)
            {
                var visited = (EnumDeclarationSyntax)base.VisitEnumDeclaration(node);
                return RemoveSemicolon(visited, visited.SemicolonToken, t => visited.WithSemicolonToken(t));
            }

            /// <summary>
            /// Replaces the semicolon token of a declaration with an empty token.
            /// </summary>
            /// <param name="node">The declaration being processed.</param>
            /// <param name="semicolonToken">The declaration's semicolon token (kind None when absent).</param>
            /// <param name="withSemicolonToken">Creates a copy of the declaration with the given semicolon token.</param>
            /// <returns>The declaration without the semicolon, or <paramref name="node"/> when it has none.</returns>
            private SyntaxNode RemoveSemicolon(SyntaxNode node,
                SyntaxToken semicolonToken,
                Func<SyntaxToken, SyntaxNode> withSemicolonToken)
            {
                if (semicolonToken.Kind == SyntaxKind.None)
                {
                    // No semicolon present: keep the node as it is.
                    return node;
                }

                var leadingTrivia = semicolonToken.LeadingTrivia;
                var trailingTrivia = semicolonToken.TrailingTrivia;

                // An empty token that carries over the trivia attached to the semicolon.
                SyntaxToken newToken = Syntax.Token(
                    leadingTrivia,
                    SyntaxKind.None,
                    trailingTrivia);

                // True when the only thing that followed the semicolon was a line break.
                bool addNewline = semicolonToken.HasTrailingTrivia
                    && trailingTrivia.Count() == 1
                    && trailingTrivia.First().Kind == SyntaxKind.EndOfLineTrivia;

                var newNode = withSemicolonToken(newToken);
                if (addNewline)
                {
                    return newNode.WithTrailingTrivia(Syntax.Whitespace(Environment.NewLine));
                }

                return newNode;
            }
        }
    }
}
Hopefully it is something along the lines of what you were looking for.
This information would have to be stored in the ClassDeclaration node - as, according to the C# specification, the semi-colon is an optional token in the end of its productions:
class-declaration:
attributesopt class-modifiersopt partialopt class identifier type-parameter-listopt
class-baseopt type-parameter-constraints-clausesopt class-body ;opt
UPDATE
According to Roslyn's documentation, you cannot actually change Syntax Trees - as they are immutable structures. That's probably the reason why kind is readonly. You may, however, create a new tree, using With* methods, defined for each changeable tree property, and using ReplaceNode. There is a good example on Roslyn documentation:
// Immutable-update pattern: build a modified copy of a node, then build a new root
// in which the old node is replaced by that copy.
var root = (CompilationUnitSyntax)tree.GetRoot();
// The second using directive of the compilation unit (index 1).
var oldUsing = root.Usings[1];
var newUsing = oldUsing.WithName(name); //changes the name property of a Using statement
// ReplaceNode returns a new root; the variable is reassigned to it.
root = root.ReplaceNode(oldUsing, newUsing);
For converting your new tree into code again (aka pretty printing), you could use the GetText() method from the compilation unit node (in our example, the root variable).
You can also extend a SyntaxRewriter class for performing code transformations. There is an extensive example for doing so in the official Roslyn website; take a look at this particular walkthrough. The following commands write the transformed tree back to the original file:
// Run the rewriter over the tree and overwrite the original file only when
// the rewriter produced a root different from the one it was given.
SyntaxNode newSource = rewriter.Visit(sourceTree.GetRoot());
bool treeWasModified = newSource != sourceTree.GetRoot();
if (treeWasModified)
{
    string rewrittenCode = newSource.GetFullText();
    File.WriteAllText(sourceTree.FilePath, rewrittenCode);
}
where rewriter is an instance of a SyntaxRewriter.

XmlSerializer; System.invalidOperationException

EDIT: Using the code provided by Jon Skeet.
I get the following error:
Message: There is an error in XML document (2, 2).
Inner Exception: {"<Translator xmlns=''> was not expected."}
If it helps I can provide the code below:
Translator.cs:
/// <summary>
/// Holds the phrases of the currently loaded language pack and looks them up by id.
/// </summary>
public class Translator
{
    /// <summary>The phrases of the loaded language pack.</summary>
    public FullBotTranslation Translation;

    public Translator()
    {
        Translation = new FullBotTranslation();
    }

    /// <summary>
    /// Loads the language pack "&lt;language&gt;.xml" from the LanguagePacks folder next to
    /// the application. Falls back to "Francais" when no file exists for that language.
    /// </summary>
    /// <param name="language">Name of the language pack, without the ".xml" extension.</param>
    public void LoadLanguage(string language)
    {
        if (!Useful.ExistFile(GetLanguagePackPath(language)))
        {
            language = "Francais";
        }

        // Language packs are written by serializing a whole Translator, so the root element
        // of the file is <Translator>. Deserializing it as FullBotTranslation fails with
        // "<Translator xmlns=''> was not expected", hence the file is read back as a
        // Translator and only its Translation is taken over.
        Translator loaded = XmlSerializerHelper.Deserialize<Translator>(GetLanguagePackPath(language));
        Translation = loaded.Translation;
    }

    /// <summary>
    /// Returns the text stored for <paramref name="phraseId"/>, or
    /// "Incomplete translation..." when the loaded pack has no such phrase.
    /// </summary>
    public string GetTranslation(PhraseID phraseId)
    {
        foreach (Phrase phrase in Translation.Phrases)
        {
            if (phrase.PhraseID == phraseId)
            {
                return phrase.PhraseString;
            }
        }

        return "Incomplete translation...";
    }

    // Builds the full path of a language pack file inside the application's base directory.
    private static string GetLanguagePackPath(string language)
    {
        return System.AppDomain.CurrentDomain.BaseDirectory + "\\LanguagePacks\\" + language + ".xml";
    }

    #region Nested type: Translation
    /// <summary>Container for the list of phrases of one language.</summary>
    [Serializable]
    public class FullBotTranslation
    {
        public List<Phrase> Phrases = new List<Phrase>();
    }
    #endregion
}
Phrase.cs:
/// <summary>
/// A single language-pack entry: a phrase identifier together with its text.
/// The class relies on the implicit public parameterless constructor.
/// </summary>
public class Phrase
{
    /// <summary>Identifier of the phrase.</summary>
    public PhraseID PhraseID { get; set; }

    /// <summary>Text stored for the phrase.</summary>
    public string PhraseString { get; set; }
}
PhraseID.cs
/// <summary>
/// Identifiers of the phrases a language pack can contain.
/// The numeric values are spelled out; they equal the implicitly assigned ones.
/// </summary>
[Serializable]
public enum PhraseID
{
    none = 0,
    Button_Start = 1,
    Button_Stop = 2,
}
I use it like this:
Setup:
// Field that holds the translator used by the form.
private Translator _translator;
// Start with an empty translator (its constructor creates an empty phrase list).
_translator = new Translator();
Saving:
// Writes the language pack named in the langPackName text box into the LanguagePacks folder.
// The object being serialized is the whole Translator, so the root element of the
// resulting file is <Translator>; the file has to be read back as that same type.
Helpers.XmlSerializerHelper.Serialize(
System.AppDomain.CurrentDomain.BaseDirectory + "\\LanguagePacks\\" + langPackName.Text + ".xml",
_translator);
Loading:
// Replace the current translator, then load the pack selected in the combo box.
_translator = new Translator(); //yes this is needed ;)
_translator.LoadLanguage(preloadedLangCombo.SelectedItem.ToString());
When using the code above to save an XML file it outputs the following:
English.XML:
<?xml version="1.0"?>
<Translator xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<Translation>
<Phrases>
<Phrase>
<PhraseID>none</PhraseID>
<PhraseString>Incomplete Translation</PhraseString>
</Phrase>
<Phrase>
<PhraseID>Button_Start</PhraseID>
<PhraseString>Start</PhraseString>
</Phrase>
<Phrase>
<PhraseID>Button_Stop</PhraseID>
<PhraseString>Stop</PhraseString>
</Phrase>
</Phrases>
</Translation>
</Translator>
It's not clear what's going wrong here - particularly because the error message doesn't seem to match your sample XML. Your exception handling may well be hiding problems though - it's a really bad idea to catch all exceptions like that, and you're going to unnecessary lengths to close the streams involved. I would condense your class down to just this:
/// <summary>
/// Minimal helpers for writing an object to, and reading it back from, an XML file.
/// Exceptions from the file system or the serializer propagate to the caller.
/// </summary>
public static class XmlSerializerHelper
{
    /// <summary>
    /// Serializes <paramref name="object"/> as XML into the file at <paramref name="path"/>,
    /// creating or overwriting that file.
    /// </summary>
    /// <param name="path">Path of the file to write.</param>
    /// <param name="object">The object to serialize; its runtime type selects the serializer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="object"/> is null.</exception>
    public static void Serialize(String path, object @object)
    {
        // Checked before the file is created, so a null argument does not leave an empty file behind.
        if (@object == null)
        {
            throw new ArgumentNullException(nameof(@object));
        }

        using (var stream = File.Create(path))
        {
            var s = new XmlSerializer(@object.GetType());
            s.Serialize(stream, @object);
        }
    }

    /// <summary>
    /// Reads the XML file at <paramref name="path"/> and deserializes it as <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The type the file was serialized from.</typeparam>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The deserialized object.</returns>
    public static T Deserialize<T>(String path)
    {
        using (var stream = File.OpenRead(path))
        {
            var s = new XmlSerializer(typeof(T));
            return (T) s.Deserialize(stream);
        }
    }
}
Now if something goes wrong, the exception will propagate out of the methods, rather than being disguised. Also, note that I've serialized to/from just streams, rather than getting a StreamWriter involved. It's simpler to let the XML infrastructure deal with all the encoding.
The above code works fine for me in a simple test.

A Simple Helper Class doesn't work

Sorry for asking such a simple question, but I have spent a really long time trying to solve this. In the end, I decided to ask you.
Let's start with the code base :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Mvc;
namespace Navigation.Helpers
{
/// <summary>
/// HtmlHelper extension for rendering the navigation bar.
/// </summary>
public static class NavigationBarSE
{
    /// <summary>
    /// Currently returns a fixed placeholder string; <paramref name="includes"/> is not used yet.
    /// </summary>
    /// <param name="helper">The HtmlHelper being extended.</param>
    /// <param name="includes">Names of the buttons to include.</param>
    /// <returns>The placeholder wrapped in an MvcHtmlString.</returns>
    public static MvcHtmlString RenderNavigationBarSE(this HtmlHelper helper, String[] includes)
    {
        // Intended implementation, disabled while the helper is being debugged:
        //NavTypeSE res = new NavTypeSE(includes);
        //String ress = res.toString();
        //return new MvcHtmlString(ress);
        const string placeholder = "Y U no Work??";
        MvcHtmlString markup = new MvcHtmlString(placeholder);
        return markup;
    }
}
}
In its original form, this helper needs to return a String produced by the NavTypeSE class. But for now, just to get a result, I only want it to return a fixed String for me... and it doesn't even do that.
Before you ask, I can say that,
<add namespace="Navigation.Helpers"/>
exists in my Web.config file in Views folder.
For detailed information, my NavTypeSE class as below :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Navigation.Helpers
{
    //Creates a Navigation Menu Type which includes Previous, Next and Validate Buttons
    public class NavTypeSE
    {
        // Flags telling which buttons are part of the menu.
        Boolean pr, nt, vld;

        // Private accessors over the flags above, so there is a single source of truth.
        private Boolean Previous { get { return pr; } set { pr = value; } }
        private Boolean Next { get { return nt; } set { nt = value; } }
        private Boolean Validate { get { return vld; } set { vld = value; } }

        /// <summary>Creates a menu with each button explicitly switched on or off.</summary>
        public NavTypeSE(Boolean Previous, Boolean Next, Boolean Validate)
        {
            this.pr = Previous;
            this.nt = Next;
            this.vld = Validate;
        }

        /// <summary>Creates a menu without any button.</summary>
        public NavTypeSE() { }

        /// <summary>
        /// Creates a menu from button names. Recognized names are "previous", "next" and
        /// "validate", each also with a capital first letter. Any other entry switches
        /// all buttons off again; processing then continues with the next entry.
        /// </summary>
        /// <param name="inc">The button names to include.</param>
        /// <exception cref="ArgumentNullException"><paramref name="inc"/> is null.</exception>
        public NavTypeSE(String[] inc)
        {
            if (inc == null)
            {
                throw new ArgumentNullException("inc");
            }

            foreach (String s in inc)
            {
                switch (s)
                {
                    case "previous":
                    case "Previous":
                        this.pr = true;
                        break;
                    case "next":
                    case "Next":
                        this.nt = true;
                        break;
                    case "validate":
                    case "Validate":
                        this.vld = true;
                        break;
                    default:
                        // Unknown entry: reset everything collected so far.
                        this.pr = false;
                        this.nt = false;
                        this.vld = false;
                        break;
                }
            }
        } // this closing brace was missing, which left toString() nested inside the constructor

        /// <summary>Describes the three flags as text.</summary>
        public String toString()
        {
            return "Previous: " + this.pr + ", Next: " + this.nt + ", Validate: " + this.vld;
        }

        /// <summary>Same text as <see cref="toString"/>, available through the standard .NET method.</summary>
        public override String ToString()
        {
            return toString();
        }
    }
}
Also, in my View, I call this Helper like below :
@{
    String[] str = new String[] { "Previous", "next", "Validate" };
    // Called as a plain statement inside the code block: the returned MvcHtmlString is not written to the page.
    Html.RenderNavigationBarSE(str);
}
This is just the base for a project, and I'm at a beginner level in both C# and the ASP.NET MVC platform. Sorry for taking up your time.
Your RenderNavigationBarSE writes nothing into the response; it just returns an MvcHtmlString.
So you need to put an @ before the method call to tell the Razor engine that you want to write the returned MvcHtmlString into the response (otherwise, inside a code block, it just executes your method and throws away the returned value):
@{
    String[] str = new String[] { "Previous", "next", "Validate" };
}
@* The @ prefix makes Razor write the returned MvcHtmlString into the page. No trailing ';' here: outside a code block it would be emitted as literal text. *@
@Html.RenderNavigationBarSE(str)
You can read more about the Razor syntax:
Introduction to ASP.NET Web Programming Using the Razor Syntax (C#)
There is also a C# Razor Syntax Quick Reference

3rd party Pdf library significantly slower when running NUnit

I am evaluating Winnovative's PdfToText library and have run into something that concerns me.
Everything runs fine and I am able to extract the text content from a small 20k or less pdf immediately if I am running a console application. However, if I call the same code from the NUnit gui running it takes 15-25 seconds (I've verified it's PdfToText by putting a breakpoint on the line that extracts the text and hitting F10 to see how long it takes to advance to the next line).
This concerns me because I'm not sure where to lay blame since I don't know the cause. Is there a problem with NUnit or PdfToText? All I want to do is extract the text from a pdf, but 20 seconds is completely unreasonable if I'm going to see this behavior under certain conditions. If it's just when running NUnit, that's acceptable, but otherwise I'll have to look elsewhere.
It's easier to demonstrate the problem using a complete VS Solution (2010), so here's the link to make it easier to setup and run (no need to download NUnit or PdfToText or even a sample pdf):
http://dl.dropbox.com/u/273037/PdfToTextProblem.zip (You may have to change the reference to PdfToText to use the x86 dll if you're running on a 32-bit machine).
Just hit F5 and the NUnit Gui runner will load.
I'm not tied to this library, if you have suggestions, I've tried iTextSharp (way too expensive for 2 lines of code), and looked at Aspose (I didn't try it, but the SaaS license is $11k). But they either lack the required functionality or are way too expensive.
(comment turned into answer)
How complex are your PDFs? The 4.1.6 version of iText allows for a closed sourced solution. Although 4.1.6 doesn't directly have a text extractor it isn't too terribly hard to write one using the PdfReader and GetPageContent().
Below is the code I used to extract the text from the PDF using iTextSharp v4.1.6. If it seems overly verbose, it's related to how I'm using it and the flexibility required.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf;
namespace ClassLibrary1
{
/// <summary>
/// Copy of the type and string value of the token a <c>PRTokeniser</c> currently points at.
/// Instances are created through <see cref="Create"/>.
/// </summary>
public class PdfToken
{
    /// <summary>The tokenizer's token type at the time the token was created.</summary>
    public int Type { get; private set; }

    /// <summary>The tokenizer's string value at the time the token was created.</summary>
    public string Value { get; private set; }

    /// <summary>True when <see cref="Type"/> equals <c>PRTokeniser.TK_OTHER</c>.</summary>
    public bool IsOperand
    {
        get { return PRTokeniser.TK_OTHER == Type; }
    }

    private PdfToken(int type, string value)
    {
        Type = type;
        Value = value;
    }

    /// <summary>Captures the current token of <paramref name="tokenizer"/>.</summary>
    public static PdfToken Create(PRTokeniser tokenizer)
    {
        int currentType = tokenizer.TokenType;
        string currentValue = tokenizer.StringValue;
        return new PdfToken(currentType, currentValue);
    }
}
/// <summary>
/// One operation of a PDF content stream: the operation name plus the tokens that preceded it.
/// </summary>
public class PdfOperation
{
    private readonly string _name;
    private readonly IEnumerable<PdfToken> _arguments;

    /// <param name="operationToken">Token whose value becomes the operation name.</param>
    /// <param name="arguments">The argument tokens; the sequence is stored as given, not copied.</param>
    public PdfOperation(PdfToken operationToken, IEnumerable<PdfToken> arguments)
    {
        _name = operationToken.Value;
        _arguments = arguments;
    }

    /// <summary>Name of the operation (the value of the operation token).</summary>
    public string Name
    {
        get { return _name; }
    }

    /// <summary>The argument tokens passed to the constructor.</summary>
    public IEnumerable<PdfToken> Arguments
    {
        get { return _arguments; }
    }
}
/// <summary>
/// Receives the operations found while a PDF page content stream is parsed.
/// </summary>
public interface IPdfParsingStrategy
{
/// <summary>Handles a single parsed operation.</summary>
/// <param name="op">The operation, with its name and argument tokens.</param>
void Execute(PdfOperation op);
}
/// <summary>
/// Parsing strategy that accumulates the text of the operations it receives as plain text.
/// </summary>
public class PlainTextParsingStrategy : IPdfParsingStrategy
{
    // Accumulated output.
    private StringBuilder _output = new StringBuilder();

    // Set by an "S" operation, cleared by the next text operation.
    private bool _sectionPending = false;

    // Last values seen by the font, matrix and color operations.
    private String _lastFont = String.Empty;
    private String _lastFontSize = String.Empty;
    private String _lastX = String.Empty;
    private String _lastY = String.Empty;
    private String _lastColor = String.Empty;

    public PlainTextParsingStrategy()
    {
    }

    /// <summary>Returns everything accumulated so far.</summary>
    public String GetText()
    {
        return _output.ToString();
    }

    #region IPdfParsingStrategy Members
    /// <summary>
    /// Dispatches on the operation name: "TJ" (text), "Tm" (matrix), "Tf" (font),
    /// "S" (section) and "G"/"g"/"rg" (color). Any other operation is ignored.
    /// </summary>
    public void Execute(PdfOperation op)
    {
        // see Adobe PDF specs for additional operations
        string name = op.Name;
        if (name == "TJ")
        {
            PrintText(op);
        }
        else if (name == "Tm")
        {
            SetMatrix(op);
        }
        else if (name == "Tf")
        {
            SetFont(op);
        }
        else if (name == "S")
        {
            PrintSection(op);
        }
        else if (name == "G" || name == "g" || name == "rg")
        {
            SetColor(op);
        }
    }
    #endregion

    // Writes a separator line and remembers that a new section has started.
    private void PrintSection(PdfOperation op)
    {
        _output.AppendLine("------------------------------------------------------------");
        _sectionPending = true;
    }

    // Writes an empty line (not called from within this class).
    private void PrintNewline(PdfOperation op)
    {
        _output.AppendLine();
    }

    // Appends the text of the operation to the output.
    private void PrintText(PdfOperation op)
    {
        if (_sectionPending)
        {
            _sectionPending = false;
            // The first text after a section is also rendered into a separate
            // header buffer; that buffer is not used any further here.
            var header = new StringBuilder();
            AppendTokens(op, header);
        }

        AppendTokens(op, _output);
    }

    // String tokens are appended as they are; every number token becomes one space.
    private static void AppendTokens(PdfOperation op, StringBuilder target)
    {
        foreach (PdfToken token in op.Arguments)
        {
            if (token.Type == PRTokeniser.TK_STRING)
            {
                target.Append(token.Value);
            }
            else if (token.Type == PRTokeniser.TK_NUMBER)
            {
                target.Append(" ");
            }
        }
    }

    // Remembers the first two arguments as font and font size.
    // (A line break on font changes is deliberately not emitted.)
    private void SetFont(PdfOperation op)
    {
        List<PdfToken> operands = op.Arguments.ToList();
        string font = operands[0].Value;
        string size = operands[1].Value;
        _lastFont = font;
        _lastFontSize = size;
    }

    // Uses the fifth and sixth argument as x and y: a different y starts a new line,
    // otherwise a different x inserts a single space.
    private void SetMatrix(PdfOperation op)
    {
        List<PdfToken> operands = op.Arguments.ToList();
        string x = operands[4].Value;
        string y = operands[5].Value;

        bool sameLine = _lastY == y;
        if (!sameLine)
        {
            _output.AppendLine();
        }
        else if (_lastX != x)
        {
            _output.Append(" ");
        }

        _lastX = x;
        _lastY = y;
    }

    // Remembers the color command with its spaces replaced by underscores.
    private void SetColor(PdfOperation op)
    {
        string command = PrintCommand(op);
        _lastColor = command.Replace(" ", "_");
    }

    // Renders an operation as "<arg> <arg> ... <name>".
    private static string PrintCommand(PdfOperation op)
    {
        var command = new StringBuilder();
        foreach (PdfToken token in op.Arguments)
        {
            command.Append(token.Value).Append(" ");
        }

        command.Append(op.Name);
        return command.ToString();
    }
}
}
And here's how I call it:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using iTextSharp.text.pdf;
namespace ClassLibrary1
{
/// <summary>
/// Extracts plain text from a PDF by feeding the operations of every page's
/// content stream to a <see cref="PlainTextParsingStrategy"/>.
/// </summary>
public class PdfExtractor
{
    /// <summary>
    /// Returns the plain text of the PDF document contained in <paramref name="pdfBuffer"/>.
    /// </summary>
    /// <param name="pdfBuffer">The bytes of the PDF document.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pdfBuffer"/> is null.</exception>
    public static string GetText(byte[] pdfBuffer)
    {
        if (pdfBuffer == null)
        {
            throw new ArgumentNullException("pdfBuffer");
        }

        PlainTextParsingStrategy strategy = new PlainTextParsingStrategy();
        ParsePdf(pdfBuffer, strategy);
        return strategy.GetText();
    }

    // Tokenizes the content stream of every page and hands each operation,
    // together with the tokens that preceded it, to the strategy.
    private static void ParsePdf(byte[] pdf, IPdfParsingStrategy strategy)
    {
        PdfReader reader = new PdfReader(pdf);
        try
        {
            // Page numbers start at 1.
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                byte[] page = reader.GetPageContent(i);
                if (page == null)
                {
                    continue;
                }

                PRTokeniser tokenizer = new PRTokeniser(page);
                List<PdfToken> parameters = new List<PdfToken>();
                while (tokenizer.NextToken())
                {
                    var token = PdfToken.Create(tokenizer);
                    if (token.IsOperand)
                    {
                        strategy.Execute(new PdfOperation(token, parameters));

                        // Start a fresh list instead of clearing the old one: the operation
                        // keeps a reference to its argument list, so a strategy that holds on
                        // to the operation would otherwise see its arguments disappear.
                        parameters = new List<PdfToken>();
                    }
                    else
                    {
                        parameters.Add(token);
                    }
                }
            }
        }
        finally
        {
            // Release the reader even when parsing fails.
            reader.Close();
        }
    }
}
}

Categories