Related
I am trying to split the following chunked memory into ReadOnlySequence<char> slices, first by newline (\n) and then by a delimiter (in this example, the double quote ").
The code below partially works (it splits by lines), but when I tweak it I get exceptions, and it currently produces the incorrect output: hello, fun, one.
I believe my issue is with my use of ReadOnlySequence.Slice() and SequencePosition, which seem to be tied to positions in the original sequence rather than to the start of the sliced ReadOnlySequence (at least as I understand it).
I am kindly seeking advice towards a corrected example of the below, so that we get the expected:
hello, much, fun, done.
using System;
using System.Buffers;
namespace NotMuchFunYet
{
class Program
{
/// <summary>
/// Entry point: walks the sample buffer line by line and prints every token
/// that <c>GetString</c> extracts from each line.
/// </summary>
static void Main(string[] args)
{
    ReadOnlySequence<char> remaining = GetExampleBuffer();

    // Each successful TryReadLine call removes one '\n'-terminated line from the front of 'remaining'.
    while (TryReadLine(ref remaining, out ReadOnlySequence<char> currentLine))
    {
        // Each successful GetString call removes one token from the front of 'currentLine'.
        while (GetString(ref currentLine, out ReadOnlySequence<char> quoted))
        {
            string text = quoted.ToString();
            Console.WriteLine(text);
        }
    }
}
/// <summary>
/// Builds the four-segment sample sequence used by <c>Main</c>. Quoted tokens
/// deliberately straddle segment boundaries ("mu" + "ch").
/// </summary>
/// <returns>A sequence spanning all four segments from first to last character.</returns>
private static ReadOnlySequence<char> GetExampleBuffer()
{
    string[] pieces =
    {
        "\"hello\".\"mu",
        "ch\".",
        "\"fun\"",
        "\n\"done\"\n",
    };

    var head = new Chunk<char>(new ReadOnlyMemory<char>(pieces[0].ToCharArray()));
    Chunk<char> tail = head;
    for (int i = 1; i < pieces.Length; i++)
    {
        // Add links the new segment after 'tail' and returns it as the new tail.
        tail = tail.Add(new ReadOnlyMemory<char>(pieces[i].ToCharArray()));
    }

    return new ReadOnlySequence<char>(head, 0, tail, tail.Memory.Length);
}
/// <summary>
/// Splits the first '\n'-terminated line off the front of <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Input sequence; on success it is advanced to just after the '\n'.</param>
/// <param name="line">On success, the characters before the '\n'; otherwise <c>default</c>.</param>
/// <returns><c>true</c> if a '\n' was found; <c>false</c> (buffer untouched) otherwise.</returns>
private static bool TryReadLine(ref ReadOnlySequence<char> buffer, out ReadOnlySequence<char> line)
{
    SequencePosition? newline = buffer.PositionOf('\n');
    if (!newline.HasValue)
    {
        line = default;
        return false;
    }

    SequencePosition eol = newline.Value;

    // The line is everything in front of the '\n'.
    line = buffer.Slice(0, eol);

    // Continue one position past the '\n' so it is not seen again.
    SequencePosition afterEol = buffer.GetPosition(1, eol);
    buffer = buffer.Slice(afterEol);
    return true;
}
// Tries to pull the next double-quoted token off the front of `line`.
// On success `property` is the text between the two quotes and `line` is advanced
// to just after the closing quote; on failure `property` is default and `line` is unchanged.
// NOTE(review): this is the question's failing version and is kept as posted.
public static bool GetString(ref ReadOnlySequence<char> line, out ReadOnlySequence<char> property)
{
var start = line.PositionOf('"');
if (start == null)
{
property = default;
return false;
}
// NOTE(review): suspected defect. start.Value.GetInteger() is used here as if it were an
// offset from the start of `line`. The corrected listing further down replaces this with
// line.Slice(line.GetPosition(1, start.Value)).
property = line.Slice(start.Value.GetInteger() + 1);
var end = property.PositionOf('"');
if (end == null)
{
property = default;
return false;
}
// Keep only the characters before the closing quote.
property = property.Slice(0, end.Value);
// Advance the caller's line to one position past the closing quote.
line = line.Slice(line.GetPosition(1, end.Value));
return true;
}
}
/// <summary>
/// One link in a chain of memory blocks that can back a multi-segment
/// <see cref="ReadOnlySequence{T}"/>.
/// </summary>
class Chunk<T> : ReadOnlySequenceSegment<T>
{
    /// <summary>Creates a segment wrapping <paramref name="memory"/>.</summary>
    public Chunk(ReadOnlyMemory<T> memory)
    {
        Memory = memory;
    }

    /// <summary>
    /// Creates a segment for <paramref name="mem"/>, links it after this one and returns it.
    /// </summary>
    public Chunk<T> Add(ReadOnlyMemory<T> mem)
    {
        // The appended segment's running index is this segment's index plus this segment's length.
        long nextIndex = RunningIndex + Memory.Length;

        var appended = new Chunk<T>(mem);
        appended.RunningIndex = nextIndex;
        Next = appended;
        return appended;
    }
}
}
Changing the first property fetch in GetString() method resolves this, from:
property = line.Slice(start.Value.GetInteger() + 1);
To:
property = line.Slice(line.GetPosition(1, start.Value));
Giving the code:
using System;
using System.Buffers;
namespace NowMuchFun
{
class Program
{
/// <summary>
/// Entry point: prints each quoted token of each line in the sample buffer.
/// </summary>
static void Main(string[] args)
{
    var pending = GetExampleBuffer();

    for (;;)
    {
        ReadOnlySequence<char> line;
        if (!TryReadLine(ref pending, out line))
        {
            break; // no further '\n' in the buffer
        }

        ReadOnlySequence<char> token;
        while (GetString(ref line, out token))
        {
            Console.WriteLine(token.ToString());
        }
    }
}
/// <summary>
/// Builds the four-segment sample sequence; the token "much" is split across
/// the first two segments on purpose.
/// </summary>
/// <returns>A sequence covering every character of all four segments.</returns>
private static ReadOnlySequence<char> GetExampleBuffer()
{
    var first = new Chunk<char>(new ReadOnlyMemory<char>("\"hello\".\"mu".ToCharArray()));

    // Each Add call returns the newly linked segment, so the calls can be chained.
    Chunk<char> last = first
        .Add(new ReadOnlyMemory<char>("ch\".".ToCharArray()))
        .Add(new ReadOnlyMemory<char>("\"fun\"".ToCharArray()))
        .Add(new ReadOnlyMemory<char>("\n\"done\"\n".ToCharArray()));

    int endIndex = last.Memory.Length;
    return new ReadOnlySequence<char>(first, 0, last, endIndex);
}
/// <summary>
/// Removes the first line (up to, but excluding, the next '\n') from <paramref name="buffer"/>.
/// </summary>
/// <param name="buffer">Source sequence; advanced past the '\n' when a line is found.</param>
/// <param name="line">The extracted line, or <c>default</c> when no '\n' exists.</param>
/// <returns><c>true</c> when a line was extracted.</returns>
private static bool TryReadLine(ref ReadOnlySequence<char> buffer, out ReadOnlySequence<char> line)
{
    SequencePosition? found = buffer.PositionOf('\n');
    if (found.HasValue)
    {
        SequencePosition lineEnd = found.Value;
        line = buffer.Slice(0, lineEnd);

        // Skip the line and its terminating '\n'.
        buffer = buffer.Slice(buffer.GetPosition(1, lineEnd));
        return true;
    }

    line = default;
    return false;
}
/// <summary>
/// Extracts the next double-quoted token from the front of <paramref name="line"/>.
/// </summary>
/// <param name="line">Sequence to scan; on success it is advanced past the closing quote.</param>
/// <param name="property">The text between the quotes, or <c>default</c> on failure.</param>
/// <returns><c>true</c> when both an opening and a closing quote were found.</returns>
public static bool GetString(ref ReadOnlySequence<char> line, out ReadOnlySequence<char> property)
{
    SequencePosition? openingQuote = line.PositionOf('"');
    if (!openingQuote.HasValue)
    {
        property = default;
        return false;
    }

    // This is the fix over the question's version: step past the opening quote with
    // GetPosition(1, ...) instead of slicing at start.Value.GetInteger() + 1.
    ReadOnlySequence<char> afterOpening = line.Slice(line.GetPosition(1, openingQuote.Value));

    SequencePosition? closingQuote = afterOpening.PositionOf('"');
    if (!closingQuote.HasValue)
    {
        property = default;
        return false;
    }

    property = afterOpening.Slice(0, closingQuote.Value);
    line = line.Slice(line.GetPosition(1, closingQuote.Value));
    return true;
}
}
/// <summary>
/// A segment of a linked chain of memory blocks, usable as the backing store of a
/// multi-segment <see cref="ReadOnlySequence{T}"/>.
/// </summary>
class Chunk<T> : ReadOnlySequenceSegment<T>
{
    /// <summary>Wraps <paramref name="memory"/> in a new, unlinked segment.</summary>
    public Chunk(ReadOnlyMemory<T> memory)
    {
        Memory = memory;
    }

    /// <summary>
    /// Links a new segment holding <paramref name="mem"/> after this one.
    /// </summary>
    /// <returns>The newly created segment, so calls can be chained.</returns>
    public Chunk<T> Add(ReadOnlyMemory<T> mem)
    {
        var successor = new Chunk<T>(mem);

        // The successor's running index is this segment's index plus this segment's length.
        successor.RunningIndex = RunningIndex + Memory.Length;
        Next = successor;
        return successor;
    }
}
}
And how can I report to the user that there was a problem and that the operation is being retried? Should I keep doing what I do now (resetting everything and calling Init() again), or should I use a timer and wait a few seconds before trying again?
In the class i did:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Net;
using System.Xml;
using HtmlAgilityPack;
using System.ComponentModel;
namespace TestingDownloads
{
class ExtractImages
{
static WebClient client;
static string htmltoextract;
public static List<string> countriescodes = new List<string>();
public static List<string> countriesnames = new List<string>();
public static List<string> DatesAndTimes = new List<string>();
public static List<string> imagesUrls = new List<string>();
static string firstUrlPart = "http://www.sat24.com/image2.ashx?region=";
static string secondUrlPart = "&time=";
static string thirdUrlPart = "&ir=";
/// <summary>
/// Payload of the <c>ProgressChanged</c> event: a percentage plus a free-text state.
/// </summary>
public class ProgressEventArgs : EventArgs
{
    /// <summary>Progress value supplied by the event raiser.</summary>
    public int Percentage { get; set; }

    /// <summary>Text describing the current state, supplied by the event raiser.</summary>
    public string StateText { get; set; }
}
public event EventHandler<ProgressEventArgs> ProgressChanged;
/// <summary>
/// Runs the whole extraction: loads the country list, downloads the timestamps for
/// every country code (raising <c>ProgressChanged</c> after each one) and finally
/// builds the image URLs.
/// </summary>
/// <remarks>
/// Progress is reported after a country has been processed, using the number of
/// countries completed so far, so the last report is exactly 100%. The previous version
/// reported 100 * index / count before the work and therefore never reached 100.
/// The event sender is now this instance instead of null.
/// </remarks>
public void Init()
{
    ExtractCountires();

    int total = countriescodes.Count;
    int done = 0;
    foreach (string cc in countriescodes)
    {
        ExtractDateAndTime(firstUrlPart + cc);
        done++;

        // total is greater than zero whenever the loop body runs, so the division is safe.
        ProgressChanged?.Invoke(this, new ProgressEventArgs { Percentage = 100 * done / total, StateText = cc });
    }

    ImagesLinks();
}
/// <summary>
/// Downloads the sat24 start page to a local file, cuts out the list of country links
/// and fills <c>countriescodes</c> and <c>countriesnames</c> from it.
/// </summary>
/// <remarks>
/// Any exception is caught. If no country code has been collected at that point, the
/// result lists are reset and <c>Init()</c> is called again.
/// NOTE(review): that retry is immediate and unbounded, and it runs inside the outer
/// <c>Init()</c> call, which continues afterwards. Confirm this is intended.
/// </remarks>
public void ExtractCountires()
{
    // Local copy of the page: written by DownloadFile, read back below.
    const string localCopy = @"c:\temp\sat24.html";

    try
    {
        htmltoextract = "http://sat24.com/en/?ir=true";//"http://sat24.com/en/";// + regions;

        // Dispose the client even when the download throws.
        using (client = new WebClient())
        {
            client.DownloadFile(htmltoextract, localCopy);
        }

        string tag1 = "<li><a href=\"/en/";
        string tag2 = "</a></li>";
        string s = System.IO.File.ReadAllText(localCopy);

        // Keep only the span from the first tag1 to the end of the last tag2.
        s = s.Substring(s.IndexOf(tag1));
        s = s.Substring(0, s.LastIndexOf(tag2) + tag2.Length);
        s = s.Replace("\r", "").Replace("\n", "").Replace(" ", "");
        string[] parts = s.Split(new string[] { tag1, tag2 }, StringSplitOptions.RemoveEmptyEntries);

        // Same as tag1 but without the space, because all spaces were removed above.
        string tag3 = "<li><ahref=\"/en/";
        foreach (string part in parts)
        {
            string l = part.Contains(tag3) ? part.Replace(tag3, "") : "";

            // If l has no '"' (including the empty case), IndexOf returns -1 and Substring
            // throws; that ends parsing through the catch below, as in the original code.
            string z1 = l.Substring(0, l.IndexOf('"'));
            z1 = z1.Replace("</ul></li><liclass=", "af");
            countriescodes.Add(z1);

            // The display name is whatever follows the last '>' in the part.
            string z2 = part.Substring(part.LastIndexOf('>') + 1);
            z2 = z2.Replace("&", " & ");
            countriesnames.Add(z2);
        }
    }
    catch (Exception)
    {
        if (countriescodes.Count == 0)
        {
            countriescodes = new List<string>();
            countriesnames = new List<string>();
            DatesAndTimes = new List<string>();
            imagesUrls = new List<string>();
            Init();
        }
    }
}
/// <summary>
/// Downloads "/en" relative to <paramref name="baseAddress"/>, finds the script that starts
/// with "var region" and appends every <c>arrayImageTimes.push('...')</c> value
/// to <c>DatesAndTimes</c>.
/// </summary>
/// <param name="baseAddress">Value assigned to <c>WebClient.BaseAddress</c> before the download.</param>
/// <remarks>
/// Any exception (download failure, script not found, marker not found) is caught; the
/// result lists are then reset and <c>Init()</c> is called again.
/// NOTE(review): that retry is immediate and unbounded, and it runs while the calling
/// <c>Init()</c> loop is still active. Confirm this is intended.
/// </remarks>
public void ExtractDateAndTime(string baseAddress)
{
    try
    {
        byte[] page;
        using (var wc = new WebClient())
        {
            wc.BaseAddress = baseAddress;
            page = wc.DownloadData("/en");
        }

        HtmlDocument doc = new HtmlDocument();
        using (var pageStream = new MemoryStream(page))
        {
            doc.Load(pageStream);
        }

        // If no matching script exists, FirstOrDefault returns null and .InnerText throws,
        // which is handled by the catch below (same as before).
        var secTokenScript = doc.DocumentNode.Descendants()
            .Where(e =>
                String.Compare(e.Name, "script", true) == 0 &&
                String.Compare(e.ParentNode.Name, "div", true) == 0 &&
                e.InnerText.Length > 0 &&
                e.InnerText.Trim().StartsWith("var region")
            ).FirstOrDefault().InnerText;

        // Everything from the first push call onwards, reduced to a ';'-separated list of values.
        int firstPush = secTokenScript.IndexOf("arrayImageTimes.push");
        string pushes = secTokenScript.Substring(firstPush)
            .Replace("arrayImageTimes.push('", "")
            .Replace("')", "");

        var dates = pushes.Trim().Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries);
        DatesAndTimes.AddRange(dates);
    }
    catch
    {
        countriescodes = new List<string>();
        countriesnames = new List<string>();
        DatesAndTimes = new List<string>();
        imagesUrls = new List<string>();
        this.Init();
    }
}
/// <summary>
/// Wraps a timestamp string and exposes fixed-position substrings of it as integers
/// (characters 0-3 year, 4-5 month, 6-7 day, 8-9 hours, 10-11 minutes).
/// </summary>
public class ScriptDate
{
    /// <summary>The raw timestamp text the numeric properties read from.</summary>
    public string DateString { get; set; }

    /// <summary>Characters 0-3 of <see cref="DateString"/> as an integer.</summary>
    public int Year
    {
        get { return ReadField(0, 4); }
    }

    /// <summary>Characters 4-5 of <see cref="DateString"/> as an integer.</summary>
    public int Month
    {
        get { return ReadField(4, 2); }
    }

    /// <summary>Characters 6-7 of <see cref="DateString"/> as an integer.</summary>
    public int Day
    {
        get { return ReadField(6, 2); }
    }

    /// <summary>Characters 8-9 of <see cref="DateString"/> as an integer.</summary>
    public int Hours
    {
        get { return ReadField(8, 2); }
    }

    /// <summary>Characters 10-11 of <see cref="DateString"/> as an integer.</summary>
    public int Minutes
    {
        get { return ReadField(10, 2); }
    }

    // Converts the substring [offset, offset + width) of DateString to an int.
    private int ReadField(int offset, int width)
    {
        string digits = this.DateString.Substring(offset, width);
        return Convert.ToInt32(digits);
    }
}
// Builds image URLs into imagesUrls by pairing each country code with a run of
// consecutive entries from DatesAndTimes.
// URL shape: firstUrlPart + countryCode + secondUrlPart + timestamp + thirdUrlPart + "true".
public void ImagesLinks()
{
// cnt is one index into DatesAndTimes shared by all countries; it is never reset.
int cnt = 0;
foreach (string countryCode in countriescodes)
{
// Incremented before each country's run, so DatesAndTimes[0] is never read.
// NOTE(review): confirm whether skipping index 0 is intended.
cnt++;
for (; cnt < DatesAndTimes.Count(); cnt++)
{
string imageUrl = firstUrlPart + countryCode + secondUrlPart + DatesAndTimes[cnt] + thirdUrlPart + "true";
imagesUrls.Add(imageUrl);
// Stop this country's run once cnt is a multiple of 10. break skips the for-increment,
// so the cnt++ above is what moves on to the next entry.
// NOTE(review): looks like this assumes ten timestamps per country; verify.
if (cnt % 10 == 0) break;
}
}
}
}
}
In both places where I use try/catch, if execution reaches the catch block I retry the whole operation by resetting the lists and calling Init() again.
Then in form1
ExtractImages ei = new ExtractImages();
// Form constructor: initialises the designer components, sets the progress bar range
// to 0..100 and immediately starts the background worker.
public Form1()
{
InitializeComponent();
ProgressBar1.Minimum = 0;
ProgressBar1.Maximum = 100;
// Raises backgroundWorker1's DoWork event; the handler below calls ei.Init().
backgroundWorker1.RunWorkerAsync();
}
events of backgroundworker
/// <summary>
/// DoWork handler: runs <c>ei.Init()</c> and forwards each of its ProgressChanged
/// events to <c>backgroundWorker1.ReportProgress</c>.
/// </summary>
/// <remarks>
/// The relay handler is detached again when <c>Init()</c> finishes or throws, so running
/// the worker more than once does not stack up duplicate subscriptions on <c>ei</c>.
/// </remarks>
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    if (backgroundWorker1.CancellationPending)
    {
        // Cancellation was requested before any work started.
        e.Cancel = true;
        return;
    }

    System.EventHandler<ExtractImages.ProgressEventArgs> relay =
        (source, progress) => backgroundWorker1.ReportProgress(progress.Percentage, progress.StateText);

    ei.ProgressChanged += relay;
    try
    {
        ei.Init();
    }
    finally
    {
        ei.ProgressChanged -= relay;
    }
}
/// <summary>
/// ProgressChanged handler: shows the reported percentage in the progress bar and in
/// label8, and the reported state text in label7.
/// </summary>
private void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    // ProgressBar.Value rejects values outside [Minimum, Maximum], so clamp defensively.
    int clamped = System.Math.Max(ProgressBar1.Minimum, System.Math.Min(ProgressBar1.Maximum, e.ProgressPercentage));
    ProgressBar1.Value = clamped;

    // UserState is null when progress is reported without a state object.
    label7.Text = e.UserState?.ToString() ?? string.Empty;
    label8.Text = e.ProgressPercentage + "%";
}
/// <summary>
/// RunWorkerCompleted handler: fills the progress bar when the work finished without an
/// exception; otherwise shows the exception message in label7 instead of ignoring it.
/// </summary>
private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    if (e.Error != null)
    {
        // An exception escaped DoWork; tell the user rather than failing silently.
        label7.Text = "Error: " + e.Error.Message;
        return;
    }

    ProgressBar1.Value = 100;
}
Another thing, and I am not sure whether it is a problem: when nothing goes wrong in the class, label7 in Form1 shows all the country codes, from the first to the last.
But ProgressBar1.Value and label8 only get to 97%, so in the completed event I have to add ProgressBar1.Value = 100;. Is that fine, or is there a problem with how the class calculates the reported percentage?
For the 1st question:
Better to catch the exception on the client side (the form) and display the error message, because you probably need to control your program's behaviour accordingly when something goes wrong.
Consider this: if the DOM structure of the page changes, your code will probably throw exceptions, and each time an exception is caught client.DownloadFile is executed again. If this happens you need to know what went wrong; and if you do not change this behaviour, such high-frequency client.DownloadFile calls may cause the website's firewall to block your IP for a while.
Adding a timer on the client side is a good idea, I think.
For the 2nd one:
Did you define how to handle the RunWorkerCompleted event?
This question is not asking about hard coded strings only, but magic numbers etc. as well.
Is there a way to find all the hard coded values i.e. string , magic numbers and what not in C# project/solution in VS?
What prompted this question is a project that I am looking at, I just found 174 times a string value was hardcodely repeated!
What you could do is program Roslyn, the (not so) new cool kid in town. It allows you to parse C# (or VB.NET) projects quite easily. Then you can visit the detected nodes and check what you really want to check. Detecting magic literals for a machine is not always as easy as it seems for a human. For example, is 1 really a magic number? I personally consider it's not, but 2 is more suspect...
Anyway, here is a small sample that does a good part of the job I believe, but it could/should be improved, maybe to tailor your exact business needs or rules (which is very interesting).
Note Roslyn can also be used directly in the context of Visual Studio, so you could turn this sample into what's called a diagnostic (an extension to Visual Studio) that can help you directly live from within the IDE. There are samples for this: Samples and Walkthroughs
class Program
{
/// <summary>
/// Demo entry point: scans a small in-memory C# sample and prints one line per literal
/// that the scanner reports.
/// </summary>
static void Main(string[] args)
{
    // Verbatim string (@"..."): "" is an escaped double quote. The indentation inside the
    // literal is significant, because the reported (line,column) spans depend on it.
    var text = @"
public class MyClass
{
    public void MyMethod()
    {
        const int i = 0; // this is ok
        decimal d = 11; // this is not ok
        string s = ""magic"";
        if (i == 29) // another magic
        {
        }
        else if (s != ""again another magic"")
        {
        }
    }
}";
    ScanHardcodedFromText("test.cs", text, (n, s) =>
    {
        Console.WriteLine(" " + n.SyntaxTree.GetLineSpan(n.FullSpan) + ": " + s);
    }).Wait();
}
/// <summary>
/// Scans a single piece of C# source text held in memory.
/// </summary>
/// <param name="documentName">Name given to the in-memory document; the project is named documentName + "Project".</param>
/// <param name="text">The C# source to scan.</param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static async Task ScanHardcodedFromText(string documentName, string text, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    var workspace = new AdhocWorkspace();
    var projectId = workspace.AddProject(documentName + "Project", LanguageNames.CSharp).Id;
    var source = SourceText.From(text);
    workspace.AddDocument(projectId, documentName, source);

    await ScanHardcoded(workspace, scannedFunction);
}
/// <summary>
/// Opens a solution file through MSBuild and scans every document in it.
/// </summary>
/// <param name="solutionFilePath">Path of the solution file to open.</param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="solutionFilePath"/> is null.</exception>
public static async Task ScanHardcodedFromSolution(string solutionFilePath, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (solutionFilePath == null)
    {
        throw new ArgumentNullException(nameof(solutionFilePath));
    }

    var msbuildWorkspace = MSBuildWorkspace.Create();
    await msbuildWorkspace.OpenSolutionAsync(solutionFilePath);
    await ScanHardcoded(msbuildWorkspace, scannedFunction);
}
/// <summary>
/// Opens a project file through MSBuild and scans every document in it.
/// </summary>
/// <param name="solutionFilePath">
/// Path handed to <c>OpenProjectAsync</c>; despite the parameter name this is a project path.
/// </param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="solutionFilePath"/> is null.</exception>
public static async Task ScanHardcodedFromProject(string solutionFilePath, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (solutionFilePath == null)
    {
        throw new ArgumentNullException(nameof(solutionFilePath));
    }

    var msbuildWorkspace = MSBuildWorkspace.Create();
    await msbuildWorkspace.OpenProjectAsync(solutionFilePath);
    await ScanHardcoded(msbuildWorkspace, scannedFunction);
}
/// <summary>
/// Walks every document of every project in the workspace's current solution and invokes
/// <paramref name="scannedFunction"/> for each literal token that <c>IsMagic</c> reports.
/// </summary>
/// <param name="workspace">Workspace whose current solution is scanned.</param>
/// <param name="scannedFunction">Receives the offending node or token and a suggestion text.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public static async Task ScanHardcoded(Workspace workspace, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (workspace == null)
    {
        throw new ArgumentNullException(nameof(workspace));
    }

    if (scannedFunction == null)
    {
        throw new ArgumentNullException(nameof(scannedFunction));
    }

    foreach (var project in workspace.CurrentSolution.Projects)
    {
        foreach (var document in project.Documents)
        {
            var syntaxTree = await document.GetSyntaxTreeAsync();
            var rootNode = await syntaxTree.GetRootAsync();

            foreach (var candidate in rootNode.DescendantNodesAndTokens())
            {
                // Evaluated in the same order as before: literal kind, then well-known values, then IsMagic.
                string suggestion;
                if (CanBeMagic(candidate.Kind())
                    && !IsWellKnownConstant(candidate)
                    && IsMagic(candidate, out suggestion))
                {
                    scannedFunction(candidate, suggestion);
                }
            }
        }
    }
}
/// <summary>
/// Decides whether a literal should be reported, based on where it appears.
/// </summary>
/// <param name="kind">The literal token (despite its name, this is the node/token, not a kind).</param>
/// <param name="suggestion">Message describing the finding, or null when nothing is reported.</param>
/// <returns>
/// <c>true</c> when the literal sits in a member or local declaration without const / static readonly
/// modifiers, or, outside any variable declaration, inside an enclosing expression.
/// </returns>
public static bool IsMagic(SyntaxNodeOrToken kind, out string suggestion)
{
    suggestion = null;
    var literalNode = kind.Parent;

    var declaration = literalNode.Ancestors().OfType<VariableDeclarationSyntax>().FirstOrDefault();
    if (declaration == null)
    {
        // Not part of a variable declaration: report the nearest enclosing expression, if any.
        var enclosing = literalNode.Ancestors().OfType<ExpressionSyntax>().FirstOrDefault();
        if (enclosing == null)
        {
            return false;
        }

        suggestion = "expression uses a non const value: " + enclosing.ToFullString();
        return true;
    }

    SyntaxNode owner = declaration.Parent;
    if (owner is MemberDeclarationSyntax)
    {
        if (HasConstOrEquivalent(owner))
        {
            return false;
        }

        suggestion = "member declaration could be const: " + owner.ToFullString();
        return true;
    }

    if (owner is LocalDeclarationStatementSyntax && !HasConstOrEquivalent(owner))
    {
        suggestion = "local declaration contains at least one non const value: " + owner.ToFullString();
        return true;
    }

    // TODO: add other cases?
    return false;
}
/// <summary>
/// Returns true for literal texts that are never reported: 1, -1, 0, a single space
/// character literal, and four-character escaped character literals such as '\0' or '\r'.
/// </summary>
/// <param name="node">Candidate; anything that is not a token yields false.</param>
private static bool IsWellKnownConstant(SyntaxNodeOrToken node)
{
    string literal = node.IsToken ? node.AsToken().Text : null;
    if (literal == null)
    {
        return false;
    }

    switch (literal)
    {
        // note: this is naive. we also should add 0d, 0f, 0m, etc.
        case "1":
        case "-1":
        case "0":
        case "' '":
            return true;
    }

    // ok for '\0' or '\r', etc.
    // TODO add more of these? or make it configurable...
    return literal.Length == 4 && literal.StartsWith("'\\") && literal.EndsWith("'");
}
/// <summary>
/// Returns true when the node's direct child tokens include <c>const</c>, or both
/// <c>static</c> and <c>readonly</c> (in either order).
/// </summary>
private static bool HasConstOrEquivalent(SyntaxNode node)
{
    var tokenKinds = node.ChildTokens().Select(token => token.Kind()).ToList();

    if (tokenKinds.Contains(SyntaxKind.ConstKeyword))
    {
        return true;
    }

    return tokenKinds.Contains(SyntaxKind.StaticKeyword)
        && tokenKinds.Contains(SyntaxKind.ReadOnlyKeyword);
}
/// <summary>
/// Returns true for the three literal token kinds the scanner looks at:
/// character, numeric and string literals.
/// </summary>
private static bool CanBeMagic(SyntaxKind kind)
{
    switch (kind)
    {
        case SyntaxKind.CharacterLiteralToken:
        case SyntaxKind.NumericLiteralToken:
        case SyntaxKind.StringLiteralToken:
            return true;
        default:
            return false;
    }
}
}
If you run this little program (I've also provided helper methods to use it on solution or projects), it will output this:
test.cs: (6,20)-(6,22): local declaration contains at least one non const value: decimal d = 11; // this is not ok
test.cs: (7,19)-(7,26): local declaration contains at least one non const value: string s = "magic";
test.cs: (8,17)-(8,19): expression uses a non const value: i == 29
test.cs: (11,22)-(11,43): expression uses a non const value: s != "again another magic"
I have some code that can find magic numbers and hard-coded non-constant strings. Maybe it can help someone:
/// <summary>
/// Scans all cs files in the solutions for magic strings and numbers using the Roslyn
/// compiler and analyzer tools.
/// Based upon a Roslyn code sample.
/// </summary>
class MagicStringAnalyzer
{
protected static Filter filter;
/// <summary>
/// Entry point: loads the ignore patterns, asks before replacing an existing output file,
/// then analyses the solution.
/// </summary>
static void Main(string[] args)
{
    // Verbatim strings (@"...") so the backslashes in the Windows paths are not treated as escapes.
    string outputPath = @"E:\output.txt";
    string solutionPath = @"E:\Solution.sln";
    filter = new Filter(@"E:\IgnorePatterns.txt");

    if (File.Exists(outputPath))
    {
        // Exits the process if the user declines to overwrite.
        OverWriteFile(outputPath);
    }

    analyzeSolution(outputPath, solutionPath);
}
// Empty placeholder: does nothing. The patterns are loaded by the Filter class constructor instead.
protected static void loadFilters()
{
}
/// <summary>
/// Asks on the console whether the existing output file may be replaced. Deletes it when
/// the user presses Y; otherwise terminates the process with exit code -1.
/// </summary>
/// <param name="path">File to delete on confirmation.</param>
private static void OverWriteFile(string path)
{
    Console.WriteLine("Do you want to overwrite existing output file? (y/n)");

    ConsoleKey answer = Console.ReadKey().Key;
    if (answer != ConsoleKey.Y)
    {
        Environment.Exit(-1);
    }
    else
    {
        File.Delete(path);
        // Finish the line on which the key press was echoed.
        Console.WriteLine("");
    }
}
/// <summary>
/// Scans the solution and writes every finding whose location does not match an ignore
/// pattern to <paramref name="outputPath"/>.
/// </summary>
/// <param name="outputPath">Text file that receives the findings.</param>
/// <param name="solutionPath">Solution file to scan.</param>
public static void analyzeSolution(string outputPath, string solutionPath)
{
    Console.WriteLine("Analyzing file...");

    // using guarantees the writer is flushed and closed even when the scan throws.
    using (var writer = new System.IO.StreamWriter(outputPath))
    {
        ScanHardcodedFromSolution(solutionPath, (n, s) =>
        {
            // The line span text starts with the file path, which is what the ignore patterns match against.
            string syntaxLineSpan = n.SyntaxTree.GetLineSpan(n.FullSpan).ToString();
            if (!filter.IsMatch(syntaxLineSpan))
            {
                writer.WriteLine(" " + syntaxLineSpan + ": \r\n" + s + "\r\n\r\n");
            }
        }).Wait();
    }
}
/// <summary>
/// Scans a single piece of C# source text held in memory.
/// </summary>
/// <param name="documentName">Name of the in-memory document; the project is named documentName + "Project".</param>
/// <param name="text">The C# source to scan.</param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static async Task ScanHardcodedFromText(string documentName, string text, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (text == null)
        throw new ArgumentNullException(nameof(text));

    // Workspaces are IDisposable; release this one once the scan has completed.
    using (AdhocWorkspace ws = new AdhocWorkspace())
    {
        var project = ws.AddProject(documentName + "Project", LanguageNames.CSharp);
        ws.AddDocument(project.Id, documentName, SourceText.From(text));
        await ScanHardcoded(ws, scannedFunction);
    }
}
/// <summary>
/// Opens a solution file through MSBuild and scans every document in it.
/// </summary>
/// <param name="solutionFilePath">Path of the solution file to open.</param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="solutionFilePath"/> is null.</exception>
public static async Task ScanHardcodedFromSolution(string solutionFilePath, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (solutionFilePath == null)
        throw new ArgumentNullException(nameof(solutionFilePath));

    // Workspaces are IDisposable; release this one once the scan has completed.
    using (var ws = MSBuildWorkspace.Create())
    {
        await ws.OpenSolutionAsync(solutionFilePath);
        await ScanHardcoded(ws, scannedFunction);
    }
}
/// <summary>
/// Opens a project file through MSBuild and scans every document in it.
/// </summary>
/// <param name="solutionFilePath">
/// Path handed to <c>OpenProjectAsync</c>; despite the parameter name this is a project path.
/// </param>
/// <param name="scannedFunction">Callback passed on to <c>ScanHardcoded</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="solutionFilePath"/> is null.</exception>
public static async Task ScanHardcodedFromProject(string solutionFilePath, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (solutionFilePath == null)
        throw new ArgumentNullException(nameof(solutionFilePath));

    // Workspaces are IDisposable; release this one once the scan has completed.
    using (var ws = MSBuildWorkspace.Create())
    {
        await ws.OpenProjectAsync(solutionFilePath);
        await ScanHardcoded(ws, scannedFunction);
    }
}
/// <summary>
/// Visits every node and token of every document in the workspace's current solution and
/// calls <paramref name="scannedFunction"/> for each literal that <c>IsMagic</c> reports.
/// </summary>
/// <param name="workspace">Workspace whose current solution is scanned.</param>
/// <param name="scannedFunction">Receives the offending node or token and a suggestion text.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public static async Task ScanHardcoded(Workspace workspace, Action<SyntaxNodeOrToken, string> scannedFunction)
{
    if (workspace == null)
    {
        throw new ArgumentNullException(nameof(workspace));
    }

    if (scannedFunction == null)
    {
        throw new ArgumentNullException(nameof(scannedFunction));
    }

    foreach (var proj in workspace.CurrentSolution.Projects)
    {
        foreach (var doc in proj.Documents)
        {
            var syntaxTree = await doc.GetSyntaxTreeAsync();
            var syntaxRoot = await syntaxTree.GetRootAsync();

            foreach (var item in syntaxRoot.DescendantNodesAndTokens())
            {
                // Skip anything that is not a literal, or that is a well-known value.
                bool skip = !CanBeMagic(item.Kind()) || IsWellKnownConstant(item);
                if (skip)
                {
                    continue;
                }

                string message;
                if (IsMagic(item, out message))
                {
                    scannedFunction(item, message);
                }
            }
        }
    }
}
/// <summary>
/// Decides whether a literal should be reported, based on the syntax around it.
/// </summary>
/// <param name="kind">The literal token (despite its name, this is the node/token, not a kind).</param>
/// <param name="suggestion">Message describing the finding, or null when nothing is reported.</param>
/// <returns>
/// <c>true</c> for a literal in a member or local declaration lacking const / static readonly
/// modifiers, or, outside any variable declaration, for a literal inside an enclosing expression.
/// </returns>
public static bool IsMagic(SyntaxNodeOrToken kind, out string suggestion)
{
    var literalNode = kind.Parent;
    var variableDeclaration = literalNode.Ancestors().OfType<VariableDeclarationSyntax>().FirstOrDefault();

    if (variableDeclaration != null)
    {
        SyntaxNode container = variableDeclaration.Parent;
        bool isMember = container is MemberDeclarationSyntax;
        bool isLocal = !isMember && container is LocalDeclarationStatementSyntax;

        if ((isMember || isLocal) && !HasConstOrEquivalent(container))
        {
            suggestion = isMember
                ? "member declaration could be const: " + container.ToFullString()
                : "local declaration contains at least one non const value: " + container.ToFullString();
            return true;
        }
    }
    else
    {
        // Not inside a variable declaration: report the nearest enclosing expression, if any.
        var outerExpression = literalNode.Ancestors().OfType<ExpressionSyntax>().FirstOrDefault();
        if (outerExpression != null)
        {
            suggestion = "expression uses a non const value: " + outerExpression.ToFullString();
            return true;
        }
    }

    // TODO: add other cases?
    suggestion = null;
    return false;
}
/// <summary>
/// Returns true for literal texts that are never reported: 1, -1, 0, a single space
/// character literal, an empty token text, and four-character escaped character
/// literals such as '\0' or '\r'.
/// </summary>
/// <param name="node">Candidate; anything that is not a token yields false.</param>
private static bool IsWellKnownConstant(SyntaxNodeOrToken node)
{
    string literal = node.IsToken ? node.AsToken().Text : null;
    if (literal == null)
    {
        return false;
    }

    switch (literal)
    {
        // note: this is naive. we also should add 0d, 0f, 0m, etc.
        case "1":
        case "-1":
        case "0":
        case "' '":
        case "":
            return true;
    }

    // ok for '\0' or '\r', etc.
    return literal.Length == 4 && literal.StartsWith("'\\") && literal.EndsWith("'");
}
/// <summary>
/// Returns true when the node's direct child tokens include <c>const</c>, or both
/// <c>static</c> and <c>readonly</c> (in either order).
/// </summary>
private static bool HasConstOrEquivalent(SyntaxNode node)
{
    bool sawStatic = false;
    bool sawReadOnly = false;

    foreach (var token in node.ChildTokens())
    {
        var tokenKind = token.Kind();
        if (tokenKind == SyntaxKind.ConstKeyword)
        {
            return true;
        }

        if (tokenKind == SyntaxKind.StaticKeyword)
        {
            sawStatic = true;
        }
        else if (tokenKind == SyntaxKind.ReadOnlyKeyword)
        {
            sawReadOnly = true;
        }

        // static + readonly counts as constant-like as soon as both have been seen.
        if (sawStatic && sawReadOnly)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Returns true for character, numeric and string literal tokens, the only kinds the
/// scanner inspects.
/// </summary>
private static bool CanBeMagic(SyntaxKind kind)
{
    switch (kind)
    {
        case SyntaxKind.CharacterLiteralToken:
        case SyntaxKind.NumericLiteralToken:
        case SyntaxKind.StringLiteralToken:
            return true;
        default:
            return false;
    }
}
}
/// <summary>
/// Holds a list of regular-expression patterns read from a text file (one per line) and
/// tests inputs against them, ignoring case.
/// </summary>
public class Filter
{
    /// <summary>The patterns, one per line of the file given to the constructor.</summary>
    protected string[] patterns;

    /// <summary>Loads the patterns from <paramref name="path"/>.</summary>
    public Filter(string path)
    {
        loadFilters(path);
    }

    /// <summary>Replaces <see cref="patterns"/> with the lines of the file at <paramref name="path"/>.</summary>
    protected void loadFilters(string path)
    {
        patterns = File.ReadAllLines(path);
    }

    /// <summary>
    /// Returns true when <paramref name="input"/> matches at least one pattern.
    /// Each line is used as a regular expression, so '.' matches any character.
    /// </summary>
    public bool IsMatch(string input)
    {
        return Array.Exists(
            patterns,
            pattern => Regex.IsMatch(input, pattern, RegexOptions.IgnoreCase));
    }
}
Your txt file that contains file names to ignore would contain values like -
Constant.cs
Resoures.Designer.cs
Configuration.cs
Reference.cs
Test
Give name of your solution in solution path and run this. This will generate txt file for you with all hard coded strings and magic numbers.
Edit:
To compile the project, you'll need to install Microsoft.CodeAnalysis NuGet package into your console app project:
Install-Package Microsoft.CodeAnalysis -Pre
Here is a complete list of references you should have in your Program.cs:
using System;
using System.Linq;
using System.Threading.Tasks;
using System.IO;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.MSBuild;
using Microsoft.CodeAnalysis.Text;
namespace MagicStringAnalyzer
{
// the rest of the code goes here...
}
I have attempted this over and over, but I'm now just going in circles.
I have a set of mostly unique job files stored externally; from these files I'm collecting information and storing it in my program in a List.
On occasion, I have two job files with the same name (this unfortunately can't be avoided) but containing different data (the most distinctive of which is a DateTime string).
My program works by polling 3 locations for these files (all locations are mirrored so the same data is present in each), collecting the information then outputting the info to a ListView window dependent on current status of each job file.
The issue I'm having is preventing duplicate entries being added to my List resulting in 69 objects in the list for only 23 jobs files.
To avoid having duplicates output in the listview, I attempted to use a dictionary to store known keys based on the job file name, however this falls over when reaching the second file that is named the same as an existing entry. Resulting in me only seeing 21 jobs in a list that should contain 23 (I also get an exception thrown). I got around this by appending something to the end of the second entry, but this isn't ideal, and was found to cause other complications later on.
Any ideas?
Here's what I have so far.
public List<object> PendingJobsArray = new List<object>();
public List<object> ActiveJobsArray = new List<object>();
public List<object> CompletedJobsArray = new List<object>();
public JobObject PendingJobs = new JobObject();
public JobObject ActiveJobs = new JobObject();
public JobObject CompletedJobs = new JobObject();
// Question's original attempt at adding/updating a pending job from the XML element at `index`.
// Three intended cases are visible: the list is empty (add), the job name is already a key in
// Program.KnownPendingJobs (update or remove), or the name is new (add).
// NOTE(review): the snippet does not compile as posted. The second `else if` below has no body
// and the braces around it do not balance; it is kept verbatim.
private void updatePending(int index, XmlParser xmlParser)
{
if (Program.PendingJobsArray.Count == 0)
{
// First job ever seen: remember its name (key) and DateTime (value), then build and add it.
Program.KnownPendingJobs.Add(xmlParser.getElementAttrValue(index, "Name"), xmlParser.getElementAttrValue(index, "DateTime"));
JobObject PendingJobs = new JobObject();
PendingJobs.jobObjectName = xmlParser.getElementAttrValue(index, "Name");
PendingJobs.jobObjectState = xmlParser.getElementAttrValue(index, "State");
PendingJobs.jobObjectDiscName = xmlParser.getElementAttrValue(index, "DiscName");
PendingJobs.jobObjectPercentComplete = Convert.ToInt32(xmlParser.getElementAttrValue(index, "TotalPercentComplete"));
PendingJobs.jobObjectDateTime = xmlParser.getElementAttrValue(index, "DateTime");
PendingJobs.jobObjectRequested = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Requested"));
PendingJobs.jobObjectCompleted = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Completed"));
PendingJobs.jobObjectFailed = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Failed"));
PendingJobs.jobObjectPriority = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Priority"));
// PendingJobs.jobObjectServer = xmlParser.getElementAttrValue("Server");
// PendingJobs.jobObjectOperation = xmlParser.getElementAttrValue("Operation");
Program.PendingJobsArray.Add(PendingJobs);
}
// The known-job check is by Name only; a second job with the same name lands here too.
else if (Program.KnownPendingJobs.ContainsKey(xmlParser.getElementAttrValue(index, "Name")))
{
// NOTE(review): the loop variable i is never used in the body. RemoveAt and the indexer
// below use `index` (the XML element index), so every iteration does the same thing.
for (int i = 0; i < Program.PendingJobsArray.Count; ++i)
{
if (xmlParser.getElementAttrValue(index, "State") == "Write")
{
Program.PendingJobsArray.RemoveAt(index);
}
else
{
// No local is declared in this branch, so PendingJobs presumably refers to the
// shared PendingJobs field declared above the method. TODO confirm.
PendingJobs.jobObjectName = xmlParser.getElementAttrValue(index, "Name");
PendingJobs.jobObjectState = xmlParser.getElementAttrValue(index, "State");
PendingJobs.jobObjectDiscName = xmlParser.getElementAttrValue(index, "DiscName");
PendingJobs.jobObjectPercentComplete = Convert.ToInt32(xmlParser.getElementAttrValue(index, "TotalPercentComplete"));
PendingJobs.jobObjectDateTime = xmlParser.getElementAttrValue(index, "DateTime");
PendingJobs.jobObjectRequested = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Requested"));
PendingJobs.jobObjectCompleted = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Completed"));
PendingJobs.jobObjectFailed = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Failed"));
PendingJobs.jobObjectPriority = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Priority"));
Program.PendingJobsArray[index] = PendingJobs;
}
}
}
// NOTE(review): duplicate of the condition above with no body; looks like a paste error.
else if (Program.KnownPendingJobs.ContainsKey(xmlParser.getElementAttrValue(index, "Name")))
}
else
{
// Name not seen before: register it and add a new job. Add on KnownPendingJobs is called
// with the Name as key, as in the first branch.
Program.KnownPendingJobs.Add(xmlParser.getElementAttrValue(index, "Name"), xmlParser.getElementAttrValue(index, "DateTime"));
JobObject PendingJobs = new JobObject();
PendingJobs.jobObjectName = xmlParser.getElementAttrValue(index, "Name");
PendingJobs.jobObjectState = xmlParser.getElementAttrValue(index, "State");
PendingJobs.jobObjectDiscName = xmlParser.getElementAttrValue(index, "DiscName");
PendingJobs.jobObjectPercentComplete = Convert.ToInt32(xmlParser.getElementAttrValue(index, "TotalPercentComplete"));
PendingJobs.jobObjectDateTime = xmlParser.getElementAttrValue(index, "DateTime");
PendingJobs.jobObjectRequested = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Requested"));
PendingJobs.jobObjectCompleted = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Completed"));
PendingJobs.jobObjectFailed = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Failed"));
PendingJobs.jobObjectQuantity = xmlParser.getElementAttrValue(index, "Quantity");
PendingJobs.jobObjectPriority = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Priority"));
//PendingJobs.jobObjectServer = xmlParser.getElementAttrValue("Server");
//PendingJobs.jobObjectOperation = xmlParser.getElementAttrValue("Operation");
Program.PendingJobsArray.Add(PendingJobs);
}
}
Updated code based on comments below,
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Collections;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Threading;
using System.Windows.Forms;
namespace SOERequestProgress
{
class XmlSplitter
{
// Declarations
private static string[] COMPLETED_STATES = new string[2]
{
"ERROR",
"COMPLETE"
};
private static string[] PENDING_STATES = new string[1]
{
"PENDING"
};
private static string[] ACTIVE_STATES = new string[1]
{
"PAUSED"
};
private const string STATE_ERROR = "ERROR";
private const string STATE_COMPLETE = "COMPLETE";
private const string STATE_PENDING = "PENDING";
private const string STATE_ACTIVE = "PAUSED";
public static JobObject PendingJobs = new JobObject();
public static JobObject CompletedJobs = new JobObject();
public static JobObject ActiveJobs = new JobObject();
//public static JobObject x;
public static int intI = 0;
// Add additional info from parsed XML (Currently only "Name" and "State" are collected.
/// <summary>
/// Parses the job-list XML and sorts every "Job" element into the pending, completed or
/// active list held on <c>Program</c>, then returns those three lists.
/// </summary>
/// <param name="JobList">The job-list XML text.</param>
/// <param name="Active">Receives <c>Program.ActiveJobsArray</c>.</param>
/// <param name="Completed">Receives <c>Program.CompletedJobsArray</c>.</param>
/// <param name="Pending">Receives <c>Program.PendingJobsArray</c>.</param>
/// <remarks>
/// The previous version wrapped a plain assignment (<c>xmlString = JobList</c>) in a
/// three-attempt retry loop. The assignment cannot throw, so the loop always succeeded on
/// the first pass and the "Job list error" branch was unreachable; both are removed.
/// If the XML cannot be parsed, a message box is shown and the lists are returned unchanged.
/// </remarks>
public static void splitXml(string JobList, out List<JobObject> Active, out List<JobObject> Completed, out List<JobObject> Pending)
{
    XmlParser xmlParser = new XmlParser();
    if (!xmlParser.setXmlString(JobList))
    {
        MessageBox.Show("Error parsing job list server response.", "Error", MessageBoxButtons.OK, MessageBoxIcon.Hand);
    }
    else
    {
        // setElementName's return value is used as the number of "Job" elements to visit.
        int jobCount = xmlParser.setElementName("Job");
        for (int index = 0; index < jobCount; ++index)
        {
            string state = xmlParser.getElementAttrValue(index, "State");
            if (isState(state, PENDING_STATES))
            {
                updatePending(index, xmlParser);
            }
            else if (isState(state, COMPLETED_STATES))
            {
                updateCompleted(index, xmlParser);
            }
            else //if (isState(state, ACTIVE_STATES))
            {
                // Anything that is neither pending nor completed is treated as active.
                updateActive(index, xmlParser);
            }
        }
    }

    Active = Program.ActiveJobsArray;
    Completed = Program.CompletedJobsArray;
    Pending = Program.PendingJobsArray;
}
/// <summary>
/// Adds the job at <paramref name="index"/> to <c>Program.PendingJobsArray</c> unless an
/// entry with the same name and the same DateTime is already present.
/// </summary>
/// <remarks>
/// Two job files can share a name and differ only in their DateTime. Matching on the name
/// alone dropped the second of such jobs, so the DateTime is part of the identity here.
/// </remarks>
private static void updatePending(int index, XmlParser xmlParser)
{
    var jobName = xmlParser.getElementAttrValue(index, "Name");
    var jobDateTime = xmlParser.getElementAttrValue(index, "DateTime");

    var pendingJob = Program.PendingJobsArray.Find(
        x => x.jobObjectName == jobName && string.Equals(x.jobObjectDateTime, jobDateTime));
    if (pendingJob == null)
    {
        JobObject newPendingJob = CreateJob("Pending", index, xmlParser);
        Program.PendingJobsArray.Add(newPendingJob);
    }
}
/// <summary>
/// Adds the job at <paramref name="index"/> to <c>Program.CompletedJobsArray</c> unless a job
/// with the same name is already in that list.
/// </summary>
private static void updateCompleted(int index, XmlParser xmlParser)
{
    string name = xmlParser.getElementAttrValue(index, "Name");
    bool alreadyListed = Program.CompletedJobsArray.Exists(job => job.jobObjectName == name);
    if (alreadyListed)
    {
        return;
    }

    Program.CompletedJobsArray.Add(CreateJob("Completed", index, xmlParser));
}
/// <summary>
/// Adds the job at <paramref name="index"/> to <c>Program.ActiveJobsArray</c> unless a job
/// with the same name is already in that list.
/// </summary>
private static void updateActive(int index, XmlParser xmlParser)
{
    string name = xmlParser.getElementAttrValue(index, "Name");
    bool alreadyListed = Program.ActiveJobsArray.Exists(job => job.jobObjectName == name);
    if (alreadyListed)
    {
        return;
    }

    Program.ActiveJobsArray.Add(CreateJob("Active", index, xmlParser));
}
/// <summary>
/// Builds a <c>JobObject</c> from the attributes of the "Job" element at <paramref name="index"/>.
/// </summary>
/// <param name="jobType">"Pending", "Completed" or "Active"; any other value yields <c>null</c>.</param>
/// <param name="index">Index of the element inside the parser's current element set.</param>
/// <param name="xmlParser">Parser positioned on the job list.</param>
/// <returns>The populated job, or <c>null</c> for an unknown <paramref name="jobType"/>.</returns>
/// <remarks>
/// The three job types used to be three copy-pasted branches. They share eight attributes;
/// only "Quantity" (pending) and "Priority" (pending and active) differ, so the common part
/// is now written once. Attributes are read in the same order as before.
/// The "Server" and "Operation" attributes are not collected.
/// </remarks>
private static JobObject CreateJob(string jobType, int index, XmlParser xmlParser)
{
    bool isPending = jobType == "Pending";
    bool isCompleted = jobType == "Completed";
    bool isActive = jobType == "Active";
    if (!isPending && !isCompleted && !isActive)
    {
        return null;
    }

    JobObject job = new JobObject();

    // Attributes common to every job type.
    job.jobObjectName = xmlParser.getElementAttrValue(index, "Name");
    job.jobObjectState = xmlParser.getElementAttrValue(index, "State");
    job.jobObjectDiscName = xmlParser.getElementAttrValue(index, "DiscName");
    job.jobObjectPercentComplete = Convert.ToInt32(xmlParser.getElementAttrValue(index, "TotalPercentComplete"));
    job.jobObjectDateTime = xmlParser.getElementAttrValue(index, "DateTime");
    job.jobObjectRequested = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Requested"));
    job.jobObjectCompleted = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Completed"));
    job.jobObjectFailed = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Failed"));

    // Type-specific attributes.
    if (isPending)
    {
        job.jobObjectQuantity = xmlParser.getElementAttrValue(index, "Quantity");
    }
    if (isPending || isActive)
    {
        job.jobObjectPriority = Convert.ToInt32(xmlParser.getElementAttrValue(index, "Priority"));
    }

    return job;
}
/// <summary>
/// Tells whether <paramref name="jobState"/> matches one of <paramref name="knownStates"/>,
/// ignoring the case of <paramref name="jobState"/>.
/// </summary>
/// <param name="jobState">State text to classify; <c>null</c> matches nothing.</param>
/// <param name="knownStates">Upper-case state names to compare against.</param>
/// <returns><c>true</c> when the upper-cased state equals one of the known states.</returns>
private static bool isState(string jobState, string[] knownStates)
{
    if (jobState == null)
    {
        // A missing state is not any known state (previously a NullReferenceException).
        return false;
    }

    // Upper-case once, and culture-invariantly: with ToUpper() under a Turkish culture
    // "pending" becomes "PENDİNG" and would never match "PENDING".
    string normalized = jobState.ToUpperInvariant();
    for (int index = 0; index < knownStates.Length; ++index)
    {
        if (normalized.Equals(knownStates[index]))
        {
            return true;
        }
    }
    return false;
}
}
}
This new code doesn't appear to be correct and I can't see where it's failing, but it's filling my CompletedJobsArray with over 200 entries in a matter of seconds.
My ListView is only showing 1 entry (it's filtered for unique jobs), and on checking the contents of the array, it's adding the same jobObject for all 200+ entries.
Can anyone spot the flaw in this code?
Update 2: IEqualityComparer implemented, further errors encountered.
The below code are the implementation of Equals and GetHashCode.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SOERequestProgress
{
/// <summary>
/// Equality comparer for <c>JobObject</c> that compares jobs by a caller-supplied key,
/// e.g. <c>new JobComparer(j => j.jobObjectName)</c>.
/// </summary>
class JobComparer : IEqualityComparer<JobObject>
{
    // The original assigned to an undeclared "KeySelector" member, which does not compile.
    private readonly Func<JobObject, object> _keySelector;

    /// <param name="keySelector">Extracts the value that identifies a job.</param>
    /// <exception cref="ArgumentNullException"><paramref name="keySelector"/> is null.</exception>
    public JobComparer(Func<JobObject, object> keySelector)
    {
        if (keySelector == null)
        {
            throw new ArgumentNullException("keySelector");
        }
        _keySelector = keySelector;
    }

    /// <summary>Two jobs are equal when their keys are equal; two nulls are equal.</summary>
    public bool Equals(JobObject x, JobObject y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }
        if (x == null || y == null)
        {
            return false;
        }
        // object.Equals tolerates a null key on either side.
        return object.Equals(_keySelector(x), _keySelector(y));
    }

    /// <summary>Hash of the job's key, so that equal jobs hash alike.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="job"/> is null.</exception>
    public int GetHashCode(JobObject job)
    {
        if (job == null)
        {
            throw new ArgumentNullException("job");
        }
        object key = _keySelector(job);
        return key == null ? 0 : key.GetHashCode();
    }
}
}
This implementation throws the errors below,
Error 1 'SOERequestProgress.JobObject' does not implement interface member 'System.Collections.Generic.IEqualityComparer<SOERequestProgress.JobComparer>.GetHashCode(SOERequestProgress.JobComparer)' C:\Users\youngs\Documents\Visual Studio 2012\Projects\SOERequestProgress\SOERequestProgress\SOERequestProgress\JobObject.cs 9 11 SOERequestProgress
Error 2 'SOERequestProgress.JobObject' does not implement interface member 'System.Collections.Generic.IEqualityComparer<SOERequestProgress.JobComparer>.Equals(SOERequestProgress.JobComparer, SOERequestProgress.JobComparer)' C:\Users\youngs\Documents\Visual Studio 2012\Projects\SOERequestProgress\SOERequestProgress\SOERequestProgress\JobObject.cs 9 11 SOERequestProgress
Update 3: OK, I'm nearly about to admit defeat, as I don't seem to be able to get my head round how the IEqualityComparer works... and I'm back to banging my head on the keyboard >.<
The code below now gets 72 jobs in the CompletedJobsArray, which equates to 24 jobs duplicated 3 times!!!
/// <summary>
/// Adds the job at <paramref name="index"/> to <c>Program.CompletedJobsArray</c> unless a job
/// with the same name and date/time is already in that list.
/// </summary>
/// <remarks>
/// The earlier lookup was <c>Find(x => x.Equals(jobDate))</c>, which compares a JobObject
/// with a string. That is never true, so every poll added the job again (24 jobs x 3
/// locations = 72 entries). The lookup now compares the identifying fields directly.
/// </remarks>
private static void updateCompleted(int index, XmlParser xmlParser)
{
    string jobName = xmlParser.getElementAttrValue(index, "Name");
    string jobDate = xmlParser.getElementAttrValue(index, "DateTime");

    JobObject existingJob = Program.CompletedJobsArray.Find(
        x => x.jobObjectName == jobName && x.jobObjectDateTime == jobDate);

    if (existingJob == null)
    {
        JobObject newCompletedJob = CreateJob("Completed", index, xmlParser);
        Program.CompletedJobsArray.Add(newCompletedJob);
    }
    else
    {
        // The job is already tracked. existingJob is a single JobObject (not an IEnumerable),
        // so it can be passed to an update routine once one exists:
        //UpdateJob("Completed", index, existingJob, xmlParser);
    }
}
I have 24 jobs, but when the poll of the 3 locations for these job files happens, I only need the 24 unique jobs added to the array.... What am I doing wrong?
Update 4 Duplication still occurs... but not in the List...
OK, so a little further on, and I am finding that my List contains the correct number of jobs, with no duplication occurring.
However, when a new job enters the List, output from the program (in the UI) gets duplicated for some jobs but not all.
I can't seem to pin down a reason for this, as I've tried to keep things as simple as possible, and am using one list (which is confirmed as working as intended) and am generating my output to a listview from there.
Here's the offending section of code:
/// <summary>
/// Sorts <c>Program.JobsArray</c> by date/time and synchronises the three list views
/// (pending, active, completed) with the jobs' current states.
/// </summary>
/// <remarks>
/// For each job the matching view either gets a new row (first time the job is seen in that
/// state) or has its existing row refreshed; on refresh the job's rows are also removed from
/// the other two views.
/// Fixes compared with the previous version:
/// - the pending branch tested <c>inPendingView</c> twice instead of <c>isPending</c> and
///   <c>inPendingView</c>, unlike the active and completed branches;
/// - the pending branch looked the row up in PendingJobsView but removed it from
///   CompletedJobsView, so stale completed rows were never removed;
/// - the always-true <c>Jobs.Count >= 0</c> guard is gone.
/// NOTE(review): ListView.FindItemWithText does a prefix match, so a job named "Job1" can
/// match the row of "Job10" - worth confirming whether that explains remaining duplicates.
/// </remarks>
private void SortViews()
{
    var Jobs = Program.JobsArray;
    Program.JobsArray.Sort((a, b) => String.Compare(a.jobObjectDateTime, b.jobObjectDateTime));

    // ----- Pending jobs -----
    foreach (JobObject Job in Jobs.Where(x => PENDING_STATES.Contains(x.jobObjectState)))
    {
        if (Job.isPending && Job.inPendingView)
        {
            // Already shown as pending: drop it from the other views and refresh its row.
            Job.inCompletedView = false;
            CompletedJobsView.Items.Remove(CompletedJobsView.FindItemWithText(Job.jobObjectName));
            Job.inActiveView = false;
            ActiveJobsView.Items.Remove(ActiveJobsView.FindItemWithText(Job.jobObjectName));
            ListViewItem item = PendingJobsView.FindItemWithText(Job.jobObjectDateTime);
            item.SubItems[0].Text = Job.jobObjectName;
            item.SubItems[1].Text = Convert.ToString(Job.jobObjectPriority);
            item.SubItems[2].Text = Job.jobObjectDiscName;
            item.SubItems[3].Text = Convert.ToString(Job.jobObjectRequested);
            item.SubItems[4].Text = Job.jobObjectDateTime;
        }
        else
        {
            // First time seen as pending: add a new row.
            string[] brr = new string[6];
            ListViewItem item;
            brr[0] = Job.jobObjectName;
            brr[1] = Convert.ToString(Job.jobObjectPriority);
            brr[2] = Job.jobObjectDiscName;
            brr[3] = Convert.ToString(Job.jobObjectRequested);
            brr[4] = Job.jobObjectDateTime;
            item = new ListViewItem(brr);
            item.ForeColor = Color.Blue;
            PendingJobsView.Items.Add(item);
            Job.inPendingView = true;
            Job.inActiveView = false;
            Job.inCompletedView = false;
            Job.isActive = false;
            Job.isCompleted = false;
            Job.isPending = true;
        }
    }

    // ----- Active jobs -----
    foreach (JobObject Job in Jobs.Where(x => ACTIVE_STATES.Contains(x.jobObjectState)))
    {
        if (Job.isActive && Job.inActiveView)
        {
            // Already shown as active: drop it from the other views and refresh its row.
            Job.inPendingView = false;
            PendingJobsView.Items.Remove(PendingJobsView.FindItemWithText(Job.jobObjectName));
            Job.inCompletedView = false;
            CompletedJobsView.Items.Remove(CompletedJobsView.FindItemWithText(Job.jobObjectDateTime));
            ListViewItem item = ActiveJobsView.FindItemWithText(Job.jobObjectName);
            item.SubItems[0].Text = Job.jobObjectName;
            item.SubItems[1].Text = Convert.ToString(Job.jobObjectPriority);
            item.SubItems[2].Text = Job.jobObjectDiscName;
            item.SubItems[3].Text = Convert.ToString(Job.jobObjectCompleted) + " of " + Convert.ToString(Job.jobObjectRequested);
            item.SubItems[4].Text = Convert.ToString(Job.jobObjectFailed);
            item.SubItems[5].Text = Convert.ToString(Job.percentComplete) + "%";
            item.SubItems[6].Text = Convert.ToString(Job.jobObjectState);
        }
        else
        {
            // First time seen as active: add a new row.
            Job.isActive = true;
            string[] crr = new string[7];
            ListViewItem item;
            crr[0] = Job.jobObjectName;
            crr[1] = Convert.ToString(Job.jobObjectPriority);
            crr[2] = Job.jobObjectDiscName;
            crr[3] = Convert.ToString(Job.jobObjectCompleted) + " of " + Convert.ToString(Job.jobObjectRequested);
            crr[4] = Convert.ToString(Job.jobObjectFailed);
            crr[5] = Convert.ToString(Job.percentComplete) + "%";
            crr[6] = Convert.ToString(Job.jobObjectState);
            item = new ListViewItem(crr);
            item.ForeColor = Color.DarkOrange;
            ActiveJobsView.Items.Add(item);
            Job.inActiveView = true;
            Job.inPendingView = false;
            Job.inCompletedView = false;
            Job.isActive = true;
            Job.isCompleted = false;
            Job.isPending = false;
        }
    }

    // ----- Completed jobs -----
    foreach (JobObject Job in Jobs.Where(x => COMPLETED_STATES.Contains(x.jobObjectState)))
    {
        if (Job.isCompleted && Job.inCompletedView)
        {
            // Already shown as completed: drop it from the other views and refresh its row.
            Job.inPendingView = false;
            PendingJobsView.Items.Remove(PendingJobsView.FindItemWithText(Job.jobObjectName));
            Job.inActiveView = false;
            ActiveJobsView.Items.Remove(ActiveJobsView.FindItemWithText(Job.jobObjectName));
            ListViewItem item = CompletedJobsView.FindItemWithText(Job.jobObjectName);
            item.SubItems[0].Text = Job.jobObjectName;
            item.SubItems[1].Text = Job.jobObjectDiscName;
            item.SubItems[2].Text = Convert.ToString(Job.jobObjectCompleted) + " of " + Convert.ToString(Job.jobObjectRequested);
            item.SubItems[3].Text = Convert.ToString(Job.jobObjectFailed);
            item.SubItems[4].Text = Convert.ToString(Job.jobObjectState);
            item.SubItems[5].Text = Job.jobObjectDateTime;
        }
        else
        {
            // First time seen as completed: add a new row, coloured by outcome.
            string[] arr = new string[6];
            ListViewItem item;
            arr[0] = Job.jobObjectName;
            arr[1] = Job.jobObjectDiscName;
            arr[2] = Convert.ToString(Job.jobObjectCompleted) + " of " + Convert.ToString(Job.jobObjectRequested);
            arr[3] = Convert.ToString(Job.jobObjectFailed);
            arr[4] = Convert.ToString(Job.jobObjectState);
            arr[5] = Job.jobObjectDateTime;
            item = new ListViewItem(arr);
            if (Job.jobObjectState == "ERROR")
            {
                item.ForeColor = Color.Firebrick;
            }
            if (Job.jobObjectState == "COMPLETE")
            {
                item.ForeColor = Color.Green;
            }
            CompletedJobsView.Items.Add(item);
            Job.inCompletedView = true;
            Job.inPendingView = false;
            Job.inActiveView = false;
            Job.isCompleted = true;
            Job.isActive = false;
            Job.isPending = false;
        }
    }
}
Any ideas what's causing this behavior?
Thanks again :)
Define a custom IEqualityComparer for your PendingJobs object. Then you can easily tweak the comparison as you see fit and compare all the fields or a subset of fields. Then you can create a HashSet and feed it this comparer. You can also use a dictionary if it is better as it also accepts IEqualityComparer.
Instead of complicating the code, it would be easy to work with a single list here.
List<JobObject> jobs = new List<JobObject>();
// Code that adds / updates a job
var newJob = CreateJob(state);
// The equality comparer checks the job name and job date of two jobs to ensure uniqueness.
// (An instance call such as x.Equals(x, newJob) does not compile; the comparison has to go
// through the comparer.)
var comparer = new JobObjectComparer();
var existingJob = jobs.Find(x => comparer.Equals(x, newJob));
if (existingJob != null)
{
    // Update your existing job.
    // If you want to calculate the time interval from job start till end, you can add more
    // properties indicating jobPendingStartTime, jobActiveStartTime etc.
}
else
{
    jobs.Add(newJob);
}
// Code to find jobs (Contains is case-sensitive in C#; ".contains" does not compile)
var pendingJobs = jobs.Where(x => PENDING_STATES.Contains(x.jobObjectState));
var activeJobs = jobs.Where(x => ACTIVE_STATES.Contains(x.jobObjectState));
var completedJobs = jobs.Where(x => COMPLETED_STATES.Contains(x.jobObjectState));
// Bind to different list views
IEqualityComparer Implementation:
/// <summary>Minimal job record used to illustrate the comparer below.</summary>
public class JobObject
{
    /// <summary>Job name; part of the job's identity.</summary>
    public string Name { get; set; }
    /// <summary>Job date text; part of the job's identity.</summary>
    public string Date { get; set; }
    //Other properties
}

/// <summary>
/// Treats two jobs as the same job when both their <c>Name</c> and <c>Date</c> match.
/// </summary>
public class JobObjectComparer : IEqualityComparer<JobObject>
{
    /// <summary>Compares name and date; two nulls are equal, null never equals a job.</summary>
    public bool Equals(JobObject x, JobObject y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }
        if (x == null || y == null)
        {
            return false;
        }
        return x.Name == y.Name &&
               x.Date == y.Date;
    }

    /// <summary>
    /// Hash built from the same fields <see cref="Equals(JobObject, JobObject)"/> compares.
    /// Returning <c>job.GetHashCode()</c> (the reference hash) would give two equal jobs
    /// different hashes, which breaks HashSet, Dictionary and Distinct.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="job"/> is null.</exception>
    public int GetHashCode(JobObject job)
    {
        if (job == null)
        {
            throw new ArgumentNullException("job");
        }
        unchecked
        {
            int hash = 17;
            hash = hash * 31 + (job.Name == null ? 0 : job.Name.GetHashCode());
            hash = hash * 31 + (job.Date == null ? 0 : job.Date.GetHashCode());
            return hash;
        }
    }
    // Then use it as follows.
    // JobObject newJob = CreateJob();
    // var comparer = new JobObjectComparer();
    // var existingJob = jobsArray.Find(x => comparer.Equals(x, newJob));
    // if(existingJob != null)
    // {
    // Job Exists
    // }
}
Let me know if this helps.
I have rtf documents that include an embedded object (an image). I need to extract this as an Image object (or any other usable format). I have checked out this CodeProject article but the default apps don't render it correctly (They render the 'default image' image, not the image itself), so I moved on.
Here is a sample of the RTF Code (I had to shorten it because of size):
{\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 MS Sans Serif;}}
\viewkind4\uc1\pard\lang1033\f0\fs18{\object\objemb{\*\objclass Package}\objw855\objh810{\*\objdata
01050000
02000000
08000000
5061636b61676500
00000000
00000000
1f900000
02007369675f5f2e6a706700433a5c55736572735c726563657074696f6e5c4465736b746f705c
5369676e6174757265735c7369675f5f2e6a7067000000030034000000433a5c55736572735c52
45434550547e315c417070446174615c4c6f63616c5c54656d705c7369675f5f20283132292e6a
706700c18e0000ffd8ffe000104a46494600010101004800470000ffdb00430001010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101010101010101010101ffdb00430101010101010101010101010101010101
010101010101010101010101010101010101010101010101010101010101010101010101010101
010101010101010101ffc0001108012c03e803012200021101031101ffc4001f00010002030002
0301000000000000000000090a07080b050602030401ffc4003f10000006030001040201030301
04070900000203040506010708090a11121314152116172223314118192532591a24576598d6d8
2933384651788497b7ffc4001a010101000301010000000000000000000000030204050106ffc4
002b11010003010100020103030402030000000002030401051112130614211522230731415124
32536162ffda000c03010002110311003f00bfc000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000
...
005c0072006500630065007000740069006f006e005c004400650073006b0074006f0070005c00
5300690067006e006100740075007200650073005c007300690067005f005f002e006a00700067
00
01050000
00000000
}{\result{\pict\wmetafile8\picw2010\pich1905\picwgoal855\pichgoal810
0100090000033b0700000200210600000000050000000b0200000000050000000c02350038001c
000000fb02f4ff000000000000900100000001000000005365676f65205549000e0a52104c2308
00dd1900d894ef758001f3758d0e664a040000002d010000050000000902000000000500000001
02ffffff00a5000000410bc600880020002000000000002000200000000c002800000020000000
400000000100010000000000000100000000000000000000000000000000000000000000ffffff
...
0021001c001c000000fb021000070000000000bc02000000000102022253797374656d00008d0e
664a00000a0022008a0100000000ffffffff8cdd1900040000002d010100030000000000
}}}\par
}
Here is a piece of code that can extract all objects ('Package' class objects) from an RTF stream:
/// <summary>
/// Scans the RTF file at <paramref name="filePath"/> and writes the payload of every embedded
/// object of class "Package" to a file in the current directory.
/// </summary>
/// <param name="filePath">Path of the RTF document to read.</param>
/// <remarks>
/// Changes compared with the previous version:
/// - the enumerator and the temporary input stream are disposed;
/// - the output name is reduced to its file-name part, because the display name comes from
///   the document and could otherwise point outside the current directory;
/// - an "objdata" control word with no following text is skipped instead of crashing.
/// </remarks>
public static void ExtractPackageObjects(string filePath)
{
    using (StreamReader sr = new StreamReader(filePath))
    using (IEnumerator<RtfObject> enumerator = new RtfReader(sr).Read().GetEnumerator())
    {
        while (enumerator.MoveNext())
        {
            if (enumerator.Current.Text != "object")
            {
                continue;
            }
            if (!RtfReader.MoveToNextControlWord(enumerator, "objclass"))
            {
                continue;
            }
            string className = RtfReader.GetNextText(enumerator);
            if (className != "Package")
            {
                continue;
            }
            if (!RtfReader.MoveToNextControlWord(enumerator, "objdata"))
            {
                continue;
            }
            byte[] data = RtfReader.GetNextTextAsByteArray(enumerator);
            if (data == null)
            {
                // End of document reached before any object data was found.
                continue;
            }

            using (MemoryStream objectData = new MemoryStream(data))
            using (MemoryStream packageData = new MemoryStream())
            {
                RtfReader.ExtractObjectData(objectData, packageData);
                packageData.Position = 0;
                PackagedObject po = PackagedObject.Extract(packageData);
                // Keep only the file name so the write stays inside the current directory.
                string fileName = Path.GetFileName(po.DisplayName);
                File.WriteAllBytes(fileName, po.Data);
            }
        }
    }
}
And here are the utility classes that this code uses. There is a simple stream-based RTF parser that allows to get to the interesting control words.
There is also a utility to extract data from a serialized Object Packager instance. Object Packager is an almost 20-year-old OLE 1.0 feature and its serialized binary format is not documented (to my knowledge), but it's understandable.
This works fine on your provided sample, but you may have to adapt things around.
public class RtfReader
{
/// <summary>Creates a reader that parses RTF from <paramref name="reader"/>.</summary>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public RtfReader(TextReader reader)
{
    if (reader != null)
    {
        Reader = reader;
        return;
    }
    throw new ArgumentNullException("reader");
}
/// <summary>The underlying text source supplied at construction.</summary>
public TextReader Reader { get; private set; }
/// <summary>
/// Lazily tokenises the RTF stream into control words (<c>RtfControlWord</c>) and text runs
/// (<c>RtfText</c>), in document order.
/// </summary>
/// <returns>A deferred sequence; the reader is consumed as the sequence is enumerated.</returns>
/// <remarks>
/// This is a deliberately simple state machine, not a full RTF parser. Carriage returns and
/// line feeds are dropped everywhere.
/// Fix: after an ANSI escape (<c>\'hh</c>) the parser now returns to the Text state.
/// Previously it stayed in EscapedText, so the character following the escape was flushed as
/// the start of a control word. A stream that ends in the middle of an escape is now treated
/// as end of input instead of failing with a FormatException.
/// </remarks>
public IEnumerable<RtfObject> Read()
{
    StringBuilder controlWord = new StringBuilder();
    StringBuilder text = new StringBuilder();
    Stack<RtfParseState> stack = new Stack<RtfParseState>();
    RtfParseState state = RtfParseState.Group;
    do
    {
        int i = Reader.Read();
        if (i < 0)
        {
            // End of input: flush whatever is still buffered.
            if (!string.IsNullOrWhiteSpace(controlWord.ToString()))
                yield return new RtfControlWord(controlWord.ToString());
            if (!string.IsNullOrWhiteSpace(text.ToString()))
                yield return new RtfText(text.ToString());
            yield break;
        }
        char c = (char)i;
        // noise chars
        if ((c == '\r') ||
            (c == '\n'))
            continue;
        switch (state)
        {
            case RtfParseState.Group:
                if (c == '{')
                {
                    stack.Push(state);
                    break;
                }
                if (c == '\\')
                {
                    state = RtfParseState.ControlWord;
                    break;
                }
                break;
            case RtfParseState.ControlWord:
                if (c == '\\')
                {
                    // another controlWord
                    if (!string.IsNullOrWhiteSpace(controlWord.ToString()))
                    {
                        yield return new RtfControlWord(controlWord.ToString());
                        controlWord.Clear();
                    }
                    break;
                }
                if (c == '{')
                {
                    // a new group
                    state = RtfParseState.Group;
                    if (!string.IsNullOrWhiteSpace(controlWord.ToString()))
                    {
                        yield return new RtfControlWord(controlWord.ToString());
                        controlWord.Clear();
                    }
                    break;
                }
                if (c == '}')
                {
                    // close group
                    state = stack.Count > 0 ? stack.Pop() : RtfParseState.Group;
                    if (!string.IsNullOrWhiteSpace(controlWord.ToString()))
                    {
                        yield return new RtfControlWord(controlWord.ToString());
                        controlWord.Clear();
                    }
                    break;
                }
                if (!Char.IsLetterOrDigit(c))
                {
                    // Any other non-alphanumeric character ends the control word and starts text;
                    // the delimiter itself is kept as the first text character.
                    state = RtfParseState.Text;
                    text.Append(c);
                    if (!string.IsNullOrWhiteSpace(controlWord.ToString()))
                    {
                        yield return new RtfControlWord(controlWord.ToString());
                        controlWord.Clear();
                    }
                    break;
                }
                controlWord.Append(c);
                break;
            case RtfParseState.Text:
                if (c == '\\')
                {
                    state = RtfParseState.EscapedText;
                    break;
                }
                if (c == '{')
                {
                    if (!string.IsNullOrWhiteSpace(text.ToString()))
                    {
                        yield return new RtfText(text.ToString());
                        text.Clear();
                    }
                    // a new group
                    state = RtfParseState.Group;
                    break;
                }
                if (c == '}')
                {
                    if (!string.IsNullOrWhiteSpace(text.ToString()))
                    {
                        yield return new RtfText(text.ToString());
                        text.Clear();
                    }
                    // close group
                    state = stack.Count > 0 ? stack.Pop() : RtfParseState.Group;
                    break;
                }
                text.Append(c);
                break;
            case RtfParseState.EscapedText:
                if ((c == '\\') || (c == '}') || (c == '{'))
                {
                    state = RtfParseState.Text;
                    text.Append(c);
                    break;
                }
                // ansi character escape
                if (c == '\'')
                {
                    int hi = Reader.Read();
                    int lo = Reader.Read();
                    // The escape is complete: what follows is ordinary text again.
                    state = RtfParseState.Text;
                    if (hi >= 0 && lo >= 0)
                    {
                        text.Append(FromHexa((char)hi, (char)lo));
                    }
                    // On a truncated escape nothing is appended; the next Read() reports end
                    // of input and the buffered text is flushed by the branch at the top.
                    break;
                }
                if (!string.IsNullOrWhiteSpace(text.ToString()))
                {
                    yield return new RtfText(text.ToString());
                    text.Clear();
                }
                // in fact, it's a normal controlWord
                controlWord.Append(c);
                state = RtfParseState.ControlWord;
                break;
        }
    }
    while (true);
}
/// <summary>
/// Advances <paramref name="enumerator"/> until the current item's text equals
/// <paramref name="word"/>.
/// </summary>
/// <returns><c>true</c> if such an item was reached; <c>false</c> if the sequence ended first.</returns>
/// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
public static bool MoveToNextControlWord(IEnumerator<RtfObject> enumerator, string word)
{
    if (enumerator == null)
    {
        throw new ArgumentNullException("enumerator");
    }

    bool found = false;
    while (!found && enumerator.MoveNext())
    {
        found = enumerator.Current.Text == word;
    }
    return found;
}
/// <summary>
/// Advances <paramref name="enumerator"/> to the next <c>RtfText</c> item and returns its text.
/// </summary>
/// <returns>The text of the next text run, or <c>null</c> if the sequence ends first.</returns>
/// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
public static string GetNextText(IEnumerator<RtfObject> enumerator)
{
    if (enumerator == null)
    {
        throw new ArgumentNullException("enumerator");
    }

    while (enumerator.MoveNext())
    {
        if (enumerator.Current is RtfText)
        {
            return enumerator.Current.Text;
        }
    }
    return null;
}
/// <summary>
/// Advances <paramref name="enumerator"/> to the next <c>RtfText</c> item and decodes its
/// text as a run of hexadecimal byte pairs.
/// </summary>
/// <returns>The decoded bytes, or <c>null</c> if the sequence ends before any text run.</returns>
/// <exception cref="ArgumentNullException"><paramref name="enumerator"/> is null.</exception>
/// <exception cref="FormatException">
/// The text holds an odd number of hex digits (previously an IndexOutOfRangeException) or a
/// character that is not a hex digit.
/// </exception>
public static byte[] GetNextTextAsByteArray(IEnumerator<RtfObject> enumerator)
{
    if (enumerator == null)
        throw new ArgumentNullException("enumerator");
    while (enumerator.MoveNext())
    {
        RtfText text = enumerator.Current as RtfText;
        if (text != null)
        {
            // Drop any whitespace left inside the hex run so pairs stay aligned.
            StringBuilder digits = new StringBuilder(text.Text.Length);
            foreach (char c in text.Text)
            {
                if (!char.IsWhiteSpace(c))
                    digits.Append(c);
            }
            if (digits.Length % 2 != 0)
                throw new FormatException("Hexadecimal object data has an odd number of digits.");

            byte[] bytes = new byte[digits.Length / 2];
            for (int i = 0; i < bytes.Length; i++)
            {
                bytes[i] = (byte)FromHexa(digits[2 * i], digits[2 * i + 1]);
            }
            return bytes;
        }
    }
    return null;
}
/// <summary>
/// Extracts the native data of an OLE 1.0 EmbeddedObject from <paramref name="inputStream"/>
/// and writes it to <paramref name="outputStream"/>.
/// </summary>
/// <remarks>
/// See [MS-OLEDS]: Object Linking and Embedding (OLE) Data Structures,
/// chapter 2.2: OLE1.0 Format Structures.
/// If the stream holds fewer native-data bytes than announced, the bytes that are present are
/// written (unchanged behaviour).
/// </remarks>
/// <exception cref="ArgumentNullException">A stream argument is null.</exception>
/// <exception cref="NotSupportedException">The header's FormatID is not 2 (EmbeddedObject).</exception>
/// <exception cref="InvalidDataException">A length field is negative.</exception>
/// <exception cref="EndOfStreamException">The header is truncated.</exception>
public static void ExtractObjectData(Stream inputStream, Stream outputStream)
{
    if (inputStream == null)
        throw new ArgumentNullException("inputStream");
    if (outputStream == null)
        throw new ArgumentNullException("outputStream");
    // The reader is intentionally not disposed: disposing it would close the caller's stream.
    BinaryReader reader = new BinaryReader(inputStream);
    reader.ReadInt32(); // OLEVersion
    int formatId = reader.ReadInt32(); // FormatID
    if (formatId != 2) // see 2.2.4 Object Header. 2 means EmbeddedObject
        throw new NotSupportedException();
    ReadLengthPrefixedAnsiString(reader); // className
    ReadLengthPrefixedAnsiString(reader); // topicName
    ReadLengthPrefixedAnsiString(reader); // itemName
    int nativeDataSize = reader.ReadInt32();
    if (nativeDataSize < 0)
        throw new InvalidDataException("Negative native data size in OLE object header.");
    byte[] bytes = reader.ReadBytes(nativeDataSize);
    outputStream.Write(bytes, 0, bytes.Length);
}
/// <summary>
/// Reads a LengthPrefixedAnsiString (see [MS-OLEDS] chapter 2.1.4): a 32-bit byte count
/// followed by that many bytes, normally ending in a NUL terminator.
/// </summary>
/// <returns>The string without its NUL terminator; empty when the length is 0.</returns>
/// <exception cref="InvalidDataException">The length prefix is negative.</exception>
/// <exception cref="EndOfStreamException">Fewer bytes are available than the prefix announces.</exception>
private static string ReadLengthPrefixedAnsiString(BinaryReader reader)
{
    int length = reader.ReadInt32();
    if (length == 0)
        return string.Empty;
    if (length < 0)
        throw new InvalidDataException("Negative string length in OLE object header.");
    byte[] bytes = reader.ReadBytes(length);
    if (bytes.Length != length)
        throw new EndOfStreamException("OLE object header string is truncated.");
    // Strip the terminator only when it is really there, so a non-terminated string keeps
    // its last character.
    int count = bytes[length - 1] == 0 ? length - 1 : length;
    return Encoding.Default.GetString(bytes, 0, count);
}
// States of the tokenizer state machine used by Read().
private enum RtfParseState
{
// Accumulating the letters/digits of a control word (entered after a backslash).
ControlWord,
// Accumulating plain text.
Text,
// A backslash was seen while reading text; the next character decides whether this is an
// escaped literal, a \'hh hex escape, or the start of a control word.
EscapedText,
// Initial state, and the state entered after '{'.
Group
}
/// <summary>
/// Converts a pair of hexadecimal digits into the character with that byte value.
/// </summary>
/// <param name="hi">High-order hex digit.</param>
/// <param name="lo">Low-order hex digit.</param>
/// <exception cref="FormatException">The pair is not a valid hexadecimal number.</exception>
private static char FromHexa(char hi, char lo)
{
    string hexPair = new string(new char[] { hi, lo });
    byte value = byte.Parse(hexPair, NumberStyles.HexNumber);
    return (char)value;
}
}
/// <summary>
/// Parsed contents of an OLE 1.0 Object Packager payload (an embedded file).
/// Instances are created through <see cref="Extract"/>.
/// </summary>
public class PackagedObject
{
    private PackagedObject()
    {
    }

    /// <summary>First NUL-terminated string of the payload.</summary>
    public string DisplayName { get; private set; }
    /// <summary>Second NUL-terminated string of the payload.</summary>
    public string IconFilePath { get; private set; }
    /// <summary>16-bit value that follows the icon path.</summary>
    public int IconIndex { get; private set; }
    /// <summary>NUL-terminated path string that precedes the data.</summary>
    public string FilePath { get; private set; }
    /// <summary>The embedded file's bytes.</summary>
    public byte[] Data { get; private set; }

    // Reads bytes up to (and consuming) the next NUL, mapping each byte to the char with the
    // same numeric value.
    private static string ReadAnsiString(BinaryReader reader)
    {
        StringBuilder buffer = new StringBuilder();
        for (byte current = reader.ReadByte(); current != 0; current = reader.ReadByte())
        {
            buffer.Append((char)current);
        }
        return buffer.ToString();
    }

    /// <summary>Parses a packaged file from <paramref name="inputStream"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="inputStream"/> is null.</exception>
    /// <exception cref="NotSupportedException">The entry type is not 3 (file); 1 would be a link.</exception>
    public static PackagedObject Extract(Stream inputStream)
    {
        if (inputStream == null)
        {
            throw new ArgumentNullException("inputStream");
        }

        const int FileEntryType = 3; // 3 is file, 1 is link

        BinaryReader reader = new BinaryReader(inputStream);
        reader.ReadUInt16(); // sig
        string displayName = ReadAnsiString(reader);
        string iconFilePath = ReadAnsiString(reader);
        int iconIndex = reader.ReadUInt16();
        int entryType = reader.ReadUInt16();
        if (entryType != FileEntryType)
        {
            throw new NotSupportedException();
        }
        reader.ReadInt32(); // nextsize
        string filePath = ReadAnsiString(reader);
        int dataSize = reader.ReadInt32();
        byte[] data = reader.ReadBytes(dataSize);
        // note after that, there may be unicode + long path info

        return new PackagedObject
        {
            DisplayName = displayName,
            IconFilePath = iconFilePath,
            IconIndex = iconIndex,
            FilePath = filePath,
            Data = data
        };
    }
}
/// <summary>Base type of the tokens produced by the RTF reader; carries the token's text.</summary>
public class RtfObject
{
    /// <summary>The token text with leading and trailing whitespace removed.</summary>
    public string Text { get; private set; }

    /// <param name="text">Raw token text; stored trimmed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public RtfObject(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException("text");
        }

        string trimmed = text.Trim();
        Text = trimmed;
    }
}
// Token type for a run of text; adds nothing to RtfObject beyond its type.
public class RtfText : RtfObject
{
// Forwards the text to the RtfObject constructor.
public RtfText(string text)
: base(text)
{
}
}
// Token type for a control word; adds nothing to RtfObject beyond its type.
public class RtfControlWord : RtfObject
{
// Forwards the control word's name to the RtfObject constructor, which stores it as Text.
public RtfControlWord(string name)
: base(name)
{
}
}
OK, this should work for you. To demonstrate my solution, I created a WinForms project with a PictureBox whose paint event handler was mapped to the following function:
/// <summary>
/// Paint handler: loads the picture embedded in "MySampleFile.rtf" and draws it into a
/// 100x100 rectangle at the top-left corner of the control.
/// </summary>
/// <remarks>
/// Image.FromStream requires the stream to stay open for the lifetime of the image. The
/// previous version disposed the stream before drawing, which can fail inside GDI+, and it
/// never disposed the image. Both are now disposed only after the draw call.
/// </remarks>
private void rtfImage_Paint(object sender, PaintEventArgs e)
{
    string rtfStr = System.IO.File.ReadAllText("MySampleFile.rtf");
    string imageDataHex = ExtractImgHex(rtfStr);
    byte[] imageBuffer = ToBinary(imageDataHex);
    using (MemoryStream stream = new MemoryStream(imageBuffer))
    using (Image image = Image.FromStream(stream))
    {
        Rectangle rect = new Rectangle(0, 0, 100, 100);
        e.Graphics.DrawImage(image, rect);
    }
}
This code relies on the System.Drawing.Image.FromStream() method, along with two "helper" functions:
A string extractor:
/// <summary>
/// Returns the hexadecimal picture data of the first <c>{\pict\...}</c> group in an RTF string.
/// </summary>
/// <param name="s">Complete RTF text.</param>
/// <returns>
/// The characters between the whitespace that ends the group's last control word and the
/// group's closing brace; an empty string when the group holds no data.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
/// <exception cref="FormatException">No picture group, or no closing brace, was found.</exception>
/// <remarks>
/// Assumes one picture per file and no nested groups inside the picture group.
/// The earlier version only accepted a space before the data and did not check the results
/// of IndexOf. A picture whose data starts on a new line (as in the sample document) or a
/// document without a picture therefore failed with ArgumentOutOfRangeException.
/// </remarks>
string ExtractImgHex(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    int pictTagIdx = s.IndexOf("{\\pict\\", StringComparison.Ordinal);
    if (pictTagIdx < 0)
    {
        throw new FormatException("No {\\pict group found in the RTF text.");
    }

    int endIndex = s.IndexOf('}', pictTagIdx);
    if (endIndex < 0)
    {
        throw new FormatException("The {\\pict group is not terminated.");
    }

    // The data follows the last control word of the group, so locate that control word first
    // and then the whitespace (space, tab or line break) that terminates it.
    int lastControlWord = s.LastIndexOf('\\', endIndex - 1, endIndex - pictTagIdx);
    int delimiter = s.IndexOfAny(new char[] { ' ', '\t', '\r', '\n' }, lastControlWord, endIndex - lastControlWord);
    if (delimiter < 0)
    {
        return string.Empty;
    }

    int startIndex = delimiter + 1;
    return s.Substring(startIndex, endIndex - startIndex);
}
... and a binary converter:
/// <summary>
/// Converts a string of hexadecimal digit pairs into bytes, ignoring any whitespace.
/// </summary>
/// <param name="imageDataHex">Hex text, possibly broken up by spaces or line breaks.</param>
/// <returns>One byte per digit pair; a trailing unpaired digit is ignored.</returns>
/// <exception cref="ArgumentNullException"><paramref name="imageDataHex"/> is null.</exception>
/// <exception cref="FormatException">A non-whitespace character is not a hex digit.</exception>
/// <remarks>
/// Adapted from http://www.codeproject.com/Articles/27431/Writing-Your-Own-RTF-Converter
/// Fix: the result used to be sized from the raw string length, whitespace included, so
/// multi-line hex data produced an array padded with trailing zero bytes. The size is now
/// computed from the hex digits only.
/// </remarks>
public static byte[] ToBinary(string imageDataHex)
{
    if (imageDataHex == null)
    {
        throw new ArgumentNullException("imageDataHex");
    }

    // First pass: count the characters that actually carry data.
    int hexDigits = 0;
    foreach (char c in imageDataHex)
    {
        if (!char.IsWhiteSpace(c))
        {
            hexDigits++;
        }
    }

    byte[] imageDataBinary = new byte[hexDigits / 2];
    StringBuilder hex = new StringBuilder(2);
    int dataPos = 0;

    // Second pass: decode the digits pair by pair.
    foreach (char c in imageDataHex)
    {
        if (char.IsWhiteSpace(c))
        {
            continue;
        }
        hex.Append(c);
        if (hex.Length == 2)
        {
            imageDataBinary[dataPos] = byte.Parse(hex.ToString(), System.Globalization.NumberStyles.HexNumber);
            dataPos++;
            hex.Length = 0;
        }
    }
    return imageDataBinary;
}
The code below can extract all types of embedded objects, including images, documents, mails, etc., with their original file names, and save them to a local path.
// Saves every embedded OLE object of the document under its suggested file name.
// The verbatim-string prefix is '@'; the '#' in the earlier text was a garbled character and
// did not compile.
string MyDir = @"E:\temp\";
Document doc = new Document(MyDir + "Requirement#4.rtf");
// true: search the whole document tree for shapes, not only direct children.
NodeCollection nodeColl = doc.GetChildNodes(NodeType.Shape, true);
foreach (var node in nodeColl)
{
    Shape shape1 = (Shape)node;
    // Only shapes that wrap an OLE object have an OleFormat.
    if (shape1.OleFormat != null)
    {
        shape1.OleFormat.Save(MyDir + shape1.OleFormat.SuggestedFileName + shape1.OleFormat.SuggestedExtension);
    }
}