I'm currently trying to scrape a cannabis strain database as it is no longer being maintained. I seem to be running into an issue where table rows are skipped in my logic but it really doesn't make sense, it's like a break is being called when I'm iterating through the //tr elements of the table that is storing the chemical reports.
Is it something like the δ α symbols in the next row. I've tried regex replacing them out to now luck. Any help would be appreciated all code is in a single class console app.
Issue is in the table.ChildNodes not iterating all the way through. Located in the ParseChemical() method.
Sample Page: http://ocpdb.pythonanywhere.com/ocpdb/420/
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations.Schema;
using System.ComponentModel.Design;
using System.IO.IsolatedStorage;
using System.Linq;
using System.Linq.Expressions;
using System.Net;
using System.Net.Http;
using System.Reflection.Emit;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Microsoft.VisualBasic.CompilerServices;
namespace CScrape
{
class Program
{
private static readonly HttpClient Client = new HttpClient();
private static readonly string BaseUri = "http://ocpdb.pythonanywhere.com/ocpdb/";
private static int _startId = 420;
private static int _endId = 1519;
private static List<Lab> _labs = new List<Lab>();
private static List<ChemicalItem> _chemicalItems = new List<ChemicalItem>();
private static List<UnitOfMeasure> _uoms = new List<UnitOfMeasure>();
private static List<Strain> _strains = new List<Strain>();
static void Main(string[] args)
{
Client.DefaultRequestHeaders.Accept.Clear();
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls13;
_uoms.AddRange(GetUoms());
for (var i = _startId; i <= _endId; i++)
{
var result = GetUri($"{BaseUri}{i}").Result;
_strains.Add(ParseChemical(result));
}
}
private static long AddChemicalItem(ChemicalItem item)
{
if (ChemicalExists(item.Symbol))
return _chemicalItems.FirstOrDefault(ci => ci.Symbol == item.Symbol)?.Id ?? -1;
item.Id = _chemicalItems.Count + 1;
_chemicalItems.Add(item);
return item.Id;
}
private static void UpdateChemicalItem(ChemicalItem item)
{
if (!ChemicalExists(item.Symbol)) return;
var index = _chemicalItems.IndexOf(item);
if (!(index >= 0)) return;
_chemicalItems.RemoveAt(index);
AddChemicalItem(item);
}
private static long AddLab(Lab lab)
{
if (LabExists(lab.Name))
return _labs.FirstOrDefault(l => l.Name == lab.Name)?.Id ?? -1;
lab.Id = _labs.Count + 1;
_labs.Add(lab);
return lab.Id;
}
private static async Task<string> GetUri(string uri)
{
var response = await Client.GetByteArrayAsync(uri);
return Encoding.UTF8.GetString(response, 0, response.Length - 1);
}
private static Strain ParseChemical(string html)
{
html = Regex.Replace(html, #"Δ", "Delta");
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(html);
var strain = new Strain();
strain.Reports ??= new List<ChemicalReport>();
try
{
strain.Name = htmlDoc.DocumentNode.SelectSingleNode("/html/body/div/div[1]/div[1]/h3/b").InnerText;
}
catch (Exception e)
{
// TODO: DOcument Exception
Console.WriteLine(e.Message);
}
if (string.IsNullOrWhiteSpace(strain.Name)) return null;
try
{
var ocpId = htmlDoc.DocumentNode.SelectSingleNode("/html/body/div/div[1]/div[2]/p/b/text()[1]");
strain.OcpId = SanitizeHtml(ocpId.InnerText).Split(':')[1];
}
catch (Exception e)
{
// TODO: Document Exception
Console.WriteLine(e.Message);
}
if (string.IsNullOrWhiteSpace(strain.OcpId)) return null;
try
{
var date = htmlDoc.DocumentNode.SelectSingleNode("/html/body/div/div[1]/div[2]/p/text()");
}
catch (Exception e)
{
// TODO: Document Exception
Console.WriteLine(e.Message);
}
var chemReport = new ChemicalReport();
chemReport.Items ??= new List<ReportItem>();
try
{
var table = htmlDoc.DocumentNode.SelectSingleNode("/html/body/div/div[2]/div[1]/table/tbody");
var children = table.ChildNodes.ToList();
// On the sample page there are 200 children here
// However it only interates through the first few and then just breaks out of the loop
foreach (var child in children)
{
var name = child.Name;
if (child.Name == "tr")
{
var infos = child.SelectNodes("th|td");
foreach (var info in infos)
{
if(string.IsNullOrWhiteSpace(info.InnerText)) continue;
if (info.InnerText.Contains("Report")) continue;
if (double.TryParse(info.InnerText, out var isNumber))
{
var last = chemReport.Items.LastOrDefault();
if (last == null) continue;
if (last.Value <= 0.0000) last.Value = isNumber;
else
{
var further = chemReport.Items.ToArray()[chemReport.Items.Count - 2];
if (further.Value <= 0.0000)
further.Value = isNumber;
}
continue;
}
var _ = new ChemicalItem
{
Name = info.InnerText,
Symbol = info.InnerText
};
_.Id = AddChemicalItem(_);
var report = new ReportItem
{
Chemical = _,
ChemicalItemId = _.Id,
UnitOfMeasureId = 1,
UoM = GetUoms()[0]
};
chemReport.Items.Add(report);
}
}
}
strain.Reports.Add(chemReport);
}
catch (Exception e)
{
// TODO: Document exception
Console.Write(e.Message);
}
return strain;
}
private static List<UnitOfMeasure> GetUoms()
{
return new List<UnitOfMeasure>
{
new UnitOfMeasure {Name = "Milligrams per Gram", Symbol = "mg/g"},
new UnitOfMeasure {Name = "Percent", Symbol = "%"}
};
}
private static string SanitizeHtml(string text, string replacement = "")
{
return Regex.Replace(text, #"<[^>]+>| |α|\n|\t", replacement);
}
private static string GetLabName(string[] split)
{
var strip = split[0].Split(':')[1];
for(var i = 1; i < split.Length - 2; i ++)
{
if (string.IsNullOrWhiteSpace(split[i])) break;
strip += $" {split[i]}";
}
return strip;
}
private static string GetSampleId(string[] split)
{
var found = false;
foreach (var item in split)
{
if (found)
return item.Split(':')[1];
if (item == "Sample") found = true;
}
return "NA";
}
private static bool LabExists(string name)
{
return _labs.Any(lab => lab.Name == name);
}
private static bool ChemicalExists(string name)
{
return _chemicalItems.Any(ci => ci.Symbol == name);
}
private static ReportItem GetReportItem(string text)
{
if (string.IsNullOrWhiteSpace(text)) return null;
ReportItem ri = null;
try
{
var clean = SanitizeHtml(text);
var check = 0;
var split = clean.Split(':');
var label = split[0];
if (string.IsNullOrWhiteSpace(label)) return null;
if (double.TryParse(label, out var invalidType)) return null;
var val = string.Empty;
if (split.Length == 1)
{
if (split[0].Contains("Total"))
{
Regex re = new Regex(#"([a-zA-Z]+)(\d+)");
Match result = re.Match(split[0]);
label = result.Groups[1].Value;
val = result.Groups[2].Value;
}
}
if(split.Length > 1)
val = split[1];
if (!ChemicalExists(label)) AddChemicalItem(new ChemicalItem {Id = _chemicalItems.Count + 1,Symbol = label});
ri = new ReportItem();
ri.Chemical = _chemicalItems.FirstOrDefault(ci => ci.Symbol == label);
ri.UoM = val.Contains("%")
? _uoms.FirstOrDefault(uom => uom.Symbol == "%")
: _uoms.FirstOrDefault(uom => uom.Symbol == "mg/g");
if (string.IsNullOrWhiteSpace(val)) return ri;
var value = val.Contains("%") ? split[1].Substring(0, val.Length - 1) : val;
ri.Value = Convert.ToDouble(value);
}
catch (Exception e)
{
// TODO: Document Exception
Console.WriteLine(e.Message);
}
return ri;
}
//private static ChemicalItem GetChemicalItem(string text)
//{
//}
public class Strain
{
public long Id { get; set; }
public string Name { get; set; }
public DateTime Created { get; set; }
public string OcpId { get; set; }
public bool IsHidden { get; set; } = false;
public virtual ICollection<ChemicalReport> Reports { get; set; }
}
public class Lab
{
public long Id { get; set; }
public string Name { get; set; }
public virtual ICollection<ChemicalReport> Reports { get; set; }
}
public class ChemicalReport
{
public long Id { get; set; }
[ForeignKey("Lab")]
public long LabId { get; set; }
public virtual Lab Lab { get; set; }
public string SampleId { get; set; }
public DateTime Created { get; set; }
public virtual ICollection<ReportItem> Items { get; set; }
[ForeignKey("Strain")]
public long StrainId { get; set; }
public virtual Strain Strain { get; set; }
}
public class ChemicalItem
{
public long Id { get; set; }
public string Name { get; set; }
public string Symbol { get; set; }
}
public class ReportItem
{
public long Id { get; set; }
[ForeignKey("Chemical")]
public long ChemicalItemId { get; set; }
public virtual ChemicalItem Chemical { get; set; }
public double Value { get; set; }
[ForeignKey("UoM")]
public long UnitOfMeasureId { get; set; }
public virtual UnitOfMeasure UoM { get; set; }
}
public class UnitOfMeasure
{
public long Id { get; set; }
public string Name { get; set; }
public string Description { get; set; }
public string Symbol { get; set; }
}
}
}
Starting from a table of daily fruit prices
fruits.csv
Day,Name,Kind,Price
2019-09-04,"apple","red",63.09
2019-09-04,"apple","yellow",52.14
2019-09-04,"orange","navel",41.18
2019-09-04,"orange","blood",41.18
2019-09-03,"apple","red",63.07
2019-09-03,"apple","yellow",52.11
2019-09-03,"orange","navel",41.13
2019-09-03,"orange","blood",41.13
I'd like to insert the reference prices by name and kind
fruit_ref_prices.csv
Name,Kind,Reference_Price
"apple","red",60.00
"apple","yellow",50.00
"orange","navel",40.00
"orange","blood",42.00
to result in the following table
Day,Name,Kind,Price,Reference_Price
2019-09-04,"apple","red",63.09,60.00
2019-09-04,"apple","yellow",52.14,50.00
2019-09-04,"orange","navel",41.18,40.00
2019-09-04,"orange","blood",41.18,42.00
2019-09-03,"apple","red",63.07,60.00
2019-09-03,"apple","yellow",52.11,50.00
2019-09-03,"orange","navel",41.13,40.00
2019-09-03,"orange","blood",41.13,42.00
The solution should be simple using C#'s built-in SQL-like syntax, and I'm sure the answer lies in one of the following tutorial pages:
Join clause
Perform custom join operations
Join by using composite keys
but I'm having a hard time identifying the syntax of this language.
In my attempt below instead of writing
join fruit_ref in fruit_refs on fruit.name equals fruit_ref.name
I should be able to write
join fruit_ref in fruit_refs on fruit.name equals fruit_ref.name
and fruit.kind equals fruit_ref.kind
but the Boolean expression is not accepted. Why?
My attempt is:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace MyConsoleApplication
{
class Program
{
const string root = #"c:\path\to\here\";
const string file1_in = root + #"fruits.csv";
const string file2_in = root + #"fruit_ref_prices.csv";
static void Main(string[] args)
{
Fruit_Basket fruit_basket = new Fruit_Basket(file1_in, file2_in);
fruit_basket.PrintFruits();
}
}
public class Fruit
{
public DateTime day { get; set; }
public string name { get; set; }
public string kind { get; set; }
public decimal price { get; set; }
public Fruit(DateTime newFruit_day,
string newFruit_name,
string newFruit_kind,
decimal newFruit_price)
{
this.day = newFruit_day;
this.name = newFruit_name;
this.kind = newFruit_kind;
this.price = newFruit_price;
}
}
public class Fruit_Ref
{
public string name;
public string kind;
public decimal reference_price;
public Fruit_Ref(string newName, string newKind, decimal newRef_Price)
{
this.name = newName;
this.kind = newKind;
this.reference_price = newRef_Price;
}
}
public class Fruit_Basket {
public List<Fruit> fruits { get; set; }
public List<Fruit_Ref> fruit_refs { get; set; }
public Fruit_Basket(string file1_in, string file2_in) {
build_fruit_list(file1_in);
build_fruit_ref_list(file2_in);
}
public void build_fruit_list(string file_in)
{
fruits = new List<Fruit>();
int count = 0;
StreamReader reader = new StreamReader(file_in);
string line = "";
while ((line = reader.ReadLine()) != null)
{
if (++count > 1)
{
string[] splitLine = line.Split(new char[] { ',' }).ToArray();
var newFruit_day = DateTime.Parse(splitLine[0]);
var newFruit_name = splitLine[1];
var newFruit_kind = splitLine[2];
var newFruit_price = decimal.Parse(splitLine[3]);
Fruit newFruit = new Fruit(newFruit_day,
newFruit_name,
newFruit_kind,
newFruit_price);
fruits.Add(newFruit);
}
}
reader.Close();
}
public void build_fruit_ref_list(string file_in)
{
fruit_refs = new List<Fruit_Ref>();
int count = 0;
StreamReader reader = new StreamReader(file_in);
string line = "";
while ((line = reader.ReadLine()) != null)
{
if (++count > 1)
{
string[] splitLine = line.Split(new char[] { ',' }).ToArray();
var newFruit_name = splitLine[0];
var newFruit_kind = splitLine[1];
var newFruit_ref_price = decimal.Parse(splitLine[2]);
Fruit_Ref newFruit_ref = new Fruit_Ref(newFruit_name,
newFruit_kind,
newFruit_ref_price);
fruit_refs.Add(newFruit_ref);
}
}
reader.Close();
}
public void PrintFruits()
{
var innerJoinQuery =
from fruit in fruits
join fruit_ref in fruit_refs on fruit.name equals fruit_ref.name
select new { Day = fruit.day, Name = fruit.name, Kind = fruit.kind,
Price = fruit.price, Reference_Price = fruit_ref.reference_price };
Console.WriteLine($#"""Date"",""Name"",""Kind"",""Price"",""Ref Price""");
foreach (var i in innerJoinQuery)
{
Console.WriteLine($#"{i.Day},{i.Kind},{i.Price},{i.Reference_Price}");
}
}
}
}
You could also refactor your code to use the CsvHelper NuGet package for reading/writing CSV files.
First, You can make these classes to reflect the fruits, fruit references and final fruit structure.
public class Fruit
{
public string Day { get; set; }
public string Name { get; set; }
public string Kind { get; set; }
public string Price { get; set; }
}
public class FruitReferencePrice
{
public string Name { get; set; }
public string Kind { get; set; }
public string Reference_Price { get; set; }
}
public class FruitFinal
{
public string Day { get; set; }
public string Name { get; set; }
public string Kind { get; set; }
public string Price { get; set; }
public string ReferencePrice { get; set; }
public override string ToString()
{
return $"Day={Day},Name={Name},Kind={Kind},Price={Price},Reference_Price={ReferencePrice}";
}
}
Then you can make two methods to return the rows of each CSV file into List<Fruit> and List<FruitReferencePrice>.
private static IEnumerable<Fruit> BuildFruitList(string csvFilePath)
{
if (!File.Exists(csvFilePath))
{
throw new FileNotFoundException("Could not locate CSV at path " + csvFilePath, csvFilePath);
}
try
{
using var fileReader = File.OpenText(csvFilePath);
using var csv = new CsvReader(fileReader);
return csv.GetRecords<Fruit>().ToList();
} catch (Exception ex)
{
Console.WriteLine(ex.Message);
return Enumerable.Empty<Fruit>();
}
}
private static IEnumerable<FruitReferencePrice> BuildFruitReferenceList(string csvFilePath)
{
if (!File.Exists(csvFilePath))
{
throw new FileNotFoundException("Could not locate CSV at path " + csvFilePath, csvFilePath);
}
try
{
using var fileReader = File.OpenText(csvFilePath);
using var csv = new CsvReader(fileReader);
return csv.GetRecords<FruitReferencePrice>().ToList();
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
return Enumerable.Empty<FruitReferencePrice>();
}
}
Then you can perform a grouped join and output the merged result.
var path1 = "PATH\\fruits.csv";
var path2 = "PATH\\fruit_ref_prices.csv";
var fruitList = BuildFruitList(path1);
var fruitReferencePrices = BuildFruitReferenceList(path2);
var groupedJoin = from fruit in fruitList
join fruit_ref in fruitReferencePrices
on new { fruit.Name, fruit.Kind } equals new { fruit_ref.Name, fruit_ref.Kind }
select new FruitFinal
{
Day = fruit.Day,
Name = fruit.Name,
Kind = fruit.Kind,
Price = fruit.Price,
ReferencePrice = fruit_ref.Reference_Price
};
foreach (var fruit in groupedJoin)
{
Console.WriteLine(fruit.ToString());
}
Merged results:
Day=2019-09-04,Name=apple,Kind=red,Price=63.09,Reference_Price=60.00
Day=2019-09-04,Name=apple,Kind=yellow,Price=52.14,Reference_Price=50.00
Day=2019-09-04,Name=orange,Kind=navel,Price=41.18,Reference_Price=40.00
Day=2019-09-04,Name=orange,Kind=blood,Price=41.18,Reference_Price=42.00
Day=2019-09-03,Name=apple,Kind=red,Price=63.07,Reference_Price=60.00
Day=2019-09-03,Name=apple,Kind=yellow,Price=52.11,Reference_Price=50.00
Day=2019-09-03,Name=orange,Kind=navel,Price=41.13,Reference_Price=40.00
Day=2019-09-03,Name=orange,Kind=blood,Price=41.13,Reference_Price=42.00
Please change the equals clause as on new { fruit.name, fruit.kind } equals new { fruit_ref.name, fruit_ref.kind }
Why you require this
The query has two anonymous types (one for left table and one for right table). So to compare those anonymous types, the linq statement should use new keyword
Query :
var innerJoinQuery = from fruit in fruits
join fruit_ref in fruit_refs on new { fruit.name, fruit.kind } equals new { fruit_ref.name, fruit_ref.kind }
select new { Day = fruit.day, Name = fruit.name, Kind = fruit.kind,
Price = fruit.price, Reference_Price = fruit_ref.reference_price };
I am a beginner in programming,It's really difficult for me to analyze and debug how to skip reading the first line of the csv file. I need some help.
I need my id to fill my combobox in my form that contains all
Id's.In order to not include the header in browsing and
displaying.I need to skip the first line.
public bool ReadEntrie(int id, ref string name, ref string lastname, ref
string phone, ref string mail, ref string website)
{
int count = 0;
CreateConfigFile();
try
{
fs = new FileStream(data_path, FileMode.Open);
sr = new StreamReader(fs);
string temp = "";
bool cond = true;
while (cond == true)
{
if ((temp = sr.ReadLine()) == null)
{
sr.Close();
fs.Close();
cond = false;
if (count == 0)
return false;
}
if (count == id)
{
string[] stringSplit = temp.Split(',');
int _maxIndex = stringSplit.Length;
name = stringSplit[0].Trim('"');
lastname = stringSplit[1].Trim('"');
phone = stringSplit[2].Trim('"');
mail = stringSplit[3].Trim('"');
website = stringSplit[4].Trim('"');
}
count++;
}
sr.Close();
fs.Close();
return true;
}
catch
{
return false;
}
}
#Somadina's answer is correct, but I would suggest a better alternative. You could use a CSV file parser library such as CSV Helpers.
You can get the library from Nuget or Git. Nuget command would be:
Install-Package CsvHelper
Declare the following namespaces:
using CsvHelper;
using CsvHelper.Configuration;
Here's how simple your code looks when you use such a library:
class Program
{
static void Main(string[] args)
{
var csv = new CsvReader(File.OpenText("Path_to_your_csv_file"));
csv.Configuration.IgnoreHeaderWhiteSpace = true;
csv.Configuration.RegisterClassMap<MyCustomObjectMap>();
var myCustomObjects = csv.GetRecords<MyCustomObject>();
foreach (var item in myCustomObjects.ToList())
{
// Apply your application logic here.
Console.WriteLine(item.Name);
}
}
}
public class MyCustomObject
{
// Note: You may want to use a type converter to convert the ID to an integer.
public string ID { get; set; }
public string Name { get; set; }
public string Lastname { get; set; }
public string Phone { get; set; }
public string Mail { get; set; }
public string Website { get; set; }
public override string ToString()
{
return Name.ToString();
}
}
public sealed class MyCustomObjectMap : CsvClassMap<MyCustomObject>
{
public MyCustomObjectMap()
{
// In the name method, you provide the header text - i.e. the header value set in the first line of the CSV file.
Map(m => m.ID).Name("id");
Map(m => m.Name).Name("name");
Map(m => m.Lastname).Name("lastname");
Map(m => m.Phone).Name("phone");
Map(m => m.Mail).Name("mail");
Map(m => m.Website).Name("website");
}
}
Some more details in an answer here.
To skip the first line, just replace the line:
if (count == id)
with
if (count > 0 && count == id)
MORE THOUGHTS ON YOUR APPROACH
Because you used the ref keyword, each line you read will override the previous values you stored in the parameters. A better way to do this is to create a class to hold all the properties of interest. Then, for each line you read, package an instance of the class and add it to a list. You method signature (even the return type) will change eventually.
From your code, the class will look like this:
public class DataModel
{
public string Name { get; set; }
public string LastName { get; set; }
public string Phone{ get; set; }
public string Mail { get; set; }
public string Website{ get; set; }
}
Then your method will be like this:
public IList<DataModel> ReadEntrie(int id, string data_path)
{
int count = 0;
CreateConfigFile();
var fs = new FileStream(data_path, FileMode.Open);
var sr = new StreamReader(fs);
try
{
var list = new List<DataModel>();
string temp = "";
bool cond = true;
while (cond == true)
{
if ((temp = sr.ReadLine()) == null)
{
cond = false;
if (count == 0)
throw new Exception("Failed");
}
if (count > 0 && count == id)
{
string[] stringSplit = temp.Split(',');
var item = new DataModel();
item.Name = stringSplit[0].Trim('"');
item.LastName = stringSplit[1].Trim('"');
item.Phone = stringSplit[2].Trim('"');
item.Mail = stringSplit[3].Trim('"');
item.Website = stringSplit[4].Trim('"');
// add item to list
list.Add(item);
}
count++;
}
return list;
}
catch
{
throw; // or do whatever you wish
}
finally
{
sr.Close();
fs.Close();
}
}
I have written a piece of code that parses links from the top navigation from a website.
private string url = "http://www.blah.com/";
private HtmlWeb web;
private HtmlDocument doc;
private string topNavName = "";
private string topNavUrl = "";
public void Setup()
{
try
{
web = new HtmlWeb();
doc = web.Load(url);
web.AutoDetectEncoding = true;
TopCats();
}
catch (Exception e)
{
Console.WriteLine("There has been an issue loading url {0}", e);
}
}
private List<Catalogue> TopCats()
{
List<Catalogue> GetTop = new List<Catalogue>();
try
{
HtmlNodeCollection TopNode = doc.DocumentNode.SelectNodes("//*[#id='TopTabs1_tabs']/li/span/a");
if (TopNode != null)
{
foreach (HtmlNode Topitem in TopNode)
{
topNavName = Topitem.InnerText;
topNavUrl = url + Topitem.Attributes["href"].Value;
Catalogue xmltopcat = new Catalogue();
xmltopcat.indentifier = "here";
xmltopcat.name = topNavName;
xmltopcat.url = topNavUrl;
xmltopcat.description = "";
Console.WriteLine("Category >> {0}",topNavName);
}
}
}
catch (Exception e)
{
Console.WriteLine("There has been an issue Top Nav {0}", e);
}
return GetTop;
}
}
The problem that I am having is that I am not sure how to make each parsed data in the for each loop fill the XML elements. For the mapping on the XML I have created a new class:
class Catalogue
{
[XmlElement("Category identifier")]
public string indentifier
{ get; set; }
[XmlElement("name")]
public string name
{ get; set; }
[XmlElement("url")]
public string url
{ get; set; }
[XmlElement("description")]
public string description
{ get; set; }
}
I am really not sure to create a XML document- I have tried a few things, and I am really am not sure what I am doing. I am still learning C# and this is my first time working with XML.
You can use LINQ to XML.First store your all Catalogues into a List.
Catalogue xmltopcat = new Catalogue();
xmltopcat.indentifier = "here";
xmltopcat.name = topNavName;
xmltopcat.url = topNavUrl;
xmltopcat.description = "";
GetTop.Add(xmltopcat); // <-- add current catalogue to the list
Then call TopCats method and get your list and create XML file:
var list = TopCats();
XElement xDoc = new XElement("Catalogues",
list.Select(c => new XElement("Catalogue",
new XElement("indentifier",c.indentifier)
new XElement("name",c.name)
new XElement("url",c.url)
new XElement("description",c.description)));
xDoc.Save("savepath");
Or you can use XmlSerializer
FileStream fs = new FileStream("records.xml",FileMode.OpenOrCreate,FileAccess.Write);
XmlSerializer serializer = new XmlSerializer(typeof(List<Catalogue>),new XmlRootAttribute("Catalogues"));
serializer.Serialize(fs,list);
Looking for a good way to parse out of this text file, the values highlighted with the yellow boxes using C#. Each section is delineated by a TERM # which I forgot to highlight. Tried this:
string fileName = "ATMTerminalTotals.txt";
StreamReader sr = new StreamReader(fileName);
string[] delimiter = new string[] { " " };
while (!sr.EndOfStream)
{
string[] lines = sr.ReadLine().Split(delimiter, StringSplitOptions.RemoveEmptyEntries);
foreach (string line in lines)
{
Console.WriteLine(line);
}
}
Console.ReadLine();
Safe to say I am reading lines correctly and removing "white spaces." Although, as an amateur to programming, not sure of a valid way to accurately "know" that I am getting the values from this report that I need. Any advice?
i've tested this with a very simple program to parse the given file,
basically i've created two basic classes, a page class holding a collection of terminal report class (the tran type rows)
these rows maybe even can be represented as transaction and a billing class too
first parsed the data, setting the parameters needed and lastly just accessing the properties
just rushed it to be as simple as possible, no error handling etc... its just to give you a sense of how id start solving these kind of tasks, hope it helps
Adam
namespace TerminalTest
{
class Program
{
public class TerminalReport
{
public string Word { get; set; }
public int Denials { get; set; }
public int Approvals { get; set; }
public int Reversals { get; set; }
public double Amount { get; set; }
public int ON_US { get; set; }
public int Alphalink { get; set; }
public int Interchange { get; set; }
public int Surcharged { get; set; }
public static TerminalReport FromLine(string line)
{
TerminalReport report = new TerminalReport();
report.Word = line.Substring(0, 11);
line = line.Replace(report.Word, string.Empty).Trim();
string[] split = line.Split(' ');
int i = 0;
// transaction summary
report.Denials = int.Parse(split[i++]);
report.Approvals = int.Parse(split[i++]);
report.Reversals = int.Parse(split[i++]);
report.Amount = double.Parse(split[i++]);
// billing counts
report.ON_US = int.Parse(split[i++]);
report.Alphalink = int.Parse(split[i++]);
report.Interchange = int.Parse(split[i++]);
report.Surcharged = int.Parse(split[i++]);
return report;
}
}
public class TerminalPage
{
public int PageNumber { get; set; }
public double TotalSurcharges { get; set; }
public List<TerminalReport> Rows { get; set; }
public TerminalPage(int num)
{
PageNumber = num;
Rows = new List<TerminalReport>();
}
public int TotalDenials
{
get
{
return rows.Sum(r => r.Denials);
}
}
public int TotalApprovals
{
get
{
return Rows.Sum(r => r.Approvals;
}
}
public int TotalReversals
{
get
{
return Rows.Sum(r => r.Reversals;
}
}
public double TotalAmount
{
get
{
return Rows.Sum(r => r.Amount);
}
}
public int TotalON_US
{
get
{
return Rows.Sum(r => r.ON_US);
}
}
public int TotalAlphalink
{
get
{
return Rows.Sum(r => r.Alphalink);
}
}
public int TotalInterchange
{
get
{
return Rows.Sum(r => r.Interchange);
}
}
public int TotalSurcharged
{
get
{
return Rows.Sum(r => r.Surcharged);
}
}
}
private static string CleanString(string text)
{
return Regex.Replace(text, #"\s+", " ").Replace(",", string.Empty).Trim();
}
private static List<TerminalPage> ParseData(string filename)
{
using (StreamReader sr = new StreamReader(File.OpenRead(filename)))
{
List<TerminalPage> pages = new List<TerminalPage>();
int pageNumber = 1;
TerminalPage page = null;
bool parse = false;
while (!sr.EndOfStream)
{
string line = sr.ReadLine();
line = CleanString(line);
if (line.StartsWith("TRAN TYPE"))
{
// get rid of the ----- line
sr.ReadLine();
parse = true;
if (page != null)
{
pages.Add(page);
}
page = new TerminalPage(pageNumber++);
}
else if (line.StartsWith("="))
{
parse = false;
}
else if (line.StartsWith("TOTAL SURCHARGES:"))
{
line = line.Replace("TOTAL SURCHARGES:", string.Empty).Trim();
page.TotalSurcharges = double.Parse(line);
}
else if (parse)
{
TerminalReport r = TerminalReport.FromLine(line);
page.Rows.Add(r);
}
}
if (page != null)
{
pages.Add(page);
}
return pages;
}
}
static void Main(string[] args)
{
string filename = #"C:\bftransactionsp.txt";
List<TerminalPage> pages = ParseData(filename);
foreach (TerminalPage page in pages)
{
Console.WriteLine("TotalSurcharges: {0}", page.TotalSurcharges);
foreach (TerminalReport r in page.Rows)
Console.WriteLine(r.Approvals);
}
}
}
}
I'm not sure I'd split it by spaces actually.. the textfile looks like its split into columns. You might want to read like 10 chars (or whatever the width of the column is) at a time... and I'd parse the whole file into a dictionary so you get entries like
dict["WDL FRM CHK"]["# DENIALS"] = 236
then you can easily retrieve the values you want from there, and if you ever need more values in the future, you've got them.
Alternatively, you can use regexs. You can grab the first value with a regex like
^WDL FRM CHK\s+(?<denials>[0-9,]+)\s+(?<approvals>[0-9,]+)$
using
m.Groups["approvals"]
anyway I recommend you to wrap your StreamReader with using block:
using (StreamReader sr = new StreamReader(fileName))
{
// do stuff
}
Read more on MSDN
Given that it seems to have a standard, regular format, I would use regular expressions. You can check the starting code to figure out what row you're on, then an expression that will parse out the numbers and ignore whitespace will, very likely, be easier than handling it manually.
using System;
using System.Text.RegularExpressions;
namespace ConsoleApplication3
{
class Program
{
static void Main(string[] args)
{
Regex exp = new Regex(#"WDL FRM CHK(\s)+[1-9,]+(\s)+(?<approvals>[1-9,]+)(\s)+");
string str = "WDL FRM CHK 236 1,854 45,465 123 3";
Match match = exp.Match(str);
if (match.Success)
{
Console.WriteLine("Approvals: " + match.Groups["approvals"].Value);
}
Console.ReadLine();
}
}
}
Apdated from the following article to parse one of your numbers:
How to match a pattern by using regular expressions and Visual C#