how to extract column of data using regular expression - c#

I am trying to write C# code to extract columns of data. My data looks like this:
What should the regular expression be if I want to extract everything under a column header, for example "COMMAND" or "PID"?

No need to use regular expression. String Split method will work. Try code like this
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Data;
namespace ConsoleApplication53
{
    /// <summary>
    /// Loads a space-delimited listing with the columns PID, TT, STAT, TIME and COMMAND
    /// from <see cref="FILENAME"/> into a <see cref="DataTable"/>.
    /// </summary>
    class Program
    {
        // Verbatim literal (@"...") so the backslashes in the path are not treated as escapes.
        const string FILENAME = @"c:\temp\test.txt";

        // Number of columns in the listing; the last one (COMMAND) receives the rest of the line.
        const int COLUMN_COUNT = 5;

        /// <summary>
        /// Reads the file line by line, skips the first two lines and adds one row per remaining line.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("PID", typeof(int));
            dt.Columns.Add("TT", typeof(string));
            dt.Columns.Add("STAT", typeof(string));
            dt.Columns.Add("TIME", typeof(DateTime));
            dt.Columns.Add("COMMAND", typeof(string));

            // Dispose the reader (and the underlying file handle) even if parsing throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                int lineCount = 0;
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    // The first two lines are not data rows.
                    if (++lineCount <= 2)
                    {
                        continue;
                    }

                    // Limit the split to COLUMN_COUNT parts so that a COMMAND containing
                    // spaces (arguments) stays in one piece instead of being truncated.
                    string[] inputArray = inputLine.Split(new char[] { ' ' }, COLUMN_COUNT, StringSplitOptions.RemoveEmptyEntries);
                    if (inputArray.Length == 0)
                    {
                        // Blank line: nothing to add.
                        continue;
                    }

                    // NOTE(review): TIME is parsed with DateTime.Parse as in the original answer;
                    // confirm the file's TIME format is accepted by the current culture.
                    dt.Rows.Add(new object[] {
                        int.Parse(inputArray[0]),
                        inputArray[1],
                        inputArray[2],
                        DateTime.Parse(inputArray[3]),
                        inputArray[4].Trim()
                    });
                }
            }
        }
    }
}

Related

Reading csv without specific type

I want to read csv file and save to list or array or anything, but CsvHelper demands to save it as collection of specific type. Problem is my csv has so many columns, that mapping it to custom class will take a few weeks.
How can I just read it without saving as specific type? Accessing specific values with thing like row[1][2] is more than enough for me.
Add it to a datatable
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Data;
namespace ConsoleApplication23
{
    /// <summary>
    /// Reads a comma-separated file into a <see cref="DataTable"/> whose columns are all strings,
    /// so no per-file record type has to be declared.
    /// </summary>
    class Program
    {
        // Verbatim literal (@"...") so the backslashes in the path are not treated as escapes.
        const string FILENAME = @"c:\temp\test.csv";

        /// <summary>
        /// Uses the first non-empty line as column names and every later non-empty line as a row.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();

            // Dispose the reader (and the underlying file handle) even if a row is rejected.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string line;
                int rowCount = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    line = line.Trim();
                    if (line.Length == 0)
                    {
                        continue;
                    }

                    string[] splitArray = line.Split(new char[] { ',' });
                    if (rowCount == 0)
                    {
                        // Header line: one string column per field.
                        foreach (string col in splitArray)
                        {
                            dt.Columns.Add(col, typeof(string));
                        }
                    }
                    else
                    {
                        dt.Rows.Add(splitArray);
                    }

                    // Count processed lines; without this every line would be treated as a header.
                    rowCount++;
                }
            }
        }
    }
}

How to iterate through a column in a csv file using CSVhelper library? using: dynamic object

CSV file
CSV file in notepad editor
using CsvHelper;
// Reads a CSV stream with CsvHelper and stores what it reads in APSSValue.
public class csvread
{
// Result of the most recent read; declared dynamic so any value can be assigned to it.
public dynamic APSSValue ;
// Opens a CSV stream, builds a ';'-delimited CsvReader with HasHeaderRecord disabled,
// and converts each record in turn into APSSValue.
public async Task GetMode()
{
// NOTE(review): no catch or finally clause follows this try in the visible code.
try
{
FileOpenPicker openpicker = new FileOpenPicker();
openpicker.FileTypeFilter.Add(".csv");
// NOTE(review): `file` is not declared in the visible code - presumably the file chosen with the picker; confirm.
IRandomAccessStreamWithContentType stream = await file.OpenReadAsync();
StreamReader reader = new StreamReader(stream.AsStream());
// The reader is created with the current thread's culture.
string UserCultureInfo = Thread.CurrentThread.CurrentCulture.Name;
CsvReader csv = new CsvReader(reader, culture: CultureInfo.CreateSpecificCulture(UserCultureInfo));
csv.Configuration.HasHeaderRecord = false;
csv.Configuration.Delimiter = ";";
// Each pass overwrites APSSValue, so after the loop it reflects only the last record read.
while (csv.Read())
{
APSSValue = Enumerable.ToList(csv.GetRecord<dynamic>());
}
}
}
}
I tried this way shown above but with this I only get the number of columns.
I also tried
// Second attempt: advance the reader once, then materialise the records returned by GetRecords into a list.
csv.Read();
APSSValue = Enumerable.ToList(csv.GetRecords<dynamic>());
but this gives me the entire data of csv file.
Questions:
I want to look for the value under the column (I_APSS_Modus) Please see the images shared above, It can be 0 or 1 so how can I look for that value if it is 0 or 1 ?
NOTE:
I don't want the values of entire column because all the values in the column would be either 0 or 1. So i just want one value from any row under that column.
CSV file is not same every time, so the column number for (I_APSS_Modus) will vary if the CSV file is different
Try following code which put data into a DataTable so you can easily filter with linq. Below will work with posted csv
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Globalization;
using System.Data;
namespace ConsoleApplication8
{
    /// <summary>
    /// Loads a ';'-delimited measurement file with a fixed, known set of columns into a
    /// typed <see cref="DataTable"/> and extracts the I_APSS_Modus column.
    /// </summary>
    class Program
    {
        // Verbatim literal (@"...") so the backslashes in the path are not treated as escapes.
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>
        /// Skips empty lines, lines starting with "sep" and the header line, then adds one
        /// typed row per remaining line.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("Timestamp", typeof(DateTime));
            dt.Columns.Add("B_Kurze_Seite", typeof(Boolean));
            dt.Columns.Add("I_Partikeleinfall Reinluft", typeof(int));
            dt.Columns.Add("I_Partikeleinfall Rohluft", typeof(int));
            dt.Columns.Add("I_APSS_Modus", typeof(int));

            const string format = "yyyy MM dd HH:mm:ss:fff";

            // Dispose the reader (and the underlying file handle) even if parsing throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string line;
                int row = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    line = line.Trim();
                    if (line.Length == 0 || line.StartsWith("sep", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // The first remaining line is the header; the columns are already declared above.
                    if (++row == 1)
                    {
                        continue;
                    }

                    string[] splitRow = line.Split(new char[] { ';' });
                    dt.Rows.Add(new object[] {
                        DateTime.ParseExact(splitRow[0], format, CultureInfo.InvariantCulture),
                        // Anything other than the literal "FALSE" counts as true, as in the original answer.
                        splitRow[1] != "FALSE",
                        // The file is machine-written, so parse numbers culture-independently.
                        int.Parse(splitRow[2], CultureInfo.InvariantCulture),
                        int.Parse(splitRow[3], CultureInfo.InvariantCulture),
                        int.Parse(splitRow[4], CultureInfo.InvariantCulture)
                    });
                }
            }

            List<int> data = dt.AsEnumerable().Select(x => x.Field<int>("I_APSS_Modus")).ToList();
        }
    }
}
Here is code that will work with generic columns
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Globalization;
using System.Data;
namespace ConsoleApplication8
{
    /// <summary>
    /// Loads a ';'-delimited measurement file whose column set is taken from its header line.
    /// "Timestamp" is stored as DateTime, "I_APSS_Modus" as int, every other column as string.
    /// </summary>
    class Program
    {
        // Verbatim literal (@"...") so the backslashes in the path are not treated as escapes.
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>
        /// Builds the table from the header line, fills it from the data lines and extracts
        /// the I_APSS_Modus values when that column exists.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            const string format = "yyyy MM dd HH:mm:ss:fff";
            string[] columnNames = null;

            // Dispose the reader (and the underlying file handle) even if parsing throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string line;
                int row = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    line = line.Trim();
                    if (line.Length == 0 || line.StartsWith("sep", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    string[] splitRow = line.Split(new char[] { ';' });
                    if (++row == 1)
                    {
                        // Header line: declare one column per name, typed by name.
                        columnNames = splitRow;
                        foreach (string col in splitRow)
                        {
                            switch (col)
                            {
                                case "Timestamp":
                                    dt.Columns.Add(col, typeof(DateTime));
                                    break;
                                case "I_APSS_Modus":
                                    dt.Columns.Add(col, typeof(int));
                                    break;
                                default:
                                    dt.Columns.Add(col, typeof(string));
                                    break;
                            }
                        }
                    }
                    else
                    {
                        DataRow newRow = dt.Rows.Add();

                        // A short data line leaves its missing trailing fields as DBNull
                        // instead of indexing past the end of splitRow.
                        int fieldCount = Math.Min(columnNames.Length, splitRow.Length);
                        for (int i = 0; i < fieldCount; i++)
                        {
                            switch (columnNames[i])
                            {
                                case "Timestamp":
                                    newRow[i] = DateTime.ParseExact(splitRow[i], format, CultureInfo.InvariantCulture);
                                    break;
                                case "I_APSS_Modus":
                                    // The file is machine-written, so parse culture-independently.
                                    newRow[i] = int.Parse(splitRow[i], CultureInfo.InvariantCulture);
                                    break;
                                default:
                                    newRow[i] = splitRow[i];
                                    break;
                            }
                        }
                    }
                }
            }

            // The column position varies between files and the column may be absent altogether;
            // querying a missing column would throw, so fall back to an empty list.
            // NOTE(review): rows where the field was missing hold DBNull and still make Field<int> throw.
            List<int> data = dt.Columns.Contains("I_APSS_Modus")
                ? dt.AsEnumerable().Select(x => x.Field<int>("I_APSS_Modus")).ToList()
                : new List<int>();
        }
    }
}
If you want to stay with CsvHelper, a simple mapping should do the trick:
/// <summary>
/// Row model for the CSV file; only the I_APSS_Modus column is represented.
/// </summary>
public class DataSet
{
    private int _apssModus;

    /// <summary>Value read from the I_APSS_Modus column.</summary>
    public int ApssModus
    {
        get { return _apssModus; }
        set { _apssModus = value; }
    }
}
/// <summary>
/// CsvHelper class map that binds <see cref="DataSet.ApssModus"/> to the CSV column
/// named "I_APSS_Modus".
/// </summary>
public class ApssMap : ClassMap<DataSet>
{
    public ApssMap()
    {
        // Map by header name, not by position.
        Map(row => row.ApssModus)
            .Name("I_APSS_Modus");
    }
}
/// <summary>
/// Reads "any.csv" as a ';'-delimited file using the current thread's culture and
/// materialises every row as a <see cref="DataSet"/> through <see cref="ApssMap"/>.
/// </summary>
public void GetMode()
{
    string cultureName = Thread.CurrentThread.CurrentCulture.Name;

    using (var textReader = new StreamReader(new FileStream("any.csv", FileMode.Open)))
    using (var csv = new CsvReader(
        textReader,
        new CsvConfiguration(CultureInfo.CreateSpecificCulture(cultureName)) { Delimiter = ";" }))
    {
        // The class map must be registered before any record is requested.
        csv.Configuration.RegisterClassMap<ApssMap>();

        var rows = csv.GetRecords<DataSet>().ToList();
    }
}
Edit: Check the official docs for all the mapping options: https://joshclose.github.io/CsvHelper/examples/configuration/class-maps
If you just want to read the first row of data, you have to read the header first and then you can read the first data row.
/// <summary>
/// Demonstrates reading only the first data row: writes a small ';'-delimited sample into
/// memory, reads the header, advances to the first record and picks I_APSS_Modus from it.
/// </summary>
static void Main(string[] args)
{
    string[] sampleLines =
    {
        "Timestamp;B_Kurze_Seite;I_Parikeleinfall Reinluft;I_Partikeleinfall Rohluft;I_APSS_Modus",
        "2020 06 27 08:49:20:335;FALSE;15;0;0",
        "2020 06 27 08:49:20:391;FALSE;0;0;0"
    };

    using (var buffer = new MemoryStream())
    using (var output = new StreamWriter(buffer))
    using (var input = new StreamReader(buffer))
    using (var parser = new CsvReader(input, CultureInfo.InvariantCulture))
    {
        foreach (string sampleLine in sampleLines)
        {
            output.WriteLine(sampleLine);
        }

        // Push the text into the stream and rewind so the reader starts at the beginning.
        output.Flush();
        buffer.Position = 0;

        parser.Configuration.Delimiter = ";";

        // First Read() lands on the header line, the second on the first data row.
        parser.Read();
        parser.ReadHeader();
        parser.Read();

        var firstRow = parser.GetRecord<dynamic>();
        var APSSModus = firstRow.I_APSS_Modus;
    }
}
Edit: This should give you the same answer as @jdweng
/// <summary>
/// Demonstrates collecting the whole I_APSS_Modus column: writes a sample that starts with
/// a "sep=" line and empty lines, skips those records, and parses the column into ints.
/// </summary>
static void Main(string[] args)
{
    string[] sampleLines =
    {
        "sep=;",
        "",
        "",
        "",
        "Timestamp;B_Kurze_Seite;I_Parikeleinfall Reinluft;I_Partikeleinfall Rohluft;I_APSS_Modus",
        "2020 06 27 08:49:20:335;FALSE;15;0;0",
        "2020 06 27 08:49:20:391;FALSE;0;0;0"
    };

    using (var buffer = new MemoryStream())
    using (var output = new StreamWriter(buffer))
    using (var input = new StreamReader(buffer))
    using (var parser = new CsvReader(input, CultureInfo.InvariantCulture))
    {
        foreach (string sampleLine in sampleLines)
        {
            output.WriteLine(sampleLine);
        }

        // Push the text into the stream and rewind so the reader starts at the beginning.
        output.Flush();
        buffer.Position = 0;

        parser.Configuration.Delimiter = ";";
        // Skip the "sep=" line and any record whose fields are all empty.
        parser.Configuration.ShouldSkipRecord = fields => fields[0].StartsWith("sep=") || fields.All(string.IsNullOrEmpty);

        var records = parser.GetRecords<dynamic>();

        List<int> data = new List<int>();
        foreach (var entry in records)
        {
            data.Add((int)int.Parse(entry.I_APSS_Modus));
        }
    }
}

Show XML data in datagrid

I am doing a weather API project. The upcoming 7 days weather information is displaying successfully in message boxes.
Instead of message boxes, how can display all the data with a datagridview?
This is my current working code:
// Download the 7-day forecast for `city` as XML and show each day's values in message boxes.
// The city name is escaped so that spaces or special characters cannot break the query string.
string uri = string.Format("http://api.apixu.com/v1/forecast.xml?key=keygoeshere&q={0}&days=7", System.Uri.EscapeDataString(city));
XDocument doc = XDocument.Load(uri);
foreach (var npc in doc.Descendants("forecastday"))
{
    MessageBox.Show((string)npc.Descendants("date").FirstOrDefault());
    MessageBox.Show("Max temp " + (string)npc.Descendants("maxtemp_c").FirstOrDefault());
    MessageBox.Show("Min temp " + (string)npc.Descendants("mintemp_c").FirstOrDefault());
    MessageBox.Show("Text " + (string)npc.Descendants("text").FirstOrDefault());
    // The element is named "icon"; the "http:" prefix belongs on its value, not on the
    // element name (Descendants("httpicon") matches nothing and yields null).
    MessageBox.Show("Icon " + "http:" + (string)npc.Descendants("icon").FirstOrDefault());
}
Put data into a DataTable and then make the DataTable the DataSource of the DGV.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Xml;
using System.Xml.Linq;
using System.Net;
using System.IO;
namespace WindowsFormsApplication3
{
    /// <summary>
    /// Form that downloads a 7-day weather forecast as XML and shows it in a DataGridView,
    /// one row per day, including the downloaded weather icon.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Builds the forecast table and binds it to <c>dataGridView1</c>.
        /// </summary>
        public Form1()
        {
            InitializeComponent();

            DataTable dt = new DataTable();
            dt.Columns.Add("Date", typeof(string));
            dt.Columns.Add("Max Temp", typeof(string));
            dt.Columns.Add("Min Temp", typeof(string));
            dt.Columns.Add("Text", typeof(string));
            dt.Columns.Add("Icon", typeof(Bitmap));

            string city = "London";
            string uri = string.Format("http://api.apixu.com/v1/forecast.xml?key=keygoeshere&q={0}&days=7", city);
            XDocument doc = XDocument.Load(uri);

            // One client for all icon downloads, disposed when the loop is done
            // (previously a new, never-disposed client was created per day).
            using (WebClient client = new WebClient())
            {
                foreach (XElement npc in doc.Descendants("forecastday"))
                {
                    string iconUri = (string)npc.Descendants("icon").FirstOrDefault();

                    // Leave the cell empty when the day has no icon element instead of
                    // requesting the bare "http:" prefix.
                    Bitmap newBitMap = null;
                    if (!string.IsNullOrEmpty(iconUri))
                    {
                        // The icon value lacks a scheme, so "http:" is prepended.
                        byte[] image = client.DownloadData("http:" + iconUri);

                        // The stream is intentionally not disposed: a Bitmap created from a
                        // stream needs that stream to stay open for the Bitmap's lifetime.
                        MemoryStream stream = new MemoryStream(image);
                        newBitMap = new Bitmap(stream);
                    }

                    dt.Rows.Add(new object[] {
                        (string)npc.Descendants("date").FirstOrDefault(),
                        (string)npc.Descendants("maxtemp_c").FirstOrDefault(),
                        (string)npc.Descendants("mintemp_c").FirstOrDefault(),
                        (string)npc.Descendants("text").FirstOrDefault(),
                        newBitMap
                    });
                }
            }

            dataGridView1.DataSource = dt;
        }
    }
}

Iterating through an Xml can't see the second Child node (C#)

I'm iterating over two XML documents in parallel. When the value of an element X in the first XML is
equal to a value in the second XML (e.g. USD == USD), it should write a
row in a DataTable with two columns: the value of element X (USD) in the first column,
and the value of another child element Y (the <Value> element) of the same node from the first XML in the second column.
The second XML has a single node with multiple child elements.
// Build a two-column table: for every <Valute> whose CharCode equals one of the child
// values of <SelectedVal>, add a row holding that CharCode and the Valute's Value.
DataTable dTable = new DataTable();
foreach (string columnName in new[] { "ColumnOne", "ColumnTwo" })
{
    dTable.Columns.Add(columnName);
}

DataRow dRow = null;
XmlNodeList valuteNodes = firstXmlDoc.SelectNodes("//ValCurs/Valute");
foreach (XmlNode valuteNode in valuteNodes)
{
    foreach (XmlNode selectedNode in secondXmlDoc.SelectSingleNode("SelectedVal"))
    {
        string charCode = valuteNode.SelectSingleNode("CharCode").InnerText;
        if (charCode != selectedNode.InnerText)
        {
            continue;
        }

        dRow = dTable.NewRow();
        dRow["ColumnOne"] = charCode;
        dRow["ColumnTwo"] = valuteNode.SelectSingleNode("Value").InnerText;
        dTable.Rows.Add(dRow);
    }
}

myDataGridView.DataSource = dTable;
This isn't working: "nodeFirst.SelectSingleNode("Value").InnerText" gives me an empty value (" ") and it stops, filling the DataTable with just the "CharCode" value.
It's interesting that it worked the first few times I ran it, but it doesn't anymore.
First Xml:
<ValCurs>
<Valute ID="47">
<NumCode>978</NumCode>
<CharCode>EUR</CharCode>
<Nominal>1</Nominal>
<Name>Euro</Name>
<Value>20.3457</Value>
</Valute>
<Valute ID="44">
<NumCode>840</NumCode>
<CharCode>USD</CharCode>
<Nominal>1</Nominal>
<Name>Dolar S.U.A.</Name>
<Value>17.4603</Value>
</Valute>
...
</ValCurs>
Second Xml:
<SelectedVal>
<Cod>UAH</Cod>
<Cod>EUR</Cod>
<Cod>CAD</Cod>
<Cod>RON</Cod>
<Cod>NOK</Cod>
<Cod>RUB</Cod>
<Cod>JPY</Cod>
<Cod>AUD</Cod>
</SelectedVal>
Try following code using xml linq :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication1
{
    /// <summary>
    /// Loads every &lt;Valute&gt; element of the currency file into a typed <see cref="DataTable"/>
    /// and keeps only the rows whose CharCode is listed as a &lt;Cod&gt; in the second file.
    /// </summary>
    class Program
    {
        // Verbatim literals (@"...") so the backslashes in the paths are not treated as escapes.
        const string CURRENCY_FILE = @"c:\temp\test.xml";
        const string COUNTRY_FILE = @"c:\temp\test1.xml";

        /// <summary>
        /// Builds the full currency table, then the filtered table.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("NUMCODE", typeof(int));
            dt.Columns.Add("CHARCODE", typeof(string));
            dt.Columns.Add("Nominal", typeof(int));
            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Value", typeof(decimal));

            XDocument currencyXml = XDocument.Load(CURRENCY_FILE);
            foreach (XElement valute in currencyXml.Descendants("Valute"))
            {
                // The explicit XElement/XAttribute conversions parse the text content.
                dt.Rows.Add(new object[] {
                    (int)valute.Attribute("ID"),
                    (int)valute.Element("NumCode"),
                    (string)valute.Element("CharCode"),
                    (int)valute.Element("Nominal"),
                    (string)valute.Element("Name"),
                    (decimal)valute.Element("Value")
                });
            }

            XDocument countryXml = XDocument.Load(COUNTRY_FILE);
            // A set gives constant-time membership checks while filtering.
            HashSet<string> countries = new HashSet<string>(countryXml.Descendants("Cod").Select(x => (string)x));

            List<DataRow> matches = dt.AsEnumerable()
                .Where(x => countries.Contains(x.Field<string>("CHARCODE")))
                .ToList();

            // CopyToDataTable throws when the sequence has no rows, so fall back to an empty
            // table with the same schema when nothing matches.
            DataTable filteredTable = matches.Count > 0 ? matches.CopyToDataTable() : dt.Clone();
        }
    }
}

Extracting CSV Header data type

In my current project, I am trying to derive the data type of CSV file. For instance, the following is the .CSV file.
sepallength,sepalwidth,petallength,petalwidth,class
6.2,2.8,4.8,1.8,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
5.1,3.5,1.4,0.3,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.9,3,4.2,1.5,Iris-versicolor
5.7,3,4.2,1.2,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.4,2.8,5.6,2.2,Iris-virginica
My requirement is that a program should give me the following output.
"sepallength" is of "float" datatype
"sepalwidth" is of "float" datatype
"petallength" is of "float" datatype
"petalwidth" is of "float" datatype
"class" is of "String" datatype
I have written the following program. However the "columnName.GetType()" function always return string data type.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data;
using Microsoft.VisualBasic.FileIO;
using System.IO;
namespace ReadCSVFile
{
class Program
{
/// <summary>
/// Loads the CSV file into a DataTable and prints each column name followed by the
/// type reported for that name.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // This is a file path that a user has to input.
    // Verbatim literal (@"...") so the backslashes in the path are not treated as escapes.
    string csv_file_path = @"C:\TestCSV\iris.csv";
    string columnName;
    DataTable csvData = GetDataTabletFromCSVFile(csv_file_path);
    Console.WriteLine("Column Value of the CSV file are as follows:");
    Console.WriteLine("=========================================");
    // This will retrieve the column names from the table.
    foreach (DataColumn column in csvData.Columns)
    {
        columnName = column.ColumnName;
        Console.WriteLine(columnName);
        // columnName is a string variable, so GetType() here always reports System.String;
        // it describes the name, not the data stored in the column.
        Console.WriteLine("Column type " + columnName.GetType());
    }
}
/*
 * Reads a comma-delimited file into a DataTable.
 *
 * The first line supplies the column names; every later line becomes one row, with
 * empty fields stored as null (DBNull in the table).
 *
 * csv_file_path: path of the CSV file to read.
 * Returns: the table built so far. Reading is best effort: if the file cannot be
 * opened or a line cannot be parsed, the error is written to standard error and the
 * rows loaded up to that point (possibly none) are returned.
 */
private static DataTable GetDataTabletFromCSVFile(string csv_file_path)
{
    DataTable csvData = new DataTable();
    try
    {
        // Connect to the .CSV file path.
        using (TextFieldParser csvReader = new TextFieldParser(csv_file_path))
        {
            csvReader.SetDelimiters(new string[] { "," });
            csvReader.HasFieldsEnclosedInQuotes = true;

            // The first line holds the column names; ReadFields returns null for an empty file.
            string[] colFields = csvReader.ReadFields();
            if (colFields == null)
            {
                return csvData;
            }

            foreach (string column in colFields)
            {
                DataColumn dataColumn = new DataColumn(column);
                dataColumn.AllowDBNull = true;
                csvData.Columns.Add(dataColumn);
            }

            // Every remaining line is a data row.
            while (!csvReader.EndOfData)
            {
                string[] fieldData = csvReader.ReadFields();
                // Store empty values as null.
                for (int i = 0; i < fieldData.Length; i++)
                {
                    if (fieldData[i] == "")
                    {
                        fieldData[i] = null;
                    }
                }
                csvData.Rows.Add(fieldData);
            }
        }
    }
    catch (Exception ex)
    {
        // Keep the original never-throws contract, but do not hide the failure.
        Console.Error.WriteLine("Failed to read CSV file '" + csv_file_path + "': " + ex.Message);
    }
    return csvData;
}
Check my example code below.
(You cannot use GetType() on .ColumnName, as that is always a string; only the contents of the columns are of different types.)
// Prints the runtime type that mytesttype detects for three sample strings.
public static void Main()
{
    // "6,2" is a float written with a European decimal comma.
    string[] samples = { "6,2", "6232", "6String" };
    foreach (string sample in samples)
    {
        Console.WriteLine(mytesttype(sample).GetType());
    }
}
// Converts text to the first type that parses it: int, then float; otherwise the
// original string is returned unchanged.
static object mytesttype(string str)
{
    int wholeNumber;
    bool isInteger = int.TryParse(str, out wholeNumber);
    if (isInteger)
    {
        return wholeNumber;
    }

    float fraction;
    return float.TryParse(str, out fraction) ? (object)fraction : str;
}

Categories