How to create threads for functions with many arguments - c#

Suppose I have a directory with log files. log file
My program has to create 3 text files with some statistics.
date statistic domain statistic user statistic
Each log file has to process separately in parallel. But I don't understand how to create threads for functions with many arguments.
Main functions are void WriteInFileUserStat(string filename,string txt, Dictionary dict), void WriteInFileDomainStat(string filename, string txt, Dictionary dict), void WriteInFileDateStat(string filename, string txt, Dictionary dict).
class Program
{
static void Main(string[] args)
{
string dirName = #"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log";
string[] filelist = Directory.GetFiles(#"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log", "*.txt");
string[] fileliststat = Directory.GetFiles(#"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log\statistic", "*.txt");
string userlog = #"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log\statistic\userlog.txt";
string domainlog = #"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log\statistic\domainlog.txt";
string datelog = #"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log\statistic\datelog.txt";
Dictionary<string, string> userDict = new Dictionary<string, string>();
Dictionary<string, string> domainDict = new Dictionary<string, string>();
Dictionary<string, string> dateDict = new Dictionary<string, string>();
//Thread userStatThread = new Thread(new ParameterizedThreadStart(WriteInFileUserStat));
//userStatThread.Start(s, userlog, userDict);
if (Directory.Exists(dirName))
{
Console.WriteLine("Файлы с логами прокси-сервера:");
string[] files = Directory.GetFiles(dirName);
foreach (string s in files)
{
string fname = s;
Console.WriteLine(s);
}
}
Console.WriteLine("Статистика по пользователям: ");
foreach(string s in filelist)
{
WriteInFileUserStat(s, userlog, userDict);
}
foreach (KeyValuePair<string, string> keyValue in userDict)
{
Console.WriteLine(keyValue.Key + " - " + keyValue.Value);
}
Console.WriteLine("Статистика по доменам: ");
foreach (string s in filelist)
{
WriteInFileDomainStat(s, domainlog, domainDict);
}
foreach (KeyValuePair<string, string> keyValue in domainDict)
{
Console.WriteLine(keyValue.Key + " - " + keyValue.Value);
}
Console.WriteLine("Статистика по датам: ");
foreach (string s in filelist)
{
WriteInFileDateStat(s, datelog, dateDict);
}
foreach (KeyValuePair<string, string> keyValue in dateDict)
{
Console.WriteLine(keyValue.Key + " - " + keyValue.Value);
}
Console.ReadLine();
}
/*Парсинг файла*/
public static void FilesParsing(string line)
{
string userName;
string domainName;
string traffic;
string date;
string[] filelist = Directory.GetFiles(#"C:\MY DISK\Study\7th semester\Основы параллельной обработки данных\log", "*.txt");
foreach (string file_to_read in filelist)
{
}
string[] parts = line.Split('\t');
userName = parts[0];
domainName = parts[1];
traffic = parts[2];
date = parts[3];
Console.WriteLine($"Username = {userName} , Domain name = {domainName}, Traffic = {traffic}, Date = {date}");
}
/*Чтение из файла*/
static public void ReadingFile(string filename)
{
using (StreamReader sr = new StreamReader(filename))
{
string line;
while ((line = sr.ReadLine()) != null)
{
FilesParsing(line);
}
}
}
/*Запись в файл статистики по пользователю*/
static public void WriteInFileUserStat(string filename,string txt, Dictionary<string, string> dict)
{
string user;
string traffic;
int trafficInt = 0;
int tempValue = 0;
using (StreamWriter swr = new StreamWriter(txt))
{
using (StreamReader sr = new StreamReader(filename))
{
string line;
while ((line = sr.ReadLine()) != null)
{
string[] parts = line.Split('\t');
user = parts[0];
traffic = parts[2];
if (dict.ContainsKey(user))
{
tempValue = Convert.ToInt32(dict[user]) + Convert.ToInt32(traffic);
dict.Remove(user);
trafficInt = Convert.ToInt32(traffic);
dict.Add(user, Convert.ToString(tempValue));
}
else dict.Add(user, traffic);
//swr.WriteLine("{0}\t{1}", user, traffic);
}
}
foreach (KeyValuePair<string, string> KeyValue in dict)
{
swr.WriteLine("{0} - {1}", KeyValue.Key, KeyValue.Value);
}
swr.Close();
}
}
/*Запись в файл статистики по доменам*/
static public void WriteInFileDomainStat(string filename, string txt, Dictionary<string, string> dict)
{
string domain;
string traffic;
int trafficInt = 0;
int tempValue = 0;
using (StreamWriter swr = new StreamWriter(txt))
{
using (StreamReader sr = new StreamReader(filename))
{
string line;
while ((line = sr.ReadLine()) != null)
{
string[] parts = line.Split('\t');
domain = parts[1];
traffic = parts[2];
if (dict.ContainsKey(domain))
{
tempValue = Convert.ToInt32(dict[domain]) + Convert.ToInt32(traffic);
dict.Remove(domain);
trafficInt = Convert.ToInt32(traffic);
dict.Add(domain, Convert.ToString(tempValue));
}
else dict.Add(domain, traffic);
//swr.WriteLine("{0}\t{1}", domain, traffic);
}
}
foreach (KeyValuePair<string, string> KeyValue in dict)
{
swr.WriteLine("{0} - {1}", KeyValue.Key, KeyValue.Value);
}
swr.Close();
}
}
/*Запись в файл статистики по дате*/
static public void WriteInFileDateStat(string filename, string txt, Dictionary<string, string> dict)
{
string date;
string traffic;
int trafficInt = 0;
int tempValue = 0;
using (StreamWriter swr = new StreamWriter(txt))
{
using (StreamReader sr = new StreamReader(filename))
{
string line;
while ((line = sr.ReadLine()) != null)
{
string[] parts = line.Split('\t');
date = parts[3];
traffic = parts[2];
if (dict.ContainsKey(date))
{
tempValue = Convert.ToInt32(dict[date]) + Convert.ToInt32(traffic);
dict.Remove(date);
trafficInt = Convert.ToInt32(traffic);
dict.Add(date, Convert.ToString(tempValue));
}
else dict.Add(date, traffic);
//swr.WriteLine("{0}\t{1}", date, traffic);
}
}
foreach (KeyValuePair<string, string> KeyValue in dict)
{
swr.WriteLine("{0} - {1}", KeyValue.Key, KeyValue.Value);
}
swr.Close();
}
}
}

If you got many arguments but only want 1 type to write in, make a single class, struct or tupple to take them all. Use that as the type for the argument. That is the way Events Arguments go.

Related

How do I add parsed lines into a dictionary?

I am trying to make a dictionary where the compiler reads from a text file where there are two words per line. I have parsed it through the split() method but I am struggling on how to add the corresponding keys and values from the line to the dictionary container. I am trying to add it in the ReadStream2() function after doing the split() in the line line.add(rez,rez). I know this is wrong but I have no idea how to combine what I am parsing into the dictionary in terms of keys and values. Thanks!
class Program
{
static void Main(string[] args)
{
Dictionary<string, string> line = new Dictionary<string, string>();
FileStream filestream = null;
string path = "Dictionary.txt";
//WriteByte(filestream, path);
//ReadByte(filestream, path);
//WriteStream(filestream, path);
//ReadFromFile();
Menu(filestream, path);
ReadStream2(filestream,path);
Group(filestream, path);
}
static void WriteByte(FileStream filestream, string path)
{
string str;
Console.WriteLine("Enter word");
str = Console.ReadLine();
try
{
filestream = new FileStream("Dictionary.txt", FileMode.Open, FileAccess.Write);
byte[] by = Encoding.Default.GetBytes(str);
filestream.Write(by, 0, by.Length);
Console.WriteLine("File written");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
filestream.Close();
}
}
static void ReadByte(FileStream filestream, string path)
{
try
{
filestream = new FileStream(path, FileMode.Open, FileAccess.Read);
byte[] by = new byte[(int)filestream.Length];
filestream.Read(by, 0, by.Length);
string str = Encoding.Default.GetString(by);
Console.WriteLine("File read");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
filestream.Close();
}
}
static void WriteStream(FileStream filestream, string path)
{
using (filestream = new FileStream(path, FileMode.Append, FileAccess.Write))
{
using (StreamWriter streamWriter = new StreamWriter(filestream))
{
//string str;
//Console.WriteLine("Enter word");
//str = Console.ReadLine();
//streamWriter.WriteLine(str);
}
}
}
static void ReadStream2(FileStream fileStream, string path)
{
using (fileStream = new FileStream(path, FileMode.Open, FileAccess.Read))
{
Dictionary<string, string> line = new Dictionary<string, string>();
using (StreamReader sw = new StreamReader(fileStream))
{
string rez = "";
while(sw.Peek() > 0)
{
rez = sw.ReadLine();
Console.WriteLine(rez);
string[] words = rez.Split(' ');
line.Add(rez, rez);
}
}
}
}
static void Group(FileStream fileStream, string path)
{
var list = File
.ReadLines(path)
.Select((v, i) => new { Index = i, Value = v })
.GroupBy(p => p.Index / 2)
.ToDictionary(g => g.First().Value, g => g.Last().Value);
}
static void Menu(FileStream fileStream, string path)
{
char choice;
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("Welcome this is a English dictionary press d to continue");
Console.ResetColor();
choice = Convert.ToChar(Console.ReadLine());
while (choice == 'd' || choice == 'D')
{
ReadStream2(fileStream, path);
}
}
static void askWord()
{
string ask;
Console.WriteLine("What english word would you like to translate");
ask = Console.ReadLine();
if (ask == )
}
}
}
I have difficulties to understand.
Are you trying to read the words array? words[0] and words[1]?
string[] words = rez.Split(' ');
line.Add(words[0], words[1]);
#Larrythelobster
I write an sample project, hope I can help you:
using System.Text;
class Program
{
static void Main(string[] args)
{
WelcomeText();
string path = "Dictionary.txt";
var dict = ReadFile(path);
var input = GetInputWord();
var translateWord = SearchWord(dict, input);
ShowResult(translateWord);
}
private static void WelcomeText()
{
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("Welcome! This is a English dictionary...");
Console.ResetColor();
}
private static IDictionary<string, string> ReadFile(string path)
{
var lines = File.ReadAllLines(path);
var dict = new Dictionary<string, string>();
foreach(var line in lines)
{
var words = line.Split(' ');
dict.Add(words[0], words[1]);
}
return dict;
}
private static string GetInputWord()
{
Console.WriteLine("Enter a word to translate: ");
var word = Console.ReadLine();
return word;
}
private static string SearchWord(IDictionary<string, string> dict, string wordToSearch)
{
return dict[wordToSearch];
}
private static void ShowResult(string translateWord)
{
Console.WriteLine("The translate word is: " + translateWord);
}
}
Dictionary.txt (English > French):
yellow jaune
red rouge
blue bleu
green vert

Looping does not work to create a text file with City and pace name using C#

I am trying to create text file which should contain a city name and interesting places name of that city. For this, initially I create a text file with City name. My City_list.txt file looks like this-
Flensburg
Kiel
Lübeck
Neumünster
Heide
Geesthacht
Husum
Elmshorn
Pinneberg
Wedel
...... many more
Now My next work is,by using the city name get all external links exist in Wikipedia article. So , I create a list to get all the city name so that it can create place name one after other. But my code is not working.I create the place list for Wedel, for the last one only. But not for flensburg to wedel. I know there is one looping problem, But could not get the point where should I modify my work.
My code is as follows-
public void PoiListFromWiki()
{
var startPath = Application.StartupPath;
string folderName = Path.Combine(startPath, "POI_List");
System.IO.Directory.CreateDirectory(folderName);
string fileName = "POI.txt";
var path = Path.Combine(folderName, fileName);
List<string> ReadFile = File.ReadAllLines(#"I:\TouristPlace\TouristPlace\bin\Debug\CityList\POI_list.txt", Encoding.GetEncoding("windows-1252")).ToList();
foreach (string line in ReadFile)
{
Dictionary<string, string> cities = new Dictionary<string, string>();
using (var client = new HttpClient())
{
var response = client.GetAsync("https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gsradius=10000&gspage=" + WebUtility.UrlEncode(line) + "&gslimit=500&gsprop=type|name|dim|country|region|globe&format=json").Result;
if (response.IsSuccessStatusCode)
{
var responseContent = response.Content;
string responseString = responseContent.ReadAsStringAsync().Result;
var obj = JsonConvert.DeserializeObject<RootObject>(responseString).query.geosearch.Select(a => a.title).ToList(); //NulReferanceException error occurd
List<string> places = new List<string>();
foreach (var item in obj)
{
places.Add(item);
}
cities[line] = string.Join(";", places);
var output = line + ";" + cities[line];
File.WriteAllText(path, output); //here is problem
}
}
}
}
My Expected Result is
Flensburg;place1;place2..
Kiel;Place1;Place2..
Lübeck;Place1;Place2..
But With my code I got only
Lübeck;Place1;Place2..
You write the output file again and again in each loop but WriteAllText overwrite everytime the previous data with the new one. But you could do a lot better than appending to the previous text. Just use a StringBuilder to store the text while you get it and write just one time after the exit from the loop.
Here the significant changes to make to your current code....
public void PoiListFromWiki()
{
StringBuilder results = new StringBuilder();
.....
foreach (string line in ReadFile)
{
Dictionary<string, string> cities = new Dictionary<string, string>();
using (var client = new HttpClient())
{
....
cities[line] = string.Join(";", places);
results.AppendLine(line + ";" + cities[line]);
}
}
File.WriteAllText(path, results.ToString());
}
File.WrtieAllText overwrites the file. You need to use File.AppendAllText.
public void PoiListFromWiki()
{
var startPath = Application.StartupPath;
string folderName = Path.Combine(startPath, "POI_List");
System.IO.Directory.CreateDirectory(folderName);
string fileName = "POI.txt";
var path = Path.Combine(folderName, fileName);
List<string> ReadFile = File.ReadAllLines(#"I:\TouristPlace\TouristPlace\bin\Debug\CityList\POI_list.txt", Encoding.GetEncoding("windows-1252")).ToList();
foreach (string line in ReadFile)
{
Dictionary<string, string> cities = new Dictionary<string, string>();
using (var client = new HttpClient())
{
var response = client.GetAsync("https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gsradius=10000&gspage=" + WebUtility.UrlEncode(line) + "&gslimit=500&gsprop=type|name|dim|country|region|globe&format=json").Result;
if (response.IsSuccessStatusCode)
{
var responseContent = response.Content;
string responseString = responseContent.ReadAsStringAsync().Result;
var obj = JsonConvert.DeserializeObject<RootObject>(responseString).query.geosearch.Select(a => a.title).ToList(); //NulReferanceException error occurd
List<string> places = new List<string>();
foreach (var item in obj)
{
places.Add(item);
}
cities[line] = string.Join(";", places);
var output = line + ";" + cities[line] + "\r\n";
if (!File.Exists(path))
{
File.WriteAllText(path, output); //here is problem
}
else
{
File.AppendAllText(path, output);
}
}
}
}
}
Try it:
public void PoiListFromWiki()
{
var startPath = Application.StartupPath;
string folderName = Path.Combine(startPath, "POI_List");
System.IO.Directory.CreateDirectory(folderName);
string fileName = "POI.txt";
var path = Path.Combine(folderName, fileName);
var output="";
List<string> ReadFile = File.ReadAllLines(#"I:\TouristPlace\TouristPlace\bin\Debug\CityList\POI_list.txt", Encoding.GetEncoding("windows-1252")).ToList();
foreach (string line in ReadFile)
{
Dictionary<string, string> cities = new Dictionary<string, string>();
using (var client = new HttpClient())
{
var response = client.GetAsync("https://en.wikipedia.org/w/api.php?action=query&list=geosearch&gsradius=10000&gspage=" + WebUtility.UrlEncode(line) + "&gslimit=500&gsprop=type|name|dim|country|region|globe&format=json").Result;
if (response.IsSuccessStatusCode)
{
var responseContent = response.Content;
string responseString = responseContent.ReadAsStringAsync().Result;
var obj = JsonConvert.DeserializeObject<RootObject>(responseString).query.geosearch.Select(a => a.title).ToList(); //NulReferanceException error occurd
List<string> places = new List<string>();
foreach (var item in obj)
{
places.Add(item);
}
cities[line] = string.Join(";", places);
output += line + ";" + cities[line]+"/r/n"; //always add new info in string, then write it all in file
File.WriteAllText(path, output);
}
}
}
}

Editing/Saving a row in a CSV file

After following this topic I am able to create
the new row but my question is how do
I save or write the new line to the file?
I tried 'StreamWriter' but it only writes the newly
created line.
Any suggestions please?
Here is my code so far:
string path = #"C:/CSV.txt";
string[] lines = File.ReadAllLines(path);
var splitlines = lines.Select(l => l.Split(','));
foreach (var line in splitlines)
{
if(line[1].Contains("34"))
{
line[1] = "100";
var newline = string.Join(",", line);
StreamWriter sr = new StreamWriter(path);
sr.WriteLine(newline);
sr.Close();
}
}
Here is your solution using StreamReader class:
String path = #"C:\CSV.txt";
List<String> lines = new List<String>();
if (File.Exists(path));
{
using (StreamReader reader = new StreamReader(path))
{
String line;
while ((line = reader.ReadLine()) != null)
{
if (line.Contains(","))
{
String[] split = line.Split(',');
if (split[1].Contains("34"))
{
split[1] = "100";
line = String.Join(",", split);
}
}
lines.Add(line);
}
}
using (StreamWriter writer = new StreamWriter(path, false))
{
foreach (String line in lines)
writer.WriteLine(line);
}
}
If you want to overwrite the file, use this StreamWriter constructor with append = false.
Maybe somthing like this, you probably will need to clean it up and add some error handling, but it does the job
string path = #"C:\\CSV.txt";
string[] lines = File.ReadAllLines(path);
for (int i = 0; i < lines.Length; i++)
{
string line = lines[i];
if (line.Contains(","))
{
var split = line.Split(',');
if (split[1].Contains("34"))
{
split[1] = "100";
line = string.Join(",", split);
}
}
}
File.WriteAllLines(#"C:\\CSV.txt", lines);
string str;
string PID;
string PN;
string UP;
string DIS;
string STK;
string CSVFilePathName = #"C:\admin.csv";
string[] Lines = File.ReadAllLines(CSVFilePathName);
File.Delete(CSVFilePathName);
StreamWriter sw = new StreamWriter(CSVFilePathName", true);
PID = ((TextBox)(GridView1.Rows[e.RowIndex].Cells[0].Controls[0])).Text;
PN = ((TextBox)(GridView1.Rows[e.RowIndex].Cells[1].Controls[0])).Text;
UP = ((TextBox)(GridView1.Rows[e.RowIndex].Cells[2].Controls[0])).Text;
DIS = ((TextBox)(GridView1.Rows[e.RowIndex].Cells[3].Controls[0])).Text;
STK = ((TextBox)(GridView1.Rows[e.RowIndex].Cells[4].Controls[0])).Text;
foreach (string li in Lines)
{
string[] Fields = li.Split(',');
if(PID==Fields[0])
{
str = PID + "," + PN + "," + UP + "," + DIS + "," + STK;
}
else
{
str = Fields[0] + "," + Fields[1] + "," + Fields[2] + "," + Fields[3] + "," + Fields[4];
}
sw.WriteLine(str);
}
sw.Flush();
sw.Close();

Converting a csv file to json using C#

I was wondering if someone's written a utility to convert a CSV file to Json using C#. From a previous question on stackoverflow, I'm aware of this nice utility - https://github.com/cparker15/csv-to-json and at the moment I plan to refer to it but an existing C# implementation would be very helpful! Thanks!
If you can use System.Web.Extensions, something like this could work:
var csv = new List<string[]>(); // or, List<YourClass>
var lines = System.IO.File.ReadAllLines(#"C:\file.txt");
foreach (string line in lines)
csv.Add(line.Split(',')); // or, populate YourClass
string json = new
System.Web.Script.Serialization.JavaScriptSerializer().Serialize(csv);
You might have more complex parsing requirements for the csv file and you might have a class that encapsulates the data from one line, but the point is that you can serialize to JSON with one line of code once you have a Collection of lines.
Cinchoo ETL - an open source library available to do the conversion of CSV to JSON easily with few lines of code
For a sample CSV:
Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC
Sample code,
string csv = #"Id, Name, City
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC
";
StringBuilder sb = new StringBuilder();
using (var p = ChoCSVReader.LoadText(csv)
.WithFirstLineHeader()
)
{
using (var w = new ChoJSONWriter(sb))
w.Write(p);
}
Console.WriteLine(sb.ToString());
Output JSON:
[
{
"Id": "1",
"Name": "Tom",
"City": "NY"
},
{
"Id": "2",
"Name": "Mark",
"City": "NJ"
},
{
"Id": "3",
"Name": "Lou",
"City": "FL"
},
{
"Id": "4",
"Name": "Smith",
"City": "PA"
},
{
"Id": "5",
"Name": "Raj",
"City": "DC"
}
]
Sample fiddle: https://dotnetfiddle.net/pclnsT
Checkout CodeProject article for some additional help.
UPDATE:
If your CSV file has duplicate column names or no names, please use the below steps to produce the JSON file
string csv = #"Id, Name,
1, Tom, NY
2, Mark, NJ
3, Lou, FL
4, Smith, PA
5, Raj, DC
";
StringBuilder sb = new StringBuilder();
using (var p = ChoCSVReader.LoadText(csv)
.WithField("Id", position: 1)
.WithField("Name", position: 2)
.WithField("City", position: 3)
.WithFirstLineHeader(true)
)
{
using (var w = new ChoJSONWriter(sb))
w.Write(p);
}
Console.WriteLine(sb.ToString());
Sample fiddle: https://dotnetfiddle.net/pP5Du6
Disclaimer: I'm the author of this library.
I used Dictionary and returned json using newtonsoft
public string ConvertCsvFileToJsonObject(string path)
{
var csv = new List<string[]>();
var lines = File.ReadAllLines(path);
foreach (string line in lines)
csv.Add(line.Split(','));
var properties = lines[0].Split(',');
var listObjResult = new List<Dictionary<string, string>>();
for (int i = 1; i < lines.Length; i++)
{
var objResult = new Dictionary<string, string>();
for (int j = 0; j < properties.Length; j++)
objResult.Add(properties[j], csv[i][j]);
listObjResult.Add(objResult);
}
return JsonConvert.SerializeObject(listObjResult);
}
Install Nuget package NewtonSoft.Json
Add reference dll Microsoft.VisualBasic
using System.Linq;
using Newtonsoft.Json;
using Microsoft.VisualBasic.FileIO;
using System.IO;
using System;
using System.Collections.Generic;
using System.Globalization;
namespace Project
{
public static class Program
{
public static void Main(string[] args)
{
string CSVpath = #"D:\New Folder\information.csv";
string analyticsData = ReadFile(CSVpath);
}
private static string ReadFile(string filePath)
{
string payload = "";
try
{
if (!string.IsNullOrWhiteSpace(filePath) && File.Exists(filePath) && Path.GetExtension(filePath).Equals(".csv", StringComparison.InvariantCultureIgnoreCase))
{
string[] lines = File.ReadAllLines(filePath);
if (lines != null && lines.Length > 1)
{
var headers = GetHeaders(lines.First());
payload = GetPayload(headers, lines.Skip(1));
}
}
}
catch (Exception exp)
{
}
return payload;
}
private static IEnumerable<string> GetHeaders(string data)
{
IEnumerable<string> headers = null;
if (!string.IsNullOrWhiteSpace(data) && data.Contains(','))
{
headers = GetFields(data).Select(x => x.Replace(" ", ""));
}
return headers;
}
private static string GetPayload(IEnumerable<string> headers, IEnumerable<string> fields)
{
string jsonObject = "";
try
{
var dictionaryList = fields.Select(x => GetField(headers, x));
jsonObject = JsonConvert.SerializeObject(dictionaryList);
}
catch (Exception ex)
{
}
return jsonObject;
}
private static Dictionary<string, string> GetField(IEnumerable<string> headers, string fields)
{
Dictionary<string, string> dictionary = null;
if (!string.IsNullOrWhiteSpace(fields))
{
var columns = GetFields(fields);
if (columns != null && headers != null && columns.Count() == headers.Count())
{
dictionary = headers.Zip(columns, (x, y) => new { x, y }).ToDictionary(item => item.x, item => item.y);
}
}
return dictionary;
}
public static IEnumerable<string> GetFields(string line)
{
IEnumerable<string> fields = null;
using (TextReader reader = new StringReader(line))
{
using (TextFieldParser parser = new TextFieldParser(reader))
{
parser.TextFieldType = FieldType.Delimited; parser.SetDelimiters(","); fields = parser.ReadFields();
}
}
return fields;
}
}
}
Taking only a dependency on Newtonsoft.Json, here's a helper method given an array of CSV lines, the first one being the header.
public static IEnumerable<JObject> CsvToJson(IEnumerable<string> csvLines)
{
var csvLinesList = csvLines.ToList();
var header = csvLinesList[0].Split(',');
for (int i = 1; i < csvLinesList.Count; i++)
{
var thisLineSplit = csvLinesList[i].Split(',');
var pairedWithHeader = header.Zip(thisLineSplit, (h, v) => new KeyValuePair<string, string>(h, v));
yield return new JObject(pairedWithHeader.Select(j => new JProperty(j.Key, j.Value)));
}
}
I use ChoETL:
using ChoETL;
using System.IO;
public class FromCSVtoJSON
{
public FromCSVtoJSON()
{
}
public void convertFile(string inputFile, string outputFile)
{
using (var writer = new ChoJSONWriter(outputFile))
{
using (var reader = new ChoCSVReader(inputFile).WithFirstLineHeader())
{
writer.Write(reader);
}
}
}
}
From that same SO answer, there is a link to this post.
CsvToJson extention method
/// <summary>
/// Converts a CSV string to a Json array format.
/// </summary>
/// <remarks>First line in CSV must be a header with field name columns.</remarks>
/// <param name="value"></param>
/// <returns></returns>
public static string CsvToJson(this string value)
{
// Get lines.
if (value == null) return null;
string[] lines = value.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
if (lines.Length < 2) throw new InvalidDataException("Must have header line.");
// Get headers.
string[] headers = lines.First().SplitQuotedLine(new char[] { ',' }, false);
// Build JSON array.
StringBuilder sb = new StringBuilder();
sb.AppendLine("[");
for (int i = 1; i < lines.Length; i++)
{
string[] fields = lines[i].SplitQuotedLine(new char[] { ',', ' ' }, true, '"', false);
if (fields.Length != headers.Length) throw new InvalidDataException("Field count must match header count.");
var jsonElements = headers.Zip(fields, (header, field) => string.Format("{0}: {1}", header, field)).ToArray();
string jsonObject = "{" + string.Format("{0}", string.Join(",", jsonElements)) + "}";
if (i < lines.Length - 1)
jsonObject += ",";
sb.AppendLine(jsonObject);
}
sb.AppendLine("]");
return sb.ToString();
}
There appears to be an issue with where some methods called within the above extension live (see the comments of the original blog post), but it should get you most of the way there.
EDIT Here is another SO answer about splitting a CSV line. You could use one of the suggested regex solutions to create your own SplitQuotedLine method:
public static string SplitQuotedLine(this string value, char separator, bool quotes) {
// Use the "quotes" bool if you need to keep/strip the quotes or something...
var s = new StringBuilder();
var regex = new Regex("(?<=^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)");
foreach (Match m in regex.Matches(value)) {
s.Append(m.Value);
}
return s.ToString();
}
I did not test the above, so forgive me if I made any errors.
Also, it would appear that Zip is a LINQ extension method, so that takes care of that problem.
Here's mine.. It can parse 9k CSV records in centuries. LOL
class CSVTOJSON
{
public string ConvertToJSON()
{
string json = string.Empty;
string csv = string.Empty;
using (StreamReader reader = new StreamReader("data.csv"))
{
csv = reader.ReadToEnd();
}
string[] lines = csv.Split(new string[] { "\n" }, System.StringSplitOptions.None);
if (lines.Length > 1)
{
// parse headers
string[] headers = lines[0].Split(',');
StringBuilder sbjson = new StringBuilder();
sbjson.Clear();
sbjson.Append("[");
// parse data
for (int i = 1; i < lines.Length; i++)
{
if (string.IsNullOrWhiteSpace(lines[i])) continue;
if (string.IsNullOrEmpty(lines[i])) continue;
sbjson.Append("{");
string[] data = lines[i].Split(',');
for (int h = 0; h < headers.Length; h++)
{
sbjson.Append(
$"\"{headers[h]}\": \"{data[h]}\"" + (h < headers.Length - 1 ? "," : null)
);
}
sbjson.Append("}" + (i < lines.Length - 1 ? "," : null));
}
sbjson.Append("]");
json = sbjson.ToString();
}
return json;
}
}
But it works.
console log:
Converting CSV to JSON
CSV has 9486 data
Total duration converting CSV to JSON: 00:00:00.0775373
Small variation to the solution by bc3tech i.e. avoiding external dependencies (on Newtonsoft.Json), and instead using System.Text.Json (dotnet core 3+)
public static IEnumerable<string> CsvToJson(string fileName, char delim = '|')
{
var lines = File.ReadLines(fileName);
var hdr = new List<string>(lines.First().Trim().Split(delim));
foreach (var l in lines.Skip(1).Where(l => (l.Trim() != String.Empty)))
{
var val = l.Trim().Split(delim);
var ds = hdr.Zip(val, (k, v) => new { k, v }).ToDictionary(x => x.k, x => x.v);
yield return JsonSerializer.Serialize(ds);
}
}
I can see most people simply assume parsing CSV file is to simply split comma delimiter between each column, but the following format is still a valid CSV
"aaa","bbb","ccc"
"z, z",yyy,xxx
There is a nice class hidden within Microsoft.VisualBasic.FileIO to handle CSV file format correctly. I combine this with JSON.NET came up with the solution.
public static string? CsvToJson(string input, string delimiter)
{
using (TextFieldParser parser = new TextFieldParser(
new MemoryStream(Encoding.UTF8.GetBytes(input))))
{
parser.Delimiters = new string[] { delimiter };
string[]? headers = parser.ReadFields();
if (headers == null) return null;
string[]? row;
string comma = "";
var sb = new StringBuilder((int)(input.Length * 1.1));
sb.Append("[");
while ((row = parser.ReadFields()) != null)
{
var dict = new Dictionary<string, object>();
for (int i = 0; row != null && i < row.Length; i++)
dict[headers[i]] = row[i];
var obj = JsonConvert.SerializeObject(dict);
sb.Append(comma + obj);
comma = ",";
}
return sb.Append("]").ToString();
}
}
Usage
var str = #"Header1,""Header,,2 "",Data3
1,444.00, ""Liang, Jerry""
0,""5,550"",Jerry
";
var json = CsvToJson(str, ",");
Result
[
{
"Header1": "1",
"Header,,2": "444.00",
"Data3": "Liang, Jerry"
},
{
"Header1": "0441",
"Header,,2": "5,550",
"Data3": "Jerry"
}
]
I looked for the answer for this question finally i solved it by using Dictionary
public static void CreateJsonFromCSV()
{
string path = "C:\\Users\\xx\\xx\\xx\\xx\\lang.csv";
string textFilePath = path;
const Int32 BufferSize = 128;
using (var fileStream = File.OpenRead(textFilePath))
using (var streamReader = new StreamReader(fileStream, Encoding.UTF8, true, BufferSize))
{
String line;
Dictionary<string, string> jsonRow = new Dictionary<string, string>();
while ((line = streamReader.ReadLine()) != null)
{
string[] parts = line.Split(',');
string key_ = parts[0];
string value = parts[1];
if (!jsonRow.Keys.Contains(key_))
{
jsonRow.Add(key_, value );
}
}
var json = new JavaScriptSerializer().Serialize(jsonRow);
string path_ = "C:\\XX\\XX\\XX\\XX\\XX.csv";
File.WriteAllText(path_, json);
}
}
Make sure you add the below in web.config before you do parse large csv files.
<system.web.extensions>
<scripting>
<webServices>
<jsonSerialization maxJsonLength="50000000"/>
</webServices>
</scripting>
</system.web.extensions>
Try this:
StreamReader sr = new StreamReader(filePath);
while ((line = sr.ReadLine()) != null)
{
//Console.WriteLine(line);
string[] csv = line.Split(',');
var dictionary = new Dictionary<string, string>();
dictionary.Add("dispatching_base_number",csv[0]);
dictionary.Add("available_vehicles", csv[1]);
dictionary.Add("vehicles_in_trips", csv[2]);
dictionary.Add("Cancellations", csv[3]);
string jsonN = new System.Web.Script.Serialization.JavaScriptSerializer().Serialize(dictionary);
Console.WriteLine("Sending message: {0}",jsonN);
}
Try this and convert CSV to JSON object:
public static List<object> CsvToJson( string body, string[] column ) {
if ( string.IsNullOrEmpty( body ) ) return null;
string[] rowSeparators = new string[] { "\r\n" };
string[] rows = body.Split( rowSeparators, StringSplitOptions.None );
body = null;
if ( rows == null || ( rows != null && rows.Length == 0 ) ) return null;
string[] cellSeparator = new string[] { "," };
List<object> data = new List<object>( );
int clen = column.Length;
rows.Select( row => {
if ( string.IsNullOrEmpty( row ) ) return row;
string[] cells = row.Trim( ).Split( cellSeparator, StringSplitOptions.None );
if ( cells == null ) return row;
if ( cells.Length < clen ) return row;
Dictionary<object, object> jrows = new Dictionary<object, object>( );
for ( int i = 0; i < clen; i++ ) {
jrows.Add( column[i], cells[i]?.Trim( ) );
}
data.Add( jrows );
return row;
} ).ToList( );
rowSeparators = null; rows = null;
cellSeparator = null;
return data;
}
var data = CsvToJson("csv_input_str", new string[]{ "column_map" })
string jsonStr = new JavaScriptSerializer { MaxJsonLength = int.MaxValue }.Serialize( data );
First, load the csv file into datatable and serialize it to Json document. It uses OLEDB Provider that can parse the csv wisely,
Courtesy to Jim Scott, https://stackoverflow.com/a/1050278/6928056
Courtesy to K_B, https://stackoverflow.com/a/2979938/6928056
using System.Data;
using System.Data.OleDb;
using System.Globalization;
using System.IO;
using Newtonsoft.Json;
static string ConvertCsvToJson(string path, bool isFirstRowHeader)
{
string header = isFirstRowHeader ? "Yes" : "No";
string pathOnly = Path.GetDirectoryName(path);
string fileName = Path.GetFileName(path);
string sql = #"SELECT * FROM [" + fileName + "]";
using(OleDbConnection connection = new OleDbConnection(
#"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + pathOnly +
";Extended Properties=\"Text;HDR=" + header + "\""))
using(OleDbCommand command = new OleDbCommand(sql, connection))
using(OleDbDataAdapter adapter = new OleDbDataAdapter(command))
{
var dataTable = new DataTable();
dataTable.Locale = CultureInfo.CurrentCulture;
adapter.Fill(dataTable);
return JsonConvert.SerializeObject(dataTable, Formatting.Indented);
}
}
Simple method to convert flat csv file to a collection of simple json formatted objects. Works with files with header row on the first line. Part of this method was found somewhere else on SO.
Add reference to Microsoft.VisualBasic.
using Microsoft.VisualBasic.FileIO;
public static StringBuilder ReadCsv()
{
var path = #"X:\...\input.csv";
using (TextFieldParser csvParser = new TextFieldParser(path))
{
csvParser.CommentTokens = new string[] { "#" };
//Remember to use your own separator
csvParser.SetDelimiters(new string[] { ";" });
csvParser.HasFieldsEnclosedInQuotes = false;
StringBuilder json = new StringBuilder();
string[] colNames = new string[0];
string[] fields = new string[0];
json.Append("[");
int counter = 0;
while (!csvParser.EndOfData)
{
if (counter == 0)
{
//Read properies' names
colNames = csvParser.ReadFields();
counter++;
Console.WriteLine($"{colNames.Length} columns detected!");
}
else
{
// Read current line fields, pointer moves to the next line.
// Read the properties' values
fields = csvParser.ReadFields();
json.Append("{");
for (int i = 0; i < colNames.Length; i++)
{
json.Append($"\"{colNames[i]}\":{TryParse(fields[i])}");
if (i != colNames.Length - 1)
{
json.Append(",");
}
}
json.Append("},");
Console.WriteLine($"Writing record nr.: {counter}");
counter++;
}
}
json.Length--; //Remove trailing comma
json.Append("]");
return json;
}
}
string TryParse(string s)
{
if (string.IsNullOrEmpty(s)) return "null";
//Remember to set your decimal character here!
if (s.Contains('.'))
{
double dResult;
//This works on my computer, could be different on your machine
if (double.TryParse(s, NumberStyles.AllowDecimalPoint,
CultureInfo.InvariantCulture, out dResult))
return dResult.ToString(CultureInfo.InvariantCulture);
}
else
{
int intResult;
if (int.TryParse(s, out intResult))
return intResult.ToString(CultureInfo.InvariantCulture);
}
return "\"" + s + "\"";
}
This should give you a simple list of json objects.
If you are looking for a C# only solution this might work for you. I recently face the same issue and created this method to overcome.
public static string ConvertToJsonStructure(string csvDataAsString, char delimiter = ',', bool hasHeaders = true)
{
var output = string.Empty;
if (string.IsNullOrEmpty(csvDataAsString))
return "{}";
var rows = csvDataAsString.Split(new string[] { Environment.NewLine }, StringSplitOptions.None);
var headers = hasHeaders ? rows[0].Split(delimiter) : null;
var colCount = headers?.Length;
var jsonStart = "{";
var jsonEnd = "}";
if (rows.Length > (1 + (hasHeaders ? 1 : 0)))
{
jsonStart = "[";
jsonEnd = "]";
}
output = output + jsonStart;
int startIndex = hasHeaders ? 1 : 0;
for (int i = startIndex; i < rows.Length; i++)
{
var cols = rows[i].Split(delimiter);
if (colCount == null)
colCount = cols.Length;
var tempJson = "{";
for (int j = 0; j < colCount.Value; j++)
{
if (hasHeaders)
tempJson = tempJson + $"\"{headers[j]}\":";
var isNumber = Regex.IsMatch(cols[j], #"^\d + $");
var val = isNumber ? cols[j] : $"\"{cols[j]}\"";
tempJson = tempJson + val;
if (j < colCount.Value - 1)
tempJson = tempJson + ",";
}
tempJson = tempJson + "}";
if (i < rows.Length - 1)
tempJson = tempJson + ",";
output = output + tempJson;
}
output = output + jsonEnd;
return output;
}
}
You need to pass your content as string to this method and you can do something like this to read your csv
var csvAsString = File.ReadAllText("TestCsv.csv");
var result = CsvToJson.ConvertToJsonStructure(csvAsString);

How to split a text file into multiple files?

In C#, what is the most efficient method to split a text file into multiple text files (the splitting delimiter being a blank line), while preserving the character encoding?
I would use the StreamReader and StreamWriter classes:
public void Split(string inputfile, string outputfilesformat) {
int i = 0;
System.IO.StreamWriter outfile = null;
string line;
try {
using(var infile = new System.IO.StreamReader(inputfile)) {
while(!infile.EndOfStream){
line = infile.ReadLine();
if(string.IsNullOrEmpty(line)) {
if(outfile != null) {
outfile.Dispose();
outfile = null;
}
continue;
}
if(outfile == null) {
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, i++),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
} finally {
if(outfile != null)
outfile.Dispose();
}
}
You would then call this method like this:
Split("C:\\somefile.txt", "C:\\output-files-{0}.txt");
Purely for those who want to avoid thinking:
If you have a CSV (comma separated values) file and want to split the file when a field changes, identify/name the file by the change (without unnecessary quote marks), and strip out comments/certain lines (here identified by starting with "#)
Modified method:
public void Split(string inputfile, string outputfilesformat)
{
System.IO.StreamWriter outfile = null;
string line;
string[] splitArray;
string nameFromFile = "";
try
{
using (var infile = new System.IO.StreamReader(inputfile))
{
while (!infile.EndOfStream)
{
line = infile.ReadLine();
splitArray = line.Split(new char[] { ',' });
if (!splitArray[0].StartsWith("\"#"))
{
if (splitArray[4].Replace("\"", "") != nameFromFile.Replace("\"", ""))
{
if (outfile != null)
{
outfile.Dispose();
outfile = null;
}
nameFromFile = splitArray[4].Replace("\"", "");
continue;
}
if (outfile == null)
{
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, nameFromFile),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
}
}
finally
{
if (outfile != null)
outfile.Dispose();
}
}
Local path call:
string strpath = Server.MapPath("~/Data/SPLIT/DATA.TXT");
string newFile = Server.MapPath("~/Data/SPLIT");
if (System.IO.File.Exists(#strpath))
{
Split(strpath, newFile+"\\{0}.CSV");
}
In the case anyone needs to split a text file into multiple files using a string:
public static void Main(string[] args)
{
void Split(string inputfile, string outputfilesformat)
{
int i = 0;
System.IO.StreamWriter outfile = null;
string line;
try
{
using (var infile = new System.IO.StreamReader(inputfile))
{
while (!infile.EndOfStream)
{
line = infile.ReadLine();
if (line.Trim().Contains("String You Want File To Split From"))
{
if (outfile != null)
{
outfile.Dispose();
outfile = null;
}
continue;
}
if (outfile == null)
{
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, i++),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
}
finally
{
if (outfile != null)
outfile.Dispose();
}
}
Split("C:test.txt", "C:\\output-files-{0}.txt");
}

Categories