Im converting csv files to xml files via c#. I'm saving the csv file in a List of string, but it doesn't take symbols symbols such as ä, á, ê.
public Lesson CsvToLesson(List<string> csv)
{
string lesName = csv[0][csv[0].Length - 3].ToString();
List<Word> words = new List<Word>();
for(int i = 3; i < csv.Count; i++)
{
string lang1 = "";
string lang2 = "";
bool firstWord = true;
foreach (char c in csv[i])
{
if (firstWord)
{
if(c != ';')
{
lang1 += c;
} else
{
firstWord = false;
}
} else {
if (c != ';')
{
lang2 += c;
}
else
{
break;
}
}
}
words.Add(new Word(lang1, lang2, 1, i));
}
return new Lesson(lesName, words);
}
to return them as an Object called Lesson.
<Word kasten="1" id="24">
<lang1>eine Sekret�rin</lang1>
<lang2>une secr�taire</lang2>
</Word>
The reading method:
public void saveCsv(string path)
{
string line;
List<string> csv = new List<string>();
StreamReader file = new StreamReader(path);
while((line = file.ReadLine()) != null)
{
csv.Add(line);
}
file.Close();
AddLesson(controller.CsvToLesson(csv));
}
How can I fix this?
Thanks to Cid.
The Problem was that the csv file wasn't saved as utf-8 csv file. There are multiple options in Excel.
Take a look at that post, if you have the same problem:
How to check encoding of a CSV file
Related
I want to compare two csv files and print the differences in a file. I currently use the code below to remove a row. Can I change this code so that it compares two csv files or is there a better way in c# to compare csv files?
List<string> lines = new List<string>();
using (StreamReader reader = new StreamReader(System.IO.File.OpenRead(path)))
{
string line;
while ((line = reader.ReadLine()) != null)
{
if (line.Contains(csvseperator))
{
string[] split = line.Split(Convert.ToChar(scheidingsteken));
if (split[selectedRow] == value)
{
}
else
{
line = string.Join(csvseperator, split);
lines.Add(line);
}
}
}
}
using (StreamWriter writer = new StreamWriter(path, false))
{
foreach (string line in lines)
writer.WriteLine(line);
}
}
Here is another way to find differences between CSV files, using Cinchoo ETL - an open source library
For the below sample CSV files
sample1.csv
id,name
1,Tom
2,Mark
3,Angie
sample2.csv
id,name
1,Tom
2,Mark
4,Lu
METHOD 1:
Using Cinchoo ETL, below code shows how to find differences between rows by all columns
var input1 = new ChoCSVReader("sample1.csv").WithFirstLineHeader().ToArray();
var input2 = new ChoCSVReader("sample2.csv").WithFirstLineHeader().ToArray();
using (var output = new ChoCSVWriter("sampleDiff.csv").WithFirstLineHeader())
{
output.Write(input1.OfType<ChoDynamicObject>().Except(input2.OfType<ChoDynamicObject>(), ChoDynamicObjectEqualityComparer.Default));
output.Write(input2.OfType<ChoDynamicObject>().Except(input1.OfType<ChoDynamicObject>(), ChoDynamicObjectEqualityComparer.Default));
}
sampleDiff.csv
id,name
3,Angie
4,Lu
Sample fiddle: https://dotnetfiddle.net/nwLeJ2
METHOD 2:
If you want to do the differences by id column,
var input1 = new ChoCSVReader("sample1.csv").WithFirstLineHeader().ToArray();
var input2 = new ChoCSVReader("sample2.csv").WithFirstLineHeader().ToArray();
using (var output = new ChoCSVWriter("sampleDiff.csv").WithFirstLineHeader())
{
output.Write(input1.OfType<ChoDynamicObject>().Except(input2.OfType<ChoDynamicObject>(), new ChoDynamicObjectEqualityComparer(new string[] { "id" })));
output.Write(input2.OfType<ChoDynamicObject>().Except(input1.OfType<ChoDynamicObject>(), new ChoDynamicObjectEqualityComparer(new string[] { "id" })));
}
Sample fiddle: https://dotnetfiddle.net/t6mmJW
If you only want to compare one column you can use this code:
List<string> lines = new List<string>();
List<string> lines2 = new List<string>();
try
{
StreamReader reader = new StreamReader(System.IO.File.OpenRead(pad));
StreamReader read = new StreamReader(System.IO.File.OpenRead(pad2));
string line;
string line2;
//With this you can change the cells you want to compair
int comp1 = 1;
int comp2 = 1;
while ((line = reader.ReadLine()) != null && (line2 = read.ReadLine()) != null)
{
string[] split = line.Split(Convert.ToChar(seperator));
string[] split2 = line2.Split(Convert.ToChar(seperator));
if (line.Contains(seperator) && line2.Contains(seperator))
{
if (split[comp1] != split2[comp2])
{
//It is not the same
}
else
{
//It is the same
}
}
}
reader.Dispose();
read.Dispose();
}
catch
{
}
Want to create a generic text file parser in c# for any find of text file.Actually i have 4 application all 4 getting input data from txt file format but text files are not homogeneous in nature.i have tried fixedwithdelemition.
private static DataTable FixedWidthDiliminatedTxtRead()
{
string[] fields;
StringBuilder sb = new StringBuilder();
List<StringBuilder> lst = new List<StringBuilder>();
DataTable dtable = new DataTable();
ArrayList aList;
using (TextFieldParser tfp = new TextFieldParser(testOCC))
{
tfp.TextFieldType = FieldType.FixedWidth;
tfp.SetFieldWidths(new int[12] { 2,25,8,12,13,5,6,3,10,11,10,24 });
for (int col = 1; col < 13; ++col)
dtable.Columns.Add("COL" + col);
while (!tfp.EndOfData)
{
fields = tfp.ReadFields();
aList = new ArrayList();
for (int i = 0; i < fields.Length; ++i)
aList.Add(fields[i] as string);
if (dtable.Columns.Count == aList.Count)
dtable.Rows.Add(aList.ToArray());
}
}
return dtable;
}
but i feel its very rigid one and really varies application to application making it configgurable .any better way ..
tfp.SetFieldWidths(new int[12] { 2,25,8,12,13,5,6,3,10,11,10,24 });
File nature :
Its a report kind of file .
position of columns are very similar
row data of file id different .
I get this as a reference
http://www.codeproject.com/Articles/11698/A-Portable-and-Efficient-Generic-Parser-for-Flat-F
any other thoughts ?
If the only thing different is the field widths, you could just try sending the field widths in as a parameter:
private static DataTable FixedWidthDiliminatedTxtRead(int[] fieldWidthArray)
{
string[] fields;
StringBuilder sb = new StringBuilder();
List<StringBuilder> lst = new List<StringBuilder>();
DataTable dtable = new DataTable();
ArrayList aList;
using (TextFieldParser tfp = new TextFieldParser(testOCC))
{
tfp.TextFieldType = FieldType.FixedWidth;
tfp.SetFieldWidths(fieldWidthArray);
for (int col = 1; col < 13; ++col)
dtable.Columns.Add("COL" + col);
while (!tfp.EndOfData)
{
fields = tfp.ReadFields();
aList = new ArrayList();
for (int i = 0; i < fields.Length; ++i)
aList.Add(fields[i] as string);
if (dtable.Columns.Count == aList.Count)
dtable.Rows.Add(aList.ToArray());
}
}
return dtable;
}
If you will have more logic to grab the data, you might want to consider defining an interface or abstract class for a GenericTextParser and create concrete implementations for each other file.
Hey I made one of these last week.
I did not write it with the intentions of other people using it so I appologize in advance if its not documented well but I cleaned it up for you. ALSO I grabbed several segments of code from stack overflow so I am not the original author of several pieces of this.
The places you need to edit are the path and pathout and the seperators of text.
char[] delimiters = new char[]
So it searches for part of a word and then grabs the whole word. I used a c# console application for this.
Here you go:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace UniqueListofStringFinder
{
class Program
{
static void Main(string[] args)
{
string path = #"c:\Your Path\in.txt";
string pathOut = #"c:\Your Path\out.txt";
string data = "!";
Console.WriteLine("Current Path In is set to: " + path);
Console.WriteLine("Current Path Out is set to: " + pathOut);
Console.WriteLine(Environment.NewLine + Environment.NewLine + "Input String to Search For:");
Console.Read();
string input = Console.ReadLine();
// Delete the file if it exists.
if (!File.Exists(path))
{
// Create the file.
using (FileStream fs = File.Create(path))
{
Byte[] info =
new UTF8Encoding(true).GetBytes("This is some text in the file.");
// Add some information to the file.
fs.Write(info, 0, info.Length);
}
}
System.IO.StreamReader file = new System.IO.StreamReader(path);
List<string> Spec = new List<string>();
using (StreamReader sr = File.OpenText(path))
{
while (!file.EndOfStream)
{
string s = file.ReadLine();
if (s.Contains(input))
{
char[] delimiters = new char[] { '\r', '\n', '\t', ')', '(', ',', '=', '"', '\'', '<', '>', '$', ' ', '#', '[', ']' };
string[] parts = s.Split(delimiters,
StringSplitOptions.RemoveEmptyEntries);
foreach (string word in parts)
{
if (word.Contains(input))
{
if( word.IndexOf(input) == 0)
{
Spec.Add(word);
}
}
}
}
}
Spec.Sort();
// Open the stream and read it back.
//while ((s = sr.ReadLine()) != null)
//{
// Console.WriteLine(s);
//}
}
Console.WriteLine();
StringBuilder builder = new StringBuilder();
foreach (string s in Spec) // Loop through all strings
{
builder.Append(s).Append(Environment.NewLine); // Append string to StringBuilder
}
string result = builder.ToString(); // Get string from StringBuilder
Program a = new Program();
data = a.uniqueness(result);
int i = a.writeFile(data,pathOut);
}
public string uniqueness(string rawData )
{
if (rawData == "")
{
return "Empty Data Set";
}
List<string> dataVar = new List<string>();
List<string> holdData = new List<string>();
bool testBool = false;
using (StringReader reader = new StringReader(rawData))
{
string line;
while ((line = reader.ReadLine()) != null)
{
foreach (string s in holdData)
{
if (line == s)
{
testBool = true;
}
}
if (testBool == false)
{
holdData.Add(line);
}
testBool = false;
// Do something with the line
}
}
int i = 0;
string dataOut = "";
foreach (string s in holdData)
{
dataOut += s + "\r\n";
i++;
}
// Write the string to a file.
return dataOut;
}
public int writeFile(string dataOut, string pathOut)
{
try
{
System.IO.StreamWriter file = new System.IO.StreamWriter(pathOut);
file.WriteLine(dataOut);
file.Close();
}
catch (Exception ex)
{
dataOut += ex.ToString();
return 1;
}
return 0;
}
}
}
private static DataTable FixedWidthTxtRead(string filename, int[] fieldWidths)
{
string[] fields;
DataTable dtable = new DataTable();
ArrayList aList;
using (TextFieldParser tfp = new TextFieldParser(filename))
{
tfp.TextFieldType = FieldType.FixedWidth;
tfp.SetFieldWidths(fieldWidths);
for (int col = 1; col <= fieldWidths.length; ++col)
dtable.Columns.Add("COL" + col);
while (!tfp.EndOfData)
{
fields = tfp.ReadFields();
aList = new ArrayList();
for (int i = 0; i < fields.Length; ++i)
aList.Add(fields[i] as string);
if (dtable.Columns.Count == aList.Count) dtable.Rows.Add(aList.ToArray());
}
}
return dtable;
}
Here's what I did:
I built a factory for the type of processor needed (based on file type/format), which abstracted the file reader.
I then built a collection object that contained a set of triggers for each field I was interested in (also contained the property name for which this field is destined). This settings collection is loaded in via an XML configuration file, so all I need to change are the settings, and the base parsing process can react to how the settings are configured. Finally I built a reflection wrapper wherein once a field is parsed, the corresponding property on the model object is set.
As the file flowed through, the triggers for each setting evaluated each lines value. When it found what it was set to find (via pattern matching, or column length values) it fired and event that bubbled up and set a property on the model object. I can show some pseudo code if you're interested. It needs some work for efficiency's sake, but I like the concept.
I have a list of words. I want the program to scan for multiple words from a text file.
This is what i already have:
int counter = 0;
string line;
StringBuilder sb = new StringBuilder();
string[] words = { "var", "bob", "for", "example"};
try
{
using (StreamReader file = new StreamReader("test.txt"))
{
while ((line = file.ReadLine()) != null)
{
if (line.Contains(Convert.ToChar(words)))
{
sb.AppendLine(line.ToString());
}
}
}
listResults.Text += sb.ToString();
}
catch (Exception ex)
{
listResults.ForeColor = Color.Red;
listResults.Text = "---ERROR---";
}
So i want to scan the file for a word, and if it's not there, scan for the next word...
String.Contains() only takes one argument: a string. What your call to Contains(Convert.ToChar(words)) does, is probably not what you expect.
As explained in Using C# to check if string contains a string in string array, you might want to do something like this:
using (StreamReader file = new StreamReader("test.txt"))
{
while ((line = file.ReadLine()) != null)
{
foreach (string word in words)
{
if (line.Contains(word))
{
sb.AppendLine(line);
}
}
}
}
Or if you want to follow your exact problem statement ("scan the file for a word, and if it's not there, scan for the next word"), you might want to take a look at Return StreamReader to Beginning:
using (StreamReader file = new StreamReader("test.txt"))
{
foreach (string word in words)
{
while ((line = file.ReadLine()) != null)
{
if (line.Contains(word))
{
sb.AppendLine(line);
}
}
if (sb.Length == 0)
{
// Rewind file to prepare for next word
file.Position = 0;
file.DiscardBufferedData();
}
else
{
return sb.ToString();
}
}
}
But this will think "bob" is part of "bobcat". If you don't agree, see String compare C# - whole word match, and replace:
line.Contains(word)
with
string wordWithBoundaries = "\\b" + word + "\\b";
Regex.IsMatch(line, wordWithBoundaries);
StringBuilder sb = new StringBuilder();
string[] words = { "var", "bob", "for", "example" };
string[] file_lines = File.ReadAllLines("filepath");
for (int i = 0; i < file_lines.Length; i++)
{
string[] split_words = file_lines[i].Split(' ');
foreach (string str in split_words)
{
foreach (string word in words)
{
if (str == word)
{
sb.AppendLine(file_lines[i]);
}
}
}
}
This works a treat:
var query =
from line in System.IO.File.ReadLines("test.txt")
where words.Any(word => line.Contains(word))
select line;
To get these out as a single string, just do this:
var results = String.Join(Environment.NewLine, query);
Couldn't be much simpler.
If you want to match only whole words it becomes only a little more complicated. You can do this:
Regex[] regexs =
words
.Select(word => new Regex(String.Format(#"\b{0}\b", Regex.Escape(word))))
.ToArray();
var query =
from line in System.IO.File.ReadLines(fileName)
where regexs.Any(regex => regex.IsMatch(line))
select line;
In C#, what is the most efficient method to split a text file into multiple text files (the splitting delimiter being a blank line), while preserving the character encoding?
I would use the StreamReader and StreamWriter classes:
public void Split(string inputfile, string outputfilesformat) {
int i = 0;
System.IO.StreamWriter outfile = null;
string line;
try {
using(var infile = new System.IO.StreamReader(inputfile)) {
while(!infile.EndOfStream){
line = infile.ReadLine();
if(string.IsNullOrEmpty(line)) {
if(outfile != null) {
outfile.Dispose();
outfile = null;
}
continue;
}
if(outfile == null) {
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, i++),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
} finally {
if(outfile != null)
outfile.Dispose();
}
}
You would then call this method like this:
Split("C:\\somefile.txt", "C:\\output-files-{0}.txt");
Purely for those who want to avoid thinking:
If you have a CSV (comma separated values) file and want to split the file when a field changes, identify/name the file by the change (without unnecessary quote marks), and strip out comments/certain lines (here identified by starting with "#)
Modified method:
public void Split(string inputfile, string outputfilesformat)
{
System.IO.StreamWriter outfile = null;
string line;
string[] splitArray;
string nameFromFile = "";
try
{
using (var infile = new System.IO.StreamReader(inputfile))
{
while (!infile.EndOfStream)
{
line = infile.ReadLine();
splitArray = line.Split(new char[] { ',' });
if (!splitArray[0].StartsWith("\"#"))
{
if (splitArray[4].Replace("\"", "") != nameFromFile.Replace("\"", ""))
{
if (outfile != null)
{
outfile.Dispose();
outfile = null;
}
nameFromFile = splitArray[4].Replace("\"", "");
continue;
}
if (outfile == null)
{
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, nameFromFile),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
}
}
finally
{
if (outfile != null)
outfile.Dispose();
}
}
Local path call:
string strpath = Server.MapPath("~/Data/SPLIT/DATA.TXT");
string newFile = Server.MapPath("~/Data/SPLIT");
if (System.IO.File.Exists(#strpath))
{
Split(strpath, newFile+"\\{0}.CSV");
}
In the case anyone needs to split a text file into multiple files using a string:
public static void Main(string[] args)
{
void Split(string inputfile, string outputfilesformat)
{
int i = 0;
System.IO.StreamWriter outfile = null;
string line;
try
{
using (var infile = new System.IO.StreamReader(inputfile))
{
while (!infile.EndOfStream)
{
line = infile.ReadLine();
if (line.Trim().Contains("String You Want File To Split From"))
{
if (outfile != null)
{
outfile.Dispose();
outfile = null;
}
continue;
}
if (outfile == null)
{
outfile = new System.IO.StreamWriter(
string.Format(outputfilesformat, i++),
false,
infile.CurrentEncoding);
}
outfile.WriteLine(line);
}
}
}
finally
{
if (outfile != null)
outfile.Dispose();
}
}
Split("C:test.txt", "C:\\output-files-{0}.txt");
}
all,
I started out with what i thought was going to be a pretty simple task. (convert a csv to "wiki" format) but im hitting a few snags that im having trouble working through
I have 3 main problems
1) some of the cells contain \r\n ( so when reading line by line this treats each new line as a new cell
2) some of the rows contain "," ( i tried switching to \t delemited files but im still running into a problem escaping when its between two "")
3) some rows are completely blank except for the delmiter ("," or "\t") others are incomplete (which is fine i just need to make sure that the cell goes in the correct place)
I've tried a few of the CSV reader classes but they would bump up agenst of teh problems listed above
I'm trying to keep this app as small as possible so i am also trying to avoid dlls and large classes that only a small portion do what i want.
so far i have two "attempts that are not working
Atempt 1 (doesn't handel \r\n in a cell)
OpenFileDialog openFileDialog1 = new OpenFileDialog();
openFileDialog1.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
openFileDialog1.Filter = "tab sep file (*.txt)|*.txt|All files (*.*)|*.*";
openFileDialog1.FilterIndex = 1;
openFileDialog1.RestoreDirectory = true;
if (openFileDialog1.ShowDialog() == DialogResult.OK)
{
if (cb_sortable.Checked)
{
header = "{| class=\"wikitable sortable\" border=\"1\" \r\n|+ Sortable table";
}
StringBuilder sb = new StringBuilder();
string line;
bool firstline = true;
StreamReader sr = new StreamReader(openFileDialog1.FileName);
sb.AppendLine(header);
while ((line = sr.ReadLine()) != null)
{
if (line.Replace("\t", "").Length > 1)
{
string[] hold;
string lead = "| ";
if (firstline && cb_header.Checked == true)
{
lead = "| align=\"center\" style=\"background:#f0f0f0;\"| ";
}
hold = line.Split('\t');
sb.AppendLine(table);
foreach (string row in hold)
{
sb.AppendLine(lead + row.Replace("\"", ""));
}
firstline = false;
}
}
sb.AppendLine(footer);
Clipboard.SetText(sb.ToString());
MessageBox.Show("Done!");
}
}
string header = "{| class=\"wikitable\" border=\"1\" ";
string footer = "|}";
string table = "|-";
attempt 2 ( can handle \r\n but shifts cells over blank cells) (its not complete yet)
OpenFileDialog openFileDialog1 = new OpenFileDialog();
openFileDialog1.InitialDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
openFileDialog1.Filter = "txt file (*.txt)|*.txt|All files (*.*)|*.*";
openFileDialog1.FilterIndex = 1;
openFileDialog1.RestoreDirectory = true;
if (openFileDialog1.ShowDialog() == DialogResult.OK)
{
if (cb_sortable.Checked)
{
header = "{| class=\"wikitable sortable\" border=\"1\" \r\n|+ Sortable table";
}
using (StreamReader sr = new StreamReader(openFileDialog1.FileName))
{
string text = sr.ReadToEnd();
string[] cells = text.Split('\t');
int columnCount = 0;
foreach (string cell in cells)
{
if (cell.Contains("\r\n"))
{
break;
}
columnCount++;
}
}
basically all I needs is a "split if not between \" " but im just at a loss right now
any tips or tricks would be greatly appreciated
Checkout this project instead of rolling your own CSV parser.
You might take a look at http://www.filehelpers.com/ as well...
Don't try to do it by yourself if you can use libraries!
Try taking a look here. Your code doesn't make web requests, but effectively this shows you how to parse a csv that is returned from a web service.
There's a decent implementation here...
A Fast CSV Reader by Sébastien Lorion
It makes much more sense in this case to use tried-and-tested code rather than trying to roll your own.
For a specification that's essentially two pages long, the CSV format is deceptive in its simplicity. The majority of short parser implementations that can be found on the internet are blatantly incorrect in one way or another. That notwithstanding, the format hardly seems to call for 1k+ SLOC implementations.
public static class CsvImport {
/// <summary>
/// Parse a Comma Separated Value (CSV) source into rows of strings. [1]
///
/// The header row (if present) is not treated specially. No checking is
/// performed to ensure uniform column lengths among rows. If no input
/// is available, a single row containing String.Empty is returned. No
/// support is provided for debugging invalid CSV files. Callers who
/// desire such assistance are encouraged to use a TextReader that can
/// report the current line and column position.
///
/// [1] https://www.rfc-editor.org/rfc/rfc4180
/// </summary>
public static IEnumerable<string[]> Deserialize(TextReader input) {
if (input.Peek() == Sentinel) yield return new [] { String.Empty };
while (input.Peek() != Sentinel) {
// must read in entire row *now* to see if we're at end of input
yield return DeserializeRow(input).ToArray();
}
}
const int Sentinel = -1;
const char Quote = '"';
const char Separator = (char)System.Globalization.CultureInfo.CurrentCulture.TextInfo.ListSeparator;
static IEnumerable<string> DeserializeRow(TextReader input) {
var field = new StringBuilder();
while (true) {
var c = input.Read();
if (c == Separator) {
yield return field.ToString();
field = new StringBuilder();
} else if (c == '\r') {
if (input.Peek() == '\n') {
input.Read();
}
yield return field.ToString();
yield break;
} else if (new [] { '\n', Sentinel }.Contains(c)) {
yield return field.ToString();
yield break;
} else if (c == Quote) {
field.Append(DeserializeQuoted(input));
} else {
field.Append((char) c);
}
}
}
static string DeserializeQuoted(TextReader input) {
var quoted = new StringBuilder();
while (input.Peek() != Sentinel) {
var c = input.Read();
if (c == Quote) {
if (input.Peek() == Quote) {
quoted.Append(Quote);
input.Read();
} else {
return quoted.ToString();
}
} else {
quoted.Append((char) c);
}
}
throw new UnexpectedEof("End-of-file inside quoted section.");
}
public class UnexpectedEof : Exception {
public UnexpectedEof(string message) : base(message) { }
}
}