In C#, what is the most efficient method to split a text file into multiple text files (the splitting delimiter being a blank line), while preserving the character encoding?
I would use the StreamReader and StreamWriter classes:
/// <summary>
/// Copies the non-empty lines of a text file into a series of output files,
/// starting a new output file after every run of empty lines.
/// </summary>
/// <param name="inputfile">Path of the text file to read.</param>
/// <param name="outputfilesformat">
/// Composite format string for the output paths; <c>{0}</c> receives a
/// running index that starts at 0.
/// </param>
public void Split(string inputfile, string outputfilesformat) {
    int nextIndex = 0;
    System.IO.StreamWriter current = null;
    try {
        using (var source = new System.IO.StreamReader(inputfile)) {
            while (!source.EndOfStream) {
                string text = source.ReadLine();
                if (!string.IsNullOrEmpty(text)) {
                    if (current == null) {
                        // Opened lazily, so consecutive empty lines never
                        // produce an empty output file.
                        string target = string.Format(outputfilesformat, nextIndex);
                        nextIndex++;
                        // Reuse the encoding reported by the reader for the output.
                        current = new System.IO.StreamWriter(target, false, source.CurrentEncoding);
                    }
                    current.WriteLine(text);
                } else if (current != null) {
                    // An empty line ends the current section.
                    current.Dispose();
                    current = null;
                }
            }
        }
    } finally {
        // Close the last section, also when an exception interrupts the loop.
        if (current != null) {
            current.Dispose();
        }
    }
}
You would then call this method like this:
Split("C:\\somefile.txt", "C:\\output-files-{0}.txt");
Purely for those who want to avoid thinking:
If you have a CSV (comma-separated values) file and want to split the file whenever a field changes, name each output file after the new value (without unnecessary quote marks), and strip out comments or certain lines (here identified by starting with `"#`):
Modified method:
/// <summary>
/// Splits a comma-separated file into one output file per run of rows that
/// share the same fifth field. Rows whose first field starts with
/// <c>"#</c> are treated as comments and dropped.
/// </summary>
/// <param name="inputfile">Path of the CSV file to read.</param>
/// <param name="outputfilesformat">
/// Composite format string for the output paths; <c>{0}</c> receives the
/// fifth field of the row with its double quotes removed.
/// </param>
/// <remarks>
/// Output files are opened in overwrite mode, so a value that re-appears
/// later in the input replaces the file written for its earlier run.
/// </remarks>
public void Split(string inputfile, string outputfilesformat)
{
    System.IO.StreamWriter outfile = null;
    string nameFromFile = "";
    try
    {
        using (var infile = new System.IO.StreamReader(inputfile))
        {
            string line;
            while ((line = infile.ReadLine()) != null)
            {
                // A blank line has no fifth field to route on; skip it
                // instead of failing on the index below.
                if (line.Length == 0)
                {
                    continue;
                }

                string[] splitArray = line.Split(',');
                if (splitArray[0].StartsWith("\"#", System.StringComparison.Ordinal))
                {
                    continue;
                }

                string currentName = splitArray[4].Replace("\"", "");
                if (currentName != nameFromFile)
                {
                    if (outfile != null)
                    {
                        outfile.Dispose();
                        outfile = null;
                    }
                    nameFromFile = currentName;
                    // Deliberately no 'continue' here: the row that introduces
                    // the new value is the first row of the new file.
                }

                if (outfile == null)
                {
                    outfile = new System.IO.StreamWriter(
                        string.Format(outputfilesformat, nameFromFile),
                        false,
                        infile.CurrentEncoding);
                }
                outfile.WriteLine(line);
            }
        }
    }
    finally
    {
        if (outfile != null)
        {
            outfile.Dispose();
        }
    }
}
Local path call:
// Map the virtual input file and output folder through Server.MapPath.
string strpath = Server.MapPath("~/Data/SPLIT/DATA.TXT");
string newFile = Server.MapPath("~/Data/SPLIT");
// Only split when the input file is present; {0} in the output pattern is
// filled in by Split.
if (System.IO.File.Exists(strpath))
{
    Split(strpath, newFile + "\\{0}.CSV");
}
In the case anyone needs to split a text file into multiple files using a string:
/// <summary>
/// Splits <c>C:\test.txt</c> into numbered files, starting a new file after
/// every line that contains the marker text.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    // Copies lines from inputfile into files named by outputfilesformat
    // ({0} = running index starting at 0). A line containing the marker
    // closes the current output file and is itself not written.
    void Split(string inputfile, string outputfilesformat)
    {
        int i = 0;
        System.IO.StreamWriter outfile = null;
        try
        {
            using (var infile = new System.IO.StreamReader(inputfile))
            {
                string line;
                while ((line = infile.ReadLine()) != null)
                {
                    if (line.Contains("String You Want File To Split From"))
                    {
                        if (outfile != null)
                        {
                            outfile.Dispose();
                            outfile = null;
                        }
                        continue;
                    }

                    if (outfile == null)
                    {
                        // Opened lazily so back-to-back markers do not
                        // create empty files.
                        outfile = new System.IO.StreamWriter(
                            string.Format(outputfilesformat, i++),
                            false,
                            infile.CurrentEncoding);
                    }
                    outfile.WriteLine(line);
                }
            }
        }
        finally
        {
            if (outfile != null)
            {
                outfile.Dispose();
            }
        }
    }

    // "C:test.txt" (no backslash) is relative to the current directory of
    // drive C:, not the drive root; use the rooted path like the output does.
    Split("C:\\test.txt", "C:\\output-files-{0}.txt");
}
Related
E2739158012008-10-01O9918107NPF7547379999010012008-10-0100125000000
E2739158PU0000-00-00 010012008-10-0100081625219
E3180826011985-01-14L9918007NPM4927359999010011985-01-1400005620000
E3180826PU0000-00-00 020011985-01-14000110443500021997-01-1400000518799
E3292015011985-01-16L9918007NPM4927349999010011985-01-1600003623300
I have this flat file and I need to group this based on the 2nd position to 8th position
example(2739158/3180826/3292015) and write to another flat file.
So the data Starting with 'E' should Repeat in the single line along with that group field(2nd to 8th Position in the start) and I should take the 9th Position after 'E'
Also I need to replace Empty space with ('*' star)
For example
1st Line
2739158**E**012008-10-01O9918107NPF7547379999010012008-10-0100125000000*****E**012008-10-01O9918107NPF7547379999010012008-10-0100125000000
2nd Line
3180826**E**011985-01-14L9918007NPM4927359999010011985-01-1400005620000**E**011985-01-14L9918007NPM4927359999010011985-01-140000562000**E**011985-01-14L9918007NPM4927359999010011985-01-140000562000***
3rd Line
3292015**E**011985-01-16L9918007NPM4927349999010011985-01-1600003623300****
Can we do this in Stream reader c#, please?
Any help would be highly appreciated. The file size is more than 285 MB, so is it a good idea to read it with a StreamReader?
Thanks
@jdweng: Thanks very much for your input. I tried it without grouping and it works as expected. Thanks to everyone who tried to solve the issue.
// Merges consecutive records that share the same 7-character key (characters
// 2-8 of each line) into one output line: the key followed by every record
// of the group with the key removed. Spaces are replaced by '*'.
List<String> FinalLines = new List<String>();
using (StreamReader sr = new StreamReader(@"C:\data\Input1.txt"))
{
    // NOTE(review): the first line is read and discarded, exactly as before;
    // confirm the input really starts with a line that must be skipped.
    sr.ReadLine();

    string currentKey = null;
    System.Text.StringBuilder currentGroup = new System.Text.StringBuilder();
    string line;
    while ((line = sr.ReadLine()) != null)
    {
        // A line shorter than 8 characters cannot hold the record marker
        // plus the 7-character key, so it is skipped.
        if (line.Length < 8)
        {
            continue;
        }

        line = line.Replace(" ", "*");
        string key = line.Substring(1, 7);

        if (key != currentKey)
        {
            // Key changed: emit the finished group before starting a new one.
            if (currentKey != null)
            {
                FinalLines.Add(currentKey + currentGroup);
            }
            currentKey = key;
            currentGroup.Clear();
        }

        // Keep the leading marker character, drop the key itself.
        currentGroup.Append(line.Remove(1, 7));
    }

    // Emit the last group; this also covers a group whose only record is
    // the final line of the file.
    if (currentKey != null)
    {
        FinalLines.Add(currentKey + currentGroup);
    }
}

using (StreamWriter srWriter = new StreamWriter(@"C:\data\test.txt"))
{
    foreach (string group in FinalLines)
    {
        srWriter.WriteLine(group);
    }
}
Try code below :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Reads a flat file, replaces spaces with '*', and writes the records
    /// back out grouped by the key found in characters 2-8 of each record.
    /// </summary>
    class Program
    {
        const string INPUT_FILENAME = @"c:\temp\test.txt";
        const string OUTPUT_FILENAME = @"c:\temp\test1.txt";

        // The key is the 2nd through 8th character of a record, i.e. it
        // starts at zero-based index 1 and is 7 characters long.
        const int KeyStart = 1;
        const int KeyLength = 7;

        static void Main(string[] args)
        {
            List<SortLines> lines = new List<SortLines>();
            using (StreamReader reader = new StreamReader(INPUT_FILENAME))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // NOTE(review): Trim() also removes trailing spaces before
                    // they can become '*'; confirm that is intended.
                    line = line.Trim();

                    // Skip blank lines and lines too short to contain a key.
                    if (line.Length < KeyStart + KeyLength)
                    {
                        continue;
                    }

                    line = line.Replace(" ", "*");
                    lines.Add(new SortLines
                    {
                        key = line.Substring(KeyStart, KeyLength),
                        line = line
                    });
                }
            }

            using (StreamWriter writer = new StreamWriter(OUTPUT_FILENAME))
            {
                // GroupBy keeps groups in order of first appearance and the
                // records inside each group in input order.
                foreach (var group in lines.GroupBy(x => x.key))
                {
                    foreach (SortLines sortLine in group)
                    {
                        writer.WriteLine(sortLine.line);
                    }
                }
            }
        }
    }

    /// <summary>One input record together with its grouping key.</summary>
    public class SortLines : IComparable<SortLines>
    {
        /// <summary>The full record text.</summary>
        public string line { get; set; }

        /// <summary>The grouping key extracted from the record.</summary>
        public string key { get; set; }

        /// <summary>
        /// Orders records by <see cref="key"/> using ordinal comparison.
        /// A null <paramref name="other"/> sorts before any instance.
        /// </summary>
        public int CompareTo(SortLines other)
        {
            if (other == null)
            {
                return 1;
            }
            // Compare key to key; comparing the key to the SortLines object
            // itself throws because the argument is not a string.
            return string.CompareOrdinal(key, other.key);
        }
    }
}
I'm converting CSV files to XML files in C#. I read the CSV file into a List of strings, but characters such as ä, á and ê are not preserved.
/// <summary>
/// Builds a <c>Lesson</c> from the lines of a semicolon-separated file.
/// </summary>
/// <param name="csv">
/// The file's lines. The first line supplies the lesson name; word entries
/// are read from the fourth line (index 3) onwards.
/// </param>
/// <returns>A <c>Lesson</c> holding one <c>Word</c> per entry line.</returns>
public Lesson CsvToLesson(List<string> csv)
{
    // The lesson name is the single character three positions before the
    // end of the first line.
    string header = csv[0];
    string lesName = header[header.Length - 3].ToString();

    var words = new List<Word>();
    for (int row = 3; row < csv.Count; row++)
    {
        // Only the first two ';'-separated fields are used; a missing
        // second field yields an empty string.
        string[] fields = csv[row].Split(';');
        string lang1 = fields[0];
        string lang2 = fields.Length > 1 ? fields[1] : "";

        // The constant 1 and the line index are passed through to Word.
        words.Add(new Word(lang1, lang2, 1, row));
    }

    return new Lesson(lesName, words);
}
to return them as an Object called Lesson.
<Word kasten="1" id="24">
<lang1>eine Sekret�rin</lang1>
<lang2>une secr�taire</lang2>
</Word>
The reading method:
/// <summary>
/// Reads every line of the file at <paramref name="path"/>, converts the
/// lines to a lesson via <c>controller.CsvToLesson</c> and hands the result
/// to <c>AddLesson</c>.
/// </summary>
/// <param name="path">Path of the CSV file to read.</param>
public void saveCsv(string path)
{
    List<string> csv = new List<string>();

    // 'using' closes the file even when reading throws.
    // NOTE(review): this StreamReader constructor decodes the file as UTF-8
    // (honouring a byte-order mark); a file saved in another code page will
    // show replacement characters for letters such as ä or é.
    using (StreamReader file = new StreamReader(path))
    {
        string line;
        while ((line = file.ReadLine()) != null)
        {
            csv.Add(line);
        }
    }

    AddLesson(controller.CsvToLesson(csv));
}
How can I fix this?
Thanks to Cid.
The Problem was that the csv file wasn't saved as utf-8 csv file. There are multiple options in Excel.
Take a look at that post, if you have the same problem:
How to check encoding of a CSV file
I have a text file "Test 2.txt" with the below format:
string1
string2
string3
string4
string5
string6
string7
string8
string9
...and so on...
I would like to convert it to text file "Test.txt" with below format:
string1,string2,string3
string4,string5,string6
string7,string8,string9
...and so on...
My code at the moment is below:
// Rewrites the input so that every three consecutive lines become one
// comma-separated row in D:\Test.txt.
string line;
string DatabaseFullPath = @"C:\Test2.xsr";
using (var file = new StreamReader(DatabaseFullPath, Encoding.UTF8))
// Open the output once: creating a new StreamWriter per line truncates the
// file every time, leaving only the last piece written.
using (StreamWriter writetext = new StreamWriter(@"D:\Test.txt"))
{
    // Counts the lines read so far; it must start at 0, not at the file's
    // total line count, for the modulo test below to find every third line.
    int count = 0;
    while ((line = file.ReadLine()) != null)
    {
        count++;
        if (count % 3 == 0)
        {
            // WriteLine already appends the line terminator.
            writetext.WriteLine(line);
        }
        else
        {
            writetext.Write(line + ',');
        }
    }
}
It seems that the StreamWriter is just overwriting each string "line" with another string "line" but I am not sure about it.
// Joins every three consecutive input lines into one comma-separated row.
string DatabaseFullPath = @"C:\Test2.xsr";
using (var file = new StreamReader(DatabaseFullPath, Encoding.UTF8))
using (StreamWriter writetext = new StreamWriter(@"D:\Test.txt"))
{
    int count = 0;
    string line;
    while ((line = file.ReadLine()) != null)
    {
        // Write the separator before each item instead of after it, so a
        // final row with fewer than three items does not end in a comma.
        if (count > 0)
        {
            if (count % 3 == 0)
            {
                writetext.WriteLine();
            }
            else
            {
                writetext.Write(',');
            }
        }
        writetext.Write(line);
        count++;
    }

    // Terminate the last row, complete or not.
    if (count > 0)
    {
        writetext.WriteLine();
    }
}
I am trying to delete specific line from a text file using c# as:
/// <summary>
/// Removes every line whose first comma-separated field equals
/// <paramref name="id"/> from the project file, by copying the remaining
/// lines to a temporary file and then replacing the original with it.
/// </summary>
/// <param name="id">Identifier of the project line(s) to remove.</param>
public static void DeleteProject(int id)
{
    var fileloc = WebConfigurationManager.AppSettings["FFProject-Manager"];
    var fileloc2 = WebConfigurationManager.AppSettings["TempFile"];

    // Format the id once, culture-independently, instead of on every line.
    string uidToCompare = id.ToString(System.Globalization.CultureInfo.InvariantCulture);

    using (StreamReader reader = new StreamReader(fileloc))
    using (StreamWriter writer = new StreamWriter(fileloc2))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string uidCompare = line.Split(',')[0];

            // Identifiers are compared ordinally; a culture-sensitive
            // comparison can treat distinct strings as equal.
            if (string.Equals(uidCompare, uidToCompare, System.StringComparison.Ordinal))
            {
                continue;
            }
            writer.WriteLine(line);
        }
    }

    // Both streams are closed at this point.
    // NOTE(review): a sharing violation here would have to come from some
    // other open handle on the file - verify the callers.
    File.Delete(fileloc);
    File.Move(fileloc2, fileloc);
}
}
But when I try to execute it gives the error as:
The process cannot access the file 'C:\Flat_Files\Project-Manager.txt' because it is being used by another process.
Where am I going wrong?
I'm having a question about writing data to a CSV file.
I have a file named test.csv in which are 2 fields > accountnumber and relation ID.
Now I want to add another field next to it: IBAN.
The IBAN is derived from the data in the first column (the account number), which is validated and converted by the SOAP function BBANtoIBAN.
How can I keep the 2 columns of data (account numbers and relation IDs) in the CSV and add the IBAN as a 3rd column?
This is my code so far:
// Reads the semicolon-delimited CSV and builds, per record, a list holding
// the original fields followed by the IBAN computed from the first field.
using (var client = new WebService.BANBICSoapClient("IBANBICSoap"))
{
    List<List<string>> dataList = new List<List<string>>();
    using (TextFieldParser parser = new TextFieldParser(@"C:\CSV\test.csv"))
    {
        parser.TextFieldType = FieldType.Delimited;
        parser.SetDelimiters(";");
        while (!parser.EndOfData)
        {
            // ReadFields splits the record on the configured delimiter;
            // ReadLine would return the raw, unsplit line.
            string[] fields = parser.ReadFields();
            if (fields == null || fields.Length == 0)
            {
                continue;
            }

            // Start from the existing columns so they are kept.
            List<string> data = new List<string>(fields);
            try
            {
                // Only the first column (the account number) is converted.
                string resultIBAN = client.BBANtoIBAN(fields[0]);
                if (resultIBAN != string.Empty)
                {
                    data.Add(resultIBAN);
                }
                else
                {
                    data.Add("Accountnumber is not correct.");
                }
            }
            catch (Exception msg)
            {
                // The record is kept without a third column when the
                // service call fails.
                Console.WriteLine(msg);
            }
            dataList.Add(data);
        }
    }
}
I see it as:
// Copies test.csv to testOut.csv, appending to each line a third column
// with the IBAN computed from the line's first field.
using (StreamReader sr = new StreamReader(@"C:\CSV\test.csv"))
using (StreamWriter sw = new StreamWriter(@"C:\CSV\testOut.csv"))
{
    string line;
    while ((line = sr.ReadLine()) != null)
    {
        try
        {
            string[] rowsArray = line.Split(';');
            string row = rowsArray[0];
            string resultIBAN = client.BBANtoIBAN(row);
            if (resultIBAN != string.Empty)
            {
                line += ";" + resultIBAN;
            }
            else
            {
                line += ";" + "Accountnumber is not correct.";
            }
        }
        catch (Exception msg)
        {
            // On failure the line is still written, without the extra column.
            Console.WriteLine(msg);
        }
        sw.WriteLine(line);
    }
}
I would do something like this to parse the csv file, and add an extra item to the data list:
List<List<string>> dataList = new List<List<string>>();
string filename = #"C:\CSV\test.csv";
using (StreamReader sr = new StreamReader(filename))
{
string fileContent = sr.ReadToEnd();
foreach (string line in fileContent.Split(new string[] {Environment.NewLine},StringSplitOptions.RemoveEmptyEntries))
{
List<string> data = new List<string>();
foreach (string field in line.Split(';'))
{
data.Add(field);
}
try
{
string resultIBAN = client.BBANtoIBAN(data[0]);
if (resultIBAN != string.Empty)
{
data.Add(resultIBAN);
}
else
{
data.Add("Accountnumber is not correct.");
}
}
catch (Exception msg)
{
Console.WriteLine(msg);
}
dataList.Add(data);
}