I am trying to split a string in a .txt-file by commas (,) into a string[] and then replacing every item of the string[] to another formula, for example:
"Marko Kostic, Faculty of Technical Sciences, University of Novi Sad,
Trg D. Obradovica 6, 21125 Novi Sad, Serbia"
I want to split this string by commas in between the words and then I want to put every value in separate line like a list and then changing every value with another like "Marko Kostic" to be
<addr-line>Marko Kostic<\addr-line>
The problem is the writer wrote only the last value of string[] and erase the previous values.
Any suggestions?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using Microsoft.Office.Interop;
using Microsoft.Office.Interop.Word;
using System.Diagnostics;
using System.Reflection;
using System.Collections;
using System.Runtime.InteropServices;
namespace AffiliationParser
{
class Program
{
static void Main(string[] args)
{
Microsoft.Office.Interop.Word.Application oWord = new Microsoft.Office.Interop.Word.Application();
object missing = System.Reflection.Missing.Value;
object isVisible = false;
using (StreamReader batch = new StreamReader(#"D:\Developing\REF\AffiliationParser\AffiliationParser\AffiliationParser\bin\Debug\Run.bat"))
{
string bat;
while (!batch.EndOfStream)
{
bat = batch.ReadLine();
// do your processing with batch command
if (bat == "pause")
{
continue;
}
string fpath = bat.Substring(bat.IndexOf(" \""));
string path = fpath.Replace("\"", "").Replace(" ","");
string[] name = Directory.GetFiles(path, "*.txt");
string words = name.Min();
string word = words.Substring(words.LastIndexOf("\\")).Replace("\\", "");
Console.WriteLine("Processing........");
Console.WriteLine(word);
string Npath = path + #"\Arr" + word;
if (File.Exists(Npath))
{
System.Windows.Forms.MessageBox.Show("The file Arr" + word + " alredy exist in " + path);
continue;
}
else
{
File.Copy(words, Npath);
StreamReader temp = new StreamReader(Npath, Encoding.UTF8);
string tempstring = temp.ReadToEnd();
string[] temp3 = tempstring.Split(',');
temp.Close();
foreach (string item in temp3)
{
string Nitem = item.TrimStart().TrimEnd();
//Match MatchCont = Regex.Match(Nitem, #"Afganistan|Albania|Algeria|American\s+Samoa|Andorra|Angola|Anguilla|Antarctica|Antigua\s+and\s+Barbuda|Argentina|Armenia|Aruba|Australia|Austria|Azerbaijan|Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bermuda|Bhutan|Bolivia|Bosnia\s+and\s+Herzegovina|Botswana|Bouvet\s+Island|Brazil|British\s+Indian\s+Ocean\s+Territory|Brunei\s+Darussalam|Bulgaria|Burkina\s+Faso|Burundi|Cambodia|Cameroon|Canada|Cape\s+Verde|Cayman\s+Islands|Central\s+African\s+Republic|Chad|Chile|China|Christmas\s+Island|Cocos\s+\(Keeling\)\s+Islands|Colombia|Comoros|Democratic\s+People's\s+Republic\s+of\s+Korea|Democratic\s+Republic\s+of\s+Congo|Cook\s+Islands|Costa\s+Rica|Cote\s+D'Ivoire|Croatia|Cuba|Cyprus|Czech\s+Republic|Republic\s+of\s+Korea|Denmark|Djibouti|Dominica|Dominican\s+Republic|East\s+Timor|Ecuador|Egypt|El\s+Salvador|Equatorial\s+Guinea|Eritrea|Estonia|Ethiopia|Falkland\s+Islands\s+\(Malvinas\)|Faroe\s+Islands|Fiji|Finland|France\s+Metropolitan|France|French\s+Guiana|French\s+Polynesia|French\s+Southern\s+Territories|Gabon|Gambia|Georgia|Germany|Ghana|Gibraltar|Greece|Greenland|Grenadaf|Guadeloupe|Guam|Guatemala|Guinea|Guinea\-Bissau|Guyana|Haiti|Heard\s+Island\s+and\s+McDonald\s+Island|Honduras|Hong\s+Kong|Hungary|Iceland|India|Indonesia|Iran|Iraq|Ireland|Northern\s+Ireland|Isle\s+Of\s+Man|Israel|Italy|Jamaica|Japan|Jordan|Kazakhstan|Kenya|Kiribati|Kuwait|Kyrgyzstan|Lao\s+People'S\s+Democratic\s+Republic|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|Macau|Macedonia|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall\s+Islands|Martinique|Mauritania|Mauritius|Mayotte|Mexico|Micronesia|Moldova|Monaco|Mongolia|Montserrat|Morocco|Mozambique|Myanmar|Namibia|Nauru|Nepal|Netherlands\s+Antilles|New\s+Caledonia|New\s+Zealand|Nicaragua|Nigeria|Niger|Niue|Norfolk\s+Island|Northern\s+Mariana\s+Islands|Norway|Oman|Pakistan|Palau|Palestine|Panama|Papua\s+New\s+Guinea|Paraguay|Peru|Philippines|Pitcairn|Poland|Portugal|Puerto\s+Rico|Qatar|Reunion|Romania|Russia|Rwanda|Saint\s+Kitts\s+and\s+Nevis|Saint\s+Lucia|Saint\s+Vincent\s+and\s+The\s+Grenadines|Samoa|San\s+Marino|Sao\s+Tome\s+and\s+Principe|Saudi\s+Arabia|Scotland|Senegal|Serbia|Kosovo|Montenegro|Seychelles|Sierra\s+Leone|Singapore|Slovakia|Slovenia|Solomon\s+Islands|Somalia|South\s+Africa|South\s+Georgia\s+and\s+The\s+South\s+Sandwich\s+Islands|Spain|Sri\s+Lanka|St.\s+Helena|St.\s+Pierre\s+and\s+Miquelon|Sudan|Suriname|Svalbard\s+and\s+Jan\s+Mayen\s+Islands|Swaziland|Sweden|Switzerland|Syria|Taiwan|Tajikistan|Tanzania|Thailand|The\s+Netherlands|Togo|Tokelau|Tonga|Trinidad\s+and\s+Tobago|Tunisia|Turkey|Turkmenistan|Turks\s+and\s+Caicos\s+Islands|Tuvalu|Uganda|Ukraine|United\s+Arab\s+Emirates|UAE|UK|United\s+States\s+Minor\s+Outlying\s+Islands|Uruguay|USA|Uzbekistan|Vanuatu|Vatican\s+City\s+State\s+\(Holy\s+See\)|Venezuela|Vietnam|British\s+Virgin\s+Islands|USA\s+Virgin\s+Islands|Wallis\s+and\s+Futuna\s+Islands|Western\s+Sahara|West\s+Indies|Yemen|Zambia|Zimbabwe|Abkhazia|Afghanistan|Akrotiri\s+and\s+Dhekelia|Aland|Ascension\s+Island|The\s+Bahamas|Brunei|Central\s+Africa|Cocos|Congo|Cote\s+d'lvoire|Czech|Dominican|Falkland\s+Islands|Cambia,\s+The|Grenada|Guemsey|Isle\s+of\s+Man|Jersey|Korea|Laos|Macao|Nagorno\-Karabakh|Netherlands|Northern\s+Cyprus|Pitcaim\s+Islands|Sahrawi\s+Arab\s+Democratic|Saint\-Barthelemy|Saint\s+Helena|Saint\s+Martin|Saint\s+Pierre\s+and\s+Miquelon|Saint\s+Vincent\s+and\s+Grenadines|Samos|Somaliland|South\s+Ossetia|Svalbard|Transnistria|Tristan\s+da\s+Cunha|United\s+Kingdom|Vatican\s+City|Virgin\s+Islands|Wallis\s+and\s+Futuna|Espa�a|Witsch|United\s+States|Prague\s+Czech\s+Republic", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase);
//if (MatchCont.Success==true)
//{
// MatchCont.Result(#"<country>" + Nitem + #"<\country>");
//}
}
}
}
}
}
}
}
Try to include code in you question, it's not a best practice to simply hand out answers. That being said, you'll want to look at the String.Split method, String.Trim and the File.AppendText method.
Simple ways to do this:
string[] stuff = data.Split(',');
StreamWriter sW = File.AppendText(pathToFile);
foreach(string parts in stuff)
{
sW.WriteLine(parts.Trim());
}
Very, very basic, and not giving you the answer without some work on your part. Good luck!
Here's some references: File.AppendText and String.Trim
string input="a,b,c,d";
string [] parts=input.Split(",",StringSplitOptions.RemoveEmptyEntries);
List<string> output=new List<string>();
foreach(string s in parts)
{
// do sth you like;
var newStr="<abc>"+s+"</abc>";
output.Add(newStr);
}
return output.ToArray();
Related
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;
namespace Rename_Files
{
public partial class Form1 : Form
{
string[] files;
public Form1()
{
InitializeComponent();
files = Directory.GetFiles(#"C:\Program Files (x86)\Steam\steamapps\common\King's Quest\Binaries\Win\Saved Games", "*.*", SearchOption.AllDirectories);
for(int i = 2; i < files.Length; i++)
{
string text = File.ReadAllText(files[i]);
int startPos = text.IndexOf("currentLevelName");
int length = text.IndexOf("currentLevelEntryDirection") - 3;
string sub = text.Substring(startPos, length);
}
}
private void Form1_Load(object sender, EventArgs e)
{
}
}
}
The part i want to extract is :
currentLevelName":"E1_WL1_FindBow_M","currentLevelEntryDirection"
This is a part of the file content :
m_ItemsEncodedJsons ArrayProperty None ! m_WhatLevelPlayerIsAtEncodedJson ArrayProperty O G {"currentLevelName":"E1_WL1_FindBow_M","currentLevelEntryDirection":8} & m_WhatCheckPointPlay
the way i'm trying now i'm getting exception because
System.ArgumentOutOfRangeException: 'Index and length must refer to a location within the string.
Parameter name: length'
startPos value is : 1613 and the value of length is 1653
so the exception is logic but i'm not sure yet how to extract the specific string out of the text.
Update :
this is almost working :
int startPos = text.IndexOf("currentLevelName");
int length = text.IndexOf("currentLevelEntryDirection");
string sub = text.Substring(startPos, length - startPos);
the result in sub is :
"currentLevelName\":\"E1_WL1_HangingBedsA_M\",\""
but i want that sub will contain this :
currentLevelName"E1_WL1_HangingBedsA_M\"
optional without the two "" either and maybe to add _
currentLevelName_"E1_WL1_HangingBedsA_M\"
or
currentLevelName_E1_WL1_HangingBedsA_M\
The problem you are facing is indeed this one:
How to extract the content with specific pattern from a String?
In this case, you can use Regular Expression to extract the content you want.
Given the following text:
m_ItemsEncodedJsons ArrayProperty None ! m_WhatLevelPlayerIsAtEncodedJson ArrayProperty O G {"currentLevelName":"E1_WL1_FindBow_M","currentLevelEntryDirection":8} & m_WhatCheckPointPlay
By using this Regex pattern:
string pattern = #"""currentLevelName"":"".*"",""currentLevelEntryDirection"":\d+";
You will be able to extract the following content:
"currentLevelName":"E1_WL1_FindBow_M","currentLevelEntryDirection":8
Here is the code snippet in C#:
using System;
using System.Text.RegularExpressions;
public class Example
{
public static void Main()
{
// this is the original text
string input = #"m_ItemsEncodedJsons ArrayProperty None ! m_WhatLevelPlayerIsAtEncodedJson ArrayProperty O G {""currentLevelName"":""E1_WL1_FindBow_M"",""currentLevelEntryDirection"":8} & m_WhatCheckPointPlay";
// this is the pattern you are looking for
string pattern = #"""currentLevelName"":"".*"",""currentLevelEntryDirection"":\d+";
RegexOptions options = RegexOptions.Multiline;
foreach (Match m in Regex.Matches(input, pattern, options))
{
Console.WriteLine("'{0}' found at index {1}.", m.Value, m.Index);
}
}
}
One of the reasons you should use Regex in this case is that, if the value of currentLevelEntryDirection is not single-digit, e.g. 8123, the above code snippet can still be able to extract the correct value.
You can also find the above example and edit it here: https://regex101.com/r/W4ihuk/3
Furthermore, you can extract the property names and values by using capturing group. For example:
string pattern = #"""(currentLevelName)"":""(.*)"",""(currentLevelEntryDirection)"":(\d+)";
You can extract the following data:
currentLevelName, E1_WL1_FindBow_M, currentLevelEntryDirection, 8 and you can get the values by looping all the Match objects.
it seems the content is separated by a space delimiter. and the positions are fixed.
If so, you could do something like :
var splitted = text.Split(' ');
var json = splitted[8]; // this is the json part in the content;
However, since we don't know wither the content might change or not. You can still use this :
var startPos = text.IndexOf('{');
var endPos = text.IndexOf('}') + 1;
var json = text.Substring(startPos, endPos - startPos);
This would extract the Json part of the file. Now, you can implement a json model that will be used to deserialize this json like this :
using System.Text.Json;
using System.Text.Json.Serialization;
public class JsonModel
{
[JsonPropertyName("currentLevelName")]
public string? CurrentLevelName { get; set; }
[JsonPropertyName("currentLevelEntryDirection")]
public int CurrentLevelEntryDirection { get; set; }
}
With that we can do :
var result = JsonSerializer.Deserialize<JsonModel>(json);
var leveName = result.CurrentLevelName;
private string FindCurrentLevelName(string MyString)
{
var FirstSplit = MyString.Split(new char[] { '{' },
StringSplitOptions.RemoveEmptyEntries);
if (FirstSplit.Length != 2)
{
return "";
}
var SecondSplit = FirstSplit[1].Split(new char[] { '}' },
StringSplitOptions.RemoveEmptyEntries);
if (SecondSplit.Length != 2)
{
return "";
}
var FinalSplit = SecondSplit[0].Split(new char[] { '"' },
StringSplitOptions.RemoveEmptyEntries);
if (FinalSplit.Length != 6)
{
return "";
}
return FinalSplit[2];
}
To get the specific string pattern in a non-JSON format data string
Use the regex to get the stirng and operate it will be good I thought.
By using the regex pattern: "currentLevelName":"\w+"
in your example content, your will get: "currentLevelName":"E1_WL1_HangingBedsA_M"
Then use the result to create or replace your file name.
the code below will get the savedGame001.txt's content and extract the currentLevelName block, then create a new file whcih the name is in this format: [filename]_[theCurrentLevelName]
using System.Text.RegularExpressions;
// your file path
string filePath = #"C:\Users\a0204\Downloads";
// your file name
string fileName = #"savedGame001.txt";
// read file content
string stringContent = string.Empty;
stringContent = System.IO.File.ReadAllText(filePath + "\\" + fileName);
// Get the mathced string By regex => "currentLevelName":"\w+"
var regex = new Regex("\"currentLevelName\":\"\\w+\"");
Match matched = regex.Match(stringContent);
string matchedString = matched.Value;
// Get the string below the colon
int colonPosition = matchedString.IndexOf(":");
string value = matchedString.Substring(colonPosition + 1);
value = value.Replace("\"", string.Empty);
// remove the .txt and add the matched string to file name
fileName = fileName.Remove(fileName.Length - 4, 4);
string newFileName = fileName + "_" + value;
// check the new file name
Console.WriteLine(newFileName);
// write content to new file name
FileStream fileStream = File.Create(filePath + "\\" + newFileName);
fileStream.Dispose();
File.WriteAllText(filePath + "\\" + newFileName, stringContent);
Console.ReadLine();
PS: the code was written by .NET6 console app
How to append text (from txt file e.g englishWord.txt) to the end of all lines (from another txt file e.g PolishWord.Txt) ?
i have one txt file with many english word and translated polish word in another txt file. I would like the result to be such as it:
englishWord.txt:
familiar
involve
ability
expand
polishWord.txt:
znajomy
angażować
umiejętność
rozszerzać
Program make the result.txt like this:
familiar - znajomy
involve - angazować
ability - umiejętność
expand - rozszerzac
Thanks for your attention. I’m looking forward to your reply.
Sorry for my bad English.
This is what i tried:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp8
{
class Program
{
static void Main(string[] args)
{
string englishWord = File.ReadAllText("c:/temp/english.txt");
string polishWord = File.ReadAllText("c:/temp/polish.txt");
foreach (var lineE in englishWord)
{
string lineEnglish = lineE;
}
foreach (var lineP in polishWord)
{
string linePolish = lineP;
}
string result = lineEnglish + " - " + linePolish;
result = File.WriteAllText"c:/temp/result.txt";
}
}
}
An easy way to do this would be to first read the line from both files, split those lines by spaces into two arrays, then iterate through those arrays at the same time. I.E.:
System.IO.StreamReader english = new System.IO.StreamReader("englishWord.txt");
System.IO.StreamReader polish = new System.IO.StreamReader("polishWord.txt");
String englishLine = english.ReadLine();
String polishLine = polish.ReadLine();
String englishWords[] = englishLine.split(' ');
String polishWords[] = polishLine.split(' ');
for (int i = 0; i < englishWords.Length; i++) {
/* Output englishWords[i] and polishWords[i] to result.txt here */
}
Hope this helps!
Try this:
using (var english = new StreamReader("englishWord.txt"))
using (var polish = new StreamReader("polishWord.txt"))
using (var result = new StreamWriter("result.txt"))
{
while (!english.EndOfStream && !polish.EndOfStream)
{
result.Write(english.ReadLine());
result.Write(" - ");
result.Write(polish.ReadLine());
result.WriteLine();
}
}
This produces results exactly like in the specification. However, if you're trying to build something like a dictionary I'd suggest other formats like writing to Excel file or a csv file.
//load files
var englishFile = File.OpenText("englishWord.txt").ReadToEnd();
var polishFile = File.OpenText("polishWord.txt").ReadToEnd();
//convert to string arrays
var englishArray = englishFile.Split(' ');
var polishArray = polishFile.Split(' ');
//build output string
String outputString = "";
//take the lenght of the shorter array in case they're not the same length
int maxLength = Math.Min(englishArray.Length, polishArray.Length);
for (int i=0; i<maxLength; i++)
{
outputString += englishArray[i] + " - " + polishArray[i] + " ";
}
//write output to file
File.WriteAllText("results.txt", outputString);
string lot = "RU644276G01";
var year = "201" + lot.Substring(2, 1);
var folder = #"\\sinsdn38.ap.infineon.com\ArchView\03_Reports\" + year +
#"\" + lot.Substring(3, 2) + #"\" + lot.Substring(0,8) + #"\";
DirectoryInfo di = new DirectoryInfo(folder);
foreach (var fi in di.GetFiles("*.TLT"))
{
var file = fi.FullName;
string line;
using (StreamReader sr = new StreamReader(file))
{
while ((line = sr.ReadLine()) != null)
{
if (line.StartsWith("TEST-END"))
{
timeStampTextBox.Text = line;
}
}
}
This is my code currently.
I want to read from a specific line (for example line 8) and the line starts with "Test-End". However, line 8 contains all these
"TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193"
but I only want to read "2017-01-98 15:51".
How do I change my code to get that? Currently I'm getting the whole line instead of the specific timestamp that I want.
Edit
How do I change the code such that the string lot =" " can be any number, meaning it does not need to be RU644276G01, it can be a different number which will be typed by users. I have created a textbox for users to input the number.
You extract the text. It seems quite regular pattern, so regular expressions should be able to help:
using System;
using System.Text.RegularExpressions;
public class Program
{
public static void Main()
{
var line = "TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193";
Regex re = new Regex(#"^(?:TEST-END : )(.*?\d{4}-\d{2}-\d{2} \d{2}:\d{2})");
var match = re.Match(line);
Console.WriteLine(match.Groups[1]);
Console.ReadLine(); // leave console open
}
}
Output:
2017-01-08 15:51 // this is group 1, group 0 is the full capture including TEST-END :
Use this to check it in regexr: https://regexr.com/3l1sf if you hover about the text it will diplay your capturing groups
The regex means:
^ start of the string
(?:TEST-END : ) non capturing group, text must be present
( a group
.*? as few (0-n) anythings as possible
\d{4}-\d{2}-\d{2} \d{2}:\d{2} 4 digits-2 digits-2digits 2digits:2digits
) end of group
More about regular expressions:
RegEx-Class
a regex Tester (one of many, the one I use): https://regexr.com/
Here is my answer using Regular Expressions.
if (line.StartsWith("TEST-END"))
{
Regex re = new Regex(#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}");
var match = re.Match(line);
if(m.Success)
{
timeStampTextBox.Text = match.Value;
}
}
Output: 2017-01-08 15:51
you can split the line with ":", like this
var value = line.split(':');
and get your date like this.
var date = value[1] + ":" + value[2].Replace("PROGRAM", "");
above statement means
date = "2017-01-98 15" + ":" + "51"
if (line.StartsWith("TEST-END"))
{
var value = line.split(':');
var date = value[1] + ":" + value[2].Replace("PROGRAM", "");
timeStampTextBox.Text = date;
}
This is not the best answer, it depends on exactly the statement you had given.
I finally got all three parameters out of the last line
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
Dictionary<string, string> dict = new Dictionary<string, string>();
string pattern = #"(?'name'[^\s]+)\s:\s(?'value'[\w\s\-]*|\d{4}-\d{2}-\d{2}\s\d{2}:\d{2})";
string line = "TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193";
MatchCollection matches = Regex.Matches(line, pattern, RegexOptions.RightToLeft);
foreach (Match match in matches)
{
Console.WriteLine("name : '{0}', value : '{1}'", match.Groups["name"].Value, match.Groups["value"].Value);
dict.Add(match.Groups["name"].Value, match.Groups["value"].Value);
}
DateTime date = DateTime.Parse(dict["TEST-END"]);
Console.ReadLine();
}
}
}
I have a csv file that I need to read in the first line and save it to a List. Only problem is there are commas in some of the text and it is splitting in the middle of a field when I need it not to. Unfortunately I cannot change the data inside so whats there needs to stay. I currently also write the data to csv so I was thinking maybe instead of using a comma I can use a different character. Does anyone know if this is possible? I have been researching but am not coming up with a proper answer. Here is my code below:
using System;
using System.CodeDom;
using System.IO;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace TestJSON
{
class Program
{
static void Main()
{
var data = JsonConvert.DeserializeObject<dynamic>(File.ReadAllText(
#"C:\Users\nphillips\workspace\2016R23\UITestAutomation\SeedDataGenerator\src\staticresources\seeddata.resource"));
string fileName = "";
var bundles = data.RecordSetBundles;
foreach (var bundle in bundles)
{
var records = bundle.Records;
foreach (var record in records)
{
var test = record.attributes;
foreach (var testagain in test)
{
// Getting the object Name Ex. Location, Item, etc.
var jprop = testagain as JProperty;
if (jprop != null)
{
fileName = jprop.First.ToString().Split('_')[2]+ ".csv";
}
break;
}
string header = "";
string value = "";
foreach (var child in record)
{
var theChild = child as JProperty;
if (theChild != null && !theChild.Name.Equals("attributes"))
{
header += child.Name + ",";
value += child.Value.ToString() + ",";
}
}
value += "+" + Environment.NewLine;
if (!File.Exists(fileName))
{
header += "+" + Environment.NewLine;
File.WriteAllText(fileName, header);
}
else
{
// Need to read in here
var readCSV = new StreamReader(fileName);
var splits = readCSV.ReadLine();
}
File.AppendAllText(fileName, value);
}
}
}
}
}
You need to know how the file is delimited. I would guess that this file is tab delimited, so split on that instead.
Assuming your line is called myCSVLine... I.E
string seperator = "\t";
string[] splitLine = myCSVLine.Split(seperator.ToCharArray());
splitLine would now have all of your strings, including ones with commas
I will try to explain the problem as good as I can. I have a text file with email addresses and names. It looks like this: Barb Beney "de.mariof#vienna.aa", "Beny Beney" bet#catering.at,etc....all in the same line. This is just an example and I have like thousands of such data in one big text file. I want to extract the emails and names so that I get something like this in the end:
Beny Beney bet#catering.at - separate, next to each other, in one line and without quote marks. And in the end it should eliminate all duplicate addresses from the file.
I wrote the code for extracting email addresses and it works, but I don't know how to do the rest. How to extract the names put it in one line as the addresses and eliminate duplicates. I hope I described it properly so you know what I'm trying to do. This is the code I have:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
using System.IO;
namespace Email
{
class Program
{
static void Main(string[] args)
{
ExtractEmails(#"C:\Users\drake\Desktop\New.txt", #"C:\Users\drake\Desktop\Email.txt");
}
public static void ExtractEmails(string inFilePath, string outFilePath)
{
string data = File.ReadAllText(inFilePath);
Regex emailRegex = new Regex(#"\w+([-+.]\w+)*#\w+([-.]\w+)*\.\w+([-.]\w+)*",
RegexOptions.IgnoreCase);
MatchCollection emailMatches = emailRegex.Matches(data);
StringBuilder sb = new StringBuilder();
foreach (Match emailMatch in emailMatches)
{
sb.AppendLine(emailMatch.Value);
}
File.WriteAllText(outFilePath, sb.ToString());
}
}
}
For the new desired formatting, you could do something like this:
private string[] parseEmails(string bigStringiIn){
string[] output;
string bigString;
bigString = bigStringiIn.Replace("\"", "");
output = bigString.Slit(",".ToCharArray());
return output;
}
it takes the string with the mail adresses, replaces the quote marks, then splits the string into a string array with the format: name lastname email#some.com
for the duplicated entries deletion, a nested for should do the trick, checking (maybe after a .Split()) for matching strings.
Welcome you can use this code and it will work on file made by creating new file which will contain all e-mails without duplicates:
static void Main(string[] args)
{
TextWriter w = File.CreateText(#"C:\Users\drake\Desktop\NonDuplicateEmails.txt");
ExtractEmails(#"C:\Users\drake\Desktop\New.txt", #"C:\Users\drake\Desktop\Email.txt");
TextReader r = File.OpenText(#"C:\Users\drake\Desktop\Email.txt");
RemovingAllDupes(r, w);
}
public static void RemovingAllDupes(TextReader reader, TextWriter writer)
{
string currentLine;
HashSet<string> previousLines = new HashSet<string>();
while ((currentLine = reader.ReadLine()) != null)
{
// Add returns true if it was actually added,
// false if it was already there
if (previousLines.Add(currentLine))
{
writer.WriteLine(currentLine);
}
}
writer.Close();
}
you can also use this code with big files:
static void Main(string[] args)
{
ExtractEmails(#"C:\Users\drake\Desktop\New.txt", #"C:\Users\drake\Desktop\Email.txt");
var sr = new StreamReader(File.OpenRead(#"C:\Users\drake\Desktop\Email.txt"));
var sw = new StreamWriter(File.OpenWrite(#"C:\Users\drake\Desktop\NonDuplicateEmails.txt"));
RemovingAllDupes(sr, sw);
}
public static void RemovingAllDupes(StreamReader str, StreamWriter stw)
{
var lines = new HashSet<int>();
while (!str.EndOfStream)
{
string line = str.ReadLine();
int hc = line.GetHashCode();
if (lines.Contains(hc))
continue;
lines.Add(hc);
stw.WriteLine(line);
}
stw.Flush();
stw.Close();
str.Close();