c# count a specific word happen times in a txt file

c# count a specific word happen times in a txt file - c#

static void Main(string[] args)
{
StreamReader oReader;
if (File.Exists(#"C:\cmd.txt"))
{
Console.WriteLine("IMAGE");
string cSearforSomething = Console.ReadLine().Trim();
oReader = new StreamReader(#"C:\cmd.txt");
string cColl = oReader.ReadToEnd();
string cCriteria = #"\b" + cSearforSomething + #"\b";
System.Text.RegularExpressions.Regex oRegex = new System.Text.RegularExpressions.Regex(cCriteria, RegexOptions.IgnoreCase);
int count = oRegex.Matches(cColl).Count;
Console.WriteLine(count.ToString());
}
Console.ReadLine();
}
I cannot calculate the string "IMAGE" happen times in my file? Is my code wrong?

Try this code
public static void Main()
{
var str = File.ReadAllText(#"C:\cmd.txt");
var searchTerm = "IMAGE";
var matches = Regex.Matches(str, #"\b" + searchTerm + #"\b", RegexOptions.IgnoreCase);
Console.WriteLine(matches.Count);
Console.ReadLine();
}

According to https://msdn.microsoft.com/en-us/library/az24scfc.aspx the "\b" matches a backspace. That might explain why you aren't matching the regular expression.

using System;
using System.Text.RegularExpressions;
namespace ConsoleApplication8
{
class Program
{
static void Main(string[] args)
{
string cColl = System.IO.File.ReadAllText(#"C:\some.txt");
//string cColl = "This is similar, similar, similar, similar, similar, similar";
Console.WriteLine(cColl);
string cCriteria = #"\b" + "similar" + #"\b";
Regex oRegex = new Regex(cCriteria, RegexOptions.IgnoreCase);
int count = oRegex.Matches(cColl).Count;
Console.WriteLine(count.ToString());
Console.ReadLine();
}
}
}

Related

How to add a string of text into another txt file

I have this txt file that contains this text:
MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T
but I only want the values that come after SSN~% and after the MED~
I want to be able read from the Line that starts with QRD and then be able to grab ANY value after SSN~% and MED~, so the value can be anything I'm just using ABC123 and WA0034 as examples.
Form1.cs
private void Parse(string filename)
{
string line;
var str = File.ReadAllText(filename);
System.IO.StreamReader file = new System.IO.StreamReader(filename);
targetRichTextBox = richTextBox1;
WriteTextSafelyInRichTextBox(str);
while ((line = file.ReadLine()) != null)
{
if ((line.Contains("QRD"))
{
//Enter code here
}
}
char[] delimiterChars = { '^' };
string[] words = str.Split(delimiterChars);
var createText = (RetrunTemplate.Get().Replace(words[24], "VHIC-").Replace(words[25], "9A-MED~WA0034"));
var outputFilename = outputDir + "\\OutboundMessage - " + DateTime.UtcNow.ToString("yyyy-MM-dd HH-mm-ss-ff", CultureInfo.InvariantCulture) + ".txt";
File.WriteAllText(outputFilename, createText);
targetRichTextBox = richTextBox2;
WriteTextSafelyInRichTextBox(createText);
file.Close();
File.Delete(filename);
MessageBox.Show("You have successfuly creatd an outbound Message");
}
RetrunTemplate
class RetrunTemplate
{
public static string Get()
{
string retrunTemplate = #"MSH^~|\&^R3POCSEND^442~CHEY209.FO-BAYPINES.MED.VA.GOV~DNS^R3POCRCV^^20171108131710-0400^^RCL~I06^442157252912^D^2.5^^^AL^NE^USA" + Environment.NewLine +
"PID^^^4420041228V165312~~~USVHA&&0363~NI~VA FACILITY ID&442&L~~20171108|666393848~~~" + Environment.NewLine +
#"USSSA&&0363~SS~VA FACILITY ID&442&L|""~~~USDOD&&0363~TIN~VA FACILITY ID&442&L" + Environment.NewLine +
#"""~~~USDOD&&0363~FIN~VA FACILITY ID&442&L|7209344~~~USVHA&&0363~PI~VA FACILITY ID&442&L" + Environment.NewLine +
#"^VHIC-ABC123~~~USVHA&&0363~PI~VA FACILITY ID&742V1&L^ZEIGLER~PG~EIGHT~~~~L" + Environment.NewLine +
#"|""~~~~~~N^^19220304^M^^^9234234~""~SAN FRANCISCO~CA~94114~USA~P~""~075|~~SAN JOSE~CO~~""~N^^""^^^^^^^^^^^^^^^^^^" + Environment.NewLine +
#"PV1^^^9A-MED" + Environment.NewLine + "HH1^WA0034";
return retrunTemplate;
}
}

Suppose you read the file line by line. You can validate each line against the following Regex, and extract what you want.
var text = "QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T";
var rgx = new Regex(#"QRD.+SSN~%(.+)MED~(.+)");
var match = rgx.Match(text);
if (match.Success)
{
Console.WriteLine(match.Groups[1].Value);
Console.WriteLine(match.Groups[2].Value);
}
The match.Groups[1] has ABC123^9A-, and match.Groups[2] has WA0034^^^T. You can now do what you will with those text.
Regex Breakdown
#"QRD.+SSN~%(.+)MED~(.+)"
QRD - Starts with the string QRD
.+ - Followed by one or more characters
SSN~% - Followed by SSN~~%
(.+) - Grab (to Groups[1]) one or more characters between SSN~% and MED~
MED! - Followed by MED~
(.+) - Grab everything else in the line to Groups[2]

Here's my effort.
var input = #"MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T" ;
var pattern = #"SSN\~\%([A-Z0-9]+).*MED\~([A-Z0-9]+)";
var matches = Regex.Matches(input, pattern, RegexOptions.Multiline).
Select( m => new { SSN = m.Groups[1].Value, MED = m.Groups[2].Value});
foreach(var m in matches ) {
Console.WriteLine($"SSN = {m.SSN}, MED = {m.MED}");
}
Output
SSN = ABC123, MED = WA0034
With QRD matching
var input = #"MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T";
var pattern = #"SSN\~\%([A-Z0-9]+).*MED\~([A-Z0-9]+)";
var matches = input
.Split()
.Where(l => l.StartsWith("QRD"))
.Select(l => Regex.Matches(l, pattern).Select(m => new { SSN = m.Groups[1].Value, MED = m.Groups[2].Value }));
foreach (var groups in matches)
{
foreach (var g in groups)
{
Console.WriteLine($"SSN = {g.SSN}, MED = {g.MED}");
}
}
Output
SSN = ABC123, MED = WA0034

How to read a specific line and text from a text file

string lot = "RU644276G01";
var year = "201" + lot.Substring(2, 1);
var folder = #"\\sinsdn38.ap.infineon.com\ArchView\03_Reports\" + year +
#"\" + lot.Substring(3, 2) + #"\" + lot.Substring(0,8) + #"\";
DirectoryInfo di = new DirectoryInfo(folder);
foreach (var fi in di.GetFiles("*.TLT"))
{
var file = fi.FullName;
string line;
using (StreamReader sr = new StreamReader(file))
{
while ((line = sr.ReadLine()) != null)
{
if (line.StartsWith("TEST-END"))
{
timeStampTextBox.Text = line;
}
}
}
This is my code currently.
I want to read from a specific line (for example line 8) and the line starts with "Test-End". However, line 8 contains all these
"TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193"
but I only want to read "2017-01-98 15:51".
How do I change my code to get that? Currently I'm getting the whole line instead of the specific timestamp that I want.
Edit
How do I change the code such that the string lot =" " can be any number, meaning it does not need to be RU644276G01, it can be a different number which will be typed by users. I have created a textbox for users to input the number.

You extract the text. It seems quite regular pattern, so regular expressions should be able to help:
using System;
using System.Text.RegularExpressions;
public class Program
{
public static void Main()
{
var line = "TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193";
Regex re = new Regex(#"^(?:TEST-END : )(.*?\d{4}-\d{2}-\d{2} \d{2}:\d{2})");
var match = re.Match(line);
Console.WriteLine(match.Groups[1]);
Console.ReadLine(); // leave console open
}
}
Output:
2017-01-08 15:51 // this is group 1, group 0 is the full capture including TEST-END :
Use this to check it in regexr: https://regexr.com/3l1sf if you hover about the text it will diplay your capturing groups
The regex means:
^ start of the string
(?:TEST-END : ) non capturing group, text must be present
( a group
.*? as few (0-n) anythings as possible
\d{4}-\d{2}-\d{2} \d{2}:\d{2} 4 digits-2 digits-2digits 2digits:2digits
) end of group
More about regular expressions:
RegEx-Class
a regex Tester (one of many, the one I use): https://regexr.com/

Here is my answer using Regular Expressions.
if (line.StartsWith("TEST-END"))
{
Regex re = new Regex(#"\d{4}-\d{2}-\d{2} \d{2}:\d{2}");
var match = re.Match(line);
if(m.Success)
{
timeStampTextBox.Text = match.Value;
}
}
Output: 2017-01-08 15:51

you can split the line with ":", like this
var value = line.split(':');
and get your date like this.
var date = value[1] + ":" + value[2].Replace("PROGRAM", "");
above statement means
date = "2017-01-98 15" + ":" + "51"
if (line.StartsWith("TEST-END"))
{
var value = line.split(':');
var date = value[1] + ":" + value[2].Replace("PROGRAM", "");
timeStampTextBox.Text = date;
}
This is not the best answer, it depends on exactly the statement you had given.

I finally got all three parameters out of the last line
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
Dictionary<string, string> dict = new Dictionary<string, string>();
string pattern = #"(?'name'[^\s]+)\s:\s(?'value'[\w\s\-]*|\d{4}-\d{2}-\d{2}\s\d{2}:\d{2})";
string line = "TEST-END : 2017-01-08 15:51 PROGRAM : TLE8888QK-B2 BAU-NR : 95187193";
MatchCollection matches = Regex.Matches(line, pattern, RegexOptions.RightToLeft);
foreach (Match match in matches)
{
Console.WriteLine("name : '{0}', value : '{1}'", match.Groups["name"].Value, match.Groups["value"].Value);
dict.Add(match.Groups["name"].Value, match.Groups["value"].Value);
}
DateTime date = DateTime.Parse(dict["TEST-END"]);
Console.ReadLine();
}
}
}

Delete all but {x} C# string

I'm trying to cycle through a .txt to build a test function for another application I'm building.
I've got a list of UK based lat/long values that are formatted like this:
Latitude: 57°39′55″N 57.665198
Longitude: 6°57′27″W -6.95739395
Distance: 184.8338 mi Bearing: 329.815°
with the intended result of this small application being just the lat/long values:
57.665198
-6.95739395
So far I've got a StreamReader working with a myString.StartsWith("Latitude") {} but I'm stuck.
How do I detect a splitstring of 2 spaces " " inside of a string and delete everything before that? My code so far is this:
static void Main(string[] args)
{
string text = "";
using (var streamReader = new StreamReader(#"c:\mb\latlong.txt", Encoding.UTF8))
{
text = streamReader.ReadToEnd();
if (text.Trim().StartsWith("Latitude: "))
{
text.Split()
} else if (text.StartsWith("Distance: "))
{
} else if (text.StartsWith(""))
{
}
streamReader.ReadLine();
}
Console.ReadKey();
}
Thanks in advance

You can try using regular expressions
var result = File
.ReadLines(#"C:\MyFile.txt")
.SelectMany(line => Regex
.Matches(line, #"(?<=\s)-?[0-9]+(\.[0-9]+)*$")
.OfType<Match>()
.Select(match => match.Value));
Test
// 57.665198
// -6.95739395
Console.Write(String.Join(Environment.NewLine, result));

Use string.IndexOf(" ") to find the position of the two spaces in the string. Then you can use string.Substring(position) to get the string after that point.
In your code:
if (text.Trim().StartsWith("Latitude: "))
{
var positionOfTwoSpaces = text.IndexOf(" ");
var latString = text.Substring(positionOfTwoSpaces);
var latValue = float.Parse(latString);
}

You can try the regular expression solution. (You might need to fix up the space counts in the regex definitions)
static void Main(string[] args)
{
string text = "";
Regex lat = new Regex("Latitude: .+? (.+)");
Regex lon = new Regex("Longitude .+? (.+)");
using (var streamReader = new StreamReader(#"c:\mb\latlong.txt", Encoding.UTF8))
{
string line;
while ((line = streamReader.ReadLine() != null)
{
if (lat.IsMatch(line))
lat.Match(line).Groups[1].Value // latitude
else if(lon.IsMatch(line))
lon.Match(line).Groups[1].Value // longitude
}
}
Console.ReadKey();
}

A simple solution would be
string[] fileLines = IO.File.ReadAllLines("input file path");
List<string> resultLines = new List<string>();
foreach (string line in fileLines) {
string[] parts = line.Split(" "); //Double space
if (parts.Count() > 1) {
string lastPart = parts.LastOrDefault();
if (!string.IsNullOrEmpty(lastPart)) {
resultLines.Add(lastPart);
}
}
}
IO.File.WriteAllLines("output file path", resultLines.ToArray());

As I already suggested in my comment. You can look for the last occurrence of the space and substring from there.
using System;
using System.IO;
using System.Text;
public class Test
{
public static void Main()
{
String line = String.Empty;
while(!String.IsNullOrEmpty((line = streamReader.ReadLine())))
{
if(line.StartsWith("Latitude:"))
{
line = line.Substring(line.LastIndexOf(' ') + 1);
Console.WriteLine(line);
}
}
Console.ReadKey();
}
}
Working example.
I didn't provide all the code because this is just copy paste for the longitude case. I think you can do this by your own. :)

RegEx.Replace is replacing more than the matched

I'm trying to replace the following match a pattenr in xml string where the pattern is various types of attributes that are present in any given xml element.
so if the xml string was:
<TEST xlmns="https://www.test.com">
<XXX>Foo</XXX>
<YYY>Bar</YYY>
</TEST>
I want to remove the namespaces above using pattenr .*?(?:[a-z][a-z0-9_]*).*?((?:[a-z][a-z0-9_]*))(=)(\".*?\") in the below code:
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
var txt = "<TEST xlmns=\"https://www.test.com\"> <XXX>Foo</XXX> <YYY>Bar</YYY> </TEST>";
const string pattern = ".*?(?:[a-z][a-z0-9_]*).*?((?:[a-z][a-z0-9_]*))(=)(\".*?\")";
var r = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);
var m = r.Match(txt);
if (m.Success)
{
String var1 = m.Groups[1].ToString();
String c1 = m.Groups[2].ToString();
String string1 = m.Groups[3].ToString();
Console.Write( var1.ToString() + c1.ToString() + string1.ToString() + "\n");
Console.WriteLine(RegExReplace(txt,pattern,""));
}
Console.ReadLine();
}
static String RegExReplace(String input, String pattern, String replacement)
{
if (string.IsNullOrEmpty(input))
return input;
return Regex.Replace(input, pattern, replacement, RegexOptions.IgnoreCase);
}
}
}
But where it matches, in this case <TEST xlmns="https://www.test.com"> is turned into > when it should have been <TEST>
What have i done wrong in the replace method?

If you just want to remove namespace, change your regex to:
const string pattern = "xlmns=\".*\"";
If you want to remove all attributes, use the given regex:
const string pattern = "\w+=\".*\"";
Full code:
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
var txt = "<TEST xlmns=\"https://www.test.com\"> <XXX>Foo</XXX> <YYY>Bar</YYY> </TEST>";
const string pattern = "\w+=\".*\"";
var r = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.Singleline);
var m = r.Match(txt);
if (m.Success)
{
String var1 = m.Groups[1].ToString();
String c1 = m.Groups[2].ToString();
String string1 = m.Groups[3].ToString();
Console.Write( var1.ToString() + c1.ToString() + string1.ToString() + "\n");
Console.WriteLine(RegExReplace(txt,pattern,""));
}
Console.ReadLine();
}
static String RegExReplace(String input, String pattern, String replacement)
{
if (string.IsNullOrEmpty(input))
return input;
return Regex.Replace(input, pattern, replacement, RegexOptions.IgnoreCase);
}
}
}

Remove words from string c#

I am working on a ASP.NET 4.0 web application, the main goal for it to do is go to the URL in the MyURL variable then read it from top to bottom, search for all lines that start with "description" and only keep those while removing all HTML tags. What I want to do next is remove the "description" text from the results afterwords so I have just my device names left. How would I do this?
protected void parseButton_Click(object sender, EventArgs e)
{
MyURL = deviceCombo.Text;
WebRequest objRequest = HttpWebRequest.Create(MyURL);
objRequest.Credentials = CredentialCache.DefaultCredentials;
using (StreamReader objReader = new StreamReader(objRequest.GetResponse().GetResponseStream()))
{
originalText.Text = objReader.ReadToEnd();
}
//Read all lines of file
String[] crString = { "<BR> " };
String[] aLines = originalText.Text.Split(crString, StringSplitOptions.RemoveEmptyEntries);
String noHtml = String.Empty;
for (int x = 0; x < aLines.Length; x++)
{
if (aLines[x].Contains(filterCombo.SelectedValue))
{
noHtml += (RemoveHTML(aLines[x]) + "\r\n");
}
}
//Print results to textbox
resultsBox.Text = String.Join(Environment.NewLine, noHtml);
}
public static string RemoveHTML(string text)
{
text = text.Replace(" ", " ").Replace("<br>", "\n");
var oRegEx = new System.Text.RegularExpressions.Regex("<[^>]+>");
return oRegEx.Replace(text, string.Empty);
}

Ok so I figured out how to remove the words through one of my existing functions:
public static string RemoveHTML(string text)
{
text = text.Replace(" ", " ").Replace("<br>", "\n").Replace("description", "").Replace("INFRA:CORE:", "")
.Replace("RESERVED", "")
.Replace(":", "")
.Replace(";", "")
.Replace("-0/3/0", "");
var oRegEx = new System.Text.RegularExpressions.Regex("<[^>]+>");
return oRegEx.Replace(text, string.Empty);
}

public static void Main(String[] args)
{
string str = "He is driving a red car.";
Console.WriteLine(str.Replace("red", "").Replace(" ", " "));
}
Output:
He is driving a car.
Note: In the second Replace its a double space.
Link : https://i.stack.imgur.com/rbluf.png
Try this.It will remove all occurrence of the word which you want to remove.

Try something like this, using LINQ:
List<string> lines = new List<string>{
"Hello world",
"Description: foo",
"Garbage:baz",
"description purple"};
//now add all your lines from your html doc.
if (aLines[x].Contains(filterCombo.SelectedValue))
{
lines.Add(RemoveHTML(aLines[x]) + "\r\n");
}
var myDescriptions = lines.Where(x=>x.ToLower().BeginsWith("description"))
.Select(x=> x.ToLower().Replace("description",string.Empty)
.Trim());
// you now have "foo" and "purple", and anything else.
You may have to adjust for colons, etc.

void Main()
{
string test = "<html>wowzers description: none <div>description:a1fj391</div></html>";
IEnumerable<string> results = getDescriptions(test);
foreach (string result in results)
{
Console.WriteLine(result);
}
//result: none
// a1fj391
}
static Regex MyRegex = new Regex(
"description:\\s*(?<value>[\\d\\w]+)",
RegexOptions.Compiled);
IEnumerable<string> getDescriptions(string html)
{
foreach(Match match in MyRegex.Matches(html))
{
yield return match.Groups["value"].Value;
}
}

Adapted From Code Project
string value = "ABC - UPDATED";
int index = value.IndexOf(" - UPDATED");
if (index != -1)
{
value = value.Remove(index);
}
It will print ABC without - UPDATED

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

c# count a specific word happen times in a txt file - c#

Try this code public static void Main() { var str = File.ReadAllText(#"C:\cmd.txt"); var searchTerm = "IMAGE"; var matches = Regex.Matches(str, #"\b" + searchTerm + #"\b", RegexOptions.IgnoreCase); Console.WriteLine(matches.Count); Console.ReadLine(); }

According to https://msdn.microsoft.com/en-us/library/az24scfc.aspx the "\b" matches a backspace. That might explain why you aren't matching the regular expression.

Related

How to add a string of text into another txt file

How to read a specific line and text from a text file

Delete all but {x} C# string

RegEx.Replace is replacing more than the matched

Remove words from string c#

Categories

Resources