I am working on a ASP.NET 4.0 web application, the main goal for it to do is go to the URL in the MyURL variable then read it from top to bottom, search for all lines that start with "description" and only keep those while removing all HTML tags. What I want to do next is remove the "description" text from the results afterwords so I have just my device names left. How would I do this?
protected void parseButton_Click(object sender, EventArgs e)
{
MyURL = deviceCombo.Text;
WebRequest objRequest = HttpWebRequest.Create(MyURL);
objRequest.Credentials = CredentialCache.DefaultCredentials;
using (StreamReader objReader = new StreamReader(objRequest.GetResponse().GetResponseStream()))
{
originalText.Text = objReader.ReadToEnd();
}
//Read all lines of file
String[] crString = { "<BR> " };
String[] aLines = originalText.Text.Split(crString, StringSplitOptions.RemoveEmptyEntries);
String noHtml = String.Empty;
for (int x = 0; x < aLines.Length; x++)
{
if (aLines[x].Contains(filterCombo.SelectedValue))
{
noHtml += (RemoveHTML(aLines[x]) + "\r\n");
}
}
//Print results to textbox
resultsBox.Text = String.Join(Environment.NewLine, noHtml);
}
public static string RemoveHTML(string text)
{
text = text.Replace(" ", " ").Replace("<br>", "\n");
var oRegEx = new System.Text.RegularExpressions.Regex("<[^>]+>");
return oRegEx.Replace(text, string.Empty);
}
Ok so I figured out how to remove the words through one of my existing functions:
public static string RemoveHTML(string text)
{
text = text.Replace(" ", " ").Replace("<br>", "\n").Replace("description", "").Replace("INFRA:CORE:", "")
.Replace("RESERVED", "")
.Replace(":", "")
.Replace(";", "")
.Replace("-0/3/0", "");
var oRegEx = new System.Text.RegularExpressions.Regex("<[^>]+>");
return oRegEx.Replace(text, string.Empty);
}
public static void Main(String[] args)
{
string str = "He is driving a red car.";
Console.WriteLine(str.Replace("red", "").Replace(" ", " "));
}
Output:
He is driving a car.
Note: In the second Replace its a double space.
Link : https://i.stack.imgur.com/rbluf.png
Try this.It will remove all occurrence of the word which you want to remove.
Try something like this, using LINQ:
List<string> lines = new List<string>{
"Hello world",
"Description: foo",
"Garbage:baz",
"description purple"};
//now add all your lines from your html doc.
if (aLines[x].Contains(filterCombo.SelectedValue))
{
lines.Add(RemoveHTML(aLines[x]) + "\r\n");
}
var myDescriptions = lines.Where(x=>x.ToLower().BeginsWith("description"))
.Select(x=> x.ToLower().Replace("description",string.Empty)
.Trim());
// you now have "foo" and "purple", and anything else.
You may have to adjust for colons, etc.
void Main()
{
string test = "<html>wowzers description: none <div>description:a1fj391</div></html>";
IEnumerable<string> results = getDescriptions(test);
foreach (string result in results)
{
Console.WriteLine(result);
}
//result: none
// a1fj391
}
static Regex MyRegex = new Regex(
"description:\\s*(?<value>[\\d\\w]+)",
RegexOptions.Compiled);
IEnumerable<string> getDescriptions(string html)
{
foreach(Match match in MyRegex.Matches(html))
{
yield return match.Groups["value"].Value;
}
}
Adapted From Code Project
string value = "ABC - UPDATED";
int index = value.IndexOf(" - UPDATED");
if (index != -1)
{
value = value.Remove(index);
}
It will print ABC without - UPDATED
Related
if i input string or number.
Example Input : 02x2, 05x3, 05x3, 02x4, 19.02x21, 21.02x3
I want ouput :
02x2
02x4
05x3
05x3
02x3
02x21
Helpme
Try This
string str = "02x2, 05x3, 05x3, 02x4, 19.02x21, 21.02x3";
var str1 = str.Split(',');
string test = "";
List<string> strs = new List<string>();
foreach (var item in str1.OrderBy(x => x.Trim()))
{
var str2 = item.Split('x');
string i = "";
if (str2[0].ToString().Contains("."))
{
var st = str2[0].ToString().Split('.');
i = st[st.Count() -1];
}
else
{
i = str2[0].ToString();
}
test += i.ToString() + 'x' + str2[1].ToString() + ",";
}
test.TrimEnd(',');
Output : 02x2, 02x4, 05x3, 05x3,02x21,02x3
I have this txt file that contains this text:
MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T
but I only want the values that come after SSN~% and after the MED~
I want to be able read from the Line that starts with QRD and then be able to grab ANY value after SSN~% and MED~, so the value can be anything I'm just using ABC123 and WA0034 as examples.
Form1.cs
private void Parse(string filename)
{
string line;
var str = File.ReadAllText(filename);
System.IO.StreamReader file = new System.IO.StreamReader(filename);
targetRichTextBox = richTextBox1;
WriteTextSafelyInRichTextBox(str);
while ((line = file.ReadLine()) != null)
{
if ((line.Contains("QRD"))
{
//Enter code here
}
}
char[] delimiterChars = { '^' };
string[] words = str.Split(delimiterChars);
var createText = (RetrunTemplate.Get().Replace(words[24], "VHIC-").Replace(words[25], "9A-MED~WA0034"));
var outputFilename = outputDir + "\\OutboundMessage - " + DateTime.UtcNow.ToString("yyyy-MM-dd HH-mm-ss-ff", CultureInfo.InvariantCulture) + ".txt";
File.WriteAllText(outputFilename, createText);
targetRichTextBox = richTextBox2;
WriteTextSafelyInRichTextBox(createText);
file.Close();
File.Delete(filename);
MessageBox.Show("You have successfuly creatd an outbound Message");
}
RetrunTemplate
class RetrunTemplate
{
public static string Get()
{
string retrunTemplate = #"MSH^~|\&^R3POCSEND^442~CHEY209.FO-BAYPINES.MED.VA.GOV~DNS^R3POCRCV^^20171108131710-0400^^RCL~I06^442157252912^D^2.5^^^AL^NE^USA" + Environment.NewLine +
"PID^^^4420041228V165312~~~USVHA&&0363~NI~VA FACILITY ID&442&L~~20171108|666393848~~~" + Environment.NewLine +
#"USSSA&&0363~SS~VA FACILITY ID&442&L|""~~~USDOD&&0363~TIN~VA FACILITY ID&442&L" + Environment.NewLine +
#"""~~~USDOD&&0363~FIN~VA FACILITY ID&442&L|7209344~~~USVHA&&0363~PI~VA FACILITY ID&442&L" + Environment.NewLine +
#"^VHIC-ABC123~~~USVHA&&0363~PI~VA FACILITY ID&742V1&L^ZEIGLER~PG~EIGHT~~~~L" + Environment.NewLine +
#"|""~~~~~~N^^19220304^M^^^9234234~""~SAN FRANCISCO~CA~94114~USA~P~""~075|~~SAN JOSE~CO~~""~N^^""^^^^^^^^^^^^^^^^^^" + Environment.NewLine +
#"PV1^^^9A-MED" + Environment.NewLine + "HH1^WA0034";
return retrunTemplate;
}
}
Suppose you read the file line by line. You can validate each line against the following Regex, and extract what you want.
var text = "QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T";
var rgx = new Regex(#"QRD.+SSN~%(.+)MED~(.+)");
var match = rgx.Match(text);
if (match.Success)
{
Console.WriteLine(match.Groups[1].Value);
Console.WriteLine(match.Groups[2].Value);
}
The match.Groups[1] has ABC123^9A-, and match.Groups[2] has WA0034^^^T. You can now do what you will with those text.
Regex Breakdown
#"QRD.+SSN~%(.+)MED~(.+)"
QRD - Starts with the string QRD
.+ - Followed by one or more characters
SSN~% - Followed by SSN~~%
(.+) - Grab (to Groups[1]) one or more characters between SSN~% and MED~
MED! - Followed by MED~
(.+) - Grab everything else in the line to Groups[2]
Here's my effort.
var input = #"MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T" ;
var pattern = #"SSN\~\%([A-Z0-9]+).*MED\~([A-Z0-9]+)";
var matches = Regex.Matches(input, pattern, RegexOptions.Multiline).
Select( m => new { SSN = m.Groups[1].Value, MED = m.Groups[2].Value});
foreach(var m in matches ) {
Console.WriteLine($"SSN = {m.SSN}, MED = {m.MED}");
}
Output
SSN = ABC123, MED = WA0034
With QRD matching
var input = #"MSH^~|\&^R3POCQUERYS^050~BCMABU.MED.VA.GOV~DNS^R3POCQUERYR^^201711081317040500^^RQC~I06^50279320^D^2.5^^^AL^NE^USA
QRD^20171108131704-0500^R^I^WQRY^^^^SSN~%ABC123^9A-MED~WA0034^^^T";
var pattern = #"SSN\~\%([A-Z0-9]+).*MED\~([A-Z0-9]+)";
var matches = input
.Split()
.Where(l => l.StartsWith("QRD"))
.Select(l => Regex.Matches(l, pattern).Select(m => new { SSN = m.Groups[1].Value, MED = m.Groups[2].Value }));
foreach (var groups in matches)
{
foreach (var g in groups)
{
Console.WriteLine($"SSN = {g.SSN}, MED = {g.MED}");
}
}
Output
SSN = ABC123, MED = WA0034
static void Main(string[] args)
{
StreamReader oReader;
if (File.Exists(#"C:\cmd.txt"))
{
Console.WriteLine("IMAGE");
string cSearforSomething = Console.ReadLine().Trim();
oReader = new StreamReader(#"C:\cmd.txt");
string cColl = oReader.ReadToEnd();
string cCriteria = #"\b" + cSearforSomething + #"\b";
System.Text.RegularExpressions.Regex oRegex = new System.Text.RegularExpressions.Regex(cCriteria, RegexOptions.IgnoreCase);
int count = oRegex.Matches(cColl).Count;
Console.WriteLine(count.ToString());
}
Console.ReadLine();
}
I cannot calculate the string "IMAGE" happen times in my file? Is my code wrong?
Try this code
public static void Main()
{
var str = File.ReadAllText(#"C:\cmd.txt");
var searchTerm = "IMAGE";
var matches = Regex.Matches(str, #"\b" + searchTerm + #"\b", RegexOptions.IgnoreCase);
Console.WriteLine(matches.Count);
Console.ReadLine();
}
According to https://msdn.microsoft.com/en-us/library/az24scfc.aspx the "\b" matches a backspace. That might explain why you aren't matching the regular expression.
using System;
using System.Text.RegularExpressions;
namespace ConsoleApplication8
{
class Program
{
static void Main(string[] args)
{
string cColl = System.IO.File.ReadAllText(#"C:\some.txt");
//string cColl = "This is similar, similar, similar, similar, similar, similar";
Console.WriteLine(cColl);
string cCriteria = #"\b" + "similar" + #"\b";
Regex oRegex = new Regex(cCriteria, RegexOptions.IgnoreCase);
int count = oRegex.Matches(cColl).Count;
Console.WriteLine(count.ToString());
Console.ReadLine();
}
}
}
I'm trying to cycle through a .txt to build a test function for another application I'm building.
I've got a list of UK based lat/long values that are formatted like this:
Latitude: 57°39′55″N 57.665198
Longitude: 6°57′27″W -6.95739395
Distance: 184.8338 mi Bearing: 329.815°
with the intended result of this small application being just the lat/long values:
57.665198
-6.95739395
So far I've got a StreamReader working with a myString.StartsWith("Latitude") {} but I'm stuck.
How do I detect a splitstring of 2 spaces " " inside of a string and delete everything before that? My code so far is this:
static void Main(string[] args)
{
string text = "";
using (var streamReader = new StreamReader(#"c:\mb\latlong.txt", Encoding.UTF8))
{
text = streamReader.ReadToEnd();
if (text.Trim().StartsWith("Latitude: "))
{
text.Split()
} else if (text.StartsWith("Distance: "))
{
} else if (text.StartsWith(""))
{
}
streamReader.ReadLine();
}
Console.ReadKey();
}
Thanks in advance
You can try using regular expressions
var result = File
.ReadLines(#"C:\MyFile.txt")
.SelectMany(line => Regex
.Matches(line, #"(?<=\s)-?[0-9]+(\.[0-9]+)*$")
.OfType<Match>()
.Select(match => match.Value));
Test
// 57.665198
// -6.95739395
Console.Write(String.Join(Environment.NewLine, result));
Use string.IndexOf(" ") to find the position of the two spaces in the string. Then you can use string.Substring(position) to get the string after that point.
In your code:
if (text.Trim().StartsWith("Latitude: "))
{
var positionOfTwoSpaces = text.IndexOf(" ");
var latString = text.Substring(positionOfTwoSpaces);
var latValue = float.Parse(latString);
}
You can try the regular expression solution. (You might need to fix up the space counts in the regex definitions)
static void Main(string[] args)
{
string text = "";
Regex lat = new Regex("Latitude: .+? (.+)");
Regex lon = new Regex("Longitude .+? (.+)");
using (var streamReader = new StreamReader(#"c:\mb\latlong.txt", Encoding.UTF8))
{
string line;
while ((line = streamReader.ReadLine() != null)
{
if (lat.IsMatch(line))
lat.Match(line).Groups[1].Value // latitude
else if(lon.IsMatch(line))
lon.Match(line).Groups[1].Value // longitude
}
}
Console.ReadKey();
}
A simple solution would be
string[] fileLines = IO.File.ReadAllLines("input file path");
List<string> resultLines = new List<string>();
foreach (string line in fileLines) {
string[] parts = line.Split(" "); //Double space
if (parts.Count() > 1) {
string lastPart = parts.LastOrDefault();
if (!string.IsNullOrEmpty(lastPart)) {
resultLines.Add(lastPart);
}
}
}
IO.File.WriteAllLines("output file path", resultLines.ToArray());
As I already suggested in my comment. You can look for the last occurrence of the space and substring from there.
using System;
using System.IO;
using System.Text;
public class Test
{
public static void Main()
{
String line = String.Empty;
while(!String.IsNullOrEmpty((line = streamReader.ReadLine())))
{
if(line.StartsWith("Latitude:"))
{
line = line.Substring(line.LastIndexOf(' ') + 1);
Console.WriteLine(line);
}
}
Console.ReadKey();
}
}
Working example.
I didn't provide all the code because this is just copy paste for the longitude case. I think you can do this by your own. :)
Currently I am building an agenda with extra options.
for testing purposes I store the data in a simple .txt file
(after that it will be connected to the agenda of a virtual assistant.)
To change or delete text from this .txt file I have a problem.
Although the part of the content that needs to be replaced and the search string are exactly the same it doesn't replace the text in content.
code:
Change method
public override void Change(List<object> oldData, List<object> newData)
{
int index = -1;
for (int i = 0; i < agenda.Count; i++)
{
if(agenda[i].GetType() == "Task")
{
Task t = (Task)agenda[i];
if(t.remarks == oldData[0].ToString() && t.datetime == (DateTime)oldData[1] && t.reminders == oldData[2])
{
index = i;
break;
}
}
}
string search = "Task\r\nTo do: " + oldData[0].ToString() + "\r\nDateTime: " + (DateTime)oldData[1] + "\r\n";
reminders = (Dictionary<DateTime, bool>) oldData[2];
if(reminders.Count != 0)
{
search += "Reminders\r\n";
foreach (KeyValuePair<DateTime, bool> rem in reminders)
{
if (rem.Value)
search += "speak " + rem.Key + "\r\n";
else
search += rem.Key + "\r\n";
}
}
// get new data
string newRemarks = (string)newData[0];
DateTime newDateTime = (DateTime)newData[1];
Dictionary<DateTime, bool> newReminders = (Dictionary<DateTime, bool>)newData[2];
string replace = "Task\r\nTo do: " + newRemarks + "\r\nDateTime: " + newDateTime + "\r\n";
if(newReminders.Count != 0)
{
replace += "Reminders\r\n";
foreach (KeyValuePair<DateTime, bool> rem in newReminders)
{
if (rem.Value)
replace += "speak " + rem.Key + "\r\n";
else
replace += rem.Key + "\r\n";
}
}
Replace(search, replace);
if (index != -1)
{
remarks = newRemarks;
datetime = newDateTime;
reminders = newReminders;
agenda[index] = this;
}
}
replace method
private void Replace(string search, string replace)
{
StreamReader reader = new StreamReader(path);
string content = reader.ReadToEnd();
reader.Close();
content = Regex.Replace(content, search, replace);
content.Trim();
StreamWriter writer = new StreamWriter(path);
writer.Write(content);
writer.Close();
}
When running in debug I get the correct info:
content "-- agenda --\r\n\r\nTask\r\nTo do: test\r\nDateTime: 16-4-2012 15:00:00\r\nReminders:\r\nspeak 16-4-2012 13:00:00\r\n16-4-2012 13:30:00\r\n\r\nTask\r\nTo do: testing\r\nDateTime: 16-4-2012 9:00:00\r\nReminders:\r\nspeak 16-4-2012 8:00:00\r\n\r\nTask\r\nTo do: aaargh\r\nDateTime: 18-4-2012 12:00:00\r\nReminders:\r\n18-4-2012 11:00:00\r\n" string
search "Task\r\nTo do: aaargh\r\nDateTime: 18-4-2012 12:00:00\r\nReminders\r\n18-4-2012 11:00:00\r\n" string
replace "Task\r\nTo do: aaargh\r\nDateTime: 18-4-2012 13:00:00\r\nReminders\r\n18-4-2012 11:00:00\r\n" string
But it doesn't change the text. How do I make sure that the Regex.Replace finds the right piece of content?
PS. I did check several topics on this, but none of the solutions mentioned there work for me.
You missed a : right after Reminders. Just check it again :)
You could try using a StringBuilder to build up you want to write out to the file.
Just knocked up a quick example in a console app but this appears to work for me and I think it might be what you are looking for.
StringBuilder sb = new StringBuilder();
sb.Append("Tasks\r\n");
sb.Append("\r\n");
sb.Append("\tTask 1 details");
Console.WriteLine(sb.ToString());
StreamWriter writer = new StreamWriter("Tasks.txt");
writer.Write(sb.ToString());
writer.Close();