I am trying to convert BBCode to HTML, and I found this nifty little class packed with regexes that does just that.
/// <summary>
/// Converts a small subset of BBCode markup into the equivalent HTML.
/// </summary>
/// <remarks>
/// Supported tags: [b], [i], [u], [s], [url=...], [img], [img=...],
/// [color=...] / [colour=...] and [size=...]. Tag names are matched
/// case-sensitively. After the tags are converted, every CRLF sequence is
/// prefixed with &lt;br /&gt;.
/// NOTE(review): captured values are inserted into the HTML as-is (no
/// HTML-encoding), so encode untrusted input before calling this method.
/// </remarks>
/// <param name="str">Text that may contain BBCode tags.</param>
/// <returns>
/// The text with the supported tags replaced by HTML; a null or empty input
/// is returned unchanged.
/// </returns>
public static string ConvertBBCodeToHTML(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    // In every pattern the square brackets are escaped: an unescaped "[b]"
    // would be a character class matching the single letter 'b'.

    // format the bold tags: [b][/b]
    // becomes: <strong></strong>
    str = Regex.Replace(str, @"\[b\](.+?)\[/b\]", "<strong>$1</strong>");

    // format the italic tags: [i][/i]
    // becomes: <em></em>
    str = Regex.Replace(str, @"\[i\](.+?)\[/i\]", "<em>$1</em>");

    // format the underline tags: [u][/u]
    // becomes: <u></u>
    str = Regex.Replace(str, @"\[u\](.+?)\[/u\]", "<u>$1</u>");

    // format the strike tags: [s][/s]
    // becomes: <strike></strike>
    str = Regex.Replace(str, @"\[s\](.+?)\[/s\]", "<strike>$1</strike>");

    // format the url tags: [url=www.website.com]my site[/url]
    // becomes: <a href="www.website.com">my site</a>
    // Embedded double quotes must be escaped (\") inside a regular C# string literal.
    str = Regex.Replace(str, @"\[url=([^\]]+)\]([^\]]+)\[/url\]", "<a href=\"$1\">$2</a>");

    // format the img tags: [img]www.website.com/img/image.jpeg[/img]
    // becomes: <img src="www.website.com/img/image.jpeg">
    str = Regex.Replace(str, @"\[img\]([^\]]+)\[/img\]", "<img src=\"$1\">");

    // format img tags with alt: [img=www.website.com/img/image.jpeg]this is the alt text[/img]
    // becomes: <img src="www.website.com/img/image.jpeg" alt="this is the alt text">
    str = Regex.Replace(str, @"\[img=([^\]]+)\]([^\]]+)\[/img\]", "<img src=\"$1\" alt=\"$2\">");

    // format the colour tags: [color=red][/color]
    // becomes: <font color="red"></font>
    // supports UK English and US English spelling of colour/color
    str = Regex.Replace(str, @"\[color=([^\]]+)\]([^\]]+)\[/color\]", "<font color=\"$1\">$2</font>");
    str = Regex.Replace(str, @"\[colour=([^\]]+)\]([^\]]+)\[/colour\]", "<font color=\"$1\">$2</font>");

    // format the size tags: [size=3][/size]
    // becomes: <font size="+3"></font>
    str = Regex.Replace(str, @"\[size=([^\]]+)\]([^\]]+)\[/size\]", "<font size=\"+$1\">$2</font>");

    // lastly, replace any new line characters with <br />
    str = str.Replace("\r\n", "<br />\r\n");
    return str;
}
The problem is that I'm getting the CS1056 Unexpected character '$' error when doing the regex replace even if it seems to be perfectly valid.
You need to escape the embedded double quotes (") in strings like:
"<a href="$1">$2</a>"
They should be:
"<a href=\"$1\">$2</a>"
Or, with verbatim string literals (where an embedded quote is doubled):
@"<a href=""$1"">$2</a>"
You should use single quotes to embed values within the string like below:
// Single-quoted HTML attributes need no escaping inside a C# string literal.
// The square brackets are escaped so they match literally instead of opening
// a character class, and the anchor is closed with </a>.
exp = new Regex(@"\[url=([^\]]+)\]([^\]]+)\[/url\]");
str = exp.Replace(str, "<a href='$1'>$2</a>");
Related
I want to replace every link in a string with a link that I provide. What I have tried is:
// Reads the whole file selected in dd1 and attempts to rewrite the links in it.
StreamReader reader = new StreamReader(dd1.SelectedItem.Value);
string readFile = reader.ReadToEnd();
// Case-insensitive pattern for http/https links.
Regex regx = new Regex("http(s)?://([\\w+?\\.\\w+])+([a-zA-Z0-9\\~\\!\\#\\#\\$\\%\\^\\&\\*\\(\\)_\\-\\=\\+\\\\\\/\\?\\.\\:\\;\\'\\,]*([a-zA-Z0-9\\?\\#\\=\\/]){1})?", RegexOptions.IgnoreCase);
// The pattern text stored here is overwritten by the file contents on the next line.
string output=regx.ToString();
output = readFile;
MatchCollection matches = regx.Matches(output);
foreach (Match match in matches)
{
// NOTE(review): the first argument is quoted text ("match.Value"), not the
// value of match.Value, so Replace searches for that literal text; the leading
// '#' on both literals looks like a garbled '@' verbatim-string prefix. The
// replacement also appends the whole of output rather than the matched link.
output = output.Replace(#"match.Value", #"http://localhost:61187/two?" + "sender=" + Server.UrlEncode(this.txtUsername.Text) + "&reciever=" + output);
}
Here, I have a string, output, which contains some links, so I have used a regex to find the links in it. But the string named "output" does not seem to be processed: I get neither an error nor any output.
It seems to me that you should be using regx.Replace(...) instead:
// Read the whole file; the using block disposes the reader (and its file
// handle) as soon as the text has been read.
string readFile;
using (StreamReader reader = new StreamReader(dd1.SelectedItem.Value))
{
    readFile = reader.ReadToEnd();
}

// Case-insensitive pattern for http/https links.
Regex regx = new Regex("http(s)?://([\\w+?\\.\\w+])+([a-zA-Z0-9\\~\\!\\#\\#\\$\\%\\^\\&\\*\\(\\)_\\-\\=\\+\\\\\\/\\?\\.\\:\\;\\'\\,]*([a-zA-Z0-9\\?\\#\\=\\/]){1})?", RegexOptions.IgnoreCase);

// Encode the user name once; it is the same for every replaced link.
string username = Server.UrlEncode(this.txtUsername.Text);

// Replace each matched link with the redirect URL, passing the original link
// (escaped) in the receiver query parameter.
string output = regx.Replace(readFile, match =>
{
    var url = Uri.EscapeDataString(match.Value);
    return $"http://localhost:61187/two?sender={username}&receiver={url}";
});
This will replace every match with the URL returned by the anonymous function.
I have a xml file, structure is like following:
<template><body>public DiffSectionType Type<template:br/>{<template:br/><template:tab/>get<template:br/><template:tab/>{<template:br/><template:tab/><template:tab/>return _Type;<template:br/><template:tab/>}<template:br/>}</body></template>
I would like to be more readable, like:
public DiffSectionType Type
{
get
{
return _Type;
}
}
<template:br/> => new line
<template:tab/> => tab
I can read body string, but not able to put it in correct format,
I have tried
// Load the template file and print the text content of every <body> element
// found directly under the root <template> element.
var templateDoc = XDocument.Load("template.xml");
foreach (var bodyElement in templateDoc.Elements("template").Elements("body"))
{
    Console.WriteLine(bodyElement.Value);
}
You could use Regex to solve this so something like this:
// Sample template markup: <template:br/> stands for a line break and
// <template:tab/> for a tab.
string str = @"<template><body>public DiffSectionType Type<template:br/>{<template:br/><template:tab/>get<template:br/><template:tab/>{<template:br/><template:tab/><template:tab/>return _Type;<template:br/><template:tab/>}<template:br/>}</body></template>";
// Expand the markers into real whitespace.
str = Regex.Replace(str, "<template:br/>", Environment.NewLine);
str = Regex.Replace(str, "<template:tab/>", "\t");
// Strip the opening and closing <template> and <body> wrapper tags in one pass.
str = Regex.Replace(str, "</?(?:template|body)>", "");
I am matching and replacing matching words with <span>keyword</span>,
Suppose the user enters the search keyword in lower case, such as united states: it matches the keyword and replaces it.
Actual String
String str = "This is United States Of America"
After Match & Replace function string is replaced with lower case match string as entered by the user
This is united states Of America after match
I want to match and replace the string while maintaining the actual case of the matched word or words in the string or database
I use the following code for this. How can I alter it so that my requirements are met?
// Builds a case-insensitive pattern that wraps the request's SearchKeyword
// value in \b word boundaries.
// NOTE(review): the keyword is concatenated into the pattern without
// Regex.Escape; the leading '#' on the literals looks like a garbled '@'
// verbatim-string prefix, and the closing parentheses look unbalanced — confirm.
string pattern = #"(\b(?:" + Request["SearchKeyword"].ToString().Trim() + #")\b))";
regex = new Regex(pattern, RegexOptions.IgnoreCase);
// The replacement text is built from the keyword as typed in the request, not
// from the matched text, so the casing of the original text is lost.
result = regex.Replace(result, "<span class='highlight'>" + Request["SearchKeyword"].ToString() + "</span>",);
Desired Output expected
This is United States Of America
You need to use the Match().Value instead of the original request string.
Here is the code you can use:
var req = "united states";
var str = "This is United States Of America";
// Regex.Escape keeps any regex metacharacters in the keyword literal. The
// lookarounds require the keyword to be delimited by a space separator or by
// (optional punctuation at) the start/end of the text.
var pattern = @"((?<=^\p{P}*|\p{Zs})(?:" + Regex.Escape(req.Trim()) + @")(?=\p{P}*$|\p{Zs}))";
var regx = new Regex(pattern, RegexOptions.IgnoreCase);
var m = regx.Match(str);
var result = string.Empty;
if (m.Success)
{
    // "$&" inserts the text of each individual match, so every occurrence keeps
    // its own casing (rather than reusing the first match's text for all of them).
    result = regx.Replace(str, "<span class='highlight'>$&</span>");
}
Output: This is <span class='highlight'>United States</span> Of America
EDIT: (just in case)
Using a lambda, you can obtain the same result:
// Wrap each match in a highlight span, re-emitting the matched text itself so
// its original casing is preserved.
MatchEvaluator wrapInSpan = m => "<span class='highlight'>" + m.Value + "</span>";
var regx = new Regex(pattern, RegexOptions.IgnoreCase);
var result = regx.Replace(str, wrapInSpan);
It is safe even in case we have no match.
you can use another overload of Regex.Replace method with MatchEvaluator
string str = "This is United States Of America";
string SearchKeyword = "united states";
// Regex.Escape keeps any regex metacharacters in the keyword literal.
string pattern = @"(\b(?:" + Regex.Escape(SearchKeyword.Trim()) + @")\b)";
var regex = new Regex(pattern, RegexOptions.IgnoreCase);
// The evaluator re-emits the matched text, so its original casing is kept.
var result = regex.Replace(str, m => "<span class='highlight'>" + m.Value + "</span>");
I have a problem finding all occurences of a pattern in a string.
Check this string :
string msg= "=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?= =?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?=";
I want to return the 2 occurrences (in order to later decode them):
=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?=
and
=?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?="
With the following regex code, it returns only 1 occurrence: the full string.
// Looks for text of the form =?...?B?...?= (case-insensitively) and strips the
// "=?" and "?B?"/"?b?" markers from each match.
// NOTE(review): both ".*" quantifiers are greedy, so a single match can span
// from the first "=?" to the last "?=" in the input; the leading '#' on the
// pattern literal looks like a garbled '@' verbatim-string prefix.
var charSetOccurences = new Regex(#"=\?.*\?B\?.*\?=", RegexOptions.IgnoreCase);
var charSetMatches = charSetOccurences.Matches(input);
foreach (Match match in charSetMatches)
{
// Groups[0] is the whole match; charSet is overwritten on every iteration.
charSet = match.Groups[0].Value.Replace("=?", "").Replace("?B?", "").Replace("?b?", "");
}
Do you know what I'm missing?
When regexp parser sees the .* character sequence, it matches everything up to the end of the string and goes back, char by char, (greedy match). So, to avoid the problem, you can use a non-greedy match or explicitly define the characters that can appear at a string.
"=\?[a-zA-Z0-9?=-]*\?B\?[a-zA-Z0-9?=-]*\?="
A non-regex way:
string msg = "=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?= =?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?=";
// The encoded words are separated by a single space, so split on it and strip
// the "=?" and "?B?"/"?b?" markers from each piece before printing it.
string[] separators = { " " };
foreach (string encodedWord in msg.Split(separators, StringSplitOptions.None))
{
    string stripped = encodedWord.Replace("=?", "");
    stripped = stripped.Replace("?B?", "");
    stripped = stripped.Replace("?b?", "");
    Console.WriteLine(stripped);
}
See an ideone.
And if you still want to use regex, you should make the .* lazy by adding a ?. This was already mentioned by the previous users, but it seems you are not getting matches?
string msg = "=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?= =?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?=";
// ".*?" is lazy: each part stops at the first following delimiter, so every
// encoded word is returned as a separate match.
var charSetOccurences = new Regex(@"=\?.*?\?B\?.*?\?=", RegexOptions.IgnoreCase);
var charSetMatches = charSetOccurences.Matches(msg);
foreach (Match match in charSetMatches)
{
    // Groups[0] is the whole match; strip the "=?" and "?B?"/"?b?" markers.
    string charSet = match.Groups[0].Value.Replace("=?", "").Replace("?B?", "").Replace("?b?", "");
    Console.WriteLine(charSet);
}
See another ideone.
The output is the same in both cases:
windows-1258UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?=
windows-1258IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=
EDIT: As per update, see an all in one solution for your problem
string msg = "=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?= =?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?=";
// Lazy quantifiers give one match per encoded word; [BQ] accepts either
// encoding-type letter (case-insensitively).
var charSetOccurences = new Regex(@"=\?.*?\?[BQ]\?.*?\?=", RegexOptions.IgnoreCase);
MatchCollection matches = charSetOccurences.Matches(msg);
foreach (Match match in matches)
{
    // Splitting "=?charset?type?data?=" on '?' yields
    // ["=", charset, type, data, "="], hence indices 1 to 3.
    string[] encoding = match.Groups[0].Value.Split('?');
    string charSet = encoding[1];
    string encodeType = encoding[2];
    string encodedString = encoding[3];
    Console.WriteLine("Charset: " + charSet);
    Console.WriteLine("Encoding type: " + encodeType);
    Console.WriteLine("Encoded String: " + encodedString + "\n");
}
Returns:
Charset: windows-1258
Encoding type: B
Encoded String: UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz
Charset: windows-1258
Encoding type: B
Encoded String: IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=
See this.
Or since we already had the regex, we can use:
string msg = "=?windows-1258?B?UkU6IFRyIDogUGxhbiBkZSBjb250aW51aXTpIGQnYWN0aXZpdOkgZGVz?= =?windows-1258?B?IHNlcnZldXJzIFdlYiBHb1ZveWFnZXN=?=";
// Capture groups: 1 = charset, 2 = encoding type (B or Q), 3 = encoded text.
var charSetOccurences = new Regex(@"=\?(.*?)\?([BQ])\?(.*?)\?=", RegexOptions.IgnoreCase);
MatchCollection matches = charSetOccurences.Matches(msg);
foreach (Match match in matches)
{
    Console.WriteLine("Charset: " + match.Groups[1].Value);
    Console.WriteLine("Encoding type: " + match.Groups[2].Value);
    Console.WriteLine("Encoded String: " + match.Groups[3].Value + "\n");
}
Returns the same output.
.* is greedy and will match everything from the first ? to the last ?B?.
You need to use either a non-greedy match
=\?.*?\?B\?.*?\?=
or exclude ? from your list of characters
=\?[^?]*\?B\?[^?]*\?=
There are code snippets that strip the invalid characters from a string before we save it as XML, but I have one more problem. Let's say my user wants to have a column name like "[MyColumnOne]". I do not want to strip the "[" and "]" characters, because the user has defined them and wants to see them. The code that strips the invalid characters also removes "[" and "]", but in this case I still need them to be saved. What can I do?
Never mind, I changed my RegEx format to use XML 1.1 instead of XML 1.0 and now it is working good :
// Matches "#x..." hexadecimal references for the code points listed in the
// alternation (case-insensitively). The XML 1.0 variant is kept for reference:
//pattern = @"#x((10?|[2-F])FFF[EF]|FDD[0-9A-F]|7F|8[0-46-9A-F]9[0-9A-F])"; //XML 1.0
string pattern = @"#x((10?|[2-F])FFF[EF]|FDD[0-9A-F]|[19][0-9A-F]|7F|8[0-46-9A-F]|0?[1-8BCEF])"; // XML 1.1
Regex regex = new Regex(pattern, RegexOptions.IgnoreCase);
if (regex.IsMatch(sString))
{
    sString = regex.Replace(sString, String.Empty);
    // NOTE(review): the cleaned text is used both as the file path and as the
    // file contents here — confirm the intended path.
    File.WriteAllText(sString, sString, Encoding.UTF8);
}
return sString;
This worked for me, and it was fast.
/// <summary>
/// Normalizes a string or long value into single-line text; any other value is
/// returned unchanged.
/// </summary>
/// <param name="p">The value to normalize.</param>
/// <returns>
/// For a string or long: its text, trimmed, with each run of CR, LF or tab
/// characters replaced by one space and every remaining character below
/// U+0020 removed ("" when the trimmed text is empty). Otherwise <paramref name="p"/> itself.
/// </returns>
private object NormalizeString(object p) {
    if (!(p is string || p is long)) {
        return p;
    }

    string text = string.Format("{0}", p).Trim();
    if (string.IsNullOrWhiteSpace(text)) {
        return "";
    }

    // Collapse line breaks and tabs into a single space. Replace leaves the
    // text untouched when nothing matches, so no separate IsMatch scan is needed.
    text = Regex.Replace(text, "[\r\n\t]+", " ");

    // Drop any remaining control characters (everything below the space character).
    return string.Concat(text.Where(c => c >= ' '));
}