Is there a better way to replace strings?
I am surprised that Replace does not take in a character array or string array. I guess that I could write my own extension but I was curious if there is a better built in way to do the following? Notice the last Replace is a string not a character.
myString.Replace(';', '\n').Replace(',', '\n').Replace('\r', '\n').Replace('\t', '\n').Replace(' ', '\n').Replace("\n\n", "\n");
You can use a replace regular expression.
s/[;,\t\r ]|[\n]{2}/\n/g
s/ at the beginning means a search
The characters between [ and ] are the characters to search for (in any order)
The second / delimits the search-for text and the replace text
In English, this reads:
"Search for ; or , or \t or \r or (space) or exactly two sequential \n and replace it with \n"
In C#, you could do the following: (after importing System.Text.RegularExpressions)
// Matches one separator (';', ',', tab, CR or space) or exactly two consecutive newlines.
Regex pattern = new Regex("[;,\t\r ]|[\n]{2}");
// Strings are immutable: Replace returns a new string, so the result has to be assigned.
myString = pattern.Replace(myString, "\n");
If you are feeling particularly clever and don't want to use Regex:
// Every character that should act as a separator.
char[] separators = new char[]{' ',';',',','\r','\t','\n'};
string s = "this;is,\ra\t\n\n\ntest";
// Split on any separator; RemoveEmptyEntries drops the empty pieces that adjacent separators produce.
string[] temp = s.Split(separators, StringSplitOptions.RemoveEmptyEntries);
// Glue the remaining pieces back together with a single newline between them.
s = String.Join("\n", temp);
You could wrap this in an extension method with little effort as well.
Edit: Or just wait 2 minutes and I'll end up writing it anyway :)
/// <summary>String helpers built on Split/Join.</summary>
public static class ExtensionMethods
{
    /// <summary>
    /// Splits <paramref name="s"/> on every character in <paramref name="separators"/>,
    /// discards the empty pieces, and joins what is left with <paramref name="newVal"/>.
    /// Runs of separators therefore collapse into a single <paramref name="newVal"/>, and
    /// leading/trailing separators disappear.
    /// </summary>
    /// <param name="s">Text to process.</param>
    /// <param name="separators">Characters treated as separators.</param>
    /// <param name="newVal">Text placed between the surviving pieces.</param>
    /// <returns>The re-joined string.</returns>
    public static string Replace(this string s, char[] separators, string newVal)
    {
        return String.Join(newVal, s.Split(separators, StringSplitOptions.RemoveEmptyEntries));
    }
}
And voila...
char[] separators = new char[]{' ',';',',','\r','\t','\n'};
string s = "this;is,\ra\t\n\n\ntest";
// Calls the Replace(char[], string) extension method defined above, not string.Replace.
s = s.Replace(separators, "\n");
You could use Linq's Aggregate function:
string s = "the\nquick\tbrown\rdog,jumped;over the lazy fox.";
char[] chars = new char[] { ' ', ';', ',', '\r', '\t', '\n' };
// Fold over the separators: start from s and replace one separator per step in the running result (c1).
string snew = chars.Aggregate(s, (c1, c2) => c1.Replace(c2, '\n'));
Here's the extension method:
/// <summary>
/// Returns <paramref name="seed"/> with every occurrence of each character in
/// <paramref name="chars"/> replaced by <paramref name="replacementCharacter"/>.
/// </summary>
/// <param name="seed">Starting text.</param>
/// <param name="chars">Characters to replace.</param>
/// <param name="replacementCharacter">Character written in their place.</param>
/// <returns>The text after all replacements have been applied in turn.</returns>
public static string ReplaceAll(this string seed, char[] chars, char replacementCharacter)
{
    // One folding step: swap a single target character in the text accumulated so far.
    Func<string, char, string> swapOne =
        (text, target) => text.Replace(target, replacementCharacter);

    return chars.Aggregate(seed, swapOne);
}
Extension method usage example:
string snew = s.ReplaceAll(chars, '\n');
This is the shortest way:
// Verbatim string (@"...") so the backslashes reach the regex engine unchanged.
myString = Regex.Replace(myString, @"[;,\t\r ]|[\n]{2}", "\n");
Strings are just immutable char arrays
You just need to make it mutable:
either by using StringBuilder
go in the unsafe world and play with pointers (dangerous though)
and try to iterate through the array of characters as few times as possible. Note the HashSet here: it avoids traversing the character sequence inside the loop. Should you need an even faster lookup, you can replace the HashSet with an optimized lookup for char (based on an array[256]).
Example with StringBuilder
/// <summary>
/// Overwrites, in place, every character of <paramref name="builder"/> that occurs in
/// <paramref name="toReplace"/> with <paramref name="replacement"/>.
/// </summary>
/// <param name="builder">Builder whose content is modified.</param>
/// <param name="toReplace">Characters to look for.</param>
/// <param name="replacement">Character written over each match.</param>
public static void MultiReplace(this StringBuilder builder,
    char[] toReplace,
    char replacement)
{
    // Hash lookup instead of scanning toReplace once per builder character.
    var targets = new HashSet<char>(toReplace);

    int position = 0;
    while (position < builder.Length)
    {
        if (targets.Contains(builder[position]))
        {
            builder[position] = replacement;
        }
        position++;
    }
}
Edit - Optimized version (only valid for ASCII)
/// <summary>
/// Overwrites, in place, every character of <paramref name="builder"/> that occurs in
/// <paramref name="toReplace"/> with <paramref name="replacement"/>.
/// Characters with a code below 256 are resolved through a flat lookup table; anything
/// above that falls back to a linear scan of <paramref name="toReplace"/> instead of
/// indexing past the end of the table.
/// </summary>
/// <param name="builder">Builder whose content is modified.</param>
/// <param name="toReplace">Characters to look for.</param>
/// <param name="replacement">Character written over each match.</param>
public static void MultiReplace(this StringBuilder builder,
    char[] toReplace,
    char replacement)
{
    var set = new bool[256];
    foreach (var charToReplace in toReplace)
    {
        // Only codes that fit the table are recorded; larger ones use the fallback below.
        if (charToReplace < set.Length)
        {
            set[charToReplace] = true;
        }
    }

    for (int i = 0; i < builder.Length; ++i)
    {
        var currentCharacter = builder[i];
        bool isTarget = currentCharacter < set.Length
            ? set[currentCharacter]
            : System.Array.IndexOf(toReplace, currentCharacter) >= 0;

        if (isTarget)
        {
            builder[i] = replacement;
        }
    }
}
Then you just use it like this:
var builder = new StringBuilder("my bad,url&slugs");
// Replaces ' ', '&' and ',' with '-' directly inside the builder (no return value).
builder.MultiReplace(new []{' ', '&', ','}, '-');
var result = builder.ToString();
Ohhh, the performance horror!
The answer is a bit outdated, but still...
/// <summary>Character-replacement helper that only allocates when a replacement actually happens.</summary>
public static class StringUtils
{
    #region Private members

    // One scratch builder per thread ([ThreadStatic]), reused by every ReplaceAny call on that thread.
    [ThreadStatic]
    private static StringBuilder m_ReplaceSB;

    /// <summary>
    /// Returns the calling thread's scratch builder, emptied and with at least
    /// <paramref name="capacity"/> characters of capacity; creates it on first use.
    /// </summary>
    private static StringBuilder GetReplaceSB(int capacity)
    {
        var result = m_ReplaceSB;
        if (null == result)
        {
            result = new StringBuilder(capacity);
            m_ReplaceSB = result;
        }
        else
        {
            result.Clear();
            result.EnsureCapacity(capacity);
        }
        return result;
    }

    #endregion

    /// <summary>
    /// Replaces every occurrence of any character in <paramref name="chars"/> with
    /// <paramref name="replaceWith"/>.
    /// </summary>
    /// <param name="s">Source text; <c>null</c> yields <c>null</c>.</param>
    /// <param name="replaceWith">Character written in place of each match.</param>
    /// <param name="chars">Characters to replace; <c>null</c> returns <paramref name="s"/> unchanged.</param>
    /// <returns>
    /// The original instance when nothing matched, otherwise a new string with the replacements applied.
    /// </returns>
    public static string ReplaceAny(this string s, char replaceWith, params char[] chars)
    {
        if (null == chars)
        {
            return s;
        }
        if (null == s)
        {
            return null;
        }

        // Stays null until the first match, so unchanged input costs no allocation.
        StringBuilder sb = null;
        for (int i = 0, count = s.Length; i < count; i++)
        {
            var temp = s[i];
            var replace = false;
            for (int j = 0, cc = chars.Length; j < cc; j++)
            {
                if (temp == chars[j])
                {
                    replace = true;
                    break;
                }
            }

            if (replace)
            {
                if (null == sb)
                {
                    // First match: fetch the scratch builder and copy the untouched prefix.
                    sb = GetReplaceSB(count);
                    if (i > 0)
                    {
                        sb.Append(s, 0, i);
                    }
                }
                sb.Append(replaceWith);
            }
            else if (null != sb)
            {
                sb.Append(temp);
            }
        }
        return null == sb ? s : sb.ToString();
    }
}
You may also simply write these string extension methods, and put them somewhere in your solution:
using System.Text;
/// <summary>ReplaceAll helpers: one works per character, the other per substring.</summary>
public static class StringExtensions
{
    /// <summary>
    /// Replaces every character of <paramref name="original"/> that appears anywhere in
    /// <paramref name="toBeReplaced"/> with <paramref name="newValue"/>.
    /// </summary>
    /// <param name="original">Source text; returned unchanged when null or empty.</param>
    /// <param name="toBeReplaced">Set of characters to replace, given as a string.</param>
    /// <param name="newValue">Replacement text; <c>null</c> is treated as empty.</param>
    public static string ReplaceAll(this string original, string toBeReplaced, string newValue)
    {
        if (string.IsNullOrEmpty(original) || string.IsNullOrEmpty(toBeReplaced))
        {
            return original;
        }

        string substitute = newValue ?? string.Empty;
        StringBuilder output = new StringBuilder();
        for (int i = 0; i < original.Length; i++)
        {
            char current = original[i];
            if (toBeReplaced.IndexOf(current) >= 0)
            {
                output.Append(substitute);
            }
            else
            {
                output.Append(current);
            }
        }
        return output.ToString();
    }

    /// <summary>
    /// Replaces every occurrence of each non-empty entry of <paramref name="toBeReplaced"/>
    /// with <paramref name="newValue"/>, one entry after another in array order.
    /// </summary>
    /// <param name="original">Source text; returned unchanged when null or empty.</param>
    /// <param name="toBeReplaced">Substrings to replace; null or empty entries are skipped.</param>
    /// <param name="newValue">Replacement text; <c>null</c> is treated as empty.</param>
    public static string ReplaceAll(this string original, string[] toBeReplaced, string newValue)
    {
        bool nothingToDo = string.IsNullOrEmpty(original) || toBeReplaced == null || toBeReplaced.Length <= 0;
        if (nothingToDo)
        {
            return original;
        }

        string substitute = newValue ?? string.Empty;
        string current = original;
        for (int i = 0; i < toBeReplaced.Length; i++)
        {
            string token = toBeReplaced[i];
            if (string.IsNullOrEmpty(token))
            {
                continue;
            }
            current = current.Replace(token, substitute);
        }
        return current;
    }
}
Call them like this:
"ABCDE".ReplaceAll("ACE", "xy");
xyBxyDxy
And this:
"ABCDEF".ReplaceAll(new string[] { "AB", "DE", "EF" }, "xy");
xyCxyF
Use RegEx.Replace, something like this:
// NOTE(review): this sample text contains neither ';' nor ',', so with the pattern below the result equals the input.
string input = "This is text with far too much " +
"whitespace.";
// Character class: matches a single ';' or ','.
string pattern = "[;,]";
string replacement = "\n";
Regex rgx = new Regex(pattern);
// Replace every match of the pattern in input with the replacement text.
string result = rgx.Replace(input, replacement);
Here's more info on this MSDN documentation for RegEx.Replace
Performance-Wise this probably might not be the best solution but it works.
var str = "filename:with&bad$separators.txt";
// Characters to replace; note that '#' is listed twice (first and last entry).
char[] charArray = new char[] { '#', '%', '&', '{', '}', '\\', '<', '>', '*', '?', '/', ' ', '$', '!', '\'', '"', ':', '#' };
// One full Replace pass over the string per listed character.
foreach (var singleChar in charArray)
{
str = str.Replace(singleChar, '_');
}
// Characters to strip. Verbatim string: the backslash is literal and a double quote is written as "".
string ToBeReplaceCharacters = @"~()##$%&+,'""<>|;\/*?";
string fileName = "filename;with<bad:separators?";
// Remove each listed character by replacing it with the empty string.
foreach (var RepChar in ToBeReplaceCharacters)
{
    fileName = fileName.Replace(RepChar.ToString(), "");
}
A .NET Core version for replacing a defined set of string chars to a specific char. It leverages the recently introduced Span type and string.Create method.
The idea is to prepare a replacement array, so that no actual comparison operations are required for each char of the string. In that respect the replacement process resembles the way a state machine works. In order to avoid initializing every item of the replacement array, let's store oldChar ^ newChar (XOR'ed) values there, which gives the following benefits:
If a char is not changing: ch ^ ch = 0 - no need to initialize non-changing items
The final char can be found by XOR'ing: ch ^ repl[ch]:
ch ^ 0 = ch - not changed chars case
ch ^ (ch ^ newChar) = newChar - replaced char
So the only requirement would be to ensure that the replacement array is zero-ed when initialized. We'll be using ArrayPool<char> to avoid allocations each time the ReplaceAll method is called. And, in order to ensure that the arrays are zero-ed without expensive call to Array.Clear method, we'll be maintaining a pool dedicated for the ReplaceAll method. We'll be clearing the replacement array (exact items only) before returning it to the pool.
/// <summary>Multi-character replacement based on a pooled XOR lookup table.</summary>
public static class StringExtensions
{
    // Private pool used only by ReplaceAll. The method assumes every array it rents from here is
    // all zeros, and re-zeroes the slots it touched before handing the array back.
    private static readonly ArrayPool<char> _replacementPool = ArrayPool<char>.Create();

    /// <summary>
    /// Returns <paramref name="str"/> with every occurrence of any character in
    /// <paramref name="oldChars"/> replaced by <paramref name="newChar"/>.
    /// </summary>
    /// <param name="str">Source text; returned unchanged when null or empty.</param>
    /// <param name="newChar">Character written in place of each match.</param>
    /// <param name="oldChars">Characters to replace; null or empty returns <paramref name="str"/>.</param>
    public static string ReplaceAll(this string str, char newChar, params char[] oldChars)
    {
        bool nothingToDo = string.IsNullOrEmpty(str) || oldChars is null || oldChars.Length == 0;
        if (nothingToDo)
        {
            return str;
        }

        // A single target is handled by the built-in string.Replace.
        if (oldChars.Length == 1)
        {
            return str.Replace(oldChars[0], newChar);
        }

        char[] xorTable = _replacementPool.Rent(char.MaxValue + 1);
        try
        {
            // Slot c holds (newChar ^ c) for targets and 0 for everything else,
            // so (table[c] ^ c) yields newChar for targets and c otherwise.
            for (int i = 0; i < oldChars.Length; i++)
            {
                char target = oldChars[i];
                xorTable[target] = (char)(newChar ^ target);
            }

            return string.Create(str.Length, (source: str, table: xorTable), (dst, state) =>
            {
                string source = state.source;
                char[] table = state.table;
                for (int i = 0; i < source.Length; i++)
                {
                    char current = source[i];
                    dst[i] = (char)(table[current] ^ current);
                }
            });
        }
        finally
        {
            // Reset only the slots written above, then give the array back to the pool.
            for (int i = 0; i < oldChars.Length; i++)
            {
                xorTable[oldChars[i]] = char.MinValue;
            }
            _replacementPool.Return(xorTable);
        }
    }
}
I know this question is super old, but I want to offer 2 options that are more efficient:
1st off, the extension method posted by Paul Walls is good but can be made more efficient by using the StringBuilder class, which is like the string data type but made especially for situations where you will be changing string values more than once. Here is a version I made of the extension method using StringBuilder:
/// <summary>
/// Copies <paramref name="s"/> into a StringBuilder, replaces each character of
/// <paramref name="separators"/> with <paramref name="newVal"/>, and returns the result.
/// </summary>
/// <param name="s">Source text.</param>
/// <param name="separators">Characters to replace.</param>
/// <param name="newVal">Character written in their place.</param>
public static string ReplaceChars(this string s, char[] separators, char newVal)
{
    var buffer = new StringBuilder(s);
    for (int i = 0; i < separators.Length; i++)
    {
        buffer.Replace(separators[i], newVal);
    }
    return buffer.ToString();
}
I ran this operation 100,000 times and using StringBuilder took 73ms compared to 81ms using string. So the difference is typically negligible, unless you're running many operations or using a huge string.
Secondly, here is a 1 liner loop you can use:
foreach (char c in separators) { s = s.Replace(c, '\n'); }
I personally think this is the best option. It is highly efficient and doesn't require writing an extension method. In my testing this ran the 100k iterations in only 63ms, making it the most efficient.
Here is an example in context:
string s = "this;is,\ra\t\n\n\ntest";
char[] separators = new char[] { ' ', ';', ',', '\r', '\t', '\n' };
// One string.Replace pass per separator; each pass returns a new string that is assigned back to s.
foreach (char c in separators) { s = s.Replace(c, '\n'); }
Credit to Paul Walls for the first 2 lines in this example.
I also fiddled around with that problem, and found that most of the solutions here are very slow. The fastest one was actually the LINQ + Aggregate method that dodgy_coder posted.
But I thought, well that might be also quite heavy in memory allocations depending upon how many old characters there are. So I came out with this:
The idea here is to keep a cached replacement map of the old characters for the current thread, to save allocations. Other than that, it just works on a character array copied from the input, which is later returned as a string again, and that character array is modified as little as possible.
// Per-thread lookup table indexed by character code; allocated on first use and reused afterwards.
// Every entry is false between calls: Replace sets its targets and clears them again in finally.
[ThreadStatic]
private static bool[] replaceMap;

/// <summary>
/// Returns <paramref name="input"/> with every occurrence of any character in
/// <paramref name="oldChars"/> replaced by <paramref name="newChar"/>.
/// </summary>
/// <param name="input">Source text.</param>
/// <param name="oldChars">Characters to replace.</param>
/// <param name="newChar">Character written in their place.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="oldChars"/> is null.
/// </exception>
public static string Replace(this string input, char[] oldChars, char newChar)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }
    if (oldChars == null)
    {
        throw new ArgumentNullException(nameof(oldChars));
    }

    switch (oldChars.Length)
    {
        case 1:
            return input.Replace(oldChars[0], newChar);
        case 0:
            return input;
    }

    if (replaceMap == null)
    {
        replaceMap = new bool[char.MaxValue + 1];
    }
    bool[] map = replaceMap;

    // Mark the targets for the duration of this call.
    for (int k = 0; k < oldChars.Length; k++)
    {
        map[oldChars[k]] = true;
    }

    try
    {
        char[] output = input.ToCharArray();
        for (int i = 0; i < input.Length; i++)
        {
            if (map[input[i]])
            {
                output[i] = newChar;
            }
        }
        return new string(output);
    }
    finally
    {
        // Unmark the targets so the cached table is clean for the next call on this thread.
        for (int k = 0; k < oldChars.Length; k++)
        {
            map[oldChars[k]] = false;
        }
    }
}
For me this is at most two allocations for the actual input string to work on. A StringBuilder turned out to be much slower for me for some reason. And it is 2 times faster than the LINQ variant.
No "Replace" (Linq only):
string myString = ";,\r\t \n\n=1;;2,,3\r\r4\t\t5 6\n\n\n\n7=";
// Character that must never appear twice in a row in the result.
char NoRepeat = '\n';
// Characters that get mapped to '\n'.
string ByeBye = ";,\r\t ";
// Step 1 (Where): drop every character that occurs in the literal "STOP-OUTSIDER".
string myResult = myString.ToCharArray().Where(t => !"STOP-OUTSIDER".Contains(t))
// Step 2 (Select): turn each remaining character into a one-character string, mapping ByeBye characters to '\n'.
.Select(t => "" + ( ByeBye.Contains(t) ? '\n' : t))
// Step 3 (Aggregate): concatenate, skipping a '\n' when the text accumulated so far already ends with '\n'.
// NOTE(review): this Aggregate overload has no seed, so it presumably throws on an empty sequence — confirm.
.Aggregate((all, next) => (
next == "" + NoRepeat && all.Substring(all.Length - 1) == "" + NoRepeat
? all : all + next ) );
Having built my own solution, and looking at the solution used here, I leveraged an answer that isn't using complex code and is generally efficient for most parameters.
Cover base cases where other methods are more appropriate. If there are no chars to replace, return the original string. If there is only one, just use the Replace method.
Use a StringBuilder and initialize the capacity to the length of the original string. After all, the new string being built will have the same length as the original string if it's just chars being replaced. This ensures only 1 memory allocation is used for the new string.
Assuming that the 'char' length could be small or large will impact performance. Large collections are better with hashsets, while smaller collections are not. This is a near-perfect use case for Hybrid Dictionaries. They switch to using a Hash based lookup once the collection gets too large. However, we don't care about the value of the dictionary, so I just set it to "true".
Having different methods for StringBuilder versus just a string prevents unnecessary memory allocation. If it's just a string, don't instantiate a StringBuilder until the base cases have been checked. If it's already a StringBuilder, then perform the replacements and return the StringBuilder itself (as other StringBuilder methods like Append do).
I put the replacement char first, and the chars to check at the end. This way, I can leverage the params keyword for easily passing additional strings. However, you don't have to do this if you prefer the other order.
namespace Test.Extensions
{
    /// <summary>ReplaceAll for strings and for StringBuilder instances.</summary>
    public static class StringExtensions
    {
        /// <summary>
        /// Returns <paramref name="str"/> with every occurrence of any character in
        /// <paramref name="chars"/> replaced by <paramref name="replacementCharacter"/>.
        /// </summary>
        /// <param name="str">Source text; <c>null</c> is returned as <c>null</c>.</param>
        /// <param name="replacementCharacter">Character written in place of each match.</param>
        /// <param name="chars">Characters to replace; null or empty returns <paramref name="str"/> unchanged.</param>
        public static string ReplaceAll(this string str, char replacementCharacter, params char[] chars)
        {
            // Nothing to replace, or nothing to replace in: hand the input straight back.
            if (str == null || chars == null || chars.Length == 0)
            {
                return str;
            }
            if (chars.Length == 1)
            {
                return str.Replace(chars[0], replacementCharacter);
            }

            // The output has the same length as the input, so size the builder once.
            StringBuilder sb = new StringBuilder(str.Length);
            var searcher = new HybridDictionary(chars.Length);
            for (int i = 0; i < chars.Length; i++)
            {
                // Only the keys matter; the value is a placeholder.
                searcher[chars[i]] = true;
            }

            foreach (var c in str)
            {
                if (searcher.Contains(c))
                {
                    sb.Append(replacementCharacter);
                }
                else
                {
                    sb.Append(c);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Replaces, in place, every character of <paramref name="sb"/> that occurs in
        /// <paramref name="chars"/> with <paramref name="replacementCharacter"/>.
        /// </summary>
        /// <param name="sb">Builder to modify; <c>null</c> is returned as <c>null</c>.</param>
        /// <param name="replacementCharacter">Character written in place of each match.</param>
        /// <param name="chars">Characters to replace; null or empty leaves the builder untouched.</param>
        /// <returns>The same builder instance, to allow call chaining.</returns>
        public static StringBuilder ReplaceAll(this StringBuilder sb, char replacementCharacter, params char[] chars)
        {
            if (sb == null || chars == null || chars.Length == 0)
            {
                return sb;
            }
            if (chars.Length == 1)
            {
                return sb.Replace(chars[0], replacementCharacter);
            }

            var searcher = new HybridDictionary(chars.Length);
            for (int i = 0; i < chars.Length; i++)
            {
                searcher[chars[i]] = true;
            }

            for (int i = 0; i < sb.Length; i++)
            {
                var val = sb[i];
                if (searcher.Contains(val))
                {
                    sb[i] = replacementCharacter;
                }
            }
            return sb;
        }
    }
}
Related
Why the index in my case goes beyond the boundaries of the array, I still can't understand?
Complete the method/function so that it converts dash/underscore delimited words into camel casing. The first word within the output should be capitalized only if the original word was capitalized (known as Upper Camel Case, also often referred to as Pascal case).
using System;
using System.Text;
public class Kata
{
    /// <summary>
    /// Converts dash/underscore delimited words to camel case. The first word is copied
    /// unchanged; every following word gets its first character upper-cased.
    /// </summary>
    /// <param name="str">Delimited text, e.g. "the-stealth_warrior".</param>
    /// <returns>
    /// The joined words; <paramref name="str"/> itself when it is null or empty, and an empty
    /// string when it contains nothing but delimiters.
    /// </returns>
    public static string ToCamelCase(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        string[] res = str.Split(new char[] { '-', '_' }, System.StringSplitOptions.RemoveEmptyEntries);

        // Input made only of delimiters leaves no words, so there is no res[0] to read.
        if (res.Length == 0)
        {
            return string.Empty;
        }

        StringBuilder sb = new StringBuilder(res[0]);
        for (int i = 1; i < res.Length; i++)
        {
            // RemoveEmptyEntries guarantees every word has at least one character.
            sb.Append(Char.ToUpper(res[i][0]));
            sb.Append(res[i], 1, res[i].Length - 1);
        }
        return sb.ToString();
    }
}
@Evgeny20 First, you should do your own homework. Second, debuggers are your friend: whatever environment they have you using for your CS homework, figure out the debugger so you can step through it. This type of thing becomes easy then.
I'll provide a solution that's using .net core 3.1. If you're using old framework you'll need to modify it to not use Spans and instead use string functions. Don't use the char array.
If you get array index out of bound issues with this - using the forced upper case - then put in a test for the string to make sure it's non-null and non-empty. But I don't think you will...
// Smoke test: runs ToCamelCase on a fixed sample and throws when the output is not the expected text.
static async Task Main(string[] args)
{
    const string sample = "camel-Case_this-String";
    const string expected = "camelCaseThisString";

    if (ToCamelCase(sample) == expected)
    {
        return;
    }
    throw new InvalidOperationException();
}
/// <summary>
/// Joins the '-'/'_' delimited words of <paramref name="str"/>: the first word is copied
/// unchanged, every following word gets its first character upper-cased.
/// </summary>
/// <param name="str">Delimited text.</param>
/// <returns>
/// The joined words, or <paramref name="str"/> itself when it is null, empty, or has no words.
/// </returns>
public static string ToCamelCase(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return str;
    }

    string[] tokens = str.Split(new char[] { '-', '_' }, StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length == 0)
    {
        return str;
    }

    StringBuilder sb = new StringBuilder(str.Length);
    sb.Append(tokens[0]);
    for (int i = 1; i < tokens.Length; i++)
    {
        string word = tokens[i];
        sb.Append(char.ToUpper(word[0]));
        if (word.Length > 1)
        {
            // Append the tail as a span, without creating a substring.
            sb.Append(word.AsSpan().Slice(1));
        }
    }
    return sb.ToString();
}
I'm offering two solutions. The first formats the input as Pascal Case.
// Demo entry point: prints the conversion of a sample string, then waits for a key press.
static void Main(string[] args)
{
Console.WriteLine(ToCamelCase("camels-drink-LOTS-OF_WATER"));
Console.ReadKey();
}
/// <summary>
/// Splits <paramref name="str"/> on '-' and '_', lower-cases each word, upper-cases its first
/// character, and concatenates the words (Pascal case).
/// </summary>
/// <param name="str">Delimited text.</param>
/// <returns>The concatenated words; empty when there are no words.</returns>
public static string ToCamelCase(string str)
{
    var output = new StringBuilder();
    string[] words = str.Split(new char[] { '-', '_' }, System.StringSplitOptions.RemoveEmptyEntries);

    foreach (string word in words)
    {
        string lowered = word.ToLower();
        // Capitalised first character followed by the lower-cased remainder.
        output.Append(Char.ToUpper(lowered[0]));
        output.Append(lowered, 1, lowered.Length - 1);
    }
    return output.ToString();
}
The second formats as Camel Case - it contains additional logic to handle the first word in your input array.
// Demo entry point: prints the conversion of a sample string, then waits for a key press.
static void Main(string[] args)
{
Console.WriteLine(ToCamelCase("camels-drink-LOTS-OF_WATER"));
Console.ReadKey();
}
/// <summary>
/// Splits <paramref name="str"/> on '-' and '_', lower-cases each word, upper-cases the first
/// character of every word except the first one, and concatenates the words (camel case).
/// </summary>
/// <param name="str">Delimited text.</param>
/// <returns>The concatenated words; empty when there are no words.</returns>
public static string ToCamelCase(string str)
{
    var output = new StringBuilder();
    string[] words = str.Split(new char[] { '-', '_' }, System.StringSplitOptions.RemoveEmptyEntries);

    for (int i = 0; i < words.Length; i++)
    {
        string lowered = words[i].ToLower();
        // Only words after the first get a capital initial.
        char initial = i == 0 ? lowered[0] : Char.ToUpper(lowered[0]);
        output.Append(initial);
        output.Append(lowered, 1, lowered.Length - 1);
    }
    return output.ToString();
}
Let me know in the comments if you need any more help.
If this answer helps you, please click the gray checkmark to the left. This marks the question as solved and gives me some "rep" which is used to access features on Stack Overflow.
I would like to split a string with delimiters but keep the delimiters in the result.
How would I do this in C#?
If the split chars were ,, ., and ;, I'd try:
using System.Text.RegularExpressions;
...
// Split at every position that is immediately preceded by '.', ',' or ';' (zero-width look-behind),
// so each delimiter stays at the end of the piece before it.
string[] parts = Regex.Split(originalString, @"(?<=[.,;])");
(?<=PATTERN) is positive look-behind for PATTERN. It should match at any place where the preceding text fits PATTERN so there should be a match (and a split) after each occurrence of any of the characters.
If you want the delimiter to be its "own split", you can use Regex.Split e.g.:
string input = "plum-pear";
// The parentheses capture the delimiter, which makes Regex.Split return it as an element of its own.
string pattern = "(-)";
string[] substrings = Regex.Split(input, pattern); // Split on hyphens
foreach (string match in substrings)
{
Console.WriteLine("'{0}'", match);
}
// The method writes the following to the console:
// 'plum'
// '-'
// 'pear'
So if you are looking for splitting a mathematical formula, you can use the following Regex
@"([*()\^\/]|(?<!E)[\+\-])"
This will ensure you can also use constants like 1E-02 and avoid having them split into 1E, - and 02
So:
Regex.Split("10E-02*x+sin(x)^2", @"([*()\^\/]|(?<!E)[\+\-])")
Yields:
10E-02
*
x
+
sin
(
x
)
^
2
Building off from BFree's answer, I had the same goal, but I wanted to split on an array of characters similar to the original Split method, and I also have multiple splits per string:
/// <summary>
/// Lazily splits <paramref name="s"/> at every character found in <paramref name="delims"/>,
/// yielding the text between delimiters and each delimiter as its own one-character string.
/// Empty pieces between adjacent delimiters are not yielded.
/// </summary>
/// <param name="s">Text to split.</param>
/// <param name="delims">Delimiter characters.</param>
public static IEnumerable<string> SplitAndKeep(this string s, char[] delims)
{
    int cursor = 0;
    while (true)
    {
        int hit = s.IndexOfAny(delims, cursor);
        if (hit == -1)
        {
            break;
        }

        if (hit > cursor)
        {
            yield return s.Substring(cursor, hit - cursor);
        }
        yield return s.Substring(hit, 1);
        cursor = hit + 1;
    }

    // Whatever follows the last delimiter.
    if (cursor < s.Length)
    {
        yield return s.Substring(cursor);
    }
}
Just in case anyone wants this answer as well...
Instead of string[] parts = Regex.Split(originalString, @"(?<=[.,;])") you could use string[] parts = Regex.Split(originalString, @"(?=yourmatch)") where yourmatch is whatever your separator is.
Supposing the original string was
777- cat
777 - dog
777 - mouse
777 - rat
777 - wolf
Regex.Split(originalString, @"(?=777)") would return
777 - cat
777 - dog
and so on
This version does not use LINQ or Regex and so it's probably relatively efficient. I think it might be easier to use than the Regex because you don't have to worry about escaping special delimiters. It returns an IList<string> which is more efficient than always converting to an array. It's an extension method, which is convenient. You can pass in the delimiters as either an array or as multiple parameters.
/// <summary>
/// Splits a string into the text between delimiters and the delimiters themselves, each
/// delimiter as its own one-character entry. Works like String.Split() except that the
/// delimiters are kept. Empty strings are never added to the result.
/// </summary>
/// <param name="s">String to parse. Null or empty yields an empty list.</param>
/// <param name="delimiters">The delimiting characters. May be an empty array.</param>
/// <returns>The pieces and delimiters in their original order.</returns>
public static IList<string> SplitAndKeepDelimiters(this string s, params char[] delimiters)
{
    var parts = new List<string>();
    if (string.IsNullOrEmpty(s))
    {
        return parts;
    }

    int start = 0;
    while (start < s.Length)
    {
        int hit = s.IndexOfAny(delimiters, start);
        if (hit < 0)
        {
            // No further delimiter: the remainder is the final piece.
            parts.Add(s.Substring(start));
            break;
        }

        if (hit > start)
        {
            parts.Add(s.Substring(start, hit - start));
        }
        parts.Add(s[hit].ToString());
        start = hit + 1;
    }
    return parts;
}
Some unit tests:
// '[', '|' and ']' are the delimiters; each one is expected back as its own element.
text = "[a link|http://www.google.com]";
result = text.SplitAndKeepDelimiters('[', '|', ']');
Assert.IsTrue(result.Count == 5);
// NOTE(review): the actual value is passed first here; many test frameworks expect (expected, actual) — confirm.
Assert.AreEqual(result[0], "[");
Assert.AreEqual(result[1], "a link");
Assert.AreEqual(result[2], "|");
Assert.AreEqual(result[3], "http://www.google.com");
Assert.AreEqual(result[4], "]");
A lot of answers to this! One I knocked up to split by various strings (the original answer caters for just characters i.e. length of 1). This hasn't been fully tested.
/// <summary>
/// Splits <paramref name="s"/> after every occurrence of each delimiter in
/// <paramref name="delims"/>, keeping the delimiter attached to the end of the piece before it.
/// </summary>
/// <param name="s">Text to split.</param>
/// <param name="delims">Delimiter strings; null or empty entries are ignored.</param>
/// <returns>
/// The pieces in order. A delimiter that ends a piece does not create a trailing empty entry.
/// </returns>
public static IEnumerable<string> SplitAndKeep(string s, params string[] delims)
{
    var rows = new List<string>() { s };
    if (string.IsNullOrEmpty(s) || delims == null)
    {
        return rows;
    }

    foreach (string delim in delims)
    {
        // An empty delimiter matches at index 0 forever and would grow the list without end.
        if (string.IsNullOrEmpty(delim))
        {
            continue;
        }

        // rows grows while we iterate: the right-hand part inserted at i + 1 is examined next.
        for (int i = 0; i < rows.Count; i++)
        {
            string row = rows[i];
            int index = row.IndexOf(delim, StringComparison.Ordinal);
            int cut = index + delim.Length;

            // Split only when something follows the whole delimiter, not just its first character.
            if (index > -1 && row.Length > cut)
            {
                rows[i] = row.Substring(0, cut);
                rows.Insert(i + 1, row.Substring(cut));
            }
        }
    }
    return rows;
}
This seems to work, but it's not been tested much.
/// <summary>
/// Splits <paramref name="value"/> at every character found in <paramref name="separators"/>
/// and returns the sections together with the separators, each separator as its own entry.
/// </summary>
/// <param name="value">Text to split.</param>
/// <param name="separators">Separator characters.</param>
/// <param name="splitOptions">
/// <see cref="StringSplitOptions.None"/> keeps empty sections; any other value drops them.
/// Separators are always kept.
/// </param>
/// <returns>Sections and separators in their original order.</returns>
public static string[] SplitAndKeepSeparators(string value, char[] separators, StringSplitOptions splitOptions)
{
    bool keepEmpty = splitOptions == StringSplitOptions.None;
    var pieces = new List<string>();
    int sectionStart = 0;

    for (int pos = 0; pos < value.Length; pos++)
    {
        char current = value[pos];

        bool isSeparator = false;
        foreach (char candidate in separators)
        {
            if (candidate == current)
            {
                isSeparator = true;
                break;
            }
        }
        if (!isSeparator)
        {
            continue;
        }

        // Section in front of the separator (skipped when empty and empties are discarded).
        if (keepEmpty || sectionStart != pos)
        {
            pieces.Add(value.Substring(sectionStart, pos - sectionStart));
        }
        pieces.Add(current.ToString());
        sectionStart = pos + 1;
    }

    // Section after the final separator, under the same empty-section rule.
    if (keepEmpty || sectionStart != value.Length)
    {
        pieces.Add(value.Substring(sectionStart));
    }
    return pieces.ToArray();
}
Recently I wrote an extension method to do this:
/// <summary>Split helper that keeps the separator attached to each piece.</summary>
public static class StringExtensions
{
    /// <summary>
    /// Lazily splits <paramref name="s"/> on <paramref name="seperator"/> and yields every piece
    /// with the separator appended, except the last piece, which is yielded as-is.
    /// </summary>
    /// <param name="s">Text to split.</param>
    /// <param name="seperator">Separator string.</param>
    public static IEnumerable<string> SplitAndKeep(this string s, string seperator)
    {
        string[] pieces = s.Split(new string[] { seperator }, StringSplitOptions.None);
        int lastIndex = pieces.Length - 1;

        for (int i = 0; i < lastIndex; i++)
        {
            yield return pieces[i] + seperator;
        }
        // Split always returns at least one element, so lastIndex is valid.
        yield return pieces[lastIndex];
    }
}
I'd say the easiest way to accomplish this (except for the argument Hans Kesting brought up) is to split the string the regular way, then iterate over the array and add the delimiter to every element but the last.
To avoid adding character to new line try this :
// Split at every position immediately preceded by '-', so the hyphen stays with the piece before it.
string[] substrings = Regex.Split(input, @"(?<=[-])");
// A plain Split drops the separator ...
result = originalString.Split(separator);
// ... so append it again to every element except the last one.
for(int i = 0; i < result.Length - 1; i++)
result[i] += separator;
(EDIT - this is a bad answer - I misread his question and didn't see that he was splitting by multiple characters.)
(EDIT - a correct LINQ version is awkward, since the separator shouldn't get concatenated onto the final string in the split array.)
Iterate through the string character by character (which is what regex does anyway).
When you find a splitter, then spin off a substring.
pseudo code
int hold, counter;
List<String> afterSplit;
string toSplit
for(hold = 0, counter = 0; counter < toSplit.Length; counter++)
{
if(toSplit[counter] = /*split charaters*/)
{
afterSplit.Add(toSplit.Substring(hold, counter));
hold = counter;
}
}
That's sort of C# but not really. Obviously, choose the appropriate function names.
Also, I think there might be an off-by-1 error in there.
But that will do what you're asking.
veggerby's answer modified to
have no string items in the list
have fixed string as delimiter like "ab" instead of single character
var delimiter = "ab";
var text = "ab33ab9ab";
// The capture group makes Regex.Split return the delimiter matches as elements of their own;
// the Where clause then drops the empty strings produced around them.
var parts = Regex.Split(text, $@"({Regex.Escape(delimiter)})")
    .Where(p => p != string.Empty)
    .ToList();
// parts = "ab", "33", "ab", "9", "ab"
The Regex.Escape() is there just in case your delimiter contains characters which regex interprets as special pattern commands (like *, () and thus have to be escaped.
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace ConsoleApplication9
{
    class Program
    {
        // Tokenises a sample string into separators and the text runs between them.
        static void Main(string[] args)
        {
            string input = @"This;is:a.test";
            char sep0 = ';', sep1 = ':', sep2 = '.';

            // Two alternatives: a single separator character, or a run of one or more
            // characters that are not separators. Every character of the input matches one of them.
            string pattern = string.Format("[{0}{1}{2}]|[^{0}{1}{2}]+", sep0, sep1, sep2);
            Regex regex = new Regex(pattern);
            MatchCollection matches = regex.Matches(input);

            // Collect the matched text in order of appearance.
            List<string> parts = new List<string>();
            foreach (Match match in matches)
            {
                parts.Add(match.ToString());
            }
        }
    }
}
I wanted to do a multiline string like this but needed to keep the line breaks so I did this
// Verbatim string: the line breaks typed in the source are part of the value.
string x =
@"line 1 {0}
line 2 {1}
";
// Split removes the '\n' characters; a piece that still contains '\r' gets its '\n' put back
// so the original line break is written out again.
// The lambda parameter is not called x, so it does not shadow the local above (an error before C# 8).
foreach (var line in string.Format(x, "one", "two")
    .Split("\n")
    .Select(part => part.Contains('\r') ? part + '\n' : part)
    .AsEnumerable()
) {
    Console.Write(line);
}
yields
line 1 one
line 2 two
I came across same problem but with multiple delimiters. Here's my solution:
/// <summary>
/// Splits the string in front of each delimiter character, so every piece after the first
/// starts with the delimiter that introduced it.
/// </summary>
/// <param name="this">Text to split.</param>
/// <param name="delimiters">Characters that start a new piece.</param>
/// <param name="count">Maximum number of pieces to return; defaults to no limit.</param>
/// <returns>
/// The pieces in order. A delimiter at index 0 yields an empty first piece.
/// </returns>
public static string[] SplitLeft(this string @this, char[] delimiters, int count = int.MaxValue)
{
    var splits = new List<string>();

    // Offset at which the next delimiter search starts within the remaining text.
    int searchFrom = 0;
    int next;
    while (splits.Count + 1 < count && (next = @this.IndexOfAny(delimiters, searchFrom)) >= 0)
    {
        splits.Add(@this.Substring(0, next));
        @this = @this.Substring(next);

        // The remaining text now begins with the delimiter just found; skip it so the
        // next search looks for the following one.
        searchFrom = 1;
    }
    splits.Add(@this);
    return splits.ToArray();
}
Sample with separating CamelCase variable names:
// Delimiters: the 26 characters starting at 'A', built from their code points.
// NOTE(review): SplitLeft as declared above also takes a `count` argument, which this call does not pass.
var variableSplit = variableName.SplitLeft(
Enumerable.Range('A', 26).Select(i => (char)i).ToArray());
I wrote this code to split and keep delimiters:
/// <summary>
/// Splits <paramref name="toSplit"/> at every character found in <paramref name="delimiters"/>
/// and returns the tokens together with the delimiters, each delimiter as its own entry.
/// </summary>
/// <param name="toSplit">Text to split.</param>
/// <param name="delimiters">Delimiter characters.</param>
/// <param name="splitOptions">
/// With <see cref="StringSplitOptions.RemoveEmptyEntries"/> the empty tokens are filtered out
/// of the result; otherwise they are kept.
/// </param>
/// <returns>Tokens and delimiters in their original order.</returns>
private static string[] SplitKeepDelimiters(string toSplit, char[] delimiters, StringSplitOptions splitOptions = StringSplitOptions.None)
{
    var pieces = new List<string>();
    int tokenStart = 0;
    int position = 0;

    foreach (char current in toSplit)
    {
        if (delimiters.Contains(current))
        {
            pieces.Add(toSplit.Substring(tokenStart, position - tokenStart));
            pieces.Add(current.ToString());
            tokenStart = position + 1;
        }
        position++;
    }

    // Text after the final delimiter (possibly empty).
    pieces.Add(toSplit.Substring(tokenStart));

    if (splitOptions != StringSplitOptions.RemoveEmptyEntries)
    {
        return pieces.ToArray();
    }
    return pieces.Where(piece => piece.Length > 0).ToArray();
}
Usage example:
string toSplit = "AAA,BBB,CCC;DD;,EE,";
char[] delimiters = new char[] {',', ';'};
// RemoveEmptyEntries: the empty tokens between adjacent delimiters and after the last one are dropped.
string[] tokens = SplitKeepDelimiters(toSplit, delimiters, StringSplitOptions.RemoveEmptyEntries);
// Print every token and delimiter on its own line.
foreach (var token in tokens)
{
Console.WriteLine(token);
}
I have a string that represents an action;
each arg in the action is separated by the char ';'.
For each arg I want to replace the char ',' with the char '.', but only if the ',' is not between ' chars, using Regex replace.
For example:
1- "ActionName('1,b';1,2)"
2- "ActionName('a,b';1,2;1.2;'1,3')"
Desired result:
1- "ActionName('1,b';1.2)"
2- "ActionName('a,b';1.2;1.2;'1,3')"
Conditions:
The ',' can appear multiple times inside a string.
Currently I split the string on ';', loop over all the parts, and split each part on '\''.
Example Code:
/// <summary>
/// Replaces every ',' that lies outside single quotes with '.', leaving quoted text untouched.
/// Arguments are separated by ';' and are re-joined with ';'.
/// </summary>
/// <param name="expression">Expression such as "ActionName('a,b';1,2)".</param>
/// <returns>The transformed expression; null or empty input is returned unchanged.</returns>
public string Transform(string expression)
{
    if (string.IsNullOrEmpty(expression))
    {
        return expression;
    }

    string[] expParts = expression.Split(';');
    for (int i = 0; i < expParts.Length; i++)
    {
        // After splitting on the quote character, even indexes are outside quotes and
        // odd indexes are quoted content.
        string[] subExpParts = expParts[i].Split(new char[] { '\'' });
        string newSubExpression = string.Empty;
        for (int subIndex = 0; subIndex < subExpParts.Length; subIndex += 2)
        {
            newSubExpression += subExpParts[subIndex].Replace(',', '.');
            if (subIndex < subExpParts.Length - 1)
            {
                // Quoted content is copied verbatim, with its quotes restored.
                newSubExpression += "\'" + subExpParts[subIndex + 1] + "\'";
            }
        }
        expParts[i] = newSubExpression;
    }

    // Put back the same separator the expression was split on.
    return string.Join(";", expParts);
}
You can use (?<=^([^']|'[^']*')*),
// Look-behind: everything from the start of the string up to the comma must consist of
// non-quote characters or complete '...' pairs, i.e. the comma is not inside an open quote.
var myPattern= "(?<=^([^']|'[^']*')*),";
var regex = new Regex(myPattern);
// Replace each comma matched that way with a period.
var result = regex.Replace("ActionName('a,b';1,2;1.2;'1,3')", ".");
Output
ActionName('a,b';1.2;1.2;'1,3')
Demo here
Since you have tagged the question a regex, I post a regex that works for your input (at least what you posted):
(,(?![\w\d]*'))
Just an example, I think that it can be useful for you as a starting point...
You need to replace the matching regex with a ., in C# you can do like this:
// Matches a comma that is NOT followed by a run of word characters ending in a single quote.
result = Regex.Replace(input, @"(,(?![\w\d]*'))", @".");
Take a look at regex lookaround documentation for more information.
A simple FSM (Finite State Machine) will do. Please note that we have just two states (encoded with inQuotation): either we are within a quoted chunk or we are not.
/// <summary>
/// Replaces every ',' that lies outside single quotes with '.', in one pass over the text.
/// </summary>
/// <param name="expression">Expression to transform.</param>
/// <returns>The transformed text; null or empty input is returned unchanged.</returns>
public static string Transform(string expression) {
    if (string.IsNullOrEmpty(expression)) {
        return expression;
    }

    var output = new StringBuilder(expression.Length);
    // The only state: whether the current position is between an opening and a closing quote.
    bool insideQuotes = false;

    for (int i = 0; i < expression.Length; i++) {
        char current = expression[i];
        if (current == '\'') {
            insideQuotes = !insideQuotes;
        }

        bool isFreeComma = current == ',' && !insideQuotes;
        output.Append(isFreeComma ? '.' : current);
    }
    return output.ToString();
}
Tests:
string[] tests = new string[] {
"ActionName('1,b';1,2)",
"ActionName('a,b';1,2;1.2;'1,3')",
};
// Pair each transformed line with its 1-based position, in the form "N- text".
var result = tests
.Select((line, index) => $"{index + 1}- {Transform(line)}");
Console.WriteLine(string.Join(Environment.NewLine, result));
Outcome:
1- ActionName('1,b';1.2)
2- ActionName('a,b';1.2;1.2;'1,3')
I am using this method to clean a string:
/// <summary>
/// Removes every character listed in the internal removal set from <paramref name="dirtyString"/>.
/// </summary>
/// <param name="dirtyString">Text to clean.</param>
/// <returns>The text without any of the listed characters.</returns>
public static string CleanString(string dirtyString)
{
    string removeChars = " ?&^$##!()+-,:;<>’\'-_*";
    string cleaned = dirtyString;

    // One Replace pass over the whole string per listed character.
    for (int i = 0; i < removeChars.Length; i++)
    {
        cleaned = cleaned.Replace(removeChars[i].ToString(), string.Empty);
    }
    return cleaned;
}
This method gives the correct result. However, there is a performance glitch in this method. Every time I pass the string, every character goes into the loop. If I have a large string then it will take too much time to return the object.
Is there a better way of doing the same thing? Maybe using LINQ or jQuery/JavaScript?
Any suggestions would be appreciated.
OK, consider the following test:
/// <summary>
/// Alternative implementations of "strip unwanted characters from a string",
/// collected side by side so they can be timed against each other.
/// </summary>
public class CleanString
{
    // Characters removed by the blacklist-based variants. Sharing one literal
    // keeps all of those variants working on exactly the same set.
    private const string RemoveChars = " ?&^$#@!()+-,:;<>’\'-_*";

    //by MSDN http://msdn.microsoft.com/en-us/library/844skk0h(v=vs.71).aspx
    /// <summary>Keeps only word characters, '.', '@' and '-'.</summary>
    public static string UseRegex(string strIn)
    {
        // Replace invalid characters with empty strings.
        // The pattern is a verbatim string so \w and \. reach the regex engine unchanged.
        return Regex.Replace(strIn, @"[^\w\.@-]", "");
    }

    // by Paolo Tedesco
    /// <summary>Filters with a linear search through the remove-list for every input character.</summary>
    public static String UseStringBuilder(string strIn)
    {
        // specify capacity of StringBuilder to avoid resizing
        StringBuilder sb = new StringBuilder(strIn.Length);
        foreach (char x in strIn.Where(c => !RemoveChars.Contains(c)))
        {
            sb.Append(x);
        }
        return sb.ToString();
    }

    // by Paolo Tedesco, but using a HashSet
    /// <summary>Same as <see cref="UseStringBuilder"/> but with a hash-set membership test.</summary>
    public static String UseStringBuilderWithHashSet(string strIn)
    {
        var hashSet = new HashSet<char>(RemoveChars);
        // specify capacity of StringBuilder to avoid resizing
        StringBuilder sb = new StringBuilder(strIn.Length);
        foreach (char x in strIn.Where(c => !hashSet.Contains(c)))
        {
            sb.Append(x);
        }
        return sb.ToString();
    }

    // by SteveDog
    /// <summary>Hash-set membership test with a plain foreach instead of LINQ.</summary>
    public static string UseStringBuilderWithHashSet2(string dirtyString)
    {
        HashSet<char> removeChars = new HashSet<char>(RemoveChars);
        StringBuilder result = new StringBuilder(dirtyString.Length);
        foreach (char c in dirtyString)
        {
            // Keep only characters that are NOT in the remove set.
            if (!removeChars.Contains(c))
            {
                result.Append(c);
            }
        }
        return result.ToString();
    }

    // original by patel.milanb
    /// <summary>Calls string.Replace once per character of the remove-list.</summary>
    public static string UseReplace(string dirtyString)
    {
        string result = dirtyString;
        foreach (char c in RemoveChars)
        {
            result = result.Replace(c.ToString(), string.Empty);
        }
        return result;
    }

    // by L.B
    /// <summary>
    /// Keeps only letters and digits. Unlike the other variants this is a
    /// whitelist, so it also drops characters that are not in the remove-list.
    /// </summary>
    public static string UseWhere(string dirtyString)
    {
        return new String(dirtyString.Where(Char.IsLetterOrDigit).ToArray());
    }
}
// Timing harness for the CleanString variants: runs one variant 50000 times
// over a fixed sample string and writes the elapsed milliseconds to the debug output.
// NOTE: "<SomeMethod>", "...." and "<repeat>" below are placeholders, not C# -
// the measured section is meant to be repeated once per CleanString method.
static class Program
{
/// <summary>
/// The main entry point for the application.
/// </summary>
[STAThread]
static void Main()
{
// Sample input mixing letters, digits, punctuation and non-ASCII characters.
var dirtyString = "sdfdf.dsf8908()=(=(sadfJJLef#ssyd€sdöf////fj()=/§(§&/(\"&sdfdf.dsf8908()=(=(sadfJJLef#ssyd€sdöf////fj()=/§(§&/(\"&sdfdf.dsf8908()=(=(sadfJJLef#ssyd€sdöf";
var sw = new Stopwatch();
var iterations = 50000;
// Measured section: only the calls themselves are inside Start/Stop.
sw.Start();
for (var i = 0; i < iterations; i++)
CleanString.<SomeMethod>(dirtyString);
sw.Stop();
Debug.WriteLine("CleanString.<SomeMethod>: " + sw.ElapsedMilliseconds.ToString());
// Reset so the next measured section starts from zero.
sw.Reset();
....
<repeat>
....
}
}
Output
CleanString.UseReplace: 791
CleanString.UseStringBuilder: 2805
CleanString.UseStringBuilderWithHashSet: 521
CleanString.UseStringBuilderWithHashSet2: 331
CleanString.UseRegex: 1700
CleanString.UseWhere: 233
Conclusion
It probably does not matter which method you use.
The difference in time between the fastest (UseWhere: 233ms) and the slowest (UseStringBuilder: 2805ms) method is 2572ms when called 50000 (!) times in a row. If you don't run the method that often, the difference does not really matter.
But if performance is critical, use the UseWhere method (written by L.B). Note, however, that its behavior is slightly different.
If it's purely speed and efficiency you are after, I would recommend doing something like this:
// Characters to strip. Built once: the set never changes, so there is no
// reason to re-hash the literal on every call.
private static readonly HashSet<char> s_removeChars = new HashSet<char>(" ?&^$#@!()+-,:;<>’\'-_*");

/// <summary>
/// Removes a fixed set of punctuation characters (and spaces) from a string in a single pass.
/// </summary>
/// <param name="dirtyString">The text to clean; null or empty input is returned unchanged.</param>
/// <returns><paramref name="dirtyString"/> without any of the listed characters.</returns>
public static string CleanString(string dirtyString)
{
    if (string.IsNullOrEmpty(dirtyString))
    {
        return dirtyString;
    }

    // Pre-sized to the input length: the result can never be longer than the input.
    StringBuilder result = new StringBuilder(dirtyString.Length);
    foreach (char c in dirtyString)
    {
        if (!s_removeChars.Contains(c)) // prevent dirty chars
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
RegEx is certainly an elegant solution, but it adds extra overhead. By specifying the starting length of the string builder, it will only need to allocate the memory once (and a second time for the ToString at the end). This will cut down on memory usage and increase the speed, especially on longer strings.
However, as L.B. said, if you are using this to properly encode text that is bound for HTML output, you should be using HttpUtility.HtmlEncode instead of doing it yourself.
use regex [?&^$##!()+-,:;<>’\'-_*] for replacing with empty string
I don't know if, performance-wise, using a Regex or LINQ would be an improvement.
Something that could be useful, would be to create the new string with a StringBuilder instead of using string.Replace each time:
using System.Linq;
using System.Text;
/// <summary>
/// Minimal demo: builds the cleaned string with a StringBuilder instead of
/// calling string.Replace once per unwanted character.
/// </summary>
static class Program {
    static void Main(string[] args) {
        // Characters to strip; the list includes both '#' and '@'.
        const string removeChars = " ?&^$#@!()+-,:;<>’\'-_*";
        string result = "x&y(z)";
        // specify capacity of StringBuilder to avoid resizing
        StringBuilder sb = new StringBuilder(result.Length);
        // Copy across only the characters that are not in the remove-list.
        foreach (char x in result.Where(c => !removeChars.Contains(c))) {
            sb.Append(x);
        }
        result = sb.ToString();
    }
}
This one is even faster!
use:
// Verbatim literal; Clean() keeps only the ASCII letters and digits.
string dirty = @"tfgtf$#$%gttg%$% 664%$";
string clean = dirty.Clean();
/// <summary>
/// Returns a copy of <paramref name="name"/> containing only ASCII letters
/// (a-z, A-Z) and digits (0-9), in their original order.
/// </summary>
/// <param name="name">The text to filter.</param>
/// <returns>The filtered text; empty when no character qualifies.</returns>
public static string Clean(this String name)
{
    // Worst case every character is kept, so the input length is the upper bound.
    var buffer = new char[name.Length];
    var count = 0;
    foreach (var ch in name)
    {
        bool keep = (ch >= 'a' && ch <= 'z')
                 || (ch >= 'A' && ch <= 'Z')
                 || (ch >= '0' && ch <= '9');
        if (keep)
        {
            buffer[count++] = ch;
        }
    }
    // Build the string from the filled part only. The unused tail of the buffer
    // holds '\0' characters, which are not white space and so would survive TrimEnd().
    return new string(buffer, 0, count);
}
Give this a try: http://msdn.microsoft.com/en-us/library/xwewhkd1.aspx
Perhaps it helps to first explain the 'why' and then the 'what'. The reason you're getting slow performance is that C# copies the whole string for each replacement. In my experience, using Regex in .NET isn't always better - although in most scenarios (I think including this one) it'll probably work just fine.
If I really need performance I usually don't leave it up to luck and just tell the compiler exactly what I want: that is: create a string with the upper bound number of characters and copy all the chars in there that you need. It's also possible to replace the hashset with a switch / case or array in which case you might end up with a jump table or array lookup - which is even faster.
The 'pragmatic' best, but fast solution is:
// Copy every character that is not excluded into a buffer sized for the worst
// case (nothing removed), then build the result from the filled part only.
var excluded = new HashSet<char>() { /* all your excluded chars go here */ };
var buffer = new char[dirtyString.Length];
int count = 0;
for (int i = 0; i < dirtyString.Length; i++)
{
    char c = dirtyString[i];
    if (excluded.Contains(c))
    {
        continue;
    }
    buffer[count] = c;
    count++;
}
return new string(buffer, 0, count);
BTW: this solution is incorrect when you want to process high surrogate Unicode characters - but can easily be adapted to include these characters.
-Stefan.
I use this in my current project and it works fine. It takes a sentence, it removes all the non alphanumerical characters, it then returns the sentence with all the words in the first letter upper case and everything else in lower case. Maybe I should call it SentenceNormalizer. Naming is hard :)
// Matches every character that is not an ASCII letter, a digit or a space.
// Created once: RegexOptions.Compiled is expensive to build and pointless to rebuild per call.
private static readonly Regex s_cleaner = new Regex("(?:[^a-zA-Z0-9 ])", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

/// <summary>
/// Strips every non-alphanumeric character from a sentence and returns its
/// words separated by single spaces, each with an upper-case first letter and
/// the rest in lower case.
/// </summary>
/// <param name="whateverString">The sentence to normalise; may be null.</param>
/// <returns>The normalised sentence, or an empty string when there are no words.</returns>
internal static string StringSanitizer(string whateverString)
{
    if (string.IsNullOrWhiteSpace(whateverString))
    {
        return string.Empty;
    }

    // Invariant casing so the result does not depend on the current culture
    // (e.g. Turkish 'I' lower-cases to a non-ASCII letter that the regex would then delete).
    string lowered = whateverString.Trim().ToLowerInvariant();
    string[] words = s_cleaner.Replace(lowered, string.Empty)
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    // Join puts the separator only BETWEEN words, so there is no trailing space.
    return string.Join(" ", words.Select(word => char.ToUpperInvariant(word[0]) + word.Substring(1)));
}
I am not able to spend time on acid testing this but this line did not actually clean slashes as desired.
HashSet<char> removeChars = new HashSet<char>(" ?&^$##!()+-,:;<>’\'-_*");
I had to add slashes individually and escape the backslash
// In a C# string literal \' is just an apostrophe, so the literal itself never
// contains a backslash; both slash characters are therefore listed explicitly.
HashSet<char> removeChars = new HashSet<char>(" ?&^$#@!()+-,:;<>’'-_*") { '/', '\\' };
What is the best way to convert from Pascal Case (upper Camel Case) to a sentence.
For example starting with
"AwaitingFeedback"
and converting that to
"Awaiting feedback"
C# preferable but I could convert it from Java or similar.
/// <summary>
/// Inserts a space at every lower-case/upper-case boundary and lower-cases
/// the letter that follows the space.
/// </summary>
/// <param name="str">Pascal- or camel-cased text.</param>
/// <returns>The text split into words, e.g. "AwaitingFeedback" becomes "Awaiting feedback".</returns>
public static string ToSentenceCase(this string str)
{
    MatchEvaluator splitBoundary = boundary =>
    {
        // The match is exactly two characters: the lower-case letter that ends
        // one word and the upper-case letter that starts the next.
        string wordEnd = boundary.Value[0].ToString();
        string wordStart = char.ToLower(boundary.Value[1]).ToString();
        return string.Concat(wordEnd, " ", wordStart);
    };

    return Regex.Replace(str, "[a-z][A-Z]", splitBoundary);
}
In versions of visual studio after 2015, you can do
/// <summary>
/// Splits Pascal-/camel-cased text into words: a space goes in at each
/// lower-to-upper transition and the upper-case letter is lower-cased.
/// </summary>
/// <param name="str">The text to split.</param>
/// <returns>The sentence-cased text.</returns>
public static string ToSentenceCase(this string str)
{
    return Regex.Replace(str, "[a-z][A-Z]", m =>
    {
        string pair = m.Value; // always two chars: [a-z] then [A-Z]
        return new string(new[] { pair[0], ' ', char.ToLower(pair[1]) });
    });
}
Based on: Converting Pascal case to sentences using regular expression
I would prefer to use Humanizer for this. Humanizer is a Portable Class Library that meets all your .NET needs for manipulating and displaying strings, enums, dates, times, timespans, numbers and quantities.
Short Answer
"AwaitingFeedback".Humanize() => Awaiting feedback
Long and Descriptive Answer
Humanizer can do a lot more. Other examples are:
"PascalCaseInputStringIsTurnedIntoSentence".Humanize() => "Pascal case input string is turned into sentence"
"Underscored_input_string_is_turned_into_sentence".Humanize() => "Underscored input string is turned into sentence"
"Can_return_title_Case".Humanize(LetterCasing.Title) => "Can Return Title Case"
"CanReturnLowerCase".Humanize(LetterCasing.LowerCase) => "can return lower case"
Complete code is :
using Humanizer;
using static System.Console;
namespace HumanizerConsoleApp
{
    /// <summary>
    /// Console demo that prints the result of Humanize() for a few sample
    /// identifiers, with and without an explicit LetterCasing.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Samples humanized without an explicit casing argument.
            string[] plainSamples =
            {
                "AwaitingFeedback",
                "PascalCaseInputStringIsTurnedIntoSentence",
                "Underscored_input_string_is_turned_into_sentence",
            };
            foreach (string sample in plainSamples)
            {
                WriteLine(sample.Humanize());
            }

            // Samples humanized with an explicit casing.
            string titleCased = "Can_return_title_Case".Humanize(LetterCasing.Title);
            WriteLine(titleCased);
            string lowerCased = "CanReturnLowerCase".Humanize(LetterCasing.LowerCase);
            WriteLine(lowerCased);
        }
    }
}
Output
Awaiting feedback
Pascal case input string is turned into sentence
Underscored input string is turned into sentence
Can Return Title Case
can return lower case
If you prefer to write your own C# code instead, you can do so as shown in the other answers.
Here you go...
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace CamelCaseToString
{
    class Program
    {
        static void Main(string[] args)
        {
            string sentence = CamelCaseToString("ThisIsYourMasterCallingYou");
            Console.WriteLine(sentence);
        }

        /// <summary>
        /// Turns a camel-cased identifier into a sentence: the first character
        /// is upper-cased, and every later character that is not a lower-case
        /// letter is lower-cased and preceded by a space.
        /// </summary>
        /// <param name="str">The identifier to convert.</param>
        /// <returns>The sentence, or null when <paramref name="str"/> is null or empty.</returns>
        private static string CamelCaseToString(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return null;
            }

            var sentence = new StringBuilder(32);
            sentence.Append(char.ToUpper(str[0]));

            int position = 1;
            while (position < str.Length)
            {
                char current = str[position++];
                if (!char.IsLower(current))
                {
                    // Anything that is not a lower-case letter starts a new word.
                    sentence.Append(' ').Append(char.ToLower(current));
                    continue;
                }
                sentence.Append(current);
            }

            return sentence.ToString();
        }
    }
}
This works for me:
Regex.Replace(strIn, "([A-Z]{1,2}|[0-9]+)", " $1").TrimStart()
This is just like @SSTA's answer, but is more efficient than calling TrimStart.
Regex.Replace("ThisIsMyCapsDelimitedString", "(\\B[A-Z])", " $1")
Found this in the MvcContrib source, doesn't seem to be mentioned here yet.
return Regex.Replace(input, "([A-Z])", " $1", RegexOptions.Compiled).Trim();
Just because everyone has been using Regex (except this guy), here's an implementation with StringBuilder that was about 5x faster in my tests. Includes checking for numbers too.
"SomeBunchOfCamelCase2".FromCamelCaseToSentence == "Some Bunch Of Camel Case 2"
/// <summary>
/// Splits a camel- or Pascal-cased identifier into space-separated words.
/// The first character is upper-cased; every later upper-case letter or
/// digit starts a new word.
/// </summary>
/// <param name="input">The identifier; null or empty input is returned unchanged.</param>
/// <returns>The identifier with a space before each upper-case letter or digit after the first character.</returns>
public static string FromCamelCaseToSentence(this string input) {
    if (string.IsNullOrEmpty(input)) {
        return input;
    }

    var words = new StringBuilder();

    // Upper-casing the first character makes camelCase and PascalCase come out the same.
    words.Append(char.ToUpper(input[0]));

    foreach (char c in input.Substring(1)) {
        bool startsNewWord = char.IsDigit(c) || char.IsUpper(c);
        if (startsNewWord) {
            words.Append(' ');
        }
        words.Append(c);
    }

    return words.ToString();
}
Here's a basic way of doing it that I came up with using Regex
/// <summary>
/// Builds a sentence from the capitalised words ("[A-Z][a-z]+") and digit runs
/// found in <paramref name="value"/>. The first match is kept as written; every
/// following match is lower-cased and preceded by a space. Text that matches
/// neither alternative is not copied to the result.
/// </summary>
/// <param name="value">The camel-cased text to split.</param>
/// <returns>The matches joined into a sentence; empty when nothing matches.</returns>
public static string CamelCaseToSentence(this string value)
{
    var sentence = new StringBuilder();
    foreach (var word in Regex.Matches(value, "([A-Z][a-z]+)|[0-9]+"))
    {
        string text = word.ToString();

        // Matches are never empty, so an empty builder means this is the first word.
        if (sentence.Length == 0)
        {
            sentence.Append(text);
            continue;
        }

        sentence.Append(" ").Append(text.ToLower());
    }
    return sentence.ToString();
}
It will also split off numbers which I didn't specify but would be useful.
// Put a space before every capital, lower-case everything, drop the leading
// space, then print the result with its first character upper-cased again.
string camel = "MyCamelCaseString";
string spaced = Regex.Replace(camel, "([A-Z])", " $1");
string lowered = spaced.ToLower().Trim();
string head = lowered.Substring(0, 1).ToUpper();
string tail = lowered.Substring(1);
Console.WriteLine(head + tail);
Edit: I didn't notice your casing requirements, so I modified the code accordingly. You could use a MatchEvaluator to do the casing, but I think a substring is easier. You could also wrap it in a second regex replace where you change the first character
"^\w"
to upper
\U (i think)
I'd use a regex, inserting a space before each upper case character, then lowering all the string.
// Insert a space before every upper-case letter that is not at a word
// boundary (so not before the very first letter), then lower-case the result.
// The pattern is a verbatim string because "\B" is not a valid C# escape
// sequence, and the captured letter is referenced as $1 in the replacement.
string spacedString = System.Text.RegularExpressions.Regex.Replace(yourString, @"\B([A-Z])", " $1");
spacedString = spacedString.ToLower();
It is easy to do in JavaScript (or PHP, etc.) where you can define a function in the replace call:
// Replace every capital letter (together with the character that follows it)
// by a space plus the lower-cased match, then show the result.
var camel = "AwaitingFeedbackDearMaster";

function spaceAndLower(matched) {
    return ' ' + matched.toLowerCase();
}

var sentence = camel.replace(/([A-Z].)/g, spaceAndLower);
alert(sentence);
Although I haven't solved the initial cap problem... :-)
Now, for the Java solution:
/**
 * Converts a camel- or Pascal-cased identifier into a sentence: the text is
 * split in front of every upper-case letter, the first word is kept as written
 * and all following words are lower-cased and joined with single spaces.
 *
 * @param camel the identifier to convert; may be null
 * @return the sentence, or an empty string when camel is null
 */
String ToSentence(String camel)
{
    if (camel == null)
    {
        return ""; // Or null...
    }

    // Zero-width lookahead: splits before each capital without consuming it.
    String[] words = camel.split("(?=[A-Z])");
    if (words == null)
    {
        return "";
    }
    if (words.length == 1)
    {
        return words[0];
    }

    StringBuilder sentence = new StringBuilder(camel.length());
    if (words[0].length() == 0)
    {
        // Leading empty element: the real first word is words[1], kept as written.
        sentence.append(words[1]);
    }
    else
    {
        // Just in case of camelCase instead of CamelCase
        sentence.append(words[0]).append(" ").append(words[1].toLowerCase());
    }

    int index = 2;
    while (index < words.length)
    {
        sentence.append(" ").append(words[index].toLowerCase());
        index++;
    }
    return sentence.toString();
}
System.out.println(ToSentence("AwaitingAFeedbackDearMaster"));
System.out.println(ToSentence(null));
System.out.println(ToSentence(""));
System.out.println(ToSentence("A"));
System.out.println(ToSentence("Aaagh!"));
System.out.println(ToSentence("stackoverflow"));
System.out.println(ToSentence("disableGPS"));
System.out.println(ToSentence("Ahh89Boo"));
System.out.println(ToSentence("ABC"));
Note the trick to split the sentence without losing any character...
Pseudo-code:
NewString = first char of the string;
Loop through every remaining char of the string (skip the first one)
If char is upper-case ('A'-'Z')
NewString = NewString + ' ' + lowercase(char)
Else
NewString = NewString + char
Better ways can perhaps be done by using regex or by string replacement routines (replace 'X' with ' x')
An xquery solution that works for both UpperCamel and lowerCamel case:
To output sentence case (only the first character of the first word is capitalized):
(: Sentence case: upper-cases the first character, then puts a space before
   every capital in the rest of the string and lower-cases that rest. :)
declare function content:sentenceCase($string)
{
  let $head := substring($string, 1, 1)
  let $tail := substring-after($string, $head)
  let $spacedTail := replace($tail, '([A-Z])', ' $1')
  return
    concat(upper-case($head), lower-case($spacedTail))
};
To output title case (first character of each word capitalized):
(: Title case: upper-cases the first character and puts a space before every
   capital in the rest of the string, leaving the rest's casing untouched. :)
declare function content:titleCase($string)
{
  let $head := substring($string, 1, 1)
  let $tail := substring-after($string, $head)
  let $spacedTail := replace($tail, '([A-Z])', ' $1')
  return
    concat(upper-case($head), $spacedTail)
};
Found myself doing something similar, and I appreciate having a point-of-departure with this discussion. This is my solution, placed as an extension method to the string class in the context of a console application.
using System;
using System.Text;
namespace ConsoleApplication1
{
    /// <summary>Console demo for <see cref="StringExtension.CamelCaseToString"/>.</summary>
    class Program
    {
        static void Main(string[] args)
        {
            string piratese = "avastTharMatey";
            string ivyese = "CheerioPipPip";

            string first = piratese.CamelCaseToString();
            string second = ivyese.CamelCaseToString();
            Console.WriteLine("{0}\n{1}\n", first, second);

            // Wait for ENTER so the console window stays open.
            Console.WriteLine("For Pete\'s sake, man, hit ENTER!");
            Console.ReadLine();
        }
    }

    public static class StringExtension
    {
        /// <summary>
        /// Converts a camel- or Pascal-cased identifier into a sentence: the
        /// input is trimmed, its first character is upper-cased, and every
        /// later character is lower-cased, with a space inserted before each
        /// character that was upper-case.
        /// </summary>
        /// <param name="str">The identifier to convert; may be null.</param>
        /// <returns>The sentence, or an empty string for null, empty or all-white-space input.</returns>
        public static string CamelCaseToString(this string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            string trimmed = str.Trim();
            if (trimmed.Length == 0)
            {
                return string.Empty;
            }

            var sentence = new StringBuilder(32);
            sentence.Append(char.ToUpper(trimmed[0]));

            foreach (char c in trimmed.Substring(1))
            {
                if (char.IsUpper(c))
                {
                    sentence.Append(" ");
                }
                sentence.Append(char.ToLower(c));
            }

            return sentence.ToString();
        }
    }
}
Most of the preceding answers split acronyms and numbers, adding a space in front of each character. I wanted acronyms and numbers to be kept together so I have a simple state machine that emits a space every time the input transitions from one state to the other.
/// <summary>
/// Inserts a space wherever the input switches from a "plain" character to an
/// upper-case letter or digit, so runs of capitals and digits stay together.
/// </summary>
/// <param name="input">The camel- or Pascal-cased text; may be null.</param>
/// <returns>The spaced text, or an empty string for null or empty input.</returns>
internal static string FromCamelCaseToSentence(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return String.Empty;
    }

    var sentence = new StringBuilder();

    // Starts as true so that no space is emitted in front of the first character.
    bool previousWasUpperOrDigit = true;

    foreach (char c in input)
    {
        bool currentIsUpperOrDigit = char.IsDigit(c) || char.IsUpper(c);

        // A new word begins only on the transition INTO an upper/digit run.
        if (currentIsUpperOrDigit && !previousWasUpperOrDigit)
        {
            sentence.Append(' ');
        }

        sentence.Append(c);
        previousWasUpperOrDigit = currentIsUpperOrDigit;
    }

    return sentence.ToString();
}
And here's some tests:
// Data-driven test: each TestCase supplies an input and the exact string
// FromCamelCaseToSentence is expected to return for it.
[TestCase(null, ExpectedResult = "")]
[TestCase("", ExpectedResult = "")]
[TestCase("ABC", ExpectedResult = "ABC")]
[TestCase("abc", ExpectedResult = "abc")]
[TestCase("camelCase", ExpectedResult = "camel Case")]
[TestCase("PascalCase", ExpectedResult = "Pascal Case")]
[TestCase("Pascal123", ExpectedResult = "Pascal 123")]
[TestCase("CustomerID", ExpectedResult = "Customer ID")]
[TestCase("CustomABC123", ExpectedResult = "Custom ABC123")]
public string CanSplitCamelCase(string input) => FromCamelCaseToSentence(input);
Mostly already answered here
A small change to the accepted answer, to convert the second and subsequent capitalised letters to lower case: change
if (char.IsUpper(text[i]))
newText.Append(' ');
newText.Append(text[i]);
to
if (char.IsUpper(text[i]))
{
newText.Append(' ');
newText.Append(char.ToLower(text[i]));
}
else
newText.Append(text[i]);
Here is my implementation. This is the fastest that I got while avoiding creating spaces for abbreviations.
/// <summary>
/// Inserts a space wherever a character at or above 'a' is followed by a
/// character below 'a' (capitals, digits, most ASCII punctuation), so runs of
/// capitals such as abbreviations are not broken up.
/// </summary>
/// <param name="input">The Pascal-cased text; null, empty or single-character input is returned unchanged.</param>
/// <returns>The text with the spaces inserted.</returns>
public static string PascalCaseToSentence(string input)
{
    if (string.IsNullOrEmpty(input) || input.Length < 2)
    {
        return input;
    }

    // Worst case a space is inserted before every second character.
    char[] buffer = new char[input.Length + ((input.Length + 1) / 2)];
    int written = 0;
    bool previousIsLower = false;

    foreach (char current in input)
    {
        // "Lower" here simply means a code point of 'a' (97) or above.
        bool isLower = current >= 'a';

        if (!isLower && previousIsLower)
        {
            buffer[written++] = ' ';
        }

        buffer[written++] = current;
        previousIsLower = isLower;
    }

    return new string(buffer, 0, written);
}