Related
I'm trying to create something that will be like a basic random password generator including uppercase, lowercase and digits - but for some reason this
/// <summary>
/// Asks for a password length (minimum 3) and prints a random password that
/// contains at least one uppercase letter, one lowercase letter and one digit.
/// </summary>
/// <param name="args">Unused command-line arguments.</param>
static void Main(string[] args)
{
    int userDefinedLength;
    Console.WriteLine("How many characters would you like your password to be?");
    while (true)
    {
        // TryParse reports bad input through its return value, so no exception
        // handling is needed for ordinary typos.
        if (!int.TryParse(Console.ReadLine(), out userDefinedLength))
        {
            Console.WriteLine("Please input a valid integer length.");
            continue;
        }

        // Three character classes must each appear once, so 3 is the smallest usable length.
        if (userDefinedLength < 3)
        {
            Console.WriteLine("Please enter a length of at least 3.");
            continue;
        }

        break;
    }

    // One generator for everything. Two Random instances created back-to-back can
    // receive the same time-based seed and then produce identical sequences, which
    // ties the chosen character class to the character picked inside that class.
    Random random = new Random();
    char[] passwordArray = new char[userDefinedLength];
    bool hasDigit;
    bool hasUpper;
    bool hasLower;
    do
    {
        hasDigit = false;
        hasUpper = false;
        hasLower = false;
        for (int i = 0; i < passwordArray.Length; i++)
        {
            // Next(1, 4) yields 1, 2 or 3; the upper bound is exclusive.
            switch (random.Next(1, 4))
            {
                case 1:
                    passwordArray[i] = (char)random.Next('A', 'Z' + 1);
                    hasUpper = true;
                    break;
                case 2:
                    passwordArray[i] = (char)random.Next('a', 'z' + 1);
                    hasLower = true;
                    break;
                default:
                    passwordArray[i] = (char)random.Next('0', '9' + 1);
                    hasDigit = true;
                    break;
            }
        }
        // Regenerate the whole password until all three classes are present.
    } while (!(hasDigit && hasUpper && hasLower));

    Console.WriteLine("Your password is...");
    Console.ForegroundColor = ConsoleColor.DarkGreen;
    Console.Write(passwordArray);
    Console.ReadLine();
}
The password that it produces seems to not be using any numbers that are less than 6. And some of the characters that it produces are quite repeated - e.g. the lower case tend to have characters that appear much more than some others - or some that don't appear at all. I'll leave a 100 character example here.
m9nj88m8GBpF7Hk87E8p9CAE987pEj7pm7j89iHplo7DIpB87B9irlAk9Ik9C8q8i97B9o8l8GDjj88j898Dmk9A69969Ino988I
Seed your RNG
Don't forget to seed your instance of random, e.g.
var random = new System.Random(Environment.TickCount);
Also, one instance should be enough.
Eliminating repetition
If you wish to ensure that all characters are represented, you can use a different random selection technique. For example, you could generate a very very long string that contains all the characters you want, sort it randomly, then take the first n characters. This approach would completely eliminate repeated characters and guarantee that every character gets used eventually. For example:
using System;
using System.Linq;
/// <summary>
/// Sample program that builds a password by shuffling a fixed character set
/// and taking a prefix, so no character appears twice.
/// </summary>
public class Program
{
    static readonly Random _random = new System.Random(Environment.TickCount);

    // Every character is listed exactly once; a duplicate entry here would let
    // that character appear twice in a generated password.
    const string dictionary = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=";

    /// <summary>
    /// Returns <paramref name="length"/> distinct characters chosen at random from the dictionary.
    /// </summary>
    /// <param name="length">Number of characters wanted; at most the dictionary size.</param>
    /// <returns>The generated password.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="length"/> is negative or larger than the dictionary.
    /// </exception>
    static string GetPassword(int length)
    {
        // Without this check Take() would silently return a shorter password.
        if (length < 0 || length > dictionary.Length)
        {
            throw new ArgumentOutOfRangeException("length", "length must be between 0 and the number of available characters.");
        }

        return new String
        (
            dictionary
                .OrderBy(a => _random.NextDouble()) // random sort key per character = shuffle
                .Take(length)
                .ToArray()
        );
    }

    public static void Main()
    {
        var s = GetPassword(30);
        Console.WriteLine(s);
    }
}
In this example we treat the string as an IEnumerable<char> (yes, this is allowed) and use LINQ methods to sort it randomly and take the first n characters. We then pass the result to the constructor of System.String to build the answer.
Sample output:
Run #1:
!#t(hfTz0rB5cvKy1d$oeVI2mnsCba
Run #2:
y7Y(MB1pWH$wO5XPD0b+%Rkn9a4F!_
Run #3:
tH92lnh*sL+WOaTYR160&xiZpC5#G3
Looks pretty random to me.
Controlled repetition
The above solution of course only allows at most one instance of each character in the dictionary. But maybe you want them to be able to appear more than once, but not too much. If you'd like a controlled, limited number of possible repeats, you can make a small change:
/// <summary>
/// Builds a password in which each dictionary character may occur up to
/// <paramref name="maxRepeats"/> times.
/// </summary>
/// <param name="length">Number of characters to take from the shuffled pool.</param>
/// <param name="maxRepeats">How many copies of the dictionary go into the pool.</param>
/// <returns>The first <paramref name="length"/> characters of the shuffled pool.</returns>
static string GetPassword(int length, int maxRepeats)
{
    // Pool = the dictionary repeated maxRepeats times, flattened to characters.
    var pool = Enumerable.Repeat(dictionary, maxRepeats).SelectMany(copy => copy);

    // Shuffle by sorting on a random key, then keep the requested prefix.
    char[] picked = pool
        .OrderBy(ch => _random.NextDouble())
        .Take(length)
        .ToArray();

    return new String(picked);
}
In this example we clone the dictionary maxRepeats times and concatenate them using SelectMany. Then we sort that gigantic string randomly and take the first n characters for the password.
Working code on .NET Fiddle
The System.Random class is not designed to be a strong RNG, it is designed to be a fast RNG. If you want a quality random number generator you need to use one based on the System.Security.Cryptography.RandomNumberGenerator class.
Here is a gist I found that uses a crypto random generator as the internals of the existing random class.
Running it with the new generator gives me
mm77D5EDjO0OhOOe8kppiY0toc0HWQjpo37b4LFj56LvcQvA4jE83J8BS8xeX6zcEr2Od8A70v2xFKiY0ROY3gN105rZt6PE8F2i
which you can see appears to not have the biases you found.
According to following code , I need to generate a string in different cases based on the method input. My issue is where I wanna generate 9A9A (at least 1 number and 1 letter) or 9A9A9A (at least 2 numbers and 2 letters). In most cases, this conditions is not met.
/// <summary>
/// Creates an <see cref="AuthMessage"/> containing a reference code (a new GUID in "N" format)
/// and an authentication code whose shape is selected by <paramref name="CodeType"/>.
/// </summary>
/// <param name="CodeType">
/// "0".."8", with the shape noted on each case below; any other value yields four digits.
/// </param>
/// <returns>The populated message.</returns>
private AuthMessage GetAuthCode(string CodeType) //(out string Message)
{
    Guid guid = Guid.NewGuid();
    Random random = new Random();
    string refCode = guid.ToString("N");
    string authCode;

    // Random.Next(min, max) excludes max, so each numeric upper bound is one past
    // the largest code wanted (e.g. 10000 so that 9999 can occur).
    // NOTE(review): cases "4".."8" slice GUID/Base64 text, which does not guarantee
    // any particular mix of digits and letters.
    switch (CodeType)
    {
        case "0": //9999
            authCode = random.Next(1000, 10000).ToString();
            break;
        case "1": //99999
            authCode = random.Next(10000, 100000).ToString();
            break;
        case "2": //999999
            authCode = random.Next(100000, 1000000).ToString();
            break;
        case "3": //999-999
            authCode = Regex.Replace(random.Next(100000, 1000000).ToString(), @"^(.{3})(.{3})$", "$1-$2");
            break;
        case "4": //9A9A
            authCode = refCode.Substring(14, 4).ToUpperInvariant();
            break;
        case "5": //9A9A9
            authCode = refCode.Substring(15, 5).ToUpperInvariant();
            break;
        case "6": //9A9A9A
            authCode = refCode.Substring(6, 6).ToUpperInvariant();
            break;
        case "7": //9A9-A9A
            authCode = Regex.Replace(refCode.Substring(6, 6), @"(.{3})(.{3})", @"$1-$2").ToUpperInvariant();
            break;
        case "8": //9A9-A9A
            // Base64 text with '/', '+' and '=' removed; invariant casing keeps the
            // result independent of the current culture.
            authCode = Regex.Replace(Regex.Replace(Convert.ToBase64String(guid.ToByteArray()), "[/+=]", "").Substring(0, 6), @"(.{3})(.{3})", @"$1-$2").ToUpperInvariant();
            break;
        default:
            authCode = random.Next(1000, 10000).ToString();
            break;
    }

    AuthMessage response = new AuthMessage();
    response.AuthCode = authCode;
    response.RefCode = refCode;
    return response;
}
Guid representation is composed of hexadecimal digits, i.e. characters 0-9 and a-f. The problem with relying on it to obtain a mixture of letters and numbers is that a character at any given position could be either a letter or a decimal digit, with probability tilted roughly 5:3 in favor of a decimal digit.
If you want to generate a specific mix of digits and letters, you should generate the string one character at a time, without relying on Guid representation.
I thought I'd have a go - it gives me a good chance of being ridiculed. This isn't the most efficient way of generating the codes, but it should be fairly random.
/// <summary>
/// Expands a pattern into a random code: '9' becomes a random digit, 'A' a random
/// uppercase letter and '-' is copied as-is.
/// </summary>
/// <param name="CodeType">The pattern, e.g. "9A9-A9A". Null or empty yields an empty string.</param>
/// <returns>The generated code.</returns>
private string GetAuthCode(string CodeType)
{
    if (String.IsNullOrEmpty(CodeType))
    {
        return "";
    }

    // One generator per pattern symbol; a symbol without an entry fails the lookup below.
    var generators = new Dictionary<char, Func<Char>>()
    {
        { '9', () => RandomBytes().First(x => x >= '0' && x <= '9') },
        { 'A', () => RandomBytes().First(x => x >= 'A' && x <= 'Z') },
        { '-', () => '-' },
    };

    string code = "";
    foreach (char symbol in CodeType)
    {
        code += generators[symbol]().ToString();
    }
    return code;
}
/// <summary>
/// Produces an endless sequence of characters with code points 0-255, taken from a
/// cryptographic random number generator in batches of 256 bytes.
/// </summary>
/// <returns>A lazily evaluated, unbounded sequence; callers must stop enumerating themselves.</returns>
private IEnumerable<char> RandomBytes()
{
    // The generator is disposed when the consumer disposes the enumerator
    // (foreach and LINQ operators such as First do this).
    using (var rng = System.Security.Cryptography.RandomNumberGenerator.Create())
    {
        var bytes = new byte[256];
        while (true)
        {
            rng.GetBytes(bytes);
            foreach (var value in bytes)
            {
                yield return (char)value;
            }
        }
    }
}
Now, due to the funky monkey state machine that implements iterator methods, this code does dispose of the RNG despite the while (true).
I simplified the GetAuthCode method slightly, but I think this demonstrates a suitable way to generate the codes.
/// <summary>
/// Creates random tokens made of uppercase letters and digits.
/// </summary>
public class TokenCreator
{
    // Static so the static CreateToken can use it, and shared so repeated calls
    // do not create freshly seeded generators.
    private static readonly Random random = new Random();

    // The alphabet tokens are drawn from.
    private const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    /// <summary>
    /// Returns a token of <paramref name="length"/> characters, each picked independently
    /// from A-Z and 0-9. A mix of letters and digits is not guaranteed.
    /// </summary>
    /// <param name="length">Number of characters in the token.</param>
    /// <returns>The generated token.</returns>
    public static string CreateToken(int length)
    {
        return new string(Enumerable.Repeat(chars, length)
            .Select(s => s[random.Next(s.Length)]).ToArray());
    }
}
AuthCode = TokenCreator.CreateToken(5);
This implementation can little help you. But again it doesn't guarantee you get alphanumeric combination. Sometime you may get only alphabets and sometime you may get only numbers. Though chances are so little.
Overview: The following code uses Regex.Replace along with IsMatch, which allows one to define a pattern of any length, such as XXXXXX or XX-XX, in which every X is replaced with a random character.
For example XX-XX could return A(-1d or with dashes just XXXX to A(1d.
Security
It uses the Web.Security.Membership.GeneratePassword to get a string of 48 random characters, digits and what not. Then a clear password is culled from that random characters based on the pattern needed.
(at least 1 number and 1 letter)
This is done in the validation regex, which ensures there is at least one alphabetic character and one number. The generate method is called repeatedly until the validation reports a match that satisfies the rules you mentioned.
Secure Transport
Finally a secure string is setup to be returned.
This one works off of getting a
// This pattern enforces our rule that a pwd must have one letter and one digit.
// It is used with RegexOptions.IgnorePatternWhitespace, so the line breaks and the
// '#' comments inside the pattern are skipped by the regex engine. White space inside
// a character class is still literal, so the class must be written [a-zA-Z] without spaces.
string pattern = @" # This regex pattern enforces the rules before returning matching
(?=.*[a-zA-Z]) # Somewhere there is an alphabetic character
(?=.*\d) # Somewhere there is a number; if no number found return no match.
(.+) # Successful match, rules are satisfied. Return match";
Random rn = new Random(); // Used to cherry pick from chars to use.
// Creates 48 random characters, at least 5 of which are non-alphanumeric.
string charsToUse = System.Web.Security.Membership.GeneratePassword(48, 5);
// When replacement is done, replace an `X` matched with a random char.
MatchEvaluator RandomChar = delegate (Match m)
{
    return charsToUse[rn.Next(charsToUse.Length)].ToString();
};
// Returns the candidate when it satisfies the pattern, otherwise an empty string.
Func<string, string> Validate =
    (string str) => Regex.IsMatch(str, pattern, RegexOptions.IgnorePatternWhitespace)
        ? str : string.Empty; // return empty on failure.
string pwdClear = string.Empty;
// Generate valid pwd based on rules. Loop until rules are met.
while (string.IsNullOrEmpty(pwdClear))
{
    pwdClear = Validate(Regex.Replace("XXXX-XXXX-XXXX-XXXX-XXXX", "X", RandomChar));
}
// Create a secure string for the password for transportation.
SecureString ss = new SecureString();
foreach (char chr in pwdClear)
{
    ss.AppendChar(chr);
}
This answer is based off of a non-secure implementation on my blog see C#: Generate a Random Sequence of Numbers and Letters From a User Defined Pattern and Characters
For testing purposes, when I need to generate random strings with specific properties, I use something similar to this. You might have to adapt it to your needs.
/// <summary>
/// Generates random strings of digits and ASCII letters, optionally guaranteeing a
/// minimum number of digits and of letters.
/// </summary>
public sealed class StringGenerator
{
    private static readonly char[] NumericChars = "0123456789".ToCharArray();
    private static readonly char[] LowerAlphaChars = "abcdefghijklmnopqrstuvwxyz".ToCharArray();
    private static readonly char[] UpperAlphaChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".ToCharArray();

    /// <summary>Creates a generator.</summary>
    /// <param name="rnd">Random source; when null a <see cref="SecureRandom"/> is used.</param>
    public StringGenerator(IRandom rnd)
    {
        Rnd = rnd ?? new SecureRandom();
    }

    private IRandom Rnd
    {
        get;
        set;
    }

    /// <summary>
    /// Generates a string of <paramref name="length"/> characters containing at least
    /// <paramref name="minNumeric"/> digits and <paramref name="minAlpha"/> letters.
    /// </summary>
    /// <param name="length">Total number of characters.</param>
    /// <param name="minNumeric">Minimum number of digits.</param>
    /// <param name="minAlpha">Minimum number of letters.</param>
    /// <param name="alphaCase">Which letter cases may be used.</param>
    /// <returns>The generated string; empty when <paramref name="length"/> is 0.</returns>
    /// <exception cref="ArgumentOutOfRangeException">An argument is negative.</exception>
    /// <exception cref="ArgumentException">The minimums add up to more than <paramref name="length"/>.</exception>
    public string Generate(int length, int minNumeric = 0, int minAlpha = 0, AlphaCase alphaCase = AlphaCase.Both)
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException("length");
        }
        if (minNumeric < 0)
        {
            throw new ArgumentOutOfRangeException("minNumeric");
        }
        if (minAlpha < 0)
        {
            throw new ArgumentOutOfRangeException("minAlpha");
        }
        // Sum in 64 bits: two large int minimums would otherwise wrap to a negative
        // value and slip past this check.
        if (length < (long)minNumeric + minAlpha)
        {
            throw new ArgumentException("length must be at least minNumeric + minAlpha.", "length");
        }
        if (length == 0)
        {
            return string.Empty;
        }

        var result = new char[length];
        var index = 0;

        // Required digits first, then required letters, then fill from both sets.
        foreach (var numeric in GenerateNumeric().Take(minNumeric))
        {
            result[index++] = numeric;
        }
        var alphaCharacters = GetAlphaCharacters(alphaCase);
        foreach (var alpha in Generate(alphaCharacters).Take(minAlpha))
        {
            result[index++] = alpha;
        }
        var restLength = length - index;
        if (restLength > 0)
        {
            var restCharacters = new List<char>(NumericChars.Concat(alphaCharacters));
            foreach (var rest in Generate(restCharacters).Take(restLength))
            {
                result[index++] = rest;
            }
        }

        // Shuffle so the required characters are not always at the front.
        return new string(result.OrderBy(x => Rnd.Next()).ToArray());
    }

    // Maps the requested case to the letters that may be used.
    private IList<char> GetAlphaCharacters(AlphaCase alphaCase)
    {
        switch (alphaCase)
        {
            case AlphaCase.Lower:
                return LowerAlphaChars;
            case AlphaCase.Upper:
                return UpperAlphaChars;
            case AlphaCase.Both:
            default:
                return new List<char>(LowerAlphaChars.Concat(UpperAlphaChars));
        }
    }

    /// <summary>Returns an endless sequence of random digits.</summary>
    public IEnumerable<char> GenerateNumeric()
    {
        return Generate(NumericChars);
    }

    /// <summary>Returns an endless sequence of random lowercase letters.</summary>
    public IEnumerable<char> GenerateLowerAlpha()
    {
        return Generate(LowerAlphaChars);
    }

    /// <summary>Returns an endless sequence of random uppercase letters.</summary>
    public IEnumerable<char> GenerateUpperAlpha()
    {
        return Generate(UpperAlphaChars);
    }

    /// <summary>
    /// Returns an endless sequence of characters picked at random from
    /// <paramref name="characters"/>, or an empty sequence when the list is empty.
    /// </summary>
    /// <param name="characters">The characters to pick from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
    public IEnumerable<char> Generate(IList<char> characters)
    {
        // Validate here rather than inside the iterator, whose body would not run
        // until the first element is requested.
        if (characters == null)
        {
            throw new ArgumentNullException("characters");
        }
        return GenerateIterator(characters);
    }

    // Lazily yields random picks; never terminates for a non-empty list.
    private IEnumerable<char> GenerateIterator(IList<char> characters)
    {
        if (characters.Count == 0)
        {
            yield break;
        }
        while (true)
        {
            yield return characters[Rnd.Next(characters.Count)];
        }
    }
}
/// <summary>
/// Selects which letter cases StringGenerator may use for alphabetic characters.
/// </summary>
public enum AlphaCase
{
// Lowercase letters only.
Lower,
// Uppercase letters only.
Upper,
// Lowercase and uppercase letters.
Both
}
/// <summary>
/// Source of random integers used by StringGenerator.
/// </summary>
public interface IRandom
{
// Returns a random integer. NOTE(review): the SecureRandom implementation in this
// file returns non-negative values; confirm other implementations do the same.
int Next();
// Returns a random integer limited by maxValue. NOTE(review): SecureRandom treats
// maxValue as an exclusive upper bound starting from 0; confirm for other implementations.
int Next(int maxValue);
}
/// <summary>
/// <see cref="IRandom"/> implementation that draws its values from a cryptographic
/// random number generator.
/// </summary>
public sealed class SecureRandom : IRandom
{
    private readonly RandomNumberGenerator Rng = RandomNumberGenerator.Create();

    /// <summary>Returns a random integer between 0 and <see cref="int.MaxValue"/>, inclusive.</summary>
    public int Next()
    {
        var data = new byte[sizeof(int)];
        Rng.GetBytes(data);
        // Clear only the sign bit. Masking with (int.MaxValue - 1) also cleared the
        // lowest bit, so every result was an even number.
        return BitConverter.ToInt32(data, 0) & int.MaxValue;
    }

    /// <summary>Returns a random integer in [0, <paramref name="maxValue"/>).</summary>
    /// <param name="maxValue">Exclusive upper bound; must not be negative.</param>
    public int Next(int maxValue)
    {
        return Next(0, maxValue);
    }

    /// <summary>
    /// Returns a random integer in [<paramref name="minValue"/>, <paramref name="maxValue"/>);
    /// returns <paramref name="minValue"/> when both bounds are equal.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="minValue"/> is greater than <paramref name="maxValue"/>.
    /// </exception>
    public int Next(int minValue, int maxValue)
    {
        if (minValue > maxValue)
        {
            throw new ArgumentOutOfRangeException("minValue", "minValue must not be greater than maxValue.");
        }
        // The range is computed in double so that maxValue - minValue cannot overflow.
        return (int)Math.Floor(minValue + ((double)maxValue - minValue) * NextDouble());
    }

    /// <summary>Returns a random number in [0.0, 1.0).</summary>
    public double NextDouble()
    {
        var data = new byte[sizeof(uint)];
        Rng.GetBytes(data);
        var randomUint = BitConverter.ToUInt32(data, 0);
        // Dividing by 2^32 keeps the result strictly below 1.
        return randomUint / (uint.MaxValue + 1d);
    }
}
Edit: This is an answer to the question of how to generate random alphanumeric strings with conditions. As stated for testing purposes. Not to be used as is in a context where security is relevant.
Edit 2: Shamelessly borrowing from this answer, the solution now uses a wrapper around RNGCryptoServiceProvider. Still not sure if this should be used in security relevant context, but at least now it should be "better" than simply using Random
In C#, can I convert a string value to a string literal, the way I would see it in code? I would like to replace tabs, newlines, etc. with their escape sequences.
If this code:
Console.WriteLine(someString);
produces:
Hello
World!
I want this code:
Console.WriteLine(ToLiteral(someString));
to produce:
\tHello\r\n\tWorld!\r\n
A long time ago, I found this:
/// <summary>
/// Renders <paramref name="input"/> as C# source text by asking the CodeDom C# provider
/// to generate code for a primitive string expression.
/// </summary>
/// <param name="input">The string to render.</param>
/// <returns>The code text written by the provider.</returns>
private static string ToLiteral(string input)
{
    var expression = new CodePrimitiveExpression(input);
    using (var output = new StringWriter())
    using (var csharp = CodeDomProvider.CreateProvider("CSharp"))
    {
        csharp.GenerateCodeFromExpression(expression, output, null);
        return output.ToString();
    }
}
This code:
var input = "\tHello\r\n\tWorld!";
Console.WriteLine(input);
Console.WriteLine(ToLiteral(input));
Produces:
Hello
World!
"\tHello\r\n\tWorld!"
These days, Graham discovered you can use Roslyn's Microsoft.CodeAnalysis.CSharp package on NuGet:
/// <summary>
/// Escapes <paramref name="valueTextForCompiler"/> with Roslyn's literal formatter,
/// without surrounding quotes.
/// </summary>
/// <param name="valueTextForCompiler">The raw string value.</param>
/// <returns>The text produced by <c>SymbolDisplay.FormatLiteral</c>.</returns>
private static string ToLiteral(string valueTextForCompiler)
{
    // Second argument: whether to wrap the result in quotes.
    const bool wrapInQuotes = false;
    string formatted = Microsoft.CodeAnalysis.CSharp.SymbolDisplay.FormatLiteral(valueTextForCompiler, wrapInQuotes);
    return formatted;
}
Use Regex.Escape(String):
Regex.Escape escapes a minimal set of characters (\, *, +, ?, |, {, [,
(, ), ^, $, ., #, and white space) by replacing them with their escape
codes.
There's a method for this in Roslyn's Microsoft.CodeAnalysis.CSharp package on NuGet:
/// <summary>
/// Returns the escaped form of <paramref name="valueTextForCompiler"/> as produced by
/// Roslyn's <c>SymbolDisplay.FormatLiteral</c>, with quoting turned off.
/// </summary>
/// <param name="valueTextForCompiler">The raw string value.</param>
/// <returns>The formatted text.</returns>
private static string ToLiteral(string valueTextForCompiler)
{
    bool quote = false; // ask for the text without enclosing quotes
    var literalText = Microsoft.CodeAnalysis.CSharp.SymbolDisplay.FormatLiteral(valueTextForCompiler, quote);
    return literalText;
}
Obviously, this didn't exist at the time of the original question, but it might help people who end up here from Google Search.
This is a fully working implementation, including escaping of Unicode and ASCII non-printable characters. It does not insert "+" signs like Hallgrim's answer.
/// <summary>
/// Converts <paramref name="input"/> to the text of a C# string literal, enclosed in
/// double quotes. Characters outside printable ASCII (0x20-0x7e) that have no short
/// escape are written as \uXXXX.
/// </summary>
/// <param name="input">The string to convert.</param>
/// <returns>The quoted, escaped literal text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
static string ToLiteral(string input) {
    if (input == null) {
        throw new ArgumentNullException("input");
    }
    StringBuilder literal = new StringBuilder(input.Length + 2);
    literal.Append("\"");
    foreach (var c in input) {
        switch (c) {
            case '\"': literal.Append("\\\""); break;
            case '\\': literal.Append(@"\\"); break;
            case '\0': literal.Append(@"\0"); break;
            case '\a': literal.Append(@"\a"); break;
            case '\b': literal.Append(@"\b"); break;
            case '\f': literal.Append(@"\f"); break;
            case '\n': literal.Append(@"\n"); break;
            case '\r': literal.Append(@"\r"); break;
            case '\t': literal.Append(@"\t"); break;
            case '\v': literal.Append(@"\v"); break;
            default:
                // ASCII printable character
                if (c >= 0x20 && c <= 0x7e) {
                    literal.Append(c);
                // As UTF16 escaped character
                } else {
                    literal.Append(@"\u");
                    literal.Append(((int)c).ToString("x4"));
                }
                break;
        }
    }
    literal.Append("\"");
    return literal.ToString();
}
Note that this also escapes all Unicode characters. If your environment supports them, you could change that part to escape only control characters:
// UTF16 control characters
} else if (Char.GetUnicodeCategory(c) == UnicodeCategory.Control) {
    // Only control characters are written as \uXXXX; everything else is copied as-is.
    literal.Append(@"\u");
    literal.Append(((int)c).ToString("x4"));
} else {
    literal.Append(c);
}
A more structured approach, including all escape sequences for strings and chars, is:
It doesn't replace Unicode characters with their literal equivalent. It doesn't cook eggs, either.
/// <summary>
/// Converts strings and characters to their C# literal text using a table of
/// escape sequences.
/// </summary>
public class ReplaceString
{
    // Maps each character that needs escaping to its escape sequence.
    static readonly IDictionary<string, string> m_replaceDict
        = new Dictionary<string, string>();

    // Character class of everything in m_replaceDict; \x00 is included so the
    // "\0" entry can actually be matched.
    const string ms_regexEscapes = @"[\a\b\f\n\r\t\v\\""\x00]";

    /// <summary>Replaces every escapable character in <paramref name="i_string"/> with its escape sequence.</summary>
    /// <param name="i_string">The string to escape.</param>
    /// <returns>The escaped text, without surrounding quotes.</returns>
    public static string StringLiteral(string i_string)
    {
        return Regex.Replace(i_string, ms_regexEscapes, match);
    }

    /// <summary>Returns <paramref name="c"/> as a single-quoted C# character literal.</summary>
    /// <param name="c">The character to render.</param>
    /// <returns>The literal text, e.g. <c>'c'</c> or <c>'\n'</c>.</returns>
    public static string CharLiteral(char c)
    {
        if (c == '\'')
        {
            return @"'\''";
        }

        // A double quote needs no escape inside a character literal; backslash and
        // control characters do, otherwise the literal would not be valid.
        string escaped;
        if (c != '"' && m_replaceDict.TryGetValue(c.ToString(), out escaped))
        {
            return "'" + escaped + "'";
        }
        return string.Format("'{0}'", c);
    }

    // MatchEvaluator: looks up the escape sequence for the matched character.
    private static string match(Match m)
    {
        string escaped;
        if (m_replaceDict.TryGetValue(m.ToString(), out escaped))
        {
            return escaped;
        }
        throw new NotSupportedException();
    }

    static ReplaceString()
    {
        m_replaceDict.Add("\a", @"\a");
        m_replaceDict.Add("\b", @"\b");
        m_replaceDict.Add("\f", @"\f");
        m_replaceDict.Add("\n", @"\n");
        m_replaceDict.Add("\r", @"\r");
        m_replaceDict.Add("\t", @"\t");
        m_replaceDict.Add("\v", @"\v");
        m_replaceDict.Add("\\", @"\\");
        m_replaceDict.Add("\0", @"\0");
        //The SO parser gets fooled by the verbatim version
        //of the string to replace - @"\"""
        //so use the 'regular' version
        m_replaceDict.Add("\"", "\\\"");
    }

    static void Main(string[] args)
    {
        string s = "here's a \"\n\tstring\" to test";
        Console.WriteLine(ReplaceString.StringLiteral(s));
        Console.WriteLine(ReplaceString.CharLiteral('c'));
        Console.WriteLine(ReplaceString.CharLiteral('\''));
    }
}
Try:
var t = HttpUtility.JavaScriptStringEncode(s);
/// <summary>
/// Extension method that replaces characters needing escapes with their escape sequences.
/// </summary>
public static class StringHelpers
{
    // Keys are regex patterns (the backslash key is already regex-escaped); values
    // are the replacement text.
    // NOTE(review): the double-quote entry expands to three backslashes followed by a
    // quote, not the usual \" - confirm this is intended.
    private static Dictionary<string, string> escapeMapping = new Dictionary<string, string>()
    {
        {"\"", @"\\\"""},
        {"\\\\", @"\\"},
        {"\a", @"\a"},
        {"\b", @"\b"},
        {"\f", @"\f"},
        {"\n", @"\n"},
        {"\r", @"\r"},
        {"\t", @"\t"},
        {"\v", @"\v"},
        {"\0", @"\0"},
    };

    // Alternation of all keys; must be declared after escapeMapping so the mapping
    // is initialized first.
    private static Regex escapeRegex = new Regex(string.Join("|", escapeMapping.Keys.ToArray()));

    /// <summary>Returns <paramref name="s"/> with every mapped character replaced.</summary>
    /// <param name="s">The string to escape.</param>
    /// <returns>The escaped string.</returns>
    public static string Escape(this string s)
    {
        return escapeRegex.Replace(s, EscapeMatchEval);
    }

    // Looks the matched text up directly, falling back to its regex-escaped form
    // (needed for the backslash, whose key is the pattern rather than the character).
    private static string EscapeMatchEval(Match m)
    {
        string replacement;
        if (escapeMapping.TryGetValue(m.Value, out replacement))
        {
            return replacement;
        }
        return escapeMapping[Regex.Escape(m.Value)];
    }
}
Hallgrim's answer is excellent, but the "+", newline and indent additions were breaking functionality for me. An easy way around it is:
/// <summary>
/// Renders <paramref name="input"/> as a C# string literal through CodeDom and removes
/// the quote-plus-newline-tab-quote joins the generator inserts when it splits a literal.
/// </summary>
/// <param name="input">The string to render.</param>
/// <returns>The literal on a single line.</returns>
private static string ToLiteral(string input)
{
    // Text CodeDom places between two pieces of a split literal, given a tab indent.
    string lineJoin = "\" +" + Environment.NewLine + "\t\"";
    var options = new CodeGeneratorOptions { IndentString = "\t" };

    using (var buffer = new StringWriter())
    using (var generator = CodeDomProvider.CreateProvider("CSharp"))
    {
        generator.GenerateCodeFromExpression(new CodePrimitiveExpression(input), buffer, options);
        return buffer.ToString().Replace(lineJoin, "");
    }
}
Here is a small improvement to Smilediver's answer. It does not escape all non-ASCII characters, only the ones that really need escaping.
using System;
using System.Globalization;
using System.Text;
/// <summary>
/// Extension method that converts a string to the text of a C# string literal.
/// </summary>
public static class CodeHelper
{
    /// <summary>
    /// Returns <paramref name="input"/> enclosed in double quotes, with quotes, backslashes
    /// and common control characters written as short escapes and any other control
    /// character written as \uXXXX. All other characters are copied unchanged.
    /// </summary>
    /// <param name="input">The string to convert.</param>
    /// <returns>The quoted, escaped literal text.</returns>
    public static string ToLiteral(this string input)
    {
        var literal = new StringBuilder(input.Length + 2);
        literal.Append("\"");
        foreach (var c in input)
        {
            switch (c)
            {
                case '\'': literal.Append(@"\'"); break;
                case '\"': literal.Append("\\\""); break;
                case '\\': literal.Append(@"\\"); break;
                case '\0': literal.Append(@"\0"); break;
                case '\a': literal.Append(@"\a"); break;
                case '\b': literal.Append(@"\b"); break;
                case '\f': literal.Append(@"\f"); break;
                case '\n': literal.Append(@"\n"); break;
                case '\r': literal.Append(@"\r"); break;
                case '\t': literal.Append(@"\t"); break;
                case '\v': literal.Append(@"\v"); break;
                default:
                    if (Char.GetUnicodeCategory(c) != UnicodeCategory.Control)
                    {
                        literal.Append(c);
                    }
                    else
                    {
                        // Remaining control characters have no short escape.
                        literal.Append(@"\u");
                        literal.Append(((ushort)c).ToString("x4"));
                    }
                    break;
            }
        }
        literal.Append("\"");
        return literal.ToString();
    }
}
Interesting question.
If you can't find a better method, you can always replace.
In case you're opting for it, you could use this C# Escape Sequence List:
\' - single quote, needed for character literals
\" - double quote, needed for string literals
\\ - backslash
\0 - Unicode character 0
\a - Alert (character 7)
\b - Backspace (character 8)
\f - Form feed (character 12)
\n - New line (character 10)
\r - Carriage return (character 13)
\t - Horizontal tab (character 9)
\v - Vertical tab (character 11)
\uxxxx - Unicode escape sequence for character with hex value xxxx
\xn[n][n][n] - Unicode escape sequence for character with hex value nnnn (variable length version of \uxxxx)
\Uxxxxxxxx - Unicode escape sequence for character with hex value xxxxxxxx (for generating surrogates)
This list can be found in the C# Frequently Asked Questions
What character escape sequences are available?
If JSON conventions are enough for the unescaped strings you want to get escaped and you already use Json.NET (Newtonsoft.Json) in your project (it has a pretty large overhead), you may use this package like the following:
using System;
using Newtonsoft.Json;
/// <summary>
/// Sample showing how to escape a string with Json.NET.
/// </summary>
public class Program
{
    public static void Main()
    {
        // The input holds a real line break; the output shows it as the two characters \n.
        Console.WriteLine(ToLiteral("abc\n123"));
    }

    /// <summary>
    /// Returns <paramref name="input"/> as a quoted JSON string literal with special
    /// characters escaped.
    /// </summary>
    /// <param name="input">The unescaped string.</param>
    /// <returns>The escaped, quoted text.</returns>
    private static string ToLiteral(string input)
    {
        // Serializing escapes the value; deserializing would do the opposite
        // (turn escape sequences back into characters).
        return JsonConvert.SerializeObject(input);
    }
}
/// <summary>
/// Extension method that escapes control characters (below 0x20), double quotes and
/// backslashes in a string.
/// </summary>
public static class StringEscape
{
    // Every character that needs escaping: code points 0x00-0x1f, then '"' and '\'.
    static char[] toEscape = "\0\x1\x2\x3\x4\x5\x6\a\b\t\n\v\f\r\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\"\\".ToCharArray();

    // Escape text for code points 0x00-0x1f, indexed by the character's code point.
    static string[] literals = @"\0,\x0001,\x0002,\x0003,\x0004,\x0005,\x0006,\a,\b,\t,\n,\v,\f,\r,\x000e,\x000f,\x0010,\x0011,\x0012,\x0013,\x0014,\x0015,\x0016,\x0017,\x0018,\x0019,\x001a,\x001b,\x001c,\x001d,\x001e,\x001f".Split(new char[] { ',' });

    /// <summary>Returns <paramref name="input"/> with all escapable characters replaced.</summary>
    /// <param name="input">The string to escape.</param>
    /// <returns>The same instance when nothing needs escaping, otherwise the escaped copy.</returns>
    public static string Escape(this string input)
    {
        int i = input.IndexOfAny(toEscape);
        if (i < 0) return input;
        var sb = new System.Text.StringBuilder(input.Length + 5);
        int j = 0; // start of the not-yet-copied plain text
        do
        {
            // Copy the plain run before the character, then its escape.
            sb.Append(input, j, i - j);
            var c = input[i];
            if (c < 0x20) sb.Append(literals[c]); else sb.Append(@"\").Append(c);
            // The next search starts after the escaped character, so a hit is always > 0.
        } while ((i = input.IndexOfAny(toEscape, j = ++i)) > 0);
        return sb.Append(input, j, input.Length - j).ToString();
    }
}
My attempt at adding ToVerbatim to Hallgrim's accepted answer:
/// <summary>
/// Generates the C# literal for <paramref name="input"/> with CodeDom (tab indent) and
/// joins the pieces of a literal that the generator split across lines.
/// </summary>
/// <param name="input">The string to render.</param>
/// <returns>The literal text with the split markers removed.</returns>
private static string ToLiteral(string input)
{
    StringWriter sink = new StringWriter();
    try
    {
        CodeDomProvider csharp = CodeDomProvider.CreateProvider("CSharp");
        try
        {
            CodeGeneratorOptions settings = new CodeGeneratorOptions();
            settings.IndentString = "\t";
            csharp.GenerateCodeFromExpression(new CodePrimitiveExpression(input), sink, settings);

            // Marker between two pieces of a split literal: quote, " +", newline, tab, quote.
            string splitMarker = string.Format("\" +{0}\t\"", Environment.NewLine);
            return sink.ToString().Replace(splitMarker, "");
        }
        finally
        {
            csharp.Dispose();
        }
    }
    finally
    {
        sink.Dispose();
    }
}
/// <summary>
/// Builds verbatim-style literal text for <paramref name="input"/>: the result of
/// ToLiteral prefixed with '@', with escaped line breaks turned into real ones.
/// </summary>
/// <param name="input">The string to render.</param>
/// <returns>The verbatim-style text.</returns>
private static string ToVerbatim(string input)
{
    string literal = ToLiteral(input);
    // NOTE(review): only the \r\n escape is converted; other escapes produced by
    // ToLiteral (such as \t or \") are left as they are.
    string verbatim = "@" + literal.Replace(@"\r\n", Environment.NewLine);
    return verbatim;
}
Hallgrim's answer was excellent. Here's a small tweak in case you need to parse out additional white space characters and linebreaks with a C# regular expression. I needed this in the case of a serialized JSON value for insertion into Google Sheets and ran into trouble as the code was inserting tabs, +, spaces, etc.
provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
var literal = writer.ToString();
// Removes the join CodeDom inserts when it splits a literal: a closing quote, " +",
// a line break, the indentation and the next opening quote.
var r2 = new Regex(@"\"" \+.\n[\s]+\""", RegexOptions.ECMAScript);
literal = r2.Replace(literal, "");
return literal;
I submit my own implementation, which handles null values and should be more performant on account of using array lookup tables, manual hex conversion, and avoiding switch statements.
using System;
using System.Text;
using System.Linq;
/// <summary>
/// Encodes strings as C# string literals using lookup tables instead of a switch.
/// </summary>
public static class StringLiteralEncoding {
    private static readonly char[] HEX_DIGIT_LOWER = "0123456789abcdef".ToCharArray();

    // Indexed by character code: the letter that follows the backslash in the escape,
    // or '\0' when the character has no short escape.
    private static readonly char[] LITERALENCODE_ESCAPE_CHARS;

    static StringLiteralEncoding() {
        // Each entry is the character to escape followed by its escape letter.
        // '?' is deliberately absent: \? is a C/C++ escape and is not valid in C#.
        var escapes = new string[] { "\aa", "\bb", "\ff", "\nn", "\rr", "\tt", "\vv", "\"\"", "\\\\", "\00" };
        LITERALENCODE_ESCAPE_CHARS = new char[escapes.Max(e => e[0]) + 1];
        foreach(var escape in escapes)
            LITERALENCODE_ESCAPE_CHARS[escape[0]] = escape[1];
    }

    /// <summary>
    /// Convert the string to the equivalent C# string literal, enclosing the string in double quotes and inserting
    /// escape sequences as necessary.
    /// </summary>
    /// <param name="s">The string to be converted to a C# string literal.</param>
    /// <returns><paramref name="s"/> represented as a C# string literal, or <c>null</c> (as text) when it is null.</returns>
    public static string Encode(string s) {
        if(null == s) return "null";

        var sb = new StringBuilder(s.Length + 2).Append('"');
        for(var rp = 0; rp < s.Length; rp++) {
            var c = s[rp];
            if(c < LITERALENCODE_ESCAPE_CHARS.Length && '\0' != LITERALENCODE_ESCAPE_CHARS[c])
                sb.Append('\\').Append(LITERALENCODE_ESCAPE_CHARS[c]);
            else if('~' >= c && c >= ' ')
                sb.Append(c); // printable ASCII
            else
                // Always four hex digits, so a following hex-looking character
                // cannot be absorbed into the escape.
                sb.Append(@"\x")
                  .Append(HEX_DIGIT_LOWER[c >> 12 & 0x0F])
                  .Append(HEX_DIGIT_LOWER[c >> 8 & 0x0F])
                  .Append(HEX_DIGIT_LOWER[c >> 4 & 0x0F])
                  .Append(HEX_DIGIT_LOWER[c & 0x0F]);
        }

        return sb.Append('"').ToString();
    }
}
Code:
// Regular literal: the escape sequences become real tab and line-break characters.
string someString1 = "\tHello\r\n\tWorld!\r\n";
// Verbatim literal: backslashes are kept as written, so the escapes are printed literally.
string someString2 = @"\tHello\r\n\tWorld!\r\n";
Console.WriteLine(someString1);
Console.WriteLine(someString2);
Output:
Hello
World!
\tHello\r\n\tWorld!\r\n
I'm trying to do a simple string generation based on pattern.
My idea was to use Regex to do simple replace.
I've started with simple method:
/// <summary>
/// Replaces every [d], [d:] or [d:N] group in <paramref name="template"/> with a random digit.
/// </summary>
/// <param name="template">Text containing digit groups.</param>
/// <returns>The text with each group replaced by its own digit.</returns>
private static string parseTemplate(string template)
{
    // A MatchEvaluator runs once per match. Passing RandomDigit() as a plain string
    // evaluated it a single time and reused that digit for every group.
    // The optional length after ':' is matched but not applied here.
    return Regex.Replace(template, @"(\[d)((:)?([\d]+)?)\]", m => RandomDigit());
}
// Shared generator: a new Random per call can be seeded identically on calls made
// in quick succession and would then return the same digit every time.
private static readonly Random s_digitRandom = new Random();

/// <summary>Returns one random decimal digit ("0".."9") as a string.</summary>
private static string RandomDigit()
{
    // The upper bound is exclusive, so 10 is required for 9 to be possible.
    return s_digitRandom.Next(0, 10).ToString();
}
What this does for now is replacing groups like [d], [d:3] with what supposed to be random digit.
Unfortunately every group is replaced with the same digit, for example if I put test [d][d][d:3] my method will return test 222.
I would like to get different digit in every place, like test 361.
Second problem I have is way to handle length:
right now I must specify [d] for every digit I want, but it would be easier to specify [d:3] and get the same output.
I know that there is a project called Fare, but I would like to do this without this library
For now I only search for [d], but is this method will work fine there won't be a problem to add other groups for example: [s] for special characters or any other type of patters.
Edit1
As it was suggested I changed Random to a static variable like so:
/// <summary>
/// Replaces every [d], [d:] or [d:N] group in <paramref name="template"/> with a random digit.
/// </summary>
/// <param name="template">Text containing digit groups.</param>
/// <returns>The text with each group replaced by its own digit.</returns>
private static string parseTemplate(string template)
{
    // The evaluator is invoked for each match, so every group gets a fresh digit.
    // The optional length after ':' is matched but not applied here.
    return Regex.Replace(template, @"(\[d)((:)?([\d]+)?)\]", m => RandomDigit());
}

// Single shared generator for all digits.
private static Random r = new Random();

/// <summary>Returns one random decimal digit ("0".."9") as a string.</summary>
private static string RandomDigit()
{
    // The upper bound is exclusive, so 10 is required for 9 to be possible.
    return r.Next(0, 10).ToString();
}
Problem is that when I call my code like so:
Console.WriteLine(parseTemplate("test [d:2][d:][d]"));
Console.WriteLine(parseTemplate("test [d:2][d:][d]"));
I get output like this
test 222
test 555
I would like output like this (for example):
test 265
test 962
I think that problem is with Regex.Replace which calls my RandomDigit only once.
For your first issue: When you call new Random() you are seeding with the same value every time you call the function - initialise a static Random member variable once then return r.Next(0,9).ToString();
Edit:
In answer to your comment, try using MatchEvaluator with a delegate, something like the following (untested):
/// <summary>
/// MatchEvaluator that rebuilds the matched text with every 'd' replaced by a random digit.
/// </summary>
/// <param name="m">The match being replaced.</param>
/// <returns>The replacement text.</returns>
static string RandomReplacer(Match m)
{
    var result = new StringBuilder();
    foreach (char c in m.ToString())
    {
        // Separate Append calls: a conditional expression cannot mix a string
        // (RandomDigit) with a char.
        // NOTE(review): characters other than 'd' (brackets, ':' and any length) are
        // copied through unchanged.
        if (c == 'd')
        {
            result.Append(RandomDigit());
        }
        else
        {
            result.Append(c);
        }
    }
    return result.ToString();
}
/// <summary>
/// Replaces every [d], [d:] or [d:N] group in <paramref name="template"/> with the text
/// returned by RandomReplacer for that group.
/// </summary>
/// <param name="template">Text containing digit groups.</param>
/// <returns>The text after replacement.</returns>
private static string parseTemplate(string template)
{
    // The evaluator delegate is called once for each match.
    return Regex.Replace(template, @"(\[d)((:)?([\d]+)?)\]", new MatchEvaluator(RandomReplacer));
}
You can then extend this approach to match [d:3] and parse it in your MatchEvaluator accordingly, solving your second issue.
Assuming [d:3] means "three random digits", the following MatchEvaluator uses the length (read from group 1 of the pattern below) to generate a random digit string:
/// <summary>
/// MatchEvaluator that produces as many random digits as the number captured in
/// group 1 of the match, or one digit when that group holds no parsable number.
/// </summary>
/// <param name="m">The match being replaced.</param>
/// <returns>The random digit string.</returns>
static string ReplaceSingleMatch(Match m)
{
    int count;
    bool hasExplicitCount = int.TryParse(m.Groups[1].Value, out count);
    if (!hasExplicitCount)
    {
        count = 1;
    }

    var digits = new char[count];
    int position = 0;
    while (position < digits.Length)
    {
        // RandomDigit returns a one-character string; take that character.
        digits[position] = RandomDigit()[0];
        position++;
    }
    return new string(digits);
}
You can then call this as follows:
/// <summary>
/// Replaces every [d] or [d:N] group in <paramref name="template"/> using ReplaceSingleMatch.
/// </summary>
/// <param name="template">Text containing digit groups.</param>
/// <returns>The text after replacement.</returns>
private static string parseTemplate(string template)
{
    // Group 1 captures the optional digit count after the colon.
    return Regex.Replace(template, @"\[d(?::(\d+))?\]", ReplaceSingleMatch);
}
You might want to then change RandomDigit to return a single char rather than a string, or to take an int and return multiple characters.
/// <summary>
/// Expands a mask: an unescaped 'd' becomes a random digit, 'd:N' (N a single digit)
/// becomes N random digits, and '\d' is an ordinary letter d. Backslashes are dropped.
/// </summary>
/// <param name="mask">The mask text.</param>
/// <returns>The expanded text.</returns>
private static string GenerateMask(string mask)
{
    StringBuilder output = new StringBuilder();
    for (int i = 0; i < mask.Length; i++)
    {
        char current = mask[i];
        // Guard i > 0 so a mask that starts with 'd' does not index before the start.
        bool escaped = i > 0 && mask[i - 1] == '\\';
        if (current == 'd' && !escaped)
        {
            int quantifier = 1;
            // Optional ":N" suffix; checked against the end of the mask.
            if (i + 2 < mask.Length && mask[i + 1] == ':' && mask[i + 2] >= '0' && mask[i + 2] <= '9')
            {
                quantifier = mask[i + 2] - '0';
                // Skip the suffix only when it was actually present.
                i += 2;
            }
            output.Append(GetRandomDigit(quantifier));
        }
        else if (current != '\\')
        {
            output.Append(current);
        }
    }
    return output.ToString();
}
// Shared generator: creating a Random on every call can repeat the same seed on
// calls made in quick succession.
private static readonly Random s_maskRandom = new Random();

/// <summary>Returns a string of <paramref name="length"/> random decimal digits.</summary>
/// <param name="length">Number of digits; zero or less yields an empty string.</param>
/// <returns>The digit string.</returns>
private static string GetRandomDigit(int length)
{
    StringBuilder output = new StringBuilder();
    // "<" rather than "!=" so a negative length cannot loop forever.
    while (output.Length < length)
    {
        // The upper bound is exclusive, so 10 is required for 9 to be possible.
        output.Append(s_maskRandom.Next(0, 10));
    }
    return output.ToString();
}
There's a custom algorithm I just put together for fun mostly and here's the implementation:
// Verbatim string so that \d reaches GenerateMask as a backslash followed by 'd'.
Console.WriteLine(GenerateMask(@"Hey Da\d, meet my new girlfrien\d she's d:2"));
//Output: Hey Dad, meet my new girlfriend she's 44
In C#, can I convert a string value to a string literal, the way I would see it in code? I would like to replace tabs, newlines, etc. with their escape sequences.
If this code:
Console.WriteLine(someString);
produces:
Hello
World!
I want this code:
Console.WriteLine(ToLiteral(someString));
to produce:
\tHello\r\n\tWorld!\r\n
A long time ago, I found this:
/// <summary>
/// Uses the CodeDom C# provider to write <paramref name="input"/> as a primitive
/// expression and returns the generated code text.
/// </summary>
/// <param name="input">The string to render.</param>
/// <returns>The text the provider wrote.</returns>
private static string ToLiteral(string input)
{
    StringWriter generated = new StringWriter();
    try
    {
        CodeDomProvider codeProvider = CodeDomProvider.CreateProvider("CSharp");
        try
        {
            CodePrimitiveExpression primitive = new CodePrimitiveExpression(input);
            codeProvider.GenerateCodeFromExpression(primitive, generated, null);
            return generated.ToString();
        }
        finally
        {
            codeProvider.Dispose();
        }
    }
    finally
    {
        generated.Dispose();
    }
}
This code:
var input = "\tHello\r\n\tWorld!";
Console.WriteLine(input);
Console.WriteLine(ToLiteral(input));
Produces:
Hello
World!
"\tHello\r\n\tWorld!"
These days, Graham discovered you can use Roslyn's Microsoft.CodeAnalysis.CSharp package on NuGet:
/// <summary>
/// Formats <paramref name="valueTextForCompiler"/> with Roslyn's
/// <c>SymbolDisplay.FormatLiteral</c>, asking for no surrounding quotes.
/// </summary>
/// <param name="valueTextForCompiler">The raw string value.</param>
/// <returns>The formatted text.</returns>
private static string ToLiteral(string valueTextForCompiler)
{
    const bool includeQuotes = false;
    string result = Microsoft.CodeAnalysis.CSharp.SymbolDisplay.FormatLiteral(valueTextForCompiler, includeQuotes);
    return result;
}
Use Regex.Escape(String):
Regex.Escape escapes a minimal set of characters (\, *, +, ?, |, {, [,
(, ), ^, $, ., #, and white space) by replacing them with their escape
codes.
There's a method for this in Roslyn's Microsoft.CodeAnalysis.CSharp package on NuGet:
/// <summary>
/// Delegates to Roslyn's <c>SymbolDisplay.FormatLiteral</c> to format the given text,
/// passing <c>false</c> as the second argument.
/// </summary>
/// <param name="valueTextForCompiler">The raw string value to format.</param>
private static string ToLiteral(string valueTextForCompiler)
{
    const bool quote = false;
    string formatted = Microsoft.CodeAnalysis.CSharp.SymbolDisplay.FormatLiteral(valueTextForCompiler, quote);
    return formatted;
}
Obviously, this didn't exist at the time of the original question, but it might help people who end up here from Google Search.
This is a fully working implementation, including escaping of Unicode and ASCII non-printable characters. It does not insert "+" signs like Hallgrim's answer.
/// <summary>
/// Converts <paramref name="input"/> to a double-quoted C# string literal.
/// The simple escapes (\", \\, \0, \a, \b, \f, \n, \r, \t, \v) are emitted by name,
/// printable ASCII (0x20-0x7e) is copied verbatim, and every other character is
/// written as a four-digit \uXXXX escape.
/// </summary>
/// <param name="input">The raw string to encode.</param>
/// <returns>The quoted, escaped literal text.</returns>
static string ToLiteral(string input) {
    StringBuilder literal = new StringBuilder(input.Length + 2);
    literal.Append("\"");
    foreach (var c in input) {
        switch (c) {
            case '\"': literal.Append("\\\""); break;
            // Verbatim strings (@"...") keep the backslash as a literal character.
            case '\\': literal.Append(@"\\"); break;
            case '\0': literal.Append(@"\0"); break;
            case '\a': literal.Append(@"\a"); break;
            case '\b': literal.Append(@"\b"); break;
            case '\f': literal.Append(@"\f"); break;
            case '\n': literal.Append(@"\n"); break;
            case '\r': literal.Append(@"\r"); break;
            case '\t': literal.Append(@"\t"); break;
            case '\v': literal.Append(@"\v"); break;
            default:
                if (c >= 0x20 && c <= 0x7e) {
                    // ASCII printable character
                    literal.Append(c);
                } else {
                    // As UTF16 escaped character
                    literal.Append(@"\u");
                    literal.Append(((int)c).ToString("x4"));
                }
                break;
        }
    }
    literal.Append("\"");
    return literal.ToString();
}
Note that this also escapes all Unicode characters. If your environment supports them, you could change that part to escape only control characters:
// UTF16 control characters: escape only these, copy every other character as-is
} else if (Char.GetUnicodeCategory(c) == UnicodeCategory.Control) {
    literal.Append(@"\u");
    literal.Append(((int)c).ToString("x4"));
} else {
    literal.Append(c);
}
A more structured approach, including all escape sequences for strings and chars, is:
It doesn't replace Unicode characters with their literal equivalent. It doesn't cook eggs, either.
/// <summary>
/// Replaces special characters in strings and chars with their C# escape sequences,
/// driven by a lookup table. Unlike a real literal, the results of
/// <see cref="StringLiteral"/> are not wrapped in double quotes.
/// </summary>
public class ReplaceString
{
    // Maps each raw character (as a one-character string) to its escape sequence.
    static readonly IDictionary<string, string> m_replaceDict
        = new Dictionary<string, string>
        {
            { "\a", @"\a" },
            { "\b", @"\b" },
            { "\f", @"\f" },
            { "\n", @"\n" },
            { "\r", @"\r" },
            { "\t", @"\t" },
            { "\v", @"\v" },
            { "\\", @"\\" },
            { "\0", @"\0" },
            //The SO parser gets fooled by the verbatim version
            //of the string to replace - @"\"""
            //so use the 'regular' version
            { "\"", "\\\"" },
        };

    // Character class of everything the table can replace. Inside a class \b means
    // backspace, and \0 (NUL) is listed so the table's "\0" entry can actually match.
    const string ms_regexEscapes = @"[\a\b\f\n\r\t\v\\""\0]";

    /// <summary>Returns <paramref name="i_string"/> with every mapped character escaped.</summary>
    public static string StringLiteral(string i_string)
    {
        return Regex.Replace(i_string, ms_regexEscapes, match);
    }

    /// <summary>
    /// Returns <paramref name="c"/> wrapped in single quotes; only the single quote
    /// itself is escaped, every other character is inserted unchanged.
    /// </summary>
    public static string CharLiteral(char c)
    {
        return c == '\'' ? @"'\''" : string.Format("'{0}'", c);
    }

    /// <summary>Match evaluator: looks up the escape sequence for the matched character.</summary>
    /// <exception cref="NotSupportedException">The matched text has no table entry.</exception>
    private static string match(Match m)
    {
        string replacement;
        // Single lookup instead of ContainsKey followed by the indexer.
        if (m_replaceDict.TryGetValue(m.ToString(), out replacement))
        {
            return replacement;
        }
        throw new NotSupportedException();
    }

    static void Main(string[] args){
        string s = "here's a \"\n\tstring\" to test";
        Console.WriteLine(ReplaceString.StringLiteral(s));
        Console.WriteLine(ReplaceString.CharLiteral('c'));
        Console.WriteLine(ReplaceString.CharLiteral('\''));
    }
}
Try:
var t = HttpUtility.JavaScriptStringEncode(s);
/// <summary>
/// Extension helper that replaces special characters with their C# escape sequences.
/// </summary>
public static class StringHelpers
{
    // Key: regex pattern for the character to find. Value: the text to emit for it.
    // Only the backslash needs a regex-escaped key ("\\\\" is the two-character pattern \\).
    private static readonly Dictionary<string, string> escapeMapping = new Dictionary<string, string>()
    {
        {"\"", @"\"""},     // quote -> \"  (was emitting three backslashes before the quote)
        {"\\\\", @"\\"},
        {"\a", @"\a"},
        {"\b", @"\b"},
        {"\f", @"\f"},
        {"\n", @"\n"},
        {"\r", @"\r"},
        {"\t", @"\t"},
        {"\v", @"\v"},
        {"\0", @"\0"},
    };

    // Alternation of all keys; must be declared after escapeMapping so the map is initialized first.
    private static readonly Regex escapeRegex = new Regex(string.Join("|", escapeMapping.Keys.ToArray()));

    /// <summary>Returns <paramref name="s"/> with every mapped character replaced by its escape sequence.</summary>
    public static string Escape(this string s)
    {
        return escapeRegex.Replace(s, EscapeMatchEval);
    }

    /// <summary>Match evaluator: maps the matched character to its escape sequence.</summary>
    private static string EscapeMatchEval(Match m)
    {
        string replacement;
        if (escapeMapping.TryGetValue(m.Value, out replacement))
        {
            return replacement;
        }
        // A matched backslash is stored under its regex-escaped key, so escape before looking it up.
        return escapeMapping[Regex.Escape(m.Value)];
    }
}
Hallgrim's answer is excellent, but the "+", newline and indent additions were breaking functionality for me. An easy way around it is:
/// <summary>
/// Renders <paramref name="input"/> as C# source text with the CodeDom C# generator,
/// then removes the quote / plus / line break / tab sequences matched by the cleanup
/// pattern so the result is one uninterrupted literal.
/// </summary>
/// <param name="input">The raw string value to render.</param>
private static string ToLiteral(string input)
{
    var options = new CodeGeneratorOptions { IndentString = "\t" };
    using (var writer = new StringWriter())
    using (var provider = CodeDomProvider.CreateProvider("CSharp"))
    {
        provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, options);
        // Text searched for: closing quote, " +", a line break, one tab of indent, opening quote.
        string continuation = string.Format("\" +{0}\t\"", Environment.NewLine);
        return writer.ToString().Replace(continuation, "");
    }
}
Here is a small improvement on Smilediver's answer. It does not escape all non-ASCII characters, only the ones that really need escaping.
using System;
using System.Globalization;
using System.Text;
/// <summary>
/// Extension helper that converts a string into a double-quoted C# string literal.
/// </summary>
public static class CodeHelper
{
    /// <summary>
    /// Returns <paramref name="input"/> as a quoted C# literal. The simple escapes are
    /// written by name, characters in the Unicode Control category are written as
    /// \uXXXX, and everything else (including non-ASCII text) is copied unchanged.
    /// </summary>
    /// <param name="input">The raw string to encode.</param>
    public static string ToLiteral(this string input)
    {
        var literal = new StringBuilder(input.Length + 2);
        literal.Append("\"");
        foreach (var c in input)
        {
            switch (c)
            {
                // Verbatim strings (@"...") keep the backslash as a literal character.
                case '\'': literal.Append(@"\'"); break;
                case '\"': literal.Append("\\\""); break;
                case '\\': literal.Append(@"\\"); break;
                case '\0': literal.Append(@"\0"); break;
                case '\a': literal.Append(@"\a"); break;
                case '\b': literal.Append(@"\b"); break;
                case '\f': literal.Append(@"\f"); break;
                case '\n': literal.Append(@"\n"); break;
                case '\r': literal.Append(@"\r"); break;
                case '\t': literal.Append(@"\t"); break;
                case '\v': literal.Append(@"\v"); break;
                default:
                    if (Char.GetUnicodeCategory(c) != UnicodeCategory.Control)
                    {
                        literal.Append(c);
                    }
                    else
                    {
                        // Remaining control characters have no named escape.
                        literal.Append(@"\u");
                        literal.Append(((ushort)c).ToString("x4"));
                    }
                    break;
            }
        }
        literal.Append("\"");
        return literal.ToString();
    }
}
Interesting question.
If you can't find a better method, you can always replace.
In case you're opting for it, you could use this C# Escape Sequence List:
\' - single quote, needed for character literals
\" - double quote, needed for string literals
\\ - backslash
\0 - Unicode character 0
\a - Alert (character 7)
\b - Backspace (character 8)
\f - Form feed (character 12)
\n - New line (character 10)
\r - Carriage return (character 13)
\t - Horizontal tab (character 9)
\v - Vertical tab (character 11)
\uxxxx - Unicode escape sequence for character with hex value xxxx
\xn[n][n][n] - Unicode escape sequence for character with hex value nnnn (variable length version of \uxxxx)
\Uxxxxxxxx - Unicode escape sequence for character with hex value xxxxxxxx (for generating surrogates)
This list can be found in the C# Frequently Asked Questions
What character escape sequences are available?
If JSON conventions are enough for the unescaped strings you want to get escaped and you already use Json.NET (Newtonsoft.Json) in your project (it has a pretty large overhead), you may use this package like the following:
using System;
using Newtonsoft.Json;
public class Program
{
    public static void Main()
    {
        // Verbatim string: the argument contains a literal backslash followed by 'n'.
        Console.WriteLine(ToLiteral(@"abc\n123"));
    }

    /// <summary>
    /// Wraps <paramref name="input"/> in double quotes and parses the result as a JSON
    /// string with Json.NET, returning the deserialized value.
    /// </summary>
    /// <remarks>
    /// NOTE(review): deserializing decodes JSON escape sequences found in the input
    /// rather than producing them - confirm this is the intended direction.
    /// </remarks>
    private static string ToLiteral(string input)
    {
        return JsonConvert.DeserializeObject<string>("\"" + input + "\"");
    }
}
/// <summary>
/// Extension helper that escapes control characters (below 0x20), double quotes and
/// backslashes. Strings without any such character are returned as the same instance.
/// </summary>
public static class StringEscape
{
    // Every character that needs escaping: 0x00-0x1f, then the double quote and the backslash.
    static char[] toEscape = "\0\x1\x2\x3\x4\x5\x6\a\b\t\n\v\f\r\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\"\\".ToCharArray();

    // Escape text for 0x00-0x1f, indexed by character code.
    static string[] literals = @"\0,\x0001,\x0002,\x0003,\x0004,\x0005,\x0006,\a,\b,\t,\n,\v,\f,\r,\x000e,\x000f,\x0010,\x0011,\x0012,\x0013,\x0014,\x0015,\x0016,\x0017,\x0018,\x0019,\x001a,\x001b,\x001c,\x001d,\x001e,\x001f".Split(new char[] { ',' });

    /// <summary>Returns <paramref name="input"/> with all special characters escaped.</summary>
    public static string Escape(this string input)
    {
        int i = input.IndexOfAny(toEscape);
        if (i < 0) return input; // nothing to escape: skip the allocation

        var sb = new System.Text.StringBuilder(input.Length + 5);
        int j = 0; // start of the run of plain characters not yet copied
        while (i >= 0)
        {
            sb.Append(input, j, i - j);
            var c = input[i];
            if (c < 0x20)
            {
                sb.Append(literals[c]);
            }
            else
            {
                // Quote or backslash: prefix it with a backslash.
                sb.Append(@"\").Append(c);
            }
            j = i + 1;
            i = input.IndexOfAny(toEscape, j);
        }
        return sb.Append(input, j, input.Length - j).ToString();
    }
}
My attempt at adding ToVerbatim to Hallgrim's accepted answer:
/// <summary>
/// Produces C# source text for <paramref name="input"/> with the CodeDom C# generator
/// (tab indentation), then strips the quote / plus / line break / tab sequences
/// matched by the cleanup pattern.
/// </summary>
private static string ToLiteral(string input)
{
    using (var sink = new StringWriter())
    {
        using (var generator = CodeDomProvider.CreateProvider("CSharp"))
        {
            var tabIndent = new CodeGeneratorOptions { IndentString = "\t" };
            var expression = new CodePrimitiveExpression(input);
            generator.GenerateCodeFromExpression(expression, sink, tabIndent);

            string generated = sink.ToString();
            string lineJoin = string.Format("\" +{0}\t\"", Environment.NewLine);
            return generated.Replace(lineJoin, "");
        }
    }
}
/// <summary>
/// Builds a verbatim-style (@"...") rendering of <paramref name="input"/>: takes the
/// regular literal from ToLiteral, turns each \r\n escape back into a real line
/// break, and prefixes the result with '@'.
/// </summary>
/// <remarks>
/// NOTE(review): only the \r\n escape is converted; any other escape produced by
/// ToLiteral is left as-is in the result.
/// </remarks>
private static string ToVerbatim(string input)
{
    string literal = ToLiteral(input);
    // '@' is the verbatim-string prefix; @"\r\n" is the four-character escape text to find.
    string verbatim = "@" + literal.Replace(@"\r\n", Environment.NewLine);
    return verbatim;
}
Hallgrim's answer was excellent. Here's a small tweak in case you need to parse out additional white space characters and linebreaks with a C# regular expression. I needed this in the case of a serialized JSON value for insertion into Google Sheets and ran into trouble as the code was inserting tabs, +, spaces, etc.
provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
var literal = writer.ToString();
// Matches: a quote, " +", one character, a newline, a run of whitespace, and a quote;
// each such sequence is removed from the generated text.
var r2 = new Regex(@"\"" \+.\n[\s]+\""", RegexOptions.ECMAScript);
literal = r2.Replace(literal, "");
return literal;
I submit my own implementation, which handles null values and should be more performant on account of using array lookup tables, manual hex conversion, and avoiding switch statements.
using System;
using System.Text;
using System.Linq;
/// <summary>
/// Encodes strings as C# string literals using array lookup tables and manual hex conversion.
/// </summary>
public static class StringLiteralEncoding {
    private static readonly char[] HEX_DIGIT_LOWER = "0123456789abcdef".ToCharArray();

    // Indexed by character code: the letter that follows the backslash in the escape
    // sequence, or '\0' when the character has no named escape.
    private static readonly char[] LITERALENCODE_ESCAPE_CHARS;

    static StringLiteralEncoding() {
        // Each entry is two characters: the raw character, then its escape letter.
        // '?' is deliberately absent: "\?" is a C/C++ escape and is rejected by the C# compiler.
        var escapes = new string[] { "\aa", "\bb", "\ff", "\nn", "\rr", "\tt", "\vv", "\"\"", "\\\\", "\00" };
        LITERALENCODE_ESCAPE_CHARS = new char[escapes.Max(e => e[0]) + 1];
        foreach(var escape in escapes)
            LITERALENCODE_ESCAPE_CHARS[escape[0]] = escape[1];
    }

    /// <summary>
    /// Convert the string to the equivalent C# string literal, enclosing the string in double quotes and inserting
    /// escape sequences as necessary.
    /// </summary>
    /// <param name="s">The string to be converted to a C# string literal.</param>
    /// <returns><paramref name="s"/> represented as a C# string literal, or the text <c>null</c> when <paramref name="s"/> is null.</returns>
    public static string Encode(string s) {
        if(null == s) return "null";
        var sb = new StringBuilder(s.Length + 2).Append('"');
        for(var rp = 0; rp < s.Length; rp++) {
            var c = s[rp];
            if(c < LITERALENCODE_ESCAPE_CHARS.Length && '\0' != LITERALENCODE_ESCAPE_CHARS[c])
                // Character with a named escape.
                sb.Append('\\').Append(LITERALENCODE_ESCAPE_CHARS[c]);
            else if('~' >= c && c >= ' ')
                // Printable ASCII is copied unchanged.
                sb.Append(c);
            else
                // Everything else: \x followed by exactly four lowercase hex digits.
                sb.Append(@"\x")
                    .Append(HEX_DIGIT_LOWER[c >> 12 & 0x0F])
                    .Append(HEX_DIGIT_LOWER[c >> 8 & 0x0F])
                    .Append(HEX_DIGIT_LOWER[c >> 4 & 0x0F])
                    .Append(HEX_DIGIT_LOWER[c & 0x0F]);
        }
        return sb.Append('"').ToString();
    }
}
Code:
// Regular literal: the escape sequences are turned into real tab / CR / LF characters.
string someString1 = "\tHello\r\n\tWorld!\r\n";
// Verbatim literal (@"..."): the backslashes stay in the string as ordinary characters.
string someString2 = @"\tHello\r\n\tWorld!\r\n";
Console.WriteLine(someString1);
Console.WriteLine(someString2);
Output:
Hello
World!
\tHello\r\n\tWorld!\r\n