PHP Equivalent to C# GetBytesFromUTF8 - c#

I am trying to create a php function thats will allow me access to a dotnet single sign on system and I am hung up on finding a php equivalent to GetBytesFromUTF8, I have tried ord and mb_string to no avail. Any ideas for a php equivalent to the C# GetBytesFromUTF8?
//Function to Create the SSO function SSO($key,$uid){ $lenth=32; $aZ09 = array_merge(range('A', 'Z'), range('a', 'z'),range(0, 9)); $randphrase ='';
for($c=0;$c < $lenth;$c++) {
$randphrase .= $aZ09[mt_rand(0,count($aZ09)-1)];
}
//Append key onto phrase end
$randkey=$randphrase.$key;
//Number of Bytes is string (THIS IS THE PROBLEM, ITS JUST ADDING THE STRING LENGTH)
$bytevalue=mb_strlen($randkey, 'latin1');
// SHA512 Hash
//$toencode= utf8_encode($bytevalue);
$output = hash("sha512", $bytevalue);
//base 64 encode the hash
$sso = base64_encode($output);
$length = mb_strlen($sso);
$characters = 2;
$start = $length - $characters;
$last2 = substr($sso , $start ,$characters);
//$startitup = APIClient::Create('http://my.staging.dosespot.com/LoginSingleSignOn.aspx','SingleSignOnCode=$ssocode');
// Yes, Strip the extra ==
if($last2 == "=="){$ssocode = substr($sso,0,-2);}
// No, just pass the value to the next step
else{$ssocode=$sso;}
//Use first 22 charecters of random.
$shortphrase=substr($randphrase,0,22);
//Append uid & key onto shortened phrase end
$uidv=$uid.$shortphrase.$key;
//Number of Bytes is string
$idbytevalue=mb_strlen($uidv, 'latin1');
//$idbytevalue= strBytes(utf8_encode($uidv));
// SHA512 Hash
$idencode= utf8_encode($idbytevalue);
$idoutput = hash("sha512", $idencode);
// Base64 Encode of hash
$idssoe = base64_encode($idoutput);
//Determine if we need to strip the zeros
$idlength = mb_strlen($idssoe);
$idcharacters = 2;
$idstart = $idlength - $idcharacters;
$idlast2 = substr($idssoe , $idstart ,$idcharacters);
if($idlast2 == "=="){$ssouidv = substr($idssoe,0,-2);}
// No, just pass the value to the next step
else{$ssouidv=$idssoe;}
return array($ssocode, $ssouidv);
}
I am trying to replicate this c#:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace DoseSpot.EncryptionLibrary
{
public class EncodingUtility
{
public enum encodingOptions : int
{
ASCII = 0,
UTF7,
UTF8,
UTF32,
Unicode,
Base64String
}
public static string GetString(byte[] data, encodingOptions eo)
{
switch (eo)
{
case encodingOptions.ASCII:
return ToASCII(data);
case encodingOptions.Unicode:
return ToUnicode(data);
case encodingOptions.Base64String:
return ToBase64String(data);
case encodingOptions.UTF7:
return ToUTF7(data);
case encodingOptions.UTF32:
return ToUTF32(data);
case encodingOptions.UTF8:
default:
return ToUTF8(data);
}
}
public static byte[] GetBytes(string message, encodingOptions eo)
{
switch (eo)
{
case encodingOptions.ASCII:
return FromASCII(message);
case encodingOptions.Unicode:
return FromUnicode(message);
case encodingOptions.Base64String:
return FromBase64String(message);
case encodingOptions.UTF7:
return FromUTF7(message);
case encodingOptions.UTF32:
return FromUTF32(message);
case encodingOptions.UTF8:
default:
return FromUTF8(message);
}
}
protected static string ToBase64String(byte[] data)
{
return Convert.ToBase64String(data);
}
protected static string ToUnicode(byte[] data)
{
return unicode.GetString(data);
}
protected static string ToASCII(byte[] data)
{
return ascii.GetString(data);
}
protected static string ToUTF7(byte[] data)
{
return utf7.GetString(data);
}
protected static string ToUTF8(byte[] data)
{
return utf8.GetString(data);
}
protected static string ToUTF32(byte[] data)
{
return utf32.GetString(data);
}
protected static byte[] FromBase64String(string originalString)
{
return Convert.FromBase64String(originalString);
}
protected static byte[] FromUnicode(string originalString)
{
return unicode.GetBytes(originalString);
}
protected static byte[] FromASCII(string originalString)
{
return ascii.GetBytes(originalString);
}
protected static byte[] FromUTF7(string originalString)
{
return utf7.GetBytes(originalString);
}
protected static byte[] FromUTF8(string originalString)
{
return utf8.GetBytes(originalString);
}
protected static byte[] FromUTF32(string originalString)
{
return utf32.GetBytes(originalString);
}
public static Encoding getEncoding(encodingOptions eo)
{
switch (eo)
{
case encodingOptions.ASCII:
return ascii;
case encodingOptions.UTF7:
return utf7;
case encodingOptions.UTF8:
return utf8;
case encodingOptions.UTF32:
return utf32;
case encodingOptions.Unicode:
default:
return unicode;
}
}
private static ASCIIEncoding ascii = new ASCIIEncoding();
private static UTF8Encoding utf8 = new UTF8Encoding();
private static UTF7Encoding utf7 = new UTF7Encoding();
private static UTF32Encoding utf32 = new UTF32Encoding();
private static UnicodeEncoding unicode = new UnicodeEncoding();
}
}
public static class EncryptionCommon
{
public static int KeyLength = 32;
public static int PhraseLength = 32;
public static string CreatePhrase()
{
return Randomizer.RandomNumberOfLettersAll(PhraseLength);
}
public static string CreateKey()
{
return Randomizer.RandomNumberOfLetters(KeyLength);
}
public static string Encrypt(string Phrase, string MyKey)
{
byte[] data = EncodingUtility.GetBytes(Phrase + MyKey, EncodingUtility.encodingOptions.UTF8);
byte[] result = new SHA512Managed().ComputeHash(data);
string tempString = EncodingUtility.GetString(result, EncodingUtility.encodingOptions.Base64String);
if (tempString.Substring(tempString.Length - 2).ToString().Equals("=="))
tempString = tempString.Substring(0, tempString.Length - 2);
return tempString;
}
public static string EncryptUserId(string Phrase, int UserId, string MyKey)
{
string UserPhrase = UserId.ToString();
if (Phrase.Length > 22)
UserPhrase += Phrase.Substring(0, 22);
else
UserPhrase += Phrase;
return Encrypt(UserPhrase, MyKey);
}
public static bool VerifyKey(string key, string combinedPhraseAndEncryptedString)
{
Dictionary<string, string> myDict = SplitStringIntoPhraseAndHash(combinedPhraseAndEncryptedString);
string phrase = myDict["phrase"];
string providedEncryptedPhrase = myDict["encryptedString"];
string testEncryptedPhrase = Encrypt(phrase, key);
if (providedEncryptedPhrase.Equals(testEncryptedPhrase))
return true;
else
return false;
}
public static Dictionary<string, string> SplitStringIntoPhraseAndHash(string stringToSplit)
{
Dictionary<string, string> myResult = new Dictionary<string, string>();
if (stringToSplit != null && stringToSplit.Trim().Length >= PhraseLength)
{
string phraseFound = stringToSplit.Substring(0, PhraseLength);
string encryptedString = stringToSplit.Substring(PhraseLength);
myResult.Add("phrase", phraseFound);
myResult.Add("encryptedString", encryptedString);
}
return myResult;
}
public static string CreatePhraseEncryptedCombinedString(string phrase, string key)
{
string toReturn = phrase;
toReturn += Encrypt(phrase, key);
return toReturn;
}
}
I am trying to replicate this C# process in PHP to no avail.
HOW TO CREATE THE CORRECT SINGLESIGNONCODE:
1. You have been provided a key (in UTF-8)
2. Create a random phrase 32 characters long in UTF-8
a. Create32CharPhrase
3. Append the key to the phrase
a. Create32CharPhrase + Key
4. Get the value in Bytes from UTF-8 String
a. GetBytesFromUTF8(Create32CharPhrase + Key)
5. Use SHA512 to hash the byte value you just received
SHA512Hash(GetBytesFromUTF8(Create32CharPhrase + Key))
6. Get a Base64String out of the hash that you created
GetBase64String(SHA512Hash(GetBytesFromUTF8(Create32CharPhrase + Key)))
7. If there are two = signs at the end, then remove them.
RemoveExtraEqualsSigns(GetBase64String(SHA512Hash(GetBytesFromUTF8(Create32CharPhrase
+ Key))))
The Second part of the function...
HOW TO CREATE THE CORRECT SINGLESIGNONUSERIDVERIFY:
1. Grab the first 22 characters of the phrase from step 1
2. Append to the UserId string the 22 characters grabbed from step one
3. (UserId) + (first 22 characters of phrase)
4. Append the key to the string created in 2b
(UserId) +(first 22 characters of phrase) + key
5. Get the Byte value of the string
GetBytesFromUTF8((UserId) + (first 22 characters of phrase) + key)
6. Use SHA512 to hash the byte value you just received
SHA512Hash(GetBytesFromUTF8((UserId) + (first 22 characters of phrase) + key))
7. Get a Base64String out of the hash that you created
8. GetBase64String(SHA512Hash(GetBytesFromUTF8((UserId) + (first 22 characters of phrase) +
key)))
9. If there are two = signs at the end, then remove them.
RemoveExtraEqualsSigns(GetBase64String(SHA512Hash(GetBytesFromUTF8((UserId) + (first 22
characters of phrase) + key))))

Taken out of an edit to the original post
PHP SSO for ASP service
function SSO($key,$uid){
$lenth=32;
$aZ09 = array_merge(range('A', 'Z'), range('a', 'z'),range(0, 9));
$randphrase ='';
for($c=0;$c < $lenth;$c++) {
$randphrase .= $aZ09[mt_rand(0,count($aZ09)-1)];
}
//echo "Key: ".$key."<br/>";
//echo "Phrase: ".$randphrase."<br/>";
//Append key onto phrase end
$randkey=$randphrase.$key;
// SHA512 Hash
$toencode= utf8_encode($randkey);
// Pass 3rd, optional parameter as TRUE to output raw binary data
$output = hash("sha512", $toencode, true);
//base 64 encode the hash binary data
$sso = base64_encode($output);
$length = mb_strlen($sso);
$characters = 2;
$start = $length - $characters;
$last2 = substr($sso , $start ,$characters);
// Yes, Strip the extra ==
if($last2 == "==")
{$ssocode = substr($sso,0,-2);}
// No, just pass the value to the next step
else{$ssocode=$sso;}
// Prepend the random phrase to the encrypted code.
$ssocode = $randphrase.$ssocode;
//echo "SSO: ".$ssocode."<br/>";
//Use first 22 charecters of random.
$shortphrase=substr($randphrase,0,22);
//Append uid & key onto shortened phrase end
$uidv=$uid.$shortphrase.$key;
// SHA512 Hash
$idencode= utf8_encode($uidv);
// Pass 3rd, optional parameter as TRUE to output raw binary data
$idoutput = hash("sha512", $idencode, true);
// Base64 Encode of hash binary data
$idssoe = base64_encode($idoutput);
//Determine if we need to strip the zeros
$idlength = mb_strlen($idssoe);
$idcharacters = 2;
$idstart = $idlength - $idcharacters;
$idlast2 = substr($idssoe , $idstart ,$idcharacters);
if($idlast2 == "==")
{$ssouidv = substr($idssoe,0,-2);}
// No, just pass the value to the next step
else{$ssouidv=$idssoe;}
//echo "SSOID: ".$ssouidv;
return array($ssocode, $ssouidv);
}

Related

C# How to convert \uxxxx to plaintext (hex escape sequence) [duplicate]

How can I convert this string:
This string contains the Unicode character Pi(π)
into an escaped ASCII string:
This string contains the Unicode character Pi(\u03a0)
and vice versa?
The current Encoding available in C# converts the π character to "?". I need to preserve that character.
This goes back and forth to and from the \uXXXX format.
class Program {
static void Main( string[] args ) {
string unicodeString = "This function contains a unicode character pi (\u03a0)";
Console.WriteLine( unicodeString );
string encoded = EncodeNonAsciiCharacters(unicodeString);
Console.WriteLine( encoded );
string decoded = DecodeEncodedNonAsciiCharacters( encoded );
Console.WriteLine( decoded );
}
static string EncodeNonAsciiCharacters( string value ) {
StringBuilder sb = new StringBuilder();
foreach( char c in value ) {
if( c > 127 ) {
// This character is too big for ASCII
string encodedValue = "\\u" + ((int) c).ToString( "x4" );
sb.Append( encodedValue );
}
else {
sb.Append( c );
}
}
return sb.ToString();
}
static string DecodeEncodedNonAsciiCharacters( string value ) {
return Regex.Replace(
value,
#"\\u(?<Value>[a-zA-Z0-9]{4})",
m => {
return ((char) int.Parse( m.Groups["Value"].Value, NumberStyles.HexNumber )).ToString();
} );
}
}
Outputs:
This function contains a unicode character pi (π)
This function contains a unicode character pi (\u03a0)
This function contains a unicode character pi (π)
For Unescape You can simply use this functions:
System.Text.RegularExpressions.Regex.Unescape(string)
System.Uri.UnescapeDataString(string)
I suggest using this method (It works better with UTF-8):
UnescapeDataString(string)
string StringFold(string input, Func<char, string> proc)
{
return string.Concat(input.Select(proc).ToArray());
}
string FoldProc(char input)
{
if (input >= 128)
{
return string.Format(#"\u{0:x4}", (int)input);
}
return input.ToString();
}
string EscapeToAscii(string input)
{
return StringFold(input, FoldProc);
}
As a one-liner:
var result = Regex.Replace(input, #"[^\x00-\x7F]", c =>
string.Format(#"\u{0:x4}", (int)c.Value[0]));
class Program
{
static void Main(string[] args)
{
char[] originalString = "This string contains the unicode character Pi(π)".ToCharArray();
StringBuilder asAscii = new StringBuilder(); // store final ascii string and Unicode points
foreach (char c in originalString)
{
// test if char is ascii, otherwise convert to Unicode Code Point
int cint = Convert.ToInt32(c);
if (cint <= 127 && cint >= 0)
asAscii.Append(c);
else
asAscii.Append(String.Format("\\u{0:x4} ", cint).Trim());
}
Console.WriteLine("Final string: {0}", asAscii);
Console.ReadKey();
}
}
All non-ASCII chars are converted to their Unicode Code Point representation and appended to the final string.
Here is my current implementation:
public static class UnicodeStringExtensions
{
public static string EncodeNonAsciiCharacters(this string value) {
var bytes = Encoding.Unicode.GetBytes(value);
var sb = StringBuilderCache.Acquire(value.Length);
bool encodedsomething = false;
for (int i = 0; i < bytes.Length; i += 2) {
var c = BitConverter.ToUInt16(bytes, i);
if ((c >= 0x20 && c <= 0x7f) || c == 0x0A || c == 0x0D) {
sb.Append((char) c);
} else {
sb.Append($"\\u{c:x4}");
encodedsomething = true;
}
}
if (!encodedsomething) {
StringBuilderCache.Release(sb);
return value;
}
return StringBuilderCache.GetStringAndRelease(sb);
}
public static string DecodeEncodedNonAsciiCharacters(this string value)
=> Regex.Replace(value,/*language=regexp*/#"(?:\\u[a-fA-F0-9]{4})+", Decode);
static readonly string[] Splitsequence = new [] { "\\u" };
private static string Decode(Match m) {
var bytes = m.Value.Split(Splitsequence, StringSplitOptions.RemoveEmptyEntries)
.Select(s => ushort.Parse(s, NumberStyles.HexNumber)).SelectMany(BitConverter.GetBytes).ToArray();
return Encoding.Unicode.GetString(bytes);
}
}
This passes a test:
public void TestBigUnicode() {
var s = "\U00020000";
var encoded = s.EncodeNonAsciiCharacters();
var decoded = encoded.DecodeEncodedNonAsciiCharacters();
Assert.Equals(s, decoded);
}
with the encoded value: "\ud840\udc00"
This implementation makes use of a StringBuilderCache (reference source link)
A small patch to #Adam Sills's answer which solves FormatException on cases where the input string like "c:\u00ab\otherdirectory\" plus RegexOptions.Compiled makes the Regex compilation much faster:
private static Regex DECODING_REGEX = new Regex(#"\\u(?<Value>[a-fA-F0-9]{4})", RegexOptions.Compiled);
private const string PLACEHOLDER = #"#!#";
public static string DecodeEncodedNonAsciiCharacters(this string value)
{
return DECODING_REGEX.Replace(
value.Replace(#"\\", PLACEHOLDER),
m => {
return ((char)int.Parse(m.Groups["Value"].Value, NumberStyles.HexNumber)).ToString(); })
.Replace(PLACEHOLDER, #"\\");
}
To store actual Unicode codepoints, you have to first decode the String's UTF-16 codeunits to UTF-32 codeunits (which are currently the same as the Unicode codepoints). Use System.Text.Encoding.UTF32.GetBytes() for that, and then write the resulting bytes to the StringBuilder as needed,i.e.
static void Main(string[] args)
{
String originalString = "This string contains the unicode character Pi(π)";
Byte[] bytes = Encoding.UTF32.GetBytes(originalString);
StringBuilder asAscii = new StringBuilder();
for (int idx = 0; idx < bytes.Length; idx += 4)
{
uint codepoint = BitConverter.ToUInt32(bytes, idx);
if (codepoint <= 127)
asAscii.Append(Convert.ToChar(codepoint));
else
asAscii.AppendFormat("\\u{0:x4}", codepoint);
}
Console.WriteLine("Final string: {0}", asAscii);
Console.ReadKey();
}
You need to use the Convert() method in the Encoding class:
Create an Encoding object that represents ASCII encoding
Create an Encoding object that represents Unicode encoding
Call Encoding.Convert() with the source encoding, the destination encoding, and the string to be encoded
There is an example here:
using System;
using System.Text;
namespace ConvertExample
{
class ConvertExampleClass
{
static void Main()
{
string unicodeString = "This string contains the unicode character Pi(\u03a0)";
// Create two different encodings.
Encoding ascii = Encoding.ASCII;
Encoding unicode = Encoding.Unicode;
// Convert the string into a byte[].
byte[] unicodeBytes = unicode.GetBytes(unicodeString);
// Perform the conversion from one encoding to the other.
byte[] asciiBytes = Encoding.Convert(unicode, ascii, unicodeBytes);
// Convert the new byte[] into a char[] and then into a string.
// This is a slightly different approach to converting to illustrate
// the use of GetCharCount/GetChars.
char[] asciiChars = new char[ascii.GetCharCount(asciiBytes, 0, asciiBytes.Length)];
ascii.GetChars(asciiBytes, 0, asciiBytes.Length, asciiChars, 0);
string asciiString = new string(asciiChars);
// Display the strings created before and after the conversion.
Console.WriteLine("Original string: {0}", unicodeString);
Console.WriteLine("Ascii converted string: {0}", asciiString);
}
}
}

How to XOR a MD5 hash and return a 32 character string?

How do I further encrypt a MD5 hash by XOR'ing it with a string of variable size (not bigger than 32 characters) ?
I would like the result of the XOR to be a 32 character string as well.
What i have tried so far is:
convert the md5 string to binary
convert second string to binary
pad second binary with 0's (to the left) until both binaries are of equal length
iterate the binary representations and XOR them
convert the XOR'ed result to a string
The approach may be wrong, im not sure how to do it. My problem is, when converting the result of the XOR, it is not a 32 character long string, as I would like it to be.
Sample code (equal length strings in this case):
class Program
{
static void Main(string[] args)
{
var md51 = ToBinary(ConvertToByteArray(CalculateMD5Hash("Maaa"), Encoding.ASCII));
var md52 = ToBinary(ConvertToByteArray(CalculateMD5Hash("Moo"), Encoding.ASCII));
List<int> xoredResult = new List<int>();
for (int i = 0; i < md51.Length; i++)
{
var string1 = md51[i];
var string2 = md52[i];
var xor = string1 ^ string2;
xoredResult.Add(xor);
}
var resultingString = string.Join("", xoredResult);
Console.WriteLine(resultingString.Length);
var data = GetBytesFromBinaryString(resultingString);
var text = Encoding.ASCII.GetString(data);
}
public static byte[] ConvertToByteArray(string str, Encoding encoding)
{
return encoding.GetBytes(str);
}
public static String ToBinary(Byte[] data)
{
return string.Join("", data.Select(byt => Convert.ToString(byt, 2).PadLeft(8, '0')));
}
public static Byte[] GetBytesFromBinaryString(String binary)
{
var list = new List<Byte>();
for (int i = 0; i < binary.Length; i += 8)
{
String t = binary.Substring(i, 8);
list.Add(Convert.ToByte(t, 2));
}
return list.ToArray();
}
public static string CalculateMD5Hash(string input)
{
// step 1, calculate MD5 hash from input
MD5 md5 = System.Security.Cryptography.MD5.Create();
byte[] inputBytes = System.Text.Encoding.ASCII.GetBytes(input);
byte[] hash = md5.ComputeHash(inputBytes);
// step 2, convert byte array to hex string
StringBuilder sb = new StringBuilder();
for (int i = 0; i < hash.Length; i++)
{
sb.Append(hash[i].ToString("X2"));
}
return sb.ToString();
}
}
xoring a string with what is essentially random bytes is not guaranteed to give you a valid string as a output. Your var text = Encoding.ASCII.GetString(data); is likely failing because you are passing it a non valid string in byte form. You must use something like var text = Convert.ToBase64String(data) to be able to represent the random data without loss of information in the process.

Generating SHA256 in Swift (iOS)

I am trying to generate SHA256 using the following function:-
func generateHMAC(key: String, data: String) -> String {
let keyData = key.dataFromHexadecimalString()! as NSData
let dataIn = data.dataUsingEncoding(NSUTF16StringEncoding)
var result: [CUnsignedChar]
result = Array(count: Int(CC_SHA256_DIGEST_LENGTH), repeatedValue: 0)
CCHmac(CCHmacAlgorithm(kCCHmacAlgSHA256), keyData.bytes, keyData.length, dataIn!.bytes, dataIn!.length, &result)
let hash = NSMutableString()
for val in result {
hash.appendFormat("%02hhx", val)
}
return hash as String
}
Input is
AccountNumber: 100195
Amount: 10
BillerID: 59
ChannelID: 2
Context: 11|test
CountryID: 1
CustomerID: 34
EmailID: ankur.arya#me.com
ReturnURL: https://uat.myfatoora.com/ReceiptPOC.aspx
SECURITYTOKEN: 6B4A47A6-40A0-4C9D-A925-5CECA2910881
TxnRefNum: 991107844408242
UserName: USP
and the output is
4cd1acc736a9702c8cdb1a546d1c274a67cb285dbdbb972aab39ee51c2a2‌​26c8
However, this doesn’t match with the output of backend which uses the following algo
private string CreateSHA256POC(bool useRequest)
{
// Hex Decode the Secure Secret for use in using the HMACSHA256 hasher
// hex decoding eliminates this source of error as it is independent of the character encoding
// hex decoding is precise in converting to a byte array and is the preferred form for representing binary values as hex strings.
secureHash = "";
byte[] convertedHash = new byte[_secureSecret.Length / 2];
for (int i = 0; i < _secureSecret.Length / 2; i++)
{
convertedHash[i] = (byte)Int32.Parse(_secureSecret.Substring(i * 2, 2), System.Globalization.NumberStyles.HexNumber);
}
// Build string from collection in preperation to be hashed
StringBuilder sb = new StringBuilder();
SortedList<String, String> list = (useRequest ? requestFields : responseFields);
foreach (KeyValuePair<string, string> kvp in list)
{
// if (kvp.Key.StartsWith("vpc_") || kvp.Key.StartsWith("user_"))
sb.Append(kvp.Key + "=" + kvp.Value + "&");
}
// remove trailing & from string
if (sb.Length > 0)
sb.Remove(sb.Length - 1, 1);
// Create secureHash on string
string hexHash = "";
using (HMACSHA256 hasher = new HMACSHA256(convertedHash))
{
byte[] hashValue = hasher.ComputeHash(Encoding.UTF8.GetBytes(sb.ToString()));
foreach (byte b in hashValue)
{
hexHash += b.ToString("X2");
secureHash = hexHash;
}
}
return hexHash;
}
and their output is
41D8E81C128100A76185F24CE00BC6A4FEA30839E6DE3DFFBC3B5814E4FD0C4E
Secret key is
71DD0F73AFFBB47825FF9864DDE95F3B
Can you please help me to update my method in Swift to get the same result as backend.
Thanks.

can't display Arabic characters in unity using c# [duplicate]

How can I convert this string:
This string contains the Unicode character Pi(π)
into an escaped ASCII string:
This string contains the Unicode character Pi(\u03a0)
and vice versa?
The current Encoding available in C# converts the π character to "?". I need to preserve that character.
This goes back and forth to and from the \uXXXX format.
class Program {
static void Main( string[] args ) {
string unicodeString = "This function contains a unicode character pi (\u03a0)";
Console.WriteLine( unicodeString );
string encoded = EncodeNonAsciiCharacters(unicodeString);
Console.WriteLine( encoded );
string decoded = DecodeEncodedNonAsciiCharacters( encoded );
Console.WriteLine( decoded );
}
static string EncodeNonAsciiCharacters( string value ) {
StringBuilder sb = new StringBuilder();
foreach( char c in value ) {
if( c > 127 ) {
// This character is too big for ASCII
string encodedValue = "\\u" + ((int) c).ToString( "x4" );
sb.Append( encodedValue );
}
else {
sb.Append( c );
}
}
return sb.ToString();
}
static string DecodeEncodedNonAsciiCharacters( string value ) {
return Regex.Replace(
value,
#"\\u(?<Value>[a-zA-Z0-9]{4})",
m => {
return ((char) int.Parse( m.Groups["Value"].Value, NumberStyles.HexNumber )).ToString();
} );
}
}
Outputs:
This function contains a unicode character pi (π)
This function contains a unicode character pi (\u03a0)
This function contains a unicode character pi (π)
For Unescape You can simply use this functions:
System.Text.RegularExpressions.Regex.Unescape(string)
System.Uri.UnescapeDataString(string)
I suggest using this method (It works better with UTF-8):
UnescapeDataString(string)
string StringFold(string input, Func<char, string> proc)
{
return string.Concat(input.Select(proc).ToArray());
}
string FoldProc(char input)
{
if (input >= 128)
{
return string.Format(#"\u{0:x4}", (int)input);
}
return input.ToString();
}
string EscapeToAscii(string input)
{
return StringFold(input, FoldProc);
}
As a one-liner:
var result = Regex.Replace(input, #"[^\x00-\x7F]", c =>
string.Format(#"\u{0:x4}", (int)c.Value[0]));
class Program
{
static void Main(string[] args)
{
char[] originalString = "This string contains the unicode character Pi(π)".ToCharArray();
StringBuilder asAscii = new StringBuilder(); // store final ascii string and Unicode points
foreach (char c in originalString)
{
// test if char is ascii, otherwise convert to Unicode Code Point
int cint = Convert.ToInt32(c);
if (cint <= 127 && cint >= 0)
asAscii.Append(c);
else
asAscii.Append(String.Format("\\u{0:x4} ", cint).Trim());
}
Console.WriteLine("Final string: {0}", asAscii);
Console.ReadKey();
}
}
All non-ASCII chars are converted to their Unicode Code Point representation and appended to the final string.
Here is my current implementation:
public static class UnicodeStringExtensions
{
public static string EncodeNonAsciiCharacters(this string value) {
var bytes = Encoding.Unicode.GetBytes(value);
var sb = StringBuilderCache.Acquire(value.Length);
bool encodedsomething = false;
for (int i = 0; i < bytes.Length; i += 2) {
var c = BitConverter.ToUInt16(bytes, i);
if ((c >= 0x20 && c <= 0x7f) || c == 0x0A || c == 0x0D) {
sb.Append((char) c);
} else {
sb.Append($"\\u{c:x4}");
encodedsomething = true;
}
}
if (!encodedsomething) {
StringBuilderCache.Release(sb);
return value;
}
return StringBuilderCache.GetStringAndRelease(sb);
}
public static string DecodeEncodedNonAsciiCharacters(this string value)
=> Regex.Replace(value,/*language=regexp*/#"(?:\\u[a-fA-F0-9]{4})+", Decode);
static readonly string[] Splitsequence = new [] { "\\u" };
private static string Decode(Match m) {
var bytes = m.Value.Split(Splitsequence, StringSplitOptions.RemoveEmptyEntries)
.Select(s => ushort.Parse(s, NumberStyles.HexNumber)).SelectMany(BitConverter.GetBytes).ToArray();
return Encoding.Unicode.GetString(bytes);
}
}
This passes a test:
public void TestBigUnicode() {
var s = "\U00020000";
var encoded = s.EncodeNonAsciiCharacters();
var decoded = encoded.DecodeEncodedNonAsciiCharacters();
Assert.Equals(s, decoded);
}
with the encoded value: "\ud840\udc00"
This implementation makes use of a StringBuilderCache (reference source link)
A small patch to #Adam Sills's answer which solves FormatException on cases where the input string like "c:\u00ab\otherdirectory\" plus RegexOptions.Compiled makes the Regex compilation much faster:
private static Regex DECODING_REGEX = new Regex(#"\\u(?<Value>[a-fA-F0-9]{4})", RegexOptions.Compiled);
private const string PLACEHOLDER = #"#!#";
public static string DecodeEncodedNonAsciiCharacters(this string value)
{
return DECODING_REGEX.Replace(
value.Replace(#"\\", PLACEHOLDER),
m => {
return ((char)int.Parse(m.Groups["Value"].Value, NumberStyles.HexNumber)).ToString(); })
.Replace(PLACEHOLDER, #"\\");
}
To store actual Unicode codepoints, you have to first decode the String's UTF-16 codeunits to UTF-32 codeunits (which are currently the same as the Unicode codepoints). Use System.Text.Encoding.UTF32.GetBytes() for that, and then write the resulting bytes to the StringBuilder as needed,i.e.
static void Main(string[] args)
{
String originalString = "This string contains the unicode character Pi(π)";
Byte[] bytes = Encoding.UTF32.GetBytes(originalString);
StringBuilder asAscii = new StringBuilder();
for (int idx = 0; idx < bytes.Length; idx += 4)
{
uint codepoint = BitConverter.ToUInt32(bytes, idx);
if (codepoint <= 127)
asAscii.Append(Convert.ToChar(codepoint));
else
asAscii.AppendFormat("\\u{0:x4}", codepoint);
}
Console.WriteLine("Final string: {0}", asAscii);
Console.ReadKey();
}
You need to use the Convert() method in the Encoding class:
Create an Encoding object that represents ASCII encoding
Create an Encoding object that represents Unicode encoding
Call Encoding.Convert() with the source encoding, the destination encoding, and the string to be encoded
There is an example here:
using System;
using System.Text;
namespace ConvertExample
{
class ConvertExampleClass
{
static void Main()
{
string unicodeString = "This string contains the unicode character Pi(\u03a0)";
// Create two different encodings.
Encoding ascii = Encoding.ASCII;
Encoding unicode = Encoding.Unicode;
// Convert the string into a byte[].
byte[] unicodeBytes = unicode.GetBytes(unicodeString);
// Perform the conversion from one encoding to the other.
byte[] asciiBytes = Encoding.Convert(unicode, ascii, unicodeBytes);
// Convert the new byte[] into a char[] and then into a string.
// This is a slightly different approach to converting to illustrate
// the use of GetCharCount/GetChars.
char[] asciiChars = new char[ascii.GetCharCount(asciiBytes, 0, asciiBytes.Length)];
ascii.GetChars(asciiBytes, 0, asciiBytes.Length, asciiChars, 0);
string asciiString = new string(asciiChars);
// Display the strings created before and after the conversion.
Console.WriteLine("Original string: {0}", unicodeString);
Console.WriteLine("Ascii converted string: {0}", asciiString);
}
}
}

How do I encode and decode a base64 string?

How do I return a base64 encoded string given a string?
How do I decode a base64 encoded string into a string?
Encode
public static string Base64Encode(string plainText)
{
var plainTextBytes = System.Text.Encoding.UTF8.GetBytes(plainText);
return System.Convert.ToBase64String(plainTextBytes);
}
Decode
public static string Base64Decode(string base64EncodedData)
{
var base64EncodedBytes = System.Convert.FromBase64String(base64EncodedData);
return System.Text.Encoding.UTF8.GetString(base64EncodedBytes);
}
One liner code:
Note: Use System and System.Text directives.
Encode:
string encodedStr = Convert.ToBase64String(Encoding.UTF8.GetBytes("inputStr"));
Decode:
string inputStr = Encoding.UTF8.GetString(Convert.FromBase64String(encodedStr));
I'm sharing my implementation with some neat features:
uses Extension Methods for Encoding class. Rationale is that someone may need to support different types of encodings (not only UTF8).
Another improvement is failing gracefully with null result for null entry - it's very useful in real life scenarios and supports equivalence for X=decode(encode(X)).
Remark: Remember that to use Extension Method you have to (!) import the namespace with using keyword (in this case using MyApplication.Helpers.Encoding).
Code:
namespace MyApplication.Helpers.Encoding
{
public static class EncodingForBase64
{
public static string EncodeBase64(this System.Text.Encoding encoding, string text)
{
if (text == null)
{
return null;
}
byte[] textAsBytes = encoding.GetBytes(text);
return System.Convert.ToBase64String(textAsBytes);
}
public static string DecodeBase64(this System.Text.Encoding encoding, string encodedText)
{
if (encodedText == null)
{
return null;
}
byte[] textAsBytes = System.Convert.FromBase64String(encodedText);
return encoding.GetString(textAsBytes);
}
}
}
Usage example:
using MyApplication.Helpers.Encoding; // !!!
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
Test1();
Test2();
}
static void Test1()
{
string textEncoded = System.Text.Encoding.UTF8.EncodeBase64("test1...");
System.Diagnostics.Debug.Assert(textEncoded == "dGVzdDEuLi4=");
string textDecoded = System.Text.Encoding.UTF8.DecodeBase64(textEncoded);
System.Diagnostics.Debug.Assert(textDecoded == "test1...");
}
static void Test2()
{
string textEncoded = System.Text.Encoding.UTF8.EncodeBase64(null);
System.Diagnostics.Debug.Assert(textEncoded == null);
string textDecoded = System.Text.Encoding.UTF8.DecodeBase64(textEncoded);
System.Diagnostics.Debug.Assert(textDecoded == null);
}
}
}
Based on the answers by Andrew Fox and Cebe, I turned it around and made them string extensions instead of Base64String extensions.
public static class StringExtensions
{
public static string ToBase64(this string text)
{
return ToBase64(text, Encoding.UTF8);
}
public static string ToBase64(this string text, Encoding encoding)
{
if (string.IsNullOrEmpty(text))
{
return text;
}
byte[] textAsBytes = encoding.GetBytes(text);
return Convert.ToBase64String(textAsBytes);
}
public static bool TryParseBase64(this string text, out string decodedText)
{
return TryParseBase64(text, Encoding.UTF8, out decodedText);
}
public static bool TryParseBase64(this string text, Encoding encoding, out string decodedText)
{
if (string.IsNullOrEmpty(text))
{
decodedText = text;
return false;
}
try
{
byte[] textAsBytes = Convert.FromBase64String(text);
decodedText = encoding.GetString(textAsBytes);
return true;
}
catch (Exception)
{
decodedText = null;
return false;
}
}
}
A slight variation on andrew.fox answer, as the string to decode might not be a correct base64 encoded string:
using System;
namespace Service.Support
{
public static class Base64
{
public static string ToBase64(this System.Text.Encoding encoding, string text)
{
if (text == null)
{
return null;
}
byte[] textAsBytes = encoding.GetBytes(text);
return Convert.ToBase64String(textAsBytes);
}
public static bool TryParseBase64(this System.Text.Encoding encoding, string encodedText, out string decodedText)
{
if (encodedText == null)
{
decodedText = null;
return false;
}
try
{
byte[] textAsBytes = Convert.FromBase64String(encodedText);
decodedText = encoding.GetString(textAsBytes);
return true;
}
catch (Exception)
{
decodedText = null;
return false;
}
}
}
}
You can use below routine to convert string to base64 format
public static string ToBase64(string s)
{
byte[] buffer = System.Text.Encoding.Unicode.GetBytes(s);
return System.Convert.ToBase64String(buffer);
}
Also you can use very good online tool OnlineUtility.in to encode string in base64 format
URL safe Base64 Encoding/Decoding
public static class Base64Url
{
public static string Encode(string text)
{
return Convert.ToBase64String(Encoding.UTF8.GetBytes(text)).TrimEnd('=').Replace('+', '-')
.Replace('/', '_');
}
public static string Decode(string text)
{
text = text.Replace('_', '/').Replace('-', '+');
switch (text.Length % 4)
{
case 2:
text += "==";
break;
case 3:
text += "=";
break;
}
return Encoding.UTF8.GetString(Convert.FromBase64String(text));
}
}
// Encoding
string passw = "tes123";
var plainTextBytes = System.Text.Encoding.UTF8.GetBytes(passw);
string pass = System.Convert.ToBase64String(plainTextBytes);
// Normal
var encodedTextBytes = Convert.FromBase64String(pass);
string plainText = Encoding.UTF8.GetString(encodedTextBytes);
using System;
using System.Text;
public static class Base64Conversions
{
public static string EncodeBase64(this string text, Encoding encoding = null)
{
if (text == null) return null;
encoding = encoding ?? Encoding.UTF8;
var bytes = encoding.GetBytes(text);
return Convert.ToBase64String(bytes);
}
public static string DecodeBase64(this string encodedText, Encoding encoding = null)
{
if (encodedText == null) return null;
encoding = encoding ?? Encoding.UTF8;
var bytes = Convert.FromBase64String(encodedText);
return encoding.GetString(bytes);
}
}
Usage
var text = "Sample Text";
var base64 = text.EncodeBase64();
base64 = text.EncodeBase64(Encoding.UTF8); //or with Encoding
For those that simply want to encode/decode individual base64 digits:
public static int DecodeBase64Digit(char digit, string digit62 = "+-.~", string digit63 = "/_,")
{
if (digit >= 'A' && digit <= 'Z') return digit - 'A';
if (digit >= 'a' && digit <= 'z') return digit + (26 - 'a');
if (digit >= '0' && digit <= '9') return digit + (52 - '0');
if (digit62.IndexOf(digit) > -1) return 62;
if (digit63.IndexOf(digit) > -1) return 63;
return -1;
}
public static char EncodeBase64Digit(int digit, char digit62 = '+', char digit63 = '/')
{
digit &= 63;
if (digit < 52)
return (char)(digit < 26 ? digit + 'A' : digit + ('a' - 26));
else if (digit < 62)
return (char)(digit + ('0' - 52));
else
return digit == 62 ? digit62 : digit63;
}
There are various versions of Base64 that disagree about what to use for digits 62 and 63, so DecodeBase64Digit can tolerate several of these.
You can display it like this:
var strOriginal = richTextBox1.Text;
byte[] byt = System.Text.Encoding.ASCII.GetBytes(strOriginal);
// convert the byte array to a Base64 string
string strModified = Convert.ToBase64String(byt);
richTextBox1.Text = "" + strModified;
Now, converting it back.
var base64EncodedBytes = System.Convert.FromBase64String(richTextBox1.Text);
richTextBox1.Text = "" + System.Text.Encoding.ASCII.GetString(base64EncodedBytes);
MessageBox.Show("Done Converting! (ASCII from base64)");
I hope this helps!
To encode a string into a base64 string in C#, you can use the Convert.ToBase64String method:
string originalString = "Hello World";
string encodedString = Convert.ToBase64String(Encoding.UTF8.GetBytes(originalString));
To decode a base64 encoded string into a string in C#, you can use the Convert.FromBase64String method:
string encodedString = "SGVsbG8gV29ybGQ=";
string originalString = Encoding.UTF8.GetString(Convert.FromBase64String(encodedString));

Categories