Convert to UNICODE in C# - c#
/// <summary>
/// Treats every char of <paramref name="utf8String"/> as one raw byte and
/// decodes the resulting byte sequence as UTF-8.
/// </summary>
/// <param name="utf8String">String whose chars each carry a single byte value.</param>
/// <returns>The text produced by decoding those bytes with <see cref="Encoding.UTF8"/>.</returns>
public string DecodeFromUtf8(string utf8String)
{
    int length = utf8String.Length;
    byte[] raw = new byte[length];
    int position = 0;
    foreach (char current in utf8String)
    {
        // Narrowing cast: the char is expected to be in the byte range.
        raw[position++] = (byte)current;
    }
    return Encoding.UTF8.GetString(raw, 0, length);
}
This code doesn't work for me.
Do you have any good ideas?
I need the Unicode array for Russian fonts, like this:
// Byte-to-Unicode lookup table with 256 entries, indexed by the source byte value.
// Entries 0x00-0x7F hold the code point equal to the index.
// Entries 0x80-0x9F and the final entry (0xFF) hold U+FFFD.
// Entries 0xA0-0xFE hold the mapped characters (mostly U+0531-U+058E plus some punctuation).
public static readonly ReadOnlyCollection<char> Unicodes = Array.AsReadOnly(new char[]
{
'\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007',
'\u0008', '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u000E', '\u000F',
'\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017',
'\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
'\u0020', '\u0021', '\u0022', '\u0023', '\u0024', '\u0025', '\u0026', '\u0027',
'\u0028', '\u0029', '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F',
'\u0030', '\u0031', '\u0032', '\u0033', '\u0034', '\u0035', '\u0036', '\u0037',
'\u0038', '\u0039', '\u003A', '\u003B', '\u003C', '\u003D', '\u003E', '\u003F',
'\u0040', '\u0041', '\u0042', '\u0043', '\u0044', '\u0045', '\u0046', '\u0047',
'\u0048', '\u0049', '\u004A', '\u004B', '\u004C', '\u004D', '\u004E', '\u004F',
'\u0050', '\u0051', '\u0052', '\u0053', '\u0054', '\u0055', '\u0056', '\u0057',
'\u0058', '\u0059', '\u005A', '\u005B', '\u005C', '\u005D', '\u005E', '\u005F',
'\u0060', '\u0061', '\u0062', '\u0063', '\u0064', '\u0065', '\u0066', '\u0067',
'\u0068', '\u0069', '\u006A', '\u006B', '\u006C', '\u006D', '\u006E', '\u006F',
'\u0070', '\u0071', '\u0072', '\u0073', '\u0074', '\u0075', '\u0076', '\u0077',
'\u0078', '\u0079', '\u007A', '\u007B', '\u007C', '\u007D', '\u007E', '\u007F',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\u00A0', '\u058E', '\u0587', '\u0589', '\u0029', '\u0028', '\u00BB', '\u00AB',
'\u2015', '\u00B7', '\u055D', '\u002C', '\u2010', '\u058A', '\u2026', '\u055C',
'\u055B', '\u055E', '\u0531', '\u0561', '\u0532', '\u0562', '\u0533', '\u0563',
'\u0534', '\u0564', '\u0535', '\u0565', '\u0536', '\u0566', '\u0537', '\u0567',
'\u0538', '\u0568', '\u0539', '\u0569', '\u053A', '\u056A', '\u053B', '\u056B',
'\u053C', '\u056C', '\u053D', '\u056D', '\u053E', '\u056E', '\u053F', '\u056F',
'\u0540', '\u0570', '\u0541', '\u0571', '\u0542', '\u0572', '\u0543', '\u0573',
'\u0544', '\u0574', '\u0545', '\u0575', '\u0546', '\u0576', '\u0547', '\u0577',
'\u0548', '\u0578', '\u0549', '\u0579', '\u054A', '\u057A', '\u054B', '\u057B',
'\u054C', '\u057C', '\u054D', '\u057D', '\u054E', '\u057E', '\u054F', '\u057F',
'\u0550', '\u0580', '\u0551', '\u0581', '\u0552', '\u0582', '\u0553', '\u0583',
'\u0554', '\u0584', '\u0555', '\u0585', '\u0556', '\u0586', '\u055A', '\uFFFD' });
Your string seems to be ArmSCII-8.
Adapted from an old encoder/decoder I had written for VISCII:
namespace Utilities
{
using System;
using System.Collections.ObjectModel;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
/// <summary>
/// ArmSCII8 (https://en.wikipedia.org/wiki/ArmSCII#ArmSCII-8)
/// encoding for C#.
/// Fast table-based implementation not based on MappedEncoding.
/// The Get* overrides of this class always call the cached encoder and
/// decoder with flush = true, so no conversion state is carried from one
/// call to the next.
/// </summary>
public class ArmSCII8EncodingSimple : Encoding
{
    // Taken from https://en.wikipedia.org/wiki/ArmSCII#ArmSCII-8 .
    // Includes parts of the ISO-8859-1 in the ranges 00–1F and 7F–9F.
    // Doesn't define anything for FF: that slot holds '\0', which the
    // decoder and encoder treat as "no mapping" for every index except 0.
    public static readonly ReadOnlyCollection<char> Unicodes = Array.AsReadOnly(new char[]
    {
        '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007',
        '\u0008', '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u000E', '\u000F',
        '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017',
        '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
        '\u0020', '\u0021', '\u0022', '\u0023', '\u0024', '\u0025', '\u0026', '\u0027',
        '\u0028', '\u0029', '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F',
        '\u0030', '\u0031', '\u0032', '\u0033', '\u0034', '\u0035', '\u0036', '\u0037',
        '\u0038', '\u0039', '\u003A', '\u003B', '\u003C', '\u003D', '\u003E', '\u003F',
        '\u0040', '\u0041', '\u0042', '\u0043', '\u0044', '\u0045', '\u0046', '\u0047',
        '\u0048', '\u0049', '\u004A', '\u004B', '\u004C', '\u004D', '\u004E', '\u004F',
        '\u0050', '\u0051', '\u0052', '\u0053', '\u0054', '\u0055', '\u0056', '\u0057',
        '\u0058', '\u0059', '\u005A', '\u005B', '\u005C', '\u005D', '\u005E', '\u005F',
        '\u0060', '\u0061', '\u0062', '\u0063', '\u0064', '\u0065', '\u0066', '\u0067',
        '\u0068', '\u0069', '\u006A', '\u006B', '\u006C', '\u006D', '\u006E', '\u006F',
        '\u0070', '\u0071', '\u0072', '\u0073', '\u0074', '\u0075', '\u0076', '\u0077',
        '\u0078', '\u0079', '\u007A', '\u007B', '\u007C', '\u007D', '\u007E', '\u007F',
        '\u0080', '\u0081', '\u0082', '\u0083', '\u0084', '\u0085', '\u0086', '\u0087',
        '\u0088', '\u0089', '\u008A', '\u008B', '\u008C', '\u008D', '\u008E', '\u008F',
        '\u0090', '\u0091', '\u0092', '\u0093', '\u0094', '\u0095', '\u0096', '\u0097',
        '\u0098', '\u0099', '\u009A', '\u009B', '\u009C', '\u009D', '\u009E', '\u009F',
        '\u00A0', '\u058E', '\u0587', '\u0589', '\u0029', '\u0028', '\u00BB', '\u00AB',
        '\u2015', '\u00B7', '\u055D', '\u002C', '\u2010', '\u058A', '\u2026', '\u055C',
        '\u055B', '\u055E', '\u0531', '\u0561', '\u0532', '\u0562', '\u0533', '\u0563',
        '\u0534', '\u0564', '\u0535', '\u0565', '\u0536', '\u0566', '\u0537', '\u0567',
        '\u0538', '\u0568', '\u0539', '\u0569', '\u053A', '\u056A', '\u053B', '\u056B',
        '\u053C', '\u056C', '\u053D', '\u056D', '\u053E', '\u056E', '\u053F', '\u056F',
        '\u0540', '\u0570', '\u0541', '\u0571', '\u0542', '\u0572', '\u0543', '\u0573',
        '\u0544', '\u0574', '\u0545', '\u0575', '\u0546', '\u0576', '\u0547', '\u0577',
        '\u0548', '\u0578', '\u0549', '\u0579', '\u054A', '\u057A', '\u054B', '\u057B',
        '\u054C', '\u057C', '\u054D', '\u057D', '\u054E', '\u057E', '\u054F', '\u057F',
        '\u0550', '\u0580', '\u0551', '\u0581', '\u0552', '\u0582', '\u0553', '\u0583',
        '\u0554', '\u0584', '\u0555', '\u0585', '\u0556', '\u0586', '\u055A', '\0'/**/,
    });

    // Lazily created instances used by the Get* overrides of this class.
    private ArmSCII8Decoder decoder;
    private ArmSCII8Encoder encoder;

    /// <summary>
    /// Cached decoder used by <see cref="GetCharCount"/> and
    /// <see cref="GetChars"/>. It is created on first use and its Fallback
    /// is kept in sync with this encoding's DecoderFallback. Two instances
    /// may be created if the getter races; the last one assigned wins.
    /// </summary>
    protected ArmSCII8Decoder Decoder
    {
        get
        {
            ArmSCII8Decoder decoder2 = decoder;

            // Lazy creation of the Decoder
            if (object.ReferenceEquals(decoder2, null))
            {
                decoder2 = decoder = new ArmSCII8Decoder();
            }

            DecoderFallback decoderFallback = DecoderFallback;

            // If the Fallback has changed from the last call, update it
            if (!object.ReferenceEquals(decoderFallback, null) && !object.ReferenceEquals(decoderFallback, decoder2.Fallback))
            {
                decoder2.Fallback = decoderFallback;
            }

            return decoder2;
        }
    }

    /// <summary>
    /// Cached encoder used by <see cref="GetByteCount"/> and
    /// <see cref="GetBytes"/>. It is created on first use and its Fallback
    /// is kept in sync with this encoding's EncoderFallback. Two instances
    /// may be created if the getter races; the last one assigned wins.
    /// </summary>
    protected ArmSCII8Encoder Encoder
    {
        get
        {
            ArmSCII8Encoder encoder2 = encoder;

            // Lazy creation of the Encoder
            if (object.ReferenceEquals(encoder2, null))
            {
                encoder = encoder2 = new ArmSCII8Encoder();
            }

            EncoderFallback encoderFallback = EncoderFallback;

            // If the Fallback has changed from the last call, update it
            if (!object.ReferenceEquals(encoderFallback, null) && !object.ReferenceEquals(encoderFallback, encoder2.Fallback))
            {
                encoder2.Fallback = encoderFallback;
            }

            return encoder2;
        }
    }

    /// <summary>Name reported for this encoding.</summary>
    public override string BodyName
    {
        get
        {
            return "x-armscii-8-simple";
        }
    }

    /// <summary>Same value as <see cref="BodyName"/>.</summary>
    public override string EncodingName
    {
        get
        {
            return BodyName;
        }
    }

    /// <summary>Always true: every table entry corresponds to exactly one byte.</summary>
    public override bool IsSingleByte
    {
        get
        {
            return true;
        }
    }

    /// <summary>
    /// Clones the encoding without sharing the cached encoder/decoder.
    /// </summary>
    public override object Clone()
    {
        var encoding = (ArmSCII8EncodingSimple)base.Clone();

        // We reset the encoder and decoder of the cloned instance,
        // because otherwise they would be shared between the two
        // instances.
        encoding.decoder = null;
        encoding.encoder = null;

        return encoding;
    }

    /// <summary>
    /// Returns a new decoder that uses this encoding's DecoderFallback
    /// (when one is set), so a caller-configured fallback is honoured.
    /// </summary>
    public override Decoder GetDecoder()
    {
        var result = new ArmSCII8Decoder();
        DecoderFallback decoderFallback = DecoderFallback;

        // The Fallback setter rejects null, so only propagate a real value.
        if (!object.ReferenceEquals(decoderFallback, null))
        {
            result.Fallback = decoderFallback;
        }

        return result;
    }

    /// <summary>
    /// Returns a new encoder that uses this encoding's EncoderFallback
    /// (when one is set), so a caller-configured fallback is honoured.
    /// </summary>
    public override Encoder GetEncoder()
    {
        var result = new ArmSCII8Encoder();
        EncoderFallback encoderFallback = EncoderFallback;

        // The Fallback setter rejects null, so only propagate a real value.
        if (!object.ReferenceEquals(encoderFallback, null))
        {
            result.Fallback = encoderFallback;
        }

        return result;
    }

    /// <summary>Counts the bytes produced by encoding the given chars (flush = true).</summary>
    public override int GetByteCount(char[] chars, int index, int count)
    {
        return Encoder.GetByteCount(chars, index, count, true);
    }

    /// <summary>Encodes the given chars into <paramref name="bytes"/> (flush = true).</summary>
    /// <returns>The number of bytes written.</returns>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        return Encoder.GetBytes(chars, charIndex, charCount, bytes, byteIndex, true);
    }

    /// <summary>Counts the chars produced by decoding the given bytes (flush = true).</summary>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        return Decoder.GetCharCount(bytes, index, count, true);
    }

    /// <summary>Decodes the given bytes into <paramref name="chars"/> (flush = true).</summary>
    /// <returns>The number of chars written.</returns>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        return Decoder.GetChars(bytes, byteIndex, byteCount, chars, charIndex, true);
    }

    /// <summary>
    /// Upper bound for the bytes needed to encode <paramref name="charCount"/> chars.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="charCount"/> is negative or the bound does not fit in an int.
    /// </exception>
    public override int GetMaxByteCount(int charCount)
    {
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount");
        }

        // +1: an encoder from GetEncoder() used with flush = false may still
        // hold a high surrogate from a previous call.
        long max = (long)charCount + 1;

        // Each unencodable char may expand to several fallback chars.
        EncoderFallback encoderFallback = EncoderFallback;
        if (!object.ReferenceEquals(encoderFallback, null) && encoderFallback.MaxCharCount > 1)
        {
            max *= encoderFallback.MaxCharCount;
        }

        if (max > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException("charCount");
        }

        return (int)max;
    }

    /// <summary>
    /// Upper bound for the chars produced by decoding <paramref name="byteCount"/> bytes.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="byteCount"/> is negative or the bound does not fit in an int.
    /// </exception>
    public override int GetMaxCharCount(int byteCount)
    {
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount");
        }

        long max = byteCount;

        // Each undecodable byte may expand to several fallback chars.
        DecoderFallback decoderFallback = DecoderFallback;
        if (!object.ReferenceEquals(decoderFallback, null) && decoderFallback.MaxCharCount > 1)
        {
            max *= decoderFallback.MaxCharCount;
        }

        if (max > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException("byteCount");
        }

        return (int)max;
    }
}
/// <summary>
/// Table-driven ArmSCII-8 decoder. The methods keep no per-instance
/// conversion state: the fallback buffer is a local created on demand in
/// each call, and the lookup table is static and read-only.
/// </summary>
public class ArmSCII8Decoder : Decoder
{
    // Private array copy of the shared table, indexed by byte value.
    private static readonly char[] Unicodes = ArmSCII8EncodingSimple.Unicodes.ToArray();

    /// <summary>
    /// Counts the chars that decoding <paramref name="count"/> bytes starting
    /// at <paramref name="index"/> would produce, including fallback output.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The index/count pair is outside the array.</exception>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        ValidateRange(bytes, index, count, "index", "count");

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        DecoderFallbackBuffer fallbackBuffer = null;

        int ret = 0;
        int end = index + count;

        for (; index < end; index++)
        {
            byte b = bytes[index];
            char ch = Unicodes[b];

            // '\0' in the table means "unmapped", except for byte 0 itself.
            if (ch != '\0' || b == 0)
            {
                ret++;
                continue;
            }

            if (fallbackBuffer == null)
            {
                fallbackBuffer = NewFallbackBuffer();
            }

            // Fallback
            if (fallbackBuffer.Fallback(new[] { b }, index))
            {
                HandleFallbackCount(fallbackBuffer, ref ret);
            }
        }

        return ret;
    }

    /// <summary>
    /// Decodes <paramref name="byteCount"/> bytes starting at
    /// <paramref name="byteIndex"/> into <paramref name="chars"/> starting at
    /// <paramref name="charIndex"/>.
    /// </summary>
    /// <returns>The number of chars written.</returns>
    /// <exception cref="ArgumentNullException">An array argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An index or count is outside its array.</exception>
    /// <exception cref="ArgumentException"><paramref name="chars"/> is too small for the output.</exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        ValidateRange(bytes, byteIndex, byteCount, "byteIndex", "byteCount");

        if (chars == null)
        {
            throw new ArgumentNullException("chars");
        }

        if (charIndex < 0 || charIndex > chars.Length)
        {
            throw new ArgumentOutOfRangeException("charIndex");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        DecoderFallbackBuffer fallbackBuffer = null;

        int end = byteCount + byteIndex;
        int charIndex2 = charIndex;

        for (; byteIndex < end; byteIndex++)
        {
            byte b = bytes[byteIndex];

            // chars between 0 and 127 are equal in Unicode and ArmSCII8
            if (b <= 127)
            {
                WriteChar(chars, charIndex2, (char)b);
                charIndex2++;
                continue;
            }

            char ch = Unicodes[b];

            // For b > 127 a '\0' table entry always means "unmapped".
            if (ch != '\0')
            {
                WriteChar(chars, charIndex2, ch);
                charIndex2++;
                continue;
            }

            if (fallbackBuffer == null)
            {
                fallbackBuffer = NewFallbackBuffer();
            }

            // Fallback
            if (fallbackBuffer.Fallback(new[] { b }, byteIndex))
            {
                HandleFallbackWrite(fallbackBuffer, chars, ref charIndex2);
            }
        }

        return charIndex2 - charIndex;
    }

    /// <summary>Drains the fallback buffer, adding one to <paramref name="count"/> per char.</summary>
    protected static void HandleFallbackCount(DecoderFallbackBuffer fallbackBuffer, ref int count)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            fallbackBuffer.GetNextChar();
            count++;
        }
    }

    /// <summary>Drains the fallback buffer into <paramref name="chars"/>, advancing <paramref name="charIndex"/>.</summary>
    protected static void HandleFallbackWrite(DecoderFallbackBuffer fallbackBuffer, char[] chars, ref int charIndex)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            WriteChar(chars, charIndex, ch);
            charIndex++;
        }
    }

    /// <summary>Stores one char, throwing if the output buffer is exhausted.</summary>
    /// <exception cref="ArgumentException"><paramref name="charIndex"/> is past the end of <paramref name="chars"/>.</exception>
    // Remove the next line if using .NET < 4.5
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    protected static void WriteChar(char[] chars, int charIndex, char ch)
    {
        if (charIndex >= chars.Length)
        {
            // The offending argument is the output buffer, so name it as the parameter.
            throw new ArgumentException("The output char buffer is too small to contain the decoded characters.", "chars");
        }

        chars[charIndex] = ch;
    }

    // Shared argument validation for the input byte range.
    private static void ValidateRange(byte[] bytes, int index, int count, string indexName, string countName)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }

        if (index < 0 || index > bytes.Length)
        {
            throw new ArgumentOutOfRangeException(indexName);
        }

        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(countName);
        }

        // Written as a subtraction so that a huge count cannot overflow int.
        if (count > bytes.Length - index)
        {
            throw new ArgumentOutOfRangeException("bytes");
        }
    }

    // Creates a fresh fallback buffer from the configured fallback, or the replacement fallback if none is set.
    private DecoderFallbackBuffer NewFallbackBuffer()
    {
        return (Fallback ?? DecoderFallback.ReplacementFallback).CreateFallbackBuffer();
    }
}
/// <summary>
/// Table-driven ArmSCII-8 encoder. The only per-instance conversion state
/// is <see cref="HighSurrogate"/>, which is written only when a method is
/// called with flush = false (or to clear a value stored by such a call).
/// </summary>
public class ArmSCII8Encoder : Encoder
{
    // Reverse table: index is the Unicode code unit, value is the byte.
    // A 0 entry means "no mapping" for every char except '\0'.
    private static readonly byte[] ArmSCII8s;

    // Buffer for High/Low surrogates. Note that this property is read
    // but not written if the methods are always used with flush = true.
    protected char HighSurrogate { get; set; }

    static ArmSCII8Encoder()
    {
        ArmSCII8s = new byte[1 + ArmSCII8EncodingSimple.Unicodes.Max()];

        for (int i = 0; i < ArmSCII8EncodingSimple.Unicodes.Count; i++)
        {
            char ch = ArmSCII8EncodingSimple.Unicodes[i];

            // The first byte that maps to a char wins; '\0' placeholders are skipped.
            if (i == 0 || (ArmSCII8s[ch] == 0 && ch != '\0'))
            {
                ArmSCII8s[ch] = (byte)i;
            }
        }
    }

    /// <summary>
    /// Counts the bytes that encoding <paramref name="count"/> chars starting
    /// at <paramref name="index"/> would produce, including fallback output.
    /// Does not modify the stored surrogate state.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The index/count pair is outside the array.</exception>
    /// <exception cref="EncoderFallbackException">The fallback produced a char that cannot be encoded.</exception>
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
    {
        ValidateRange(chars, index, count, "index", "count");

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        EncoderFallbackBuffer fallbackBuffer = null;

        char highSurrogate = HighSurrogate;
        int ret = 0;
        int end = index + count;

        for (; index < end; index++)
        {
            char ch = chars[index];

            if (highSurrogate != 0)
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = NewFallbackBuffer();
                }

                // A High/Low couple is passed to the fallback together;
                // otherwise the lone High surrogate is passed by itself.
                bool isPair = char.IsLowSurrogate(ch);
                bool hasOutput = isPair
                    ? fallbackBuffer.Fallback(highSurrogate, ch, index - 1)
                    : fallbackBuffer.Fallback(highSurrogate, index - 1);

                if (hasOutput)
                {
                    HandleFallbackCount(fallbackBuffer, ref ret);
                }

                highSurrogate = '\0';

                if (isPair)
                {
                    continue;
                }

                // Otherwise fall through to the normal handling of ch
            }

            byte b;

            if (TryEncode(ch, out b))
            {
                ret++;
            }
            else if (char.IsHighSurrogate(ch))
            {
                // High/low surrogate handling, done through buffer
                highSurrogate = ch;
            }
            else
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = NewFallbackBuffer();
                }

                // Fallback
                if (fallbackBuffer.Fallback(ch, index))
                {
                    HandleFallbackCount(fallbackBuffer, ref ret);
                }
            }
        }

        if (flush && highSurrogate != 0)
        {
            if (fallbackBuffer == null)
            {
                fallbackBuffer = NewFallbackBuffer();
            }

            if (fallbackBuffer.Fallback(highSurrogate, index - 1))
            {
                HandleFallbackCount(fallbackBuffer, ref ret);
            }
        }

        return ret;
    }

    /// <summary>
    /// Encodes <paramref name="charCount"/> chars starting at
    /// <paramref name="charIndex"/> into <paramref name="bytes"/> starting at
    /// <paramref name="byteIndex"/>. With flush = false a trailing high
    /// surrogate is stored for the next call; with flush = true it is sent to
    /// the fallback and any stored surrogate is cleared.
    /// </summary>
    /// <returns>The number of bytes written.</returns>
    /// <exception cref="ArgumentNullException">An array argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An index or count is outside its array.</exception>
    /// <exception cref="ArgumentException"><paramref name="bytes"/> is too small for the output.</exception>
    /// <exception cref="EncoderFallbackException">The fallback produced a char that cannot be encoded.</exception>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
    {
        ValidateRange(chars, charIndex, charCount, "charIndex", "charCount");

        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }

        if (byteIndex < 0 || byteIndex > bytes.Length)
        {
            throw new ArgumentOutOfRangeException("byteIndex");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        EncoderFallbackBuffer fallbackBuffer = null;

        char highSurrogate = HighSurrogate;

        int end = charIndex + charCount;
        int byteIndex2 = byteIndex;

        for (; charIndex < end; charIndex++)
        {
            char ch = chars[charIndex];

            if (highSurrogate != 0)
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = NewFallbackBuffer();
                }

                // A High/Low couple is passed to the fallback together;
                // otherwise the lone High surrogate is passed by itself.
                bool isPair = char.IsLowSurrogate(ch);
                bool hasOutput = isPair
                    ? fallbackBuffer.Fallback(highSurrogate, ch, charIndex - 1)
                    : fallbackBuffer.Fallback(highSurrogate, charIndex - 1);

                if (hasOutput)
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref byteIndex2);
                }

                highSurrogate = '\0';

                if (isPair)
                {
                    continue;
                }

                // Otherwise fall through to the normal handling of ch
            }

            byte b;

            if (TryEncode(ch, out b))
            {
                // Recognized character
                WriteByte(bytes, byteIndex2, b);
                byteIndex2++;
            }
            else if (char.IsHighSurrogate(ch))
            {
                // High/low surrogate handling, done through buffer
                highSurrogate = ch;
            }
            else
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = NewFallbackBuffer();
                }

                // Fallback
                if (fallbackBuffer.Fallback(ch, charIndex))
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref byteIndex2);
                }
            }
        }

        if (flush)
        {
            if (highSurrogate != 0)
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = NewFallbackBuffer();
                }

                if (fallbackBuffer.Fallback(highSurrogate, charIndex - 1))
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref byteIndex2);
                }
            }

            // Flushing ends the conversion: drop a surrogate stored by an
            // earlier flush = false call so it is not replayed next time.
            // The guard keeps the property unwritten when it was never set.
            if (HighSurrogate != '\0')
            {
                HighSurrogate = '\0';
            }
        }
        else
        {
            HighSurrogate = highSurrogate;
        }

        return byteIndex2 - byteIndex;
    }

    /// <summary>
    /// Drains the fallback buffer, adding one to <paramref name="count"/> per
    /// char; throws if a fallback char is itself not encodable.
    /// </summary>
    protected static void HandleFallbackCount(EncoderFallbackBuffer fallbackBuffer, ref int count)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            byte b;

            if (!TryEncode(ch, out b))
            {
                throw new EncoderFallbackException();
            }

            count++;
        }
    }

    /// <summary>
    /// Drains the fallback buffer into <paramref name="bytes"/>, advancing
    /// <paramref name="byteIndex"/>; throws if a fallback char is itself not
    /// encodable.
    /// </summary>
    protected static void HandleFallbackWrite(EncoderFallbackBuffer fallbackBuffer, byte[] bytes, ref int byteIndex)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            byte b;

            if (!TryEncode(ch, out b))
            {
                throw new EncoderFallbackException();
            }

            WriteByte(bytes, byteIndex, b);
            byteIndex++;
        }
    }

    /// <summary>Stores one byte, throwing if the output buffer is exhausted.</summary>
    /// <exception cref="ArgumentException"><paramref name="byteIndex"/> is past the end of <paramref name="bytes"/>.</exception>
    // Remove the next line if using .NET < 4.5
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    protected static void WriteByte(byte[] bytes, int byteIndex, byte b)
    {
        if (byteIndex >= bytes.Length)
        {
            // The offending argument is the output buffer, so name it as the parameter.
            throw new ArgumentException("The output byte buffer is too small to contain the encoded bytes.", "bytes");
        }

        bytes[byteIndex] = b;
    }

    // Looks up the byte for ch; returns false when the table has no mapping.
    private static bool TryEncode(char ch, out byte b)
    {
        if (ch < ArmSCII8s.Length)
        {
            b = ArmSCII8s[ch];
            return b != 0 || ch == '\0';
        }

        b = 0;
        return false;
    }

    // Shared argument validation for the input char range.
    private static void ValidateRange(char[] chars, int index, int count, string indexName, string countName)
    {
        if (chars == null)
        {
            throw new ArgumentNullException("chars");
        }

        if (index < 0 || index > chars.Length)
        {
            throw new ArgumentOutOfRangeException(indexName);
        }

        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(countName);
        }

        // Written as a subtraction so that a huge count cannot overflow int.
        if (count > chars.Length - index)
        {
            throw new ArgumentOutOfRangeException("chars");
        }
    }

    // Creates a fresh fallback buffer from the configured fallback, or the replacement fallback if none is set.
    private EncoderFallbackBuffer NewFallbackBuffer()
    {
        return (Fallback ?? EncoderFallback.ReplacementFallback).CreateFallbackBuffer();
    }
}
}
Then use it like:
/// <summary>
/// Converts <paramref name="str"/> to bytes with the "iso-8859-1" encoding
/// and decodes those bytes with <see cref="ArmSCII8EncodingSimple"/>.
/// </summary>
/// <param name="str">Text whose chars each stand for one ArmSCII-8 byte.</param>
/// <returns>The string decoded from the recovered bytes.</returns>
public static string DecodeFromArmSCII8(string str)
{
    // Turn the string back into a byte array via ISO-8859-1.
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
    byte[] rawBytes = latin1.GetBytes(str);

    // Decode those bytes as ArmSCII-8.
    ArmSCII8EncodingSimple armscii = new ArmSCII8EncodingSimple();
    return armscii.GetString(rawBytes);
}
and
string str = DecodeFromArmSCII8("سñïÇñáëÛ³Ý ²ÉÇݳ Ø.");
The Encoding.GetEncoding("iso-8859-1").GetBytes(str) call returns the original byte[] array. Note that if you already have the original byte[], you can pass it directly to ArmSCII8EncodingSimple.
Related
Marshal a va_list
I have the following code: [UnmanagedFunctionPointer(CallingConvention.Cdecl)] public delegate void PanicFuncDelegate(string str, IntPtr args); private void PanicFunc(string str, IntPtr args) { LogFunc("PANIC", str, args); } public void LogFunc(string severity, string str, IntPtr args) { vprintf($"[{severity}] "+ str,args); } [DllImport("libc.so.6")] private static extern int vprintf(string format, IntPtr args); This prints to the console the messages correctly formatted. I want to retrieve the values from args to use them in my own logger. If I try to get the value of each pointer from the array in args (as suggested here: Marshal va_list in C# delegate) I get segmentation fault. Any suggestions?
I have a function call where this works; here's what I do: For the DllImport I use an __arglist to marshal to the va_list, [DllImport("libc.so.6")] private static extern int vprintf(string format, __arglist); Then, when calling the function, I create the __arglist: vprintf(string format, __arglist(arg1, arg2, arg3...)) Of course, you would need to either call the function with all the arguments statically or build that __arglist dynamically. I don't have the code here, but it's possible. I wonder if you get a segmentation fault because the elements in the object[] are not pinned? Maybe pinning the object[] and all the elements within it would help? Just a guess, though.
Just think on how C program gets variables from va_list, and there is the solution:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace VaTest
{
    class Program
    {
        static void Main(string[] args)
        {
            MarshalVaArgs(vaList => vprintf("%c%d%s", vaList), false, 'a', 123, "bc");
        }

        [DllImport("msvcrt")] //windows
        //[DllImport("c")] //linux
        private static extern int vprintf(string format, IntPtr vaList);

        // Size of t rounded up to a multiple of the pointer size.
        private static int IntSizeOf(Type t)
        {
            return (Marshal.SizeOf(t) + IntPtr.Size - 1) & ~(IntPtr.Size - 1);
        }

        /// <summary>
        /// Packs <paramref name="args"/> into one unmanaged block, each
        /// argument in a pointer-size-aligned slot, and passes the block's
        /// address to <paramref name="action"/>. Strings are copied to
        /// unmanaged memory and stored as pointers. Every allocation is
        /// released before returning, even if marshalling or
        /// <paramref name="action"/> throws.
        /// </summary>
        /// <param name="action">Callback that receives the address of the packed block.</param>
        /// <param name="isUnicode">true: strings as UTF-16; false: ANSI; null: platform default (Auto).</param>
        /// <param name="args">Values to pack; non-string values are written with Marshal.StructureToPtr.</param>
        public static void MarshalVaArgs(Action<IntPtr> action, bool? isUnicode, params object[] args)
        {
            var sizes = new int[args.Length];
            for (var i = 0; i < args.Length; i++)
            {
                sizes[i] = args[i] is string ? IntPtr.Size : IntSizeOf(args[i].GetType());
            }

            var allocs = new List<IntPtr>();
            try
            {
                var offset = 0;
                var result = Marshal.AllocHGlobal(sizes.Sum());
                allocs.Add(result);

                for (var i = 0; i < args.Length; i++)
                {
                    if (args[i] is string)
                    {
                        var s = (string)args[i];
                        IntPtr data;
                        if (!isUnicode.HasValue)
                        {
                            data = Marshal.StringToHGlobalAuto(s);
                        }
                        else if (isUnicode.Value)
                        {
                            data = Marshal.StringToHGlobalUni(s);
                        }
                        else
                        {
                            data = Marshal.StringToHGlobalAnsi(s);
                        }

                        allocs.Add(data);
                        Marshal.WriteIntPtr(result, offset, data);
                    }
                    else
                    {
                        Marshal.StructureToPtr(args[i], result + offset, false);
                    }

                    offset += sizes[i];
                }

                action(result);
            }
            finally
            {
                // Release everything allocated so far, whether or not the call succeeded.
                foreach (var ptr in allocs)
                {
                    Marshal.FreeHGlobal(ptr);
                }
            }
        }
    }
}
The code is written and tested with .NET Core 3.0 preview 5, compatible with .NET Framework 4.0 and C# 3.0. Outputs: a123bc
As this isn't solved yet i post a long solution that worked for me. I found the solution in an abandoned project https://github.com/GoaLitiuM/libobs-sharp Use like this (tested with FFmpeg):
var objects = va_list_Helper.VaListToArray(format, va_List_Ptr);
// format: frame=%4d QP=%.2f NAL=%d Slice:%c Poc:%-3d I:%-4d P:%-4d SKIP:%-4d size=%d bytes%s
// format (filled): frame= 3 QP=13.00 NAL=0 Slice:B Poc:4 I:0 P:8 SKIP:912 size=32 bytes
// va_List objects: 3, 13, 0, 'B', 4, 0, 8, 912, 32
The classes needed:
/// <summary> Entry point that wraps a raw va_list pointer and reads it by format string. </summary>
public class va_list_Helper
{
    /// <summary> Reads the arguments behind <paramref name="va_list"/> as described by <paramref name="format"/>. </summary>
    public static unsafe object[] VaListToArray(string format, byte* va_list)
    {
        var vaList = new va_list((IntPtr)va_list);
        return vaList.GetObjectsByFormat(format);
    }
}

/// <summary> Minimal printf format-string parser. </summary>
public static class Printf
{
    /// <summary>
    /// Splits <paramref name="format"/> into chunks, each starting at a '%'
    /// and running to the next recorded '%' (or the end of the string).
    /// </summary>
    /// <returns> The chunks, or null when the format contains no '%'. </returns>
    public static string[] GetFormatSpecifiers(string format)
    {
        if (format.IndexOf('%') == -1)
            return null;

        // find specifiers from format string
        List<int> indices = new List<int>();
        for (int j = 0; j < format.Length; j++)
        {
            j = format.IndexOf('%', j);
            if (j == -1)
                break;
            indices.Add(j);
            // ignore "%%"; the bounds check covers a format that ends with '%'
            if (j + 1 < format.Length && format[j + 1] == '%')
                j++;
        }

        if (indices.Count == 0)
            return null;

        List<string> formats = new List<string>(indices.Count);
        for (int mi = 0; mi < indices.Count; mi++)
        {
            int chunkEnd = mi + 1 < indices.Count ? indices[mi + 1] : format.Length;
            string formatSpecifier = format.Substring(indices[mi], chunkEnd - indices[mi]);
            if (!string.IsNullOrWhiteSpace(formatSpecifier))
                formats.Add(formatSpecifier);
        }

        return formats.ToArray();
    }

    /// <summary> Parsed description of one format specifier. </summary>
    public class FormatSpecificationInfo
    {
        public string specification;
        //public int parameter;
        public char type;
        public int width;
        public int precision;
        public FormatFlags flags;
    };

    [Flags]
    public enum FormatFlags
    {
        // Type length
        // NOTE: the parser *adds* IsLong / IsShort, so a doubled letter
        // ("ll", "hh") lands on the next bit (IsLongLong / IsChar).
        IsLong = 0x0001, // l
        IsLongLong = 0x0002, // ll
        IsShort = 0x0004, // h
        IsChar = 0x0008, // hh
        IsLongDouble = 0x0010, // L (a single bit; 0x0016 overlapped IsLongLong and IsShort)

        // Flags
        LeftAlign = 0x0100, // '-' left align within the width
        Sign = 0x0200, // '+' use - or + signs for signed types
        Alternate = 0x0400, // '#' prefix non-zero values with hex types
        ZeroPad = 0x0800, // '0' pad with zeros
        Blank = 0x1000, // ' ' pad sign with blank
        Grouping = 0x2000, // '\' group by thousands
        ArchSize = 0x4000, // '?' use arch precision

        // Dynamic parameters
        DynamicWidth = 0x10000,
        DynamicPrecision = 0x20000,
    };

    /// <summary>
    /// Parses one chunk produced by <see cref="GetFormatSpecifiers"/> into
    /// flags, width, precision and conversion type. Parsing stops at the
    /// first conversion letter (or at a second '%').
    /// </summary>
    /// <returns> The parsed info, or null for a null/blank input. </returns>
    public static FormatSpecificationInfo GetFormatSpecifierInfo(string specification)
    {
        if (string.IsNullOrWhiteSpace(specification))
            return null;

        FormatSpecificationInfo info = new FormatSpecificationInfo()
        {
            type = '\0',
            width = int.MinValue,
            precision = 6,
        };

        string width = "";
        string precision = "";
        int start = -1;

        // TODO: parse parameter index
        for (int i = 0; i < specification.Length && info.type == '\0'; i++)
        {
            char c = specification[i];
            switch (c)
            {
                case '%':
                    if (start == -1)
                        start = i;
                    else
                        info.type = c;
                    info.specification = specification.Substring(start, i + 1 - start);
                    break;

                // flags
                case '-': info.flags |= FormatFlags.LeftAlign; break;
                case '+': info.flags |= FormatFlags.Sign; break;
                case ' ': info.flags |= FormatFlags.Blank; break;
                case '#': info.flags |= FormatFlags.Alternate; break;
                case '\'': info.flags |= FormatFlags.Grouping; break;
                case '?': info.flags |= FormatFlags.ArchSize; break;

                // precision
                case '.':
                    {
                        for (int j = i + 1; j < specification.Length; j++)
                        {
                            if (specification[j] == '*')
                                info.flags |= FormatFlags.DynamicPrecision;
                            else if (char.IsNumber(specification[j]))
                                precision += specification[j];
                            else
                                break;
                            // consume the character in the outer loop as well
                            i++;
                        }
                    }
                    break;

                // length flags (added, not OR-ed: see FormatFlags)
                case 'h': info.flags += (int)FormatFlags.IsShort; break;
                case 'l': info.flags += (int)FormatFlags.IsLong; break;
                case 'L': info.flags |= FormatFlags.IsLongDouble; break;
                case 'z':
                case 'j':
                case 't':
                    // not supported
                    break;

                // dynamic width
                case '*': info.flags |= FormatFlags.DynamicWidth; break;

                default:
                    {
                        if (char.IsNumber(c))
                        {
                            if (width == "" && c == '0')
                                info.flags |= FormatFlags.ZeroPad;
                            else
                                width += c;
                        }
                        else if (char.IsLetter(c) && info.type == '\0')
                        {
                            info.type = c;
                            info.specification = specification.Substring(start, i + 1 - start);
                        }
                    }
                    break;
            }
        }

        // sign overrides space
        if (info.flags.HasFlag(FormatFlags.Sign) && info.flags.HasFlag(FormatFlags.Blank))
            info.flags &= ~FormatFlags.Blank;

        // left align overrides zero padding
        if (info.flags.HasFlag(FormatFlags.LeftAlign) && info.flags.HasFlag(FormatFlags.ZeroPad))
            info.flags &= ~FormatFlags.ZeroPad;

        // unsupported precision for these types
        if (info.type == 's' || info.type == 'c' || Char.ToUpper(info.type) == 'X' || info.type == 'o')
        {
            info.precision = int.MinValue;
        }

        if (!string.IsNullOrWhiteSpace(precision))
            info.precision = Convert.ToInt32(precision);
        if (!string.IsNullOrWhiteSpace(width))
            info.width = Convert.ToInt32(width);

        return info;
    }
}

/// <summary> Wrapper around an unmanaged pointer to a va_list. </summary>
public class va_list
{
    internal IntPtr instance; //unmanaged pointer to va_list

    public va_list(IntPtr ptr)
    {
        instance = ptr;
    }

    /// <summary> Returns unmanaged pointer to argument list. </summary>
    public IntPtr GetPointer()
    {
        return instance;
    }

    /// <summary> Returns array of objects with help of printf format string. </summary>
    /// <param name="format"> printf format string. </param>
    public object[] GetObjectsByFormat(string format)
    {
        return GetObjectsByFormat(format, this);
    }

    /// <summary>
    /// Walks the memory behind <paramref name="va_list"/> as a sequence of
    /// argument slots and reads one value per format specifier.
    /// </summary>
    /// <returns> The values read, or null if there are no specifiers or no pointer. </returns>
    /// <exception cref="ApplicationException"> A conversion type is not handled. </exception>
    // NOTE(review): this assumes the va_list is a plain pointer to consecutive
    // argument slots; confirm for the target platform/ABI.
    public static unsafe object[] GetObjectsByFormat(string format, va_list va_list)
    {
        string[] formatSpecifiers = Printf.GetFormatSpecifiers(format);
        if (formatSpecifiers == null || va_list == null || va_list.GetPointer() == IntPtr.Zero)
            return null;

        IntPtr args = va_list.GetPointer();
        List<object> objects = new List<object>(formatSpecifiers.Length);

        //var bytesDebug = new byte[format.Length];
        //Marshal.Copy(va_list.GetPointer(), bytesDebug, 0, bytesDebug.Length);

        int offset = 0;
        foreach (string spec in formatSpecifiers)
        {
            var info = Printf.GetFormatSpecifierInfo(spec);
            if (info.type == '\0')
                continue;

            // dynamic width and precision arguments
            // these are stored in stack before the actual value
            if (info.flags.HasFlag(Printf.FormatFlags.DynamicWidth))
            {
                int widthArg = Marshal.ReadInt32(args, offset);
                objects.Add(widthArg);
                offset += Marshal.SizeOf(typeof(IntPtr));
            }
            if (info.flags.HasFlag(Printf.FormatFlags.DynamicPrecision))
            {
                int precArg = Marshal.ReadInt32(args, offset);
                objects.Add(precArg);
                offset += Marshal.SizeOf(typeof(IntPtr));
            }

            int iSize = info.flags.HasFlag(Printf.FormatFlags.IsLongLong)
                ? Marshal.SizeOf(typeof(Int64))
                : Marshal.SizeOf(typeof(IntPtr));

            // marshal objects from pointer
            switch (info.type)
            {
                // 8/16-bit integers
                // char / wchar_t (promoted to int)
                case 'c':
                    char c = (char)Marshal.ReadByte(args, offset);
                    objects.Add(c);
                    //offset += Marshal.SizeOf(typeof(Int32));
                    offset += Marshal.SizeOf(typeof(IntPtr));
                    break;

                // signed integers
                case 'd':
                case 'i':
                    {
                        if (info.flags.HasFlag(Printf.FormatFlags.IsShort)) // h
                        {
                            short sh = (short)Marshal.ReadInt32(args, offset);
                            objects.Add(sh);
                            // NOTE(review): advances by 4 bytes while the other cases use the pointer size - confirm.
                            offset += Marshal.SizeOf(typeof(Int32));
                        }
                        else if (info.flags.HasFlag(Printf.FormatFlags.IsLongLong)) // ll
                        {
                            long l = Marshal.ReadInt64(args, offset);
                            objects.Add(l);
                            offset += iSize;
                        }
                        else // int and long types
                        {
                            var i = Marshal.ReadInt32(args, offset);
                            objects.Add(i);
                            offset += iSize;
                        }
                    }
                    break;

                // unsigned integers
                case 'u':
                case 'o':
                case 'x':
                case 'X':
                    {
                        if (info.flags.HasFlag(Printf.FormatFlags.IsShort)) // h
                        {
                            ushort su = (ushort)Marshal.ReadInt32(args, offset);
                            objects.Add(su);
                            // NOTE(review): advances by 4 bytes while the other cases use the pointer size - confirm.
                            offset += Marshal.SizeOf(typeof(Int32));
                        }
                        else if (info.flags.HasFlag(Printf.FormatFlags.IsLongLong)) // ll
                        {
                            ulong lu = (ulong)(long)Marshal.ReadInt64(args, offset);
                            objects.Add(lu);
                            offset += iSize;
                        }
                        else // uint and ulong types
                        {
                            uint u = (uint)Marshal.ReadInt32(args, offset);
                            objects.Add(u);
                            offset += iSize;
                        }
                    }
                    break;

                // floating-point types
                case 'f':
                case 'F':
                case 'e':
                case 'E':
                case 'g':
                case 'G':
                    {
                        // long double (L) is not really supported: it is read
                        // as a 64-bit double exactly like the plain case.
                        long lfi = Marshal.ReadInt64(args, offset);
                        double d = *(double*)(void*)&lfi;
                        objects.Add(d);
                        offset += Marshal.SizeOf(typeof(double));
                    }
                    break;

                // string
                case 's':
                    {
                        string s = null;
                        // same:
                        //var addr1 = new IntPtr(args.ToInt64() + offset);
                        //var intPtr4 = Marshal.ReadIntPtr(addr1);
                        var intPtr3 = Marshal.ReadIntPtr(args, offset);
                        if (info.flags.HasFlag(Printf.FormatFlags.IsLong))
                        {
                            s = Marshal.PtrToStringUni(intPtr3);
                        }
                        else
                        {
                            s = Marshal.PtrToStringAnsi(intPtr3);
                        }
                        objects.Add(s);
                        offset += Marshal.SizeOf(typeof(IntPtr));
                    }
                    break;

                // pointer
                case 'p':
                    IntPtr ptr = Marshal.ReadIntPtr(args, offset);
                    objects.Add(ptr);
                    offset += Marshal.SizeOf(typeof(IntPtr));
                    break;

                // non-marshallable types, ignored
                case ' ':
                case '%':
                case 'n':
                    break;

                default:
                    throw new ApplicationException("printf specifier '%" + info.type + "' not supported");
            }
        }

        return objects.ToArray();
    }
}
C# StreamReader detect encoding of an XML File
In my xml file i have data like this : <Data> <Field> <Name>BarcodeCapture_0</Name> <Type>SimpleIndex</Type> <DataType>DataMatrix</DataType> <Value>DEA"¡CV°)Ñ võ Fƒ´ 20100410050</Value> </Field> </Data> Im using a class that extend from StreamReader, i override reading methodes to prevent inaccepted characters like character. This is the class

/// <summary>
/// A <see cref="StreamReader"/> that silently drops characters which are not
/// legal in XML 1.0, so that the text can be handed to an XML parser.
/// Every read method is routed through <see cref="Read()"/> so the filter
/// is applied uniformly.
/// </summary>
public class CustomStreamReader : StreamReader
{
    private const int EOF = -1;

    public CustomStreamReader(Stream stream) : base(stream) { }

    public CustomStreamReader(string path) : base(path) { }

    public CustomStreamReader(string path, Encoding encoding) : base(path, encoding) { }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 or 1.1 character. See
    /// the specification at w3.org for these characters.
    /// </summary>
    /// <param name="xmlVersion">
    /// The version number as a string. Use "1.0" for XML 1.0 character
    /// validation, and use "1.1" for XML 1.1 character validation.
    /// </param>
    /// <param name="character">The Unicode code point to test.</param>
    /// <returns>true when the code point is allowed by the chosen XML version.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="xmlVersion"/> is neither "1.0" nor "1.1".
    /// </exception>
    public static bool IsLegalXmlChar(string xmlVersion, int character)
    {
        switch (xmlVersion)
        {
            case "1.1": // http://www.w3.org/TR/xml11/#charsets
                return !(
                    character <= 0x8 ||
                    character == 0xB ||
                    character == 0xC ||
                    (character >= 0xE && character <= 0x1F) ||
                    (character >= 0x7F && character <= 0x84) ||
                    (character >= 0x86 && character <= 0x9F) ||
                    character > 0x10FFFF
                );

            case "1.0": // http://www.w3.org/TR/REC-xml/#charsets
                return (
                    character == 0x9 /* == '\t' == 9 */ ||
                    character == 0xA /* == '\n' == 10 */ ||
                    character == 0xD /* == '\r' == 13 */ ||
                    (character >= 0x20 && character <= 0xD7FF) ||
                    (character >= 0xE000 && character <= 0xFFFD) ||
                    (character >= 0x10000 && character <= 0x10FFFF)
                );

            default:
                // The format argument was missing in the original, which made this
                // line throw FormatException instead of the intended exception.
                throw new ArgumentOutOfRangeException(
                    "xmlVersion",
                    string.Format("'{0}' is not a valid XML version.", xmlVersion));
        }
    }

    /// <summary>
    /// Get whether an integer represents a legal XML 1.0 character. See the
    /// specification at w3.org for these characters.
    /// </summary>
    public static bool IsLegalXmlChar(int character)
    {
        return CustomStreamReader.IsLegalXmlChar("1.0", character);
    }

    /// <summary>
    /// Decides whether a UTF-16 code unit coming out of the base reader must be dropped.
    /// </summary>
    /// <remarks>
    /// The base reader hands out UTF-16 code units, not code points. Surrogate
    /// halves (0xD800-0xDFFF) together encode 0x10000-0x10FFFF, which XML allows,
    /// so they are let through; testing them individually against the code-point
    /// rule would strip every character outside the Basic Multilingual Plane.
    /// </remarks>
    private static bool IsProhibitedCodeUnit(int codeUnit)
    {
        if (codeUnit >= 0xD800 && codeUnit <= 0xDFFF)
        {
            return false;
        }
        return !CustomStreamReader.IsLegalXmlChar(codeUnit);
    }

    /// <summary>
    /// Reads the next character that XML permits, skipping prohibited ones.
    /// </summary>
    /// <returns>The character, or -1 at the end of the stream.</returns>
    public override int Read()
    {
        int nextCharacter;
        do
        {
            nextCharacter = base.Read();
        }
        // Stop at the end of the file; otherwise skip the character if it's
        // prohibited and try the next.
        while (nextCharacter != EOF && IsProhibitedCodeUnit(nextCharacter));

        return nextCharacter;
    }

    /// <summary>
    /// Returns the next legal XML character without consuming it. Prohibited
    /// characters found in front of it are consumed and discarded.
    /// </summary>
    /// <returns>The character, or -1 when the base reader has nothing to peek at.</returns>
    public override int Peek()
    {
        int nextCharacter = base.Peek();
        while (nextCharacter != EOF && IsProhibitedCodeUnit(nextCharacter))
        {
            // Consume the prohibited character, then look at what follows it.
            base.Read();
            nextCharacter = base.Peek();
        }
        return nextCharacter;
    }

    // The following methods mirror the ones in TextReader so that they all go
    // through the filtering Read() above.

    /// <summary>
    /// Reads up to <paramref name="count"/> filtered characters into
    /// <paramref name="buffer"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <returns>The number of characters stored; 0 at end of stream or when count is 0.</returns>
    public override int Read(char[] buffer, int index, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        if ((buffer.Length - index) < count)
        {
            throw new ArgumentException();
        }

        // A pre-tested loop: with count == 0 nothing may be consumed or stored
        // (the do/while form read one character regardless).
        int stored = 0;
        while (stored < count)
        {
            int next = this.Read();
            if (next == EOF)
            {
                break;
            }
            buffer[index + stored++] = (char)next;
        }
        return stored;
    }

    /// <summary>
    /// Keeps calling <see cref="Read(char[], int, int)"/> until
    /// <paramref name="count"/> characters are stored or the stream ends.
    /// </summary>
    public override int ReadBlock(char[] buffer, int index, int count)
    {
        int num;
        int num2 = 0;
        do
        {
            num2 += num = this.Read(buffer, index + num2, count - num2);
        }
        while ((num > 0) && (num2 < count));
        return num2;
    }

    /// <summary>
    /// Reads one line of filtered text; "\r", "\n" and "\r\n" all end a line.
    /// </summary>
    /// <returns>The line without its terminator, or null at end of stream.</returns>
    public override string ReadLine()
    {
        StringBuilder builder = new StringBuilder();
        while (true)
        {
            int num = this.Read();
            switch (num)
            {
                case -1:
                    if (builder.Length > 0)
                    {
                        return builder.ToString();
                    }
                    return null;

                case 13:
                case 10:
                    // Swallow the '\n' of a "\r\n" pair.
                    if ((num == 13) && (this.Peek() == 10))
                    {
                        this.Read();
                    }
                    return builder.ToString();
            }
            builder.Append((char)num);
        }
    }

    /// <summary>
    /// Reads all remaining filtered text.
    /// </summary>
    public override string ReadToEnd()
    {
        int num;
        char[] buffer = new char[0x1000];
        StringBuilder builder = new StringBuilder(0x1000);
        while ((num = this.Read(buffer, 0, buffer.Length)) != 0)
        {
            builder.Append(buffer, 0, num);
        }
        return builder.ToString();
    }
}

In XML deserialization side : CustomStreamReader fStream_scanTransaction_XML = new CustomStreamReader(scanTransactionFilePath, Encoding.UTF8); XmlSerializer s = new XmlSerializer(typeof(ScanTransaction)); ScanTransaction result = ScanTransaction)s.Deserialize(fStream_scanTransaction_XML); The problem is that StreamReader cannot detect encoding, so it not deleting this character and the Xml deserialization faild.
Try: using (var sr = new StreamReader("XMLFile1.xml", Encoding.UTF8)) using (var xtr = new XmlTextReader(sr)) { XmlSerializer s = new XmlSerializer(typeof(ScanTransaction)); ScanTransaction result = (ScanTransaction)s.Deserialize(xtr); } You don't even need a "special" StreamReader. The XmlTextReader doesn't check for illegal characters (you can control this with the Normalization boolean property, but it is false by default, so no check for illegal characters is made).
Postfix increment into if, c#
Code example: using System; public class Test { public static void Main() { int a = 0; if(a++ == 0){ Console.WriteLine(a); } } } In this code the Console will write: 1. I can write this code in another way: public static void Main() { int a = 0; if(a == 0){ a++; Console.WriteLine(a); } } These two examples work exactly the same (from what I know about postfix). The problem is with this example coming from the Microsoft tutorials:

using System;

// A text document that can be viewed either as an array of words (Words)
// or as an array of characters (Characters).
public class Document
{
    // Class allowing to view the document as an array of words:
    public class WordCollection
    {
        readonly Document document;

        internal WordCollection (Document d){
            document = d;
        }

        // Helper function -- search character array "text", starting
        // at character "begin", for word number "wordCount" (zero-based).
        // A word is a run of consecutive letter-or-digit characters.
        // Returns false if no such word is found. Sets "start" and
        // "length" to the position and length of the word within text.
        private bool GetWord(char[] text, int begin, int wordCount,
                             out int start, out int length)
        {
            int end = text.Length;
            int count = 0;      // number of complete words seen so far
            int inWord = -1;    // start index of the current word, -1 when between words
            start = length = 0;
            // i runs up to and including end so that a word touching the end of
            // the text is closed by the "not a letter" case at i == end.
            for (int i = begin; i <= end; ++i){
                bool isLetter = i < end && Char.IsLetterOrDigit(text[i]);
                if (inWord >= 0) {
                    if (!isLetter) {
                        // A word just ended. The postfix increment compares the old
                        // value and then bumps count whether or not the test
                        // succeeds, i.e. once for every word that ends.
                        if (count++ == wordCount) {//PROBLEM IS HERE!!!!!!!!!!!!
                            start = inWord;
                            length = i - inWord;
                            return true;
                        }
                        inWord = -1;
                    }
                } else {
                    if (isLetter) {
                        inWord = i;
                    }
                }
            }
            return false;
        }

        //Indexer to get and set words of the containing document:
        public string this[int index] {
            get
            {
                int start, length;
                if(GetWord(document.TextArray, 0, index, out start, out length)) {
                    return new string(document.TextArray, start, length);
                } else {
                    throw new IndexOutOfRangeException();
                }
            }
            set
            {
                int start, length;
                if(GetWord(document.TextArray, 0, index, out start, out length))
                {
                    //Replace the word at start/length with
                    // the string "value"
                    if(length == value.Length){
                        // Same length: overwrite in place.
                        Array.Copy(value.ToCharArray(), 0, document.TextArray, start, length);
                    }
                    else
                    {
                        // Different length: build a new array from the text before
                        // the word, the replacement, and the text after the word.
                        char[] newText = new char[document.TextArray.Length + value.Length - length];
                        Array.Copy(document.TextArray, 0, newText, 0, start);
                        Array.Copy(value.ToCharArray(), 0, newText, start, value.Length);
                        Array.Copy(document.TextArray, start + length, newText, start + value.Length, document.TextArray.Length - start - length);
                        document.TextArray = newText;
                    }
                } else {
                    throw new IndexOutOfRangeException();
                }
            }
        }

        // Number of words in the document: repeatedly asks for word 0
        // starting just past the previously found word.
        public int Count {
            get
            {
                int count = 0, start = 0, length = 0;
                while (GetWord(document.TextArray, start + length, 0, out start, out length)) {
                    ++count;
                }
                return count;
            }
        }
    }

    // Class allowing the document to be viewed like an array
    // of character
    public class CharacterCollection
    {
        readonly Document document;

        internal CharacterCollection(Document d) {
            document = d;
        }

        //Indexer to get and set character in the containing
        //document
        public char this[int index]
        {
            get
            {
                return document.TextArray[index];
            }
            set
            {
                document.TextArray[index] = value;
            }
        }

        //get the count of character in the containing document
        public int Count {
            get
            {
                return document.TextArray.Length;
            }
        }
    }

    //Because the types of the fields have indexers,
    //these fields appear as "indexed properties":
    public WordCollection Words;
    public readonly CharacterCollection Characters;

    // The document's text; replaced by a new array when a word changes length.
    private char[] TextArray;

    public Document(string initialText) {
        TextArray = initialText.ToCharArray();
        Words = new WordCollection(this);
        Characters = new CharacterCollection(this);
    }

    // The current text as a string.
    public string Text {
        get
        {
            return new string(TextArray);
        }
    }

    // Demo: replaces a word through the word indexer, then upper-cases
    // every 'p' through the character indexer.
    class Test {
        static void Main() {
            Document d = new Document(
                "peter piper picked a peck of pickled peppers. How many pickled peppers did peter piper pick?"
            );
            //Change word "peter" to "penelope"
            for(int i = 0; i < d.Words.Count; ++i){
                if (d.Words[i] == "peter") {
                    d.Words[i] = "penelope";
                }
            }
            for (int i = 0; i < d.Characters.Count; ++i) {
                if (d.Characters[i] == 'p') {
                    d.Characters[i] = 'P';
                }
            }
            Console.WriteLine(d.Text);
        }
    }
}

If I change the code marked above to this: if (count == wordCount) {//PROBLEM IS HERE start = inWord; length = i - inWord; count++; return true; } I get an IndexOutOfRangeException, but I don't know why.
Your initial assumption is incorrect (that the two examples work exactly the same). In the following version, count is incremented regardless of whether or not it is equal to wordCount: if (count++ == wordCount) { // Code omitted } In this version, count is ONLY incremented when it is equal to wordCount if (count == wordCount) { // Other code omitted count++; } EDIT The reason this is causing you a failure is that, when you are searching for the second word (when wordCount is 1), the variable count will never equal wordCount (because it never gets incremented), and therefore the GetWord method returns false, which then triggers the else clause in your get method, which throws an IndexOutOfRangeException.
In your version of the code, count is only being incremented when count == wordCount; in the Microsoft version, it's being incremented whether the condition is met or not.
using System; public class Test { public static void Main() { int a = 0; if(a++ == 0){ Console.WriteLine(a); } } } Is not quite the same as: public static void Main() { int a = 0; if(a == 0){ a++; Console.WriteLine(a); } } In the second case a++ is executed only if a == 0. In the first case a++ is executed every time we check the condition.
There is your mistake: public static void Main() { int a = 0; if(a == 0){ a++; Console.WriteLine(a); } } It should be like this: public static void Main() { int a = 0; if(a == 0){ a++; Console.WriteLine(a); } else a++; } This way a always gets incremented. This means that in your code example count only gets incremented when count == wordCount (in which case the method returns true anyway...). You are basically never incrementing count.
HTML Decode without System.Web
I am working on a CLR Table-Valued function for SQL Server 2008 R2. I need to HTMLDecode a string at one point, but this is problematic because that relies on System.Web, which is not a supported assembly for SQL Server. Can anyone think of a better way to do the HTML Decode? FYI SQL Server 2008 CLR only supports up to .NET 3.5, so System.Net.WebUtility will not work.
Also you can use reflector to grab the code from WebUtility directly (please don't blame me for the coding style, its reflected stuff):

/// <summary>
/// Stand-alone HTML decoder that does not depend on System.Web.
/// Decodes named entities (&amp;amp;), decimal (&amp;#65;) and hexadecimal
/// (&amp;#x41;) character references.
/// </summary>
public class WebUtility
{
    /// <summary>Lookup table from entity name (without '&amp;' and ';') to character.</summary>
    private static class HtmlEntities
    {
        // Each item is "<character>-<entity name>". Characters that are invisible
        // or easily lost in copy/paste are written as \u escapes; if one of them
        // goes missing, Substring(2) below yields wrong (and duplicate) keys.
        private static string[] _entitiesList = new string[]
        {
            "\"-quot", "&-amp", "'-apos", "<-lt", ">-gt", "\u00a0-nbsp", "¡-iexcl", "¢-cent",
            "£-pound", "¤-curren", "¥-yen", "¦-brvbar", "§-sect", "¨-uml", "©-copy", "ª-ordf",
            "«-laquo", "¬-not", "\u00ad-shy", "®-reg", "¯-macr", "°-deg", "±-plusmn", "²-sup2",
            "³-sup3", "´-acute", "µ-micro", "¶-para", "·-middot", "¸-cedil", "¹-sup1", "º-ordm",
            "»-raquo", "¼-frac14", "½-frac12", "¾-frac34", "¿-iquest", "À-Agrave", "Á-Aacute", "Â-Acirc",
            "Ã-Atilde", "Ä-Auml", "Å-Aring", "Æ-AElig", "Ç-Ccedil", "È-Egrave", "É-Eacute", "Ê-Ecirc",
            "Ë-Euml", "Ì-Igrave", "Í-Iacute", "Î-Icirc", "Ï-Iuml", "Ð-ETH", "Ñ-Ntilde", "Ò-Ograve",
            "Ó-Oacute", "Ô-Ocirc", "Õ-Otilde", "Ö-Ouml", "×-times", "Ø-Oslash", "Ù-Ugrave", "Ú-Uacute",
            "Û-Ucirc", "Ü-Uuml", "Ý-Yacute", "Þ-THORN", "ß-szlig", "à-agrave", "á-aacute", "â-acirc",
            "ã-atilde", "ä-auml", "å-aring", "æ-aelig", "ç-ccedil", "è-egrave", "é-eacute", "ê-ecirc",
            "ë-euml", "ì-igrave", "í-iacute", "î-icirc", "ï-iuml", "ð-eth", "ñ-ntilde", "ò-ograve",
            "ó-oacute", "ô-ocirc", "õ-otilde", "ö-ouml", "÷-divide", "ø-oslash", "ù-ugrave", "ú-uacute",
            "û-ucirc", "ü-uuml", "ý-yacute", "þ-thorn", "ÿ-yuml", "Œ-OElig", "œ-oelig", "Š-Scaron",
            "š-scaron", "Ÿ-Yuml", "ƒ-fnof", "ˆ-circ", "˜-tilde", "Α-Alpha", "Β-Beta", "Γ-Gamma",
            "Δ-Delta", "Ε-Epsilon", "Ζ-Zeta", "Η-Eta", "Θ-Theta", "Ι-Iota", "Κ-Kappa", "Λ-Lambda",
            "Μ-Mu", "Ν-Nu", "Ξ-Xi", "Ο-Omicron", "Π-Pi", "Ρ-Rho", "Σ-Sigma", "Τ-Tau",
            "Υ-Upsilon", "Φ-Phi", "Χ-Chi", "Ψ-Psi", "Ω-Omega", "α-alpha", "β-beta", "γ-gamma",
            "δ-delta", "ε-epsilon", "ζ-zeta", "η-eta", "θ-theta", "ι-iota", "κ-kappa", "λ-lambda",
            "μ-mu", "ν-nu", "ξ-xi", "ο-omicron", "π-pi", "ρ-rho", "ς-sigmaf", "σ-sigma",
            "τ-tau", "υ-upsilon", "φ-phi", "χ-chi", "ψ-psi", "ω-omega", "ϑ-thetasym", "ϒ-upsih",
            "ϖ-piv", "\u2002-ensp", "\u2003-emsp", "\u2009-thinsp", "\u200c-zwnj", "\u200d-zwj", "\u200e-lrm", "\u200f-rlm",
            "–-ndash", "—-mdash", "‘-lsquo", "’-rsquo", "‚-sbquo", "“-ldquo", "”-rdquo", "„-bdquo",
            "†-dagger", "‡-Dagger", "•-bull", "…-hellip", "‰-permil", "′-prime", "″-Prime", "‹-lsaquo",
            "›-rsaquo", "‾-oline", "⁄-frasl", "€-euro", "ℑ-image", "℘-weierp", "ℜ-real", "™-trade",
            "ℵ-alefsym", "←-larr", "↑-uarr", "→-rarr", "↓-darr", "↔-harr", "↵-crarr", "⇐-lArr",
            "⇑-uArr", "⇒-rArr", "⇓-dArr", "⇔-hArr", "∀-forall", "∂-part", "∃-exist", "∅-empty",
            "∇-nabla", "∈-isin", "∉-notin", "∋-ni", "∏-prod", "∑-sum", "−-minus", "∗-lowast",
            "√-radic", "∝-prop", "∞-infin", "∠-ang", "∧-and", "∨-or", "∩-cap", "∪-cup",
            "∫-int", "∴-there4", "∼-sim", "≅-cong", "≈-asymp", "≠-ne", "≡-equiv", "≤-le",
            "≥-ge", "⊂-sub", "⊃-sup", "⊄-nsub", "⊆-sube", "⊇-supe", "⊕-oplus", "⊗-otimes",
            "⊥-perp", "⋅-sdot", "⌈-lceil", "⌉-rceil", "⌊-lfloor", "⌋-rfloor", "〈-lang", "〉-rang",
            "◊-loz", "♠-spades", "♣-clubs", "♥-hearts", "♦-diams"
        };

        // Must be declared after _entitiesList: static fields initialize in textual order.
        private static Dictionary<string, char> _lookupTable = GenerateLookupTable();

        /// <summary>Builds the name-to-character dictionary from the packed list.</summary>
        private static Dictionary<string, char> GenerateLookupTable()
        {
            Dictionary<string, char> dictionary = new Dictionary<string, char>(StringComparer.Ordinal);
            foreach (string item in _entitiesList)
            {
                // item[0] is the character, item[1] the '-' separator, the rest the name.
                dictionary.Add(item.Substring(2), item[0]);
            }
            return dictionary;
        }

        /// <summary>
        /// Returns the character for a (case-sensitive) entity name,
        /// or '\0' when the name is unknown.
        /// </summary>
        public static char Lookup(string entity)
        {
            char result;
            _lookupTable.TryGetValue(entity, out result);
            return result;
        }
    }

    /// <summary>Which numeric character references are accepted.</summary>
    private enum UnicodeDecodingConformance
    {
        Auto,
        Strict,
        Compat,
        Loose
    }

    private static char[] _htmlEntityEndingChars = new char[] { ';', '&' };

    // The framework resolves Auto from configuration at start-up; this copy has
    // no configuration, so Auto is treated as Strict when decoding.
    private static readonly UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;

    /// <summary>
    /// Converts a string that has been HTML-encoded into a decoded string.
    /// </summary>
    /// <param name="value">The string to decode; may be null or empty.</param>
    /// <returns>The decoded string; <paramref name="value"/> itself when there is nothing to decode.</returns>
    public static string HtmlDecode(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return value;
        }
        if (!StringRequiresHtmlDecoding(value))
        {
            return value;
        }
        StringWriter stringWriter = new StringWriter(CultureInfo.InvariantCulture);
        HtmlDecode(value, stringWriter);
        return stringWriter.ToString();
    }

    /// <summary>Cheap pre-check so strings without entities are returned untouched.</summary>
    private static bool StringRequiresHtmlDecoding(string s)
    {
        if (_htmlDecodeConformance == UnicodeDecodingConformance.Compat)
        {
            return s.IndexOf('&') >= 0;
        }
        for (int i = 0; i < s.Length; i++)
        {
            char c = s[i];
            if (c == '&' || char.IsSurrogate(c))
            {
                return true;
            }
        }
        return false;
    }

    /// <summary>Splits a code point above U+FFFF into its UTF-16 surrogate pair.</summary>
    private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate)
    {
        int num = (int)(smpChar - 65536u);
        leadingSurrogate = (char)(num / 1024 + 55296);
        trailingSurrogate = (char)(num % 1024 + 56320);
    }

    /// <summary>
    /// Parses the body of a numeric character reference ("#65" or "#x41") and
    /// checks it against the active conformance level.
    /// </summary>
    /// <returns>true when <paramref name="codePoint"/> holds an acceptable code point.</returns>
    private static bool TryParseNumericEntity(string entity, out uint codePoint)
    {
        bool parsed;
        if (entity[1] == 'x' || entity[1] == 'X')
        {
            parsed = uint.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint);
        }
        else
        {
            parsed = uint.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);
        }
        if (!parsed)
        {
            return false;
        }

        switch (_htmlDecodeConformance)
        {
            case UnicodeDecodingConformance.Auto:
            case UnicodeDecodingConformance.Strict:
                // Any code point up to U+10FFFF except the surrogate range.
                return codePoint < 55296u || (57343u < codePoint && codePoint <= 1114111u);
            case UnicodeDecodingConformance.Compat:
                return 0u < codePoint && codePoint <= 65535u;
            case UnicodeDecodingConformance.Loose:
                return codePoint <= 1114111u;
            default:
                return false;
        }
    }

    /// <summary>
    /// Decodes <paramref name="value"/> and writes the result to <paramref name="output"/>.
    /// Malformed or unknown entities are written through unchanged.
    /// </summary>
    /// <param name="value">The string to decode; null writes nothing.</param>
    /// <param name="output">Receives the decoded text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="output"/> is null and <paramref name="value"/> is not.</exception>
    public static void HtmlDecode(string value, TextWriter output)
    {
        if (value == null)
        {
            return;
        }
        if (output == null)
        {
            throw new ArgumentNullException("output");
        }
        if (!StringRequiresHtmlDecoding(value))
        {
            output.Write(value);
            return;
        }

        int length = value.Length;
        for (int i = 0; i < length; i++)
        {
            char c = value[i];
            if (c == '&')
            {
                // An entity ends at ';'. Hitting another '&' first means this
                // '&' is a literal ampersand.
                int end = value.IndexOfAny(_htmlEntityEndingChars, i + 1);
                if (end > 0 && value[end] == ';')
                {
                    string entity = value.Substring(i + 1, end - i - 1);
                    if (entity.Length > 1 && entity[0] == '#')
                    {
                        uint codePoint;
                        if (TryParseNumericEntity(entity, out codePoint))
                        {
                            if (codePoint <= 65535u)
                            {
                                output.Write((char)codePoint);
                            }
                            else
                            {
                                char lead;
                                char trail;
                                ConvertSmpToUtf16(codePoint, out lead, out trail);
                                output.Write(lead);
                                output.Write(trail);
                            }
                            i = end;
                            continue;
                        }
                        // Invalid reference: fall through and emit the '&' literally;
                        // the rest of the text is then copied character by character.
                    }
                    else
                    {
                        i = end;
                        char decoded = HtmlEntities.Lookup(entity);
                        if (decoded == '\0')
                        {
                            // Unknown name: reproduce the entity verbatim.
                            output.Write('&');
                            output.Write(entity);
                            output.Write(';');
                            continue;
                        }
                        c = decoded;
                    }
                }
            }
            output.Write(c);
        }
    }
}
What JSON library works well for you in .NET? [closed]
Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers. We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations. Closed 6 years ago. Improve this question I'd be interested in hearing what JSON library folks in the community have been using inside of .NET? I have a need to parse/serialize some JSON object graphs from inside .NET (C#) to actual .NET types. I could roll my own, but if there are some solid libraries folks have used, I'd like to hear your comments. I saw the list of libraries on the json.org site, but it's a fairly big list and the community is usually good at vetting out the contenders from the pretenders Any details (pros/cons) of your experience with the library would be incredibly helpful. -- thanks in advance.
I've used Json.NET with success in the past. Example from the site: Product product = new Product(); product.Name = "Apple"; product.Expiry = new DateTime(2008, 12, 28); product.Price = 3.99M; product.Sizes = new string[] { "Small", "Medium", "Large" }; string json = JsonConvert.SerializeObject(product); //{ // "Name": "Apple", // "Expiry": new Date(1230422400000), // "Price": 3.99, // "Sizes": [ // "Small", // "Medium", // "Large" // ] //} Product deserializedProduct = JsonConvert.DeserializeObject<Product>(json);
There are at least two built into the framework. The newer : System.Runtime.Serialization.Json and the older : System.Web.Script.Serialization I prefer to not have dependencies on 3rd party libraries. I work with JSON every day and have never needed anything more than what already exists in the framework.
I wrote my own JSON serializer using DataContractJsonSerializer in the System.ServiceModel.Web.dll assembly [which is a component of WCF included in .NET 3.5 as a standard assembly, and in the .NET 3.5 SP1 Client Profile] (in .NET 4.0 and Silverlight 4, it's been moved to System.Runtime.Serialization.dll).

using System.IO;
using System.Runtime.Serialization.Json;

/// <summary>
/// Thin string-based wrapper around <see cref="DataContractJsonSerializer"/>.
/// </summary>
public class JsonObjectSerializer
{
    /// <summary>
    /// Serializes <paramref name="instance"/> to its JSON text.
    /// </summary>
    public string Serialize<T>(T instance) where T : class
    {
        DataContractJsonSerializer jsonSerializer = new DataContractJsonSerializer(typeof(T));
        using (MemoryStream buffer = new MemoryStream())
        {
            // Write the JSON bytes, then rewind and read them back as text.
            jsonSerializer.WriteObject(buffer, instance);
            buffer.Flush();
            buffer.Seek(0, SeekOrigin.Begin);

            using (StreamReader textReader = new StreamReader(buffer))
            {
                string json = textReader.ReadToEnd();
                return json;
            }
        }
    }

    /// <summary>
    /// Builds a <typeparamref name="T"/> from JSON text; returns null when the
    /// deserialized object is not a <typeparamref name="T"/>.
    /// </summary>
    public T Deserialize<T>(string serialized) where T : class
    {
        DataContractJsonSerializer jsonSerializer = new DataContractJsonSerializer(typeof(T));
        using (MemoryStream buffer = new MemoryStream())
        using (StreamWriter textWriter = new StreamWriter(buffer))
        {
            // Push the text into the stream, then rewind for the serializer.
            textWriter.Write(serialized);
            textWriter.Flush();
            buffer.Seek(0, SeekOrigin.Begin);

            object graph = jsonSerializer.ReadObject(buffer);
            return graph as T;
        }
    }
}
You should also try my ServiceStack JsonSerializer - it's the fastest .NET JSON serializer at the moment based on the benchmarks of the leading JSON serializers and supports serializing any POCO Type, DataContracts, Lists/Dictionaries, Interfaces, Inheritance, Late-bound objects including anonymous types, etc. Basic Example: Customer customer = new Customer { Name="Joe Bloggs", Age=31 }; string json = customer.ToJson(); Customer fromJson = json.FromJson<Customer>(json);
Check out the System.Runtime.Serialization.Json Namespace included with .NET 3.5.
I typed "json" into google and the top hit was json.org, which leads to what looks like a good single utility class:

using System;
using System.Collections;
using System.Globalization;
using System.Text;

namespace Procurios.Public
{
    /// <summary>
    /// This class encodes and decodes JSON strings.
    /// Spec. details, see http://www.json.org/
    ///
    /// JSON uses Arrays and Objects. These correspond here to the datatypes ArrayList and Hashtable.
    /// All numbers are parsed to doubles.
    /// </summary>
    public class JSON
    {
        public const int TOKEN_NONE = 0;
        public const int TOKEN_CURLY_OPEN = 1;
        public const int TOKEN_CURLY_CLOSE = 2;
        public const int TOKEN_SQUARED_OPEN = 3;
        public const int TOKEN_SQUARED_CLOSE = 4;
        public const int TOKEN_COLON = 5;
        public const int TOKEN_COMMA = 6;
        public const int TOKEN_STRING = 7;
        public const int TOKEN_NUMBER = 8;
        public const int TOKEN_TRUE = 9;
        public const int TOKEN_FALSE = 10;
        public const int TOKEN_NULL = 11;

        private const int BUILDER_CAPACITY = 2000;

        /// <summary>
        /// Parses the string json into a value
        /// </summary>
        /// <param name="json">A JSON string.</param>
        /// <returns>An ArrayList, a Hashtable, a double, a string, null, true, or false</returns>
        public static object JsonDecode(string json)
        {
            bool success = true;
            return JsonDecode(json, ref success);
        }

        /// <summary>
        /// Parses the string json into a value; and fills 'success' with the successfullness of the parse.
        /// </summary>
        /// <param name="json">A JSON string.</param>
        /// <param name="success">Successful parse?</param>
        /// <returns>An ArrayList, a Hashtable, a double, a string, null, true, or false</returns>
        public static object JsonDecode(string json, ref bool success)
        {
            success = true;
            if (json == null)
            {
                return null;
            }

            char[] charArray = json.ToCharArray();
            int index = 0;
            return ParseValue(charArray, ref index, ref success);
        }

        /// <summary>
        /// Converts a Hashtable / ArrayList object into a JSON string
        /// </summary>
        /// <param name="json">A Hashtable / ArrayList</param>
        /// <returns>A JSON encoded string, or null if object 'json' is not serializable</returns>
        public static string JsonEncode(object json)
        {
            StringBuilder builder = new StringBuilder(BUILDER_CAPACITY);
            bool success = SerializeValue(json, builder);
            return (success ? builder.ToString() : null);
        }

        /// <summary>
        /// Parses an object starting at the '{' at <paramref name="index"/>.
        /// Returns null and clears <paramref name="success"/> on malformed input.
        /// </summary>
        protected static Hashtable ParseObject(char[] json, ref int index, ref bool success)
        {
            Hashtable table = new Hashtable();

            // {
            NextToken(json, ref index);

            while (true)
            {
                int token = LookAhead(json, index);
                if (token == JSON.TOKEN_NONE)
                {
                    success = false;
                    return null;
                }
                else if (token == JSON.TOKEN_COMMA)
                {
                    NextToken(json, ref index);
                }
                else if (token == JSON.TOKEN_CURLY_CLOSE)
                {
                    NextToken(json, ref index);
                    return table;
                }
                else
                {
                    // name
                    string name = ParseString(json, ref index, ref success);
                    if (!success)
                    {
                        return null;
                    }

                    // :
                    token = NextToken(json, ref index);
                    if (token != JSON.TOKEN_COLON)
                    {
                        success = false;
                        return null;
                    }

                    // value
                    object value = ParseValue(json, ref index, ref success);
                    if (!success)
                    {
                        return null;
                    }

                    table[name] = value;
                }
            }
        }

        /// <summary>
        /// Parses an array starting at the '[' at <paramref name="index"/>.
        /// Returns null and clears <paramref name="success"/> on malformed input.
        /// </summary>
        protected static ArrayList ParseArray(char[] json, ref int index, ref bool success)
        {
            ArrayList array = new ArrayList();

            // [
            NextToken(json, ref index);

            while (true)
            {
                int token = LookAhead(json, index);
                if (token == JSON.TOKEN_NONE)
                {
                    success = false;
                    return null;
                }
                else if (token == JSON.TOKEN_COMMA)
                {
                    NextToken(json, ref index);
                }
                else if (token == JSON.TOKEN_SQUARED_CLOSE)
                {
                    NextToken(json, ref index);
                    break;
                }
                else
                {
                    object value = ParseValue(json, ref index, ref success);
                    if (!success)
                    {
                        return null;
                    }
                    array.Add(value);
                }
            }

            return array;
        }

        /// <summary>
        /// Parses whatever value starts at <paramref name="index"/>, dispatching on the next token.
        /// </summary>
        protected static object ParseValue(char[] json, ref int index, ref bool success)
        {
            switch (LookAhead(json, index))
            {
                case JSON.TOKEN_STRING:
                    return ParseString(json, ref index, ref success);
                case JSON.TOKEN_NUMBER:
                    return ParseNumber(json, ref index, ref success);
                case JSON.TOKEN_CURLY_OPEN:
                    return ParseObject(json, ref index, ref success);
                case JSON.TOKEN_SQUARED_OPEN:
                    return ParseArray(json, ref index, ref success);
                case JSON.TOKEN_TRUE:
                    NextToken(json, ref index);
                    return true;
                case JSON.TOKEN_FALSE:
                    NextToken(json, ref index);
                    return false;
                case JSON.TOKEN_NULL:
                    NextToken(json, ref index);
                    return null;
                case JSON.TOKEN_NONE:
                    break;
            }

            success = false;
            return null;
        }

        /// <summary>
        /// Parses a quoted string, resolving backslash escapes.
        /// Returns null and clears <paramref name="success"/> when the string is unterminated.
        /// </summary>
        protected static string ParseString(char[] json, ref int index, ref bool success)
        {
            StringBuilder s = new StringBuilder(BUILDER_CAPACITY);
            char c;

            EatWhitespace(json, ref index);

            // Nothing left to read: report failure instead of indexing past the end.
            if (index >= json.Length)
            {
                success = false;
                return null;
            }

            // "
            c = json[index++];

            bool complete = false;
            while (!complete)
            {
                if (index == json.Length)
                {
                    break;
                }

                c = json[index++];
                if (c == '"')
                {
                    complete = true;
                    break;
                }
                else if (c == '\\')
                {
                    if (index == json.Length)
                    {
                        break;
                    }
                    c = json[index++];
                    if (c == '"')
                    {
                        s.Append('"');
                    }
                    else if (c == '\\')
                    {
                        s.Append('\\');
                    }
                    else if (c == '/')
                    {
                        s.Append('/');
                    }
                    else if (c == 'b')
                    {
                        s.Append('\b');
                    }
                    else if (c == 'f')
                    {
                        s.Append('\f');
                    }
                    else if (c == 'n')
                    {
                        s.Append('\n');
                    }
                    else if (c == 'r')
                    {
                        s.Append('\r');
                    }
                    else if (c == 't')
                    {
                        s.Append('\t');
                    }
                    else if (c == 'u')
                    {
                        int remainingLength = json.Length - index;
                        if (remainingLength >= 4)
                        {
                            // parse the four hex digits into a UTF-16 code unit
                            uint codeUnit;
                            if (!(success = UInt32.TryParse(new string(json, index, 4), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codeUnit)))
                            {
                                return "";
                            }
                            // A \uXXXX escape is one UTF-16 code unit; characters above
                            // U+FFFF arrive as two escapes (a surrogate pair). Append the
                            // unit directly: Char.ConvertFromUtf32 rejects surrogate
                            // values and made such input throw.
                            s.Append((char)codeUnit);
                            // skip 4 chars
                            index += 4;
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                else
                {
                    s.Append(c);
                }
            }

            if (!complete)
            {
                success = false;
                return null;
            }

            return s.ToString();
        }

        /// <summary>
        /// Parses a number into a double using the invariant culture.
        /// </summary>
        protected static double ParseNumber(char[] json, ref int index, ref bool success)
        {
            EatWhitespace(json, ref index);

            int lastIndex = GetLastIndexOfNumber(json, index);
            int charLength = (lastIndex - index) + 1;

            double number;
            success = Double.TryParse(new string(json, index, charLength), NumberStyles.Any, CultureInfo.InvariantCulture, out number);

            index = lastIndex + 1;
            return number;
        }

        /// <summary>
        /// Returns the index of the last character that can belong to the number starting at <paramref name="index"/>.
        /// </summary>
        protected static int GetLastIndexOfNumber(char[] json, int index)
        {
            int lastIndex;
            for (lastIndex = index; lastIndex < json.Length; lastIndex++)
            {
                if ("0123456789+-.eE".IndexOf(json[lastIndex]) == -1)
                {
                    break;
                }
            }
            return lastIndex - 1;
        }

        /// <summary>Advances <paramref name="index"/> past spaces, tabs and line breaks.</summary>
        protected static void EatWhitespace(char[] json, ref int index)
        {
            for (; index < json.Length; index++)
            {
                if (" \t\n\r".IndexOf(json[index]) == -1)
                {
                    break;
                }
            }
        }

        /// <summary>Returns the next token without moving the caller's index.</summary>
        protected static int LookAhead(char[] json, int index)
        {
            int saveIndex = index;
            return NextToken(json, ref saveIndex);
        }

        /// <summary>
        /// Reads the next token and advances <paramref name="index"/> past it.
        /// For strings and numbers only the first character is consumed.
        /// </summary>
        protected static int NextToken(char[] json, ref int index)
        {
            EatWhitespace(json, ref index);

            if (index == json.Length)
            {
                return JSON.TOKEN_NONE;
            }

            char c = json[index];
            index++;
            switch (c)
            {
                case '{':
                    return JSON.TOKEN_CURLY_OPEN;
                case '}':
                    return JSON.TOKEN_CURLY_CLOSE;
                case '[':
                    return JSON.TOKEN_SQUARED_OPEN;
                case ']':
                    return JSON.TOKEN_SQUARED_CLOSE;
                case ',':
                    return JSON.TOKEN_COMMA;
                case '"':
                    return JSON.TOKEN_STRING;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case '-':
                    return JSON.TOKEN_NUMBER;
                case ':':
                    return JSON.TOKEN_COLON;
            }
            index--;

            int remainingLength = json.Length - index;

            // false
            if (remainingLength >= 5)
            {
                if (json[index] == 'f' &&
                    json[index + 1] == 'a' &&
                    json[index + 2] == 'l' &&
                    json[index + 3] == 's' &&
                    json[index + 4] == 'e')
                {
                    index += 5;
                    return JSON.TOKEN_FALSE;
                }
            }

            // true
            if (remainingLength >= 4)
            {
                if (json[index] == 't' &&
                    json[index + 1] == 'r' &&
                    json[index + 2] == 'u' &&
                    json[index + 3] == 'e')
                {
                    index += 4;
                    return JSON.TOKEN_TRUE;
                }
            }

            // null
            if (remainingLength >= 4)
            {
                if (json[index] == 'n' &&
                    json[index + 1] == 'u' &&
                    json[index + 2] == 'l' &&
                    json[index + 3] == 'l')
                {
                    index += 4;
                    return JSON.TOKEN_NULL;
                }
            }

            return JSON.TOKEN_NONE;
        }

        /// <summary>
        /// Appends the JSON form of <paramref name="value"/>; returns false for unsupported types.
        /// </summary>
        protected static bool SerializeValue(object value, StringBuilder builder)
        {
            bool success = true;

            if (value is string)
            {
                success = SerializeString((string)value, builder);
            }
            else if (value is Hashtable)
            {
                success = SerializeObject((Hashtable)value, builder);
            }
            else if (value is ArrayList)
            {
                success = SerializeArray((ArrayList)value, builder);
            }
            else if (IsNumeric(value))
            {
                success = SerializeNumber(Convert.ToDouble(value), builder);
            }
            else if (value is Boolean)
            {
                builder.Append((Boolean)value ? "true" : "false");
            }
            else if (value == null)
            {
                builder.Append("null");
            }
            else
            {
                success = false;
            }
            return success;
        }

        /// <summary>Appends a Hashtable as a JSON object; keys are converted with ToString().</summary>
        protected static bool SerializeObject(Hashtable anObject, StringBuilder builder)
        {
            builder.Append("{");

            IDictionaryEnumerator e = anObject.GetEnumerator();
            bool first = true;
            while (e.MoveNext())
            {
                string key = e.Key.ToString();
                object value = e.Value;

                if (!first)
                {
                    builder.Append(", ");
                }

                SerializeString(key, builder);
                builder.Append(":");
                if (!SerializeValue(value, builder))
                {
                    return false;
                }

                first = false;
            }

            builder.Append("}");
            return true;
        }

        /// <summary>Appends an ArrayList as a JSON array.</summary>
        protected static bool SerializeArray(ArrayList anArray, StringBuilder builder)
        {
            builder.Append("[");

            bool first = true;
            for (int i = 0; i < anArray.Count; i++)
            {
                object value = anArray[i];

                if (!first)
                {
                    builder.Append(", ");
                }

                if (!SerializeValue(value, builder))
                {
                    return false;
                }

                first = false;
            }

            builder.Append("]");
            return true;
        }

        /// <summary>
        /// Appends a quoted JSON string; everything outside printable ASCII is written as \uXXXX.
        /// </summary>
        protected static bool SerializeString(string aString, StringBuilder builder)
        {
            builder.Append("\"");

            char[] charArray = aString.ToCharArray();
            for (int i = 0; i < charArray.Length; i++)
            {
                char c = charArray[i];
                if (c == '"')
                {
                    builder.Append("\\\"");
                }
                else if (c == '\\')
                {
                    builder.Append("\\\\");
                }
                else if (c == '\b')
                {
                    builder.Append("\\b");
                }
                else if (c == '\f')
                {
                    builder.Append("\\f");
                }
                else if (c == '\n')
                {
                    builder.Append("\\n");
                }
                else if (c == '\r')
                {
                    builder.Append("\\r");
                }
                else if (c == '\t')
                {
                    builder.Append("\\t");
                }
                else
                {
                    int codepoint = Convert.ToInt32(c);
                    if ((codepoint >= 32) && (codepoint <= 126))
                    {
                        builder.Append(c);
                    }
                    else
                    {
                        builder.Append("\\u" + Convert.ToString(codepoint, 16).PadLeft(4, '0'));
                    }
                }
            }

            builder.Append("\"");
            return true;
        }

        /// <summary>Appends a number formatted with the invariant culture.</summary>
        protected static bool SerializeNumber(double number, StringBuilder builder)
        {
            builder.Append(Convert.ToString(number, CultureInfo.InvariantCulture));
            return true;
        }

        /// <summary>
        /// Determines if a given object is numeric in any way
        /// (can be integer, double, null, etc).
        ///
        /// Thanks to mtighe for pointing out Double.TryParse to me.
        /// </summary>
        protected static bool IsNumeric(object o)
        {
            double result;

            return (o == null) ? false : Double.TryParse(o.ToString(), out result);
        }
    }
}
I've used System.Web.Helpers.Json for encoding and decoding. It decodes into a dynamic types, which is a good match for javascript's dynamic data.