Related
I have the following code:
// Signature of the native panic callback: a printf-style format string plus an
// opaque va_list pointer, called with the cdecl convention.
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void PanicFuncDelegate(string str, IntPtr args);

// Panic callback: forwards the message to LogFunc with the "PANIC" severity.
private void PanicFunc(string str, IntPtr args) => LogFunc("PANIC", str, args);

// Prefixes the format string with "[severity] " and lets the native vprintf
// expand it against the untouched va_list pointer.
public void LogFunc(string severity, string str, IntPtr args)
{
    string format = "[" + severity + "] " + str;
    vprintf(format, args);
}

// Native vprintf imported from libc.so.6; args is passed through as a raw va_list pointer.
[DllImport("libc.so.6")]
private static extern int vprintf(string format, IntPtr args);
This prints to the console the messages correctly formatted. I want to retrieve the values from args to use them in my own logger.
If I try to read the value behind each pointer in the args array (as suggested here: Marshal va_list in C# delegate), I get a segmentation fault.
Any suggestions?
I have a function call with this working, here's what I do:
For the DllImport I use an __arglist to marshal to the va_list,
[DllImport("libc.so.6")]
private static extern int vprintf(string format, __arglist);
Then when calling the function I create the __arglist,
vprintf(string format, __arglist(arg1, arg2, arg3...))
Of course, you would need to either call the function with all the arguments statically or build that __arglist dynamically. I don't have the code here, but it's possible.
I wonder if you get a segmentation fault because the elements in the object[] are not pinned? Maybe if you pin the object[] and all elements within that would help? Just a guess though.
Just think about how a C program reads variables from a va_list, and the solution follows:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
namespace VaTest
{
    /// <summary>
    /// Demonstrates how to build an argument buffer by hand and pass it to a native
    /// function that takes a <c>va_list</c> (here: <c>vprintf</c>).
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            MarshalVaArgs(vaList => vprintf("%c%d%s", vaList), false, 'a', 123, "bc");
        }

        [DllImport("msvcrt")] //windows
        //[DllImport("c")] //linux
        private static extern int vprintf(string format, IntPtr vaList);

        /// <summary>
        /// Marshaled size of <paramref name="t"/> rounded up to a multiple of the pointer size,
        /// i.e. the size of the slot the value occupies in the argument buffer.
        /// </summary>
        private static int IntSizeOf(Type t)
        {
            return (Marshal.SizeOf(t) + IntPtr.Size - 1) & ~(IntPtr.Size - 1);
        }

        /// <summary>
        /// Lays out <paramref name="args"/> in one unmanaged buffer (each value in a
        /// pointer-size-aligned slot), invokes <paramref name="action"/> with the buffer
        /// address and then releases every unmanaged allocation it made.
        /// </summary>
        /// <param name="action">Callback receiving the pointer to the argument buffer.</param>
        /// <param name="isUnicode">
        /// <c>true</c>: strings are marshaled as UTF-16; <c>false</c>: as ANSI;
        /// <c>null</c>: platform default (<see cref="Marshal.StringToHGlobalAuto"/>).
        /// </param>
        /// <param name="args">
        /// Values to marshal. Strings are stored as pointers to a native copy, a <c>null</c>
        /// element is stored as a null pointer, anything else is copied with
        /// <see cref="Marshal.StructureToPtr(object, IntPtr, bool)"/>.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="action"/> or <paramref name="args"/> is <c>null</c>.
        /// </exception>
        public static void MarshalVaArgs(Action<IntPtr> action, bool? isUnicode, params object[] args)
        {
            if (action == null)
            {
                throw new ArgumentNullException("action");
            }
            if (args == null)
            {
                throw new ArgumentNullException("args");
            }

            var sizes = new int[args.Length];
            for (var i = 0; i < args.Length; i++)
            {
                // Strings and nulls travel as pointers; everything else takes its aligned marshaled size.
                sizes[i] = (args[i] == null || args[i] is string) ? IntPtr.Size : IntSizeOf(args[i].GetType());
            }

            var allocs = new List<IntPtr>();
            try
            {
                var result = Marshal.AllocHGlobal(sizes.Sum());
                allocs.Add(result);
                var offset = 0;
                for (var i = 0; i < args.Length; i++)
                {
                    var s = args[i] as string;
                    if (s != null)
                    {
                        IntPtr data;
                        if (!isUnicode.HasValue)
                        {
                            data = Marshal.StringToHGlobalAuto(s);
                        }
                        else if (isUnicode.Value)
                        {
                            data = Marshal.StringToHGlobalUni(s);
                        }
                        else
                        {
                            data = Marshal.StringToHGlobalAnsi(s);
                        }
                        allocs.Add(data);
                        Marshal.WriteIntPtr(result, offset, data);
                    }
                    else if (args[i] == null)
                    {
                        Marshal.WriteIntPtr(result, offset, IntPtr.Zero);
                    }
                    else
                    {
                        Marshal.StructureToPtr(args[i], result + offset, false);
                    }
                    offset += sizes[i];
                }

                action(result);
            }
            finally
            {
                // Runs even when the callback or a marshaling call throws, so nothing leaks.
                foreach (var ptr in allocs)
                {
                    Marshal.FreeHGlobal(ptr);
                }
            }
        }
    }
}
The code is written and tested with .NET Core 3.0 preview 5, compatible with .NET Framework 4.0 and C# 3.0.
Outputs:
a123bc
As this isn't solved yet, I'm posting a longer solution that worked for me.
I found the solution in an abandoned project
https://github.com/GoaLitiuM/libobs-sharp
Use like this (tested with FFmpeg):
var objects = va_list_Helper.VaListToArray(format, va_List_Ptr);
// format: frame=%4d QP=%.2f NAL=%d Slice:%c Poc:%-3d I:%-4d P:%-4d SKIP:%-4d size=%d bytes%s
// format (filled): frame= 3 QP=13.00 NAL=0 Slice:B Poc:4 I:0 P:8 SKIP:912 size=32 bytes
// va_List objects: 3, 13, 0, 'B', 4, 0, 8, 912, 32
The classes needed:
/// <summary> Convenience entry point for decoding a native va_list. </summary>
public class va_list_Helper
{
    /// <summary>
    /// Wraps the raw va_list pointer in a <c>va_list</c> object and returns the
    /// arguments it decodes for the given printf format string.
    /// </summary>
    /// <param name="format"> printf format string describing the arguments. </param>
    /// <param name="va_list"> Raw pointer to the native argument list. </param>
    public static unsafe object[] VaListToArray(string format, byte* va_list)
    {
        return new va_list(new IntPtr(va_list)).GetObjectsByFormat(format);
    }
}
/// <summary>
/// Minimal parser for printf-style format strings: splits a format into its
/// conversion specifications and decodes flags, width, precision and type of each.
/// </summary>
public static class Printf
{
    /// <summary>
    /// Splits <paramref name="format"/> into chunks that each start at a '%' and run up
    /// to (not including) the next specifier. A "%%" pair is kept together in one chunk.
    /// </summary>
    /// <param name="format"> printf format string. </param>
    /// <returns>
    /// The chunks in order of appearance, or <c>null</c> when <paramref name="format"/>
    /// is <c>null</c> or contains no '%'.
    /// </returns>
    public static string[] GetFormatSpecifiers(string format)
    {
        if (format == null || format.IndexOf('%') == -1)
            return null;

        // find specifiers from format string
        List<int> indices = new List<int>();
        for (int j = 0; j < format.Length; j++)
        {
            j = format.IndexOf('%', j);
            if (j == -1)
                break;
            indices.Add(j);
            // ignore "%%"; the bounds check keeps a trailing '%' from reading past the end
            if (j + 1 < format.Length && format[j + 1] == '%')
                j++;
        }
        if (indices.Count == 0)
            return null;

        List<string> formats = new List<string>(indices.Count);
        for (int mi = 0; mi < indices.Count; mi++)
        {
            int end = mi + 1 < indices.Count ? indices[mi + 1] : format.Length;
            string formatSpecifier = format.Substring(indices[mi], end - indices[mi]);
            if (!string.IsNullOrWhiteSpace(formatSpecifier))
                formats.Add(formatSpecifier);
        }
        return formats.ToArray();
    }

    /// <summary> Decoded parts of a single conversion specification. </summary>
    public class FormatSpecificationInfo
    {
        /// <summary> The specification text from '%' up to and including the type character. </summary>
        public string specification;
        //public int parameter;
        /// <summary> Conversion type character ('d', 's', '%', ...), or '\0' when none was found. </summary>
        public char type;
        /// <summary> Explicit field width, or <see cref="int.MinValue"/> when not given. </summary>
        public int width;
        /// <summary> Explicit precision; 6 by default, <see cref="int.MinValue"/> for s/c/x/X/o without one. </summary>
        public int precision;
        public FormatFlags flags;
    }

    [Flags]
    public enum FormatFlags
    {
        // Type length
        // NOTE: 'l' and 'h' are accumulated by addition in GetFormatSpecifierInfo, so
        // IsLongLong must be 2 * IsLong and IsChar must be 2 * IsShort.
        IsLong = 0x0001, // l
        IsLongLong = 0x0002, // ll
        IsShort = 0x0004, // h
        IsChar = 0x0008, // hh
        IsLongDouble = 0x0010, // L (own bit; must not overlap the other length flags)
        // Flags
        LeftAlign = 0x0100, // '-' left align within the width
        Sign = 0x0200, // '+' use - or + signs for signed types
        Alternate = 0x0400, // '#' prefix non-zero values with hex types
        ZeroPad = 0x0800, // '0' pad with zeros
        Blank = 0x1000, // ' ' pad sign with blank
        Grouping = 0x2000, // '\'' group by thousands
        ArchSize = 0x4000, // '?' use arch precision
        // Dynamic parameters
        DynamicWidth = 0x10000,
        DynamicPrecision = 0x20000,
    }

    /// <summary>
    /// Parses one chunk produced by <see cref="GetFormatSpecifiers"/>. Parsing starts at the
    /// first '%' (anything before it is ignored) and stops at the type character.
    /// </summary>
    /// <param name="specification"> Text containing a single conversion specification. </param>
    /// <returns>
    /// The decoded specification, or <c>null</c> when <paramref name="specification"/> is
    /// null, empty or whitespace. <c>type</c> is '\0' when no type character was found.
    /// </returns>
    public static FormatSpecificationInfo GetFormatSpecifierInfo(string specification)
    {
        if (string.IsNullOrWhiteSpace(specification))
            return null;

        FormatSpecificationInfo info = new FormatSpecificationInfo()
        {
            type = '\0',
            width = int.MinValue,
            precision = 6,
        };
        string width = "";
        string precision = "";
        int start = -1;

        // TODO: parse parameter index
        for (int i = 0; i < specification.Length && info.type == '\0'; i++)
        {
            char c = specification[i];

            // Nothing belongs to the specification until its leading '%' has been seen.
            if (start == -1 && c != '%')
                continue;

            switch (c)
            {
                case '%':
                    if (start == -1)
                        start = i;
                    else
                        info.type = c; // second '%' of a "%%" escape
                    info.specification = specification.Substring(start, i + 1 - start);
                    break;

                // flags
                case '-':
                    info.flags |= FormatFlags.LeftAlign;
                    break;
                case '+':
                    info.flags |= FormatFlags.Sign;
                    break;
                case ' ':
                    info.flags |= FormatFlags.Blank;
                    break;
                case '#':
                    info.flags |= FormatFlags.Alternate;
                    break;
                case '\'':
                    info.flags |= FormatFlags.Grouping;
                    break;
                case '?':
                    info.flags |= FormatFlags.ArchSize;
                    break;

                // precision
                case '.':
                    // Consume the digits (or '*') following the dot; i is advanced past them.
                    for (int j = i + 1; j < specification.Length; j++)
                    {
                        if (specification[j] == '*')
                            info.flags |= FormatFlags.DynamicPrecision;
                        else if (IsAsciiDigit(specification[j]))
                            precision += specification[j];
                        else
                            break;
                        i++;
                    }
                    break;

                // length flags
                // Added rather than OR-ed on purpose: "h" + "h" yields IsChar and
                // "l" + "l" yields IsLongLong.
                case 'h':
                    info.flags += (int)FormatFlags.IsShort;
                    break;
                case 'l':
                    info.flags += (int)FormatFlags.IsLong;
                    break;
                case 'L':
                    info.flags |= FormatFlags.IsLongDouble;
                    break;
                case 'z':
                case 'j':
                case 't':
                    // not supported
                    break;

                // dynamic width
                case '*':
                    info.flags |= FormatFlags.DynamicWidth;
                    break;

                default:
                    if (IsAsciiDigit(c))
                    {
                        // A leading zero is the zero-pad flag, not part of the width.
                        if (width == "" && c == '0')
                            info.flags |= FormatFlags.ZeroPad;
                        else
                            width += c;
                    }
                    else if (char.IsLetter(c) && info.type == '\0')
                    {
                        info.type = c;
                        info.specification = specification.Substring(start, i + 1 - start);
                    }
                    break;
            }
        }

        // sign overrides space
        if (info.flags.HasFlag(FormatFlags.Sign) && info.flags.HasFlag(FormatFlags.Blank))
            info.flags &= ~FormatFlags.Blank;
        // left alignment overrides zero padding
        if (info.flags.HasFlag(FormatFlags.LeftAlign) && info.flags.HasFlag(FormatFlags.ZeroPad))
            info.flags &= ~FormatFlags.ZeroPad;

        // unsupported precision for these types
        if (info.type == 's' ||
            info.type == 'c' ||
            Char.ToUpper(info.type) == 'X' ||
            info.type == 'o')
        {
            info.precision = int.MinValue;
        }

        if (!string.IsNullOrWhiteSpace(precision))
            info.precision = Convert.ToInt32(precision);
        if (!string.IsNullOrWhiteSpace(width))
            info.width = Convert.ToInt32(width);
        return info;
    }

    // Only '0'..'9' may form a width/precision; char.IsNumber would also accept
    // characters such as superscripts that Convert.ToInt32 cannot parse.
    private static bool IsAsciiDigit(char c)
    {
        return c >= '0' && c <= '9';
    }
}
/// <summary>
/// Thin wrapper around a native <c>va_list</c> pointer that can decode the arguments
/// it refers to with the help of a printf format string.
/// </summary>
/// <remarks>
/// NOTE(review): decoding treats the va_list as a pointer to a contiguous block of
/// pointer-size-aligned argument slots. That presumably matches the Windows x86/x64
/// ABIs; confirm for any other target before relying on it.
/// </remarks>
public class va_list
{
    internal IntPtr instance; //unmanaged pointer to va_list

    public va_list(IntPtr ptr)
    {
        instance = ptr;
    }

    /// <summary> Returns unmanaged pointer to argument list. </summary>
    public IntPtr GetPointer()
    {
        return instance;
    }

    /// <summary> Returns array of objects with help of printf format string. </summary>
    /// <param name="format"> printf format string. </param>
    public object[] GetObjectsByFormat(string format)
    {
        return GetObjectsByFormat(format, this);
    }

    /// <summary>
    /// Reads one value from <paramref name="va_list"/> for every conversion in
    /// <paramref name="format"/> (plus one int for each dynamic '*' width/precision).
    /// </summary>
    /// <param name="format"> printf format string describing the arguments. </param>
    /// <param name="va_list"> Wrapper around the native argument pointer. </param>
    /// <returns>
    /// The decoded values in argument order, or <c>null</c> when the format is null or has
    /// no specifiers, or when <paramref name="va_list"/> is null or wraps a null pointer.
    /// </returns>
    /// <exception cref="ApplicationException"> The format uses an unsupported conversion type. </exception>
    public static object[] GetObjectsByFormat(string format, va_list va_list)
    {
        if (format == null)
            return null;
        string[] formatSpecifiers = Printf.GetFormatSpecifiers(format);
        if (formatSpecifiers == null || va_list == null || va_list.GetPointer() == IntPtr.Zero)
            return null;

        IntPtr args = va_list.GetPointer();
        List<object> objects = new List<object>(formatSpecifiers.Length);

        // Every argument no wider than a pointer occupies one pointer-size slot.
        int slotSize = IntPtr.Size;
        int offset = 0;
        foreach (string spec in formatSpecifiers)
        {
            var info = Printf.GetFormatSpecifierInfo(spec);
            if (info == null || info.type == '\0')
                continue;

            // dynamic width and precision arguments
            // these are stored in stack before the actual value
            if (info.flags.HasFlag(Printf.FormatFlags.DynamicWidth))
            {
                objects.Add(Marshal.ReadInt32(args, offset));
                offset += slotSize;
            }
            if (info.flags.HasFlag(Printf.FormatFlags.DynamicPrecision))
            {
                objects.Add(Marshal.ReadInt32(args, offset));
                offset += slotSize;
            }

            bool isLongLong = info.flags.HasFlag(Printf.FormatFlags.IsLongLong);
            bool isShort = info.flags.HasFlag(Printf.FormatFlags.IsShort);
            // A 64-bit integer needs 8 bytes even when pointers are 4 bytes wide.
            int intSize = isLongLong ? sizeof(long) : slotSize;

            // marshal objects from pointer
            switch (info.type)
            {
                // char / wchar_t (promoted to int)
                case 'c':
                    {
                        objects.Add((char)Marshal.ReadByte(args, offset));
                        offset += slotSize;
                    }
                    break;

                // signed integers
                case 'd':
                case 'i':
                    {
                        if (isShort) // h
                        {
                            // short is promoted to int and takes a full slot like any other int
                            objects.Add((short)Marshal.ReadInt32(args, offset));
                            offset += slotSize;
                        }
                        else if (isLongLong) // ll
                        {
                            objects.Add(Marshal.ReadInt64(args, offset));
                            offset += intSize;
                        }
                        else // int and long types
                        {
                            objects.Add(Marshal.ReadInt32(args, offset));
                            offset += intSize;
                        }
                    }
                    break;

                // unsigned integers
                case 'u':
                case 'o':
                case 'x':
                case 'X':
                    {
                        if (isShort) // h
                        {
                            objects.Add((ushort)Marshal.ReadInt32(args, offset));
                            offset += slotSize;
                        }
                        else if (isLongLong) // ll
                        {
                            objects.Add((ulong)Marshal.ReadInt64(args, offset));
                            offset += intSize;
                        }
                        else // uint and ulong types
                        {
                            objects.Add((uint)Marshal.ReadInt32(args, offset));
                            offset += intSize;
                        }
                    }
                    break;

                // floating-point types
                case 'f':
                case 'F':
                case 'e':
                case 'E':
                case 'g':
                case 'G':
                    {
                        // long double ('L') is not really supported: it is read as a plain
                        // 64-bit double, exactly like the non-'L' case.
                        long bits = Marshal.ReadInt64(args, offset);
                        objects.Add(BitConverter.Int64BitsToDouble(bits));
                        offset += sizeof(double);
                    }
                    break;

                // string
                case 's':
                    {
                        IntPtr text = Marshal.ReadIntPtr(args, offset);
                        // "%ls" points at a wide string, "%s" at a narrow one
                        string s = info.flags.HasFlag(Printf.FormatFlags.IsLong)
                            ? Marshal.PtrToStringUni(text)
                            : Marshal.PtrToStringAnsi(text);
                        objects.Add(s);
                        offset += slotSize;
                    }
                    break;

                // pointer
                case 'p':
                    {
                        objects.Add(Marshal.ReadIntPtr(args, offset));
                        offset += slotSize;
                    }
                    break;

                // "%n" yields no value but still consumes its int* argument;
                // skipping the slot keeps later arguments aligned.
                case 'n':
                    offset += slotSize;
                    break;

                // consume no argument
                case ' ':
                case '%':
                    break;

                default:
                    throw new ApplicationException("printf specifier '%" + info.type + "' not supported");
            }
        }
        return objects.ToArray();
    }
}
/// <summary>
/// Reinterprets every char of <paramref name="utf8String"/> as one raw byte and
/// decodes the resulting byte sequence as UTF-8.
/// </summary>
/// <param name="utf8String">
/// String whose chars are expected to lie in the byte range 0..255; the cast keeps
/// only the low 8 bits of each char.
/// </param>
/// <returns> The text obtained by UTF-8 decoding those bytes. </returns>
public string DecodeFromUtf8(string utf8String)
{
    int length = utf8String.Length;
    byte[] raw = new byte[length];
    int pos = 0;
    foreach (char unit in utf8String)
    {
        raw[pos++] = (byte)unit;
    }
    return Encoding.UTF8.GetString(raw, 0, length);
}
This code doesn't work for me.
Do you have any good ideas?
I need a Unicode lookup array for Russian fonts, like this:
// Byte-to-Unicode lookup table: 256 entries (32 rows of 8), where the entry at
// index b is the char that byte value b decodes to.
//   0x00-0x7F : the char with the same code (identity mapping)
//   0x80-0x9F : U+FFFD (replacement character)
//   0xA0-0xFE : punctuation and letters, mostly from the Armenian block (U+0531-U+058E)
//   0xFF      : U+FFFD (replacement character)
public static readonly ReadOnlyCollection<char> Unicodes = Array.AsReadOnly(new char[]
{
'\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007',
'\u0008', '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u000E', '\u000F',
'\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017',
'\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
'\u0020', '\u0021', '\u0022', '\u0023', '\u0024', '\u0025', '\u0026', '\u0027',
'\u0028', '\u0029', '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F',
'\u0030', '\u0031', '\u0032', '\u0033', '\u0034', '\u0035', '\u0036', '\u0037',
'\u0038', '\u0039', '\u003A', '\u003B', '\u003C', '\u003D', '\u003E', '\u003F',
'\u0040', '\u0041', '\u0042', '\u0043', '\u0044', '\u0045', '\u0046', '\u0047',
'\u0048', '\u0049', '\u004A', '\u004B', '\u004C', '\u004D', '\u004E', '\u004F',
'\u0050', '\u0051', '\u0052', '\u0053', '\u0054', '\u0055', '\u0056', '\u0057',
'\u0058', '\u0059', '\u005A', '\u005B', '\u005C', '\u005D', '\u005E', '\u005F',
'\u0060', '\u0061', '\u0062', '\u0063', '\u0064', '\u0065', '\u0066', '\u0067',
'\u0068', '\u0069', '\u006A', '\u006B', '\u006C', '\u006D', '\u006E', '\u006F',
'\u0070', '\u0071', '\u0072', '\u0073', '\u0074', '\u0075', '\u0076', '\u0077',
'\u0078', '\u0079', '\u007A', '\u007B', '\u007C', '\u007D', '\u007E', '\u007F',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD', '\uFFFD',
'\u00A0', '\u058E', '\u0587', '\u0589', '\u0029', '\u0028', '\u00BB', '\u00AB',
'\u2015', '\u00B7', '\u055D', '\u002C', '\u2010', '\u058A', '\u2026', '\u055C',
'\u055B', '\u055E', '\u0531', '\u0561', '\u0532', '\u0562', '\u0533', '\u0563',
'\u0534', '\u0564', '\u0535', '\u0565', '\u0536', '\u0566', '\u0537', '\u0567',
'\u0538', '\u0568', '\u0539', '\u0569', '\u053A', '\u056A', '\u053B', '\u056B',
'\u053C', '\u056C', '\u053D', '\u056D', '\u053E', '\u056E', '\u053F', '\u056F',
'\u0540', '\u0570', '\u0541', '\u0571', '\u0542', '\u0572', '\u0543', '\u0573',
'\u0544', '\u0574', '\u0545', '\u0575', '\u0546', '\u0576', '\u0547', '\u0577',
'\u0548', '\u0578', '\u0549', '\u0579', '\u054A', '\u057A', '\u054B', '\u057B',
'\u054C', '\u057C', '\u054D', '\u057D', '\u054E', '\u057E', '\u054F', '\u057F',
'\u0550', '\u0580', '\u0551', '\u0581', '\u0552', '\u0582', '\u0553', '\u0583',
'\u0554', '\u0584', '\u0555', '\u0585', '\u0556', '\u0586', '\u055A', '\uFFFD' });
Your string seems to be ArmSCII-8.
Adapted from an old encoder/decoder I had written for VISCII:
namespace Utilities
{
using System;
using System.Collections.ObjectModel;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
/// <summary>
/// Table-driven single-byte <see cref="Encoding"/> for ArmSCII-8
/// (https://en.wikipedia.org/wiki/ArmSCII#ArmSCII-8); it does not build on MappedEncoding.
/// All conversions are delegated to a lazily created <see cref="ArmSCII8Encoder"/> /
/// <see cref="ArmSCII8Decoder"/> pair and are always invoked with flush = true, which is
/// the mode those classes document as thread safe/reentrant.
/// </summary>
public class ArmSCII8EncodingSimple : Encoding
{
    // Byte value -> Unicode char, one entry per byte (256 in total), taken from
    // https://en.wikipedia.org/wiki/ArmSCII#ArmSCII-8 .
    // Bytes 00-9F map to the code point with the same value; byte FF has no
    // definition and is marked with '\0'.
    public static readonly ReadOnlyCollection<char> Unicodes = Array.AsReadOnly(new char[]
    {
'\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007',
'\u0008', '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', '\u000E', '\u000F',
'\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017',
'\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
'\u0020', '\u0021', '\u0022', '\u0023', '\u0024', '\u0025', '\u0026', '\u0027',
'\u0028', '\u0029', '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F',
'\u0030', '\u0031', '\u0032', '\u0033', '\u0034', '\u0035', '\u0036', '\u0037',
'\u0038', '\u0039', '\u003A', '\u003B', '\u003C', '\u003D', '\u003E', '\u003F',
'\u0040', '\u0041', '\u0042', '\u0043', '\u0044', '\u0045', '\u0046', '\u0047',
'\u0048', '\u0049', '\u004A', '\u004B', '\u004C', '\u004D', '\u004E', '\u004F',
'\u0050', '\u0051', '\u0052', '\u0053', '\u0054', '\u0055', '\u0056', '\u0057',
'\u0058', '\u0059', '\u005A', '\u005B', '\u005C', '\u005D', '\u005E', '\u005F',
'\u0060', '\u0061', '\u0062', '\u0063', '\u0064', '\u0065', '\u0066', '\u0067',
'\u0068', '\u0069', '\u006A', '\u006B', '\u006C', '\u006D', '\u006E', '\u006F',
'\u0070', '\u0071', '\u0072', '\u0073', '\u0074', '\u0075', '\u0076', '\u0077',
'\u0078', '\u0079', '\u007A', '\u007B', '\u007C', '\u007D', '\u007E', '\u007F',
'\u0080', '\u0081', '\u0082', '\u0083', '\u0084', '\u0085', '\u0086', '\u0087',
'\u0088', '\u0089', '\u008A', '\u008B', '\u008C', '\u008D', '\u008E', '\u008F',
'\u0090', '\u0091', '\u0092', '\u0093', '\u0094', '\u0095', '\u0096', '\u0097',
'\u0098', '\u0099', '\u009A', '\u009B', '\u009C', '\u009D', '\u009E', '\u009F',
'\u00A0', '\u058E', '\u0587', '\u0589', '\u0029', '\u0028', '\u00BB', '\u00AB',
'\u2015', '\u00B7', '\u055D', '\u002C', '\u2010', '\u058A', '\u2026', '\u055C',
'\u055B', '\u055E', '\u0531', '\u0561', '\u0532', '\u0562', '\u0533', '\u0563',
'\u0534', '\u0564', '\u0535', '\u0565', '\u0536', '\u0566', '\u0537', '\u0567',
'\u0538', '\u0568', '\u0539', '\u0569', '\u053A', '\u056A', '\u053B', '\u056B',
'\u053C', '\u056C', '\u053D', '\u056D', '\u053E', '\u056E', '\u053F', '\u056F',
'\u0540', '\u0570', '\u0541', '\u0571', '\u0542', '\u0572', '\u0543', '\u0573',
'\u0544', '\u0574', '\u0545', '\u0575', '\u0546', '\u0576', '\u0547', '\u0577',
'\u0548', '\u0578', '\u0549', '\u0579', '\u054A', '\u057A', '\u054B', '\u057B',
'\u054C', '\u057C', '\u054D', '\u057D', '\u054E', '\u057E', '\u054F', '\u057F',
'\u0550', '\u0580', '\u0551', '\u0581', '\u0552', '\u0582', '\u0553', '\u0583',
'\u0554', '\u0584', '\u0555', '\u0585', '\u0556', '\u0586', '\u055A', '\0'/**/,
    });

    private ArmSCII8Decoder decoder;
    private ArmSCII8Encoder encoder;

    /// <summary>
    /// Shared decoder, created on first use and kept in sync with this encoding's
    /// <see cref="Encoding.DecoderFallback"/>. If two threads race here, the worst case
    /// is that two decoders get created, which the original author considers harmless
    /// because the decoder is used statelessly by this class.
    /// </summary>
    protected ArmSCII8Decoder Decoder
    {
        get
        {
            ArmSCII8Decoder current = decoder;
            if (current == null)
            {
                // First access: create the decoder and remember it.
                current = new ArmSCII8Decoder();
                decoder = current;
            }

            // Propagate a fallback that was changed since the previous call.
            DecoderFallback wanted = DecoderFallback;
            bool hasFallback = !object.ReferenceEquals(wanted, null);
            if (hasFallback && !object.ReferenceEquals(wanted, current.Fallback))
            {
                current.Fallback = wanted;
            }
            return current;
        }
    }

    /// <summary>
    /// Shared encoder, created on first use and kept in sync with this encoding's
    /// <see cref="Encoding.EncoderFallback"/>. If two threads race here, the worst case
    /// is that two encoders get created, which the original author considers harmless
    /// because the encoder is used statelessly by this class.
    /// </summary>
    protected ArmSCII8Encoder Encoder
    {
        get
        {
            ArmSCII8Encoder current = encoder;
            if (current == null)
            {
                // First access: create the encoder and remember it.
                current = new ArmSCII8Encoder();
                encoder = current;
            }

            // Propagate a fallback that was changed since the previous call.
            EncoderFallback wanted = EncoderFallback;
            bool hasFallback = !object.ReferenceEquals(wanted, null);
            if (hasFallback && !object.ReferenceEquals(wanted, current.Fallback))
            {
                current.Fallback = wanted;
            }
            return current;
        }
    }

    /// <summary> Name reported for this encoding. </summary>
    public override string BodyName
    {
        get { return "x-armscii-8-simple"; }
    }

    /// <summary> Same value as <see cref="BodyName"/>. </summary>
    public override string EncodingName
    {
        get { return BodyName; }
    }

    /// <summary> Always <c>true</c>. </summary>
    public override bool IsSingleByte
    {
        get { return true; }
    }

    /// <summary>
    /// Clones the encoding; the copy starts without cached encoder/decoder so the two
    /// instances never share them.
    /// </summary>
    public override object Clone()
    {
        var copy = (ArmSCII8EncodingSimple)base.Clone();
        copy.decoder = null;
        copy.encoder = null;
        return copy;
    }

    /// <summary> Returns a fresh decoder, independent from the cached one. </summary>
    public override Decoder GetDecoder()
    {
        return new ArmSCII8Decoder();
    }

    /// <summary> Returns a fresh encoder, independent from the cached one. </summary>
    public override Encoder GetEncoder()
    {
        return new ArmSCII8Encoder();
    }

    public override int GetByteCount(char[] chars, int index, int count)
    {
        ArmSCII8Encoder worker = Encoder;
        return worker.GetByteCount(chars, index, count, true);
    }

    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
    {
        ArmSCII8Encoder worker = Encoder;
        return worker.GetBytes(chars, charIndex, charCount, bytes, byteIndex, true);
    }

    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        ArmSCII8Decoder worker = Decoder;
        return worker.GetCharCount(bytes, index, count, true);
    }

    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        ArmSCII8Decoder worker = Decoder;
        return worker.GetChars(bytes, byteIndex, byteCount, chars, charIndex, true);
    }

    /// <summary> Returns <paramref name="charCount"/> unchanged. </summary>
    public override int GetMaxByteCount(int charCount)
    {
        return charCount;
    }

    /// <summary> Returns <paramref name="byteCount"/> unchanged. </summary>
    public override int GetMaxCharCount(int byteCount)
    {
        return byteCount;
    }
}
/// <summary>
/// Decodes ArmSCII-8 bytes to chars through the lookup table of
/// <see cref="ArmSCII8EncodingSimple"/>. Bytes without a mapping are handed to the
/// decoder fallback. The fallback buffer is created per call rather than stored in the
/// instance, which is what the original author relies on for thread safety/reentrancy.
/// </summary>
public class ArmSCII8Decoder : Decoder
{
    // Byte value -> char; '\0' at a non-zero index marks an undefined byte.
    private static readonly char[] Unicodes = ArmSCII8EncodingSimple.Unicodes.ToArray();

    /// <summary> Counts the chars that decoding the given byte range would produce. </summary>
    /// <exception cref="ArgumentNullException"> <paramref name="bytes"/> is null. </exception>
    /// <exception cref="ArgumentOutOfRangeException"> The range lies outside <paramref name="bytes"/>. </exception>
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }
        if (index < 0 || index > bytes.Length)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        // Written as a subtraction so that a huge count cannot overflow index + count
        // and slip past the check.
        if (count > bytes.Length - index)
        {
            throw new ArgumentOutOfRangeException("bytes");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        DecoderFallbackBuffer fallbackBuffer = null;
        int ret = 0;
        int end = index + count;
        for (; index < end; index++)
        {
            byte b = bytes[index];
            char ch = Unicodes[b];
            if (ch != '\0' || b == 0)
            {
                ret++;
            }
            else
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = (Fallback ?? DecoderFallback.ReplacementFallback).CreateFallbackBuffer();
                }
                // Fallback
                if (fallbackBuffer.Fallback(new[] { b }, index))
                {
                    HandleFallbackCount(fallbackBuffer, ref ret);
                }
            }
        }
        return ret;
    }

    /// <summary>
    /// Decodes the given byte range into <paramref name="chars"/> starting at
    /// <paramref name="charIndex"/> and returns the number of chars written.
    /// </summary>
    /// <exception cref="ArgumentNullException"> <paramref name="bytes"/> or <paramref name="chars"/> is null. </exception>
    /// <exception cref="ArgumentOutOfRangeException"> An index or count lies outside its array. </exception>
    /// <exception cref="ArgumentException"> <paramref name="chars"/> is too small for the output. </exception>
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
    {
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length)
        {
            throw new ArgumentOutOfRangeException("byteIndex");
        }
        if (byteCount < 0)
        {
            throw new ArgumentOutOfRangeException("byteCount");
        }
        // Overflow-safe form of byteIndex + byteCount > bytes.Length.
        if (byteCount > bytes.Length - byteIndex)
        {
            throw new ArgumentOutOfRangeException("bytes");
        }
        if (chars == null)
        {
            throw new ArgumentNullException("chars");
        }
        if (charIndex < 0 || charIndex > chars.Length)
        {
            throw new ArgumentOutOfRangeException("charIndex");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        DecoderFallbackBuffer fallbackBuffer = null;
        int end = byteIndex + byteCount;
        int writeIndex = charIndex;
        for (; byteIndex < end; byteIndex++)
        {
            byte b = bytes[byteIndex];
            // chars between 0 and 127 are equal in Unicode and ArmSCII8
            if (b <= 127)
            {
                WriteChar(chars, writeIndex, (char)b);
                writeIndex++;
                continue;
            }

            char ch = Unicodes[b];
            if (ch != '\0')
            {
                WriteChar(chars, writeIndex, ch);
                writeIndex++;
            }
            else
            {
                if (fallbackBuffer == null)
                {
                    fallbackBuffer = (Fallback ?? DecoderFallback.ReplacementFallback).CreateFallbackBuffer();
                }
                // Fallback
                if (fallbackBuffer.Fallback(new[] { b }, byteIndex))
                {
                    HandleFallbackWrite(fallbackBuffer, chars, ref writeIndex);
                }
            }
        }
        return writeIndex - charIndex;
    }

    /// <summary> Drains the fallback buffer, adding one to <paramref name="count"/> per char. </summary>
    protected static void HandleFallbackCount(DecoderFallbackBuffer fallbackBuffer, ref int count)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            fallbackBuffer.GetNextChar();
            count++;
        }
    }

    /// <summary> Drains the fallback buffer into <paramref name="chars"/>, advancing <paramref name="charIndex"/>. </summary>
    protected static void HandleFallbackWrite(DecoderFallbackBuffer fallbackBuffer, char[] chars, ref int charIndex)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            WriteChar(chars, charIndex, ch);
            charIndex++;
        }
    }

    /// <summary> Stores one char, throwing <see cref="ArgumentException"/> when the buffer is full. </summary>
    // Remove the next line if using .NET < 4.5
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    protected static void WriteChar(char[] chars, int charIndex, char ch)
    {
        if (charIndex >= chars.Length)
        {
            // The buffer that overflowed is the char output, not the byte input.
            throw new ArgumentException("The output char buffer is too small.", "chars");
        }
        chars[charIndex] = ch;
    }
}
/// <summary>
/// Encodes chars to ArmSCII-8 bytes through a reverse lookup table built from
/// <see cref="ArmSCII8EncodingSimple.Unicodes"/>. Chars without a mapping (including
/// surrogates) are handed to the encoder fallback.
/// When every call passes flush = true, <see cref="HighSurrogate"/> is only read and
/// never written, which is what the original author relies on for thread
/// safety/reentrancy in that mode.
/// </summary>
public class ArmSCII8Encoder : Encoder
{
    // UTF-16 code unit -> byte. 0 means "no mapping", except at index 0 where it is the real value.
    private static readonly byte[] ArmSCII8s;

    // Pending high surrogate carried over from a call made with flush = false.
    protected char HighSurrogate { get; set; }

    static ArmSCII8Encoder()
    {
        ArmSCII8s = new byte[1 + ArmSCII8EncodingSimple.Unicodes.Max()];
        for (int i = 0; i < ArmSCII8EncodingSimple.Unicodes.Count; i++)
        {
            char ch = ArmSCII8EncodingSimple.Unicodes[i];
            // The first byte that maps to a char wins; '\0' markers of undefined bytes are skipped.
            if (i == 0 || (ArmSCII8s[ch] == 0 && ch != '\0'))
            {
                ArmSCII8s[ch] = (byte)i;
            }
        }
    }

    /// <summary> Counts the bytes that encoding the given char range would produce. </summary>
    /// <param name="flush"> When true, a pending high surrogate at the end is counted via the fallback. </param>
    /// <exception cref="ArgumentNullException"> <paramref name="chars"/> is null. </exception>
    /// <exception cref="ArgumentOutOfRangeException"> The range lies outside <paramref name="chars"/>. </exception>
    /// <exception cref="EncoderFallbackException"> The fallback produced a char that cannot be encoded. </exception>
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
    {
        if (chars == null)
        {
            throw new ArgumentNullException("chars");
        }
        if (index < 0 || index > chars.Length)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count");
        }
        // Written as a subtraction so that a huge count cannot overflow index + count.
        if (count > chars.Length - index)
        {
            throw new ArgumentOutOfRangeException("chars");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        EncoderFallbackBuffer fallbackBuffer = null;
        char highSurrogate = HighSurrogate;
        int ret = 0;
        int end = index + count;
        for (; index < end; index++)
        {
            char ch = chars[index];
            if (highSurrogate != 0)
            {
                fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
                if (char.IsLowSurrogate(ch))
                {
                    // A complete high/low pair is passed to the fallback together.
                    if (fallbackBuffer.Fallback(highSurrogate, ch, index - 1))
                    {
                        HandleFallbackCount(fallbackBuffer, ref ret);
                    }
                    highSurrogate = '\0';
                    continue;
                }

                // Lone high surrogate: report it, then handle ch normally below.
                if (fallbackBuffer.Fallback(highSurrogate, index - 1))
                {
                    HandleFallbackCount(fallbackBuffer, ref ret);
                }
                highSurrogate = '\0';
            }

            byte b;
            if (TryEncode(ch, out b))
            {
                ret++;
            }
            else if (char.IsHighSurrogate(ch))
            {
                // Wait for the next char to see whether it completes the pair.
                highSurrogate = ch;
            }
            else
            {
                fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
                if (fallbackBuffer.Fallback(ch, index))
                {
                    HandleFallbackCount(fallbackBuffer, ref ret);
                }
            }
        }

        if (flush && highSurrogate != 0)
        {
            fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
            if (fallbackBuffer.Fallback(highSurrogate, index - 1))
            {
                HandleFallbackCount(fallbackBuffer, ref ret);
            }
        }
        return ret;
    }

    /// <summary>
    /// Encodes the given char range into <paramref name="bytes"/> starting at
    /// <paramref name="byteIndex"/> and returns the number of bytes written.
    /// </summary>
    /// <param name="flush">
    /// When true, a pending high surrogate is emitted via the fallback and the carried
    /// state is cleared; when false, it is stored in <see cref="HighSurrogate"/> for the next call.
    /// </param>
    /// <exception cref="ArgumentNullException"> <paramref name="chars"/> or <paramref name="bytes"/> is null. </exception>
    /// <exception cref="ArgumentOutOfRangeException"> An index or count lies outside its array. </exception>
    /// <exception cref="ArgumentException"> <paramref name="bytes"/> is too small for the output. </exception>
    /// <exception cref="EncoderFallbackException"> The fallback produced a char that cannot be encoded. </exception>
    public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)
    {
        if (chars == null)
        {
            throw new ArgumentNullException("chars");
        }
        if (charIndex < 0 || charIndex > chars.Length)
        {
            throw new ArgumentOutOfRangeException("charIndex");
        }
        if (charCount < 0)
        {
            throw new ArgumentOutOfRangeException("charCount");
        }
        // Overflow-safe form of charIndex + charCount > chars.Length.
        if (charCount > chars.Length - charIndex)
        {
            throw new ArgumentOutOfRangeException("chars");
        }
        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length)
        {
            throw new ArgumentOutOfRangeException("byteIndex");
        }

        // The fallbackBuffer is created on-demand. The instance
        // FallbackBuffer isn't used because it wouldn't be thread safe.
        EncoderFallbackBuffer fallbackBuffer = null;
        char highSurrogate = HighSurrogate;
        int end = charIndex + charCount;
        int writeIndex = byteIndex;
        for (; charIndex < end; charIndex++)
        {
            char ch = chars[charIndex];
            if (highSurrogate != 0)
            {
                fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
                if (char.IsLowSurrogate(ch))
                {
                    // A complete high/low pair is passed to the fallback together.
                    if (fallbackBuffer.Fallback(highSurrogate, ch, charIndex - 1))
                    {
                        HandleFallbackWrite(fallbackBuffer, bytes, ref writeIndex);
                    }
                    highSurrogate = '\0';
                    continue;
                }

                // Lone high surrogate: report it, then handle ch normally below.
                if (fallbackBuffer.Fallback(highSurrogate, charIndex - 1))
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref writeIndex);
                }
                highSurrogate = '\0';
            }

            byte b;
            if (TryEncode(ch, out b))
            {
                // Recognized character
                WriteByte(bytes, writeIndex, b);
                writeIndex++;
            }
            else if (char.IsHighSurrogate(ch))
            {
                // Wait for the next char to see whether it completes the pair.
                highSurrogate = ch;
            }
            else
            {
                fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
                if (fallbackBuffer.Fallback(ch, charIndex))
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref writeIndex);
                }
            }
        }

        if (flush)
        {
            if (highSurrogate != 0)
            {
                fallbackBuffer = GetOrCreateFallbackBuffer(fallbackBuffer);
                if (fallbackBuffer.Fallback(highSurrogate, charIndex - 1))
                {
                    HandleFallbackWrite(fallbackBuffer, bytes, ref writeIndex);
                }
            }
            // A surrogate buffered by an earlier flush = false call has now been consumed;
            // clear it so it is not emitted again by later calls. The property is only
            // written when it actually holds something, so pure flush = true usage still
            // never writes instance state.
            if (HighSurrogate != '\0')
            {
                HighSurrogate = '\0';
            }
        }
        else
        {
            HighSurrogate = highSurrogate;
        }
        return writeIndex - byteIndex;
    }

    /// <summary>
    /// Drains the fallback buffer, adding one to <paramref name="count"/> per char.
    /// Throws <see cref="EncoderFallbackException"/> if a fallback char has no mapping.
    /// </summary>
    protected static void HandleFallbackCount(EncoderFallbackBuffer fallbackBuffer, ref int count)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            byte b;
            if (!TryEncode(ch, out b))
            {
                throw new EncoderFallbackException();
            }
            count++;
        }
    }

    /// <summary>
    /// Drains the fallback buffer into <paramref name="bytes"/>, advancing <paramref name="byteIndex"/>.
    /// Throws <see cref="EncoderFallbackException"/> if a fallback char has no mapping.
    /// </summary>
    protected static void HandleFallbackWrite(EncoderFallbackBuffer fallbackBuffer, byte[] bytes, ref int byteIndex)
    {
        while (fallbackBuffer.Remaining > 0)
        {
            char ch = fallbackBuffer.GetNextChar();
            byte b;
            if (!TryEncode(ch, out b))
            {
                throw new EncoderFallbackException();
            }
            WriteByte(bytes, byteIndex, b);
            byteIndex++;
        }
    }

    /// <summary> Stores one byte, throwing <see cref="ArgumentException"/> when the buffer is full. </summary>
    // Remove the next line if using .NET < 4.5
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    protected static void WriteByte(byte[] bytes, int byteIndex, byte b)
    {
        if (byteIndex == bytes.Length)
        {
            throw new ArgumentException("bytes");
        }
        bytes[byteIndex] = b;
    }

    // Looks ch up in the reverse table; returns false when ch has no ArmSCII-8 byte.
    private static bool TryEncode(char ch, out byte b)
    {
        if (ch < ArmSCII8s.Length)
        {
            b = ArmSCII8s[ch];
            return b != 0 || ch == '\0';
        }
        b = 0;
        return false;
    }

    // Returns the per-call fallback buffer, creating it from the configured fallback on first need.
    private EncoderFallbackBuffer GetOrCreateFallbackBuffer(EncoderFallbackBuffer current)
    {
        if (current != null)
        {
            return current;
        }
        return (Fallback ?? EncoderFallback.ReplacementFallback).CreateFallbackBuffer();
    }
}
}
Then use it like:
/// <summary>
/// Turns <paramref name="str"/> into bytes with the "iso-8859-1" encoding and decodes
/// those bytes with <c>ArmSCII8EncodingSimple</c>.
/// </summary>
public static string DecodeFromArmSCII8(string str) {
    // Get the byte form of the mis-decoded text via ISO-8859-1 (not UTF-8).
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
    byte[] rawBytes = latin1.GetBytes(str);
    ArmSCII8EncodingSimple armscii = new ArmSCII8EncodingSimple();
    return armscii.GetString(rawBytes);
}
and
string str = DecodeFromArmSCII8("سñïÇñáëÛ³Ý ²ÉÇݳ Ø.");
The Encoding.GetEncoding("iso-8859-1").GetBytes(str) call returns the original byte[] array. Note that if you already have the original byte[], you can pass it directly to ArmSCII8EncodingSimple.
I would like to match strings with a wildcard (*), where the wildcard means "any". For example:
*X = string must end with X
X* = string must start with X
*X* = string must contain X
Also, some compound uses such as:
*X*YZ* = string contains X and contains YZ
X*YZ*P = string starts with X, contains YZ and ends with P.
Is there a simple algorithm to do this? I'm unsure about using regex (though it is a possibility).
To clarify, the users will type in the above to a filter box (as simple a filter as possible), I don't want them to have to write regular expressions themselves. So something I can easily transform from the above notation would be good.
Wildcard patterns usually support two kinds of wildcard characters:
? - any character (one and only one)
* - any characters (zero or more)
so you can easily convert these rules into appropriate regular expression:
/// <summary>
/// Converts a wildcard expression supporting both "*" (any run of characters) and
/// "?" (exactly one character) into an anchored regular expression.
/// </summary>
private static String WildCardToRegular(String value) {
    // Escape everything first, then turn the escaped jokers back into regex operators.
    String escaped = Regex.Escape(value);
    String withSingle = escaped.Replace("\\?", ".");
    String withMulti = withSingle.Replace("\\*", ".*");
    return String.Concat("^", withMulti, "$");
}
/// <summary>
/// Converts a wildcard expression supporting only "*" (any run of characters)
/// into an anchored regular expression; every other character is matched literally.
/// </summary>
private static String WildCardToRegular(String value) {
    String escaped = Regex.Escape(value);
    String body = escaped.Replace("\\*", ".*");
    return String.Concat("^", body, "$");
}
And then you can use Regex as usual:
// Sample input used by every check below.
String test = "Some Data X";
// "*X": the text must end with X.
Boolean endsWithEx = Regex.IsMatch(test, WildCardToRegular("*X"));
// "S*": the text must start with S.
Boolean startsWithS = Regex.IsMatch(test, WildCardToRegular("S*"));
// "*D*": the text must contain D.
Boolean containsD = Regex.IsMatch(test, WildCardToRegular("*D*"));
// Starts with S, ends with X, contains "me" and "a" (in that order)
Boolean complex = Regex.IsMatch(test, WildCardToRegular("S*me*a*X"));
You could use the VB.NET Like-Operator:
// The sample text contains both "X" and, later, "YZ".
string text = "x is not the same as X and yz not the same as YZ";
// CompareMethod.Binary requests a case-sensitive comparison.
bool contains = LikeOperator.LikeString(text,"*X*YZ*", Microsoft.VisualBasic.CompareMethod.Binary);
Use CompareMethod.Text if you want to ignore the case.
You need to add using Microsoft.VisualBasic.CompilerServices; and add a reference to the Microsoft.VisualBasic.dll.
Since it's part of the .NET framework and will always be, it's not a problem to use this class.
For those using .NET Core 2.1+ or .NET 5+, you can use the FileSystemName.MatchesSimpleExpression method in the System.IO.Enumeration namespace.
// Text to test against the wildcard expression.
string text = "X is a string with ZY in the middle and at the end is P";
// Argument order: the wildcard expression comes first, the text to test second.
bool isMatch = FileSystemName.MatchesSimpleExpression("X*ZY*P", text);
Both parameters are actually ReadOnlySpan<char> but you can use string arguments too. There's also an overloaded method if you want to turn on/off case matching. It is case insensitive by default as Chris mentioned in the comments.
Using of WildcardPattern from System.Management.Automation may be an option.
// NOTE(review): 'pattern', 'patternString' and 'stringToMatch' are not declared in this fragment.
pattern = new WildcardPattern(patternString);
// The boolean result is discarded here; assign or test it in real code.
pattern.IsMatch(stringToMatch);
Visual Studio UI may not allow you to add System.Management.Automation assembly to References of your project. Feel free to add it manually, as described here.
A wildcard * can be translated as .* or .*? regex pattern.
You might need to use a singleline mode to match newline symbols, and in this case, you can use (?s) as part of the regex pattern.
You can set it for the whole or part of the pattern:
X* => @"X(?s:.*)"
*X => @"(?s:.*)X"
*X* => @"(?s).*X.*"
*X*YZ* => @"(?s).*X.*YZ.*"
X*YZ*P => @"(?s:X.*YZ.*P)"
*X*YZ* = string contains X and contains YZ
@".*X.*YZ"
X*YZ*P = string starts with X, contains YZ and ends with P.
@"^X.*YZ.*P$"
It is necessary to take into consideration, that Regex IsMatch gives true with XYZ, when checking match with Y*. To avoid it, I use "^" anchor
// Anchor at the start ("^") and translate each '*' into a lazy "any characters" (".*?").
// NOTE(review): str2 is not regex-escaped and there is no end anchor ("$").
isMatch(str1, "^" + str2.Replace("*", ".*?"));
So, full code to solve your problem is
/// <summary>
/// Symmetric wildcard comparison: returns true when either string, read as a
/// wildcard pattern in which '*' means "any run of characters", matches the
/// other string in full.
/// </summary>
/// <param name="str1">First string; may contain '*' wildcards.</param>
/// <param name="str2">Second string; may contain '*' wildcards.</param>
/// <returns>True when one string matches the other as a whole-string wildcard pattern.</returns>
bool isMatchStr(string str1, string str2)
{
    // Escape regex metacharacters so that only '*' is special, and anchor both ends
    // so that "X*Y" does not accept "XYZ" and "a.c" does not accept "abc".
    string pattern1 = "^" + Regex.Escape(str1).Replace("\\*", ".*") + "$";
    string pattern2 = "^" + Regex.Escape(str2).Replace("\\*", ".*") + "$";
    return Regex.IsMatch(str1, pattern2) || Regex.IsMatch(str2, pattern1);
}
This is an improvement on the popular answer from @Dmitry Bychenko above (https://stackoverflow.com/a/30300521/4491768). To be able to match a literal ? or * character, the pattern needs a way to escape them: write \\? or \\* in the pattern.
Also a pre compiled regex will improve the performance (on reuse).
/// <summary>
/// Pre-compiled wildcard matcher. In the pattern, '*' matches any run of characters
/// and '?' matches exactly one character; a backslash before either ("\*", "\?")
/// makes it a literal. The whole input must match.
/// </summary>
public class WildcardPattern
{
    private readonly string _expression;
    private readonly Regex _regex;

    /// <summary>Builds the matcher for <paramref name="pattern"/>.</summary>
    /// <exception cref="ArgumentNullException">The pattern is null or empty.</exception>
    public WildcardPattern(string pattern)
    {
        if (string.IsNullOrEmpty(pattern)) throw new ArgumentNullException(nameof(pattern));
        _expression = Translate(pattern);
        _regex = new Regex(_expression, RegexOptions.Compiled);
    }

    /// <summary>Returns true when <paramref name="value"/> matches the pattern in full.</summary>
    public bool IsMatch(string value)
    {
        return _regex.IsMatch(value);
    }

    // Translates the wildcard pattern to an anchored regex in one left-to-right pass.
    // A single pass avoids placeholder substitutions, which collide when a wildcard sits
    // next to an escaped literal (e.g. "*\*" used to become ".\**" instead of ".*\*").
    private static string Translate(string pattern)
    {
        var regex = new System.Text.StringBuilder(pattern.Length + 8);
        regex.Append('^');
        for (int i = 0; i < pattern.Length; i++)
        {
            char current = pattern[i];
            bool escapesJoker = current == '\\'
                && i + 1 < pattern.Length
                && (pattern[i + 1] == '?' || pattern[i + 1] == '*');
            if (escapesJoker)
            {
                // "\?" or "\*": emit the joker as a literal and skip it.
                regex.Append('\\').Append(pattern[i + 1]);
                i++;
            }
            else if (current == '?')
            {
                regex.Append('.');
            }
            else if (current == '*')
            {
                regex.Append(".*");
            }
            else
            {
                regex.Append(Regex.Escape(current.ToString()));
            }
        }
        regex.Append('$');
        return regex.ToString();
    }
}
usage
// Both lines build the same pattern (Hello *\**\?): once with a regular string literal,
// once with a verbatim (@) literal.
new WildcardPattern("Hello *\\**\\?").IsMatch("Hello W*rld?");
new WildcardPattern(@"Hello *\**\?").IsMatch("Hello W*rld?");
To support those one with C#+Excel (for partial known WS name) but not only - here's my code with wildcard (ddd*).
Briefly: the code gets all WS names and if today's weekday(ddd) matches the first 3 letters of WS name (bool=true) then it turn it to string that gets extracted out of the loop.
using System;
using Microsoft.Office.Interop.Excel;
using System.Runtime.InteropServices;
using Range = Microsoft.Office.Interop.Excel.Range;
using System.Diagnostics;
using System.Reflection;
using System.IO;
using System.Text.RegularExpressions;
...
// Formats today's date with the custom pattern "ddd*": "ddd" is the abbreviated weekday name;
// presumably the '*' is copied through literally (e.g. "Mon*") — TODO confirm.
string weekDay = DateTime.Now.ToString("ddd*");
// NOTE(review): xlApp, LrsIdWorkbook and masterWB are declared outside this fragment.
Workbook sourceWorkbook4 = xlApp.Workbooks.Open(LrsIdWorkbook, 0, false, 5, "", "", true, XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
Workbook destinationWorkbook = xlApp.Workbooks.Open(masterWB, 0, false, 5, "", "", true, XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
// Converts a '*'-only wildcard expression into an anchored regular expression.
static String WildCardToRegular(String value)
{
return "^" + Regex.Escape(value).Replace("\\*", ".*") + "$";
}
// Stays null when no worksheet name matches.
string wsName = null;
foreach (Worksheet works in sourceWorkbook4.Worksheets)
{
// weekDay already contains a '*', so the expression passed here ends in "**".
Boolean startsWithddd = Regex.IsMatch(works.Name, WildCardToRegular(weekDay + "*"));
if (startsWithddd == true)
{
// No break: when several sheets match, the last one wins.
wsName = works.Name.ToString();
}
}
// NOTE(review): wsName is still null here if nothing matched — confirm how get_Item handles that.
Worksheet sourceWorksheet4 = (Worksheet)sourceWorkbook4.Worksheets.get_Item(wsName);
...
/// <summary>
/// Hand-written wildcard matcher. In the pattern, '*' matches any run of characters
/// (including none) and '?' matches any single character except '.'. The whole
/// string must be consumed for a match. An empty pattern never matches.
/// </summary>
public class Wildcard
{
    private readonly string _pattern;

    /// <summary>Creates a matcher for <paramref name="pattern"/>.</summary>
    public Wildcard(string pattern)
    {
        _pattern = pattern;
    }

    /// <summary>Case-sensitive match of <paramref name="value"/> against <paramref name="pattern"/>.</summary>
    public static bool Match(string value, string pattern)
    {
        int start = -1;
        int end = -1;
        return Match(value, pattern, ref start, ref end);
    }

    /// <summary>
    /// Match in which every character of <paramref name="value"/> is first mapped through
    /// <paramref name="toLowerTable"/>; the pattern itself is compared unchanged.
    /// </summary>
    public static bool Match(string value, string pattern, char[] toLowerTable)
    {
        int start = -1;
        int end = -1;
        return Match(value, pattern, ref start, ref end, toLowerTable);
    }

    /// <summary>Case-sensitive match that also reports positions through <paramref name="start"/> and <paramref name="end"/>.</summary>
    public static bool Match(string value, string pattern, ref int start, ref int end)
    {
        return new Wildcard(pattern).IsMatch(value, ref start, ref end);
    }

    /// <summary>Table-mapped match that also reports positions through <paramref name="start"/> and <paramref name="end"/>.</summary>
    public static bool Match(string value, string pattern, ref int start, ref int end, char[] toLowerTable)
    {
        return new Wildcard(pattern).IsMatch(value, ref start, ref end, toLowerTable);
    }

    /// <summary>Case-sensitive match of <paramref name="str"/> against this pattern.</summary>
    public bool IsMatch(string str)
    {
        int start = -1;
        int end = -1;
        return IsMatch(str, ref start, ref end);
    }

    /// <summary>Match of <paramref name="str"/> with its characters mapped through <paramref name="toLowerTable"/>.</summary>
    public bool IsMatch(string str, char[] toLowerTable)
    {
        int start = -1;
        int end = -1;
        return IsMatch(str, ref start, ref end, toLowerTable);
    }

    /// <summary>Case-sensitive match; see the table overload for the meaning of the ref arguments.</summary>
    public bool IsMatch(string str, ref int start, ref int end)
    {
        // A null table selects plain character comparison in the shared implementation.
        return IsMatch(str, ref start, ref end, null);
    }

    /// <summary>
    /// Core matcher shared by all overloads.
    /// </summary>
    /// <param name="str">Text to test.</param>
    /// <param name="start">
    /// Reset to -1 on entry, then set to the string index at which the first pattern
    /// segment was matched.
    /// </param>
    /// <param name="end">
    /// Set to the current string index each time a new pattern segment begins; on success
    /// it is replaced by the string length when it is not greater than <paramref name="start"/>.
    /// </param>
    /// <param name="toLowerTable">
    /// Lookup table indexed by character code and applied to each character of
    /// <paramref name="str"/>; null means the characters are compared as they are.
    /// </param>
    /// <returns>True when the whole string matches the pattern.</returns>
    public bool IsMatch(string str, ref int start, ref int end, char[] toLowerTable)
    {
        if (_pattern.Length == 0) return false;
        int pindex = 0;
        int sindex = 0;
        int pattern_len = _pattern.Length;
        int str_len = str.Length;
        start = -1;
        while (true)
        {
            // A segment is an optional run of '*' followed by literal/'?' characters.
            bool star = false;
            if (_pattern[pindex] == '*')
            {
                star = true;
                do
                {
                    pindex++;
                }
                while (pindex < pattern_len && _pattern[pindex] == '*');
            }
            end = sindex;
            int i;
            while (true)
            {
                // Try to match the segment's literal part at the current string position.
                bool mismatch = false;
                for (i = 0; pindex + i < pattern_len && _pattern[pindex + i] != '*'; i++)
                {
                    int si = sindex + i;
                    if (si == str_len)
                    {
                        return false;
                    }
                    char c = toLowerTable == null ? str[si] : toLowerTable[str[si]];
                    char p = _pattern[pindex + i];
                    if (c == p || (p == '?' && c != '.'))
                    {
                        continue;
                    }
                    mismatch = true;
                    break;
                }
                if (mismatch)
                {
                    // Without a leading '*' the segment must match exactly here.
                    if (!star)
                    {
                        return false;
                    }
                    sindex++;
                    continue;
                }
                if (start == -1)
                {
                    start = sindex;
                }
                // The literal part ended on a '*': move on to the next segment.
                if (pindex + i < pattern_len && _pattern[pindex + i] == '*')
                {
                    break;
                }
                // Pattern exhausted: succeed only if the string is exhausted too.
                if (sindex + i == str_len)
                {
                    if (end <= start)
                    {
                        end = str_len;
                    }
                    return true;
                }
                if (!star)
                {
                    return false;
                }
                // Last segment matched too early; slide it one position to the right.
                sindex++;
            }
            sindex += i;
            pindex += i;
        }
    }
}
C# Console application sample
Command line Sample:
C:/> App_Exe -Opy PythonFile.py 1 2 3
Console output:
Argument list: -Opy PythonFile.py 1 2 3
Found python filename: PythonFile.py
using System;
using System.Text.RegularExpressions; //Regex
namespace ConsoleApp1
{
    /// <summary>
    /// Echoes the command line and reports the first argument that names a Python
    /// file (an argument ending in ".py", ignoring case).
    /// </summary>
    class Program
    {
        // Built once instead of on every loop iteration. Anchored at the end so that
        // names such as "script.pyc" are not reported as ".py" files.
        private static readonly Regex PythonFilePattern = new Regex(@"\.py$", RegexOptions.IgnoreCase);

        static void Main(string[] args)
        {
            string cmdLine = String.Join(" ", args);

            // Index of the first argument matching the "*.py" pattern, or -1 when there is none.
            int argIndex = Array.FindIndex(args, s => PythonFilePattern.IsMatch(s));

            Console.WriteLine("Argument list: " + cmdLine);
            if (argIndex >= 0)
            {
                Console.WriteLine("Found python filename: " + args[argIndex]);
            }
            else
            {
                Console.WriteLine("Python file with extension <.py> not found!");
            }
        }
    }
}
I am working on a CLR Table-Valued function for SQL Server 2008 R2. I need to HTMLDecode a string at one point, but this is problematic b/c that relies on System.Web, which is not a support assembly for SQL Server.
Can anyone think of a better way to do the HTML Decode?
FYI SQL Server 2008 CLR only supports up to .NET 3.5 so system.net.webutility will not work.
Also you can use reflector to grab the code from WebUtility directly (please don't blame me for the coding style, its reflected stuff):
/// <summary>
/// Stand-alone HTML decoder for environments where System.Web / System.Net.WebUtility
/// are not available. Decodes named entities (&amp;amp;), decimal references (&amp;#65;)
/// and hexadecimal references (&amp;#x41;).
/// </summary>
public class WebUtility {
    /// <summary>Lookup from entity name (without '&amp;' and ';') to the decoded character.</summary>
    private static class HtmlEntities {
        // Each entry is "<character>-<entity name>": index 0 is the decoded character,
        // index 1 the '-' separator and the remainder the entity name.
        // Invisible characters are written as \u escapes so they survive copy/paste.
        private static string[] _entitiesList = new string[]
        {
            "\"-quot",
            "&-amp",
            "'-apos",
            "<-lt",
            ">-gt",
            "\u00a0-nbsp",
            "¡-iexcl",
            "¢-cent",
            "£-pound",
            "¤-curren",
            "¥-yen",
            "¦-brvbar",
            "§-sect",
            "¨-uml",
            "©-copy",
            "ª-ordf",
            "«-laquo",
            "¬-not",
            "\u00ad-shy",
            "®-reg",
            "¯-macr",
            "°-deg",
            "±-plusmn",
            "²-sup2",
            "³-sup3",
            "´-acute",
            "µ-micro",
            "¶-para",
            "·-middot",
            "¸-cedil",
            "¹-sup1",
            "º-ordm",
            "»-raquo",
            "¼-frac14",
            "½-frac12",
            "¾-frac34",
            "¿-iquest",
            "À-Agrave",
            "Á-Aacute",
            "Â-Acirc",
            "Ã-Atilde",
            "Ä-Auml",
            "Å-Aring",
            "Æ-AElig",
            "Ç-Ccedil",
            "È-Egrave",
            "É-Eacute",
            "Ê-Ecirc",
            "Ë-Euml",
            "Ì-Igrave",
            "Í-Iacute",
            "Î-Icirc",
            "Ï-Iuml",
            "Ð-ETH",
            "Ñ-Ntilde",
            "Ò-Ograve",
            "Ó-Oacute",
            "Ô-Ocirc",
            "Õ-Otilde",
            "Ö-Ouml",
            "×-times",
            "Ø-Oslash",
            "Ù-Ugrave",
            "Ú-Uacute",
            "Û-Ucirc",
            "Ü-Uuml",
            "Ý-Yacute",
            "Þ-THORN",
            "ß-szlig",
            "à-agrave",
            "á-aacute",
            "â-acirc",
            "ã-atilde",
            "ä-auml",
            "å-aring",
            "æ-aelig",
            "ç-ccedil",
            "è-egrave",
            "é-eacute",
            "ê-ecirc",
            "ë-euml",
            "ì-igrave",
            "í-iacute",
            "î-icirc",
            "ï-iuml",
            "ð-eth",
            "ñ-ntilde",
            "ò-ograve",
            "ó-oacute",
            "ô-ocirc",
            "õ-otilde",
            "ö-ouml",
            "÷-divide",
            "ø-oslash",
            "ù-ugrave",
            "ú-uacute",
            "û-ucirc",
            "ü-uuml",
            "ý-yacute",
            "þ-thorn",
            "ÿ-yuml",
            "Œ-OElig",
            "œ-oelig",
            "Š-Scaron",
            "š-scaron",
            "Ÿ-Yuml",
            "ƒ-fnof",
            "ˆ-circ",
            "˜-tilde",
            "Α-Alpha",
            "Β-Beta",
            "Γ-Gamma",
            "Δ-Delta",
            "Ε-Epsilon",
            "Ζ-Zeta",
            "Η-Eta",
            "Θ-Theta",
            "Ι-Iota",
            "Κ-Kappa",
            "Λ-Lambda",
            "Μ-Mu",
            "Ν-Nu",
            "Ξ-Xi",
            "Ο-Omicron",
            "Π-Pi",
            "Ρ-Rho",
            "Σ-Sigma",
            "Τ-Tau",
            "Υ-Upsilon",
            "Φ-Phi",
            "Χ-Chi",
            "Ψ-Psi",
            "Ω-Omega",
            "α-alpha",
            "β-beta",
            "γ-gamma",
            "δ-delta",
            "ε-epsilon",
            "ζ-zeta",
            "η-eta",
            "θ-theta",
            "ι-iota",
            "κ-kappa",
            "λ-lambda",
            "μ-mu",
            "ν-nu",
            "ξ-xi",
            "ο-omicron",
            "π-pi",
            "ρ-rho",
            "ς-sigmaf",
            "σ-sigma",
            "τ-tau",
            "υ-upsilon",
            "φ-phi",
            "χ-chi",
            "ψ-psi",
            "ω-omega",
            "ϑ-thetasym",
            "ϒ-upsih",
            "ϖ-piv",
            "\u2002-ensp",
            "\u2003-emsp",
            "\u2009-thinsp",
            "\u200c-zwnj",
            "\u200d-zwj",
            "\u200e-lrm",
            "\u200f-rlm",
            "–-ndash",
            "—-mdash",
            "‘-lsquo",
            "’-rsquo",
            "‚-sbquo",
            "“-ldquo",
            "”-rdquo",
            "„-bdquo",
            "†-dagger",
            "‡-Dagger",
            "•-bull",
            "…-hellip",
            "‰-permil",
            "′-prime",
            "″-Prime",
            "‹-lsaquo",
            "›-rsaquo",
            "‾-oline",
            "⁄-frasl",
            "€-euro",
            "ℑ-image",
            "℘-weierp",
            "ℜ-real",
            "™-trade",
            "ℵ-alefsym",
            "←-larr",
            "↑-uarr",
            "→-rarr",
            "↓-darr",
            "↔-harr",
            "↵-crarr",
            "⇐-lArr",
            "⇑-uArr",
            "⇒-rArr",
            "⇓-dArr",
            "⇔-hArr",
            "∀-forall",
            "∂-part",
            "∃-exist",
            "∅-empty",
            "∇-nabla",
            "∈-isin",
            "∉-notin",
            "∋-ni",
            "∏-prod",
            "∑-sum",
            "−-minus",
            "∗-lowast",
            "√-radic",
            "∝-prop",
            "∞-infin",
            "∠-ang",
            "∧-and",
            "∨-or",
            "∩-cap",
            "∪-cup",
            "∫-int",
            "∴-there4",
            "∼-sim",
            "≅-cong",
            "≈-asymp",
            "≠-ne",
            "≡-equiv",
            "≤-le",
            "≥-ge",
            "⊂-sub",
            "⊃-sup",
            "⊄-nsub",
            "⊆-sube",
            "⊇-supe",
            "⊕-oplus",
            "⊗-otimes",
            "⊥-perp",
            "⋅-sdot",
            "⌈-lceil",
            "⌉-rceil",
            "⌊-lfloor",
            "⌋-rfloor",
            "\u2329-lang",
            "\u232a-rang",
            "◊-loz",
            "♠-spades",
            "♣-clubs",
            "♥-hearts",
            "♦-diams"
        };

        // Must be declared after _entitiesList: static initializers run in textual order.
        private static Dictionary<string, char> _lookupTable = WebUtility.HtmlEntities.GenerateLookupTable();

        // Splits every "<char>-<name>" entry into a name -> character map (case-sensitive names).
        private static Dictionary<string, char> GenerateLookupTable() {
            Dictionary<string, char> table = new Dictionary<string, char>(StringComparer.Ordinal);
            foreach (string entry in WebUtility.HtmlEntities._entitiesList) {
                table.Add(entry.Substring(2), entry[0]);
            }
            return table;
        }

        /// <summary>Returns the character for a named entity, or '\0' when the name is unknown.</summary>
        public static char Lookup(string entity) {
            char result;
            WebUtility.HtmlEntities._lookupTable.TryGetValue(entity, out result);
            return result;
        }
    }

    // Which numeric character references are accepted by the decoder.
    private enum UnicodeDecodingConformance {
        Auto,
        Strict,
        Compat,
        Loose
    }

    private static char[] _htmlEntityEndingChars = new char[] { ';', '&' };

    // Auto is treated as Strict when validating numeric references (see IsAcceptedCodePoint).
    private static readonly UnicodeDecodingConformance _htmlDecodeConformance = UnicodeDecodingConformance.Auto;

    /// <summary>Decodes HTML entities in <paramref name="value"/>.</summary>
    /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
    public static string HtmlDecode(string value) {
        if (string.IsNullOrEmpty(value)) {
            return value;
        }
        if (!WebUtility.StringRequiresHtmlDecoding(value)) {
            return value;
        }
        StringWriter stringWriter = new StringWriter(CultureInfo.InvariantCulture);
        WebUtility.HtmlDecode(value, stringWriter);
        return stringWriter.ToString();
    }

    // Fast pre-check: only strings containing '&' (or, outside Compat mode, a surrogate)
    // are sent through the character-by-character decoder.
    private static bool StringRequiresHtmlDecoding(string s) {
        if (WebUtility._htmlDecodeConformance == UnicodeDecodingConformance.Compat) {
            return s.IndexOf('&') >= 0;
        }
        for (int i = 0; i < s.Length; i++) {
            char c = s[i];
            if (c == '&' || char.IsSurrogate(c)) {
                return true;
            }
        }
        return false;
    }

    // Splits a code point above U+FFFF into its UTF-16 surrogate pair.
    private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, out char trailingSurrogate) {
        int offset = (int)(smpChar - 65536u);
        leadingSurrogate = (char)(offset / 1024 + 55296);
        trailingSurrogate = (char)(offset % 1024 + 56320);
    }

    // Applies the configured conformance level to a parsed numeric reference.
    private static bool IsAcceptedCodePoint(uint codePoint) {
        switch (WebUtility._htmlDecodeConformance) {
            case UnicodeDecodingConformance.Compat:
                return 0u < codePoint && codePoint <= 65535u;
            case UnicodeDecodingConformance.Loose:
                return codePoint <= 1114111u;
            default:
                // Strict, and Auto (previously Auto rejected every numeric reference,
                // so "&#65;" was never decoded): any code point up to U+10FFFF except surrogates.
                return codePoint < 55296u || (57343u < codePoint && codePoint <= 1114111u);
        }
    }

    // Parses "#123" or "#x7B" (entity text without '&' and ';') and validates the code point.
    private static bool TryParseNumericEntity(string entity, out uint codePoint) {
        bool parsed;
        if (entity[1] == 'x' || entity[1] == 'X') {
            parsed = uint.TryParse(entity.Substring(2), NumberStyles.AllowHexSpecifier, NumberFormatInfo.InvariantInfo, out codePoint);
        } else {
            parsed = uint.TryParse(entity.Substring(1), NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out codePoint);
        }
        return parsed && IsAcceptedCodePoint(codePoint);
    }

    /// <summary>Decodes HTML entities in <paramref name="value"/> and writes the result to <paramref name="output"/>.</summary>
    /// <param name="value">Text to decode; nothing is written when it is null.</param>
    /// <param name="output">Destination writer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="output"/> is null and <paramref name="value"/> is not.</exception>
    public static void HtmlDecode(string value, TextWriter output) {
        if (value == null) {
            return;
        }
        if (output == null) {
            throw new ArgumentNullException("output");
        }
        if (!WebUtility.StringRequiresHtmlDecoding(value)) {
            output.Write(value);
            return;
        }
        int length = value.Length;
        for (int i = 0; i < length; i++) {
            char c = value[i];
            if (c == '&') {
                // An entity runs up to the next ';'. Meeting another '&' first means this
                // '&' is plain text.
                int end = value.IndexOfAny(WebUtility._htmlEntityEndingChars, i + 1);
                if (end > 0 && value[end] == ';') {
                    string entity = value.Substring(i + 1, end - i - 1);
                    if (entity.Length > 1 && entity[0] == '#') {
                        uint codePoint;
                        if (WebUtility.TryParseNumericEntity(entity, out codePoint)) {
                            if (codePoint <= 65535u) {
                                output.Write((char)codePoint);
                            } else {
                                char lead;
                                char trail;
                                WebUtility.ConvertSmpToUtf16(codePoint, out lead, out trail);
                                output.Write(lead);
                                output.Write(trail);
                            }
                            i = end;
                            continue;
                        }
                        // Invalid numeric reference: fall through and emit the '&' literally.
                    } else {
                        i = end;
                        char decoded = WebUtility.HtmlEntities.Lookup(entity);
                        if (decoded == '\0') {
                            // Unknown name: reproduce the entity text unchanged.
                            output.Write('&');
                            output.Write(entity);
                            output.Write(';');
                            continue;
                        }
                        c = decoded;
                    }
                }
            }
            output.Write(c);
        }
    }
}
I have a string as input and have to break the string in two substrings. If the left substring equals the right substring then do some logic.
How can I do this?
Sample:
// Stub from the question: the body is the part being asked for. As written it has no
// return statement.
public bool getStatus(string myString)
{
}
Example: myString = "ankYkna", so if we break it into two substring it would be:
left-part = "ank",
right-part = "ank" (after reversal).
Just for fun:
// Compares the string with its own reversal, character by character (LINQ).
return myString.SequenceEqual(myString.Reverse());
/// <summary>
/// Returns true when the first half of <paramref name="myString"/> equals the reversed
/// second half. For odd lengths the middle character is ignored.
/// </summary>
public static bool getStatus(string myString)
{
    int half = myString.Length / 2;

    char[] mirrored = myString.ToCharArray();
    Array.Reverse(mirrored);

    string head = myString.Substring(0, half);
    // The first 'half' characters of the reversed text are the tail read backwards.
    string tailBackwards = new string(mirrored, 0, half);
    return head.Equals(tailBackwards);
}
// Walk inwards from both ends; only the first half needs to be visited.
int length = myString.Length;
for (int i = 0; i < length / 2; i++)
{
// Compare position i with its mirror position counted from the end.
if (myString[i] != myString[length - i - 1])
return false;
}
// No mismatching pair found.
return true;
Using LINQ, and of course far from the best solution:
var original = "ankYkna";
// Materialise the reversed character sequence as a new string.
var reversed = new string(original.Reverse().ToArray());
// Ordinal, case-sensitive comparison.
var palindrom = original == reversed;
A single line of code using Linq
/// <summary>Returns true when <paramref name="str"/> reads the same in both directions (case-sensitive).</summary>
public static bool IsPalindrome(string str)
{
    var backwards = str.Reverse();
    return Enumerable.SequenceEqual(str, backwards);
}
/// <summary>
/// Case-insensitive palindrome check that compares characters pairwise from both
/// ends towards the middle.
/// </summary>
public static bool IsPalindrome(string value)
{
    for (int left = 0, right = value.Length - 1; left <= right; left++, right--)
    {
        if (char.ToLower(value[left]) != char.ToLower(value[right]))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Checks even- and odd-length strings for being a palindrome by comparing the first
/// half with the reversed second half (the middle character of an odd-length string
/// is ignored). The comparison is case-sensitive.
/// </summary>
public static bool IsPalenDrome(string palendromeString)
{
    // No try/catch: the previous "catch (Exception ex) { throw ex; }" only reset the
    // stack trace of whatever was thrown.
    int halfLength = palendromeString.Length / 2;

    char[] reversedArray = palendromeString.ToCharArray();
    Array.Reverse(reversedArray);

    string leftHalfString = palendromeString.Substring(0, halfLength);
    // The first half of the reversed text is the right half read backwards.
    string rightHalfStringReversed = new string(reversedArray, 0, halfLength);
    return leftHalfString == rightHalfStringReversed;
}
In C# :
/// <summary>Case-insensitive palindrome check.</summary>
public bool EhPalindromo(string text)
{
    // Lower-case once and compare that text with its own reversal. Comparing the
    // lower-cased reversal with the original text rejected anything with a capital.
    var lowered = text.ToLowerInvariant();
    var reverseText = string.Join("", lowered.Reverse());
    return reverseText == lowered;
}
This is a short and efficient way of checking palindrome.
// Case-sensitive palindrome check using two indices that move towards each other.
bool checkPalindrome(string inputString) {
    int front = 0;
    int back = inputString.Length - 1;
    while (front < back) {
        if (inputString[front] != inputString[back]) {
            return false;
        }
        front++;
        back--;
    }
    return true;
}
This way is both concise in appearance & processes very quickly.
// s.Reverse() yields an IEnumerable<char>; calling Equals on it is a reference comparison
// and is never true. SequenceEqual compares the characters themselves.
Func<string, bool> IsPalindrome = s => s.SequenceEqual(s.Reverse());
/// <summary>Returns true when <paramref name="word"/> compares equal to its own reversal.</summary>
public static bool IsPalindrome(string word)
{
    // Build the mirrored text, then compare it with the input.
    char[] letters = word.Reverse().ToArray();
    string reversedString = new string(letters);
    return string.Compare(word, reversedString) == 0;
}
Out of all the solutions, below can also be tried:
/// <summary>Case-sensitive check that <paramref name="s"/> equals its own reversal.</summary>
public static bool IsPalindrome(string s)
{
    char[] flipped = s.Reverse().ToArray();
    string mirrored = new string(flipped);
    return string.Equals(s, mirrored);
}
String extension method, easy to use:
/// <summary>
/// Palindrome check that ignores everything except the letters a-z / A-Z and
/// ignores case.
/// </summary>
public static bool IsPalindrome(this string str)
{
    string letters = Regex.Replace(str, "[^a-zA-Z]", "").ToLower();
    int last = letters.Length - 1;
    for (int i = 0; i < letters.Length; i++)
    {
        if (letters[i] != letters[last - i])
        {
            return false;
        }
    }
    return true;
}
// Shows "Palindrome" when str reads the same in both directions; otherwise shows "not".
private void CheckIfPalindrome(string str)
{
    char[] letters = str.ToCharArray();
    bool isPalindrome = true;

    // Compare pairs from the outside in, e.g. for "tattarrattat": [0] with [11], [1] with [10], ...
    int front = 0;
    int back = letters.Length - 1;
    while (front <= back)
    {
        if (letters[front] != letters[back])
        {
            MessageBox.Show("not");
            isPalindrome = false;
            break;
        }
        front++;
        back--;
    }

    if (isPalindrome)
    {
        MessageBox.Show("Palindrome");
    }
}
use this way from dotnetperls
using System;
class Program
{
    /// <summary>
    /// Determines whether the string is a palindrome, looking only at letters and
    /// digits and ignoring case.
    /// </summary>
    public static bool IsPalindrome(string value)
    {
        int left = 0;
        int right = value.Length - 1;
        while (left <= right)
        {
            // Skip characters that are not letters or digits on the left side.
            while (!char.IsLetterOrDigit(value[left]))
            {
                if (++left > right)
                {
                    return true;
                }
            }
            // Skip characters that are not letters or digits on the right side.
            while (!char.IsLetterOrDigit(value[right]))
            {
                if (left > --right)
                {
                    return true;
                }
            }
            if (char.ToLower(value[left]) != char.ToLower(value[right]))
            {
                return false;
            }
            left++;
            right--;
        }
        return true;
    }

    // Prints the verdict for a list of sample sentences.
    static void Main()
    {
        string[] samples =
        {
            "A man, a plan, a canal: Panama.",
            "A Toyota. Race fast, safe car. A Toyota.",
            "Cigar? Toss it in a can. It is so tragic.",
            "Dammit, I'm mad!",
            "Delia saw I was ailed.",
            "Desserts, I stressed!",
            "Draw, O coward!",
            "Lepers repel.",
            "Live not on evil.",
            "Lonely Tylenol.",
            "Murder for a jar of red rum.",
            "Never odd or even.",
            "No lemon, no melon.",
            "Senile felines.",
            "So many dynamos!",
            "Step on no pets.",
            "Was it a car or a cat I saw?",
            "Dot Net Perls is not a palindrome.",
            "Why are you reading this?",
            "This article is not useful.",
            "...",
            "...Test"
        };
        for (int index = 0; index < samples.Length; index++)
        {
            string sample = samples[index];
            Console.WriteLine("{0} = {1}", sample, IsPalindrome(sample));
        }
    }
}
If you just need to detect a palindrome, you can do it with a regex, as explained here. Probably not the most efficient approach, though...
That is non-trivial, there is no built in method to do that for you, you'll have to write your own. You will need to consider what rules you would like to check, like you implicitly stated you accepted reversing of one string. Also, you missed out the middle character, is this only if odd length?
So you will have something like:
if (myString.Length % 2 == 0)
{
    // even: split into two halves of equal size
    int half = myString.Length / 2;
    string a = myString.Substring(0, half);
    string b = myString.Substring(half);
    if (a == b)
        return true;
    // Rule 1: reverse (string has no built-in Reverse that returns a string)
    char[] reversed = b.ToCharArray();
    Array.Reverse(reversed);
    if (a == new string(reversed))
        return true;
}
etc, also doing whatever you want for odd strings
This C# method will check for even and odd length palindrome string (Recursive Approach):
/// <summary>
/// Checks whether the characters between <paramref name="leftIndex"/> and
/// <paramref name="rightIndex"/> (both inclusive) read the same in both directions.
/// </summary>
public static bool IsPalindromeResursive(int rightIndex, int leftIndex, char[] inputString)
{
    // Iterative form of the pairwise comparison: stop once the indices meet or cross.
    while (rightIndex > leftIndex)
    {
        if (inputString[rightIndex] != inputString[leftIndex])
        {
            return false;
        }
        rightIndex--;
        leftIndex++;
    }
    return true;
}
/// <summary>Case-sensitive check that <paramref name="value"/> equals its own reversal.</summary>
public Boolean IsPalindrome(string value)
{
    var one = value.ToList<char>();
    var two = one.Reverse<char>().ToList();
    // List<T>.Equals compares references and is never true for two different lists;
    // SequenceEqual compares the elements.
    return one.SequenceEqual(two);
}
// Console sample: reads a line, reverses it and reports whether it is a palindrome.
class Program
{
    static void Main(string[] args)
    {
        Console.WriteLine(" Enter string");
        string input = Console.ReadLine();

        // Build the reversed text from the last character to the first,
        // printing each index as it is visited.
        string reversed = "";
        int index = input.Length;
        while (--index >= 0)
        {
            Console.WriteLine(index);
            reversed += input[index].ToString();
        }

        Console.WriteLine(reversed == input ? "String is Palindrome" : "String is not Palindrome");
        Console.ReadKey();
    }
}
/// <summary>Case-insensitive palindrome check comparing mirrored positions.</summary>
public bool IsPalindroom(string input)
{
    string lowered = input.ToLower();
    int front = 0;
    int back = lowered.Length - 1;
    while (front < back)
    {
        if (lowered[front] != lowered[back])
        {
            return false;
        }
        front++;
        back--;
    }
    return true;
}
Here is an absolutely simple way to do this,
Receive the word as input into a method.
Assign a temp variable to the original value.
Loop through the initial word, and add the last character to the reversal that you are constructing until the initial word has no more characters.
Now use the spare you created to hold the original value to compare to the constructed copy.
This is a nice way as u don't have to cast ints and doubles. U can just pass them to the method in their string representation by using the ToString() method.
/// <summary>
/// Case-sensitive check that <paramref name="word"/> equals its own reversal.
/// The empty string counts as a palindrome.
/// </summary>
public static bool IsPalindrome(string word)
{
    // Reverse a copy in one step instead of repeatedly removing the last character,
    // which rebuilt the string on every iteration and left the reversal null for "".
    char[] letters = word.ToCharArray();
    Array.Reverse(letters);
    string reversal = new string(letters);
    return word.Equals(reversal);
}
So from your main method,
For even and odd length strings u just pass the whole string into the method.
Since a palindrome also includes numbers, words, sentences, and any combinations of these, and should ignore punctuation and case, (See Wikipedia Article)
I propose this solution:
/// <summary>
/// Palindrome checks for text and numbers. Only the characters a-z and 0-9 are
/// compared, after OR-ing each character code with 0x20 (which lower-cases A-Z).
/// </summary>
public class Palindrome
{
    // Character codes that take part in the comparison.
    static IList<int> Allowed = new List<int> {
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
        'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
        's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
    };

    // Maps every character through "| 0x20" and keeps only the allowed codes.
    private static int[] GetJustAllowed(string text)
    {
        List<int> kept = new List<int>();
        foreach (char ch in text)
        {
            int code = ch | 0x20;
            if (Allowed.Contains(code))
            {
                kept.Add(code);
            }
        }
        return kept.ToArray();
    }

    /// <summary>Returns true for null, for one-character text and for text whose allowed characters mirror.</summary>
    public static bool IsPalindrome(string text)
    {
        if (text == null || text.Length == 1)
        {
            return true;
        }

        int[] chars = GetJustAllowed(text);
        int last = chars.Length - 1;
        for (int k = 0; k <= last; k++)
        {
            if (chars[k] != chars[last - k])
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>Checks the default string form of <paramref name="number"/>.</summary>
    public static bool IsPalindrome(int number)
    {
        string digits = number.ToString();
        return IsPalindrome(digits);
    }

    /// <summary>Checks the default string form of <paramref name="number"/>.</summary>
    public static bool IsPalindrome(double number)
    {
        string digits = number.ToString();
        return IsPalindrome(digits);
    }

    /// <summary>Checks the default string form of <paramref name="number"/>.</summary>
    public static bool IsPalindrome(decimal number)
    {
        string digits = number.ToString();
        return IsPalindrome(digits);
    }
}
// Console sample: reads a line and reports whether it equals its own reversal.
static void Main(string[] args)
{
    Console.Write("Enter string");
    string str = Console.ReadLine();

    // Prepending each character in reading order yields the reversed text.
    string rev = "";
    foreach (char ch in str)
    {
        rev = ch + rev;
    }

    Console.Write(rev == str ? "Entered string is pallindrome" : "Entered string is not pallindrome");
    Console.ReadKey();
}
string test = "Malayalam";
char[] palindrome = test.ToCharArray();
// Destination array of the same size for the reversed characters.
char[] reversestring = new char[palindrome.Count()];
// Copy from the last character to the first into ascending positions.
for (int i = palindrome.Count() - 1; i >= 0; i--)
{
reversestring[palindrome.Count() - 1 - i] = palindrome[i];
}
string materializedString = new string(reversestring);
// Lower-casing both sides makes the comparison case-insensitive.
if (materializedString.ToLower() == test.ToLower())
{
Console.Write("Palindrome!");
}
else
{
Console.Write("Not a Palindrome!");
}
// Waits for input before continuing.
Console.Read();
/// <summary>Case-sensitive palindrome check comparing each character of the first half with its mirror.</summary>
public static bool palindrome(string t)
{
    int last = t.Length - 1;
    for (int k = 0; k < t.Length / 2; k++)
    {
        if (t[k] != t[last - k])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Case-sensitive palindrome check that starts at the middle of the string and
/// moves outwards; the middle character of an odd-length string is skipped.
/// </summary>
public bool Solution(string content)
{
    int length = content.Length;
    // Last index of the left half and first index of the right half.
    int left = length / 2 - 1;
    int right = (length + 1) / 2;
    while (left >= 0)
    {
        if (content[left] != content[right])
        {
            return false;
        }
        left--;
        right++;
    }
    return true;
}
/// <summary>
/// Case-sensitive check that <paramref name="word"/> equals its own reversal.
/// The empty string counts as a palindrome.
/// </summary>
public bool MojTestPalindrome (string word)
{
    char[] forward = word.ToArray();
    char[] backward = (char[])forward.Clone();
    Array.Reverse(backward);
    // Compare the same index in both arrays. Indexing the reversed copy from the far end
    // compared each character with itself and accepted every non-empty word.
    return forward.SequenceEqual(backward);
}
/// <summary>Case-sensitive check that <paramref name="str"/> reads the same in both directions.</summary>
public static bool IsPalindrome(string str)
{
    // Compare mirrored positions directly. The previous version reversed the whole
    // array once per character to make the same comparisons.
    int left = 0;
    int right = str.Length - 1;
    while (left < right)
    {
        if (str[left] != str[right])
        {
            return false;
        }
        left++;
        right--;
    }
    return true;
}
The various provided answers are wrong for numerous reasons, primarily from misunderstanding what a palindrome is. The majority only properly identify a subset of palindromes.
From Merriam-Webster
A word, verse, or sentence (such as "Able was I ere I saw Elba")
And from Wordnik
A word, phrase, verse, or sentence that reads the same backward or forward. For example: A man, a plan, a canal, Panama!
Consider non-trivial palindromes such as "Malayalam" (it's a proper language, so naming rules apply, and it should be capitalized), or palindromic sentences such as "Was it a car or a cat I saw?" or "No 'X' in Nixon".
These are recognized palindromes in any literature.
I'm lifting the thorough solution from a library providing this kind of stuff that I'm the primary author of, so the solution works for both String and ReadOnlySpan<Char> because that's a requirement I've imposed on the library. The solution for purely String will be easy to determine from this, however.
/// <summary>
/// Returns true when <paramref name="string"/> is a palindrome, ignoring case, control
/// characters, punctuation, separators and white space. Null is not a palindrome.
/// </summary>
public static Boolean IsPalindrome(this String @string) =>
    !(@string is null) && @string.AsSpan().IsPalindrome();

/// <summary>
/// Returns true when the characters of <paramref name="span"/> form a palindrome,
/// ignoring case, control characters, punctuation, separators and white space.
/// </summary>
public static Boolean IsPalindrome(this ReadOnlySpan<Char> span) {
    // First we need to build the string without any punctuation or whitespace or any other
    // unrelated-to-reading characters.
    // NOTE(review): uses the Char.IsXxx framework methods; assumes the library's
    // s.IsControl()/IsPunctuation()/IsSeparator()/IsWhiteSpace() helpers wrap them — confirm.
    StringBuilder builder = new StringBuilder(span.Length);
    foreach (Char s in span) {
        if (!(Char.IsControl(s)
            || Char.IsPunctuation(s)
            || Char.IsSeparator(s)
            || Char.IsWhiteSpace(s))) {
            _ = builder.Append(s);
        }
    }
    String prepped = builder.ToString();
    Char[] mirrored = prepped.ToCharArray();
    Array.Reverse(mirrored);
    String reversed = new String(mirrored);
    // Now actually check it's a palindrome
    return String.Equals(prepped, reversed, StringComparison.CurrentCultureIgnoreCase);
}
You're going to want variants of this that accept a CultureInfo parameter as well, when you're testing a specific language rather than your own language, by instead calling .ToUpper(cultureInfo) on prepped.
And here's proof from the projects unit tests that it works.