Let's say I have a string that consists of a person's name:
var name = "Jason, Bruno Mars";
How do I mask the part of the name after the comma with X, so that the result is:
var name = "Jason, BXXXX MXXX";
I have tried using the following methods but only front characters are masked:
// Attempt from the question: split the name at the comma and mask the part after it.
string first, second, output;
bool hasSpace, hasComma;
int int_LENGTH;
var name = "Jason, Bruno Mars";
hasComma = name.Contains(",");
if (hasComma == true)
{
// Position of the comma; everything before it is kept unmasked.
int_LENGTH = name.IndexOf(",");
if (int_LENGTH > 0)
{
first = name.Substring(0, int_LENGTH);
}
// Drops the first space-separated token and joins the remaining tokens with ",",
// so for the sample input `second` contains no space.
second = string.Join(",", name.Split(" ").Skip(1));
hasSpace = second.Contains(" ");
// NOTE(review): because the tokens were joined with "," above, this branch is only taken
// when the input contains consecutive spaces; otherwise `output` is never assigned.
if (hasSpace == true)
{
// Hard-coded mask: 12 'X' characters followed by the last four characters of `second`.
// This masks the front of the text rather than everything after each first letter.
second = string.Concat(new string('X', 12), second.Substring(second.Length - 4));
// NOTE(review): `first` is only assigned when the comma is not at index 0.
output = first + "," + second;
}
}
Does anyone have a better idea of how I can achieve the same result in a more efficient way?
Another option, using Regex.Matches to select the name parts except the first letter. The regex collects all string parts separated by a space, skipping what's before the comma.
The collection of matches is then passed to LINQ's Aggregate() method to perform the substitution.
A StringBuilder is used to store the strings generated by its own Replace() method:
string theName = "Jason Et Alt., Bruno Mars And More Names";
// Each match is: whitespace, the first letter of a name part, then the remaining letters (group 1).
// The negative lookahead (?!.*?,) rejects every position that still has a comma ahead of it,
// so only the name parts after the last comma are matched.
var matches = Regex.Matches(theName, @"(?!.*?,)\s+?.(\w+)");
string outName = matches.OfType<Match>().Aggregate(new StringBuilder(theName), (sb, m) =>
    // Restrict the replacement to the captured range. The two-argument Replace would also
    // rewrite identical text elsewhere in the string (for example before the comma).
    // The replacement has the same length as the capture, so later match indexes stay valid.
    sb.Replace(m.Groups[1].Value, new string('X', m.Groups[1].Length), m.Groups[1].Index, m.Groups[1].Length)).ToString();
outname = Jason Et Alt., BXXXX MXXX AXX MXXX NXXXX
/// <summary>
/// Masks every name part that follows the first comma: the first character of each
/// space-separated part is kept and each remaining character is replaced with 'X'.
/// </summary>
/// <param name="name">Name in "Surname, Given Names" form.</param>
/// <returns>
/// The masked name, e.g. "Jason, BXXXX MXXX". When <paramref name="name"/> contains no
/// comma it is returned unchanged (no comma is appended).
/// </returns>
static string MaskName(string name)
{
    int commaIndex = name.IndexOf(',');
    if (commaIndex < 0)
    {
        return name;
    }

    // Keep everything up to and including the first comma untouched.
    var masked = new System.Text.StringBuilder(name.Length + 1);
    masked.Append(name, 0, commaIndex + 1);

    // Split only the text after the first comma, so nothing after a second comma is lost.
    foreach (string part in name.Substring(commaIndex + 1).Split(' '))
    {
        // Consecutive spaces produce empty parts; skip them.
        if (part.Length > 0)
        {
            masked.Append(' ').Append(part[0]).Append('X', part.Length - 1);
        }
    }

    return masked.ToString();
}
Using This code..
/// <summary>
/// Masks every name part that follows the first comma: the first character of each
/// space-separated part is kept and each remaining character is replaced with 'X'.
/// </summary>
/// <param name="name">Name in "Surname, Given Names" form.</param>
/// <returns>
/// The masked name, e.g. "Jason, BXXXX MXXX". When <paramref name="name"/> contains no
/// comma it is returned unchanged (no comma is appended).
/// </returns>
static string MaskName(string name)
{
    int commaIndex = name.IndexOf(',');
    if (commaIndex < 0)
    {
        return name;
    }

    // Keep everything up to and including the first comma untouched.
    var masked = new System.Text.StringBuilder(name.Length + 1);
    masked.Append(name, 0, commaIndex + 1);

    // Split only the text after the first comma, so nothing after a second comma is lost.
    foreach (string part in name.Substring(commaIndex + 1).Split(' '))
    {
        // Consecutive spaces produce empty parts; skip them.
        if (part.Length > 0)
        {
            masked.Append(' ').Append(part[0]).Append('X', part.Length - 1);
        }
    }

    return masked.ToString();
}
Output :-
Try This
/// <summary>
/// Masks the name parts after the first comma, keeping only the first character of each part.
/// </summary>
/// <param name="name">Name in "Surname, Given Names" form.</param>
/// <returns>
/// The text up to and including the first comma, followed by the masked parts separated by
/// single spaces (e.g. "Jason, BXXXX MXXX"). The input is returned unchanged when it has no comma.
/// </returns>
private string ReturnMaskedName(string name)
{
    int commaIndex = name.IndexOf(',');
    if (commaIndex < 0)
    {
        return name;
    }

    // The prefix is everything before the comma, not just the first space-separated word.
    string prefix = name.Substring(0, commaIndex + 1);

    // RemoveEmptyEntries guards the parts[i][0] access below against consecutive spaces.
    string[] parts = name.Substring(commaIndex + 1)
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length == 0)
    {
        return prefix;
    }

    for (int i = 0; i < parts.Length; i++)
    {
        parts[i] = parts[i][0] + new string('X', parts[i].Length - 1);
    }

    return prefix + " " + string.Join(" ", parts);
}
Efficient way of masking without Split, Regex, and using System.Linq:
For C# version < 7.2:
/// <summary>
/// Replaces with 'X' every character after the first comma that is neither a space nor the
/// first character following a space (or following the comma itself).
/// </summary>
/// <param name="name">The name to mask.</param>
/// <returns>The masked copy, or <paramref name="name"/> itself when it has no comma.</returns>
static string MaskName(string name)
{
    int commaPos = name.IndexOf(',');
    if (commaPos == -1)
    {
        return name;
    }

    char[] buffer = name.ToCharArray();
    bool atWordStart = true;
    int pos = commaPos + 1;
    while (pos < buffer.Length)
    {
        bool isSpace = buffer[pos] == ' ';

        // Spaces and word-initial characters survive; everything else is masked.
        if (!isSpace && !atWordStart)
        {
            buffer[pos] = 'X';
        }

        // The next character starts a word only when this one is a space.
        atWordStart = isSpace;
        pos++;
    }

    return new string(buffer);
}
For C# version >= 7.2:
/// <summary>
/// Replaces with 'X' every character after the first comma that is neither a space nor the
/// first character following a space (or following the comma itself).
/// </summary>
/// <param name="name">The name to mask.</param>
/// <returns>The masked name, or the unchanged text when it contains no comma.</returns>
static string MaskName(ReadOnlySpan<char> name)
{
    // Largest input that is copied to the stack; longer inputs use a heap array so that
    // a very long string cannot overflow the stack.
    const int MaxStackChars = 256;

    int indexOfComma = name.IndexOf(',');
    if (indexOfComma == -1)
    {
        return name.ToString();
    }

    Span<char> temp = name.Length <= MaxStackChars
        ? stackalloc char[name.Length]
        : new char[name.Length];
    name.CopyTo(temp);

    bool isFirstChar = true;
    for (int i = indexOfComma + 1; i < temp.Length; i++)
    {
        if (temp[i] == ' ')
        {
            isFirstChar = true;
        }
        else if (isFirstChar)
        {
            // Keep the first character of each name part.
            isFirstChar = false;
        }
        else
        {
            temp[i] = 'X';
        }
    }

    return new string(temp);
}
/// <summary>
/// Masks the name parts after the first comma, keeping only the first character of each part.
/// </summary>
/// <param name="name">Name in "Surname, Given Names" form.</param>
/// <returns>
/// The text before the comma, then ", " and the masked parts joined by single spaces.
/// The input is returned unchanged when it contains no comma.
/// </returns>
private string MaskName(string name)
{
    // Split at the first comma only, so text after any further comma is not discarded.
    var parts = name.Split(new[] { ',' }, 2);
    if (parts.Length < 2)
    {
        return name;
    }

    var subparts = parts[1].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    for (var i = 0; i < subparts.Length; i++)
    {
        var subpart = subparts[i];
        subparts[i] = subpart[0] + new string('X', subpart.Length - 1);
    }

    return parts[0] + ", " + string.Join(" ", subparts);
}
Related
I am currently building a text-based AI.
I have a database with a pattern for each answer
A pattern looks like [Who is the] Winner of the World Cup 2018
[] = Optional words
<> = Needed words
When I enter the sentence "Who is the Winner of the World Cup 2018", my method should return the identifier of the answer.
My database table has two columns, called "AnswerIndentifier" and "Pattern".
I solved it myself and wrote this algorithm:
/// <summary>
/// Checks whether <paramref name="text"/> matches <paramref name="pattern"/>. Parts of the pattern
/// enclosed in square brackets are optional; all other text is required. Every combination of
/// present/absent optional parts is rendered as a sentence and compared with the text.
/// </summary>
/// <param name="text">The sentence to test.</param>
/// <param name="pattern">The pattern, e.g. "[Who is the] Winner of the World Cup 2018".</param>
/// <returns>true as soon as one rendered combination matches; otherwise false.</returns>
private static bool MatchesPattern(string text, string pattern)
{
// Tokens are stored as "NEC" (required) or "OPT" (optional) + char.MaxValue + the token text.
List<string> patternTokens = new List<string>();
string tok = "";
// Lower-case the pattern and append a sentinel '[' so that the '[' branch below also
// flushes required text that ends the pattern.
// NOTE(review): `text` is not lower-cased in this method; presumably the caller does that - confirm.
pattern = pattern.ToLower() + "[";
// state == 1 while the scanner is inside a [...] group.
int state = 0;
for(int i = 0; i < pattern.ToCharArray().Length; i++)
{
char token = pattern[i];
if(token == '[')
{
// Text collected before an opening bracket is a required token.
if(tok != "")
{
patternTokens.Add("NEC" + char.MaxValue + tok);
tok = "";
}
state = 1;
continue;
}
if(token == ']' && state == 1)
{
// Skips the character that follows ']' (presumably the separating space - confirm).
i++;
state = 0;
patternTokens.Add("OPT" + char.MaxValue + tok);
tok = "";
continue;
}
// NOTE(review): this guard indexes `text` with the pattern position `i`; it looks as if
// `pattern` was intended (skip a space directly before '[') - confirm before relying on it.
if(token == ' ' && i + 1 < text.ToCharArray().Length && text[i + 1] == '[')
continue;
tok += token;
}
// Keep an untouched copy of the full token list; patternTokens is mutated per combination.
string[] patternTokensCopy = new string[patternTokens.Count];
for(int i = 0; i < patternTokens.Count; i++)
patternTokensCopy[i] = patternTokens[i];
// One iteration per subset of the optional tokens: 2^(number of OPT tokens).
int max = (int) Math.Pow(2, patternTokens.Where(x => x.StartsWith("OPT")).ToList().Count);
for(int i = 0; i < max; i++)
{
// Bit string with one digit per optional token; '0' means "leave this optional token out".
string binary = Convert.ToString(i, 2).PadLeft(patternTokensCopy.Where(x => x.StartsWith("OPT")).ToList().Count, '0');
for(int x = 0; x < patternTokensCopy.Where(t => t.StartsWith("OPT")).ToList().Count; x++)
if(binary[x] == '0')
{
List<string> optionalTokens = new List<string>();
foreach(string curpattern in patternTokensCopy)
if(curpattern.StartsWith("OPT"))
optionalTokens.Add(curpattern);
// List.Remove deletes the first equal element, so two optional tokens with identical
// text are not distinguished here.
patternTokens.Remove(optionalTokens[x]);
}
// Render the remaining tokens as a sentence (the text after the char.MaxValue separator).
string patternAsSentence = "";
foreach(string patternToken in patternTokens)
patternAsSentence += patternToken.Split(char.MaxValue)[1] + " ";
// Strip trailing spaces.
// NOTE(review): if no tokens remain, patternAsSentence is empty and this index access throws.
while(patternAsSentence[patternAsSentence.Length - 1] == ' ')
patternAsSentence = patternAsSentence.Substring(0, patternAsSentence.Length - 1);
// NOTE(review): as written the second Replace swaps a space for a space; it presumably
// collapsed double spaces originally - confirm against the original source.
patternAsSentence = patternAsSentence.Replace("\r", "").Replace(" ", " ");
// StringSimilarity is defined elsewhere; smaller values are treated as "more similar" below.
int similarity = StringSimilarity.GetStringSimilarity(patternAsSentence, text);
// Texts shorter than six characters must match exactly; longer ones may differ by a score of up to 6.
if(text.Length < 6)
{
if(text == patternAsSentence)
return true;
}
else
{
if(similarity <= 6)
return true;
}
// Restore the full token list for the next combination.
patternTokens = new List<string>();
patternTokensCopy.ToList().ForEach(x => patternTokens.Add(x));
}
return false;
}
The only differences from the description above are that required text does not have to be marked with <>, and that a similarity check is used (see "C# - Compare String Similarity").
I want to split a string into smaller parts, not exceeding a string length of 20 characters.
The current code is able to split an input string into an array of strings of length 20. However, this could cut a word.
The current code is:
string[] Array;
// Cut the input into consecutive slices of at most 20 characters. Slicing with Substring
// avoids the '~' marker, which broke the result whenever the input itself contained '~'.
const int chunkSize = 20;
// An empty input still yields one (empty) element, as before.
int chunkCount = input.Length == 0 ? 1 : (input.Length + chunkSize - 1) / chunkSize;
Array = new string[chunkCount];
for (int chunk = 0; chunk < chunkCount; chunk++)
{
    int start = chunk * chunkSize;
    int remaining = input.Length - start;
    Array[chunk] = input.Substring(start, remaining < chunkSize ? remaining : chunkSize);
}
For an input of this: Hello. This is a string. Goodbye., the output would be ['Hello. This is a str', 'ing. Goodbye.'].
However, I don’t want the string to be cut if it’s a word. That word should move to the next string in the array. How can I get the following output instead?
['Hello. This is a', 'string. Goodbye.']
First split your sentence on word-boundary:
var words = myString.Split();
Now concatenate words as long as not more than 20 characters are within your current line:
// Start the first line with the first word, then append each following word to the
// current line while the result stays within 20 characters; otherwise open a new line.
var lines = new List<string> { words[0] };
var lineNum = 0;
for (int i = 1; i < words.Length; i++)
{
    string candidate = lines[lineNum] + " " + words[i];
    if (candidate.Length <= 20)
    {
        lines[lineNum] = candidate;
        continue;
    }

    // The word does not fit: it becomes the start of the next line.
    lineNum++;
    lines.Add(words[i]);
}
Here is a fiddle for testing: https://dotnetfiddle.net/s0LrFC
Could be more elegant but this will split the string to lines of a maximum number of characters. The words will be kept together unless they exceed the given length.
/// <summary>
/// Wraps <paramref name="input"/> into lines of at most <paramref name="lineLen"/> characters.
/// Words (separated by single spaces) are kept together; a word longer than a line is cut
/// into pieces of exactly <paramref name="lineLen"/> characters.
/// </summary>
/// <param name="input">The text to wrap.</param>
/// <param name="lineLen">Maximum number of characters per line; must be positive.</param>
/// <returns>The wrapped lines. An input without any words yields a single empty string.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="lineLen"/> is not positive.</exception>
public static string[] SplitString(string input, int lineLen)
{
    if (input == null)
    {
        throw new System.ArgumentNullException(nameof(input));
    }
    if (lineLen <= 0)
    {
        // A zero length would never consume any characters and loop forever.
        throw new System.ArgumentOutOfRangeException(nameof(lineLen), "Line length must be positive.");
    }

    // Lines are collected in a list rather than joined with a '~' marker, so the input may contain '~'.
    var lines = new System.Collections.Generic.List<string>();
    StringBuilder line = new StringBuilder();

    foreach (string w in input.Split(' '))
    {
        string word = w;
        if (word.Length == 0)
        {
            continue;
        }

        // Count the joining space as well, so a line never exceeds lineLen.
        if (line.Length > 0 && line.Length + 1 + word.Length > lineLen)
        {
            lines.Add(line.ToString());
            line.Clear();
        }

        if (line.Length > 0)
        {
            line.Append(' ').Append(word);
            continue;
        }

        // The line is empty: cut off full-width pieces of an over-long word first.
        while (word.Length >= lineLen)
        {
            lines.Add(word.Substring(0, lineLen));
            word = word.Substring(lineLen);
        }
        line.Append(word);
    }

    if (line.Length > 0)
    {
        lines.Add(line.ToString());
    }

    // Preserve the previous result for an input without words: one empty string.
    return lines.Count == 0 ? new[] { string.Empty } : lines.ToArray();
}
To test:
// Demo: wrap a sample sentence (including one over-long word) at 20 characters and print each line.
string[] lines = SplitString("This is a test of the string splitter KGKGKJGKGHKJHJKJKHGJHGhghsjagsjasgajsgjahs yes!", 20);
for (int index = 0; index < lines.Length; index++)
{
    Console.WriteLine(lines[index]);
}
Output:
This is a test of the
string splitter
KGKGKJGKGHKJHJKJKHGJ
HGhghsjagsjasgajsgja
hs yes!
I believe it's faster to split only at the places where a split is needed, instead of at every word. Combined with lines.SelectMany(x => Split(x, 80)), it can also be used on multiline text:
/// <summary>
/// Lazily splits <paramref name="text"/> into parts of at most <paramref name="maxLength"/>
/// characters, breaking at the last space inside each window when there is one.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="maxLength">Maximum part length; must be positive.</param>
/// <returns>The parts in order. The space at which a part is broken is not included.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="maxLength"/> is not positive (raised when enumeration starts).
/// </exception>
private static IEnumerable<string> Split(string text, int maxLength)
{
    if (maxLength <= 0)
    {
        throw new System.ArgumentOutOfRangeException(nameof(maxLength), "Maximum length must be positive.");
    }

    var i = 0;
    while (i + maxLength < text.Length)
    {
        // Looks backwards through indexes i + maxLength down to i + 1.
        var partIndex = text.LastIndexOf(' ', i + maxLength, maxLength);
        if (partIndex == -1)
        {
            // No space in the window: hard-cut the word and continue directly after the cut,
            // without skipping a character (there is no separator to drop here).
            yield return text[i..(i + maxLength)];
            i += maxLength;
        }
        else
        {
            yield return text[i..partIndex];
            i = partIndex + 1; // skip the space that was used as the break
        }
    }
    yield return text[i..];
}
I'm writing a simple compiler without using grammar generators.
Here I wrote the scanning code, which reads file.txt and classifies each token as an identifier, reserved word, number, variable, etc.
I get the result that I want, but is it possible to continue from this code, that is, to add syntax analysis and parsing? I tried to do it in Java and coded it the wrong way. I want to see whether I am doing it right, especially since I'm not good at C# coding.
Arrays :
// Lookup tables used by the scanner. Array initializers size each array from its contents,
// so no slot is left null (the operator table used to be declared with 6 slots for 5 entries).

// Reserved words ("continue" was previously misspelled "contiune" and could never match).
string[] Rw = { "for", "while", "end", "continue", "if", "break", "do", "return" };

// Type keywords; the scanner reports these as "identifier".
string[] id = { "int", "float", "string", "double", "bool", "char" };

// Arithmetic operators.
string[] op = { "+", "-", "/", "*", "%" };

// Single decimal digits.
string[] Num = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" };

// Single-letter variable names recognised by the scanner.
string[] var = { "a", "b", "A", "B", "C", "d", "D", "X", "x", "Y", "y", "z", "Z", "v", "V", "f", "F" };
Reading and Split :
// Characters that separate tokens within a line.
char[] delimiterChars = { ' ', ',', '.', ':', '\t' };

// Verbatim string (@"...") so the backslashes in the path are not treated as escapes.
using (System.IO.StreamReader file = new System.IO.StreamReader(@"C:\Users\caza\Desktop\1.txt"))
{
    while ((line = file.ReadLine()) != null)
    {
        string[] token = line.Split(delimiterChars);
        foreach (string s in token)
        {
            // Report the token once for every table that contains it.
            if (Array.IndexOf(Rw, s) >= 0)
            {
                Console.WriteLine(s + " is a Reserved Word ");
            }
            if (Array.IndexOf(id, s) >= 0)
            {
                Console.WriteLine(s + " is a identifier ");
            }
            if (Array.IndexOf(op, s) >= 0)
            {
                Console.WriteLine(s + " is a Operation ");
            }
            if (Array.IndexOf(Num, s) >= 0)
            {
                Console.WriteLine(s + " is a Number ");
            }
            if (Array.IndexOf(var, s) >= 0)
            {
                Console.WriteLine(s + " is a Variable");
            }
        }
    }
}
Console.WriteLine("Press any key to exit.");
Console.ReadKey();
}
}
I have a file that is a SQL Server result set saved as a text file.
Here is a sample of what the file looks like:
RWS_DMP_ID RV1_DMP_NUM CUS_NAME
3192 3957 THE ACME COMPANY
3192 3957 THE ACME COMPANY
3192 3957 THE ACME COMPANY
I want to create a C# program that reads this file and creates the following table of data:
Field MaxSize
----- -------
RWS_DMP_ID 17
RV1_DMP_NUM 17
CUS_NAME 42
This is a list of the field names and their max length. The max length is the beginning of the field to the space right before the beginning of the next field.
By the way I don't care about code performance. This is seldom used file processing utility.
I solved this with the following code:
// Reads the result-set file line by line. Only the first three lines are examined:
// line 1 (the header) yields the field names and widths, line 3 is sliced with those widths.
// The variables used here are declared and initialised outside this snippet.
objFile = new StreamReader(strPath + strFileName);
strLine = objFile.ReadLine();
intLineCnt = 0;
while (strLine != null)
{
intLineCnt++;
if (intLineCnt <= 3)
{
if (intLineCnt == 1)
{
// Header line: SplitWords (defined elsewhere) returns the field names.
strWords = SplitWords(strLine);
intNumberOfFields = strWords.Length;
// Walk the header character by character to measure each field's width
// (the name plus the padding up to the start of the next name).
foreach (char c in strLine)
{
if (bolNewField == true)
{
// The previous character started a new field: back to the "inside a name" state.
bolFieldEnd = false;
bolNewField = false;
}
if (bolFieldEnd == false)
{
// Inside a field name: the first space marks the end of the name.
if (c == ' ')
{
bolFieldEnd = true;
}
}
else
{
// Inside the padding: the first non-space character starts the next field.
if (c != ' ')
{
if (intFieldCnt < strWords.Length)
{
// Store the finished field: column 0 = name, column 1 = width.
strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
strProcessedData[intFieldCnt, 1] = (intCharCnt - 1).ToString();
}
intFieldCnt++;
// Restart the count at 1 for the character that began the new field.
intCharCnt = 1;
bolNewField = true;
}
}
// The first character of a new field is already counted by the reset above.
if (bolNewField == false)
{
intCharCnt++;
}
}
// The last field has no following name, so it is stored after the loop.
strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
strProcessedData[intFieldCnt, 1] = intCharCnt.ToString();
}
else if (intLineCnt == 3)
{
// Third line: cut it into fields using the widths measured from the header.
// The second line is read but not processed.
intLine2Cnt= 0;
intTotalLength = 0;
while(intLine2Cnt < intNumberOfFields)
{
intSize = Convert.ToInt32(strProcessedData[intLine2Cnt, 1]);
// Clamp the width so Substring does not run past the end of a short line.
if (intSize + intTotalLength > strLine.Length)
{
intSize = strLine.Length - intTotalLength;
}
strField = strLine.Substring(intTotalLength, intSize);
strField = strField.Trim();
// Column index intLineCnt - 1 (i.e. 2) holds the trimmed value from this line.
strProcessedData[intLine2Cnt, intLineCnt - 1] = strField;
// Advance past this field plus one separator character.
intTotalLength = intTotalLength + intSize + 1;
intLine2Cnt++;
}
}
}
strLine = objFile.ReadLine();
}
I'm aware that this code is a complete hack job. I'm looking for a better way to solve this problem.
Is there a better way to solve this problem?
Thanks
I'm not sure how memory efficient this is, but I think it's a bit cleaner (assuming your fields are tab-delimited):
var columnDelimiter = new[] { '\t' };
string[] lines = File.ReadAllLines(strPath + strFileName);

// Read the field names from the first line.
var fields = lines[0].Split(columnDelimiter, StringSplitOptions.RemoveEmptyEntries).ToList();

// One string[] per data row (the header row is excluded).
string[][] columnsArray = lines.Skip(1).Select(l => l.Split(columnDelimiter)).ToArray();

// Field name -> longest trimmed value found in that column.
var max = new Dictionary<string, int>();
for (int index = 0; index < fields.Count; index++)
{
    int column = index; // copy for the lambdas below
    max.Add(
        fields[index],
        columnsArray
            // Rows shorter than the header (blank lines, footers) have no value for this column.
            .Where(row => column < row.Length)
            .Select(row => row[column].Trim().Length)
            // A file with only a header has no rows; report a width of 0 instead of throwing.
            .DefaultIfEmpty(0)
            .Max());
}

// Output per requirement: one "field length" pair per line.
Console.WriteLine(string.Join(Environment.NewLine,
    max.Select(entry => entry.Key + " " + entry.Value)
));
/// <summary>
/// Reads tab-separated lines from <paramref name="reader"/> and tracks, per header column,
/// the length of the longest value seen in the data rows.
/// </summary>
/// <param name="reader">Source of the lines; the first line is treated as the header.</param>
void MaximumWidth(StreamReader reader)
{
    string[] columns = null;
    int[] maxWidth = null;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        string[] cols = line.Split('\t');
        if (columns == null)
        {
            // First line: remember the header and allocate one width slot per column.
            columns = cols;
            maxWidth = new int[cols.Length];
            continue;
        }

        // A row may have fewer cells than the header (blank line, footer); only visit
        // the cells that exist so the index never runs past the end of the row.
        int shared = System.Math.Min(columns.Length, cols.Length);
        for (int i = 0; i < shared; i++)
        {
            int width = cols[i].Length;
            if (maxWidth[i] < width)
            {
                maxWidth[i] = width;
            }
        }
    }
    // ...
}
Here is what I came up with. The big takeaway is to use the IndexOf string function.
/// <summary>
/// Derives each field's name and width from the header line of a fixed-width text export.
/// A field's width runs from the start of its name to the start of the next name; the last
/// field runs to the length of the longest line in the file.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        String strFilePath;

        // Placeholder from the original answer: substitute the real path before compiling.
        strFilePath = [File path and name];

        // Read the file once; this yields both the header line and the longest line
        // without leaving a StreamReader open.
        string[] lines = File.ReadAllLines(strFilePath);
        if (lines.Length > 0)
        {
            Int32 intMaxLineSize = lines.Max(line => line.Length);
            GetFieldNameAndFieldLengh(lines[0], intMaxLineSize);
        }

        Console.WriteLine("Press <enter> to continue.");
        Console.ReadLine();
    }

    /// <summary>
    /// Computes the name and width of every field in the header line.
    /// </summary>
    /// <param name="strLine">The header line containing the field names.</param>
    /// <param name="intMaxSize">Length of the longest line; used as the end of the last field.</param>
    public static void GetFieldNameAndFieldLengh(String strLine, Int32 intMaxSize)
    {
        string[] fields = SplitWords(strLine);
        string[,] strFieldSizes = new String[fields.Length, 2];

        // Locate each name starting after the previous one, so a name that also occurs
        // inside an earlier name (e.g. "ID" in "RWS_DMP_ID") is not found too early.
        int[] offsets = new int[fields.Length];
        int searchFrom = 0;
        for (int x = 0; x < fields.Length; x++)
        {
            offsets[x] = strLine.IndexOf(fields[x], searchFrom, StringComparison.Ordinal);
            searchFrom = offsets[x] + fields[x].Length;
        }

        for (int x = 0; x < fields.Length; x++)
        {
            // A field ends where the next one starts; the last one ends at the longest line's length.
            int fieldEnd = x < fields.Length - 1 ? offsets[x + 1] : intMaxSize;
            strFieldSizes[x, 0] = fields[x];
            strFieldSizes[x, 1] = (fieldEnd - offsets[x]).ToString();
        }

        // strFieldSizes now holds (name, width) pairs; this snippet does not output them.
        Console.ReadLine();
    }

    /// <summary>Splits a line into its words, discarding the empty entries Regex.Split
    /// produces when the line starts or ends with non-word characters.</summary>
    static string[] SplitWords(string s)
    {
        return Regex.Split(s, @"\W+").Where(word => word.Length > 0).ToArray();
    }
}
How do I split a long string ?
Input :
ABCDEFGHI
Output :
ABC, DEF, GHI
Use
string str = "ABCDEFGHI";
List<string> lst = new List<string>();
string temp = "";
for (int i = 0; i < str.Length; i++)
{
    // Accumulate characters; plain assignment kept only the last character of each group.
    temp += str[i];
    if ((i + 1) % 3 == 0)
    {
        lst.Add(temp);
        temp = "";
    }
}
// Keep a trailing group of fewer than three characters instead of dropping it.
if (temp.Length > 0)
{
    lst.Add(temp);
}
string final_str = string.Join(", ", lst);
Assuming this is a very long string, use a string builder to do your concatenating. And use substring to build the individually grouped strings. This will save you on memory.
string longString = "ABCDEFGHIJK";
int groupingLength = 3;

// Append the string group by group, putting ", " in front of every group except the first.
var sb = new StringBuilder();
int offset = 0;
do
{
    int take = Math.Min(groupingLength, longString.Length - offset);
    if (offset > 0)
    {
        sb.Append(", ");
    }
    sb.Append(longString, offset, take);
    offset += groupingLength;
} while (offset < longString.Length);

var finalString = sb.ToString();