Inverting case: (aPPLE to Apple, BLUeBeRrY to bluEbErRY) - c#

In my following code, the output is Upper:APPLE and Lower:apple, but I need aPPLE to become Apple, and BLUeBeRrY to become bluEbErRY.
// Sample words with deliberately mixed casing.
string[] words = { "aPPLE", "BlUeBeRrY", "cHeRry" };
// Pair every word with its all-upper and all-lower variants; the projection runs lazily when enumerated.
var upperLowerWords = words.Select(w => new { Upper = w.ToUpper(), Lower = w.ToLower() });
foreach (var pair in upperLowerWords)
{
    Console.WriteLine("Uppercase: {0}, Lowercase: {1}", pair.Upper, pair.Lower);
}

Using some more Linq magic:
string[] words = { "aPPLE", "BlUeBeRrY", "cHeRry" };
// For each word keep the original plus its upper-cased, lower-cased and case-inverted forms.
var upperLowerWords = words.Select(w => new
{
    Upper = w.ToUpper(),
    Lower = w.ToLower(),
    Original = w,
    // Flip each character: lower-case letters are upper-cased, everything else goes through ToLower.
    Changed = string.Concat(w.Select(ch => char.IsLower(ch) ? char.ToUpper(ch) : char.ToLower(ch)))
});
foreach (var entry in upperLowerWords)
{
    Console.WriteLine("Original: {0}, Uppercase: {1}, Lowercase: {2}, Changed: {3}", entry.Original, entry.Upper, entry.Lower, entry.Changed );
}

How about throwing in some extension methods:
/// <summary>String helpers for swapping character case.</summary>
public static class StringExtensions
{
    /// <summary>
    /// Returns a copy of <paramref name="value"/> in which every lower-case character is
    /// upper-cased and every other character is passed through <c>char.ToLower</c>.
    /// </summary>
    /// <param name="value">Text to invert; may be null, empty or whitespace.</param>
    /// <returns>
    /// The case-inverted text, or <paramref name="value"/> itself when it is null, empty
    /// or consists only of whitespace.
    /// </returns>
    public static string InvertCases(this string value)
    {
        // Nothing to invert in a null/blank string; hand it back untouched.
        if (string.IsNullOrWhiteSpace(value))
        {
            return value;
        }

        return new string(Array.ConvertAll(value.ToCharArray(), FlipCase));
    }

    // Upper-cases a lower-case character and lower-cases anything else.
    private static char FlipCase(char c)
    {
        if (char.IsLower(c))
        {
            return char.ToUpper(c);
        }
        return char.ToLower(c);
    }
}
and your use would be
// Demo entry point: prints each sample word next to its case-inverted form, then waits for Enter.
static void Main(string[] args)
{
    string[] words = { "aPPLE", "BlUeBeRrY", "cHeRry" };
    for (int i = 0; i < words.Length; i++)
    {
        string word = words[i];
        Console.WriteLine("Original: {0}, Inverted: {1}", word, word.InvertCases());
    }
    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
Many ways lead to Rome...

string[] words = { "aPPLE", "BlUeBeRrY", "cHeRry" };
// Pair each word with its case-swapped form (computed lazily, when the loop enumerates the query).
var upperLowerWords = words.Select(w => new { Original = w, Changed = ChangeCases(w) });
foreach (var entry in upperLowerWords)
{
    Console.WriteLine("Original: {0}, Changed: {1}", entry.Original, entry.Changed);
}
// Returns a copy of input with the case of every character swapped:
// upper-case characters become lower-case, all other characters are upper-cased
// (characters without a case mapping are left as they are by char.ToUpper).
// Works on a char buffer instead of repeated string concatenation, so the cost is
// linear in the input length and no per-character strings are allocated.
static string ChangeCases(string input)
{
    char[] chars = input.ToCharArray();
    for (int i = 0; i < chars.Length; i++)
    {
        char ch = chars[i];
        chars[i] = char.IsUpper(ch) ? char.ToLower(ch) : char.ToUpper(ch);
    }
    return new string(chars);
}

I suggest using Linq with char.IsXXX and char.ToXXX methods
string source = "StaCkOverFloW";
// Swap the case of cased letters; characters that are neither lower nor upper pass through unchanged.
string result = new string(source.Select(c =>
{
    if (char.IsLower(c)) return char.ToUpper(c);
    if (char.IsUpper(c)) return char.ToLower(c);
    return c;
}).ToArray());
...
// sTAcKoVERfLOw
Console.Write(result);

For ASCII only letters, you can XOR the 6th bit:
string[] words = { "aPPLE", "BlUeBeRrY", "cHeRry" };
// The space character (0x20) is exactly the bit that differs between 'A'..'Z' and 'a'..'z' in ASCII,
// so XOR-ing with it toggles the case of ASCII letters (and corrupts anything else).
const int caseBit = ' ';
var result = words.Select(word => new string(word.Select(c => (char)(c ^ caseBit)).ToArray()));
Debug.Print(string.Join(", ", result)); // "Apple, bLuEbErRy, ChErRY"

Related

Using Regex Array with String Array

I'm trying to make a program where a user can input an array of serial numbers and have each corresponding product show up.
Suppose I know that Product A always starts with "C02", Product B always ends in "X02", and Product C always contains "A1700". Then if the user input was "C02HGV32,N93XA1700D,J3429X02", it would return "C02HGV32: Product A; N93XA1700D: Product C; J3429X02: Product B".
How would I get an array of Regex expressions to compare against the array of strings? Here's what I have:
using System.Text.RegularExpressions;
public class ReturnProduct{
public Regex[] compareAgainst = new Regex[3]{#"[C02]*",#"*[X02]",#"*[A1700]*"}; //Clearly not the right way, but not sure how else to do it
...
public string getTheProduct(string input){
string[] compareString = input.Split(",");
for (int a = 0; a < compareString.Length; a++){
for (int b = 0; b < compareAgainst.Length; b++){
//Do something Regex-y with compareString[a] and compareAgainst[b]
}
}
If the requirements of these codes are so simple you can use String.Contains, String.StartsWith and String.EndsWith. You can create a Dictionary to hold product names and functions to check if a given string has the pattern for a product.
// Maps each product name to the predicate that recognises its serial numbers.
// Serial numbers are identifiers rather than linguistic text, so the prefix/suffix checks
// use ordinal comparison; this also matches the ordinal semantics String.Contains already has.
var dict = new Dictionary<string, Predicate<string>>
{
    ["Product A"] = s => s.StartsWith("C02", StringComparison.Ordinal),
    ["Product B"] = s => s.EndsWith("X02", StringComparison.Ordinal),
    ["Product C"] = s => s.Contains("A1700")
};
// Returns the name of the first entry in dict whose predicate accepts serialNum,
// or the text "No product name found" when no predicate matches.
string GetProductName(string serialNum)
{
    foreach (string productName in dict.Keys)
    {
        bool matches = dict[productName](serialNum);
        if (matches)
        {
            return productName;
        }
    }
    return "No product name found";
}
// Splits a comma-separated list of serial numbers and pairs each one with the
// product name reported by GetProductName, preserving input order.
List<(string, string)> GetProductNames(string str)
{
    string[] serialNumbers = str.Split(',');
    var pairs = new List<(string, string)>(); // (serial number, product name)
    for (int i = 0; i < serialNumbers.Length; i++)
    {
        string serialNum = serialNumbers[i];
        pairs.Add((serialNum, GetProductName(serialNum)));
    }
    return pairs;
}
Usage:
var userString = "C02HGV32,N93XA1700D,J3429X02";
List<(string serialNum, string name)> productNames = GetProductNames(userString);
// Deconstruct each tuple directly in the loop header and print "serial : product".
foreach (var (serialNum, name) in productNames)
{
    Console.WriteLine($"{serialNum} : {name}");
}
If you specifically want to use Regex, you can use the following patterns:
// Maps each product name to the regular expression that recognises its serial numbers.
var regexDict = new Dictionary<string, Regex>
{
    { "Product A", new Regex("^C02") },  // '^' anchors the match to the beginning of the string
    { "Product B", new Regex("X02$") },  // '$' anchors the match to the end of the string
    { "Product C", new Regex("A1700") }  // unanchored: matches the text anywhere
};
// Returns the name of the first entry in regexDict whose pattern matches serialNum,
// or the text "No product name found" when none does.
string GetProductName(string serialNum)
{
    foreach (string productName in regexDict.Keys)
    {
        Regex pattern = regexDict[productName];
        if (pattern.IsMatch(serialNum))
        {
            return productName;
        }
    }
    return "No product name found";
}
// Splits a comma-separated list of serial numbers and pairs each one with the
// product name reported by GetProductName, preserving input order.
List<(string, string)> GetProductNames(string str)
{
    var productCodes = str.Split(',');
    // The list must hold (serial number, product name) tuples: it receives tuples below
    // and is returned as List<(string, string)>. Declaring it List<string> does not compile.
    var productNames = new List<(string, string)>();
    foreach (var serialNum in productCodes)
    {
        productNames.Add((serialNum, GetProductName(serialNum)));
    }
    return productNames;
}
Define a class for your products:
/// <summary>Pairs a product name with a regular expression.</summary>
public class Product
{
/// <summary>Name of the product.</summary>
public string Name { get; set; }
/// <summary>Regular expression associated with the product.</summary>
public Regex Expr { get; set; }
}
then create an array with all your regexes:
// One entry per product; each pattern is the one tested against a serial number.
Product[] regexes =
{
    new Product { Name = "Product A", Expr = new Regex("^C02") },
    new Product { Name = "Product B", Expr = new Regex("X02$") },
    new Product { Name = "Product C", Expr = new Regex("A1700") }
};
now you can use LINQ query:
var input = "C02HGV32,N93XA1700D,J3429X02";
// For every serial number, look up the first product whose pattern matches and
// format "serial: name" (the name part is empty when nothing matches).
var result = string.Join("; ",
    from serial in input.Split(',')
    let product = regexes.FirstOrDefault(p => p.Expr.IsMatch(serial))
    select $"{serial}: {product?.Name}");
result would be
C02HGV32: Product A; N93XA1700D: Product C; J3429X02: Product B
Regex syntax:
"^C02.*" - Starts with C02, followed by any number of characters (including none).
"^.*X02$" - Starts with any number of characters (including none) and ends with X02. The trailing $ is what anchors X02 to the end; without it the pattern only requires X02 to appear somewhere.
"^.*A1700.*" - Starts and ends with any number of characters, and contains A1700 somewhere.
/// <summary>
/// Splits <paramref name="input"/> on commas and, for each serial number, writes
/// "serial : product" to the console for the first pattern that matches.
/// Serial numbers that match none of the three patterns produce no output.
/// </summary>
/// <param name="input">Comma-separated serial numbers.</param>
/// <param name="regList">
/// Patterns by position: index 0 is reported as "Product A", 1 as "Product B", 2 as "Product C".
/// </param>
public static void GetTheProduct(string input, List<Regex> regList)
{
    string[] productNames = { "Product A", "Product B", "Product C" };
    foreach (string serial in input.Split(','))
    {
        // Try the patterns in order and stop at the first hit, mirroring an if / else-if chain.
        for (int i = 0; i < productNames.Length; i++)
        {
            if (regList[i].IsMatch(serial))
            {
                Console.WriteLine("{0} : {1}", serial, productNames[i]);
                break;
            }
        }
    }
}
// Demo entry point: classifies three sample serial numbers and waits for Enter.
static void Main(string[] args)
{
    // Position matters: GetTheProduct reports index 0 as Product A, 1 as Product B, 2 as Product C.
    List<Regex> regexList = new List<Regex>()
    {
        new Regex("^C02"),  // starts with C02
        new Regex("X02$"),  // ends with X02; the '$' is required, "^.*X02" would match X02 anywhere
        new Regex("A1700")  // contains A1700
    };
    GetTheProduct("C02HGV32,N93XA1700D,J3429X02", regexList);
    Console.ReadLine();
}
You could also generalize the method and avoid hardcoding Product names.
Like so:
/// <summary>
/// Splits <paramref name="input"/> on commas and writes "serial : product" to the console
/// for each serial number, using the first dictionary entry whose pattern matches.
/// </summary>
/// <param name="input">Comma-separated serial numbers.</param>
/// <param name="regDictionary">Product name mapped to the pattern that recognises it.</param>
/// <remarks>
/// A serial number that matches no pattern is reported as "No product name found"
/// instead of aborting the whole run with an InvalidOperationException.
/// </remarks>
public static void GetTheProduct(string input, Dictionary<string, Regex> regDictionary)
{
    foreach (string item in input.Split(','))
    {
        // FirstOrDefault yields null when nothing matches; fall back to a readable label.
        string key = regDictionary
            .Where(entry => entry.Value.IsMatch(item))
            .Select(entry => entry.Key)
            .FirstOrDefault() ?? "No product name found";
        Console.WriteLine("{0} : {1}", item, key);
    }
}
// Demo entry point: classifies three sample serial numbers and waits for Enter.
static void Main(string[] args)
{
    Dictionary<string, Regex> regDictionary = new Dictionary<string, Regex>();
    regDictionary.Add("Product A", new Regex("^C02"));   // starts with C02
    // The '$' anchor is required: "^.*X02" would accept X02 anywhere in the serial number.
    regDictionary.Add("Product B", new Regex("X02$"));   // ends with X02
    regDictionary.Add("Product C", new Regex("A1700"));  // contains A1700
    GetTheProduct("C02HGV32,N93XA1700D,J3429X02", regDictionary);
    Console.ReadLine();
}

Conditionally binding dropdownlist in asp.net

I have a string in this particular format
string LogInIDs = 124,345, 876 | 765,322, 98 | 565,99
All the numbers in the string are unique LogIn IDs. The IDs are grouped using the pipe symbol. Now, suppose the LogInID is 345; then I need to bind the other numbers in its group (in this case 124 & 876) to a dropdown. The function below is what I have written to retrieve the other numbers.
Can anybody come up with any better idea or suggestion
/// <summary>
/// Finds the pipe-separated group that contains <paramref name="LogInID"/> and returns
/// every ID of that group (including <paramref name="LogInID"/> itself), trimmed.
/// </summary>
/// <param name="LogInIDs">Groups separated by '|', IDs within a group separated by ','.</param>
/// <param name="LogInID">The ID to look for; compared against each trimmed ID.</param>
/// <returns>
/// The trimmed IDs of the matching group, or an empty list when no group contains the ID.
/// If several groups contain it, the last such group is returned.
/// </returns>
public static List<string> RetrieveAffCodes(string LogInIDs , string LogInID)
{
    List<string> matchedGroup = new List<string>();
    // Split allocates its own result array, so no pre-sized arrays or separator counts are needed.
    foreach (string group in LogInIDs.Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] ids = group.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        if (ids.Any(id => id.Trim() == LogInID))
        {
            matchedGroup = ids.Select(id => id.Trim()).ToList();
        }
    }
    return matchedGroup;
}
// Returns a new list holding every entry of final_list with surrounding whitespace trimmed.
// The parameter p is part of the signature but is not read.
private static List<string> AddLogInIDs(string[] final_list, int p)
{
    string[] trimmed = Array.ConvertAll(final_list, id => id.Trim());
    return new List<string>(trimmed);
}
Any suggestions will be embraced.
Thanks for your time and patience.
Note: The string will be expanding up to 200 groups
try this..
/// <summary>
/// Finds the first pipe-separated group that contains <paramref name="logInID"/> and
/// returns all IDs of that group (including <paramref name="logInID"/> itself).
/// </summary>
/// <param name="logInIDs">Groups separated by '|', IDs within a group separated by ','.</param>
/// <param name="logInID">The ID to look for; compared against each trimmed ID.</param>
/// <returns>
/// The trimmed IDs of the matching group, or an empty list (never null) when no group
/// contains the ID.
/// </returns>
public static List<string> RetrieveAffCodes(string logInIDs, string logInID)
{
    return logInIDs
        .Split(new[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
        // Split and trim each group once, so the returned IDs carry no stray whitespace.
        .Select(group => group.Split(',').Select(id => id.Trim()).ToList())
        .FirstOrDefault(ids => ids.Contains(logInID))
        ?? new List<string>();
}
Would this work for you?
using System;
using System.Collections.Generic;
using System.Linq;
/// <summary>Demo: look up the other IDs that share a group with a given login ID.</summary>
public class Program
{
    public static void Main()
    {
        string LogInIDs = "124,345,876|765,322,98 |565,99";
        foreach (string id in new[] { "322", "565" })
        {
            Console.WriteLine(string.Join("\n", RetrieveAffCodes(LogInIDs, id)));
        }
    }

    /// <summary>
    /// Returns the other IDs of the first '|'-separated group that contains
    /// <paramref name="logInID"/>, with <paramref name="logInID"/> itself left out.
    /// </summary>
    /// <param name="logInIDs">Groups separated by '|', IDs within a group separated by ','.</param>
    /// <param name="logInID">The ID to look for; compared against each trimmed ID.</param>
    /// <returns>The remaining trimmed IDs of the group, or an empty list when no group matches.</returns>
    public static IEnumerable<string> RetrieveAffCodes(string logInIDs , string logInID)
    {
        foreach (string rawGroup in logInIDs.Split('|'))
        {
            // Trimmed view of the group's IDs; evaluated lazily each time it is enumerated.
            var ids = rawGroup.Split(',').Select(item => item.Trim());
            if (ids.Contains(logInID))
            {
                return ids.Where(i => i != logInID);
            }
        }
        return new List<string>();
    }
}
Source code: https://dotnetfiddle.net/M2FyNk

Error Cannot implicitly convert type 'string[]' to 'string'

I am experiencing an Error within nouns[i] = SearchKeywords(words, sentences); every time I run my program. What should I do?
*I edited to include the SearchKeywords method. I am trying to find all words that have "a" and "an" before them.
Error is,
Error Cannot implicitly convert type 'string[]' to 'string'
string[] SearchKeywords(List<string> keywords, string[] sentence){
int[] location = new int[sentence.Length];
foreach (string word in keywords)
{
for (int i = 0; i < sentence.Length;i++ )
{
string[] nouns = new string[i];
if (String.IsNullOrEmpty(sentence[i]))
{
return null;
}
else
{
location[i] = sentence[i].IndexOf(word);
nouns[i] = sentence[i].Substring(location[i]);
if (nouns[i].Contains(word)) return nouns;
}
}
}
return sentence;
string[] checkForIndefinite()
{
string input = txtEntry.Text;
string text = lbltxtOutput.Text;
Tools tool = new Tools();
List<string> words = new List<string>();
words.Add("an");
words.Add("a");
foreach(string sentence in GetWords(text))
{
for (int i = 0; i < sentence.Length; i++)
{
string[] nouns = new string[i];
string[] sentences = new string[i];
sentences[i] = sentence;
**nouns[i] = SearchKeywords(words, sentences);**
return nouns;
}
}
return null;
}
It seems to me that this is what you need:
// Returns, in order, every word of sentence that directly follows a word contained in keywords.
string[] SearchKeywords(List<string> keywords, string[] sentence)
{
    // Pair each word with its successor; Zip stops at the shorter sequence, so the last word has no pair.
    var adjacent = sentence.Zip(sentence.Skip(1), (current, next) => Tuple.Create(current, next));
    var followers = from pair in adjacent
                    where keywords.Contains(pair.Item1)
                    select pair.Item2;
    return followers.ToArray();
}
// Returns the words in lbltxtOutput.Text that directly follow an indefinite article ("an" or "a").
// The unused read of txtEntry.Text has been dropped; only the label text feeds the search.
string[] checkForIndefinite()
{
    var text = lbltxtOutput.Text;
    var words = new List<string>() { "an", "a" };
    return SearchKeywords(words, GetWords(text));
}
The output of the SearchKeywords function is a string[], while nouns[i] is a string, and you simply cannot assign a string[] value to a string. So you may want to try something like this:
List<string> nouns = new List<string>();
//foreach loop
// for loop
nouns.AddRange(SearchKeywords(words, sentences));
using System;
using System.Security.Cryptography;
using System.Text;
using System.Collections.Generic;
/// <summary>Demo: list the words that follow an indefinite article in a sample sentence.</summary>
public class Program
{
    public static void Main()
    {
        string[] indefinites = checkForIndefinite();
        foreach (string s in indefinites)
        {
            Console.WriteLine(s);
        }
    }

    /// <summary>
    /// Returns each distinct word of <paramref name="sentence"/> that directly follows one of
    /// <paramref name="keywords"/>. Words are split on single spaces and compared exactly.
    /// </summary>
    /// <param name="keywords">Words whose successors are collected.</param>
    /// <param name="sentence">Space-separated text to scan; may be null or empty.</param>
    /// <returns>
    /// The followers, grouped by keyword in the order of <paramref name="keywords"/> and, within
    /// a keyword, in sentence order. An empty array (never null) for a null or empty sentence,
    /// so callers can enumerate the result without a null check.
    /// </returns>
    static string[] SearchKeywords(List<string> keywords, string sentence)
    {
        if (String.IsNullOrEmpty(sentence))
        {
            return new string[0];
        }
        List<string> nouns = new List<string>();
        string[] words = sentence.Split(' ');
        foreach (string keyword in keywords)
        {
            // Stop one short of the end: the last word has no successor to collect.
            for (int i = 0; i + 1 < words.Length; i++)
            {
                if (words[i] == keyword && !nouns.Contains(words[i + 1]))
                {
                    nouns.Add(words[i + 1]);
                }
            }
        }
        return nouns.ToArray();
    }

    // Runs SearchKeywords over a fixed sample sentence with the articles "an" and "a".
    static string[] checkForIndefinite()
    {
        string sentence = "This is not an interesting sentence to use to test a program";
        List<string> words = new List<string>();
        words.Add("an");
        words.Add("a");
        return SearchKeywords(words, sentence);
    }
}
This compiles and returns the following:
interesting
program

Connect element in distinct array using recursion

if I have two array
A:[A,B]
B:[1,2,3]
how can I create a string List like [A_1, A_2, A_3, B_1, B_2, B_3]
the number of array is not regular, it's maybe have 3 more
A:[A,B]
B:[1,2,3]
C:[w,x,y,z]
D:[m,n]
E:[p,q,r]
can I use recursive to solve it?
So, we define a function Merge that takes a list of lists of strings and merges them into the string enumerable you want
// Demo entry point: merges three sample arrays and prints every combination on its own line.
static void Main(string[] args)
{
    string[] letters = { "A", "B" };
    string[] digits = { "1", "2", "3" };
    string[] suffixes = { "x", "y", "z", "w" };
    IList<string> combinations = Merge(letters, digits, suffixes);
    for (int i = 0; i < combinations.Count; i++)
    {
        Console.WriteLine(combinations[i]);
    }
}
/// <summary>
/// Convenience overload: merges the given sequences by forwarding them, as one sequence of
/// sequences, to the <c>IEnumerable&lt;IEnumerable&lt;string&gt;&gt;</c> overload.
/// </summary>
public static IList<string> Merge(params IEnumerable<string>[] lists)
{
    // Typed as a sequence of sequences so that overload resolution does not pick this method again.
    IEnumerable<IEnumerable<string>> asSequence = lists;
    return Merge(asSequence);
}
/// <summary>
/// Builds every "a_b_c..." combination that takes one element from each sequence in
/// <paramref name="lists"/>, in order.
/// </summary>
/// <param name="lists">The sequences to combine.</param>
/// <returns>
/// The combinations; an empty list when <paramref name="lists"/> is empty. When the combined
/// result of the sequences after a given one is empty, that sequence's own elements are used as is.
/// A null entry discards everything combined from the sequences that follow it.
/// </returns>
public static IList<string> Merge(IEnumerable<IEnumerable<string>> lists)
{
    // Fold from the right: 'combined' always holds the merge of the sequences already visited.
    var combined = new List<string>();
    foreach (var list in lists.Reverse())
    {
        if (list == null)
        {
            combined = new List<string>();
            continue;
        }
        if (combined.Count == 0)
        {
            combined = new List<string>(list);
            continue;
        }
        var expanded = new List<string>();
        foreach (var prefix in list)
        {
            foreach (var suffix in combined)
            {
                expanded.Add(string.Format("{0}_{1}", prefix, suffix));
            }
        }
        combined = expanded;
    }
    return combined;
}
we can also improve this, if you use Lists as inputs
/// <summary>
/// Convenience overload: merges the given lists by forwarding them, as one list of lists,
/// to the <c>IList&lt;IList&lt;string&gt;&gt;</c> overload.
/// </summary>
public static IList<string> Merge(params IList<string>[] lists)
{
    // An array of IList<string> is itself an IList<IList<string>>; naming the type selects the other overload.
    IList<IList<string>> asList = lists;
    return Merge(asList);
}
/// <summary>
/// Builds every "a_b_c..." combination that takes one element from each list in
/// <paramref name="lists"/>, starting at index <paramref name="offset"/>.
/// </summary>
/// <param name="lists">The lists to combine.</param>
/// <param name="offset">Index of the first list to include.</param>
/// <returns>
/// A new empty list when <paramref name="offset"/> is past the end; the last list itself
/// (not a copy) when it is the only one left; otherwise a new list of combinations.
/// </returns>
public static IList<string> Merge(IList<IList<string>> lists, int offset = 0)
{
    if (offset >= lists.Count)
    {
        return new List<string>();
    }

    // Work backwards from the last list, prefixing the accumulated tails with each earlier list.
    int lastIndex = lists.Count - 1;
    IList<string> tails = lists[lastIndex];
    for (int i = lastIndex - 1; i >= offset; i--)
    {
        var currentTails = tails;
        var expanded = new List<string>();
        foreach (var head in lists[i])
        {
            expanded.AddRange(currentTails.Select(tail => string.Format("{0}_{1}", head, tail)));
        }
        tails = expanded;
    }
    return tails;
}
This is simple iterating over n-ary dimension - no need for recursion for that, just array to store indexes.
// Advances the index vector 'iterators' to the next combination, odometer-style:
// the last position is incremented first, and a position that reaches its list's Count
// wraps to 0 and carries into the position before it. Position 0 never wraps: leaving it
// equal to arrays[0].Count marks that every combination has been visited.
static void Iterate(int[] iterators, ArrayList[] arrays) {
    int position = iterators.Length - 1;
    while (position >= 0) {
        iterators[position]++;
        bool overflowed = iterators[position] == arrays[position].Count;
        if (!overflowed || position == 0) {
            return;
        }
        iterators[position] = 0;
        position--;
    }
}
// Builds every "a_b_c..." combination that takes one element from each list in 'arrays',
// in order, by stepping an index vector through all positions (see Iterate).
// Returns an empty list when 'arrays' is empty or any of its lists is empty, since no
// combination exists then; without this guard the indexing below would throw.
static IList<string> Merge(ArrayList[] arrays) {
    List<string> result = new List<string>();
    if (arrays.Length == 0) {
        return result;
    }
    foreach (ArrayList list in arrays) {
        if (list.Count == 0) {
            return result;
        }
    }
    int[] iterators = new int[arrays.Length];
    // Iterate leaves iterators[0] equal to arrays[0].Count once the last combination is emitted.
    while (iterators[0] != arrays[0].Count) {
        var builder = new StringBuilder(20);
        for (var index = 0; index < arrays.Length; index++) {
            if (index > 0) {
                builder.Append("_");
            }
            builder.Append(arrays[index][iterators[index]]);
        }
        result.Add(builder.ToString());
        Iterate(iterators, arrays);
    }
    return result;
}
// Demo entry point: merges three small lists; the result is computed but not printed.
static void Main(string[] args) {
    var list1 = new ArrayList { 1, 2 };
    var list2 = new ArrayList { "a", "b" };
    var list3 = new ArrayList { "x", "y", "z" };
    ArrayList[] inputs = { list1, list2, list3 };
    var result = Merge(inputs);
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace arrconn {
    class Program {
        // Builds every "a_b_c..." combination that takes one element from each array in 'arrs',
        // in order. No arrays gives an empty result; a single array gives the ToString() of
        // each of its elements.
        static string[] conn(params Array[] arrs) {
            if (arrs.Length == 0) {
                return new string[0];
            }
            if (arrs.Length == 1) {
                Array only = arrs[0];
                string[] texts = new string[only.Length];
                for (int i = 0; i < texts.Length; i++) {
                    texts[i] = only.GetValue(i).ToString();
                }
                return texts;
            }

            // Fold left to right: 'left' is the combination of all arrays seen so far.
            Array left = arrs[0];
            for (int k = 1; k < arrs.Length; k++) {
                Array right = arrs[k];
                string[] combined = new string[left.Length * right.Length];
                int next = 0;
                for (int i = 0; i < left.Length; i++) {
                    for (int j = 0; j < right.Length; j++) {
                        combined[next++] = string.Format("{0}_{1}", left.GetValue(i), right.GetValue(j));
                    }
                }
                left = combined;
            }
            return (string[])left;
        }

        // Demo entry point: prints every combination of four sample arrays, then waits for a key.
        static void Main(string[] args) {
            string[] combinations = conn(
                new string[] { "A", "B" },
                new int[] { 1, 2, 3 },
                new string[] { "x" },
                new string[] { "$", "%", "#" });
            foreach (string s in combinations) {
                Console.WriteLine(s);
            }
            Console.Read();
        }
    }
}
I guess your input are like this:
var A = ["A","B"];
var B = [1,2,3];
var C = ["x","y","z","w"];
And what you want to obtain is:
var result = ["A_1_x", "A_1_y",...
"A_2_x", "A_2_y",...
"A_3_x", "A_3_y",...
"B_1_x", "B_1_y",...
...
..., "B_3_z", "B_3_w"];
We'll be working with IEnumerable as it will simplify the work for us and give us access to the yield keyword.
First, let's take care of the case where we are only concatenating two collections:
// Lazily yields "x_y" for every element x of first paired with every element y of second,
// iterating second once per element of first.
IEnumerable<string> ConcatEnumerables(IEnumerable<object> first, IEnumerable<object> second)
{
    foreach (object left in first)
    {
        foreach (object right in second)
        {
            yield return string.Concat(left.ToString(), "_", right.ToString());
        }
    }
}
Then we can recursively tackle any number of collections:
// Recursively builds every "a_b_c..." combination that takes one element from each
// collection in enums, in order. Returns an empty sequence when enums is empty.
IEnumerable<string> ConcatEnumerablesRec(IEnumerable<IEnumerable<object>> enums)
{
    // Base case: nothing to combine.
    if (!enums.Any())
    {
        return Enumerable.Empty<string>();
    }

    var head = enums.First();
    var tail = enums.Skip(1);

    // Base case: a single collection is simply rendered as strings.
    // Checking the tail with Any() avoids walking the whole sequence as Count() would.
    if (!tail.Any())
    {
        return head.Select(o => o.ToString());
    }

    // Recursive case: prefix every combination of the remaining collections with each head element.
    return ConcatEnumerables(head, ConcatEnumerablesRec(tail));
}
Now you just need to call ToArray on the result if you really need an array as your output.
// Array-returning wrapper: combines the given object arrays with ConcatEnumerablesRec
// and materialises the result.
string[] Concatenator(params object[][] parameters)
{
    IEnumerable<string> combinations = ConcatEnumerablesRec(parameters);
    return combinations.ToArray();
}
This should do the trick. Note that the input sequences do not have to be arrays - they can be any type that implements IEnumerable<>.
Also note that we have to cast sequences of value types to sequences of <object> so that they are assignable to IEnumerable<object>.
Here's the compilable Console app demo code:
using System;
using System.Collections.Generic;
using System.Linq;
namespace Demo
{
    internal static class Program
    {
        // Demo entry point: prints every combination of four differently-typed sequences.
        static void Main()
        {
            string[] a = {"A", "B", "C", "D"};
            var b = Enumerable.Range(1, 3); // <-- See how it doesn't need to be an array.
            char[] c = {'X', 'Y', 'Z'};
            double[] d = {-0.1, -0.2};
            // Value-type sequences must be cast to object to be assignable to IEnumerable<object>.
            var sequences = new [] { a, b.Cast<object>(), c.Cast<object>(), d.Cast<object>() };
            Console.WriteLine(string.Join("\n", Combine("", sequences)));
        }

        /// <summary>
        /// Lazily yields every "a_b_c..." combination that takes one item from each sequence
        /// in <paramref name="sequences"/>, in order, each prefixed with <paramref name="prefix"/>.
        /// </summary>
        /// <param name="prefix">
        /// Text placed before each combination, separated by "_"; pass "" for no prefix.
        /// </param>
        /// <param name="sequences">The sequences to combine.</param>
        /// <returns>
        /// The combinations; nothing when <paramref name="sequences"/> is empty or any of its
        /// sequences is empty.
        /// </returns>
        public static IEnumerable<string> Combine(string prefix, IEnumerable<IEnumerable<object>> sequences)
        {
            // No sequences means no combinations; First() below would throw otherwise.
            if (!sequences.Any())
            {
                yield break;
            }

            // The remaining sequences do not depend on the current item, so compute them once.
            var remaining = sequences.Skip(1);
            bool isLastSequence = !remaining.Any();

            foreach (var item in sequences.First())
            {
                string current = (prefix == "") ? item.ToString() : prefix + "_" + item;
                if (isLastSequence)
                {
                    yield return current;
                }
                else
                {
                    foreach (var s in Combine(current, remaining))
                        yield return s;
                }
            }
        }
    }
}

How to word by word iterate in string in C#?

I want to iterate over string as word by word.
If I have a string "incidentno and fintype or unitno", I would like to read every word one by one as "incidentno", "and", "fintype", "or", and "unitno".
foreach (string word in "incidentno and fintype or unitno".Split(' ')) {
...
}
// Split at word boundaries, swallowing any run of whitespace, commas, dots, hyphens,
// colons and semicolons that follows. The hyphen is escaped on purpose: an unescaped
// ".-:" inside the class is a range from '.' to ':' and would also swallow '/' and all digits.
var regex = new Regex(@"\b[\s,.\-:;]*");
var phrase = "incidentno and fintype or unitno";
// Splitting on zero-width boundary matches produces empty entries; drop them.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
This works even if you have ".,; tabs and new lines" between your words.
Slightly twisted I know, but you could define an iterator block as an extension method on strings. e.g.
/// <summary>
/// Lazily enumerates the space-separated words of a string without building an array.
/// Yields the same entries as <c>Text.Split(' ')</c>: consecutive, leading or trailing
/// spaces produce empty entries.
/// </summary>
/// <param name="Text">The text to sweep over.</param>
/// <returns>The words of <paramref name="Text"/>, in order.</returns>
public static IEnumerable<string> WordList(this string Text)
{
    // 'start' is the index of the first character of the next word.
    int start = 0;
    int spaceIndex;
    while ((spaceIndex = Text.IndexOf(' ', start)) != -1)
    {
        yield return Text.Substring(start, spaceIndex - start);
        start = spaceIndex + 1;
    }
    // The remainder after the last space; this is the whole text when it contains no space.
    yield return Text.Substring(start);
}
foreach (string word in "incidentno and fintype or unitno".WordList())
System.Console.WriteLine("'" + word + "'");
Which has the advantage of not creating a big array for long strings.
Use the Split method of the string class
string[] words = "incidentno and fintype or unitno".Split(" ");
This will split on spaces, so "words" will have [incidentno,and,fintype,or,unitno].
Assuming the words are always separated by a blank, you could use String.Split() to get an Array of your words.
There are multiple ways to accomplish this. Two of the most convenient methods (in my opinion) are:
Using string.Split() to create an array. I would probably use this method, because it is the most self-explanatory.
example:
string startingSentence = "incidentno and fintype or unitno";
string[] seperatedWords = startingSentence.Split(' ');
Alternatively, you could use (this is what I would use):
string[] seperatedWords = startingSentence.Split(new char[] {' '}, StringSplitOptions.RemoveEmptyEntries);
StringSplitOptions.RemoveEmptyEntries will remove any empty entries from your array that may occur due to extra whitespace and other minor problems.
Next - to process the words, you would use:
foreach (string word in seperatedWords)
{
//Do something
}
Or, you can use regular expressions to solve this problem, as Darin demonstrated (a copy is below).
example:
// Split at word boundaries, swallowing any run of whitespace, commas, dots, hyphens,
// colons and semicolons that follows. The hyphen is escaped on purpose: an unescaped
// ".-:" inside the class is a range from '.' to ':' and would also swallow '/' and all digits.
var regex = new Regex(@"\b[\s,.\-:;]*");
var phrase = "incidentno and fintype or unitno";
// Splitting on zero-width boundary matches produces empty entries; drop them.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
For processing, you can use similar code to the first option.
foreach (string word in words)
{
//Do something
}
Of course, there are many ways to solve this problem, but I think that these two would be the simplest to implement and maintain. I would go with the first option (using string.Split()) just because regex can sometimes become quite confusing, while a split will function correctly most of the time.
When using split, what about checking for empty entries?
string sentence = "incidentno and fintype or unitno";
// Split on the usual word separators and drop the empty entries that runs of separators produce.
string[] words = sentence.Split(new char[] { ' ', ',' ,';','\t','\n', '\r'}, StringSplitOptions.RemoveEmptyEntries);
foreach (string word in words)
{
    // Process
}
EDIT:
I can't comment so I'm posting here but this (posted above) works:
foreach (string word in "incidentno and fintype or unitno".Split(' '))
{
...
}
My understanding of foreach is that it first does a GetEnumerator() and then calls .MoveNext until false is returned. So the .Split won't be re-evaluated on each iteration
/// <summary>
/// Splits <paramref name="inword"/> on every match of the pattern <paramref name="regstr"/>
/// and returns the non-empty pieces.
/// </summary>
/// <param name="inword">The text to split.</param>
/// <param name="regstr">Regular expression describing the separators.</param>
/// <returns>The pieces between separators, in order, with empty entries removed.</returns>
public static string[] MyTest(string inword, string regstr)
{
    var regex = new Regex(regstr);
    // Split the caller's text, not a hard-coded phrase.
    string[] pieces = regex.Split(inword);
    // Adjacent separators (e.g. ", ") leave empty entries behind; drop them so only words remain.
    return Array.FindAll(pieces, piece => piece.Length > 0);
}
? MyTest("incidentno, and .fintype- or; :unitno",@"[^\w+]")
[0]: "incidentno"
[1]: "and"
[2]: "fintype"
[3]: "or"
[4]: "unitno"
I'd like to add some information to JDunkerley's answer.
You can easily make this method more reliable if you give a string or char parameter to search for.
/// <summary>
/// Lazily enumerates the pieces of <paramref name="Text"/> that lie between occurrences of
/// the separator <paramref name="Word"/>, without building an array.
/// </summary>
/// <param name="Text">The text to sweep over.</param>
/// <param name="Word">
/// The separator, matched ordinally; it may be longer than one character. A null or empty
/// separator yields <paramref name="Text"/> unsplit.
/// </param>
/// <returns>The pieces in order; adjacent, leading or trailing separators produce empty entries.</returns>
public static IEnumerable<string> WordList(this string Text,string Word)
{
    // An empty separator matches at every position and would never advance.
    if (string.IsNullOrEmpty(Word))
    {
        yield return Text;
        yield break;
    }

    // 'start' is the index of the first character of the next piece.
    int start = 0;
    int hit;
    while ((hit = Text.IndexOf(Word, start, StringComparison.Ordinal)) != -1)
    {
        yield return Text.Substring(start, hit - start);
        // Skip the whole separator, not just one character.
        start = hit + Word.Length;
    }
    // The remainder after the last separator; the whole text when it never occurs.
    yield return Text.Substring(start);
}
/// <summary>
/// Lazily enumerates the pieces of <paramref name="Text"/> that lie between occurrences of
/// the separator character <paramref name="c"/>, without building an array.
/// Yields the same entries as <c>Text.Split(c)</c>.
/// </summary>
/// <param name="Text">The text to sweep over.</param>
/// <param name="c">The separator character.</param>
/// <returns>The pieces in order; adjacent, leading or trailing separators produce empty entries.</returns>
public static IEnumerable<string> WordList(this string Text, char c)
{
    // 'start' is the index of the first character of the next piece.
    int start = 0;
    int hit;
    while ((hit = Text.IndexOf(c, start)) != -1)
    {
        yield return Text.Substring(start, hit - start);
        start = hit + 1;
    }
    // The remainder after the last separator; the whole text when it never occurs.
    yield return Text.Substring(start);
}
I wrote a string processor class. You can use it.
Example:
metaKeywords = bodyText.Process(prepositions).OrderByDescending().TakeTop().GetWords().AsString();
Class:
/// <summary>
/// Fluent helpers for extracting keyword frequencies from a body of text:
/// <c>text.Process(...).OrderByDescending().TakeTop().GetWords().AsString()</c>.
/// </summary>
public static class StringProcessor
{
    // Punctuation and markup removed from every word, applied in this order.
    private static readonly string[] RemovedTokens =
    {
        ".", "(", ")", "?", "!", ",", "<br>", ":", ";", "،", "-", "\n"
    };

    /// <summary>
    /// Replaces U+06A9 (Keheh) with U+0643 (Arabic Kaf) and U+06CC (Farsi Yeh) with
    /// U+064A (Arabic Yeh), so both spellings of a word compare equal.
    /// </summary>
    /// <param name="strText">Text to normalise; may be null.</param>
    /// <returns>The normalised text, or an empty string for null or empty input.</returns>
    public static string ToNormalString(this string strText)
    {
        if (String.IsNullOrEmpty(strText)) return String.Empty;
        char chNormalKaf = (char)1603;
        char chNormalYah = (char)1610;
        char chNonNormalKaf = (char)1705;
        char chNonNormalYah = (char)1740;
        return strText.Replace(chNonNormalKaf, chNormalKaf)
                      .Replace(chNonNormalYah, chNormalYah);
    }

    /// <summary>
    /// Splits <paramref name="bodyText"/> into words, strips punctuation, and counts how often
    /// each remaining word occurs.
    /// </summary>
    /// <param name="bodyText">Text to analyse; null is treated as empty.</param>
    /// <param name="blackListWords">Words to ignore, or null for none.</param>
    /// <param name="minimumWordLength">
    /// Length threshold: only words strictly longer than this value are counted.
    /// </param>
    /// <param name="splitor">Character that separates words.</param>
    /// <param name="perWordIsLowerCase">When true, words are lower-cased before counting.</param>
    /// <returns>One (word, count) pair per distinct word, in order of first appearance.</returns>
    public static List<KeyValuePair<String, Int32>> Process(this String bodyText,
        List<String> blackListWords = null,
        int minimumWordLength = 3,
        char splitor = ' ',
        bool perWordIsLowerCase = true)
    {
        var wordCounts = new Dictionary<String, Int32>();
        foreach (string word in bodyText.ToNormalString().Split(splitor))
        {
            string normalWord = perWordIsLowerCase ? word.ToLower() : word;
            foreach (string token in RemovedTokens)
            {
                normalWord = normalWord.Replace(token, "");
            }
            normalWord = normalWord.Trim();

            if (normalWord.Length <= minimumWordLength || normalWord.IsMemberOfBlackListWords(blackListWords))
            {
                continue;
            }

            // A single lookup: count stays 0 when the word has not been seen yet.
            int count;
            wordCounts.TryGetValue(normalWord, out count);
            wordCounts[normalWord] = count + 1;
        }
        return wordCounts.ToList();
    }

    /// <summary>
    /// Sorts the pairs in descending order, by count when <paramref name="isBasedOnFrequency"/>
    /// is true and by word otherwise.
    /// </summary>
    public static List<KeyValuePair<String, Int32>> OrderByDescending(this List<KeyValuePair<String, Int32>> list, bool isBasedOnFrequency = true)
    {
        return isBasedOnFrequency
            ? list.OrderByDescending(q => q.Value).ToList()
            : list.OrderByDescending(q => q.Key).ToList();
    }

    /// <summary>Returns the first <paramref name="n"/> pairs of the list.</summary>
    public static List<KeyValuePair<String, Int32>> TakeTop(this List<KeyValuePair<String, Int32>> list, Int32 n = 10)
    {
        return list.Take(n).ToList();
    }

    /// <summary>Returns only the words of the pairs, in list order.</summary>
    public static List<String> GetWords(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Key).ToList();
    }

    /// <summary>Returns only the counts of the pairs, in list order.</summary>
    public static List<Int32> GetFrequency(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Value).ToList();
    }

    /// <summary>
    /// Joins the items with <paramref name="seprator"/> between them. The separator is placed
    /// only between items, so the result carries no trailing separator.
    /// </summary>
    /// <returns>The joined text; an empty string for an empty list.</returns>
    public static String AsString<T>(this List<T> list, string seprator = ", ")
    {
        return string.Join(seprator, list);
    }

    // True when word equals the normalised form of any black-listed word; false for a null list.
    private static bool IsMemberOfBlackListWords(this String word, List<String> blackListWords)
    {
        if (blackListWords == null) return false;
        return blackListWords.Any(w => w.ToNormalString().Equals(word));
    }
}

Categories