I have a fairly big problem. My task is to split the input text into sentences, and then to split each sentence into words. Here is the code:
using System.Collections.Generic;
using System.Linq;
namespace TextAnalysis
{
/// <summary>
/// Splits raw text into sentences and each sentence into lower-cased words.
/// </summary>
static class SentencesParserTask
{
/// <summary>
/// Splits <paramref name="text"/> on sentence delimiters and collects a word list per piece.
/// </summary>
/// <param name="text">Text to parse.</param>
/// <returns>The word lists that turned out non-empty, in input order.</returns>
public static List<List<string>> ParseSentences(string text)
{
var sentencesList = new List<List<string>>();
var splittedText = text.Split('.', '!', '?', ';', ':', '(', ')');
List<string>[] mas = new List<string>[splittedText.Length];
// Pre-fills every slot with an empty list; each slot is overwritten in the loop below.
for (int i = 0; i < splittedText.Length; i++)
{
mas[i] = new List<string>();
}
for (int j = 0; j < splittedText.Length; j++)
{
// NOTE(review): the whole splittedText array is passed here rather than splittedText[j],
// so every iteration receives the words of ALL sentences.
mas[j]= GetWordsOutOfTheSentence(splittedText);
bool isEmpty = !(mas[j]).Any();
if(!isEmpty)
sentencesList.Add(mas[j]);
}
return sentencesList;
}
/// <summary>
/// Collects the lower-cased, non-empty words of every string in <paramref name="splittedText"/>
/// into one flat list.
/// </summary>
/// <param name="splittedText">The sentence pieces to split into words.</param>
/// <returns>A single list with the words of all pieces, in order.</returns>
private static List<string> GetWordsOutOfTheSentence(string[] splittedText)
{
var wordList = new List<string>();
foreach (var sentence in splittedText)
{
// Note that the digit '1' is one of the word separators.
var wordsArray = sentence.Split('^', '#', '$', '-', '+', '1', '=', ' ', '\t', '\n', '\r',',');
for (int i = 0; i < wordsArray.Length; i++)
{
// Adjacent separators produce empty entries; skip them.
if (wordsArray[i] != string.Empty)
{
var fineWord = wordsArray[i];
wordList.Add(fineWord.ToLower());
}
}
}
return wordList;
}
}
}
The main problem is on test 1)
Failed : TextAnalysis.SentencesParser_Tests.CorrectlyParse_SentenceDelimiters
Input text: [a.b!c?d:e;f(g)h;i]
Sentence #0 is wrong
Expected is <System.Collections.Generic.List<System.String>> with 1 elements, actual is <System.Collections.Generic.List<System.String>> with 9 elements
Values differ at index [1]
Extra: < "b", "c", "d"... >
My code just keeps adding new words to the same list and then adds that list to the main list. What should I do?
As stated in one of the comments, you are passing the entire splittedText variable into GetWordsOutOfTheSentence and not just that sentence. This means you are passing the list of 9 sentences instead of one sentence. As suggested in the comments your code should pass the specific sentence instead.
/// <summary>
/// Splits <paramref name="text"/> into sentences and each sentence into words.
/// </summary>
/// <param name="text">Text to parse.</param>
/// <returns>One word list per sentence that contains at least one word, in input order.</returns>
public static List<List<string>> ParseSentences(string text)
{
    var sentencesList = new List<List<string>>();
    var splittedText = text.Split('.', '!', '?', ';', ':', '(', ')');
    foreach (var sentence in splittedText)
    {
        // Wrong:  GetWordsOutOfTheSentence(splittedText)  -- passes every sentence at once.
        // Right:  pass just the relevant sentence, so the helper must take a single string.
        var words = GetWordsOutOfTheSentence(sentence);
        if (words.Any())
        {
            sentencesList.Add(words);
        }
    }
    return sentencesList;
}
Actually, I solved the problem by building a separate list for each sentence. Thanks everybody, it was awesome!
using System.Collections.Generic;
using System.Linq;
namespace TextAnalysis
{
    /// <summary>
    /// Splits raw text into sentences and each sentence into lower-cased words.
    /// </summary>
    static class SentencesParserTask
    {
        // Characters that terminate a sentence.
        private static readonly char[] SentenceDelimiters = { '.', '!', '?', ';', ':', '(', ')' };

        // Characters that separate words inside a sentence (the digit '1' is in the original list).
        private static readonly char[] WordDelimiters =
            { '^', '#', '$', '-', '+', '1', '=', ' ', '\t', '\n', '\r', ',' };

        /// <summary>
        /// Parses <paramref name="text"/> into sentences, each represented by its list of words.
        /// </summary>
        /// <param name="text">Text to parse.</param>
        /// <returns>
        /// One list of lower-cased words per sentence, in input order; sentences that
        /// contain no words are omitted.
        /// </returns>
        public static List<List<string>> ParseSentences(string text)
        {
            var sentencesList = new List<List<string>>();
            foreach (var sentence in text.Split(SentenceDelimiters))
            {
                // Each sentence gets its own fresh list, so words never leak between sentences.
                var words = sentence.Split(WordDelimiters)
                    .Where(word => word.Length > 0) // adjacent separators yield empty entries
                    .Select(word => word.ToLower())
                    .ToList();
                if (words.Count > 0)
                {
                    sentencesList.Add(words);
                }
            }
            return sentencesList;
        }
    }
}
Related
// Handles a selection change in the list view: loads the selected file into richTextBox1,
// shows its size, and highlights every ",,"-separated phrase typed into textBox1.
void lvnf_SelectedIndexChanged(object sender, EventArgs e)
{
// Start with a fresh list of match positions; HighlightPhrase appends to it.
results = new List<int>();
// NOTE(review): SelectedIndices[0] assumes at least one item is selected - confirm.
richTextBox1.Text = File.ReadAllText(listViewCostumControl1.lvnf.Items[listViewCostumControl1.lvnf.SelectedIndices[0]].Text);
FileInfo fi = new FileInfo(listViewCostumControl1.lvnf.Items[listViewCostumControl1.lvnf.SelectedIndices[0]].Text);
lblfilesizeselected.Text = ExtensionMethods.ToFileSize(fi.Length);
lblfilesizeselected.Visible = true;
filePath = Path.GetDirectoryName(fi.FullName);
string words = textBox1.Text;
// StringSplitOptions.None keeps empty entries, so runs such as ",,,," yield "" phrases.
string[] splittedwords = words.Split(new string[] { ",," }, StringSplitOptions.None);
foreach (string myword in splittedwords)
{
HighlightPhrase(richTextBox1, myword, Color.Yellow);
// The counter label is refreshed after each phrase with the running total of matches.
lblviewerselectedfile.Text = results.Count.ToString();
lblviewerselectedfile.Visible = true;
if (results.Count > 0)
{
numericUpDown1.Maximum = results.Count;
numericUpDown1.Enabled = true;
// Scroll to the match selected by the (1-based) numericUpDown1 value.
richTextBox1.SelectionStart = results[(int)numericUpDown1.Value - 1];
richTextBox1.ScrollToCaret();
}
}
}
This is the line that make the split :
// Splits on the two-character separator ",,"; StringSplitOptions.None keeps empty entries.
string[] splittedwords = words.Split(new string[] { ",," }, StringSplitOptions.None);
The problem is that if I type into textBox1, for example, sadsdss,,s,,form1,,,,,,,,f,,dd,,,,,,
then every place that has more than two commas is counted as an empty string when highlighting the words:
// Colors every case-insensitive occurrence of `phrase` in `box` with `color` and appends
// each match's start index to the `results` list (declared outside this method).
// The caret position the box had on entry is restored before returning.
void HighlightPhrase(RichTextBox box, string phrase, Color color)
{
int pos = box.SelectionStart;
string s = box.Text;
// ix is the index from which the next search starts; the loop exits via break.
for (int ix = 0; ;)
{
// NOTE(review): with an empty phrase this call is reported to throw
// ArgumentOutOfRangeException (startIndex); there is no guard for "" here.
int jx = s.IndexOf(phrase, ix, StringComparison.CurrentCultureIgnoreCase);
if (jx < 0)
{
break;
}
else
{
box.SelectionStart = jx;
box.SelectionLength = phrase.Length;
box.SelectionColor = color;
// Advance by one character only, so overlapping occurrences are found as well.
ix = jx + 1;
results.Add(jx);
}
}
// Put the caret back where it was and clear the selection.
box.SelectionStart = pos;
box.SelectionLength = 0;
}
The exception is on the line :
// Searches s for phrase starting at index ix, ignoring case under the current culture.
int jx = s.IndexOf(phrase, ix, StringComparison.CurrentCultureIgnoreCase);
System.ArgumentOutOfRangeException: 'Index was out of range. Must be non-negative and less than the size of the collection.
Parameter name: startIndex'
because the phrase is the empty string "".
What I want is that every place with more than two commas, like ,,, is itself treated as a word, even if the user types s,,1,,form1,,,,,,
So the words s, 1, form1 and ,,,,,, should all be counted as results and be highlighted.
If you want to remove empty entries, just do it with the help of the StringSplitOptions.RemoveEmptyEntries option:
// Split on the ",," separator and let Split itself discard the empty pieces.
var separators = new[] { ",," };
string[] splittedwords = words.Split(separators, StringSplitOptions.RemoveEmptyEntries);
Another possibility is to query with the help of LINQ, which can be helpful if you want to exclude (filter out) some words, e.g.
using System.Linq;
...
// Keep every piece from the split, then filter out the null/empty/whitespace-only ones.
string[] splittedwords = (
    from item in words.Split(new[] { ",," }, StringSplitOptions.None)
    where !string.IsNullOrWhiteSpace(item)
    select item).ToArray();
As much as I tried, I couldn't find a similar question here, so I am asking for your help.
After reading some numbers from a text file (now in string format), I split them into rows and columns and add them to a 2D array (in string format as well). Now I want to convert everything to integers so that I can play with sorting the numbers later.
Here is my code...
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace ArrayProgram
{
    /// <summary>
    /// Writes the numbers 1..100 to a text file as a 10x10 grid and reads them back
    /// into a 2-D string array.
    /// </summary>
    class Program
    {
        // File shared by the writer and the reader.
        private const string DataFileName = "numberstored.txt";
        private const int Rows = 10;
        private const int Columns = 10;

        /// <summary>
        /// Prints 1..100 to the console, ten per line, and writes the same layout to the data file.
        /// </summary>
        public void onetohundredints()
        {
            // Local counter: every call starts again at 1 instead of continuing from a field.
            int number = 0;

            // 'using' closes the file even if a write throws.
            using (var writer = new StreamWriter(DataFileName))
            {
                for (int row = 0; row < Rows; row++)
                {
                    for (int column = 0; column < Columns; column++)
                    {
                        number++;
                        Console.Write(number + " ");
                        writer.Write(number + " ");
                    }
                    Console.WriteLine();
                    writer.WriteLine();
                }
            }
        }

        /// <summary>
        /// Reads the data file and stores each space-separated token in a 10x10 string array.
        /// </summary>
        public void readints()
        {
            string data;
            using (var reader = new StreamReader(DataFileName))
            {
                data = reader.ReadToEnd();
            }

            // The values are still strings at this point; the array is local and not returned.
            string[,] dataarray = new string[Rows, Columns];
            string[] dataperlines = data.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < dataperlines.Length; i++)
            {
                string[] numbersperrow = dataperlines[i].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                for (int j = 0; j < numbersperrow.Length; j++)
                {
                    dataarray[i, j] = numbersperrow[j];
                }
            }
        }

        /// <summary>
        /// Writes the file, reads it back, then waits for a key press.
        /// </summary>
        public static void Main(string[] args)
        {
            Program prog = new Program();
            prog.onetohundredints();
            prog.readints();
            Console.ReadKey();
        }
    }
}
After I insert the number into the 2d-array, how do I convert all of it in integers?
If you don't have a particular reason to have an array of strings, you can just save your data as int in the first place. Just change your inner for loop to do:
// Parse straight into the target cell; TryParse reports failure through its return value.
if (!int.TryParse(numbersperrow[j], out dataarray[i, j]))
{
    // Error
}
While that should work, I would suggest to re-write your ReadData method to look similar to the sample below.
/// <summary>
/// Reads a grid of space-separated integers from a text file into a 2-D array.
/// </summary>
/// <param name="filePath">Path of the text file; one grid row per line.</param>
/// <param name="xDimension">Number of rows in the returned array.</param>
/// <param name="yDimension">Number of columns in the returned array.</param>
/// <returns>
/// An <c>int[xDimension, yDimension]</c> array; cells with no corresponding token, or whose
/// token is not a valid integer, are 0.
/// </returns>
public int[,] ReadData(string filePath, int xDimension, int yDimension)
{
    var results = new int[xDimension, yDimension];

    // ReadAllLines returns an array, so it can be indexed and has a Length.
    // File.ReadLines returns a lazy IEnumerable<string>, which cannot be indexed.
    var lines = File.ReadAllLines(filePath);
    for (var i = 0; i < lines.Length; i++)
    {
        var values = lines[i].Split(new[] { ' ' },
            StringSplitOptions.RemoveEmptyEntries);
        for (var j = 0; j < values.Length; j++)
        {
            var parsed = int.TryParse(values[j], out results[i, j]);
            if (!parsed)
            {
                // TryParse has stored 0 in the cell; add error handling here if needed.
            }
        }
    }
    return results;
}
You're putting everything into a string array. That array can only hold strings, not numbers. If you want to make the 2d array hold numbers, define it as int[,] dataarray = new int[10,10]; Next, in the final loop, simply do dataarray[i, j] = Convert.ToInt32(numbersperrow[j]);
Edit: You can use int.TryParse(numbersperrow[j], out value) if you aren't sure that numbersperrow[j] will be a number. TryParse returns false (and sets value to 0) if the conversion is not successful.
I am happy to say that both solutions work. I now have my numbers in my 2d array. But now I wish to play with them. Let's say that I want the numbers printed on the screen in reverse order.
Having this correct solution:
// Parse every token of `data` into a 10x10 int grid, echoing each row to the console.
int numbers;
int[,] dataarray = new int[10, 10];
string[] dataperlines = data.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
int rowIndex = 0;
foreach (string line in dataperlines)
{
    int columnIndex = 0;
    foreach (string token in line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        numbers = int.Parse(token);
        dataarray[rowIndex, columnIndex] = numbers;
        Console.Write(numbers + " ");
        columnIndex++;
    }
    Console.WriteLine();
    rowIndex++;
}
I know that I have to write a nested for loop. But what is the correct syntax to print the numbers properly?
I also tried this:
// Attempt to fill a second 10x10 grid with the reversed values.
int[,] reversedarray = new int[10, 10];
// NOTE(review): dataarray[i,j] is a single int, while the reported compiler error says an
// Array is expected here, so this line does not compile.
reversedarray[i, j] = Array.Reverse(dataarray[i,j]);
but dataarray[i,j] is underlined in red and the error "cannot convert from int to System.Array" occurs... What am I missing?
I also tried this...
// Attempt to walk the rows and columns backwards.
// NOTE(review): i starts at 10 and is only decremented, so `i <= dataperlines.Count()` does not
// become false by counting down; the first access dataperlines[10] is where the reported
// IndexOutOfRange comes from.
for (i = 10; i <= dataperlines.Count(); i--)
{
string[] numbersperrow = dataperlines[i].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
// NOTE(review): same pattern for the columns - j starts at 10 and only decreases.
for (j =10; j <= numbersperrow.Count(); j--)
{
numbers = int.Parse(numbersperrow[j]);
reversedarray[i, j] = numbers;
Console.Write(numbers + " ");
}
Console.WriteLine();
}
But I have an IndexOutOfRange exception coming from
// Splits line i on spaces, dropping empty entries; dataperlines[i] is the indexed access.
string[] numbersperrow = dataperlines[i].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
I understand that since I have already split the data into rows and columns, it is unnecessary to do it again, but I am stuck. I tried simplifying the code, since I already have my 2D array with its elements, but my attempts were wrong. Any more suggestions?
I try using the code they have on msdn: it uses foreach and that puts each element on a different line.
How would I put it into a 3x3 matrix format?
// Split a matrix literal on a fixed set of delimiter characters and print every piece.
string text = "A = [5 4 1; 3 6 1; 2 3 9]";
char[] delimiterChars = { ' ', ',', '.', ':', '\t' ,'[' ,']', ';', '"', 'A', '=' };
System.Console.WriteLine("Original text: '{0}'", text);

string[] words = text.Split(delimiterChars);
System.Console.WriteLine("{0} words in text:", words.Length);
for (int index = 0; index < words.Length; index++)
{
    // One piece per line, empty pieces included.
    System.Console.WriteLine(words[index]);
}

// Keep the console window open in debug mode.
System.Console.WriteLine("Press any key to exit.");
System.Console.ReadKey();
Thanks in advance!
There are at least two ways to do that.
1.
At first your example will output
17 words in text:
This is because when you split string by all of the specified chars you get many empty strings, to get rid of them add StringSplitOptions.RemoveEmptyEntries option to Split method.
// RemoveEmptyEntries drops the empty strings produced between adjacent delimiters.
string[] words = text.Split(delimiterChars, StringSplitOptions.RemoveEmptyEntries);
Now you will get
9 words in text:
You can get 3 x 3 matrix with simple for loop, like
// Lay the flat word list out row by row: word k goes to row k / 3, column k % 3.
string[,] matrix = new string[3, 3];
for (int k = 0; k < 3 * 3; ++k)
{
    matrix[k / 3, k % 3] = words[k];
}
However you need to know number of rows and columns (or assume that the matrix is square).
The method will look something like this
/// <summary>
/// Splits <paramref name="text"/> into tokens and arranges the first n*m of them,
/// row by row, in an n-by-m matrix.
/// </summary>
/// <param name="text">Matrix literal such as <c>A = [5 4 1; 3 6 1; 2 3 9]</c>.</param>
/// <param name="n">Number of rows.</param>
/// <param name="m">Number of columns.</param>
/// <returns>An n-by-m array of the tokens; tokens beyond n*m are ignored.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">A dimension is negative.</exception>
/// <exception cref="ArgumentException">The text holds fewer than n*m tokens.</exception>
public static string[,] GetMatrix1(string text, int n, int m)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }
    if (n < 0)
    {
        throw new ArgumentOutOfRangeException("n", "Row count must not be negative.");
    }
    if (m < 0)
    {
        throw new ArgumentOutOfRangeException("m", "Column count must not be negative.");
    }

    // 'A' and '=' are in the list so the "A =" prefix of the literal is discarded.
    char[] delimiterChars = { ' ', ',', '.', ':', '\t', '[', ']', ';', '"', 'A', '=' };
    string[] words = text.Split(delimiterChars, StringSplitOptions.RemoveEmptyEntries);

    // Fail with a clear message instead of an IndexOutOfRangeException half-way through the fill.
    if (words.Length < (long)n * m)
    {
        throw new ArgumentException(
            string.Format("Text contains {0} values but a {1}x{2} matrix needs {3}.",
                words.Length, n, m, (long)n * m),
            "text");
    }

    string[,] matrix = new string[n, m];
    for (int i = 0; i < n; ++i)
    {
        for (int j = 0; j < m; ++j)
        {
            matrix[i, j] = words[i * m + j];
        }
    }
    return matrix;
}
2.
Another way is to split text by [ and ] first, then by ; to get rows and at last by (space) to get elements in each row.
// Step 1: take what lies between '[' and ']'.  Step 2: cut it into rows at ';'.
// Step 3: cut every row into its elements at spaces.
char[] delimiter1 = { '[', ']' };
char[] delimiter2 = { ';' };
char[] delimiter3 = { ' ' };
string inner = text.Split(delimiter1)[1];
string[] rows = inner.Split(delimiter2, StringSplitOptions.RemoveEmptyEntries);
string[][] words = (from row in rows
                    select row.Split(delimiter3, StringSplitOptions.RemoveEmptyEntries)).ToArray();
There you will get string[][], number of columns may vary for each row, but if you want string[,] type you need to convert it.
As the result the implementation will be like this one
/// <summary>
/// Parses a bracketed matrix literal such as <c>A = [5 4 1; 3 6 1; 2 3 9]</c>:
/// rows are separated by ';' and elements by spaces.
/// </summary>
/// <param name="text">Text containing the matrix between '[' and ']'.</param>
/// <returns>
/// A rectangular array with one row per ';'-separated group, as wide as the widest row;
/// cells missing from shorter rows are null. An empty literal gives a 0x0 array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="FormatException">The text contains no '[' or ']'.</exception>
public static string[,] GetMatrix2(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    char[] delimiter1 = { '[', ']' };
    char[] delimiter2 = { ';' };
    char[] delimiter3 = { ' ' };

    // Element [1] is the part after the first bracket; it only exists if a bracket was found.
    string[] sections = text.Split(delimiter1);
    if (sections.Length < 2)
    {
        throw new FormatException("Text does not contain a bracketed matrix.");
    }

    string[][] words = sections[1]
        .Split(delimiter2, StringSplitOptions.RemoveEmptyEntries)
        .Select(x => x.Split(delimiter3, StringSplitOptions.RemoveEmptyEntries))
        .ToArray();

    // Size by the widest row so a row longer than the first cannot overflow the array,
    // and guard the empty case where there is no first row at all.
    int columns = words.Length == 0 ? 0 : words.Max(row => row.Length);
    string[,] matrix = new string[words.Length, columns];
    for (int i = 0; i < words.Length; ++i)
    {
        for (int j = 0; j < words[i].Length; ++j)
        {
            matrix[i, j] = words[i][j];
        }
    }
    return matrix;
}
I have this method:
// Attempt at expanding a key such as "glu|sal,1368|1199" into search strings.
// As written it returns joinedDashKeys, i.e. each comma-separated part of `key`
// with its '|' characters replaced by ','.
public List<string> AdvMultiKeySearch(string key)
{
string[] listKeys = key.Split(',');
string[] ORsplit;
List<string> joinedDashKeys = new List<string>();
List<string> joinedSearchKeys = new List<string>();
// Split every part on '|' and re-join the pieces with ','.
for (int i = 0; i < listKeys.Length; i++)
{
ORsplit = listKeys[i].Split('|');
joinedDashKeys.Add(string.Join(",", ORsplit));
}
for (int i = 0; i < joinedDashKeys.Count; i++)
{
string[] split = joinedDashKeys[i].Split(',');
for (int j = 0; j < split.Length; j++)
{
// NOTE(review): split is indexed with the outer counter i, not the inner counter j.
joinedSearchKeys.Add(string.Join(",", split[i]));
}
}
// NOTE(review): joinedSearchKeys is filled above but never returned.
return joinedDashKeys;
}
I am trying to create a method that receives a keyword string composed of words, commas, and the '|' character. For example, the user enters
glu|sal,1368|1199
And method should produce/return List of strings: "glu,1368", "glu,1199", "sal,1368", "sal,1199"
It's been more than two hours and I still can't figure out how to correctly implement it. Can someone help please?
Given the input above this will show any number of combinations as long as there is one comma.
// Separates the alternatives inside one group.
char[] splitter1 = new char[] { '|' };
// Separates the groups.
char[] splitterComma = new char[] { ',' };

/// <summary>
/// Expands a key made of comma-separated groups of '|'-separated alternatives into every
/// combination that takes one alternative from each group.
/// </summary>
/// <param name="key">For example <c>glu|sal,1368|1199</c>.</param>
/// <returns>
/// The combinations joined with ',', earlier groups varying slowest:
/// <c>glu,1368</c>, <c>glu,1199</c>, <c>sal,1368</c>, <c>sal,1199</c>. Any number of groups
/// is accepted; a key without a comma simply yields its alternatives.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="key"/> is null.</exception>
public List<string> AdvMultiKeySearch(string key)
{
    if (key == null)
    {
        throw new System.ArgumentNullException("key");
    }

    // Split always returns at least one element, so strings is assigned before the return.
    List<string> strings = null;
    foreach (string segment in key.Split(splitterComma))
    {
        string[] alternatives = segment.Split(splitter1);
        if (strings == null)
        {
            // The first group seeds the result.
            strings = new List<string>(alternatives);
            continue;
        }

        // Extend every combination built so far with each alternative of this group.
        var extended = new List<string>(strings.Count * alternatives.Length);
        foreach (string prefix in strings)
        {
            foreach (string alternative in alternatives)
            {
                extended.Add(prefix + "," + alternative);
            }
        }
        strings = extended;
    }
    return strings;
}
What is the best way to separate the individual characters in an array of strings strArr into an array of those characters charArr, as depicted below?
// Input: an array of strings.
string[] strArr = { "123", "456", "789" };
// Desired output: the characters of all strings, in order, in one array.
char[] chrArr = { '1', '2', '3', '4', '5', '6', '7', '8', '9' };
This is what I am currently doing, but I do not think that it is very elegant:
// First pass: total number of characters, to size the target array.
int characterCount = 0;
foreach (string part in strArr)
{
    characterCount += part.Length;
}

// Second pass: copy the characters one by one, in order.
char[] chrArr = new char[characterCount];
int indexCount = 0;
foreach (string part in strArr)
{
    foreach (char ch in part)
    {
        chrArr[indexCount] = ch;
        indexCount++;
    }
}
Well, easiest way would be this:
// Concatenate all strings with an empty separator, then copy the result into a char array.
char[] chrArr = string.Join(string.Empty, strArr).ToCharArray();
To make sure there is no confusion over performance characteristics here, here is a short program to test in LINQPad (don't forget to turn on optimizations in the lower right corner):
// Input shared by all three benchmarked variants.
static string[] strArr = { "123", "456", "789" };
// Times each variant over the same number of iterations and reports elapsed milliseconds.
// NOTE(review): Dump is presumably LINQPad's output extension - confirm.
void Main()
{
const int iterations = 10000000; // 10 million
// Warm up JITter
StringJoin();
LINQSelectMany();
LINQ();
// Variant 1: string.Join + ToCharArray.
Stopwatch sw = Stopwatch.StartNew();
for (int index = 0; index < iterations; index++)
StringJoin();
sw.Stop();
sw.ElapsedMilliseconds.Dump("String.Join");
// Variant 2: SelectMany in method syntax.
sw.Restart();
for (int index = 0; index < iterations; index++)
LINQSelectMany();
sw.Stop();
sw.ElapsedMilliseconds.Dump("LINQ SelectMany");
// Variant 3: nested from-clauses in query syntax.
sw.Restart();
for (int index = 0; index < iterations; index++)
LINQ();
sw.Stop();
sw.ElapsedMilliseconds.Dump("LINQ");
}
// Joins the strings with an empty separator and converts to char[]; the result is discarded.
public static void StringJoin()
{
char[] c = string.Join(string.Empty, strArr).ToCharArray();
}
// Flattens the strings into characters with SelectMany; the result is discarded.
public static void LINQSelectMany()
{
char[] c = strArr.SelectMany(s => s).ToArray();
}
// Flattens the strings into characters with a query expression; the result is discarded.
public static void LINQ()
{
var characters = (from s in strArr
from c in s
select c).ToArray();
}
You can download this LINQPad script here if you want to play with it.
Output (in milliseconds):
String.Join
765
LINQ SelectMany
5098
LINQ
5465
(the usual caveat about performance measuring code applies here, point out any mistakes I made)
I would do:
// Flatten: each string is enumerated as its characters.
char[] chrArr = strArr.SelectMany(s => s).ToArray();
// Same idea, converting each string to a char[] explicitly first.
var res = strArr.SelectMany(c => c.ToCharArray()).ToArray();
LINQ based version would be:
// Query syntax: the outer clause walks the strings, the inner clause walks their characters.
string[] input = { "abc", "def", "ghi" };
var characters = (from word in input
                  from letter in word
                  select letter).ToArray();
foreach (var ch in characters)
{
    Console.WriteLine(ch);
}
This is basically an extension of Jesse Slicer's answer.
Simply cleaning up your code with ReSharper (if you don't have this or something like it, go get it now) yields this:
// Intermediate clean-up: total length via Sum, then a manual copy of the flattened characters.
var characterCount = strArr.Sum(t => t.Length);
var indexCount = 0;
var chrArr = new char[characterCount];
// SelectMany(t => t) enumerates every character of every string in order.
foreach (var t1 in strArr.SelectMany(t => t))
{
chrArr[indexCount] = t1;
indexCount++;
}
The foreach is just filling an array and there's already a LINQ method ToArray for that. Then characterCount and indexCount are completely unnecessary.
// Flatten all strings into their characters and materialize the result as an array.
var chrArr = strArr.SelectMany(t => t).ToArray();
try this..
string[] strArr = { "123", "456", "789" }; //Your string array.
// string.Concat joins all elements in one pass; appending with += in a loop would
// allocate a new, longer string on every iteration.
string value = string.Concat(strArr);
char[] array = value.ToCharArray();