Sorting a set of values with special characters - c#

I have a task to sort a set of values in the DataTable. But the tricky part is that the values have special characters like '.' or '-' and negative values as well. The following picture is my current output:
the data are separated by ';' in the picture.
I am using the following code to sort the data.
// Reads ';'-separated values from a text file into a one-column DataTable ("SN"),
// prints them, sorts the rows with a computed string key, and prints the result.
DataTable myDataTable = new DataTable();
myDataTable.Columns.Add("SN", typeof(string));
string myValues = "";
if (!File.Exists("d:\\DUDBC-values.txt")) //DUDBC-values.txt
{
Console.WriteLine("No file found");
Console.ReadKey();
return;
}
StreamReader file = new StreamReader("d:\\DUDBC-values.txt");
string line;
// Concatenate every line, each followed by ';', into one ';'-separated string.
while ((line = file.ReadLine()) != null)
{
myValues += line;
myValues += ";";
}
file.Close();
string[] myValuesArray = myValues.Split(';');
// Drop the last element: it is the empty string after the trailing ';' appended above.
myValuesArray = myValuesArray.Take(myValuesArray.Count() - 1).ToArray();
foreach (string myValue in myValuesArray)
{
DataRow myRow = myDataTable.NewRow();
myRow["SN"] = myValue;
myDataTable.Rows.Add(myRow);
}
string beforeSort = string.Join(";", myDataTable.AsEnumerable().Select(x => x["SN"]));
Console.WriteLine("Before Sorting:");
Console.WriteLine();
Console.WriteLine(beforeSort);
Console.WriteLine();
// NOTE: the key selector below returns a string, so OrderBy compares the keys as
// strings (text order), not by numeric value.
IEnumerable<DataRow> sortedValues = myDataTable.AsEnumerable()
.OrderBy(x =>
{
string currentStringValue = x["SN"].ToString();
char[] SplitChar = new char[] { '.', '-' };
string[] currentStringValueArray = new string[1];
// If the whole value parses as a float it is kept as a single element;
// otherwise (any exception) it is split into parts on '.' and '-'.
try
{
float val = float.Parse(currentStringValue);
currentStringValueArray[0] = currentStringValue;
}
catch {
currentStringValueArray = currentStringValue.Split(SplitChar);
}
string currentPart = "";
int currentPartNumeric = 0;
if (currentStringValueArray.Length > 1)
{
// Rebuild the key part by part, joining the parts with '.'.
for (int i = 0; i < currentStringValueArray.Length; i++)
{
if (int.TryParse(currentStringValueArray[i], out currentPartNumeric))
{
// Integer parts are re-emitted via ToString(), which drops leading zeros ("06" -> "6").
if (i >= 1)
currentPart += ".";
currentPart += currentPartNumeric.ToString();
}
else
{
try
{
if (i >= 1)
currentPart += ".";
// Single-letter parts become their alphabet position ('A' is 65, so 'a'/'A' -> 1).
// char.Parse throws for parts that are not exactly one character (e.g. "1a", "C11");
// the empty catch swallows that, leaving only the "." appended above in the key.
currentPart += (((int)(char.ToUpper(char.Parse(currentStringValueArray[i])))) - 64).ToString();
}
catch { }
}
}
return Convert.ToString(currentPart, CultureInfo.InvariantCulture);
}
else
// Single-element arrays (values that parsed as a float, or that contain no '.'/'-')
// all share the same key, 0m.ToString().
return 0m.ToString();
});
string afterSort = string.Join(";", sortedValues.Select(x => x["SN"]));
Console.WriteLine("After Sorting:");
Console.WriteLine();
Console.WriteLine(afterSort);
//Copy to your existing datatable
myDataTable = sortedValues.CopyToDataTable();
Console.ReadKey();
I was expecting it to be like this:
-1
1.1.a.1
1.2.a.1
1.2.a.2
1.2.a.3
1.3.1
2.1.2
2.1a.1
2.1a.2
2.5
2.6.1
2.7.1
2.7.2
2.7.16
2.25a
2.25b
2.42.1
2.42.2
3.1.1
3.1.2
3.5.2
3.6a.1
3.6a.2
3.6b.2
5.1a.1
5.1a.2
5.1a.3
5.1b.1
5.1b.2
5.1b.6
6.3.1
6.3.2
6.3.3
6.3.4
6.3.5
6.5.1
6.5.2-C11
6.5.3-C12
17.06.01.b.i
17.06.02.b.i
17.06.02.b.vi
18.01.b
18.02.01.b.iii
1000
What am I doing wrong? Help needed please. I had also asked this type of question in this post until users kept putting different types of values.

It looks like you need to sort in what's called "Natural sort order".
There is a Windows API function, StrCmpLogicalW() that you can use to do such a comparison.
You can wrap this in a set of extension methods for sorting List<T> or arrays like so:
/// <summary>
/// Extension methods that sort in "Natural sort order" by delegating every
/// comparison to the shlwapi.dll function <c>StrCmpLogicalW</c>.
/// </summary>
public static class NaturalSortExt
{
    /// <summary>Sorts a list in "Natural sort order", i.e. "9" sorts before "10".</summary>
    /// <typeparam name="T">The element type of the list being sorted.</typeparam>
    /// <param name="self">The list to sort in place.</param>
    /// <param name="stringSelector">Maps each element to the string used for comparison.</param>
    public static void SortNatural<T>(this List<T> self, Func<T, string> stringSelector)
        => self.Sort((left, right) => StrCmpLogicalW(stringSelector(left), stringSelector(right)));

    /// <summary>Sorts a list of strings in "Natural sort order", i.e. "9" sorts before "10".</summary>
    /// <param name="self">The list to sort in place.</param>
    public static void SortNatural(this List<string> self)
        => self.Sort(new Comparison<string>(StrCmpLogicalW));

    /// <summary>Sorts a string array in "Natural sort order", i.e. "9" sorts before "10".</summary>
    /// <param name="self">The array to sort in place.</param>
    public static void SortNatural(this string[] self)
        => Array.Sort(self, new Comparison<string>(StrCmpLogicalW));

    // Native comparison routine imported from shlwapi.dll; strings are marshalled as Unicode.
    [DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
    static extern int StrCmpLogicalW(string lhs, string rhs);
}
Then you can just sort your array (or List<T>) as demonstrated in the sample code below:
class Program
{
static void Main()
{
string[] test =
{
"3.1.2",
"1.2.a.1",
"1.2.a.2",
"1.3.1",
"2.1.2",
"2.1a.2",
"2.1a.1",
"-1",
"2.5",
"2.7.1",
"1.1.a.1",
"2.7.16",
"2.7.2",
"2.25a",
"2.6.1",
"5.1a.3",
"2.42.2",
"2.25b",
"2.42.1",
"3.6a.2",
"5.1b.1",
"3.1.1",
"3.5.2",
"3.6a.1",
"3.6b.2",
"5.1a.1",
"1.2.a.3",
"5.1b.2",
"5.1b.6",
"6.3.1",
"6.3.2",
"17.06.02.b.i",
"6.3.3",
"5.1a.2",
"6.3.4",
"6.3.5",
"6.5.1",
"1000",
"6.5.2-C11",
"6.5.3-C12",
"17.06.01.b.i",
"17.06.02.b.vi",
"18.01.b",
"18.02.01.b.iii"
};
string[] expected =
{
"-1",
"1.1.a.1",
"1.2.a.1",
"1.2.a.2",
"1.2.a.3",
"1.3.1",
"2.1.2",
"2.1a.1",
"2.1a.2",
"2.5",
"2.6.1",
"2.7.1",
"2.7.2",
"2.7.16",
"2.25a",
"2.25b",
"2.42.1",
"2.42.2",
"3.1.1",
"3.1.2",
"3.5.2",
"3.6a.1",
"3.6a.2",
"3.6b.2",
"5.1a.1",
"5.1a.2",
"5.1a.3",
"5.1b.1",
"5.1b.2",
"5.1b.6",
"6.3.1",
"6.3.2",
"6.3.3",
"6.3.4",
"6.3.5",
"6.5.1",
"6.5.2-C11",
"6.5.3-C12",
"17.06.01.b.i",
"17.06.02.b.i",
"17.06.02.b.vi",
"18.01.b",
"18.02.01.b.iii",
"1000"
};
test.SortNatural();
Debug.Assert(test.SequenceEqual(expected));
Console.WriteLine(string.Join("\n", test));
}
}

Related

Get certain value in the string from text file

I have this in my text file:
000000000:Carrots:$1.99:214:03/11/2015:03/11/2016:$0.99
000000001:Bananas:$1.99:872:03/11/2015:03/11/2016:$0.99
000000002:Chocolate:$2.99:083:03/11/2015:03/11/2016:$1.99
000000003:Spaghetti:$3.99:376:03/11/2015:03/11/2016:$2.99
000000004:Tomato Sauce:$1.99:437:03/11/2015:03/11/2016:$0.99
000000005:Lettuce:$0.99:279:03/11/2015:03/11/2016:$0.99
000000006:Orange Juice:$2.99:398:03/11/2015:03/11/2016:$1.99
000000007:Potatoes:$2.99:792:03/11/2015:03/11/2016:$1.99
000000008:Celery:$0.99:973:03/11/2015:03/11/2016:$0.99
000000009:Onions:$1.99:763:03/11/2015:03/11/2016:$0.99
000000010:Chicken:$8.99:345:03/11/2015:03/11/2016:$7.99
000000010:Chicken:$8.99:345:03/11/2015:03/11/2016:$7.99
I need to get the value of each of the "quantity" values from the position in bold.
EDIT:
I want to also compare the values that I got and give an error if the quantity is low.
Solution with minimal memory consumption in case of large input data.
In addition: there is no handling of incorrect data in the quantity column. To add it, just replace the int.Parse block.
This is several methods to process file data using LINQ expressions
/// <summary>
/// Helpers for reading ':'-separated text data line by line with LINQ.
/// </summary>
internal static class MyExtensions
{
    /// <summary>
    /// Lazily reads <paramref name="stream"/> line by line. Arguments are validated
    /// immediately when the method is called; the reading itself happens during
    /// enumeration, and the reader (and with it the stream) is disposed when the
    /// enumeration finishes.
    /// </summary>
    /// <exception cref="OutOfMemoryException">There is insufficient memory to allocate a buffer for the returned string. </exception>
    /// <exception cref="IOException">An I/O error occurs. </exception>
    /// <exception cref="ArgumentException"><paramref name="stream" /> does not support reading. </exception>
    /// <exception cref="ArgumentNullException"><paramref name="stream" /> is null. </exception>
    public static IEnumerable<string> EnumerateLines(this Stream stream)
    {
        // An iterator body does not run until the first MoveNext, so the checks live in
        // this non-iterator wrapper to make the documented exceptions surface at the call.
        if (stream == null) throw new ArgumentNullException(nameof(stream));
        if (!stream.CanRead) throw new ArgumentException("Stream does not support reading.", nameof(stream));
        return EnumerateLinesIterator(stream);
    }

    /// <summary>Iterator part of <see cref="EnumerateLines"/>: yields lines until the reader returns null.</summary>
    private static IEnumerable<string> EnumerateLinesIterator(Stream stream)
    {
        using (var reader = new StreamReader(stream))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                yield return line;
            }
        }
    }

    /// <summary>Splits a line into its ':'-separated columns.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is <see langword="null" />.</exception>
    public static IEnumerable<string> ChunkLine(this string line)
    {
        if (line == null) throw new ArgumentNullException(nameof(line));
        return line.Split(':');
    }

    /// <summary>
    /// Returns the column at <paramref name="columnIndex"/>, or <see langword="null" />
    /// when the sequence has fewer columns than that.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="chuckedData"/> is <see langword="null" />.</exception>
    /// <exception cref="ArgumentException">Index should be not negative value</exception>
    public static string GetColumnData(this IEnumerable<string> chuckedData, int columnIndex)
    {
        if (chuckedData == null) throw new ArgumentNullException(nameof(chuckedData));
        if (columnIndex < 0) throw new ArgumentException("Column index should be >= 0", nameof(columnIndex));
        return chuckedData.Skip(columnIndex).FirstOrDefault();
    }
}
This is example of usage:
private void button1_Click(object sender, EventArgs e)
{
var values = EnumerateQuantityValues("largefile.txt");
// do whatever you need
}
/// <summary>
/// Lazily yields the integer parsed from the fourth ':'-separated column (index 3)
/// of every line in <paramref name="fileName"/>.
/// </summary>
private IEnumerable<int> EnumerateQuantityValues(string fileName)
{
    const int columnIndex = 3;
    using (var input = File.OpenRead(fileName))
    {
        // Yield from inside the using block so the file stays open while the caller enumerates.
        foreach (var textLine in input.EnumerateLines())
        {
            yield return int.Parse(textLine.ChunkLine().GetColumnData(columnIndex));
        }
    }
}
just consider if you are managed to get all these lines in string array or list.
you can apply the below code to get the collection of quantity as IEnumerable<string>.
var quantity = arr.Select(c =>
{
var temp = c.Split('$');
if (temp.Length > 1)
{
temp = temp[1].Split(':');
if (temp.Length > 1)
{
return temp[1];
}
}
return null;
}).Where(c => c != null);
UPDATE
Check the Fiddle.
https://dotnetfiddle.net/HqKdeI
you simply need to split the string
// Sample data as a verbatim (@) string literal: one record per line, columns separated by ':'.
string data = @"000000000:Carrots:$1.99:214:03/11/2015:03/11/2016:$0.99
000000001:Bananas:$1.99:872:03/11/2015:03/11/2016:$0.99
000000002:Chocolate:$2.99:083:03/11/2015:03/11/2016:$1.99
000000003:Spaghetti:$3.99:376:03/11/2015:03/11/2016:$2.99
000000004:Tomato Sauce:$1.99:437:03/11/2015:03/11/2016:$0.99
000000005:Lettuce:$0.99:279:03/11/2015:03/11/2016:$0.99
000000006:Orange Juice:$2.99:398:03/11/2015:03/11/2016:$1.99
000000007:Potatoes:$2.99:792:03/11/2015:03/11/2016:$1.99
000000008:Celery:$0.99:973:03/11/2015:03/11/2016:$0.99
000000009:Onions:$1.99:763:03/11/2015:03/11/2016:$0.99
000000010:Chicken:$8.99:345:03/11/2015:03/11/2016:$7.99";
// Split on either line-ending character; RemoveEmptyEntries drops the empty string that
// would otherwise appear between the '\r' and '\n' of a Windows line ending.
string[] rows = data.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
foreach (var row in rows)
{
    string[] cols = row.Split(':');
    // The quantity is the fourth column (index 3).
    var quantity = cols[3];
}
You can use String.Split to do this.
// Read all lines into an array (verbatim @ string so the backslashes are not escapes)
string[] lines = File.ReadAllLines(@"C:\path\to\your\file.txt");
// Loop through each one
foreach (string line in lines)
{
    // Split into an array based on the : symbol
    string[] split = line.Split(':');
    // Get the column based on index (the quantity is the fourth column)
    Console.WriteLine(split[3]);
}
Check out the example code below. The string you care about is named theValueYouWantInTheString.
// Columns are separated by ':'.
char[] delimiterChar = { ':' };
string input = @"000000010:Chicken:$8.99:345:03/11/2015:03/11/2016:$7.99";
string[] values = input.Split(delimiterChar);
// The quantity is the fourth column (index 3).
string theValueYouWantInTheString = values[3];
If you have a problem, use regular expression. Now you have two problems.
Here is a program that uses your input as a txt file. The function GetQuantity returns a list with int that contains the quantity. With this approach you can define more groups to extract information from each line.
namespace RegExptester
{
    class Program
    {
        /// <summary>
        /// Reads <paramref name="txtFile"/> line by line and returns the quantity
        /// (the fourth ':'-separated column) of every line that matches the expected
        /// "id:name:$price:quantity..." layout. Lines that do not match, or whose
        /// quantity is not a valid int, are skipped.
        /// </summary>
        /// <param name="txtFile">Path of the text file to read.</param>
        /// <returns>The parsed quantities in file order.</returns>
        private static List<int> GetQuantity(string txtFile)
        {
            string tempLineValue;
            // The name column is matched with [^:]* (anything up to the next colon) so that
            // names containing spaces, such as "Tomato Sauce", are not silently skipped.
            Regex regex = new Regex(@"[0-9]*:[^:]*:\$[0-9]*\.[0-9]*:([0-9]*).*", RegexOptions.Compiled);
            List<int> retValue = new List<int>();
            using (StreamReader inputReader = new StreamReader(txtFile))
            {
                while (null != (tempLineValue = inputReader.ReadLine()))
                {
                    Match match = regex.Match(tempLineValue);
                    if (!match.Success)
                    {
                        continue;
                    }
                    // Group 1 is the quantity; more groups can be added to extract other columns.
                    int numberValue;
                    if (int.TryParse(match.Groups[1].Value, out numberValue))
                        retValue.Add(numberValue);
                }
            }
            return retValue;
        }
        static void Main(string[] args)
        {
            var tmp = GetQuantity("c:\\tmp\\junk.txt");
        }
    }
}
Apparently from each line you want the part between the 3rd and the 4th colon. LINQ can do that for you:
using (var textReader = new StreamReader(fileName))
{
    // read all text and divide into lines (split the text that was read, not the reader):
    var allText = textReader.ReadToEnd();
    var allLines = allText.Split(new char[] {'\r','\n'}, StringSplitOptions.RemoveEmptyEntries);
    // split each line based on ':', and take the fourth element
    // (FirstOrDefault yields null for a line with fewer than four columns)
    var myValues = allLines.Select(line => line.Split(new char[] {':'})
        .Skip(3)
        .FirstOrDefault());
}
If you want less readability, of course you can concatenate these statements into one line.

CSV-file values to List<List>

I have a CSV file that I want some values from. One problem is that I don't know how many columns the file has. The number can be different every time I get a new CSV file. It will always have columns and rows with values. I will get it from a normal excel-file.
I want the method to return a List<List>.
ListA(FirstName, LastName, PhoneNumber... and so on) here I don't know how many items ListA will have. It can be different every time.
Inside ListA I want lists of persons like this:
ListA[FirstName] = List1(Zlatan, Lionel, Anders.....)
ListA[LastName] = List2(Ibrahimovic, Messi, Svensson.....) .. and so on.
You could create a class Person
class person {
private string FirstName;
private string LastName;
// others
}
Open the File and split each row in the file with the String.Split()-Method then convert each value and create Objects, which you can add to a List.
List<Person> persons = new List<Person>();
persons.Add(personFromFile);
Thats a pretty short solution but it works
Edit: Variable Fields per Row
If thats the case you could use a List<string[]> stringArraylist; and then add the results of the String.Split()-Method to it.
List<string[]> stringArraylist;
stringArraylist = new List<string[]>();
stringArraylist.Add("Andrew;Pearson;...;lololo;".Split(';'));
Is that more of what you wanted?
There are a lot of questions on SO that deal with parsing CSV files. See here for one: Reading CSV files in C#. I am fairly certain there are some solutions built in to .NET, though I can't recall what they are at the moment. (@ZoharPeled suggested TextFieldParser)
Most of the parsing solutions will give you a collection of rows where each item is a collection of columns. So assuming you have something like an IEnumerable<IList<string>>, you could create a class and use LINQ queries to get what you need:
/// <summary>
/// Column-oriented view over parsed CSV content, where the content is a sequence
/// of rows and each row is a list of cell strings.
/// </summary>
public class CSVColumns
{
    /// <summary>The parsed rows, exactly as passed to the constructor.</summary>
    public IEnumerable<IList<string>> CSVContents { get; private set; }

    /// <param name="csvcontents">Rows of the CSV file; the first row is treated as the header by the name-based lookups.</param>
    public CSVColumns(IEnumerable<IList<string>> csvcontents)
    {
        this.CSVContents = csvcontents;
    }

    /// <summary>All values of the column whose header is "FirstName".</summary>
    public List<string> FirstNames
    {
        get { return GetColumn("FirstName"); }
    }

    /// <summary>All values of the column whose header is "LastName".</summary>
    public List<string> LastNames
    {
        get { return GetColumn("LastName"); }
    }

    /// <summary>
    /// Gets a collection of the column data based on the name of the column
    /// from the header row.
    /// </summary>
    /// <param name="columnname">Header text to look for (exact match).</param>
    /// <returns>
    /// The column values without the header row, or an empty list when there is
    /// no header row or no header cell equals <paramref name="columnname"/>.
    /// </returns>
    public List<string> GetColumn(string columnname)
    {
        var firstrow = CSVContents.FirstOrDefault();
        if (firstrow == null)
        {
            return new List<string>();
        }

        // First header cell that equals the requested name wins.
        int index = firstrow.IndexOf(columnname);
        return index >= 0 ? GetColumn(index, true) : new List<string>();
    }

    /// <summary>
    /// Gets all items from a specific column number but skips the
    /// header row if needed.
    /// </summary>
    /// <param name="index">Zero-based column number.</param>
    /// <param name="hasHeaderRow">When true, the first row is skipped.</param>
    /// <returns>One entry per data row; rows that have no cell at <paramref name="index"/> contribute "".</returns>
    public List<string> GetColumn(int index, bool hasHeaderRow = true)
    {
        IEnumerable<IList<string>> rows = hasHeaderRow ? CSVContents.Skip(1) : CSVContents;

        // Bounds-check instead of catching an exception: arrays throw IndexOutOfRangeException
        // but List<string> throws ArgumentOutOfRangeException, so catching only the former
        // let short List-backed rows crash the whole query.
        return rows
            .Select(row => index >= 0 && index < row.Count ? row[index] : "")
            .ToList();
    }
}
I finally got a solution and it's working for me. My friend made it, so all credit to him. He is not a user here on Stack Overflow, so I am posting it instead.
/// <summary>
/// Loads the ';'-separated CSV file and pivots it into one <c>Attributes</c> entry per
/// column: <c>Name</c> is the header cell and <c>Value</c> holds that column's cells
/// from every following row, in file order.
/// </summary>
/// <returns>
/// One entry per header column; an empty list when the file contains no non-empty line.
/// </returns>
private List<Attributes> LoadCsv()
{
    string filename = @"C:\Desktop\demo.csv";
    // Get the file's text.
    string whole_file = System.IO.File.ReadAllText(filename);
    // Split into lines on either line-ending character, ignoring blank lines.
    string[] lines = whole_file.Split(new char[] { '\r', '\n' },
        StringSplitOptions.RemoveEmptyEntries);

    var attr = new List<Attributes>();
    if (lines.Length == 0)
    {
        // Nothing to read: avoid indexing lines[0] on an empty file.
        return attr;
    }

    // The first line defines the columns.
    string[] header = lines[0].Split(';');
    for (var c = 0; c < header.Length; c++)
    {
        attr.Add(new Attributes());
        attr[c].Name = header[c];
        attr[c].Value = new List<String>();
    }

    // Every following line contributes one cell to each column.
    for (var r = 1; r < lines.Length; r++)
    {
        string[] cells = lines[r].Split(';');
        for (var c = 0; c < header.Length; c++)
        {
            // A row shorter than the header gets "" for its missing cells instead of
            // throwing IndexOutOfRangeException; extra cells beyond the header are ignored.
            attr[c].Value.Add(c < cells.Length ? cells[c] : "");
        }
    }
    // Return the values.
    return attr;
}

Replace every other of a certain char in a string

I have searched a lot to find a solution to this, but could not find anything. I do however suspect that it is because I don't know what to search for.
First, I have a string that I convert to an array. The string will be formatted like so:
"99.28099822998047,68.375 118.30699729919434,57.625 126.49999713897705,37.875 113.94499683380127,11.048999786376953 96.00499725341797,8.5"
I create the array with the following code:
/// <summary>
/// Splits the input on spaces and commas and returns the pieces as an array.
/// </summary>
/// <param name="String">Text such as "x1,y1 x2,y2".</param>
/// <returns>A string array (typed as <see cref="Array"/>) with one entry per piece.</returns>
public static Array StringToArray(string String)
{
    // Copy the split result through a list, then hand back a fresh array.
    var pieces = new List<string>(String.Split(' ', ','));
    return pieces.ToArray();
}
Now my problem is; I am trying to find a way to convert it back into a string, with the same formatting. So, I could create a string simply using:
/// <summary>Joins the elements of <paramref name="array"/> with commas.</summary>
/// <param name="array">The elements to join.</param>
/// <returns>The comma-separated elements, e.g. "1,2,3".</returns>
public static String ArrayToString(Array array)
{
    // string.Join(",", array) binds to the params object[] overload and treats the whole
    // Array as ONE item (producing e.g. "System.String[]"), so pass its elements instead.
    string joined = string.Join(",", System.Linq.Enumerable.Cast<object>(array));
    return joined;
}
and then hopefully replace every 2nd "," with a space (" "). Is this possible? Or are there a whole other way you would do this?
Thank you in advance! I hope my question makes sense.
There is no built-in way of doing what you need. However, it's pretty trivial to achieve what it is you need e.g.
/// <summary>Splits "x1,y1 x2,y2 ..." into a flat array of numbers-as-strings.</summary>
/// <param name="str">Pairs separated by spaces, members of a pair separated by a comma.</param>
public static string[] StringToArray(string str)
{
    return str.Replace(" ", ",").Split(',');
}
/// <summary>
/// Rebuilds the "x1,y1 x2,y2 ..." format from a flat array: a comma between the two
/// members of a pair and a space between pairs, with no trailing separator.
/// </summary>
/// <param name="array">The flat list of values.</param>
public static string ArrayToString(string[] array)
{
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < array.Length; i++)
    {
        // Write the separator BEFORE each element except the first, so nothing is left
        // dangling at the end: odd positions finish a pair (','), even ones start a new pair (' ').
        if (i > 0)
        {
            sb.Append(i % 2 != 0 ? ',' : ' ');
        }
        sb.Append(array[i]);
    }
    return sb.ToString();
}
If those are pairs of coordinates, you can start by parsing them like pairs, not like separate numbers:
public static IEnumerable<string[]> ParseCoordinates(string input)
{
return input.Split(' ').Select(vector => vector.Split(','));
}
It is easier then to reconstruct the original string:
public static string PrintCoordinates(IEnumerable<string[]> coords)
{
return String.Join(" ", coords.Select(vector => String.Join(",", vector)));
}
But if you absolutely need to have your data in a flat structure like array, it is then possible to convert it to a more structured format:
/// <summary>
/// Groups a flat array into consecutive, non-overlapping pairs:
/// [a, b, c, d] becomes [a, b], [c, d].
/// </summary>
/// <param name="coords">Flat list of coordinate values.</param>
/// <returns>One two-element array per pair; a trailing unpaired element is dropped.</returns>
public static IEnumerable<string[]> Pairwise(string[] coords)
{
    // Step by two so each value lands in exactly one pair (zipping the array with itself
    // shifted by one would produce the overlapping pairs [a,b], [b,c], [c,d]).
    for (int i = 0; i + 1 < coords.Length; i += 2)
    {
        yield return new[] { coords[i], coords[i + 1] };
    }
}
You then can use this method in conjunction with PrintCoordinates to reconstruct your initial string.
Here is a route to do it. I don't think other solutions were removing last comma or space. I also include a test.
/// <summary>
/// Formats a flat array as "x1,y1 x2,y2 ...": separators alternate between a comma
/// and a space, and the separator after the last element is removed.
/// </summary>
/// <param name="array">The values to format.</param>
/// <returns>The formatted string; an empty string for an empty array.</returns>
public static String ArrayToString(Array array)
{
    var useComma = true;
    var stringBuilder = new StringBuilder();
    foreach (var value in array)
    {
        stringBuilder.AppendFormat("{0}{1}", value, useComma ? "," : " ");
        useComma = !useComma;
    }
    // Remove last space or comma. Guarded because an empty array appends nothing,
    // and setting Length to -1 throws ArgumentOutOfRangeException.
    if (stringBuilder.Length > 0)
    {
        stringBuilder.Length = stringBuilder.Length - 1;
    }
    return stringBuilder.ToString();
}
[TestMethod]
public void ArrayToStringTest()
{
var expectedStringValue =
"99.28099822998047,68.375 118.30699729919434,57.625 126.49999713897705,37.875 113.94499683380127,11.048999786376953 96.00499725341797,8.5";
var array = new[]
{
"99.28099822998047",
"68.375",
"118.30699729919434",
"57.625",
"126.49999713897705",
"37.875",
"113.94499683380127",
"11.048999786376953",
"96.00499725341797",
"8.5",
};
var actualStringValue = ArrayToString(array);
Assert.AreEqual(expectedStringValue, actualStringValue);
}
Another way of doing it:
string inputString = "1.11,11.3 2.22,12.4 2.55,12.8";
List<string[]> splitted = inputString.Split(' ').Select(a => a.Split(',')).ToList();
string joined = string.Join(" ", splitted.Select(a => string.Join(",",a)).ToArray());
"splitted" list will look like this:
1.11 11.3
2.22 12.4
2.55 12.8
"joined" string is the same as "inputString"
Here's another approach to this problem.
public static string ArrayToString(string[] array)
{
Debug.Assert(array.Length % 2 == 0, "Array is not dividable by two.");
// Group all coordinates as pairs of two.
int index = 0;
var coordinates = from item in array
group item by index++ / 2
into pair
select pair;
// Format each coordinate pair with a comma.
var formattedCoordinates = coordinates.Select(i => string.Join(",", i));
// Now concatinate all the pairs with a space.
return string.Join(" ", formattedCoordinates);
}
And a simple demonstration:
public static void A_Simple_Test()
{
string expected = "1,2 3,4";
string[] array = new string[] { "1", "2", "3", "4" };
Debug.Assert(expected == ArrayToString(array));
}

How do I sort strings alphabetically while accounting for value when a string is numeric?

I'm trying to sort an array of numbers that are strings and I'd like them to sort numerically.
The catch is that I cannot convert the numbers into int.
Here is the code:
string[] things= new string[] { "105", "101", "102", "103", "90" };
foreach (var thing in things.OrderBy(x => x))
{
Console.WriteLine(thing);
}
Output:
101, 102, 103, 105, 90
I'd like:
90, 101, 102, 103, 105
EDIT:
The output can't be 090, 101, 102...
Updated the code sample to say "things" instead of "sizes". The array can be something like this:
string[] things= new string[] { "paul", "bob", "lauren", "007", "90" };
That means it needs to be sorted alphabetically and by number:
007, 90, bob, lauren, paul
Pass a custom comparer into OrderBy. Enumerable.OrderBy will let you specify any comparer you like.
This is one way to do that:
void Main()
{
string[] things = new string[] { "paul", "bob", "lauren", "007", "90", "101"};
foreach (var thing in things.OrderBy(x => x, new SemiNumericComparer()))
{
Console.WriteLine(thing);
}
}
/// <summary>
/// Orders strings so that values parseable as <see cref="int"/> come first, compared
/// by numeric value, followed by all other strings compared case-insensitively
/// with the invariant culture.
/// </summary>
public class SemiNumericComparer: IComparer<string>
{
    /// <summary>
    /// Tells whether a string can be parsed as an int.
    /// </summary>
    /// <param name="value">Candidate text</param>
    /// <returns>True when int.TryParse accepts it</returns>
    public static bool IsNumeric(string value)
    {
        return int.TryParse(value, out _);
    }

    /// <inheritdoc />
    public int Compare(string s1, string s2)
    {
        bool firstIsNumber = IsNumeric(s1);
        bool secondIsNumber = IsNumeric(s2);

        if (firstIsNumber && secondIsNumber)
        {
            // Both numeric: order by value, reporting exactly 1, -1 or 0.
            int left = Convert.ToInt32(s1);
            int right = Convert.ToInt32(s2);
            return left > right ? 1 : (left < right ? -1 : 0);
        }

        // Exactly one numeric: the number sorts before the text.
        if (firstIsNumber)
        {
            return -1;
        }
        if (secondIsNumber)
        {
            return 1;
        }

        // Neither numeric: plain case-insensitive text comparison.
        return string.Compare(s1, s2, true, CultureInfo.InvariantCulture);
    }
}
Just pad with zeroes to the same length:
int maxlen = sizes.Max(x => x.Length);
var result = sizes.OrderBy(x => x.PadLeft(maxlen, '0'));
Value is a string
List = List.OrderBy(c => c.Value.Length).ThenBy(c => c.Value).ToList();
Works
And, how about this ...
string[] sizes = new string[] { "105", "101", "102", "103", "90" };
var size = from x in sizes
orderby x.Length, x
select x;
foreach (var p in size)
{
Console.WriteLine(p);
}
There is a native function in Windows, StrCmpLogicalW, that will compare numbers in strings as numbers instead of letters. It is easy to make a comparer that calls out to that function and uses it for its comparisons.
public class StrCmpLogicalComparer : Comparer<string>
{
[DllImport("Shlwapi.dll", CharSet = CharSet.Unicode)]
private static extern int StrCmpLogicalW(string x, string y);
public override int Compare(string x, string y)
{
return StrCmpLogicalW(x, y);
}
}
It even works on strings that have both text and numbers. Here is an example program that will show the difference between the default sort and the StrCmpLogicalW sort
class Program
{
static void Main()
{
List<string> items = new List<string>()
{
"Example1.txt", "Example2.txt", "Example3.txt", "Example4.txt", "Example5.txt", "Example6.txt", "Example7.txt", "Example8.txt", "Example9.txt", "Example10.txt",
"Example11.txt", "Example12.txt", "Example13.txt", "Example14.txt", "Example15.txt", "Example16.txt", "Example17.txt", "Example18.txt", "Example19.txt", "Example20.txt"
};
items.Sort();
foreach (var item in items)
{
Console.WriteLine(item);
}
Console.WriteLine();
items.Sort(new StrCmpLogicalComparer());
foreach (var item in items)
{
Console.WriteLine(item);
}
Console.ReadLine();
}
}
which outputs
Example1.txt
Example10.txt
Example11.txt
Example12.txt
Example13.txt
Example14.txt
Example15.txt
Example16.txt
Example17.txt
Example18.txt
Example19.txt
Example2.txt
Example20.txt
Example3.txt
Example4.txt
Example5.txt
Example6.txt
Example7.txt
Example8.txt
Example9.txt
Example1.txt
Example2.txt
Example3.txt
Example4.txt
Example5.txt
Example6.txt
Example7.txt
Example8.txt
Example9.txt
Example10.txt
Example11.txt
Example12.txt
Example13.txt
Example14.txt
Example15.txt
Example16.txt
Example17.txt
Example18.txt
Example19.txt
Example20.txt
try this
sizes.OrderBy(x => Convert.ToInt32(x)).ToList<string>();
Note:
this will helpful when all are string convertable to int.....
You say you cannot convert the numbers into int because the array can contain elements that cannot be converted to int, but there is no harm in trying:
string[] things = new string[] { "105", "101", "102", "103", "90", "paul", "bob", "lauren", "007", "90" };
Array.Sort(things, CompareThings);
foreach (var thing in things)
Debug.WriteLine(thing);
Then compare like this:
/// <summary>
/// Comparison for mixed content: strings that parse as int are ordered by value and
/// placed before all other strings; the remaining strings are compared as text
/// using the current culture.
/// </summary>
/// <param name="x">Left operand.</param>
/// <param name="y">Right operand.</param>
/// <returns>Negative, zero or positive when x sorts before, equal to, or after y.</returns>
private static int CompareThings(string x, string y)
{
    int intX, intY;
    bool xIsInt = int.TryParse(x, out intX);
    bool yIsInt = int.TryParse(y, out intY);
    if (xIsInt && yIsInt)
        return intX.CompareTo(intY);
    // A number against a non-number must not fall back to text comparison: that gives
    // cycles such as "9" < "10" (numeric), "10" < "1a" (text), "1a" < "9" (text), which
    // makes the sort result undefined. Numbers always go first instead.
    if (xIsInt)
        return -1;
    if (yIsInt)
        return 1;
    return string.Compare(x, y, StringComparison.CurrentCulture);
}
Output: 007, 90, 90, 101, 102, 103, 105, bob, lauren, paul
This site discusses alphanumeric sorting and will sort the numbers in a logical sense instead of an ASCII sense. It also takes into account the alphas around it:
http://www.dotnetperls.com/alphanumeric-sorting
EXAMPLE:
C:/TestB/333.jpg
11
C:/TestB/33.jpg
1
C:/TestA/111.jpg
111F
C:/TestA/11.jpg
2
C:/TestA/1.jpg
111D
22
111Z
C:/TestB/03.jpg
1
2
11
22
111D
111F
111Z
C:/TestA/1.jpg
C:/TestA/11.jpg
C:/TestA/111.jpg
C:/TestB/03.jpg
C:/TestB/33.jpg
C:/TestB/333.jpg
The code is as follows:
class Program
{
static void Main(string[] args)
{
var arr = new string[]
{
"C:/TestB/333.jpg",
"11",
"C:/TestB/33.jpg",
"1",
"C:/TestA/111.jpg",
"111F",
"C:/TestA/11.jpg",
"2",
"C:/TestA/1.jpg",
"111D",
"22",
"111Z",
"C:/TestB/03.jpg"
};
Array.Sort(arr, new AlphaNumericComparer());
foreach(var e in arr) {
Console.WriteLine(e);
}
}
}
/// <summary>
/// Compares strings chunk by chunk, where a chunk is a maximal run of digits or a
/// maximal run of non-digits. Two digit chunks are compared by numeric value; any
/// other pair of chunks is compared as text.
/// </summary>
public class AlphaNumericComparer : IComparer
{
    /// <summary>
    /// Compares two objects as alphanumeric strings.
    /// </summary>
    /// <returns>
    /// Negative, zero or positive. Returns 0 when either argument is not a string
    /// (including null). When all compared chunks are equal, the shorter string sorts first.
    /// </returns>
    public int Compare(object x, object y)
    {
        string s1 = x as string;
        if (s1 == null)
        {
            return 0;
        }
        string s2 = y as string;
        if (s2 == null)
        {
            return 0;
        }

        int marker1 = 0;
        int marker2 = 0;
        // Walk through the two strings with two markers, one chunk at a time.
        while (marker1 < s1.Length && marker2 < s2.Length)
        {
            string chunk1 = ReadChunk(s1, ref marker1);
            string chunk2 = ReadChunk(s2, ref marker2);

            // If we have collected numbers, compare them numerically.
            // Otherwise, if we have strings, compare them alphabetically.
            int result = char.IsDigit(chunk1[0]) && char.IsDigit(chunk2[0])
                ? CompareDigitRuns(chunk1, chunk2)
                : chunk1.CompareTo(chunk2);
            if (result != 0)
            {
                return result;
            }
        }
        return s1.Length - s2.Length;
    }

    /// <summary>
    /// Returns the run of characters starting at <paramref name="marker"/> that are all
    /// digits or all non-digits, and advances the marker past it.
    /// </summary>
    private static string ReadChunk(string s, ref int marker)
    {
        int start = marker;
        bool digits = char.IsDigit(s[start]);
        do
        {
            marker++;
        } while (marker < s.Length && char.IsDigit(s[marker]) == digits);
        // Take exactly the run, rather than a full-length buffer padded with '\0'.
        return s.Substring(start, marker - start);
    }

    /// <summary>
    /// Compares two digit runs by numeric value without parsing them, so runs too long
    /// for an int (e.g. timestamps in file names) cannot cause an OverflowException.
    /// </summary>
    private static int CompareDigitRuns(string a, string b)
    {
        // Ignoring leading zeros, the longer run is the larger number; runs of equal
        // length compare digit by digit.
        string left = a.TrimStart('0');
        string right = b.TrimStart('0');
        if (left.Length != right.Length)
        {
            return left.Length < right.Length ? -1 : 1;
        }
        return Math.Sign(string.CompareOrdinal(left, right));
    }
}
I guess this will be much more good if it has some numeric in the string.
Hope it will help.
PS:I'm not sure about performance or complicated string values but it worked good something like this:
lorem ipsum
lorem ipsum 1
lorem ipsum 2
lorem ipsum 3
...
lorem ipsum 20
lorem ipsum 21
/// <summary>
/// Comparer that puts int-parseable strings first (ordered by value) and, for other
/// strings that differ only in a trailing number ("lorem ipsum 2" / "lorem ipsum 10"),
/// orders them by that number. Everything else is compared case-insensitively.
/// </summary>
public class SemiNumericComparer : IComparer<string>
{
    /// <inheritdoc />
    public int Compare(string s1, string s2)
    {
        int s1r, s2r;
        var s1n = IsNumeric(s1, out s1r);
        var s2n = IsNumeric(s2, out s2r);
        // CompareTo instead of s1r - s2r: the subtraction can overflow and flip the sign.
        if (s1n && s2n) return s1r.CompareTo(s2r);
        else if (s1n) return -1;
        else if (s2n) return 1;

        // Separate an optional trailing run of digits from the text in front of it.
        // When there is no match, Index and Length are 0 and Remove leaves the string as is.
        var num1 = Regex.Match(s1, @"\d+$");
        var num2 = Regex.Match(s2, @"\d+$");
        var onlyString1 = s1.Remove(num1.Index, num1.Length);
        var onlyString2 = s2.Remove(num2.Index, num2.Length);
        if (onlyString1 == onlyString2)
        {
            if (num1.Success && num2.Success) return CompareDigits(num1.Value, num2.Value);
            else if (num1.Success) return 1;
            else if (num2.Success) return -1;
        }
        return string.Compare(s1, s2, true);
    }

    /// <summary>Tries to parse <paramref name="value"/> as an int.</summary>
    /// <param name="value">Text to parse.</param>
    /// <param name="result">The parsed value, or 0 when parsing fails.</param>
    /// <returns>True when the text is a valid int.</returns>
    public bool IsNumeric(string value, out int result)
    {
        return int.TryParse(value, out result);
    }

    /// <summary>
    /// Compares two digit runs by numeric value without converting them, so suffixes
    /// longer than an int can hold do not throw OverflowException.
    /// </summary>
    private static int CompareDigits(string a, string b)
    {
        // Ignoring leading zeros, the longer run is the larger number.
        string left = a.TrimStart('0');
        string right = b.TrimStart('0');
        if (left.Length != right.Length) return left.Length < right.Length ? -1 : 1;
        return Math.Sign(string.CompareOrdinal(left, right));
    }
}
This seems a weird request and deserves a weird solution:
string[] sizes = new string[] { "105", "101", "102", "103", "90" };
foreach (var size in sizes.OrderBy(x => {
double sum = 0;
int position = 0;
foreach (char c in x.ToCharArray().Reverse()) {
sum += (c - 48) * (int)(Math.Pow(10,position));
position++;
}
return sum;
}))
{
Console.WriteLine(size);
}
The answer given by Jeff Paulsen is correct but the Comparer can be much simplified to this:
/// <summary>
/// Comparer that orders int-parseable strings by value, places them before all
/// non-numeric strings, and compares non-numeric strings case-insensitively.
/// </summary>
public class SemiNumericComparer: IComparer<string>
{
    /// <inheritdoc />
    public int Compare(string s1, string s2)
    {
        bool numeric1 = IsNumeric(s1);
        bool numeric2 = IsNumeric(s2);
        if (numeric1 && numeric2)
        {
            // Only the sign of the result matters. Subtract in 64-bit so that extreme
            // values (e.g. int.MinValue - 1) cannot wrap around and flip the sign.
            return Math.Sign((long)Convert.ToInt32(s1) - Convert.ToInt32(s2));
        }
        if (numeric1)
            return -1;
        if (numeric2)
            return 1;
        return string.Compare(s1, s2, true);
    }

    /// <summary>Tells whether the text form of <paramref name="value"/> parses as an int.</summary>
    /// <param name="value">Any object; null is treated as not numeric.</param>
    public static bool IsNumeric(object value)
    {
        int result;
        // int.TryParse has no overload taking object, so convert to text first
        // (Convert.ToString returns an empty string for null).
        return Int32.TryParse(Convert.ToString(value), out result);
    }
}
This works because the only thing that is checked for the result of the Comparer is if the result is larger, smaller or equal to zero. One can simply subtract the values from another and does not have to handle the return values, provided the subtraction cannot overflow (for example, int.MinValue - 1 wraps around to a positive number).
Also the IsNumeric method should not have to use a try-block and can benefit from TryParse.
And for those who are not sure:
This Comparer will sort values so, that non numeric values are always appended to the end of the list. If one wants them at the beginning the second and third if block have to be swapped.
public class NaturalSort: IComparer<string>
{
[DllImport("shlwapi.dll", CharSet = CharSet.Unicode)]
public static extern int StrCmpLogicalW(string x, string y);
public int Compare(string x, string y)
{
return StrCmpLogicalW(x, y);
}
}
arr = arr.OrderBy(x => x, new NaturalSort()).ToArray();
The reason I needed it was to get files in a directory whose filenames started with a number:
public static FileInfo[] GetFiles(string path)
{
return new DirectoryInfo(path).GetFiles()
.OrderBy(x => x.Name, new NaturalSort())
.ToArray();
}
Try this :
string[] things= new string[] { "105", "101", "102", "103", "90" };
int tmpNumber;
foreach (var thing in (things.Where(xx => int.TryParse(xx, out tmpNumber)).OrderBy(xx => int.Parse(xx))).Concat(things.Where(xx => !int.TryParse(xx, out tmpNumber)).OrderBy(xx => xx)))
{
Console.WriteLine(thing);
}
Expanding on Jeff Paulsen answer. I wanted to make sure it didn't matter how many number or char groups were in the strings:
/// <summary>
/// Comparer that handles any number of alternating digit and non-digit groups in a
/// string (e.g. "file10part2.txt"): groups are compared pairwise, digit groups by
/// numeric value where they fit an int and text groups case-insensitively.
/// </summary>
public class SemiNumericComparer : IComparer<string>
{
    /// <inheritdoc />
    public int Compare(string s1, string s2)
    {
        if (int.TryParse(s1, out var i1) && int.TryParse(s2, out var i2))
        {
            return i1 > i2 ? 1 : (i1 < i2 ? -1 : 0);
        }

        var text1 = SplitCharsAndNums(s1);
        var text2 = SplitCharsAndNums(s2);
        if (text1.Length > 1 && text2.Length > 1)
        {
            // Compare only the groups both strings have; indexing up to the LONGER
            // array would read past the end of the shorter one.
            var shared = Math.Min(text1.Length, text2.Length);
            for (var i = 0; i < shared; i++)
            {
                // Each group is a single run, so this recursion goes at most one level deep.
                var pos = Compare(text1[i], text2[i]);
                if (pos != 0)
                {
                    return pos;
                }
            }

            // All shared groups are equal: the string with fewer groups comes first.
            if (text1.Length != text2.Length)
            {
                return text1.Length < text2.Length ? -1 : 1;
            }
        }
        return string.Compare(s1, s2, true);
    }

    /// <summary>
    /// Splits text into maximal runs of digits and non-digits, e.g. "ab12c" gives
    /// "ab", "12", "c". Null or empty text gives an empty array.
    /// </summary>
    private string[] SplitCharsAndNums(string text)
    {
        var parts = new List<string>();
        if (string.IsNullOrEmpty(text))
        {
            return parts.ToArray();
        }

        // Cut wherever the digit/non-digit kind changes. The runs are collected directly
        // rather than joined with a space delimiter, so text that itself contains spaces
        // is not split at them and no empty groups are produced.
        var start = 0;
        for (var i = 1; i < text.Length; i++)
        {
            if (char.IsDigit(text[i]) != char.IsDigit(text[i - 1]))
            {
                parts.Add(text.Substring(start, i - start));
                start = i;
            }
        }
        parts.Add(text.Substring(start));
        return parts.ToArray();
    }
}
I also took SplitCharsAndNums from an SO Page after amending it to deal with file names.
Example of short IComparer class.
If both string arguments can be converted to integers, they are parsed and compared as integers.
If only one argument can be converted to an integer, the integer is prioritized (treated as the lower value) and is placed before the string.
If neither argument can be converted to an integer, ordinary string comparison is used.
Code:
/// <summary>
/// Comparer that orders integer-like strings numerically and places them
/// before all other strings; non-integer strings use ordinary string comparison.
/// </summary>
public class CompareIntegerStrings : IComparer<string>
{
    /// <summary>
    /// Compares <paramref name="x"/> with <paramref name="y"/>.
    /// </summary>
    /// <returns>
    /// The numeric comparison when both parse as <see cref="int"/>; -1 or 1
    /// when only one of them parses (the integer sorts first); otherwise the
    /// result of <see cref="string.Compare(string, string)"/>.
    /// </returns>
    public int Compare(string x, string y)
    {
        // Parse each operand exactly once.
        bool xIsInt = int.TryParse(x, out int xOut);
        bool yIsInt = int.TryParse(y, out int yOut);

        if (xIsInt && yIsInt)
        {
            return xOut.CompareTo(yOut);
        }

        if (xIsInt)
        {
            return -1;
        }

        if (yIsInt)
        {
            return 1;
        }

        // Static Compare accepts null operands; x.CompareTo(y) would throw when x is null.
        return string.Compare(x, y);
    }
}
In this example
// Sample input mixing zero-padded integers and plain words.
var intStrings = new List<string> { "01", "0022", "abba", "11", "deep purple", "02" };
// Sort with CompareIntegerStrings as the key comparer and materialize the result.
var integerFirst = new CompareIntegerStrings();
List<string> orderedIntStrings = intStrings.OrderBy(entry => entry, integerFirst).ToList();
the ordered list orderedIntStrings is { "01","02","11","0022","abba","deep purple"}.
Recommend using NaturalSort.Extension(nuget/github), as it is a reasonably difficult operation as you can see from the answer.
using NaturalSort.Extension;
// Build the comparer through the package's WithNaturalSort() extension, then use it as the key comparer.
var naturalComparer = StringComparison.OrdinalIgnoreCase.WithNaturalSort();
var ordered = things.OrderBy(x => x, naturalComparer);
Try this out..
string[] things = new string[] { "paul", "bob", "lauren", "007", "90", "-10" };
List<int> num = new List<int>();
List<string> str = new List<string>();
// Partition the input: entries that parse as int go to num, all others to str.
foreach (string candidate in things)
{
    if (int.TryParse(candidate, out int parsed))
    {
        num.Add(parsed);
    }
    else
    {
        str.Add(candidate);
    }
}
Now Sort the lists and merge them back...
// Materialize both orderings once. Left as deferred queries, every Count()
// and ElementAt() call in the loop below would re-run the sort.
var strsort = str.OrderBy(s => s.Length).ToList();   // strings ordered by their length
var numsort = num.OrderBy(n => n).ToList();          // numbers in ascending order
// Write the numbers back first, then the non-numeric strings.
for (int i = 0; i < things.Length; i++)
{
    things[i] = i < numsort.Count
        ? numsort[i].ToString()
        : strsort[i - numsort.Count];
}
I just tried to make a contribution to this interesting question...
My preferred solution (if all strings are numeric only):
// Order by numerical order: (Assertion: all things are numeric strings only)
// int.Parse throws for any entry that is not a valid integer, hence the assertion above.
foreach (var thing in things.OrderBy(int.Parse))
{
    Console.WriteLine(thing);
}
/// <summary>
/// Small demonstration of sorting a list of strings with <c>SortNumericComparer</c>.
/// </summary>
public class Test
{
    /// <summary>Builds a sample list and copies its sorted form into a new list.</summary>
    public void TestMethod()
    {
        var buyers = new List<string> { "5", "10", "1", "str", "3", "string" };
        var sortedBuyers = new List<string>(SortedList(buyers));
    }

    /// <summary>
    /// Returns a new list holding the elements of <paramref name="unsoredList"/>
    /// ordered with a <c>SortNumericComparer</c>.
    /// </summary>
    public List<string> SortedList(List<string> unsoredList)
    {
        var ordered = unsoredList.OrderBy(o => o, new SortNumericComparer());
        return ordered.ToList();
    }
}
/// <summary>
/// Comparer that orders integer-like strings numerically, places them before
/// all other strings, and compares the remaining strings case-insensitively.
/// </summary>
public class SortNumericComparer : IComparer<string>
{
    /// <summary>
    /// Compares <paramref name="x"/> with <paramref name="y"/>.
    /// </summary>
    /// <returns>
    /// A negative value, zero, or a positive value when <paramref name="x"/>
    /// sorts before, equal to, or after <paramref name="y"/>.
    /// </returns>
    public int Compare(string x, string y)
    {
        bool xIsInt = int.TryParse(x, out int xInt);
        bool yIsInt = int.TryParse(y, out int yInt);

        if (xIsInt && yIsInt)
        {
            // CompareTo instead of subtraction: xInt - yInt overflows (and flips
            // sign) when the operands are far apart, e.g. int.MaxValue and -1.
            return xInt.CompareTo(yInt);
        }

        if (xIsInt)
        {
            return -1; // number before text
        }

        if (yIsInt)
        {
            return 1; // text after number
        }

        return string.Compare(x, y, true);
    }
}
Using Regex.Replace is so simple yet efficient. Note that the number "3" just has to be equal to or larger than the longest run of digits in your strings, so for anyone else, increase as needed.
using System.Text.RegularExpressions;
string[] things = new string[] { "105", "101", "102", "103", "90" };
// Sort key: every run of digits is left-padded with zeros to width 3, so that
// plain string ordering of the keys agrees with numeric ordering.
foreach (var thing in things.OrderBy(x => Regex.Replace(x, @"\d+", i =>
    i.Value.PadLeft(3, '0'))))
{
    Console.WriteLine(thing);
}
I would have commented under recursive's answer, but my reputation is too low for that.
Because recursive's answer only works with numeric strings (if You have a string like "I am just a damn long string", it would be sorted after "Not so long string") and OP edited his answer, my Idea for the question would be to sort the strings by differentiating them into numbers and not numbers:
// Width of the longest entry; numeric entries are zero-padded to this width.
int maxlen = items.Max(x => x.Length);
// The result needs its own name: `var items = items...` would refer to itself and not compile.
// Entries that parse as long are ordered by their zero-padded form, all others by their own text.
var sortedItems = items.OrderBy(x => long.TryParse(x, out _) ? x.PadLeft(maxlen, '0') : x);
The underscore is for discarding the output
namespace X
{
    public class Utils
    {
        /// <summary>
        /// Compares two <c>Projects.Sample</c> objects by the part of their
        /// <c>sample_name</c> that precedes the first underscore, using the
        /// native <c>StrCmpLogicalW</c> function from Shlwapi.dll.
        /// </summary>
        public class StrCmpLogicalComparer : IComparer<Projects.Sample>
        {
            [DllImport("Shlwapi.dll", CharSet = CharSet.Unicode)]
            private static extern int StrCmpLogicalW(string x, string y);

            /// <summary>Returns the <c>StrCmpLogicalW</c> result for the two name prefixes.</summary>
            public int Compare(Projects.Sample x, Projects.Sample y)
            {
                // Split always yields at least one element, so index 0 is the whole
                // name when it contains no underscore.
                string leftPrefix = x.sample_name.Split("_")[0];
                string rightPrefix = y.sample_name.Split("_")[0];
                return StrCmpLogicalW(leftPrefix, rightPrefix);
            }
        }
    }
}
Even though this is an old question, I'd like to give a solution:
string[] things = new string[] { "105", "101", "102", "103", "90" };
// Sort by the parsed integer value; every entry must be a valid integer or Int32.Parse throws.
foreach (var thing in things.OrderBy(x => Int32.Parse(x)))
{
    Console.WriteLine(thing);
}
Woha quite simple right? :D

How to word by word iterate in string in C#?

I want to iterate over string as word by word.
If I have a string "incidentno and fintype or unitno", I would like to read every word one by one as "incidentno", "and", "fintype", "or", and "unitno".
// Split the sentence on single spaces and visit each resulting word in order.
foreach (string word in "incidentno and fintype or unitno".Split(' ')) {
...
}
// Split at a word boundary followed by any run of separator characters.
// NOTE(review): inside the character class, `\.-:` is a range from '.' to ':' and
// therefore also matches '/' and the digits 0-9 — confirm that is intended.
var regex = new Regex(@"\b[\s,\.-:;]*");
var phrase = "incidentno and fintype or unitno";
// The pattern can match the empty string, so empty pieces are filtered out.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
This works even if you have ".,; tabs and new lines" between your words.
Slightly twisted I know, but you could define an iterator block as an extension method on strings. e.g.
/// <summary>
/// Lazily walks over <paramref name="Text"/> and yields the pieces between
/// single space characters, one at a time, without building an array.
/// </summary>
/// <param name="Text">The string to split on spaces.</param>
/// <returns>
/// The same pieces, in the same order, as <c>Text.Split(' ')</c>: consecutive,
/// leading or trailing spaces produce empty strings, and an input without
/// spaces is yielded whole.
/// </returns>
public static IEnumerable<string> WordList(this string Text)
{
    // Index of the first character of the piece currently being scanned.
    int start = 0;
    int nIndex;
    while ((nIndex = Text.IndexOf(' ', start)) != -1)
    {
        yield return Text.Substring(start, nIndex - start);
        start = nIndex + 1;
    }
    // Remainder after the last space (the whole text when there was no space).
    yield return Text.Substring(start);
}
// Print every piece produced by WordList(), wrapped in single quotes.
foreach (var piece in "incidentno and fintype or unitno".WordList())
{
    System.Console.WriteLine(string.Concat("'", piece, "'"));
}
Which has the advantage of not creating a big array for long strings.
Use the Split method of the string class
// Use the char overload: Split(' ') exists on every .NET version, whereas
// Split(" ") only compiles where the string-separator overload is available.
string[] words = "incidentno and fintype or unitno".Split(' ');
This will split on spaces, so "words" will have [incidentno,and,fintype,or,unitno].
Assuming the words are always separated by a blank, you could use String.Split() to get an Array of your words.
There are multiple ways to accomplish this. Two of the most convenient methods (in my opinion) are:
Using string.Split() to create an array. I would probably use this method, because it is the most self-explanatory.
example:
// The sentence to break apart, followed by a split on the single-space character.
var startingSentence = "incidentno and fintype or unitno";
string[] seperatedWords = startingSentence.Split(new[] { ' ' });
Alternatively, you could use (this is what I would use):
// Split on spaces and drop the empty entries that consecutive spaces would otherwise produce.
char[] wordSeparators = { ' ' };
string[] seperatedWords = startingSentence.Split(wordSeparators, StringSplitOptions.RemoveEmptyEntries);
StringSplitOptions.RemoveEmptyEntries will remove any empty entries from your array that may occur due to extra whitespace and other minor problems.
Next - to process the words, you would use:
// Visit the split words one at a time, in their original order.
foreach (var word in seperatedWords)
{
    // Handle a single word here.
}
Or, you can use regular expressions to solve this problem, as Darin demonstrated (a copy is below).
example:
// Split at a word boundary followed by any run of separator characters.
// NOTE(review): inside the character class, `\.-:` is a range from '.' to ':' and
// therefore also matches '/' and the digits 0-9 — confirm that is intended.
var regex = new Regex(@"\b[\s,\.-:;]*");
var phrase = "incidentno and fintype or unitno";
// The pattern can match the empty string, so empty pieces are filtered out.
var words = regex.Split(phrase).Where(x => !string.IsNullOrEmpty(x));
For processing, you can use similar code to the first option.
// Visit each non-empty piece produced by the regex split.
foreach (var word in words)
{
    // Handle a single word here.
}
Of course, there are many ways to solve this problem, but I think that these two would be the simplest to implement and maintain. I would go with the first option (using string.Split()) just because regex can sometimes become quite confusing, while a split will function correctly most of the time.
When using split, what about checking for empty entries?
string sentence = "incidentno and fintype or unitno";
// Split on space, comma, semicolon, tab and line-break characters; empty entries are dropped.
string[] words = sentence.Split(new char[] { ' ', ',', ';', '\t', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string word in words)
{
    // Process
}
EDIT:
I can't comment so I'm posting here but this (posted above) works:
// Split(' ') is evaluated once to produce the array; the loop then walks that array.
foreach (string word in "incidentno and fintype or unitno".Split(' '))
{
...
}
My understanding of foreach is that it first calls GetEnumerator() and then calls .MoveNext() until false is returned. So the .Split won't be re-evaluated on each iteration.
/// <summary>
/// Splits <paramref name="inword"/> with the regular expression
/// <paramref name="regstr"/> and returns the non-empty pieces.
/// </summary>
/// <param name="inword">The text to split.</param>
/// <param name="regstr">Regular-expression pattern that matches the separators.</param>
/// <returns>The pieces of <paramref name="inword"/> in order, without empty strings.</returns>
public static string[] MyTest(string inword, string regstr)
{
    var regex = new Regex(regstr);
    // Split the caller's text (previously a hard-coded phrase was split and
    // inword was ignored).
    var pieces = regex.Split(inword);
    // Adjacent separators produce empty pieces; drop them so only words remain.
    return System.Array.FindAll(pieces, piece => piece.Length > 0);
}
? MyTest("incidentno, and .fintype- or; :unitno",@"[^\w+]")
[0]: "incidentno"
[1]: "and"
[2]: "fintype"
[3]: "or"
[4]: "unitno"
I'd like to add some information to JDunkerley's answer.
You can easily make this method more reliable if you give a string or char parameter to search for.
/// <summary>
/// Lazily yields the pieces of <paramref name="Text"/> that lie between
/// occurrences of the separator <paramref name="Word"/>.
/// </summary>
/// <param name="Text">The string to split.</param>
/// <param name="Word">The separator; matched with ordinal comparison.</param>
/// <returns>
/// The pieces in order. Adjacent, leading or trailing separators produce empty
/// strings; when the separator does not occur, or is empty, the whole text is
/// yielded as a single piece.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="Word"/> is null (raised on enumeration).</exception>
public static IEnumerable<string> WordList(this string Text, string Word)
{
    if (Word == null)
    {
        throw new System.ArgumentNullException(nameof(Word));
    }

    // An empty separator matches at every index and would never advance.
    if (Word.Length == 0)
    {
        yield return Text;
        yield break;
    }

    // Index of the first character of the piece currently being scanned.
    int start = 0;
    int nIndex;
    // Ordinal search, so a match is exactly Word.Length characters long.
    while ((nIndex = Text.IndexOf(Word, start, System.StringComparison.Ordinal)) != -1)
    {
        yield return Text.Substring(start, nIndex - start);
        // Skip the whole separator, not just one character.
        start = nIndex + Word.Length;
    }
    yield return Text.Substring(start);
}
/// <summary>
/// Lazily yields the pieces of <paramref name="Text"/> that lie between
/// occurrences of the separator character <paramref name="c"/>.
/// </summary>
/// <param name="Text">The string to split.</param>
/// <param name="c">The separator character.</param>
/// <returns>
/// The same pieces, in the same order, as <c>Text.Split(c)</c>: adjacent,
/// leading or trailing separators produce empty strings, and a text without
/// the separator is yielded whole.
/// </returns>
public static IEnumerable<string> WordList(this string Text, char c)
{
    // Index of the first character of the piece currently being scanned.
    int start = 0;
    int nIndex;
    while ((nIndex = Text.IndexOf(c, start)) != -1)
    {
        yield return Text.Substring(start, nIndex - start);
        start = nIndex + 1;
    }
    // Remainder after the last separator (the whole text when there was none).
    yield return Text.Substring(start);
}
I wrote a string processor class. You can use it.
Example:
// Count the words of bodyText (passing prepositions as the black list), rank them,
// keep the top entries, then join the words into one string.
var rankedWords = bodyText.Process(prepositions).OrderByDescending().TakeTop();
metaKeywords = rankedWords.GetWords().AsString();
Class:
/// <summary>
/// Extension methods for counting word frequencies in a text and for
/// ranking, trimming and formatting the resulting (word, count) list.
/// </summary>
public static class StringProcessor
{
    // Not referenced anywhere in this class; retained as-is.
    private static List<String> PrepositionList;

    /// <summary>
    /// Replaces U+06A9 (code 1705) with U+0643 (1603) and U+06CC (1740) with
    /// U+064A (1610). Returns an empty string for null or empty input.
    /// </summary>
    public static string ToNormalString(this string strText)
    {
        if (String.IsNullOrEmpty(strText)) return String.Empty;

        const char normalKaf = (char)1603;
        const char normalYah = (char)1610;
        const char nonNormalKaf = (char)1705;
        const char nonNormalYah = (char)1740;

        return strText.Replace(nonNormalKaf, normalKaf)
                      .Replace(nonNormalYah, normalYah);
    }

    /// <summary>
    /// Splits <paramref name="bodyText"/> on <paramref name="splitor"/>, strips
    /// punctuation from each piece and counts how often each remaining word occurs.
    /// </summary>
    /// <param name="bodyText">The text to analyse; null or empty yields an empty list.</param>
    /// <param name="blackListWords">Words to leave out of the count, or null for none.</param>
    /// <param name="minimumWordLength">Only words strictly longer than this are counted.</param>
    /// <param name="splitor">Character that separates words.</param>
    /// <param name="perWordIsLowerCase">When true, each word is lower-cased before counting.</param>
    /// <returns>One (word, count) pair per distinct counted word.</returns>
    public static List<KeyValuePair<String, Int32>> Process(this String bodyText,
        List<String> blackListWords = null,
        int minimumWordLength = 3,
        char splitor = ' ',
        bool perWordIsLowerCase = true)
    {
        string[] btArray = bodyText.ToNormalString().Split(splitor);
        var wordsDic = new Dictionary<String, Int32>();

        foreach (string word in btArray)
        {
            string lowerWord = perWordIsLowerCase ? word.ToLower() : word;
            var normalWord = lowerWord.Replace(".", "").Replace("(", "").Replace(")", "")
                .Replace("?", "").Replace("!", "").Replace(",", "")
                .Replace("<br>", "").Replace(":", "").Replace(";", "")
                .Replace("،", "").Replace("-", "").Replace("\n", "").Trim();

            if (normalWord.Length <= minimumWordLength || normalWord.IsMemberOfBlackListWords(blackListWords))
            {
                continue;
            }

            // Single lookup: seen is 0 when the word has not been counted yet.
            wordsDic.TryGetValue(normalWord, out int seen);
            wordsDic[normalWord] = seen + 1;
        }

        return wordsDic.ToList();
    }

    /// <summary>
    /// Returns a new list sorted in descending order, by count when
    /// <paramref name="isBasedOnFrequency"/> is true and by word otherwise.
    /// </summary>
    public static List<KeyValuePair<String, Int32>> OrderByDescending(this List<KeyValuePair<String, Int32>> list, bool isBasedOnFrequency = true)
    {
        return isBasedOnFrequency
            ? list.OrderByDescending(q => q.Value).ToList()
            : list.OrderByDescending(q => q.Key).ToList();
    }

    /// <summary>Returns a new list holding at most the first <paramref name="n"/> entries.</summary>
    public static List<KeyValuePair<String, Int32>> TakeTop(this List<KeyValuePair<String, Int32>> list, Int32 n = 10)
    {
        return list.Take(n).ToList();
    }

    /// <summary>Returns the words (keys) of the list, in list order.</summary>
    public static List<String> GetWords(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Key).ToList();
    }

    /// <summary>Returns the counts (values) of the list, in list order.</summary>
    public static List<Int32> GetFrequency(this List<KeyValuePair<String, Int32>> list)
    {
        return list.Select(item => item.Value).ToList();
    }

    /// <summary>
    /// Joins the items of <paramref name="list"/> into one string, placing
    /// <paramref name="seprator"/> between consecutive items only (no trailing
    /// separator). An empty list yields an empty string.
    /// </summary>
    public static String AsString<T>(this List<T> list, string seprator = ", ")
    {
        return string.Join(seprator, list);
    }

    /// <summary>
    /// Reports whether <paramref name="word"/> equals any black-list entry after
    /// that entry has been passed through <see cref="ToNormalString"/>.
    /// A null black list never matches.
    /// </summary>
    private static bool IsMemberOfBlackListWords(this String word, List<String> blackListWords)
    {
        if (blackListWords == null) return false;
        return blackListWords.Any(w => w.ToNormalString().Equals(word));
    }
}

Categories