Split a range sequence into multiple string c#,linq [closed] - c#

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 7 years ago.
Improve this question
Not sure why this question is being marked as off-topic, whereas the so-called desired behaviour is included within the question post!
I am trying to write this program that takes two inputs:
• a set of include intervals
• and a set of exclude intervals
The sets of intervals can be given in any order, and they may be empty or overlapping. The program should output the result of taking all the includes and “remove” the excludes. The output should be given as non-overlapping intervals in a sorted order.
Intervals will contain Integers only
Example :
Includes: 50-600, 10-100
Excludes: (empty)
Output: 10-600
Includes: 10-100, 200-300, 400-600
Excludes: 95-205, 410-420
Output: 10-94, 206-300, 400-409, 421-600
I tried to populate two Enumerable Range from include and excludes (after splitting,parsing ), but didn't find any efficient way of implementing this afterwards.
// Asker's attempt: expand every include range and every exclude range into an
// explicit list of integers and subtract them pairwise.
string[] _break = _string.Split(',');
string[] _breakB = _stringB.Split(',');
// NOTE: sized by the number of include ranges only, but one entry is written per
// (include, exclude) pair below, so `count` can run past the end of this array.
string[] res = new string[_break.Length + 1];
string[] _items, _itemsB;
// Declared but never used in this fragment.
List < int > _back = new List < int > ();
int count = 0;
foreach(var _item in _break) {
_items = _item.Split('-');
// All integers of the current include range, inclusive on both ends.
var a = Enumerable.Range(int.Parse(_items[0]), (int.Parse(_items[1]) - int.Parse(_items[0]) + 1)).ToList();
foreach(var _itemB in _breakB) {
_itemsB = _itemB.Split('-');
// All integers of the current exclude range, inclusive on both ends.
var b = Enumerable.Range(int.Parse((_itemsB[0])), (int.Parse(_itemsB[1]) - int.Parse((_itemsB[0])) + 1)).ToList();
// Include values that survive this single exclude range (other excludes are not applied).
var c = a.Except < int > (b).ToList();
// different things tried here, but they are not good
// NOTE: Min()/Max() collapse the survivors into one range, so a hole cut out of the
// middle of the include range is lost.
res[count] = c.Min().ToString() + "-" + c.Max().ToString();
count++;
}
}
return res;
Any input will be of great help

You can use the Built-in SortedSet<T> collection to do most of the work for you like this:
The SortedSet<T> collection implements the useful UnionWith and ExceptWith methods which at least makes the code quite easy to follow:
/// <summary>
/// Reads the include and exclude range lists from the two input boxes, removes every
/// excluded integer from the included ones and writes the remaining values to
/// <c>result</c> as sorted, non-overlapping "from-to" ranges separated by commas.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    SortedSet<int> includeSet = ParseRanges(_string.Text);
    SortedSet<int> excludeSet = ParseRanges(_stringB.Text);

    // Exclude the excludeSet from the includeSet
    includeSet.ExceptWith(excludeSet);

    // Format the output: walk the sorted values and emit one "start-end" per run of
    // consecutive integers. An explicit flag is used instead of a magic sentinel value
    // so that an empty result produces an empty string.
    StringBuilder sb = new StringBuilder();
    bool inRun = false;
    int runStart = 0;
    int previous = 0;
    foreach (int included in includeSet)
    {
        if (inRun && previous == included - 1)
        {
            previous = included;
            continue;
        }
        if (inRun)
        {
            sb.Append(runStart).Append('-').Append(previous).Append(',');
        }
        runStart = included;
        previous = included;
        inRun = true;
    }
    if (inRun)
    {
        sb.Append(runStart).Append('-').Append(previous);
    }
    result.Text = sb.ToString();
}

/// <summary>
/// Parses a comma separated list such as "10-100, 200-300" into the set of all integers
/// covered by the listed ranges. Blanks are ignored and an empty list yields an empty set.
/// </summary>
/// <param name="text">The raw text of a range list; may be empty.</param>
/// <returns>The union of all listed ranges.</returns>
private static SortedSet<int> ParseRanges(string text)
{
    SortedSet<int> values = new SortedSet<int>();
    // RemoveEmptyEntries makes an empty box (or a trailing comma) contribute nothing
    // instead of producing an empty token that int.Parse would reject.
    string[] ranges = text.Replace(" ", "").Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string item in ranges)
    {
        string[] bounds = item.Split('-');
        int first = int.Parse(bounds[0]);
        int last = int.Parse(bounds[1]);
        values.UnionWith(Enumerable.Range(first, last - first + 1));
    }
    return values;
}

This should work faster than SortedSet trick, at least for large intervals. Idea is like:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace Test
{
    using Pair = Tuple<int, int>; //for brevity

    /// <summary>
    /// One border (left or right end) of an include or exclude interval.
    /// </summary>
    struct Point //point of an interval
    {
        public enum Border { Left, Right };
        public enum Interval { Including, Excluding };

        /// <summary>Position of the border on the number line.</summary>
        public int Val;
        /// <summary>+1 for a left (opening) border, -1 for a right (closing) border.</summary>
        public int Brdr;
        /// <summary>Numeric value of <see cref="Interval"/>: 0 = include, 1 = exclude.</summary>
        public int Intr;

        public Point(int value, Border border, Interval interval)
        {
            Val = value;
            Brdr = (border == Border.Left) ? 1 : -1;
            Intr = (int)interval;
        }

        public override string ToString() =>
            (Brdr == 1 ? "L" : "R") + (Intr == 0 ? "+ " : "- ") + Val;
    }

    class Program
    {
        /// <summary>
        /// Computes "includes minus excludes" with a sweep over all interval borders.
        /// </summary>
        /// <param name="strIn">Include intervals, e.g. "10-100, 200-300"; numbers are read in pairs.</param>
        /// <param name="strEx">Exclude intervals in the same format; may be empty.</param>
        /// <returns>Sorted, non-overlapping (start, end) pairs, both ends inclusive.</returns>
        static IEnumerable<Pair> GetInterval(string strIn, string strEx)
        {
            //a func to get interval border points from string:
            //numbers alternate left border, right border, left border, ...
            Func<string, Point.Interval, IEnumerable<Point>> parse = (str, intr) =>
                Regex.Matches(str ?? string.Empty, "[0-9]+").Cast<Match>().Select((s, idx) =>
                    new Point(int.Parse(s.Value), (Point.Border)(idx % 2), intr));

            var INs = parse(strIn, Point.Interval.Including);
            var EXs = parse(strEx, Point.Interval.Excluding);

            // Concat, not Union: Union would drop identical border points (for example the
            // shared left border of "10-20, 10-30") and unbalance the open/close counters.
            // Ties at the same value are ordered so that
            //   1. left borders are handled before right borders,
            //   2. among left borders an exclude opens before an include,
            //   3. among right borders an include closes before an exclude,
            // which keeps a value that is both included and excluded out of the result.
            var points = INs.Concat(EXs)
                .OrderBy(x => x.Val)
                .ThenByDescending(x => x.Brdr)
                .ThenBy(x => -x.Brdr * x.Intr);

            var intrs = new int[2]; //current interval border control IN[0], EX[1]
            int start = 0; //left border of a new resulting interval

            //put all points in a line and loop:
            foreach (var p in points)
            {
                //check for start (close) of a new (cur) interval:
                //only a 0 -> 1 or 1 -> 0 transition of a counter changes the covered area
                var change = (intrs[p.Intr] == 0) ^ (intrs[p.Intr] + p.Brdr == 0);
                intrs[p.Intr] += p.Brdr;
                if (!change) continue;

                var In = p.Intr == 0 && intrs[1] == 0; //w no Ex
                var Ex = p.Intr == 1 && intrs[0] > 0; //breaks In
                var Open = intrs[p.Intr] > 0;
                var Close = !Open;

                if (In && Open || Ex && Close)
                {
                    start = p.Val + p.Intr; //exclude point if Ex
                }
                else if (In && Close || Ex && Open)
                {
                    int end = p.Val - p.Intr;
                    // Back-to-back excludes (e.g. 15-20 and 21-25) leave nothing between
                    // them; skip the resulting empty interval instead of emitting end < start.
                    if (start <= end)
                    {
                        yield return new Pair(start, end);
                    }
                }
            }
        }

        static void Main(string[] args)
        {
            var strIN = "10-100, 200-300, 400-500, 420-480";
            var strEX = "95-205, 410-420";

            foreach (var i in GetInterval(strIN, strEX))
                Console.WriteLine(i.Item1 + "-" + i.Item2);

            Console.ReadLine();
        }
    }
}

So, your task can be split into the following subtasks:
Parse a source line of intervals into a list of objects
Concatenate intervals if they overlap each other
Exclude the 'excludes' intervals from the 'includes' intervals
I published my result code here: http://rextester.com/OBXQ56769
The code could be optimized as well, but I wanted it to be quite simple. Hope it will help you.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace ConsoleApplication
{
    /// <summary>
    /// Computes "include intervals minus exclude intervals" on inclusive integer intervals.
    /// </summary>
    public class Program
    {
        private const string Includes = "10-100, 200-300, 400-500 ";
        private const string Excludes = "95-205, 410-420";
        // One interval: two non-empty digit runs separated by a dash (blanks around the dash allowed).
        private const string Pattern = @"(\d+)\s*-\s*(\d+)";

        public static void Main(string[] args)
        {
            var includes = ParseIntevals(Includes);
            var excludes = ParseIntevals(Excludes);
            includes = ConcatinateIntervals(includes);
            excludes = ConcatinateIntervals(excludes);

            // The Result
            var result = ExcludeFromInclude(includes, excludes);
            foreach (var interval in result)
            {
                Console.WriteLine(interval.Min + "-" + interval.Max);
            }
        }

        /// <summary>
        /// Excludes intervals 'excludes' from 'includes'
        /// </summary>
        /// <param name="includes">Intervals to keep; any order, may overlap, may be null or empty.</param>
        /// <param name="excludes">Intervals to remove; any order, may overlap, may be null or empty.</param>
        /// <returns>New, sorted, non-overlapping intervals covering every included value that is not excluded.</returns>
        public static List<Interval> ExcludeFromInclude(List<Interval> includes, List<Interval> excludes)
        {
            var result = new List<Interval>();
            // Normalise both sides so the sweep below can rely on sorted, disjoint input.
            var keep = ConcatinateIntervals(includes);
            var cuts = ConcatinateIntervals(excludes);

            int firstCut = 0;
            foreach (var include in keep)
            {
                // Cuts entirely left of this include are also left of all later includes.
                while (firstCut < cuts.Count && cuts[firstCut].Max < include.Min)
                {
                    firstCut++;
                }

                // First value of the include that has not been emitted or cut away yet;
                // long so that "cut.Max + 1" cannot overflow.
                long cursor = include.Min;
                for (int k = firstCut; k < cuts.Count && cuts[k].Min <= include.Max; k++)
                {
                    if (cuts[k].Min > cursor)
                    {
                        result.Add(new Interval((int)cursor, cuts[k].Min - 1));
                    }
                    cursor = Math.Max(cursor, (long)cuts[k].Max + 1);
                }

                if (cursor <= include.Max)
                {
                    result.Add(new Interval((int)cursor, include.Max));
                }
            }
            return result;
        }

        /// <summary>
        /// Concatinates intervals if they cross each over
        /// </summary>
        /// <param name="intervals">Intervals in any order; may be null or empty.</param>
        /// <returns>New intervals sorted by Min in which no two intervals share a value.</returns>
        public static List<Interval> ConcatinateIntervals(List<Interval> intervals)
        {
            var result = new List<Interval>();
            if (intervals == null)
            {
                return result;
            }

            // After sorting by Min, an interval can only overlap the most recent merged one.
            foreach (var current in intervals.Where(x => x != null).OrderBy(x => x.Min).ThenBy(x => x.Max))
            {
                var last = result.Count > 0 ? result[result.Count - 1] : null;
                if (last != null && current.Min <= last.Max)
                {
                    if (current.Max > last.Max)
                    {
                        last.Max = current.Max;
                    }
                }
                else
                {
                    // Copy so that callers' objects are never modified by later merges.
                    result.Add(new Interval(current.Min, current.Max));
                }
            }
            return result;
        }

        /// <summary>
        /// Parses a source line of intervals to the list of objects
        /// </summary>
        /// <param name="intervals">Text such as "10-100, 200-300"; null or empty yields an empty list.</param>
        /// <returns>The parsed intervals ordered by Min.</returns>
        public static List<Interval> ParseIntevals(string intervals)
        {
            var list = new List<Interval>();
            foreach (Match match in Regex.Matches(intervals ?? string.Empty, Pattern))
            {
                var min = int.Parse(match.Groups[1].Value);
                var max = int.Parse(match.Groups[2].Value);
                list.Add(new Interval(min, max));
            }
            return list.OrderBy(x => x.Min).ToList();
        }

        /// <summary>
        /// Interval of integers, inclusive on both ends, with value equality.
        /// </summary>
        public class Interval
        {
            public int Min { get; set; }
            public int Max { get; set; }

            public Interval()
            {
            }

            public Interval(int min, int max)
            {
                Min = min;
                Max = max;
            }

            public override bool Equals(object obj)
            {
                var obj2 = obj as Interval;
                if (obj2 == null) return false;
                return obj2.Min == Min && obj2.Max == Max;
            }

            public override int GetHashCode()
            {
                // Combine both bounds without allocating a string.
                unchecked
                {
                    return (Min * 397) ^ Max;
                }
            }

            public override string ToString()
            {
                return string.Format("{0}-{1}", Min, Max);
            }

            public Interval Clone()
            {
                return (Interval) this.MemberwiseClone();
            }
        }
    }
}

Lots of ways to solve this. The LINQ approach hasn't been discussed yet - this is how I would do it:
// Shared by the include and the exclude list: turns "10-100, 200-300" into the
// flat sequence of every integer covered by the listed ranges.
Func<string, IEnumerable<int>> rangeFn =
    baseInput => baseInput
        .Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries)
        .SelectMany(rng =>
        {
            // Materialise once; a deferred query would re-split and re-parse the
            // text for each of the Min and Max calls below.
            var bounds = rng.Split(new[] { '-' }, StringSplitOptions.RemoveEmptyEntries)
                            .Select(i => Convert.ToInt32(i))
                            .ToArray();
            // just in case someone types in
            // a reverse range (e.g. 10-5), LOL...
            var start = bounds.Min();
            var end = bounds.Max();
            return Enumerable.Range(start, end - start + 1);
        });
var includes = rangeFn(_string);
var excludes = rangeFn(_stringB);
// Except is a set difference and already yields each value once, so no Distinct is needed.
var result = includes.Except(excludes).OrderBy(r => r);

Related

How to find number of specific strings in a string in c#

There is a string: xxoxoxoxoxoxooxxxxox where x is a seat that is occupied and o is not, I have to find individual occupied seats, with both sides having an x.
I tried to look at
// Asker's attempt: count every position where the three characters starting at i are "xox".
// NOTE: i runs up to Length - 1, so string[i + 1] and string[i + 2] index past the end
// of the string on the last iterations.
for (int i = 0; i < string.Length; i++) {
if(string[i]=='x' && string[i + 1]=='o' && string[i + 2] == 'x')
{
count++;
}
}
but i got error so I was wondering if theres a good way to do it.
As the question is pretty unclear, I am assuming that you are looking for a pattern xox and want to know the position of o.
you can run a for loop and get the index.
to get the count of such patterns. you can increment the count by 1.
string str = "xxoxoxoxoxoxooxxxxox";
// Number of "xox" patterns found; declared here so the snippet is self-contained.
int count = 0;
// Stop two characters before the end so that str[i + 2] is always a valid index.
for (int i = 0; i < str.Length - 2; i++)
{
    if (str[i] == 'x' && str[i + 1] == 'o' && str[i + 2] == 'x')
    {
        // i + 1 is the position of the 'o' in the middle of the pattern.
        Console.WriteLine(i + 1);
        count++;
    }
}
you can change the character value based on your requirement.
you can use regex.matches to find all matches ...
string s = "xxoxoxoxoxoxooxxxxox";
// Matching the literal "xox" consumes the trailing 'x', so in "xoxox" the second,
// overlapping pattern would be skipped. The lookahead only peeks at "ox", which
// reports every pattern start while keeping Match.Index on the leading 'x'.
Regex rx = new Regex("x(?=ox)");
foreach (Match match in rx.Matches(s))
{
    Console.WriteLine("Match index: "+ match.Index);
}
RegEx approach which gives you all indices of individual occupied seats oxo https://dotnetfiddle.net/3jc1Vq
string input = "xxoxoxoxoxoxooxxxxox";
// An 'o' with an 'x' directly before and after it; the lookarounds do not consume the
// neighbours, so overlapping patterns are all found (indices 2, 4, 6, 8, 10 and 18 here).
MatchCollection singles = Regex.Matches(input, "(?<=x)o(?=x)");
int[] indices = singles.Cast<Match>().Select(x => x.Index).ToArray();
// in case you only want the count
// (the Count property needs no LINQ and avoids running the regex a second time)
int count = singles.Count;
I made a working example that makes use of ReadOnlySpan<T> and avoids RegEx and over allocation.
using System;
using System.Collections.Generic;
public class Program
{
    /// <summary>
    /// Prints, one per line, every index returned by GetSingles for the sample seat row.
    /// </summary>
    public static void Main()
    {
        const string seats = "xxoxoxoxoxoxooxxxxox";
        IReadOnlyList<int> singles = Ext.GetSingles(seats.ToCharArray(), 'o');
        for (int k = 0; k < singles.Count; k++)
        {
            Console.WriteLine(singles[k]);
        }
    }
}
public static class Ext
{
    /// <summary>
    /// Finds every element equal to <paramref name="search"/> that stands alone, i.e. whose
    /// existing neighbours (left and right) are both different from <paramref name="search"/>.
    /// The first and last element only have one neighbour to check.
    /// </summary>
    /// <typeparam name="T">Element type; compared with the default equality comparer.</typeparam>
    /// <param name="source">The sequence to scan.</param>
    /// <param name="search">The value to look for.</param>
    /// <returns>The zero-based indices of all isolated occurrences, in ascending order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public static IReadOnlyList<int> GetSingles<T>(this T[] source, T search)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        var results = new List<int>();
        // The default comparer copes with null elements and avoids boxing for IEquatable<T>.
        var comparer = EqualityComparer<T>.Default;
        ReadOnlySpan<T> items = source;

        // One uniform pass replaces the separate length 0/1/2 special cases and the
        // trailing-element branch, which used to report source.Length (one past the
        // last valid index) for an isolated final element.
        for (int i = 0; i < items.Length; i++)
        {
            if (!comparer.Equals(items[i], search))
            {
                continue;
            }

            bool leftDiffers = i == 0 || !comparer.Equals(items[i - 1], search);
            bool rightDiffers = i == items.Length - 1 || !comparer.Equals(items[i + 1], search);
            if (leftDiffers && rightDiffers)
            {
                results.Add(i);
            }
        }
        return results;
    }
}
This outputs,
2
4
6
8
10
18
With the more challenging test data,
public class Program
{
    /// <summary>
    /// Runs GetSingles over a list of edge-case seat rows; for every row it prints the
    /// row number and the quoted row, followed by one line per reported index.
    /// </summary>
    public static void Main()
    {
        string[] tests =
        {
            "",
            "o",
            "x",
            "oo",
            "ox",
            "xo",
            "xx",
            "oxx",
            "oox",
            "xox",
            "xoo",
            "xoxoxoxo",
            "xoxoxoxoo",
            "xoxoxox"
        };

        int caseNumber = 0;
        foreach (string seats in tests)
        {
            Console.WriteLine($"{caseNumber}:\"{seats}\"");
            foreach (int hit in seats.ToCharArray().GetSingles('o'))
            {
                Console.WriteLine(hit);
            }
            caseNumber++;
        }
    }
}
we get the correct output,
0:""
1:"o"
0
2:"x"
3:"oo"
4:"ox"
0
5:"xo"
1
6:"xx"
7:"oxx"
0
8:"oox"
9:"xox"
1
10:"xoo"
11:"xoxoxoxo"
1
3
5
8
12:"xoxoxoxoo"
1
3
5
13:"xoxoxox"
1
3
5

find the first element of an array that is not consecutive using web forms

E.g. If we have an array [1,2,3,4,6,7,8] then 1 then 2 then 3 then 4 are all consecutive but 6 is not, so that's the first non-consecutive number.
If the whole array is consecutive then return null .
The array will always have at least 2 elements, and all elements will be numbers. The numbers will also all be unique and in ascending order. The numbers could be positive or negative, and the first non-consecutive number could be either, too. Please help me finish this code; I am new to programming. My code:
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace _2katas
{
// Asker's unfinished WinForms form. The controls it uses (txtInput, lblPrint,
// lblKataRunning, rdbConsecutive, rdbStripCleaning) are not declared in this fragment.
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
// Click handler: splits the comma separated input and walks it looking for the first
// element that breaks the consecutive run.
private void button1_Click(object sender, EventArgs e)
{
var input = this.txtInput.Text;
var numarray = input.Split(',');
// In a consecutive run starting at firstValue, element i equals firstValue + i.
int firstValue = Convert.ToInt32(numarray[0]);
for (var i = 0; i < numarray.Length; i++)
{
// True when element i is not where a consecutive run would put it.
if (Convert.ToInt32(numarray[i]) - i != firstValue)
{
// NOTE: both branches assign an empty string, so the result is never shown and
// the loop does not stop at the first hit -- this is the part left unfinished.
lblPrint.Text = "";
}
else
{
lblPrint.Text = "";
}
// Shows which kata is selected; this runs on every loop iteration.
if (this.rdbConsecutive.Checked == true)
{
lblKataRunning.Text = "Consecutive";
}
else if (this.rdbStripCleaning.Checked == true)
{
lblKataRunning.Text = "Strip Cleaning";
}
}
}
}
}
Let's extract a method:
Find the first element of an array that is not consecutive ...
If the whole array is consecutive then return null
We can implement it like this:
/// <summary>
/// Finds the first item of a comma separated list that does not continue the run of
/// consecutive integers started by the first item.
/// </summary>
/// <param name="data">Comma separated user input, e.g. "1,2,3,4,6,7,8"; may be null or blank.</param>
/// <returns>
/// The offending item with surrounding blanks removed (an item that is not an integer
/// counts as offending), or null when the whole list is consecutive or the input is
/// null or blank.
/// </returns>
private static string FirstInconsecutive(string data) {
    // Nothing to inspect: Split would otherwise throw on null, or return a single
    // empty item for blank input.
    if (string.IsNullOrWhiteSpace(data))
        return null;

    var array = data.Split(',');

    // delta is the first value; in a consecutive run item i equals delta + i.
    if (!int.TryParse(array[0], out int delta))
        return array[0].Trim();

    for (int i = 1; i < array.Length; ++i)
        // long arithmetic so that values near int.MinValue cannot wrap around.
        if (!int.TryParse(array[i], out int value) || (long)value - i != delta)
            return array[i].Trim();

    return null;
}
Usage:
string result = FirstInconsecutive(txtInput.Text);
Please note int.TryParse which helps to return the right answer "ABC" on an input like "1, 2, 3, ABC, 4, 6, 7, 8" (user input txtInput.Text can contain any string)
A linq solution just for the fun of it:
/// <summary>
/// Returns the first element that breaks a run of consecutive ascending integers.
/// </summary>
/// <param name="arr">The numbers to inspect, expected in ascending order.</param>
/// <returns>The first element that is not "first element + its index", or null if there is none.</returns>
static int? FindFirstNonConsecutive(IEnumerable<int> arr)
{
    // Materialise once: the source is otherwise re-enumerated for every element
    // (arr.First() inside the predicate), which is slow and unsafe for one-shot sequences.
    var values = arr as IList<int> ?? arr.ToList();
    if (values.Count == 0)
        return null;

    var first = values[0];
    // In a consecutive run, value - index is constant and equal to the first value.
    var nonConsecutiveInfo =
        values.Select((value, index) => new { Value = value, Index = index })
              .FirstOrDefault(t => t.Value - t.Index != first);
    return nonConsecutiveInfo?.Value;
}
Note that this will only work finding non consecutive numbers in ascending order as per requirements.
Two numbers are not consecutive if the left ones + 1 <> the right one.
Check with something like this, note that you have to change your boundary checks:
for (var i = 0; i < numarray.Length - 1; i++)
{
if (Convert.ToInt32(numarray[i]) + 1 != Convert.ToInt32(numarray[i+1]))
Update your condition as below for loop and it will work. I would suggest you to have separate function so that it could be reusable if needed elsewhere in code.
Here start your loop from i = 1 and compare numarray[i-1] + 1 != numarray[i] values.
You can convert your sting[] to int[] with var numarray = input.Split(',').Select(x => Convert.ToInt32(x)).ToArray(); and use it with IsConsecutive(numarray) as per button1_Click code.
You can get first non-consecutive value with minor modification in return type and return statement as shown in GetFirstNonConsecutiveValue().
/// <summary>
/// Tells whether every element is exactly one greater than the element before it.
/// </summary>
/// <param name="numarray">The numbers to check.</param>
/// <returns>false as soon as one adjacent pair breaks the rule; otherwise true.</returns>
public bool IsConsecutive(int[] numarray)
{
    int index = 1;
    while (index < numarray.Length)
    {
        int expected = numarray[index - 1] + 1;
        if (expected != numarray[index])
        {
            return false;
        }
        index++;
    }
    return true;
}
/// <summary>
/// Returns the first element that is not exactly one greater than its predecessor.
/// </summary>
/// <param name="numarray">The numbers to check.</param>
/// <returns>The first such element, or null when every adjacent pair is consecutive.</returns>
public int? GetFirstNonConsecutiveValue(int[] numarray)
{
    int index = 1;
    while (index < numarray.Length)
    {
        int current = numarray[index];
        if (numarray[index - 1] + 1 != current)
        {
            return current;
        }
        index++;
    }
    return null;
}
/// <summary>
/// Parses the comma separated numbers typed into txtInput and asks
/// GetFirstNonConsecutiveValue for the first element that breaks the run.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    string[] tokens = this.txtInput.Text.Split(',');
    int[] numarray = Array.ConvertAll(tokens, token => Convert.ToInt32(token));
    int? nonConsecutiveValue = GetFirstNonConsecutiveValue(numarray);
    if (!nonConsecutiveValue.HasValue)
    {
        // sequence is consecutive.
    }
    else
    {
        // nonConsecutiveValue is first non consecutive value.
    }
}
One way to go.
string rawData = "1,2,3,4,6,7,8,9";
// Parse once up front; a deferred query would re-split and re-parse the text for
// every Min/Max/Except call below.
int[] data = rawData.Split(',').Select(v => Convert.ToInt32(v)).ToArray();
int min = data.Min();
int max = data.Max();
// Every value the input would contain if it had no gaps ...
IEnumerable<int> all = Enumerable.Range(min, max - min + 1);
// ... minus the values actually present = the missing values, in ascending order.
int[] diff = all.Except(data).ToArray();
if (diff.Length == 0)
{
    return null;
}
// Up to the first gap, data and the gap-free range line up index by index, so the
// offset of the first missing value (diff[0] - min) is the index in data of the
// first element after the gap.
return data[diff[0] - min];
NB Not thoroughly tested.
Just test diff for being empty to get the numbers are consecutive

How to add array values to equal a number, or fail if values in array wont work

I am trying to build a calculator that will tell me what "steps" are needed to stack up to a given value. Using only that "steps" available in the array.
Example:
decimal[] blocks {.05, .100, .150, .200, .250}
goal = .550m
result = .100, .200, .250
I have tried using nested if statements and array find/last with not much luck.
I can get a match if the goal is an exact match, or will match with two of them stacked. I can't get it to work for the max(.750).
This is what I have so far:
code:
// Asker's attempt: greedily pick gauge blocks from GaugeBlockArray that add up to goal.
string result = "nope";
decimal goal = 3.264m;
// Chosen blocks; only slots 0..2 are used below.
decimal[] DAStep = new decimal[10];
// Look for a single block equal to the goal; 0m is treated as "not found" below.
decimal temp = Array.Find(GaugeBlockArray, element => element.Equals(goal));
if (temp != 0m)
{
DAStep[0] = Array.Find(GaugeBlockArray, element => element.Equals(temp));
result = DAStep[0].ToString();
}
else
{
// Take the last block in array order that is not larger than the goal.
DAStep[0] = GaugeBlockArray.Last(element => element <= goal); ;
decimal remaining;
remaining = goal - DAStep[0];
// NOTE: `remaining` is never updated inside this loop, so once entered the loop
// condition cannot become false.
while (remaining != 0m)
{
DAStep[1] = GaugeBlockArray.Last(element => element <= remaining);
if (DAStep[1] != remaining)
{
// Try the next smaller candidate below the block just picked.
DAStep[2] = GaugeBlockArray.Last(element => element <= (DAStep[1] - .0001m));
if (DAStep[2] == 0) { DAStep[1] = DAStep[2]; }
}
}
}
GaugeBlockArray contains an array of 72 different elements from .05 to 4.0. And, I can only use each block once.
edit:
I guess more detail on the array contents may help getting to a solution.
GaugeBlockArray:
.05
.100
.1001
.1002
.1003
.1004
.1005
.1006
.1007
.1008
.1009
.110
.111
.112
.113
.114
.115
.116
.117
.118
.119
.120
.121
.122
.123
.124
.125
.126
.127
.128
.129
.130
.131
.132
.133
.134
.135
.136
.137
.138
.139
.140
.141
.142
.143
.144
.145
.146
.147
.148
.149
.150
.200
.250
.300
.350
.400
.450
.500
.550
.600
.650
.700
.750
.800
.850
.900
.950
1.000
2.000
3.000
4.000
Many thanks to @GeorgPatscheider for getting me pointed in the right direction!
This is my final working result:
/// <summary>
/// Entry point of the gauge block search: marks the calculator button as busy, scales
/// the decimal inputs by 10000 into integers and starts the subset search.
/// </summary>
/// <param name="DNumbers">Available block sizes.</param>
/// <param name="Dsum">Target value the chosen blocks must add up to.</param>
public static void CountSum(decimal[] DNumbers, decimal Dsum)
{
    foreach (Window window in Application.Current.Windows)
    {
        if (window.GetType() != typeof(MetTracker.GaugeCalc))
        {
            continue;
        }
        (window as MetTracker.GaugeCalc).CalculateBtn.Content = "working...";
    }

    // Scale by 10000 so that the four decimal places become whole numbers.
    decimal[] scaledNumbers = Array.ConvertAll(DNumbers, element => 10000m * element);
    string TempString = GetSettingsStrings("GBCMaxStep"); // only used to initialize max step value
    Int32 Isum = Convert.ToInt32(Dsum * 10000m);
    Int32[] INumbers = Array.ConvertAll(scaledNumbers, element => (Int32)element);

    GetmNumberOfSubsets(INumbers, Isum);

    // Re-arm the search flag for the next run.
    success = false;
}
/// <summary>
/// Stores the search inputs in the static fields read by the search and runs FindSubsetSum.
/// </summary>
/// <param name="numbers">Scaled block sizes to choose from.</param>
/// <param name="Isum">Scaled target sum.</param>
private static void GetmNumberOfSubsets(Int32[] numbers, Int32 Isum)
{
    sum = Isum;
    set = numbers;

    FindSubsetSum();
}
//-------------------------------------------------------------
// Candidate values to choose from; assigned in GetmNumberOfSubsets.
static Int32[] set;
// Indexes into `set` of the subset currently being tried; re-allocated per subset size in FindPowerSet.
static Int32[] subSetIndexes;
// Target sum; assigned in GetmNumberOfSubsets.
static Int32 sum;
// Number of matching subsets found; reset in FindSubsetSum, incremented in PrintSubSet.
static Int32 numberOfSubsetSums;
// Set by PrintSubSet once a matching subset was reported; stops the recursion in CombinationsNoRepetition.
static bool success = false;
// Values of the matching subset; filled and cleared in PrintSubSet.
static List<Int32> ResultSet = new List<Int32>();
// String form of every matching value; appended to in PrintSubSet.
static List<string> results = new List<string>();//------------------------------------------------------------
/*
Method: FindSubsetSum()
Resets the match counter and searches the subsets of `set` for the wanted sum.
*/
private static void FindSubsetSum()
{
    numberOfSubsetSums = 0;
    FindPowerSet(set.Length);
}
//-------------------------------------------------------------
/*
Method: FindPowerSet(int n)
Tries subsets of growing size k = 0, 1, ..., n - 1 and stops once the size just tried
has reached GBC_MaxStepSetting. Shows a message box when no subset matched.
*/
private static void FindPowerSet(Int32 n)
{
    Int32 k = 0;
    while (k < n)
    {
        subSetIndexes = new Int32[k];
        CombinationsNoRepetition(k, 0, n - 1);
        if (subSetIndexes.Length >= GBC_MaxStepSetting)
        {
            break;
        }
        k++;
    }

    if (numberOfSubsetSums != 0)
    {
        return;
    }
    MessageBox.Show("No subsets with wanted sum exist.");
}
//-------------------------------------------------------------
/*
Method: CombinationsNoRepetition(int k, int iBegin, int iEnd);
Recursively fills subSetIndexes[k - 1 .. 0] with increasing indexes taken from
iBegin..iEnd and hands every completed combination to PrintSubSet. No further
combinations are generated once `success` is set.
*/
private static void CombinationsNoRepetition(Int32 k, Int32 iBegin, Int32 iEnd)
{
    // All k slots are filled: evaluate the combination.
    if (k == 0)
    {
        PrintSubSet();
        return;
    }

    if (success)
    {
        return;
    }

    for (Int32 i = iBegin; i <= iEnd; i++)
    {
        subSetIndexes[k - 1] = i;
        // The remaining slots may only use indexes after i, so no element is reused.
        CombinationsNoRepetition(k - 1, i + 1, iEnd);
        if (success)
        {
            break;
        }
    }
}
/// <summary>
/// Checks whether the subset described by subSetIndexes adds up to the wanted sum.
/// On a match it records the values, shows them (scaled back by 10000) together with
/// their total in a message box, sets the success flag and restores the caption of the
/// calculate button.
/// </summary>
private static void PrintSubSet()
{
    Int32 currentSubsetSum = 0;
    // accumulate sum of current subset; give up as soon as the target is exceeded
    for (Int32 i = 0; i < subSetIndexes.Length; i++)
    {
        currentSubsetSum += set[subSetIndexes[i]];
        if (currentSubsetSum > sum) { return; }
    }

    if (currentSubsetSum != sum)
    {
        return;
    }

    // wanted sum: report current subset elements
    ++numberOfSubsetSums;
    for (Int32 i = 0; i < subSetIndexes.Length; i++)
    {
        results.Add((set[subSetIndexes[i]]).ToString());
        ResultSet.Add(set[subSetIndexes[i]]);
    }

    // Scale the integers back to the original decimal block sizes.
    decimal[] ResultSetArrD = Array.ConvertAll(ResultSet.ToArray(), element => element / 10000m);
    // One conversion is enough; it used to be repeated once per element inside a loop.
    string[] ResultString = Array.ConvertAll(ResultSetArrD, element => element.ToString("0.0000"));
    var message = string.Join(Environment.NewLine, ResultString);
    decimal ResultSum = ResultSetArrD.Sum();
    MessageBox.Show(message + "\n= " + ResultSum.ToString("0.0000"), "Result");

    // Only the shared list needs resetting; the local arrays simply go out of scope.
    ResultSet.Clear();
    success = true;

    foreach (Window window in Application.Current.Windows)
    {
        if (window.GetType() == typeof(MetTracker.GaugeCalc))
        {
            (window as MetTracker.GaugeCalc).CalculateBtn.Content = "Calculate";
        }
    }
}
I added some limiting to reduce the amount of time before it reports a failure to find a combo. I also convert the array to integers (the values scaled by 10000) to get around the headache the decimals were causing me. Works great!

Importing and removing duplicates from a massive amount of text files using C# and Redis

This is a bit of a doozy and it's been a while since I worked with C#, so bear with me:
I'm running a jruby script to iterate through 900 files (5 Mb - 1500 Mb in size) to figure out how many dupes STILL exist within these (already uniq'd) files. I had little luck with awk.
My latest idea was to insert them into a local MongoDB instance like so:
db.collection('hashes').update({ :_id => hash}, { $inc: { count: 1} }, { upsert: true)
... so that later I could just query it like db.collection.where({ count: { $gt: 1 } }) to get all the dupes.
This is working great except it's been over 24 hours and at the time of writing I'm at 72,532,927 Mongo entries and growing.
I think Ruby's .each_line is bottlenecking the IO hardcore:
So what I'm thinking now is compiling a C# program which fires up a thread PER EACH FILE and inserts the line (md5 hash) into a Redis list.
From there, I could have another compiled C# program simply pop the values off and ignore the save if the count is 1.
So the questions are:
Will using a compiled file reader and multithreading the file reads significantly improve performance?
Is using Redis even necessary? With a tremendous amount of AWS memory, could I not just use the threads to fill some sort of a list atomically and proceed from there?
Thanks in advance.
Updated
New solution. Old solution. The main idea is to calculate a dummy hash (just the sum of all bytes in the line) for each line and store it in Dictionary<ulong, List<LinePosition>> _hash2LinePositions. Different lines in the same stream can share a hash, which is handled by keeping a List as the dictionary value. When the hashes are the same, we read the actual lines from the streams and compare them. LinePosition stores the info about a line: its position in the stream and its length. I don't have files as huge as yours, but my tests show that it works. Here is the full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>
/// Finds duplicate lines inside and across text streams. Every line gets a cheap
/// "dummy" hash (the sum of its bytes); only lines whose hashes collide are read back
/// from the stream and compared byte by byte.
/// </summary>
public class Solution
{
    /// <summary>Location of one line inside a stream: byte offset and byte length.</summary>
    struct LinePosition
    {
        public long Start;
        public long Length;

        public LinePosition(long start, long count)
        {
            Start = start;
            Length = count;
        }

        public override string ToString()
        {
            return string.Format("Start: {0}, Length: {1}", Start, Length);
        }
    }

    /// <summary>
    /// Indexes the lines of one seekable stream by dummy hash. Owns the stream and
    /// disposes it.
    /// </summary>
    class TextFileHasher : IDisposable
    {
        // dummy hash -> positions of all lines with that hash
        readonly Dictionary<ulong, List<LinePosition>> _hash2LinePositions;
        readonly Stream _stream;
        bool _isDisposed;

        /// <summary>All dummy hashes seen in this stream.</summary>
        public HashSet<ulong> Hashes { get; private set; }
        public string Name { get; private set; }

        public TextFileHasher(string name, Stream stream)
        {
            Name = name;
            _stream = stream;
            _hash2LinePositions = new Dictionary<ulong, List<LinePosition>>();
            Hashes = new HashSet<ulong>();
        }

        public override string ToString()
        {
            return Name;
        }

        /// <summary>
        /// Reads the stream byte by byte and records hash and position of every line.
        /// A running hash of 0 doubles as the "no data yet" marker, so empty lines are skipped.
        /// </summary>
        public void CalculateFileHash()
        {
            int readByte = -1;
            ulong dummyLineHash = 0;
            // Line start position in file
            long startPosition = 0;
            while ((readByte = _stream.ReadByte()) != -1) {
                // Read until new line
                if (readByte == '\r' || readByte == '\n') {
                    // If there was data
                    if (dummyLineHash != 0) {
                        // Add line hash and line position to the dict
                        // (Position already points past the line break, hence the - 1)
                        AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - 1 - startPosition);
                        // Reset line hash
                        dummyLineHash = 0;
                    }
                }
                else {
                    // Was it new line ?
                    if (dummyLineHash == 0)
                        startPosition = _stream.Position - 1;
                    // Calculate dummy hash
                    dummyLineHash += (uint)readByte;
                }
            }
            // Last line without a trailing line break
            if (dummyLineHash != 0) {
                AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - startPosition);
            }
        }

        /// <summary>Positions of all lines with the given dummy hash; the hash must be known.</summary>
        public List<LinePosition> GetLinePositions(ulong hash)
        {
            return _hash2LinePositions[hash];
        }

        /// <summary>
        /// Returns the duplicates inside this stream: a line occurring n times is
        /// returned n - 1 times.
        /// </summary>
        public List<string> GetDuplicates()
        {
            List<string> duplicates = new List<string>();
            foreach (var linesPos in _hash2LinePositions.Values) {
                if (linesPos.Count > 1) {
                    duplicates.AddRange(FindExactDuplicates(linesPos));
                }
            }
            return duplicates;
        }

        public void Dispose()
        {
            if (_isDisposed)
                return;
            _stream.Dispose();
            _isDisposed = true;
        }

        private void AddToDictAndHash(ulong hash, long start, long count)
        {
            List<LinePosition> linesPosition;
            if (!_hash2LinePositions.TryGetValue(hash, out linesPosition)) {
                linesPosition = new List<LinePosition>();
                _hash2LinePositions.Add(hash, linesPosition);
            }
            linesPosition.Add(new LinePosition(start, count));
            Hashes.Add(hash);
        }

        /// <summary>Reads the raw bytes of one line back from the stream (moves the stream position).</summary>
        /// <exception cref="EndOfStreamException">The stream ends before the line is complete.</exception>
        public byte[] GetLineAsByteArray(LinePosition prevPos)
        {
            int len = checked((int)prevPos.Length);
            byte[] lineBytes = new byte[len];
            _stream.Seek(prevPos.Start, SeekOrigin.Begin);
            // Stream.Read may return fewer bytes than requested, so keep reading until
            // the whole line has arrived.
            int offset = 0;
            while (offset < len) {
                int read = _stream.Read(lineBytes, offset, len - offset);
                if (read == 0)
                    throw new EndOfStreamException("Stream ended inside a recorded line.");
                offset += read;
            }
            return lineBytes;
        }

        /// <summary>
        /// Compares the lines of one hash bucket byte by byte. Every line is checked
        /// against all distinct lines seen so far in the bucket, so duplicates are found
        /// even when differing lines of the same length share the bucket.
        /// </summary>
        private List<string> FindExactDuplicates(List<LinePosition> linesPos)
        {
            List<string> duplicates = new List<string>();
            List<byte[]> distinctLines = new List<byte[]>();
            foreach (var pos in linesPos) {
                byte[] line = GetLineAsByteArray(pos);
                bool seenBefore = distinctLines.Any(known => known.Length == line.Length && known.SequenceEqual(line));
                if (seenBefore)
                    duplicates.Add(System.Text.Encoding.Default.GetString(line));
                else
                    distinctLines.Add(line);
            }
            return duplicates;
        }
    }

    public static void Main(String[] args)
    {
        List<TextFileHasher> textFileHashers = new List<TextFileHasher>();
        string text1 = "abc\r\ncba\r\nabc";
        TextFileHasher tfh1 = new TextFileHasher("Text1", new MemoryStream(System.Text.Encoding.Default.GetBytes(text1)));
        tfh1.CalculateFileHash();
        textFileHashers.Add(tfh1);
        string text2 = "def\r\ncba\r\nwet";
        TextFileHasher tfh2 = new TextFileHasher("Text2", new MemoryStream(System.Text.Encoding.Default.GetBytes(text2)));
        tfh2.CalculateFileHash();
        textFileHashers.Add(tfh2);
        string text3 = "def\r\nbla\r\nwat";
        TextFileHasher tfh3 = new TextFileHasher("Text3", new MemoryStream(System.Text.Encoding.Default.GetBytes(text3)));
        tfh3.CalculateFileHash();
        textFileHashers.Add(tfh3);

        List<string> totalDuplicates = new List<string>();

        // Duplicates inside each single stream. Every hasher is asked exactly once.
        foreach (var hasher in textFileHashers)
            totalDuplicates.AddRange(hasher.GetDuplicates());

        // dummy hash -> (stream -> positions of the lines with that hash in that stream)
        Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>> totalHashes = new Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>>();
        foreach (var hasher in textFileHashers) {
            foreach (var dummyHash in hasher.Hashes) {
                Dictionary<TextFileHasher, List<LinePosition>> tfh2LinePositions;
                if (!totalHashes.TryGetValue(dummyHash, out tfh2LinePositions)) {
                    tfh2LinePositions = new Dictionary<TextFileHasher, List<LinePosition>>();
                    totalHashes[dummyHash] = tfh2LinePositions;
                }
                tfh2LinePositions[hasher] = hasher.GetLinePositions(dummyHash);
            }
        }

        // Duplicates across streams: for every hash present in more than one stream,
        // compare the candidate lines of each pair of streams.
        foreach (var entry in totalHashes) {
            var hash = entry.Key;
            var tfh2LinePositions = entry.Value;
            if (tfh2LinePositions.Count <= 1) {
                continue;
            }
            var tfhs = tfh2LinePositions.Keys.ToArray();
            for (int i = 0; i < tfhs.Length; i++) {
                var tfh1Positions = tfhs[i].GetLinePositions(hash);
                for (int j = i + 1; j < tfhs.Length; j++) {
                    var tfh2Positions = tfhs[j].GetLinePositions(hash);
                    for (int k = 0; k < tfh1Positions.Count; k++) {
                        var tfh1Pos = tfh1Positions[k];
                        var tfh1ByteArray = tfhs[i].GetLineAsByteArray(tfh1Pos);
                        for (int m = 0; m < tfh2Positions.Count; m++) {
                            var tfh2Pos = tfh2Positions[m];
                            // Different length means different line; skip the read.
                            if (tfh1Pos.Length != tfh2Pos.Length)
                                continue;
                            var tfh2ByteArray = tfhs[j].GetLineAsByteArray(tfh2Pos);
                            if (tfh1ByteArray.SequenceEqual(tfh2ByteArray)) {
                                var line = System.Text.Encoding.Default.GetString(tfh1ByteArray);
                                totalDuplicates.Add(line);
                            }
                        }
                    }
                }
            }
        }

        Console.WriteLine();
        if (totalDuplicates.Count > 0) {
            Console.WriteLine("Total number of duplicates: {0}", totalDuplicates.Count);
            Console.WriteLine("#######################");
            totalDuplicates.ForEach(x => Console.WriteLine("{0}", x));
            Console.WriteLine("#######################");
        }

        // Free resources
        foreach (var tfh in textFileHashers)
            tfh.Dispose();
    }
}
If you have tons of ram... You guys are overthinking it...
// Loads every line of the file into memory, then lazily filters out repeated lines.
// The path is a verbatim string literal (@"..."), so the backslash is not an escape.
var fileLines = File.ReadAllLines(@"c:\file.csv").Distinct();

Program to find minimum number in string

I have a c# class like so
/// <summary>
/// Builds the smallest number (as text, without a leading zero where avoidable) that can be
/// formed from the digits of an input string after discarding a given amount of characters.
/// The digits may be reordered: the smallest non-zero digit comes first, then all zeros,
/// then the remaining digits in ascending order.
/// </summary>
internal class QueuedMinimumNumberFinder : ConcurrentQueue<int>
{
    private readonly string _minString;

    /// <summary>
    /// Computes the minimum number for <paramref name="number"/>.
    /// </summary>
    /// <param name="number">Text whose characters are interpreted with Char.GetNumericValue.</param>
    /// <param name="takeOutAmount">How many characters are dropped; the result has
    /// number.Length - takeOutAmount digits.</param>
    /// <exception cref="ArgumentNullException">number is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">takeOutAmount is negative.</exception>
    /// <exception cref="ArgumentException">takeOutAmount exceeds the length of number.</exception>
    public QueuedMinimumNumberFinder(string number, int takeOutAmount)
    {
        if (number == null)
        {
            throw new ArgumentNullException("number");
        }
        if (takeOutAmount < 0)
        {
            // A negative amount would ask for more digits than the input holds.
            throw new ArgumentOutOfRangeException("takeOutAmount", "takeOutAmount must not be negative.");
        }
        if (number.Length < takeOutAmount)
        {
            throw new ArgumentException("Error *");
        }

        var keepCount = number.Length - takeOutAmount;

        // Non-zero digits in ascending order; everything else counts as a zero.
        var sortedNonZero = number
            .Select(c => (int)Char.GetNumericValue(c))
            .Where(d => d != 0)
            .OrderBy(d => d)
            .ToArray();
        var nextNonZero = 0;
        var zeroes = number.Length - sortedNonZero.Length;

        for (var position = 0; position < keepCount; position++)
        {
            if (position > 0 && zeroes > 0)
            {
                // After the leading digit, zeros are the cheapest digits available.
                Enqueue(0);
                zeroes--;
            }
            else if (nextNonZero < sortedNonZero.Length)
            {
                Enqueue(sortedNonZero[nextNonZero++]);
            }
            else
            {
                // Only zeros exist in the input, so a leading zero cannot be avoided.
                Enqueue(0);
                zeroes--;
            }
        }

        // Drain the queue into the result text; the queue is left empty afterwards.
        var builder = new StringBuilder();
        int next;
        while (TryDequeue(out next))
        {
            builder.Append(next.ToString());
        }
        _minString = builder.ToString();
    }

    /// <summary>Returns the minimum number computed by the constructor.</summary>
    public override string ToString() { return _minString; }
}
The point of the program is to find the minimum possible integer that can be made by taking out any x amount of characters from a string(example 100023 is string, if you take out any 3 letters, the minimum int created would be 100). My question is, is this the correct way to do this? Is there a better data structure that can be used for this problem?
First Edit:
Here's how it looks now
/// <summary>
/// Builds the smallest number (as text) that can be formed from the digits of an input
/// string after discarding a given amount of characters. Digits may be reordered: the
/// smallest non-zero digit leads, followed by all zeros, then the remaining digits ascending.
/// </summary>
internal class QueuedMinimumNumberFinder
{
    private readonly string _minString;

    /// <summary>
    /// Computes the minimum number for <paramref name="number"/>.
    /// </summary>
    /// <param name="number">Text whose characters are interpreted with Char.GetNumericValue.</param>
    /// <param name="takeOutAmount">How many characters are dropped; the result has
    /// number.Length - takeOutAmount digits.</param>
    /// <exception cref="ArgumentNullException">number is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">takeOutAmount is negative.</exception>
    /// <exception cref="ArgumentException">takeOutAmount exceeds the length of number.</exception>
    public QueuedMinimumNumberFinder(string number, int takeOutAmount)
    {
        if (number == null)
        {
            throw new ArgumentNullException("number");
        }
        if (takeOutAmount < 0)
        {
            throw new ArgumentOutOfRangeException("takeOutAmount", "takeOutAmount must not be negative.");
        }
        if (number.Length < takeOutAmount)
        {
            throw new ArgumentException("Error *");
        }

        var keepCount = number.Length - takeOutAmount;

        // Sort the non-zero digits once instead of searching for the minimum on every step.
        var nonZeroDigits = number
            .Select(c => (int)Char.GetNumericValue(c))
            .Where(d => d != 0)
            .OrderBy(d => d)
            .ToArray();
        var nextNonZero = 0;
        var zeroes = number.Length - nonZeroDigits.Length;

        var builder = new StringBuilder();
        for (var position = 0; position < keepCount; position++)
        {
            // Zeros are used everywhere except the leading position, and also when no
            // non-zero digit is left (all-zero input). Every available zero is used
            // before falling back to the next non-zero digit.
            var nonZeroExhausted = nextNonZero == nonZeroDigits.Length;
            if ((position > 0 && zeroes > 0) || nonZeroExhausted)
            {
                builder.Append('0');
                zeroes--;
            }
            else
            {
                builder.Append(nonZeroDigits[nextNonZero++].ToString());
            }
        }
        _minString = builder.ToString();
    }

    /// <summary>Returns the minimum number computed by the constructor.</summary>
    public override string ToString() { return _minString; }
}
A pretty simple and efficient implementation can be made, once you realize that your input string digits map to the domain of only 10 possible values: '0' .. '9'.
This can be encoded as the number of occurrences of a specific digit in your input string using a simple array of 10 integers: var digit_count = new int[10];
@MasterGillBates describes this idea in his answer.
You can then regard this array as your priority queue from which you can dequeue the characters you need by iteratively removing the lowest available character (decreasing its occurrence count in the array).
The code sample below provides an example implementation for this idea.
/// <summary>
/// Forms the smallest number from the digits of a string by counting how often each of the
/// ten digits occurs and then repeatedly taking the lowest digit that is still available.
/// </summary>
public static class MinNumberSolver
{
    /// <summary>
    /// Picks <paramref name="takeOutAmount"/> digits from <paramref name="number"/> so that
    /// the resulting number is minimal. Characters other than '0'..'9' are ignored.
    /// </summary>
    /// <param name="number">Input text containing the available digits.</param>
    /// <param name="takeOutAmount">Number of digits in the returned string.</param>
    /// <returns>The selected digits; an empty string when takeOutAmount is 0.</returns>
    /// <exception cref="ArgumentNullException">number is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">takeOutAmount is negative.</exception>
    /// <exception cref="InvalidDataException">The input holds fewer digits than requested.</exception>
    public static string GetMinString(string number, int takeOutAmount)
    {
        if (number == null)
            throw new ArgumentNullException("number");
        if (takeOutAmount < 0)
            throw new ArgumentOutOfRangeException("takeOutAmount", "takeOutAmount must not be negative.");
        if (takeOutAmount == 0)
            return string.Empty;

        // "Add" the string by simply counting digit occurrence frequency.
        // Only ASCII digits are counted: char.IsDigit also accepts other Unicode
        // decimal digits, which would index outside the 10-element array.
        var digit_count = new int[10];
        foreach (var c in number)
            if (c >= '0' && c <= '9')
                digit_count[c - '0']++;

        // Now remove them one by one in lowest to highest order.
        var selected = new char[takeOutAmount];

        // For the first character we skip any potential leading 0s, unless zeros are
        // the only digits available.
        var start_index = 1;
        while (start_index < digit_count.Length && digit_count[start_index] == 0)
            start_index++;
        if (start_index == digit_count.Length)
            start_index = 0;
        selected[0] = TakeLowest(digit_count, ref start_index);

        // For the rest we start in digit order at '0' first.
        start_index = 0;
        for (var i = 1; i < takeOutAmount; i++)
            selected[i] = TakeLowest(digit_count, ref start_index);

        // And return the result.
        return new string(selected);
    }

    /// <summary>
    /// Removes one occurrence of the lowest digit at or above <paramref name="start_index"/>
    /// and returns it as a character. start_index is advanced past digits that are used up
    /// so later calls do not rescan them.
    /// </summary>
    private static char TakeLowest(int[] digit_count, ref int start_index)
    {
        for (var i = start_index; i < digit_count.Length; i++)
        {
            if (digit_count[i] > 0)
            {
                start_index = ((--digit_count[i] > 0) ? i : i + 1);
                return (char)('0' + i);
            }
        }
        throw new InvalidDataException("Input string does not have sufficient digits");
    }
}
Just keep a count of how many times each digit appears. An array of size 10 will do. Count[i] gives the count of digit i.
Then pick the smallest non-zero i first, then pick the smallest etc and form your number.
Here's my solution using LINQ:
/// <summary>
/// Returns the smallest number that can be formed from the characters of
/// <paramref name="number"/> after dropping <paramref name="takeOutAmount"/> of them:
/// the lowest non-zero character first, then the zeros, then the rest ascending.
/// </summary>
/// <param name="number">Input digits.</param>
/// <param name="takeOutAmount">How many characters to drop; the result keeps
/// number.Length - takeOutAmount characters.</param>
public string MinimumNumberFinder(string number, int takeOutAmount)
{
    // OrderBy is deferred: materialize it once so the three queries below do not
    // each trigger a new sort of the input.
    var ordered = number.OrderBy(n => n).ToArray();
    var nonZero = ordered.SkipWhile(n => n == '0');
    var zero = ordered.TakeWhile(n => n == '0');
    var result = nonZero.Take(1)
        .Concat(zero)
        .Concat(nonZero.Skip(1))
        .Take(number.Length - takeOutAmount);
    return new string(result.ToArray());
}
You could place every integer into a list and find all possible sequences of these values. From the list of sequences, you could filter them, keeping only the sets which have the number of integers you want. From there, you can write a quick function which parses a sequence into an integer. Next, you could store all of your parsed sequences in an array or another data structure and sort them by value, which will allow you to select the minimum number from the data structure. There may be simpler ways to do this, but this will definitely work and gives you options as far as how many digits you want your number to have.
If I'm understanding this correctly, why don't you just pick out your numbers starting with the smallest number greater than zero. Then pick out all zeroes, then any remaining number if all the zeroes are picked up. This is all depending on the length of your ending result
In your example you have a 6 digit number and you want to pick out 3 digits. This means you'll only have 3 digits left. If it was a 10 digit number, then you would end up with a 7 digit number, etc...
So have an algorithm that knows the length of your starting number, how many digits you plan on removing, and the length of your ending number. Then just pick out the numbers.
This is just quick and dirty code:
// Builds the smallest number that keeps (length - numberOfCharactersToRemove) digits of
// startingNumber, reordering digits freely, and prints it.
string startingNumber = "9999903040404"; // "100023";
int numberOfCharactersToRemove = 3;
string endingNumber = string.Empty;
int endingNumberLength = startingNumber.Length - numberOfCharactersToRemove;
while (endingNumber.Length < endingNumberLength)
{
    // The leading digit prefers the smallest non-zero digit (zero only as a last resort);
    // every later digit takes zeros first, then the remaining digits from least to greatest.
    string digitOrder = endingNumber.Length == 0 ? "1234567890" : "0123456789";
    int position = -1;
    foreach (char digit in digitOrder)
    {
        position = startingNumber.IndexOf(digit);
        if (position >= 0)
        {
            break;
        }
    }
    if (position < 0)
    {
        // No digit is left to pick; stop instead of looping forever.
        break;
    }
    // Move the chosen digit from the input to the result.
    endingNumber += startingNumber[position];
    startingNumber = startingNumber.Remove(position, 1);
}
Console.WriteLine(endingNumber);
100023 starting number resulted in 100 being the end result
9999903040404 starting number resulted in 3000044499 being the end result
Here's my version to fix this problem:
DESIGN:
You can sort your list using a binary tree; there are a lot of
implementations, and I picked this one.
Then you can keep track of the number of zeros you have in your
string. Finally, you will end up with two lists: I named one
SortedDigitsList and the other one ZeroDigitsList.
Perform a switch case to determine which final 3 digits should be
returned.
Here's the complete code:
/// <summary>
/// Console program: reads a string, sorts its digits through a binary tree and prints the
/// smallest 3 digit number that can be formed from them.
/// </summary>
class MainProgram2
{
    /// <summary>
    /// Reads one line from the console, inserts every ASCII digit into a Tree, runs the
    /// in-order traversal to fill the tree's digit lists and prints the resulting 3 digits.
    /// </summary>
    static void Main()
    {
        Tree theTree = new Tree();
        Console.WriteLine("Please Enter the string you want to process:");
        // ReadLine returns null when the input stream is closed; treat that as empty input.
        string input = Console.ReadLine() ?? string.Empty;
        foreach (char c in input)
        {
            // Check if it's a digit or not
            if (c >= '0' && c <= '9')
            {
                theTree.Insert((int)Char.GetNumericValue(c));
            }
        }
        //End of for each (char c in input)
        Console.WriteLine("Inorder traversal resulting Tree Sort without the zeros");
        theTree.Inorder(theTree.ReturnRoot());
        Console.WriteLine(" ");
        //Format the output depending on how many zeros you have
        Console.WriteLine("The final 3 digits are");
        var sortedDigits = theTree.SortedDigitsList;
        int zeroCount = theTree.ZeroDigitsList.Count;
        if (sortedDigits.Count == 0 || sortedDigits.Count + zeroCount < 3)
        {
            // The cases below index into sortedDigits; bail out when they cannot be satisfied.
            Console.WriteLine("Need at least 3 digits, one of them non-zero");
        }
        else
        {
            switch (zeroCount)
            {
                case 0:
                    {
                        Console.WriteLine("{0}{1}{2}", sortedDigits[0], sortedDigits[1], sortedDigits[2]);
                        break;
                    }
                case 1:
                    {
                        // One zero goes in the middle; the last digit is the second
                        // smallest non-zero digit, i.e. index 1.
                        Console.WriteLine("{0}{1}{2}", sortedDigits[0], 0, sortedDigits[1]);
                        break;
                    }
                default:
                    {
                        Console.WriteLine("{0}{1}{2}", sortedDigits[0], 0, 0);
                        break;
                    }
            }
        }
        Console.ReadLine();
    }
}//End of MainProgram2
}
/// <summary>
/// A binary tree node holding one integer and references to its two children.
/// </summary>
class Node
{
    // Value stored in this node.
    public int item;
    // Left and right child references; null when the child is absent.
    public Node leftChild;
    public Node rightChild;

    /// <summary>Writes the stored value to the console wrapped in square brackets.</summary>
    public void displayNode()
    {
        Console.Write("[{0}]", item);
    }
}
class Tree
{
public List<int> SortedDigitsList { get; set; }
public List<int> ZeroDigitsList { get; set; }
public Node root;
/// <summary>
/// Creates an empty tree: no root node and two empty digit lists.
/// </summary>
public Tree()
{
root = null;
SortedDigitsList = new List<int>();
ZeroDigitsList = new List<int>();
}
/// <summary>
/// Returns the root node of the tree, or null when nothing has been inserted yet.
/// </summary>
public Node ReturnRoot()
{
return root;
}
/// <summary>
/// Inserts a value into the binary search tree. Values smaller than a node descend to
/// its left; all other values (including equal ones) descend to its right.
/// </summary>
/// <param name="id">The value to store in a new node.</param>
public void Insert(int id)
{
    Node leaf = new Node();
    leaf.item = id;

    // An empty tree simply takes the new node as its root.
    if (root == null)
    {
        root = leaf;
        return;
    }

    // Walk down from the root until a free child slot is found.
    Node cursor = root;
    for (;;)
    {
        bool goLeft = id < cursor.item;
        Node next = goLeft ? cursor.leftChild : cursor.rightChild;
        if (next != null)
        {
            cursor = next;
            continue;
        }

        if (goLeft)
        {
            cursor.leftChild = leaf;
        }
        else
        {
            cursor.rightChild = leaf;
        }
        return;
    }
}
//public void Preorder(Node Root)
//{
// if (Root != null)
// {
// Console.Write(Root.item + " ");
// Preorder(Root.leftChild);
// Preorder(Root.rightChild);
// }
//}
/// <summary>
/// Visits the subtree in order (left, node, right). Values greater than zero are appended
/// to SortedDigitsList and written to the console followed by a space; all other values
/// are appended to ZeroDigitsList without being printed.
/// </summary>
/// <param name="Root">Root of the subtree to traverse; null ends the recursion.</param>
public void Inorder(Node Root)
{
    if (Root == null)
    {
        return;
    }

    Inorder(Root.leftChild);

    if (Root.item <= 0)
    {
        ZeroDigitsList.Add(Root.item);
    }
    else
    {
        SortedDigitsList.Add(Root.item);
        Console.Write(Root.item + " ");
    }

    Inorder(Root.rightChild);
}

Categories