Given the following:
List<List<int>> lists = new List<List<int>>();
lists.Add(new List<int>() { 1,2,3,4,5,6,7 });
lists.Add(new List<int>() { 1,2 });
lists.Add(new List<int>() { 1,2,3,4 });
lists.Add(new List<int>() { 1,2,5,6,7 });
What is the best/fastest way of identifying which numbers appear in all lists?
You can use the .net 3.5 .Intersect() extension method:-
List<int> a = new List<int>() { 1, 2, 3, 4, 5 };
List<int> b = new List<int>() { 0, 4, 8, 12 };
List<int> common = a.Intersect(b).ToList();
To do it for two lists one would use x.Intersect(y).
To do it for several we would want to do something like:
var intersection = lists.Aggregate((x, y) => x.Intersect(y));
But this won't work because the result of the lambda isn't List<int> and so it can't be fed back in. This might tempt us to try:
var intersection = lists.Aggregate((x, y) => x.Intersect(y).ToList());
But then this makes n-1 needless calls to ToList() which is relatively expensive. We can get around this with:
var intersection = lists.Aggregate(
(IEnumerable<int> x, IEnumerable<int> y) => x.Intersect(y));
Which applies the same logic, but in using explicit types in the lambda, we can feed the result of Intersect() back in without wasting time and memory creating a list each time, and so gives faster results.
If this came up a lot we can get further (slight) performance improvements by rolling our own rather than using Linq:
/// <summary>
/// Returns the elements that occur in every inner sequence of <paramref name="source"/>.
/// </summary>
/// <param name="source">The sequences to intersect.</param>
/// <returns>
/// An empty sequence when <paramref name="source"/> has no inner sequences; otherwise a set
/// holding each common element once.
/// </returns>
public static IEnumerable<T> IntersectAll<T>(this IEnumerable<IEnumerable<T>> source)
{
    using (var sequences = source.GetEnumerator())
    {
        if (!sequences.MoveNext())
        {
            return Enumerable.Empty<T>();
        }

        // Seed with the first sequence, then whittle the set down one sequence at a time.
        var common = new HashSet<T>(sequences.Current);
        while (sequences.MoveNext())
        {
            // Remove succeeds at most once per value, so only values still present in the
            // previous set survive, and duplicates in the current sequence are ignored.
            HashSet<T> previous = common;
            common = new HashSet<T>(sequences.Current.Where(previous.Remove));
        }

        return common;
    }
}
This assumes it's for internal use only. If it could be called from another assembly it should have error checks, and perhaps should be defined so as to only perform the intersect operations on the first MoveNext() of the calling code:
/// <summary>
/// Returns the elements common to every inner sequence. The argument is validated
/// immediately; the intersection itself is only computed once the result is enumerated.
/// </summary>
/// <param name="source">The sequences to intersect.</param>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
public static IEnumerable<T> IntersectAll<T>(this IEnumerable<IEnumerable<T>> source)
{
    if (source != null)
    {
        return IntersectAllIterator(source);
    }

    throw new ArgumentNullException("source");
}

/// <summary>
/// Iterator behind <c>IntersectAll</c>: nothing here runs until the caller's first MoveNext().
/// </summary>
public static IEnumerable<T> IntersectAllIterator<T>(IEnumerable<IEnumerable<T>> source)
{
    using (var sequences = source.GetEnumerator())
    {
        if (!sequences.MoveNext())
        {
            yield break;
        }

        var common = new HashSet<T>(sequences.Current);
        while (sequences.MoveNext())
        {
            // Keep only the values of the running set that this sequence also contains.
            var survivors = new HashSet<T>();
            foreach (T candidate in sequences.Current)
            {
                if (common.Remove(candidate))
                {
                    survivors.Add(candidate);
                }
            }

            common = survivors;
        }

        foreach (T element in common)
        {
            yield return element;
        }
    }
}
(In these final two versions there's an opportunity to short-circuit if we end up emptying the set, but it only pays off if this happens relatively often, otherwise it's a nett loss).
Conversely, if these aren't concerns, and if we know that we're only ever going to want to do this with lists, we can optimise a bit further with the use of Count and indices:
/// <summary>
/// List-only intersection that uses Count and indexers instead of enumerators.
/// </summary>
/// <param name="source">The lists to intersect.</param>
/// <returns>
/// An empty sequence for no lists, the single list itself (not a copy) when there is exactly
/// one, otherwise a set of the elements present in every list.
/// </returns>
public static IEnumerable<T> IntersectAll<T>(this List<List<T>> source)
{
    switch (source.Count)
    {
        case 0:
            return Enumerable.Empty<T>();
        case 1:
            return source[0];
    }

    var common = new HashSet<T>(source[0]);
    for (int listIndex = 1; listIndex < source.Count; listIndex++)
    {
        List<T> current = source[listIndex];
        var survivors = new HashSet<T>();
        for (int position = 0; position < current.Count; position++)
        {
            T candidate = current[position];
            // Remove succeeds once per value, so repeats in this list are not re-added.
            if (common.Remove(candidate))
            {
                survivors.Add(candidate);
            }
        }

        common = survivors;
    }

    return common;
}
And further if we know we're always going to want the results in a list, and we know that either we won't mutate the list, or it won't matter if the input list got mutated, we can optimise for the case of there being zero or one lists (but this costs more if we might ever not need the output in a list):
/// <summary>
/// List-in, list-out intersection of every list in <paramref name="source"/>.
/// </summary>
/// <param name="source">The lists to intersect.</param>
/// <returns>
/// A new empty list for no input lists; the one input list itself (the same instance, so
/// mutations are shared) when there is exactly one; otherwise a new list of common elements.
/// </returns>
public static List<T> IntersectAll<T>(this List<List<T>> source)
{
    if (source.Count == 0)
    {
        return new List<T>(0);
    }

    if (source.Count == 1)
    {
        return source[0];
    }

    var remaining = new HashSet<T>(source[0]);
    int next = 1;
    while (next < source.Count)
    {
        List<T> other = source[next];
        var kept = new HashSet<T>();
        int position = 0;
        while (position < other.Count)
        {
            T value = other[position];
            if (remaining.Remove(value))
            {
                kept.Add(value);
            }

            position++;
        }

        remaining = kept;
        next++;
    }

    return new List<T>(remaining);
}
Again though, as well as making the method less widely-applicable, this has risks in terms of how it could be used, so is only appropriate for internal code where you can know either that you won't change either the input or the output after the fact, or that this won't matter.
Linq already offers Intersect and you can exploit Aggregate as well:
var result = lists.Aggregate((a, b) => a.Intersect(b).ToList());
If you don't trust the Intersect method or you just prefer to see what's going on, here's a snippet of code that should do the trick:
// Output goes here
List<int> output = new List<int>();
// With no lists at all there is nothing to intersect (and lists[0] below would throw)
if (lists.Count > 0)
{
    // Make sure lists are sorted (note: this sorts the input lists in place)
    for (int i = 0; i < lists.Count; ++i) lists[i].Sort();
    // Maintain array of indices so we can step through all the lists in parallel
    int[] index = new int[lists.Count];
    // Set once any list has been fully consumed: no later value can be in every list.
    // This replaces the original "goto done", whose label had no statement to attach to.
    bool exhausted = false;
    while (!exhausted && index[0] < lists[0].Count)
    {
        // Search for each value in the first list
        int value = lists[0][index[0]];
        // No. lists that value appears in, we want this to equal lists.Count
        int count = 1;
        // Search all the other lists for the value
        for (int i = 1; i < lists.Count; ++i)
        {
            // Skip everything smaller than value; stop at value or the first larger entry
            while (index[i] < lists[i].Count && lists[i][index[i]] < value) ++index[i];
            // If we reach the end of any list there can't be any more matches so end the search now
            if (index[i] >= lists[i].Count)
            {
                exhausted = true;
                break;
            }
            if (lists[i][index[i]] == value) ++count;
        }
        if (exhausted) break;
        // Store the value if we found it in all the lists
        if (count == lists.Count) output.Add(value);
        // Skip multiple occurrences of the same value
        while (index[0] < lists[0].Count && lists[0][index[0]] == value) ++index[0];
    }
}
Edit:
I got bored and did some benchmarks on this vs. Jon Hanna's version. His is consistently faster, typically by around 50%. Mine wins by about the same margin if you happen to have presorted lists, though. Also you can gain a further 20% or so with unsafe optimisations. Just thought I'd share that.
You can also get it with SelectMany and Distinct:
List<int> result = lists
.SelectMany(x => x.Where(e => lists.All(l => l.Contains(e))))
.Distinct().ToList();
Edit:
List<int> result2 = lists.First().Where(e => lists.Skip(1).All(l => l.Contains(e)))
.ToList();
Edit 2:
List<int> result3 = lists
.Select(l => l.OrderBy(n => n).Take(lists.Min(x => x.Count()))).First()
.TakeWhile((n, index) => lists.Select(l => l.OrderBy(x => x)).Skip(1).All(l => l.ElementAt(index) == n))
.ToList();
I need something similar to an AggregateWhile method. The standard System.Linq.Enumerable class doesn't provide it. Until now I've always been able to leverage the standard LINQ methods to solve every problem I've encountered. So I'd like to know if that's still possible in this case, or if I really do need to extend LINQ with a non-standard method.
The hypothetical AggregateWhile method would iterate over a sequence and apply the accumulator. The aggregation would be complete once a predicate returns false. The result is the aggregation of elements up to but not including the element for which the predicate failed.
Here's an example. We have a List { 1, 2, 3, 4, 5 } with an accumulator that adds the two input numbers together, and a predicate that states the accumulation must be less than 12. AggregateWhile would return 10 since that's the result of 1 + 2 + 3 + 4 and adding the final 5 would push the total over the limit. In code:
var list = new List<int> { 1, 2, 3, 4, 5 };
int total = list.AggregateWhile( (x, y) => x + y, a => a < 12 ); // returns 10
I need a purely functional solution, so closing over a temporary variable is not an option.
You could either write the function yourself, or carry a flag with your accumulator:
int total = list.Aggregate(new { value = 0, valid = true },
(acc, v) => acc.value + v < 12 && acc.valid ?
new { value = acc.value + v, valid = true } :
new { value = acc.value, valid = false },
acc => acc.value);
It's quite ugly, so writing a new AggregateWhile would be nicer:
/// <summary>
/// Folds <paramref name="source"/> with <paramref name="func"/>, stopping before the first
/// element whose inclusion would make <paramref name="predicate"/> return false.
/// </summary>
/// <param name="source">The sequence to aggregate.</param>
/// <param name="func">Combines the running result with the next element.</param>
/// <param name="predicate">Tested against each candidate result; aggregation stops when it fails.</param>
/// <returns>
/// The last accepted result. The first element is the initial result and is not itself tested.
/// An empty sequence yields <c>default(TSource)</c>.
/// </returns>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
public static TSource AggregateWhile<TSource>(this IEnumerable<TSource> source,
    Func<TSource, TSource, TSource> func,
    Func<TSource, bool> predicate)
{
    if (source == null)
        throw new ArgumentNullException(nameof(source));
    if (func == null)
        throw new ArgumentNullException(nameof(func));
    if (predicate == null)
        throw new ArgumentNullException(nameof(predicate));

    using (IEnumerator<TSource> e = source.GetEnumerator())
    {
        // Current is undefined until MoveNext() has succeeded (array enumerators throw),
        // so advance first and seed the result from the first real element.
        if (!e.MoveNext())
        {
            return default(TSource);
        }

        TSource result = e.Current;
        while (e.MoveNext())
        {
            TSource candidate = func(result, e.Current);
            if (!predicate(candidate))
            {
                break;
            }

            result = candidate;
        }

        return result;
    }
}
(no error checking for brevity)
You can write your own extension method. This is not as perfect as the normal Linq methods, I cheated because I already know your requirements to make it simpler. In reality you may want an optional starting value for a and maybe different In and output types for T or other stuff:
/// <summary>Extension methods that complement the standard LINQ operators.</summary>
public static class Linq
{
    /// <summary>
    /// Aggregates <paramref name="sequence"/> until <paramref name="predicate"/> rejects the
    /// next candidate result.
    /// </summary>
    /// <param name="sequence">The values to aggregate.</param>
    /// <param name="aggregate">Combines the running result with the next value.</param>
    /// <param name="predicate">Tested against each candidate; aggregation stops when it fails.</param>
    /// <returns>
    /// The last accepted result. The first value is the initial result and is not itself
    /// tested; an empty sequence yields <c>default(T)</c>.
    /// </returns>
    public static T AggregateWhile<T>(this IEnumerable<T> sequence, Func<T, T, T> aggregate, Func<T, bool> predicate)
    {
        // The original left the accumulator unassigned (which does not compile);
        // seed it from the first value, as the seedless Enumerable.Aggregate does.
        T a = default(T);
        bool seeded = false;
        foreach (var value in sequence)
        {
            if (!seeded)
            {
                a = value;
                seeded = true;
                continue;
            }

            T temp = aggregate(a, value);
            if (!predicate(temp))
            {
                break;
            }

            a = temp;
        }

        return a;
    }
}
Won't this work?
int total = list.Aggregate(0, (a, x) => (a + x) > 12 ? a : a + x);
Using Tuple<bool, int> as accumulator type, to break on first overflow:
int total = list.Aggregate(new Tuple<bool, int>(false, 0),
(a, x) => a.Item1 || (a.Item2 + x) > 12
? new Tuple<bool, int>(true, a.Item2)
: new Tuple<bool, int>(false, a.Item2 + x)
).Item2;
But it isn't so nice unfortunately.
Start using F#. ;)
let list = [ 1; 2; 3; 4; 5; 1 ]
let predicate = fun a -> a > 12
let total = list |> List.fold (fun (aval, astate) x ->
if astate || predicate (aval + x)
then (aval, true)
else (aval + x, false)) (0, false)
Tuple unpacking, no new bloat. And when you code it type inference makes it a breeze.
I asked this question a while back while encountering a problem that I later reframed into not needing AggregateWhile. But now I've encountered a slightly different problem which undoubtedly requires AggregateWhile or some direct substitute for it.
The solutions proposed by @sloth and @rkrahl are helpful. But they fall short in that the aggregation logic (addition in this case) is repeated twice. This doesn't seem like a big deal for the question's trivial example. But for my real problem, the calculation is complex so writing it twice is unacceptable.
Here's the solution I prefer (short of actual AggregateWhile methods):
// Minimal driver: constructing a Program runs the aggregation once.
class Program
{
    static void Main(string[] args)
    {
        new Program();
    }

    public Program()
    {
        int[] values = { 1, 2, 3, 4, 5 };
        var seed = new Accumulator(0);
        // Next() decides per element whether to keep accumulating; Total reads the result out.
        int total = values.Aggregate(
            seed,
            (running, value) => running.Next(value),
            final => final.Total);
    }
}
// Running total that stops growing once adding a value would reach 12 or more.
class Accumulator
{
    readonly int total;

    // Set on the instance that rejected a value; every later Next() call on it is a no-op.
    bool isDone;

    public Accumulator(int total)
    {
        this.total = total;
    }

    // The total accumulated so far.
    public int Total
    {
        get { return total; }
    }

    // Returns a new accumulator including i while the sum stays below 12;
    // otherwise marks this instance as finished and returns it unchanged.
    public Accumulator Next(int i)
    {
        if (!isDone)
        {
            int candidate = total + i;
            if (candidate < 12)
            {
                return new Accumulator(candidate);
            }

            isDone = true;
        }

        return this;
    }
}
The ideal solution is a set of fully implemented and tested AggregateWhile methods which correspond to the three Aggregate overloads. Short of that, the above pattern has the advantage that it can leverage the (somewhat lacking) functionality that's already present in the .NET framework.
Here is an AggregateWhile with a seed:
/// <summary>
/// Folds <paramref name="source"/> into an accumulator starting from <paramref name="seed"/>,
/// stopping at the first element whose candidate result fails <paramref name="predicate"/>.
/// </summary>
/// <param name="source">The sequence to aggregate.</param>
/// <param name="seed">The initial accumulator value; it is not tested against the predicate.</param>
/// <param name="func">Produces the candidate accumulator from the current one and the next element.</param>
/// <param name="predicate">Returns false to reject a candidate and end the aggregation.</param>
/// <returns>The last accumulator value that was accepted (the seed if none was).</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/>, <paramref name="func"/> or <paramref name="predicate"/> is null.
/// </exception>
public static TAccumulate AggregateWhile<TSource, TAccumulate>(
    this IEnumerable<TSource> source,
    TAccumulate seed,
    Func<TAccumulate, TSource, TAccumulate> func,
    Func<TAccumulate, bool> predicate)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (func == null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    if (predicate == null)
    {
        throw new ArgumentNullException(nameof(predicate));
    }

    TAccumulate accepted = seed;
    using (IEnumerator<TSource> items = source.GetEnumerator())
    {
        while (items.MoveNext())
        {
            TAccumulate candidate = func(accepted, items.Current);
            if (!predicate(candidate))
            {
                return accepted;
            }

            accepted = candidate;
        }
    }

    return accepted;
}
I have a multi-dimensional array in C# defined as follows:
double[,,] myArray=new double[10000,10000,3];
I want to find the maximum value of this array when the last index is fixed (for example, at 0), something such as this:
double m1=myArray[?,?,0].Max();
How can I calculate it using Linq or other methods?
If you'd like to get the max across some subset of the array you can do this:
double m1 =
(from x in Enumerable.Range(0, myArray.GetLength(0))
from y in Enumerable.Range(0, myArray.GetLength(1))
select myArray[x, y, 0])
.Max();
If you'd like to get the max across all elements in the array you can just do this
double m1 = myArray.Cast<double>().Max();
However, you can get a significant performance boost by implementing your own extension method like this:
/// <summary>
/// Lazily yields every element of a three-dimensional array, in the order the array's own
/// enumeration produces them.
/// </summary>
/// <param name="arry">The array to flatten.</param>
public static IEnumerable<T> Flatten<T>(this T[,,] arry) {
    // The original yielded an undefined name ("item") instead of the loop variable.
    foreach (T item in arry) {
        yield return item;
    }
}
myArray.Flatten().Max();
EDIT 2
Note, this extension works equally well for the hideous but valid case of a non zero based array,
var nonZeroBasedArray = Array.CreateInstance(
typeof(double),
new[] { 4, 4, 3 },
new[] { -2, -2, 0 });
Note that the first two dimensions range from -2 to 1 inclusive (yikes.) This test code illustrates that the Flatten extension still works.
// Prints every element at z = 0 with its coordinates, then the number of elements visited.
var count = 0;
foreach (var element in nonZeroBasedArray.Flatten<double>(null, null, 0))
{
    Console.Write(string.Join(", ", element.Key));
    Console.WriteLine(": {0}", element.Value);
    // The original never incremented the counter, so it always reported 0.
    count++;
}
Console.WriteLine("Count: {0}", count);
Console.ReadKey();
EDIT
So, using the extension defined below you can do
var myArray = new double[10000,10000,3];
// Materialise once: OrderBy is deferred, so First() and Last() would otherwise each re-sort.
var ordered = myArray.Flatten<double>(null, null, 0).OrderBy(p => p.Value).ToList();
// Ascending order: the smallest value comes first and the largest last
// (the original had the two names swapped).
var minZ0 = ordered.First();
var maxZ0 = ordered.Last();
The element type is a KeyValuePair<IEnumerable<int>, T> so the Key allows you to back reference to the original array.
Ok, here is a generic extension, initially inspired by p.s.w.g's answer
If you start with Eric Lippert's inspirational CartesianProduct<T> extension,
/// <summary>
/// Builds the Cartesian product of any number of sequences: every way of picking one item
/// from each input sequence, in input order.
/// </summary>
/// <param name="sequences">The sequences to combine.</param>
/// <returns>One sequence per combination; a single empty combination when there are no inputs.</returns>
public static IEnumerable<IEnumerable<T>> CartesianProduct<T>(
    this IEnumerable<IEnumerable<T>> sequences)
{
    // The product of zero sequences is one empty combination.
    IEnumerable<IEnumerable<T>> seed = new[] { Enumerable.Empty<T>() };

    // Each step extends every partial combination with every item of the next sequence.
    return sequences.Aggregate(
        seed,
        (partials, sequence) => partials.SelectMany(
            partial => sequence,
            (partial, item) => partial.Concat(new[] { item })));
}
Then you make a function to generate the bound sets of a multi dimensional array that allows you to specify fixed values for some dimensions.
/// <summary>
/// Produces, for each dimension of <paramref name="array"/>, the index values to visit:
/// a single fixed index where one is defined, otherwise the dimension's full index range.
/// </summary>
/// <param name="array">The array whose dimensions are described.</param>
/// <param name="definedBounds">
/// Optional fixed index per dimension, by position. A null entry, a missing trailing entry,
/// or a null array leaves the corresponding dimension unrestricted.
/// </param>
/// <returns>One index sequence per dimension, in rank order.</returns>
private static IEnumerable<IEnumerable<int>> GetBoundSequences(
    Array array,
    int?[] definedBounds)
{
    for (var rank = 0; rank < array.Rank; rank++)
    {
        // A "params" call with a single literal null passes a null array, so guard for it.
        // ElementAtOrDefault returns null (no fixed index) for positions past the end.
        int? defined = definedBounds == null
            ? null
            : definedBounds.ElementAtOrDefault(rank);
        if (defined.HasValue)
        {
            yield return new[] { defined.Value };
        }
        else
        {
            // Use the real bounds so arrays with a non-zero lower bound work too.
            var min = array.GetLowerBound(rank);
            yield return Enumerable.Range(
                min,
                (array.GetUpperBound(rank) - min) + 1);
        }
    }
}
you can use both to make a flexible Flatten<T> extension, that works with arrays of any rank.
/// <summary>
/// Lazily walks an array of any rank, yielding each visited element with its coordinates.
/// </summary>
/// <param name="array">The array to walk.</param>
/// <param name="definedBounds">
/// Optional fixed index per dimension, by position; null entries leave that dimension free.
/// </param>
/// <returns>Pairs whose Key is the element's coordinates and whose Value is the element cast to T.</returns>
public static IEnumerable<KeyValuePair<IEnumerable<int>, T>> Flatten<T>(
    this Array array,
    params int?[] definedBounds)
{
    foreach (IEnumerable<int> coordinates in GetBoundSequences(array, definedBounds).CartesianProduct())
    {
        // Materialise the coordinates once: they index the array and become the Key.
        int[] indices = coordinates.ToArray();
        T element = (T)array.GetValue(indices);
        yield return new KeyValuePair<IEnumerable<int>, T>(indices, element);
    }
}
Once you have this, you can do something like
var myArray = new double[10000,10000,3];
var maxZ0 = myArray.Flatten<double>(null, null, 0).Max(p => p.Value);
This is good because it lazily iterates and converts only the elements specified.
Try this
double[,,] myArray = new double[10000, 10000, 3];
double max = myArray.Cast<double>().Max();
I have:
List<int> A = new List<int>(){1,2,3,4,5,6};
List<int> m = new List<int>();
// Each entry of m is the sum of an element of A and the element before it.
// (List<T> members are Count and Add; C# is case-sensitive.)
for (int i = 1; i < A.Count; i++)
{
    int j = A[i] + A[i - 1];
    m.Add(j);
}
how can I do this same operation using LinQ?
Well, a straightforward translation would be:
var m = Enumerable.Range(1, A.Count - 1)
.Select(i => A[i] + A[i - 1])
.ToList();
But also consider:
var m = A.Skip(1)
.Zip(A, (curr, prev) => curr + prev)
.ToList();
Or using Jon Skeet's extension here:
var m = A.SelectWithPrevious((prev, curr) => prev + curr)
.ToList();
But as Jason Evans points out in a comment, this doesn't help all that much with readability or brevity, considering your existing code is perfectly understandable (and short) and you want to materialize all of the results into a list anyway.
There's nothing really wrong with:
var sumsOfConsecutives = new List<int>();
for(int i = 1; i < A.Count; i++)
sumsOfConsecutives.Add(A[i] + A[i - 1]);
Ok so getting the next item in the list you can use:
A.SkipWhile(x => x != value).Skip(1).FirstOrDefault();
So to get the previous item use:
var B = A.ToList();
B.Reverse();
B.SkipWhile(x => x != value).Skip(1).FirstOrDefault();
How about something like
var l = A.Skip(1).Select((x, index) => x + A[index]).ToList();
Some of the other answers assume that the elements of A are always going to be 1, 2, 3, 4, 5, 6. If those values ever change then the solution would break, such as the values changing to 2, 3, 6, 7, 10.
Here's my solution that will work with any values of A.
List<int> m = A.Skip(1).Select((element, index) => element + A.ElementAt(index)).ToList();
It is worth noting that sticking with a loop would probably be better than hacking together a Linq solution for this.
In case you only need the end value, you can Aggregate it, ie. you need previous value, but dont need each individual value to a new list.
int last = 0;
var r = m.Aggregate(last, (acc, it) => (last += it), (acc) => (last));
Another option is to implement your own Buffer operator and use that to make a simple LINQ statement.
/// <summary>
/// Yields every run of <paramref name="size"/> consecutive elements of <paramref name="source"/>
/// as a sliding window that advances one element at a time.
/// </summary>
/// <param name="source">The sequence to window.</param>
/// <param name="size">Number of elements in each window.</param>
/// <returns>The full windows in order; nothing is yielded until a window has filled.</returns>
public static IEnumerable<IEnumerable<T>> Buffer<T>(this IEnumerable<T> source, int size)
{
    var window = new List<T>();
    foreach (T element in source)
    {
        window.Add(element);
        if (window.Count != size)
        {
            continue;
        }

        yield return window;

        // Start a fresh list without the oldest element so the yielded window is never mutated.
        window = window.GetRange(1, window.Count - 1);
    }
}
That allows this code:
List<int> B = A.Buffer(2).Select(x => x.Sum()).ToList();
I have a list of numbers e.g. 21,4,7,9,12,22,17,8,2,20,23
I want to be able to pick out sequences of sequential numbers (minimum 3 items in length), so from the example above it would be 7,8,9 and 20,21,22,23.
I have played around with a few ugly sprawling functions but I am wondering if there is a neat LINQ-ish way to do it.
Any suggestions?
UPDATE:
Many thanks for all the responses, much appreciated. I am currently having a play with them all to see which would best integrate into our project.
It strikes me that the first thing you should do is order the list. Then it's just a matter of walking through it, remembering the length of your current sequence and detecting when it's ended. To be honest, I suspect that a simple foreach loop is going to be the simplest way of doing that - I can't immediately think of any wonderfully neat LINQ-like ways of doing it. You could certainly do it in an iterator block if you really wanted to, but bear in mind that ordering the list to start with means you've got a reasonably "up-front" cost anyway. So my solution would look something like this:
// Walk the values in ascending order, tracking the current run as (firstItem, count)
// and reporting every run of at least three consecutive values.
var ordered = list.OrderBy(x => x);
int count = 0;
int firstItem = 0; // Meaningless until the first value has been seen
foreach (int value in ordered)
{
    bool inRun = count > 0;

    // Same value as the end of the current run: a duplicate, nothing to do
    if (inRun && value == firstItem + count - 1)
    {
        continue;
    }

    // Exactly one past the end of the current run: extend it
    if (inRun && value == firstItem + count)
    {
        count++;
        continue;
    }

    // Either the very first value or a gap: report the finished run, start a new one
    if (count >= 3)
    {
        Console.WriteLine("Found sequence of length {0} starting at {1}",
                          count, firstItem);
    }
    firstItem = value;
    count = 1;
}
// The last run is never closed by a gap, so report it here
if (count >= 3)
{
    Console.WriteLine("Found sequence of length {0} starting at {1}",
                      count, firstItem);
}
EDIT: Okay, I've just thought of a rather more LINQ-ish way of doing things. I don't have the time to fully implement it now, but:
Order the sequence
Use something like SelectWithPrevious (probably better named SelectConsecutive) to get consecutive pairs of elements
Use the overload of Select which includes the index to get tuples of (index, current, previous)
Filter out any items where (current = previous + 1) to get anywhere that counts as the start of a sequence (special-case index=0)
Use SelectWithPrevious on the result to get the length of the sequence between two starting points (subtract one index from the previous)
Filter out any sequence with length less than 3
I suspect you need to concat int.MinValue on the ordered sequence, to guarantee the final item is used properly.
EDIT: Okay, I've implemented this. It's about the LINQiest way I can think of to do this... I used null values as "sentinel" values to force start and end sequences - see comments for more details.
Overall, I wouldn't recommend this solution. It's hard to get your head round, and although I'm reasonably confident it's correct, it took me a while thinking of possible off-by-one errors etc. It's an interesting voyage into what you can do with LINQ... and also what you probably shouldn't.
Oh, and note that I've pushed the "minimum length of 3" part up to the caller - when you have a sequence of tuples like this, it's cleaner to filter it out separately, IMO.
using System;
using System.Collections.Generic;
using System.Linq;
static class Extensions
{
    /// <summary>
    /// Projects each pair of adjacent elements (previous, current) of <paramref name="source"/>.
    /// A sequence with fewer than two elements produces no results.
    /// </summary>
    /// <param name="source">The sequence to pair up.</param>
    /// <param name="selector">Called with each element and its immediate successor.</param>
    public static IEnumerable<TResult> SelectConsecutive<TSource, TResult>
        (this IEnumerable<TSource> source,
         Func<TSource, TSource, TResult> selector)
    {
        bool havePrevious = false;
        TSource previous = default(TSource);
        foreach (TSource current in source)
        {
            // The first element has no predecessor, so it only primes "previous".
            if (havePrevious)
            {
                yield return selector(previous, current);
            }

            previous = current;
            havePrevious = true;
        }
    }
}
// Demo: finds runs of consecutive integers using only LINQ operators plus SelectConsecutive.
class Test
{
// Prints every run of at least three consecutive values found in a sample list.
static void Main()
{
var list = new List<int> { 21,4,7,9,12,22,17,8,2,20,23 };
// Item1 is the run length (see FindSequences), so this keeps runs of 3 or more
foreach (var sequence in FindSequences(list).Where(x => x.Item1 >= 3))
{
Console.WriteLine("Found sequence of length {0} starting at {1}",
sequence.Item1, sequence.Item2);
}
}
// One-element sequence holding the null sentinel placed before and after the real values
private static readonly int?[] End = { null };
// Each tuple in the returned sequence is (length, first element)
public static IEnumerable<Tuple<int, int>> FindSequences
(IEnumerable<int> input)
{
// Use null values at the start and end of the ordered sequence
// so that the first pair always starts a new sequence starting
// with the lowest actual element, and the final pair always
// starts a new one starting with null. That "sequence at the end"
// is used to compute the length of the *real* final element.
return End.Concat(input.OrderBy(x => x)
.Select(x => (int?) x))
.Concat(End)
// Work out consecutive pairs of items
.SelectConsecutive((x, y) => Tuple.Create(x, y))
// Remove duplicates
.Where(z => z.Item1 != z.Item2)
// Keep the index so we can tell sequence length
// (assigned after duplicate pairs were dropped, so repeats do not inflate lengths)
.Select((z, index) => new { z, index })
// Find sequence starting points
// (a pair whose second item is not first + 1; a null on either side never equals first + 1)
.Where(both => both.z.Item2 != both.z.Item1 + 1)
// Length of a run = distance between the indices of two successive starting points;
// Item2 of the earlier starting point is the run's first value
.SelectConsecutive((start1, start2) =>
Tuple.Create(start2.index - start1.index,
start1.z.Item2.Value));
}
}
Jon Skeet's / Timwi's solutions are the way to go.
For fun, here's a LINQ query that does the job (very inefficiently):
var sequences = input.Distinct()
.GroupBy(num => Enumerable.Range(num, int.MaxValue - num + 1)
.TakeWhile(input.Contains)
.Last()) //use the last member of the consecutive sequence as the key
.Where(seq => seq.Count() >= 3)
.Select(seq => seq.OrderBy(num => num)); // not necessary unless ordering is desirable inside each sequence.
The query's performance can be improved slightly by loading the input into a HashSet (to improve Contains), but that will still not produce a solution that is anywhere close to efficient.
The only bug I am aware of is the possibility of an arithmetic overflow if the sequence contains negative numbers of large magnitude (we cannot represent the count parameter for Range). This would be easy to fix with a custom static IEnumerable<int> To(this int start, int end) extension-method. If anyone can think of any other simple technique of dodging the overflow, please let me know.
EDIT:
Here's a slightly more verbose (but equally inefficient) variant without the overflow issue.
var sequences = input.GroupBy(num => input.Where(candidate => candidate >= num)
.OrderBy(candidate => candidate)
.TakeWhile((candidate, index) => candidate == num + index)
.Last())
.Where(seq => seq.Count() >= 3)
.Select(seq => seq.OrderBy(num => num));
I think my solution is more elegant and simple, and therefore easier to verify as correct:
/// <summary>Finds every run of consecutive integers in the input collection.</summary>
/// <param name="input">The integers to search; they do not need to be sorted.</param>
/// <param name="minLength">The shortest run that is returned. Values of 1 or less
/// return every run, including single values.</param>
/// <returns>The runs, each in ascending order. A repeated value starts a run of its
/// own, so returned runs may overlap.</returns>
static IEnumerable<IEnumerable<int>> ConsecutiveSequences(
    IEnumerable<int> input, int minLength = 1)
{
    var runs = new List<List<int>>();
    foreach (int value in input.OrderBy(n => n))
    {
        // Extend the first run that currently ends at value - 1, if there is one.
        List<int> extendable = runs.Find(run => run[run.Count - 1] + 1 == value);
        if (extendable != null)
        {
            extendable.Add(value);
        }
        else
        {
            runs.Add(new List<int> { value });
        }
    }

    if (minLength <= 1)
    {
        return runs;
    }

    return runs.Where(run => run.Count >= minLength);
}
Benefits over the other solutions:
It can find sequences that overlap.
It’s properly reusable and documented.
I have not found any bugs ;-)
Here is how to solve the problem in a "LINQish" way:
int[] arr = new int[]{ 21, 4, 7, 9, 12, 22, 17, 8, 2, 20, 23 };
IOrderedEnumerable<int> sorted = arr.OrderBy(x => x);
int cnt = sorted.Count();
int[] sortedArr = sorted.ToArray();
IEnumerable<int> selected = sortedArr.Where((x, idx) =>
idx <= cnt - 3 && sortedArr[idx + 1] == x + 1 && sortedArr[idx + 2] == x + 2);
IEnumerable<int> result = selected.SelectMany(x => new int[] { x, x + 1, x + 2 }).Distinct();
Console.WriteLine(string.Join(",", result.Select(x=>x.ToString()).ToArray()));
Due to the array copying and reconstruction, this solution - of course - is not as efficient as the traditional solution with loops.
Not 100% Linq but here's a generic variant:
// Finds runs of "sequential" items, where the caller defines what sequential means.
//   minSequenceLength: runs with fewer items than this are filtered out.
//   areSequential:     called as areSequential(previouslySelected, candidate).
//   items:             the values to search; they are sorted and de-duplicated first.
// NOTE(review): the queries below are deferred and communicate through the captured
// variable lastSelected, so the result depends on the order in which they are enumerated.
// The returned inner sequences are deferred as well - presumably each is meant to be
// enumerated in order as the outer sequence is consumed; confirm before reusing them later.
static IEnumerable<IEnumerable<TItem>> GetSequences<TItem>(
int minSequenceLength,
Func<TItem, TItem, bool> areSequential,
IEnumerable<TItem> items)
where TItem : IComparable<TItem>
{
// Sort, de-duplicate and materialise so the input is only read once
items = items
.OrderBy(n => n)
.Distinct().ToArray();
// The item most recently selected into a run; assigned inside the inner query below
var lastSelected = default(TItem);
var sequences =
from startItem in items
// A run may start at the first item, or at any item beyond the last one selected so far
where startItem.Equals(items.First())
|| startItem.CompareTo(lastSelected) > 0
let sequence =
from item in items
where item.Equals(startItem) || areSequential(lastSelected, item)
// Side effect: record the item as selected while yielding it
select (lastSelected = item)
// Count() enumerates the inner query, which is what advances lastSelected
where sequence.Count() >= minSequenceLength
select sequence;
return sequences;
}
// Example: prints each run of at least three consecutive integers, comma-separated.
static void UsageInt()
{
    int[] numbers = { 21, 4, 7, 9, 12, 22, 17, 8, 2, 20, 23 };
    Func<int, int, bool> isNext = (a, b) => a + 1 == b;
    foreach (var sequence in GetSequences(3, isNext, numbers))
    {
        Console.WriteLine(string.Join(", ", sequence.ToArray()));
    }
}
// Example: prints each run of at least three alphabetically adjacent letters in a phrase.
static void UsageChar()
{
    // Position in this list defines which letters count as adjacent.
    var list = new List<char>(
        "abcdefghijklmnopqrstuvwxyz".ToCharArray());
    char[] letters = "PleaseBeGentleWithMe".ToLower().ToCharArray();
    var sequences = GetSequences(
        3,
        (a, b) => (list.IndexOf(a) + 1 == list.IndexOf(b)),
        letters);
    foreach (var sequence in sequences)
    {
        Console.WriteLine(string.Join(", ", sequence.ToArray()));
    }
}
Here's my shot at it:
public static class SequenceDetector
{
    /// <summary>
    /// Splits <paramref name="sequence"/> into runs of adjacent items for which
    /// <paramref name="inSequenceSelector"/> returns true.
    /// </summary>
    /// <param name="sequence">The items to scan, in the order they should be compared.</param>
    /// <param name="inSequenceSelector">
    /// Called as (previous, current); true means the two items belong to the same run.
    /// </param>
    /// <returns>Every run of two or more items, in order of appearance.</returns>
    public static IEnumerable<IEnumerable<T>> DetectSequenceWhere<T>(this IEnumerable<T> sequence, Func<T, T, bool> inSequenceSelector)
    {
        // Walk the source with one enumerator. The original used FirstOrDefault() plus
        // Skip(1), which enumerated the source twice (re-running any deferred query).
        using (IEnumerator<T> items = sequence.GetEnumerator())
        {
            if (!items.MoveNext())
            {
                yield break;
            }

            List<T> subsequence = null;
            // We can only have a sequence with 2 or more items
            T last = items.Current;
            while (items.MoveNext())
            {
                T item = items.Current;
                if (inSequenceSelector(last, item))
                {
                    // These form part of a sequence
                    if (subsequence == null)
                    {
                        subsequence = new List<T> { last };
                    }

                    subsequence.Add(item);
                }
                else if (subsequence != null)
                {
                    // We have a previous seq to return
                    yield return subsequence;
                    subsequence = null;
                }

                last = item;
            }

            if (subsequence != null)
            {
                // Return any trailing seq
                yield return subsequence;
            }
        }
    }
}
// Demo: prints every run of three or more consecutive numbers in a sample list.
public class test
{
    public static void run()
    {
        var list = new List<int> { 21, 4, 7, 9, 12, 22, 17, 8, 2, 20, 23 };

        // Sort and de-duplicate first so neighbouring values can be compared directly.
        var longRuns = list
            .OrderBy(i => i)
            .Distinct()
            .DetectSequenceWhere((first, second) => first + 1 == second)
            .Where(seq => seq.Count() >= 3);

        foreach (var subsequence in longRuns)
        {
            string[] parts = subsequence.Select(i => i.ToString()).ToArray();
            Console.WriteLine("Found subsequence {0}", string.Join(", ", parts));
        }
    }
}
This returns the specific items that form the sub-sequences and permits any type of item and any definition of criteria so long as it can be determined by comparing adjacent items.
What about sorting the array then create another array that is the difference between each element the previous one
sortedArray = 8, 9, 10, 21, 22, 23, 24, 27, 30, 31, 32
diffArray = 1, 1, 11, 1, 1, 1, 3, 3, 1, 1
Now iterate through the difference array; if the difference equals 1, increase the count of a variable, sequenceLength, by 1. If the difference is > 1, check sequenceLength: if it is >= 2 then you have a sequence of at least 3 consecutive elements. Then reset sequenceLength to 0 and continue your loop on the difference array.
Here is a solution I knocked up in F#, it should be fairly easy to translate this into a C# LINQ query since fold is pretty much equivalent to the LINQ aggregate operator.
#light
let nums = [21;4;7;9;12;22;17;8;2;20;23]

// Fold step. The state is:
//   (total item count, items seen so far, previous number,
//    length of the current run, the current run, runs found so far)
// A run of 3 or more is recorded when a gap ends it, or when the input ends while it is still open.
let scanFunc (mainSeqLength, mainCounter, lastNum:int, subSequenceCounter:int, subSequence:int list, foundSequences:int list list) (num:int) =
    let continuesRun = (num = lastNum + 1)
    let isLastItem = (mainSeqLength = mainCounter + 1)
    let newCounter = if continuesRun then subSequenceCounter + 1 else 1
    // List append is "@"
    let newSubSequence = if continuesRun then subSequence @ [num] else [num]
    let newFound =
        if not continuesRun && subSequenceCounter >= 3 then
            // A gap closes the previous run; the current number is not part of it
            foundSequences @ [subSequence]
        elif continuesRun && isLastItem && newCounter >= 3 then
            // The input ends inside a run: record it including the current number
            foundSequences @ [newSubSequence]
        else
            foundSequences
    (mainSeqLength, mainCounter + 1, num, newCounter, newSubSequence, newFound)

let subSequences =
    nums
    |> Seq.sort
    |> Seq.fold scanFunc (nums |> Seq.length, 0, 0, 0, [], [])
    |> fun (_,_,_,_,_,results) -> results
Linq isn't the solution for everything; sometimes you're better off with a simple loop. Here's a solution, with just a bit of Linq to order the original sequences and filter the results.
// LINQPad entry point: dumps every run of three or more consecutive numbers.
void Main()
{
    var numbers = new[] { 21,4,7,9,12,22,17,8,2,20,23 };
    // No semicolon after the GetSequences call: the Where filter is part of the same expression.
    var sequences =
        GetSequences(numbers, (prev, curr) => curr == prev + 1)
            .Where(s => s.Count() >= 3);
    sequences.Dump();
}
/// <summary>
/// Sorts <paramref name="source"/> and splits it into runs in which every item is
/// "consecutive" to the one before it.
/// </summary>
/// <param name="source">The items to group; they are sorted with the default comparer.</param>
/// <param name="areConsecutive">Called as (previous, current); false starts a new run.</param>
/// <returns>Each run as an array, in sorted order; single-item runs are included.</returns>
public static IEnumerable<IEnumerable<T>> GetSequences<T>(
    IEnumerable<T> source,
    Func<T, T, bool> areConsecutive)
{
    var run = new List<T>();
    foreach (T item in source.OrderBy(x => x))
    {
        // A non-empty run ends as soon as the next item does not follow its last one.
        if (run.Count > 0 && !areConsecutive(run[run.Count - 1], item))
        {
            yield return run.ToArray();
            run.Clear();
        }

        run.Add(item);
    }

    if (run.Count > 0)
    {
        yield return run.ToArray();
    }
}
I thought of the same thing as Jon: to represent a range of consecutive integers all you really need are two measly integers! So I'd start there:
/// <summary>
/// A run of consecutive integers, stored as its first value and its length.
/// Enumerating it yields Start, Start + 1, ... up to End.
/// </summary>
struct Range : IEnumerable<int>
{
    readonly int _start;
    readonly int _count;

    public Range(int start, int count)
    {
        _start = start;
        _count = count;
    }

    /// <summary>The first value in the range.</summary>
    public int Start
    {
        get { return _start; }
    }

    /// <summary>The number of values in the range.</summary>
    public int Count
    {
        get { return _count; }
    }

    /// <summary>The last value in the range (Start + Count - 1).</summary>
    public int End
    {
        get { return _start + _count - 1; }
    }

    public IEnumerator<int> GetEnumerator()
    {
        for (int i = 0; i < _count; ++i)
        {
            yield return _start + i;
        }
    }

    // IEnumerable<int> also requires the non-generic GetEnumerator; without this explicit
    // implementation the struct does not compile.
    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Returns a range with the same Start that is <paramref name="y"/> values longer.</summary>
    public static Range operator +(Range x, int y)
    {
        return new Range(x.Start, x.Count + y);
    }
}
From there, you can write a static method to return a bunch of these Range values corresponding to the consecutive numbers of your sequence.
// Sorts the values and yields each run of consecutive integers that contains at least
// minCount values. Duplicate values are ignored.
static IEnumerable<Range> FindRanges(IEnumerable<int> source, int minCount)
{
    // throw exceptions on invalid arguments, maybe...
    Range current = default(Range);
    foreach (int value in source.OrderBy(x => x))
    {
        // Count is only 0 for the default value, i.e. before the first item has been seen.
        bool started = current.Count != 0;

        if (started && value == current.End)
        {
            // skip duplicates
            continue;
        }

        if (started && value == current.End + 1)
        {
            // "append" the consecutive value to the range
            current = current + 1;
            continue;
        }

        // A gap ends the range in progress: return it if it is long enough
        if (started && current.Count >= minCount)
        {
            yield return current;
        }

        // start over (or start for the first time)
        current = new Range(value, 1);
    }

    // return whatever we ended up with
    if (current.Count >= minCount)
    {
        yield return current;
    }
}
Demo:
int[] numbers = new[] { 21, 4, 7, 9, 12, 22, 17, 8, 2, 20, 23 };
// The method defined above is named FindRanges (the demo originally called FindConsecutiveRanges).
foreach (Range r in FindRanges(numbers, 3))
{
    // Using .NET 3.5 here, don't have the much nicer string.Join overloads.
    Console.WriteLine(string.Join(", ", r.Select(x => x.ToString()).ToArray()));
}
Output:
7, 8, 9
20, 21, 22, 23
Here's my LINQ-y take on the problem:
/// <summary>
/// Groups the distinct values of <paramref name="input"/> into runs of consecutive integers.
/// </summary>
/// <param name="input">The integers to search; order and duplicates do not matter.</param>
/// <param name="minLength">The shortest run to return.</param>
/// <returns>
/// The qualifying runs in ascending order. The result is fully computed before returning,
/// so it can be enumerated any number of times; empty input gives an empty result.
/// </returns>
static IEnumerable<IEnumerable<int>>
    ConsecutiveSequences(this IEnumerable<int> input, int minLength = 3)
{
    var inorder = new SortedSet<int>(input);
    if (inorder.Count == 0)
    {
        // First() below would throw on an empty set.
        return Enumerable.Empty<IEnumerable<int>>();
    }

    // Tag every value with a run number that increases each time a gap is crossed.
    int order = 0;
    var tagged = new[] { new { order = 0, val = inorder.First() } }
        .Concat(
            inorder.Zip(inorder.Skip(1), (x, val) =>
                new { order = x + 1 == val ? order : ++order, val }));

    // Materialise now: the query mutates "order", so a deferred result enumerated a second
    // time would resume from the final counter value and split off the first element.
    return (from item in tagged
            group item.val by item.order into list
            where list.Count() >= minLength
            select list).ToList();
}
uses no explicit loops, but should still be O(n lg n)
uses SortedSet instead of .OrderBy().Distinct()
combines consecutive element with list.Zip(list.Skip(1))
Here's a solution using a Dictionary instead of a sort...
It adds the items to a Dictionary, and then for each value increments above and below to find the longest sequence.
It is not strictly LINQ, though it does make use of some LINQ functions, and I think it is more readable than a pure LINQ solution..
// Demo: prints each run of three or more consecutive values as a comma-separated line,
// then waits for Enter.
static void Main(string[] args)
{
    int[] items = { -1, 0, 1, 21, -2, 4, 7, 9, 12, 22, 17, 8, 2, 20, 23 };
    foreach (IEnumerable<int> sequence in FindSequences(items, 3))
    {
        // print results to console
        string line = sequence.Select(num => num.ToString()).Aggregate((a, b) => a + "," + b);
        Console.Out.WriteLine(line);
    }

    Console.ReadLine();
}
// Finds runs of consecutive integers without sorting: the values are indexed in a dictionary
// and each run is grown outwards from whichever of its members is encountered first.
// Returns the runs holding at least minSequenceLength values, each in ascending order,
// ordered by where their first-encountered member appears in items.
private static IEnumerable<IEnumerable<int>> FindSequences(IEnumerable<int> items, int minSequenceLength)
{
    // Index the values for constant-time membership checks
    var itemDict = new Dictionary<int, int>();
    foreach (int val in items)
    {
        itemDict[val] = val;
    }

    var longEnough = new List<List<int>>();
    foreach (int item in items)
    {
        List<int> sequence = FindLongestSequenceIncludingValue(itemDict, item);
        if (sequence.Count >= minSequenceLength)
        {
            longEnough.Add(sequence);
        }

        // Remove the run's values so the same run is not found again from another member
        foreach (int used in sequence)
        {
            itemDict.Remove(used);
        }
    }

    return longEnough;
}

// Builds the run of consecutive values around value, using only values still in itemDict.
// Returns an empty list when value itself is no longer in the dictionary.
private static List<int> FindLongestSequenceIncludingValue(Dictionary<int, int> itemDict, int value)
{
    var run = new List<int>();
    if (itemDict.ContainsKey(value))
    {
        run.Add(value);

        // Grow upwards, appending to the end of the run
        for (int above = value + 1; itemDict.ContainsKey(above); above++)
        {
            run.Add(itemDict[above]);
        }

        // Grow downwards, inserting at the front so the run stays ascending
        for (int below = value - 1; itemDict.ContainsKey(below); below--)
        {
            run.Insert(0, itemDict[below]);
        }
    }

    return run;
}