Related
We often need a way to compare the items of two lists and find which items exists only in ListA (leftOuterItems), which exist only in ListB (rightOuterItems) and their common items (matchedItems)...
I've ended up with two solutions as you can see below:
One way is to sort the lists and iterate one by one (which has a performance penalty when the collections have too many items due to sorting), and the other way is to use dictionaries and hashing (which is slower than the first way when collections have a few items - due to memory allocation etc)
*Also keep in mind that I want to compare two lists of objects eg two lists of class Person (not just primitives).. That's why I created generic extension methods
So, do you have any better idea to suggest?
Thank you in advanced!
class Program
{
static void Main(string[] args)
{
var fewItemsList1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var fewItemsList2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Run(100_000, fewItemsList1, fewItemsList2);
var manyItemsList1 = Enumerable.Range(0, 100_000).ToArray();
var manyItemsList2 = Enumerable.Range(50000, 150_000).ToArray();
Run(1000, manyItemsList1, manyItemsList2);
Console.WriteLine("Hello World!");
Console.Read();
}
private static void Run(int count, int[] l1, int[] l2)
{
var sw = Stopwatch.StartNew();
for (int i = 0; i < count; i++)
l1.OrderedCompare(l2, x => x, x => x, out int[] leftOuterItems, out int[] rightOuterItems, out (int, int)[] matchedItems);
sw.Stop();
Console.WriteLine($"OrderedCompare for {count} iterations with L1 items:{l1.Count()} and L2 items:{l2.Count()} took {sw.Elapsed}");
sw.Restart();
for (int i = 0; i < count; i++)
l1.HashedCompare(l2, x => x, x => x, out int[] leftOuterItems2, out int[] rightOuterItems2, out (int, int)[] matchedItems2);
Console.WriteLine($"HashedCompare for {count} with L1 items:{l1.Count()} and L2 items:{l2.Count()} iterations took {sw.Elapsed}");
}
}
public static class Extensions
{
public static void OrderedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var leftOuterItemsList = new List<T1>();
var rightOuterItemsList = new List<T2>();
var matchedItemsList = new List<(T1, T2)>();
source = source.OrderBy(x => sourceKeyGetter(x)).ToArray();
target = target.OrderBy(x => targetKeyGetter(x)).ToArray();
bool reverseCompare = false;
int i = 0, j = 0, sourcZeroBasedCount = source.Count() - 1, targetZeroBaseCount = target.Count() - 1;
while (true)
{
var end = i == sourcZeroBasedCount && j == targetZeroBaseCount;
var sourceItem = source.ElementAt(i);
var targetItem = target.ElementAt(j);
var sourceKey = sourceKeyGetter(sourceItem);
var targetKey = targetKeyGetter(targetItem);
int diff = reverseCompare ? targetKey.CompareTo(sourceKey) : sourceKey.CompareTo(targetKey);
reverseCompare = i == sourcZeroBasedCount || j == targetZeroBaseCount;
switch (diff)
{
case -1:
leftOuterItemsList.Add(sourceItem);
i = i < sourcZeroBasedCount ? i + 1 : i;
break;
case 0:
matchedItemsList.Add((sourceItem, targetItem));
i = i < sourcZeroBasedCount ? i + 1 : i;
j = j < targetZeroBaseCount ? j + 1 : j;
break;
case 1:
rightOuterItemsList.Add(targetItem);
j = j < targetZeroBaseCount ? j + 1 : j;
break;
}
if (end)
break;
}
leftOuterItems = leftOuterItemsList.ToArray();
rightOuterItems = rightOuterItemsList.ToArray();
matchedItems = matchedItemsList.ToArray();
}
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out T1[] leftOuterItems,
out T2[] rightOuterItems,
out (T1, T2)[] matchedItems) where TKey : IComparable<TKey>
{
var sourceDic = source.ToDictionary(x => sourceKeyGetter(x));
var targetDic = target.ToDictionary(x => targetKeyGetter(x));
var leftOuterKeys = sourceDic.Keys.Except(targetDic.Keys).ToArray();
var rightOuterKeys = targetDic.Keys.Except(sourceDic.Keys).ToArray();
var matchedKeys = sourceDic.Keys.Concat(targetDic.Keys).Except(leftOuterKeys.Concat(rightOuterKeys)).ToArray();
leftOuterItems = leftOuterKeys.Select(key => sourceDic[key]).ToArray();
rightOuterItems = rightOuterKeys.Select(key => targetDic[key]).ToArray();
matchedItems = matchedKeys.Select(key => (sourceDic[key], targetDic[key])).ToArray();
}
}
Most of the inefficiency in HashedCompare() is down to unnecessary enumerations and lookups in the dictionaries. If you write the algorithm in an imperative style you can avoid all that and the code becomes, in my opinion simpler to follow:
I second #00110001 suggestion that you should use a proper benchmarking framework as the differences between the different implementations are in the same order of complexity.
public static void HashedCompare<T1, T2, TKey>(
this IEnumerable<T1> source,
IEnumerable<T2> target,
Func<T1, TKey> sourceKeyGetter,
Func<T2, TKey> targetKeyGetter,
out List<T1> leftOuterItems,
out List<T2> rightOuterItems,
out List<(T1, T2)> matchedItems) where TKey : IEquatable<TKey>
{
var sourceItems = source.ToDictionary(x => sourceKeyGetter(x));
var targetItems = target.ToDictionary(x => targetKeyGetter(x));
matchedItems = new List<(T1, T2)>();
leftOuterItems = new List<T1>();
rightOuterItems = new List<T2>();
foreach (var sourceItem in sourceItems)
{
if (targetItems.TryGetValue(sourceItem.Key, out var targetItem))
matchedItems.Add((sourceItem.Value, targetItem));
else
leftOuterItems.Add(sourceItem.Value);
}
foreach (var targetItem in targetItems)
{
if (!sourceItems.ContainsKey(targetItem.Key))
rightOuterItems.Add(targetItem.Value);
}
}
You could use Except and Intersect which both work on Sets (a light weight hashset) and will work O(n) linear time complexity
var list1 = new[] { 1, 4, 2, 3, 7, 6, 9, 5 };
var List2 = new[] { 15, 5, 14, 6, 13, 7, 12, 8, 11, 9, 10 };
Console.WriteLine(string.Join(", ", list1.Except(List2)));
Console.WriteLine(string.Join(", ", List2.Except(list1)));
Console.WriteLine(string.Join(", ", List2.Intersect(list1)));
Output
1, 4, 2, 3
15, 14, 13, 12, 8, 11, 10
5, 6, 7, 9
As to whether it's faster or slower, you would have to benchmark, however my gut feeling is they will be more efficient and faster.
On the topic of benchmarking
Use a reliable tested framework like BenchmarkDotNet, you are likely to get the results wrong in unlimited ways if you roll this yourself
I've a List of int and I want to create multiple List after splitting the original list when a lower or same number is found. Numbers are not in sorted order.
List<int> data = new List<int> { 1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
I want the result to be as following lists:
{ 1, 2 }
{ 1, 2, 3 }
{ 3 }
{ 1, 2, 3, 4 }
{ 1, 2, 3, 4, 5, 6 }
Currently, I'm using following linq to do this but not helping me out:
List<int> data = new List<int> { 1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
List<List<int>> resultLists = new List<List<int>>();
var res = data.Where((p, i) =>
{
int count = 0;
resultLists.Add(new List<int>());
if (p < data[(i + 1) >= data.Count ? i - 1 : i + 1])
{
resultLists[count].Add(p);
}
else
{
count++;
resultLists.Add(new List<int>());
}
return true;
}).ToList();
I'd just go for something simple:
public static IEnumerable<List<int>> SplitWhenNotIncreasing(List<int> numbers)
{
for (int i = 1, start = 0; i <= numbers.Count; ++i)
{
if (i != numbers.Count && numbers[i] > numbers[i - 1])
continue;
yield return numbers.GetRange(start, i - start);
start = i;
}
}
Which you'd use like so:
List<int> data = new List<int> { 1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
foreach (var subset in SplitWhenNotIncreasing(data))
Console.WriteLine(string.Join(", ", subset));
If you really did need to work with IEnumerable<T>, then the simplest way I can think of is like this:
public sealed class IncreasingSubsetFinder<T> where T: IComparable<T>
{
public static IEnumerable<IEnumerable<T>> Find(IEnumerable<T> numbers)
{
return new IncreasingSubsetFinder<T>().find(numbers.GetEnumerator());
}
IEnumerable<IEnumerable<T>> find(IEnumerator<T> iter)
{
if (!iter.MoveNext())
yield break;
while (!done)
yield return increasingSubset(iter);
}
IEnumerable<T> increasingSubset(IEnumerator<T> iter)
{
while (!done)
{
T prev = iter.Current;
yield return prev;
if ((done = !iter.MoveNext()) || iter.Current.CompareTo(prev) <= 0)
yield break;
}
}
bool done;
}
Which you would call like this:
List<int> data = new List<int> { 1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
foreach (var subset in IncreasingSubsetFinder<int>.Find(data))
Console.WriteLine(string.Join(", ", subset));
This is not a typical LINQ operation, so as usual in such cases (when one insists on using LINQ) I would suggest using Aggregate method:
var result = data.Aggregate(new List<List<int>>(), (r, n) =>
{
if (r.Count == 0 || n <= r.Last().Last()) r.Add(new List<int>());
r.Last().Add(n);
return r;
});
You can use the index to get the previous item and calculate the group id out of comparing the values. Then group on the group ids and get the values out:
List<int> data = new List<int> { 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
int groupId = 0;
var groups = data.Select
( (item, index)
=> new
{ Item = item
, Group = index > 0 && item <= data[index - 1] ? ++groupId : groupId
}
);
List<List<int>> list = groups.GroupBy(g => g.Group)
.Select(x => x.Select(y => y.Item).ToList())
.ToList();
I really like Matthew Watson's solution. If however you do not want to rely on List<T>, here is my simple generic approach enumerating the enumerable once at most and still retaining the capability for lazy evaluation.
public static IEnumerable<IEnumerable<T>> AscendingSubsets<T>(this IEnumerable<T> superset) where T :IComparable<T>
{
var supersetEnumerator = superset.GetEnumerator();
if (!supersetEnumerator.MoveNext())
{
yield break;
}
T oldItem = supersetEnumerator.Current;
List<T> subset = new List<T>() { oldItem };
while (supersetEnumerator.MoveNext())
{
T currentItem = supersetEnumerator.Current;
if (currentItem.CompareTo(oldItem) > 0)
{
subset.Add(currentItem);
}
else
{
yield return subset;
subset = new List<T>() { currentItem };
}
oldItem = supersetEnumerator.Current;
}
yield return subset;
}
Edit: Simplified the solution further to only use one enumerator.
I have modified your code, and now working fine:
List<int> data = new List<int> { 1, 2, 1, 2, 3,3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
List<List<int>> resultLists = new List<List<int>>();
int last = 0;
int count = 0;
var res = data.Where((p, i) =>
{
if (i > 0)
{
if (p > last && p!=last)
{
resultLists[count].Add(p);
}
else
{
count++;
resultLists.Add(new List<int>());
resultLists[count].Add(p);
}
}
else
{
resultLists.Add(new List<int>());
resultLists[count].Add(p);
}
last = p;
return true;
}).ToList();
For things like this, I'm generally not a fan of solutions that use GroupBy or other methods that materialize the results. The reason is that you never know how long the input sequence will be, and materializations of these sub-sequences can be very costly.
I prefer to stream the results as they are pulled. This allows implementations of IEnumerable<T> that stream results to continue streaming through your transformation of that stream.
Note, this solution won't work if you break out of iterating through the sub-sequence and want to continue to the next sequence; if this is an issue, then one of the solutions that materialize the sub-sequences would probably be better.
However, for forward-only iterations of the entire sequence (which is the most typical use case), this will work just fine.
First, let's set up some helpers for our test classes:
private static IEnumerable<T> CreateEnumerable<T>(IEnumerable<T> enumerable)
{
// Validate parameters.
if (enumerable == null) throw new ArgumentNullException("enumerable");
// Cycle through and yield.
foreach (T t in enumerable)
yield return t;
}
private static void EnumerateAndPrintResults<T>(IEnumerable<T> data,
[CallerMemberName] string name = "") where T : IComparable<T>
{
// Write the name.
Debug.WriteLine("Case: " + name);
// Cycle through the chunks.
foreach (IEnumerable<T> chunk in data.
ChunkWhenNextSequenceElementIsNotGreater())
{
// Print opening brackets.
Debug.Write("{ ");
// Is this the first iteration?
bool firstIteration = true;
// Print the items.
foreach (T t in chunk)
{
// If not the first iteration, write a comma.
if (!firstIteration)
{
// Write the comma.
Debug.Write(", ");
}
// Write the item.
Debug.Write(t);
// Flip the flag.
firstIteration = false;
}
// Write the closing bracket.
Debug.WriteLine(" }");
}
}
CreateEnumerable is used for creating a streaming implementation, and EnumerateAndPrintResults will take the sequence, call ChunkWhenNextSequenceElementIsNotGreater (this is coming up and does the work) and output the results.
Here's the implementation. Note, I've chosen to implement them as extension methods on IEnumerable<T>; this is the first benefit, as it doesn't require a materialized sequence (technically, none of the other solutions do either, but it's better to explicitly state it like this).
First, the entry points:
public static IEnumerable<IEnumerable<T>>
ChunkWhenNextSequenceElementIsNotGreater<T>(
this IEnumerable<T> source)
where T : IComparable<T>
{
// Validate parameters.
if (source == null) throw new ArgumentNullException("source");
// Call the overload.
return source.
ChunkWhenNextSequenceElementIsNotGreater(
Comparer<T>.Default.Compare);
}
public static IEnumerable<IEnumerable<T>>
ChunkWhenNextSequenceElementIsNotGreater<T>(
this IEnumerable<T> source,
Comparison<T> comparer)
{
// Validate parameters.
if (source == null) throw new ArgumentNullException("source");
if (comparer == null) throw new ArgumentNullException("comparer");
// Call the implementation.
return source.
ChunkWhenNextSequenceElementIsNotGreaterImplementation(
comparer);
}
Note that this works on anything that implements IComparable<T> or where you provide a Comparison<T> delegate; this allows for any type and any kind of rules you want for performing the comparison.
Here's the implementation:
private static IEnumerable<IEnumerable<T>>
ChunkWhenNextSequenceElementIsNotGreaterImplementation<T>(
this IEnumerable<T> source, Comparison<T> comparer)
{
// Validate parameters.
Debug.Assert(source != null);
Debug.Assert(comparer != null);
// Get the enumerator.
using (IEnumerator<T> enumerator = source.GetEnumerator())
{
// Move to the first element. If one can't, then get out.
if (!enumerator.MoveNext()) yield break;
// While true.
while (true)
{
// The new enumerator.
var chunkEnumerator = new
ChunkWhenNextSequenceElementIsNotGreaterEnumerable<T>(
enumerator, comparer);
// Yield.
yield return chunkEnumerator;
// If the last move next returned false, then get out.
if (!chunkEnumerator.LastMoveNext) yield break;
}
}
}
Of note: this uses another class ChunkWhenNextSequenceElementIsNotGreaterEnumerable<T> to handle enumerating the sub-sequences. This class will iterate each of the items from the IEnumerator<T> that is obtained from the original IEnumerable<T>.GetEnumerator() call, but store the results of the last call to IEnumerator<T>.MoveNext().
This sub-sequence generator is stored, and the value of the last call to MoveNext is checked to see if the end of the sequence has or hasn't been hit. If it has, then it simply breaks, otherwise, it moves to the next chunk.
Here's the implementation of ChunkWhenNextSequenceElementIsNotGreaterEnumerable<T>:
internal class
ChunkWhenNextSequenceElementIsNotGreaterEnumerable<T> :
IEnumerable<T>
{
#region Constructor.
internal ChunkWhenNextSequenceElementIsNotGreaterEnumerable(
IEnumerator<T> enumerator, Comparison<T> comparer)
{
// Validate parameters.
if (enumerator == null)
throw new ArgumentNullException("enumerator");
if (comparer == null)
throw new ArgumentNullException("comparer");
// Assign values.
_enumerator = enumerator;
_comparer = comparer;
}
#endregion
#region Instance state.
private readonly IEnumerator<T> _enumerator;
private readonly Comparison<T> _comparer;
internal bool LastMoveNext { get; private set; }
#endregion
#region IEnumerable implementation.
public IEnumerator<T> GetEnumerator()
{
// The assumption is that a call to MoveNext
// that returned true has already
// occured. Store as the previous value.
T previous = _enumerator.Current;
// Yield it.
yield return previous;
// While can move to the next item, and the previous
// item is less than or equal to the current item.
while ((LastMoveNext = _enumerator.MoveNext()) &&
_comparer(previous, _enumerator.Current) < 0)
{
// Yield.
yield return _enumerator.Current;
// Store the previous.
previous = _enumerator.Current;
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
#endregion
}
Here's the test for the original condition in the question, along with the output:
[TestMethod]
public void TestStackOverflowCondition()
{
var data = new List<int> {
1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6
};
EnumerateAndPrintResults(data);
}
Output:
Case: TestStackOverflowCondition
{ 1, 2 }
{ 1, 2, 3 }
{ 3 }
{ 1, 2, 3, 4 }
{ 1, 2, 3, 4, 5, 6 }
Here's the same input, but streamed as an enumerable:
[TestMethod]
public void TestStackOverflowConditionEnumerable()
{
var data = new List<int> {
1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6
};
EnumerateAndPrintResults(CreateEnumerable(data));
}
Output:
Case: TestStackOverflowConditionEnumerable
{ 1, 2 }
{ 1, 2, 3 }
{ 3 }
{ 1, 2, 3, 4 }
{ 1, 2, 3, 4, 5, 6 }
Here's a test with non-sequential elements:
[TestMethod]
public void TestNonSequentialElements()
{
var data = new List<int> {
1, 3, 5, 7, 6, 8, 10, 2, 5, 8, 11, 11, 13
};
EnumerateAndPrintResults(data);
}
Output:
Case: TestNonSequentialElements
{ 1, 3, 5, 7 }
{ 6, 8, 10 }
{ 2, 5, 8, 11 }
{ 11, 13 }
Finally, here's a test with characters instead of numbers:
[TestMethod]
public void TestNonSequentialCharacters()
{
var data = new List<char> {
'1', '3', '5', '7', '6', '8', 'a', '2', '5', '8', 'b', 'c', 'a'
};
EnumerateAndPrintResults(data);
}
Output:
Case: TestNonSequentialCharacters
{ 1, 3, 5, 7 }
{ 6, 8, a }
{ 2, 5, 8, b, c }
{ a }
You can do it with Linq using the index to calculate the group:
var result = data.Select((n, i) => new { N = n, G = (i > 0 && n > data[i - 1] ? data[i - 1] + 1 : n) - i })
.GroupBy(a => a.G)
.Select(g => g.Select(n => n.N).ToArray())
.ToArray();
This is my simple loop approach using some yields :
static IEnumerable<IList<int>> Split(IList<int> data)
{
if (data.Count == 0) yield break;
List<int> curr = new List<int>();
curr.Add(data[0]);
int last = data[0];
for (int i = 1; i < data.Count; i++)
{
if (data[i] <= last)
{
yield return curr;
curr = new List<int>();
}
curr.Add(data[i]);
last = data[i];
}
yield return curr;
}
I use a dictionary to get 5 different list as below;
static void Main(string[] args)
{
List<int> data = new List<int> { 1, 2, 1, 2, 3, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6 };
Dictionary<int, List<int>> listDict = new Dictionary<int, List<int>>();
int listCnt = 1;
//as initial value get first value from list
listDict.Add(listCnt, new List<int>());
listDict[listCnt].Add(data[0]);
for (int i = 1; i < data.Count; i++)
{
if (data[i] > listDict[listCnt].Last())
{
listDict[listCnt].Add(data[i]);
}
else
{
//increase list count and add a new list to dictionary
listCnt++;
listDict.Add(listCnt, new List<int>());
listDict[listCnt].Add(data[i]);
}
}
//to use new lists
foreach (var dic in listDict)
{
Console.WriteLine( $"List {dic.Key} : " + string.Join(",", dic.Value.Select(x => x.ToString()).ToArray()));
}
}
Output :
List 1 : 1,2
List 2 : 1,2,3
List 3 : 3
List 4 : 1,2,3,4
List 5 : 1,2,3,4,5,6
There are lots and lots of questions on SO about finding if one list is the subset of another list.
i.e. bool isSubset = !t2.Except(t1).Any();
I can't seem to find one that accounts for order
as in given a sequence:
1,1,2,5,8,1,9,1,2
The subsequences...
2,5,8,1,9 true
1,2,5,8,1 true
5,2,1 false
1,2,5,1,8 false
1,1,2 true
1,1,1,2 false
A list in which the order is significant is a generalisation of the concept of string. Therefore you want to use a substring-finding algorithm.
There are several possibilities, but Knuth–Morris–Pratt is a good choice. It has some initial Θ(m) overhead where m is the length of the sublist sought, and then finds in Θ(n) where n is the distance to the sublist sought, or the length of the whole list if it isn't there. This beats the simple item-by-item compare which is Θ((n-m+1) m):
public static class ListSearching
{
public static bool Contains<T>(this IList<T> haystack, IList<T> needle)
{
return Contains(haystack, needle, null);
}
public static bool Contains<T>(this IList<T> haystack, IList<T> needle, IEqualityComparer<T> cmp)
{
return haystack.IndexOf(needle, cmp) != -1;
}
public static int IndexOf<T>(this IList<T> haystack, IList<T> needle)
{
return IndexOf(haystack, needle, null);
}
public static int IndexOf<T>(this IList<T> haystack, IList<T> needle, IEqualityComparer<T> cmp)
{
if(haystack == null || needle == null)
throw new ArgumentNullException();
int needleCount = needle.Count;
if(needleCount == 0)
return 0;//empty lists are everywhere!
if(cmp == null)
cmp = EqualityComparer<T>.Default;
int count = haystack.Count;
if(needleCount == 1)//can't beat just spinning through for it
{
T item = needle[0];
for(int idx = 0; idx != count; ++idx)
if(cmp.Equals(haystack[idx], item))
return idx;
return -1;
}
int m = 0;
int i = 0;
int[] table = KMPTable(needle, cmp);
while(m + i < count)
{
if(cmp.Equals(needle[i], haystack[m + i]))
{
if(i == needleCount - 1)
return m == needleCount ? -1 : m;//match -1 = failure to find conventional in .NET
++i;
}
else
{
m = m + i - table[i];
i = table[i] > -1 ? table[i] : 0;
}
}
return -1;
}
private static int[] KMPTable<T>(IList<T> sought, IEqualityComparer<T> cmp)
{
int[] table = new int[sought.Count];
int pos = 2;
int cnd = 0;
table[0] = -1;
table[1] = 0;
while(pos < table.Length)
if(cmp.Equals(sought[pos - 1], sought[cnd]))
table[pos++] = ++cnd;
else if(cnd > 0)
cnd = table[cnd];
else
table[pos++] = 0;
return table;
}
}
Testing this:
var list = new[]{ 1, 1, 2, 5, 8, 1, 9, 1, 2 };
Console.WriteLine(list.Contains(new[]{2,5,8,1,9})); // True
Console.WriteLine(list.Contains(new[]{1,2,5,8,1})); // True
Console.WriteLine(list.Contains(new[]{5,2,1})); // False
Console.WriteLine(list.Contains(new[]{1,2,5,1,8})); // False
Console.WriteLine(list.Contains(new[]{1,1,2})); // True
Console.WriteLine(list.Contains(new[]{1,1,1,2})); // False
Unfortunately there is no such function in .net. You need Knuth–Morris–Pratt algo for it. One guy already implemented it as linq extension https://code.google.com/p/linq-extensions/
This works for me:
var source = new [] { 1,1,2,5,8,1,9,1,2 };
Func<int[], int[], bool> contains =
(xs, ys) =>
Enumerable
.Range(0, xs.Length)
.Where(n => xs.Skip(n).Take(ys.Length).SequenceEqual(ys))
.Any();
Console.WriteLine(contains(source, new [] { 2,5,8,1,9 })); // true
Console.WriteLine(contains(source, new [] { 1,2,5,8,1 })); // true
Console.WriteLine(contains(source, new [] { 5,2,1 })); // false
Console.WriteLine(contains(source, new [] { 1,2,5,1,8 })); // false
Console.WriteLine(contains(source, new [] { 1,1,2 })); // true
Console.WriteLine(contains(source, new [] { 1,1,1,2 })); // false
there is a workaround to the limitation. You can change the enumerable to a string and then make use of the Contains method.
var t1 = new List<int> {1, 1, 2, 5, 8, 1, 9, 1, 2};
var t2 = new List<int> {2,5,8,1,9};
var t3 = new List<int> {5,2,1};
var t1Str = String.Join(",", t1);
t1Str.Contains(String.Join(",", t2););//true
t1Str.Contains(String.Join(",", t3););//false
You can build your own extension, I wrote a simple IsSubset method:
Console App for testing:
class Program
{
static void Main(string[] args)
{
var list = new List<int> { 1, 3, 5, 2, 4, 6 };
var subList = new List<int> { 3, 5};
var subList2 = new List<int> { 1, 4 };
bool isSublist1 = subList.IsSubset(list);
bool isSublist2 = subList2.IsSubset(list);
Console.WriteLine(isSublist1 + "; " + isSublist2);
/* True; False */
Console.ReadKey();
}
}
IEnumerable Extension:
public static class IEnumerableExtensions
{
public static bool IsSubset<T>(this IEnumerable<T> subsetEnumerable, IEnumerable<T> enumerable)
{
var found = false;
var list = enumerable as IList<T> ?? enumerable.ToList();
var listCount = list.Count();
var subsetList = subsetEnumerable as IList<T> ?? subsetEnumerable.ToList();
var posListCount = subsetList.Count();
/* If the SubList is bigger, it can't be a sublist */
if (listCount < posListCount) {
return false;
}
/* find all indexes of the first item of the sublist in the list */
var firstElement = subsetList.First();
var indexes = new List<int>();
var index = 0;
foreach (var elem in list)
{
if (elem.Equals(firstElement))
{
indexes.Add(index);
}
index++;
}
/* check all first item founds for the subsequence */
foreach (var i in indexes)
{
int x=0;
for (x = 0; x < posListCount && (i + x) < listCount; x++)
{
if (!Equals(subsetList[x], list[(i + x)]))
{
found = false;
break;
}
found = true;
}
if (x + 1 < posListCount)
found = false;
}
return found;
}
}
May be using join can get you what you want. Join will return the matching records. If record count is greater than 0 than there is a match else no match.
Below I have explained through a sample code:
class Program
{
static void Main(string[] args)
{
List<Employee> empList = new List<Employee>
{
new Employee{EmpID = 1},
new Employee{EmpID = 1},
new Employee{EmpID = 2},
new Employee{EmpID = 5},
new Employee{EmpID = 8},
new Employee{EmpID = 1},
new Employee{EmpID = 9},
new Employee{EmpID = 1},
new Employee{EmpID = 2}
};
List<Manager> mgrList = new List<Manager>
{
new Manager{ManagerID = 7},
new Manager{ManagerID = 3},
new Manager{ManagerID = 6}
};
var result = (from emp in empList
join mgr in mgrList on emp.EmpID equals mgr.ManagerID
select new { emp.EmpID}).Count();
Console.WriteLine(result);
Console.ReadKey();
}
}
public class Employee
{
public int EmpID { get; set; }
}
public class Manager
{
public int ManagerID { get; set; }
}
Assuming I have a list
var listOfInt = new List<int> {1, 2, 3, 4, 7, 8, 12, 13, 14}
How can I use LINQ to obtain a list of lists as follows:
{{1, 2, 3, 4}, {7, 8}, {12, 13, 14}}
So, i have to take the consecutive values and group them into lists.
You can create extension method (I omitted source check here) which will iterate source and create groups of consecutive items. If next item in source is not consecutive, then current group is yielded:
public static IEnumerable<List<int>> ToConsecutiveGroups(
this IEnumerable<int> source)
{
using (var iterator = source.GetEnumerator())
{
if (!iterator.MoveNext())
{
yield break;
}
else
{
int current = iterator.Current;
List<int> group = new List<int> { current };
while (iterator.MoveNext())
{
int next = iterator.Current;
if (next < current || current + 1 < next)
{
yield return group;
group = new List<int>();
}
current = next;
group.Add(current);
}
if (group.Any())
yield return group;
}
}
}
Usage is simple:
var listOfInt = new List<int> { 1, 2, 3, 4, 7, 8, 12, 13, 14 };
var groups = listOfInt.ToConsecutiveGroups();
Result:
[
[ 1, 2, 3, 4 ],
[ 7, 8 ],
[ 12, 13, 14 ]
]
UPDATE: Here is generic version of this extension method, which accepts predicate for verifying if two values should be considered consecutive:
public static IEnumerable<List<T>> ToConsecutiveGroups<T>(
this IEnumerable<T> source, Func<T,T, bool> isConsequtive)
{
using (var iterator = source.GetEnumerator())
{
if (!iterator.MoveNext())
{
yield break;
}
else
{
T current = iterator.Current;
List<T> group = new List<T> { current };
while (iterator.MoveNext())
{
T next = iterator.Current;
if (!isConsequtive(current, next))
{
yield return group;
group = new List<T>();
}
current = next;
group.Add(current);
}
if (group.Any())
yield return group;
}
}
}
Usage is simple:
var result = listOfInt.ToConsecutiveGroups((x,y) => (x == y) || (x == y - 1));
This works for both sorted and unsorted lists:
var listOfInt = new List<int> { 1, 2, 3, 4, 7, 8, 12, 13 };
int index = 0;
var result = listOfInt.Zip(listOfInt
.Concat(listOfInt.Reverse<int>().Take(1))
.Skip(1),
(v1, v2) =>
new
{
V = v1,
G = (v2 - v1) != 1 ? index++ : index
})
.GroupBy(x => x.G, x => x.V, (k, l) => l.ToList())
.ToList();
External index is building an index of consecutive groups that have value difference of 1. Then you can simply GroupBy with respect to this index.
To clarify solution, here is how this collection looks without grouping (GroupBy commented):
Assuming your input is in order, the following will work:
var grouped = input.Select((n, i) => new { n, d = n - i }).GroupBy(p => p.d, p => p.n);
It won't work if your input is e.g. { 1, 2, 3, 999, 5, 6, 7 }.
You'd get { { 1, 2, 3, 5, 6, 7 }, { 999 } }.
This works:
var results =
listOfInt
.Skip(1)
.Aggregate(
new List<List<int>>(new [] { listOfInt.Take(1).ToList() }),
(a, x) =>
{
if (a.Last().Last() + 1 == x)
{
a.Last().Add(x);
}
else
{
a.Add(new List<int>(new [] { x }));
}
return a;
});
I get this result:
How do I select the unique elements from the list {0, 1, 2, 2, 2, 3, 4, 4, 5} so that I get {0, 1, 3, 5}, effectively removing all instances of the repeated elements {2, 4}?
var numbers = new[] { 0, 1, 2, 2, 2, 3, 4, 4, 5 };
var uniqueNumbers =
from n in numbers
group n by n into nGroup
where nGroup.Count() == 1
select nGroup.Key;
// { 0, 1, 3, 5 }
var nums = new int{ 0...4,4,5};
var distinct = nums.Distinct();
make sure you're using Linq and .NET framework 3.5.
With lambda..
var all = new[] {0,1,1,2,3,4,4,4,5,6,7,8,8}.ToList();
var unique = all.GroupBy(i => i).Where(i => i.Count() == 1).Select(i=>i.Key);
C# 2.0 solution:
static IEnumerable<T> GetUniques<T>(IEnumerable<T> things)
{
Dictionary<T, int> counts = new Dictionary<T, int>();
foreach (T item in things)
{
int count;
if (counts.TryGetValue(item, out count))
counts[item] = ++count;
else
counts.Add(item, 1);
}
foreach (KeyValuePair<T, int> kvp in counts)
{
if (kvp.Value == 1)
yield return kvp.Key;
}
}
Here is another way that works if you have complex type objects in your List and want to get the unique values of a property:
var uniqueValues= myItems.Select(k => k.MyProperty)
.GroupBy(g => g)
.Where(c => c.Count() == 1)
.Select(k => k.Key)
.ToList();
Or to get distinct values:
var distinctValues = myItems.Select(p => p.MyProperty)
.Distinct()
.ToList();
If your property is also a complex type you can create a custom comparer for the Distinct(), such as Distinct(OrderComparer), where OrderComparer could look like:
public class OrderComparer : IEqualityComparer<Order>
{
public bool Equals(Order o1, Order o2)
{
return o1.OrderID == o2.OrderID;
}
public int GetHashCode(Order obj)
{
return obj.OrderID.GetHashCode();
}
}
If Linq isn't available to you because you have to support legacy code that can't be upgraded, then declare a Dictionary, where the first int is the number and the second int is the number of occurences. Loop through your List, loading up your Dictionary. When you're done, loop through your Dictionary selecting only those elements where the number of occurences is 1.
I believe Matt meant to say:
static IEnumerable<T> GetUniques<T>(IEnumerable<T> things)
{
Dictionary<T, bool> uniques = new Dictionary<T, bool>();
foreach (T item in things)
{
if (!(uniques.ContainsKey(item)))
{
uniques.Add(item, true);
}
}
return uniques.Keys;
}
There are many ways to skin a cat, but HashSet seems made for the task here.
var numbers = new[] { 0, 1, 2, 2, 2, 3, 4, 4, 5 };
HashSet<int> r = new HashSet<int>(numbers);
foreach( int i in r ) {
Console.Write( "{0} ", i );
}
The output:
0 1 2 3 4 5
Here's a solution with no LINQ:
var numbers = new[] { 0, 1, 2, 2, 2, 3, 4, 4, 5 };
// This assumes the numbers are sorted
var noRepeats = new List<int>();
int temp = numbers[0]; // Or .First() if using IEnumerable
var count = 1;
for(int i = 1; i < numbers.Length; i++) // Or foreach (var n in numbers.Skip(1)) if using IEnumerable
{
if (numbers[i] == temp) count++;
else
{
if(count == 1) noRepeats.Add(temp);
temp = numbers[i];
count = 1;
}
}
if(count == 1) noRepeats.Add(temp);
Console.WriteLine($"[{string.Join(separator: ",", values: numbers)}] -> [{string.Join(separator: ",", values: noRepeats)}]");
This prints:
[0,1,2,2,2,3,4,4,5] -> [0,1,3,5]
In .Net 2.0 I`m pretty sure about this solution:
public IEnumerable<T> Distinct<T>(IEnumerable<T> source)
{
List<T> uniques = new List<T>();
foreach (T item in source)
{
if (!uniques.Contains(item)) uniques.Add(item);
}
return uniques;
}