Priority queue in .Net [closed] - c#

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
We don’t allow questions seeking recommendations for books, tools, software libraries, and more. You can edit the question so it can be answered with facts and citations.
Closed 5 years ago.
Improve this question
I am looking for a .NET implementation of a priority queue or heap data structure
Priority queues are data structures that provide more flexibility than simple sorting, because they allow new elements to enter a system at arbitrary intervals. It is much more cost-effective to insert a new job into a priority queue than to re-sort everything on each such arrival.
The basic priority queue supports three primary operations:
Insert(Q,x). Given an item x with key k, insert it into the priority queue Q.
Find-Minimum(Q). Return a pointer to the item
whose key value is smaller than any other key in the priority queue
Q.
Delete-Minimum(Q). Remove the item from the priority queue Q whose key is minimum
Unless I am looking in the wrong place, there isn't one in the framework. Is anyone aware of a good one, or should I roll my own?

You might like IntervalHeap from the C5 Generic Collection Library. To quote the user guide
Class IntervalHeap<T> implements interface IPriorityQueue<T> using an interval heap stored as an array of pairs. The FindMin and
FindMax operations, and the indexer’s get-accessor, take time O(1). The DeleteMin,
DeleteMax, Add and Update operations, and the indexer’s set-accessor, take time
O(log n). In contrast to an ordinary priority queue, an interval heap offers both minimum
and maximum operations with the same efficiency.
The API is simple enough
> var heap = new C5.IntervalHeap<int>();
> heap.Add(10);
> heap.Add(5);
> heap.FindMin();
5
Install from Nuget https://www.nuget.org/packages/C5 or GitHub https://github.com/sestoft/C5/

Here's my attempt at a .NET heap
/// <summary>
/// Array-backed binary heap. The ordering is supplied by the subclass through
/// <see cref="Dominates"/>: the element stored at the root dominates every other element.
/// </summary>
/// <typeparam name="T">Type of the stored elements.</typeparam>
public abstract class Heap<T> : IEnumerable<T>
{
    private const int InitialCapacity = 0;
    private const int GrowFactor = 2;
    private const int MinGrow = 1;

    private int _capacity = InitialCapacity;
    private T[] _heap = new T[InitialCapacity];
    private int _tail = 0; // index one past the last live element

    /// <summary>Number of elements currently stored.</summary>
    public int Count { get { return _tail; } }

    /// <summary>Length of the backing array.</summary>
    public int Capacity { get { return _capacity; } }

    /// <summary>Comparer handed to the constructor; subclasses use it inside <see cref="Dominates"/>.</summary>
    protected Comparer<T> Comparer { get; private set; }

    /// <summary>Returns true when <paramref name="x"/> may sit above <paramref name="y"/> in the heap.</summary>
    protected abstract bool Dominates(T x, T y);

    protected Heap() : this(Comparer<T>.Default)
    {
    }

    protected Heap(Comparer<T> comparer) : this(Enumerable.Empty<T>(), comparer)
    {
    }

    protected Heap(IEnumerable<T> collection)
        : this(collection, Comparer<T>.Default)
    {
    }

    /// <summary>Builds a heap from <paramref name="collection"/> ordered by <paramref name="comparer"/>.</summary>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    protected Heap(IEnumerable<T> collection, Comparer<T> comparer)
    {
        if (collection == null) throw new ArgumentNullException(nameof(collection));
        if (comparer == null) throw new ArgumentNullException(nameof(comparer));

        Comparer = comparer;
        foreach (var item in collection)
        {
            if (Count == Capacity)
                Grow();
            _heap[_tail++] = item;
        }

        // Heapify bottom-up, starting at the parent of the last element.
        for (int i = Parent(_tail - 1); i >= 0; i--)
            BubbleDown(i);
    }

    /// <summary>Inserts <paramref name="item"/>, growing the backing array when it is full.</summary>
    public void Add(T item)
    {
        if (Count == Capacity)
            Grow();
        _heap[_tail++] = item;
        BubbleUp(_tail - 1);
    }

    // Moves the element at index i towards the root until its parent dominates it.
    private void BubbleUp(int i)
    {
        while (i > 0)
        {
            int parent = Parent(i);
            if (Dominates(_heap[parent], _heap[i]))
                return; // correct domination
            Swap(i, parent);
            i = parent;
        }
    }

    /// <summary>
    /// Returns the root element without removing it. Despite the name this is the
    /// dominating element, whatever ordering the subclass defines.
    /// </summary>
    /// <exception cref="InvalidOperationException">The heap is empty.</exception>
    public T GetMin()
    {
        if (Count == 0) throw new InvalidOperationException("Heap is empty");
        return _heap[0];
    }

    /// <summary>Removes and returns the root (dominating) element.</summary>
    /// <exception cref="InvalidOperationException">The heap is empty.</exception>
    public T ExtractDominating()
    {
        if (Count == 0) throw new InvalidOperationException("Heap is empty");
        T ret = _heap[0];
        _tail--;
        Swap(_tail, 0);
        // Clear the vacated slot so the array does not keep the removed element reachable.
        _heap[_tail] = default(T);
        BubbleDown(0);
        return ret;
    }

    // Moves the element at index i away from the root until it dominates both children.
    private void BubbleDown(int i)
    {
        while (true)
        {
            int dominatingNode = Dominating(i);
            if (dominatingNode == i) return;
            Swap(i, dominatingNode);
            i = dominatingNode;
        }
    }

    // Index of the dominating element among i and its (up to two) children.
    private int Dominating(int i)
    {
        int dominatingNode = i;
        dominatingNode = GetDominating(YoungChild(i), dominatingNode);
        dominatingNode = GetDominating(OldChild(i), dominatingNode);
        return dominatingNode;
    }

    // newNode wins only when it is a live index and the current winner does not dominate it.
    private int GetDominating(int newNode, int dominatingNode)
    {
        if (newNode < _tail && !Dominates(_heap[dominatingNode], _heap[newNode]))
            return newNode;
        return dominatingNode;
    }

    private void Swap(int i, int j)
    {
        T tmp = _heap[i];
        _heap[i] = _heap[j];
        _heap[j] = tmp;
    }

    private static int Parent(int i)
    {
        return (i + 1) / 2 - 1;
    }

    private static int YoungChild(int i)
    {
        return (i + 1) * 2 - 1;
    }

    private static int OldChild(int i)
    {
        return YoungChild(i) + 1;
    }

    // New capacity is old * GrowFactor + MinGrow, so growth also works from capacity 0.
    private void Grow()
    {
        int newCapacity = _capacity * GrowFactor + MinGrow;
        var newHeap = new T[newCapacity];
        Array.Copy(_heap, newHeap, _capacity);
        _heap = newHeap;
        _capacity = newCapacity;
    }

    /// <summary>Enumerates the live elements in backing-array order (not sorted order).</summary>
    public IEnumerator<T> GetEnumerator()
    {
        return _heap.Take(Count).GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

/// <summary>Heap whose root is the greatest element according to the comparer.</summary>
public class MaxHeap<T> : Heap<T>
{
    public MaxHeap()
        : this(Comparer<T>.Default)
    {
    }

    public MaxHeap(Comparer<T> comparer)
        : base(comparer)
    {
    }

    public MaxHeap(IEnumerable<T> collection, Comparer<T> comparer)
        : base(collection, comparer)
    {
    }

    public MaxHeap(IEnumerable<T> collection) : base(collection)
    {
    }

    protected override bool Dominates(T x, T y)
    {
        return Comparer.Compare(x, y) >= 0;
    }
}

/// <summary>Heap whose root is the smallest element according to the comparer.</summary>
public class MinHeap<T> : Heap<T>
{
    public MinHeap()
        : this(Comparer<T>.Default)
    {
    }

    public MinHeap(Comparer<T> comparer)
        : base(comparer)
    {
    }

    public MinHeap(IEnumerable<T> collection) : base(collection)
    {
    }

    public MinHeap(IEnumerable<T> collection, Comparer<T> comparer)
        : base(collection, comparer)
    {
    }

    protected override bool Dominates(T x, T y)
    {
        return Comparer.Compare(x, y) <= 0;
    }
}
Some tests:
[TestClass]
public class HeapTests
{
    /// <summary>
    /// Heap-sort check: draining a heap must yield the same sequence as sorting its
    /// contents, for both construction paths (from a collection and via Add).
    /// </summary>
    [TestMethod]
    public void TestHeapBySorting()
    {
        Heap<int> heap = new MinHeap<int>(new[] {9, 8, 4, 1, 6, 2, 7, 4, 1, 2});
        int[] wanted = heap.OrderBy(i => i).ToArray();
        AssertHeapSort(heap, wanted);

        heap = new MinHeap<int> { 7, 5, 1, 6, 3, 2, 4, 1, 2, 1, 3, 4, 7 };
        wanted = heap.OrderBy(i => i).ToArray();
        AssertHeapSort(heap, wanted);

        heap = new MaxHeap<int>(new[] {1, 5, 3, 2, 7, 56, 3, 1, 23, 5, 2, 1});
        wanted = heap.OrderBy(d => -d).ToArray();
        AssertHeapSort(heap, wanted);

        heap = new MaxHeap<int> {2, 6, 1, 3, 56, 1, 4, 7, 8, 23, 4, 5, 7, 34, 1, 4};
        wanted = heap.OrderBy(d => -d).ToArray();
        AssertHeapSort(heap, wanted);
    }

    // Empties the heap and compares the extraction order with the expected sequence.
    private static void AssertHeapSort(Heap<int> heap, IEnumerable<int> expected)
    {
        var drained = new List<int>();
        while (heap.Count != 0)
        {
            int next = heap.ExtractDominating();
            drained.Add(next);
        }
        bool sameOrder = drained.SequenceEqual(expected);
        Assert.IsTrue(sameOrder);
    }
}

I like using the OrderedBag and OrderedSet classes in PowerCollections as priority queues.

here's one i just wrote, maybe it's not as optimized (just uses a sorted dictionary) but simple to understand.
you can insert objects of different kinds, so no generic queues.
using System;
using System.Diagnostics;
using System.Collections;
using System.Collections.Generic;
namespace PrioQueue
{
    /// <summary>
    /// Non-generic priority queue: one FIFO <see cref="Queue"/> per integer priority,
    /// kept in a <see cref="SortedDictionary{TKey,TValue}"/> so that the lowest
    /// priority number is served first.
    /// </summary>
    public class PrioQueue
    {
        int total_size; // number of items across all per-priority queues
        SortedDictionary<int, Queue> storage;

        public PrioQueue ()
        {
            this.storage = new SortedDictionary<int, Queue> ();
            this.total_size = 0;
        }

        /// <summary>True when no item is stored at any priority.</summary>
        public bool IsEmpty ()
        {
            return (total_size == 0);
        }

        /// <summary>Removes and returns the oldest item of the lowest priority number.</summary>
        /// <exception cref="InvalidOperationException">The queue is empty.</exception>
        public object Dequeue ()
        {
            if (IsEmpty ())
                throw new InvalidOperationException ("Please check that priorityQueue is not empty before dequeing");

            // Values are enumerated in ascending key order; drained queues stay in the
            // dictionary, so skip the empty ones.
            foreach (Queue q in storage.Values) {
                if (q.Count > 0) {
                    total_size--;
                    return q.Dequeue ();
                }
            }
            Debug.Assert(false,"not supposed to reach here. problem with changing total_size");
            return null; // not supposed to reach here.
        }

        /// <summary>Returns, without removing, the item <see cref="Dequeue()"/> would return.</summary>
        /// <exception cref="InvalidOperationException">The queue is empty.</exception>
        public object Peek ()
        {
            if (IsEmpty ())
                throw new InvalidOperationException ("Please check that priorityQueue is not empty before peeking");

            foreach (Queue q in storage.Values) {
                if (q.Count > 0)
                    return q.Peek ();
            }
            Debug.Assert(false,"not supposed to reach here. problem with changing total_size");
            return null; // not supposed to reach here.
        }

        /// <summary>Removes and returns the oldest item stored at priority <paramref name="prio"/>.</summary>
        /// <exception cref="KeyNotFoundException">Nothing was ever enqueued at that priority.</exception>
        /// <exception cref="InvalidOperationException">The queue for that priority is empty.</exception>
        public object Dequeue (int prio)
        {
            // Dequeue first: if the lookup or the dequeue throws, the count must stay
            // untouched, otherwise IsEmpty() goes out of sync with the real contents.
            object item = storage[prio].Dequeue ();
            total_size--;
            return item;
        }

        /// <summary>Appends <paramref name="item"/> to the queue for priority <paramref name="prio"/>.</summary>
        public void Enqueue (object item, int prio)
        {
            Queue bucket;
            if (!storage.TryGetValue (prio, out bucket)) {
                bucket = new Queue ();
                storage.Add (prio, bucket);
            }
            bucket.Enqueue (item);
            total_size++;
        }
    }
}

.NET 6+: As @rustyx commented, .NET 6 adds a System.Collections.Generic.PriorityQueue<TElement,TPriority> class. And FWIW it is open-source and implemented in C#.
Earlier .NET Core versions and .NET Framework: Microsoft has written (and shared online) 2 internal PriorityQueue classes within the .NET Framework. However, as @mathusum-mut commented, there is a bug in one of them (the SO community has, of course, provided fixes for it): Bug in Microsoft's internal PriorityQueue<T>?

I found one by Julian Bucknall on his blog here - http://www.boyet.com/Articles/PriorityQueueCSharp3.html
We modified it slightly so that low-priority items on the queue would eventually 'bubble-up' to the top over time, so they wouldn't suffer starvation.

You may find useful this implementation:
http://www.codeproject.com/Articles/126751/Priority-queue-in-Csharp-with-help-of-heap-data-st.aspx
it is generic and based on heap data structure

/// <summary>
/// Max-priority queue on an array heap: <see cref="top"/> is the greatest element
/// according to the comparer.
/// Index layout: slot 0 is the root with the single child 1; from slot 1 onwards
/// the children of n are 2n and 2n+1 and the parent of n is n/2.
/// </summary>
class PriorityQueue<T>
{
    IComparer<T> comparer;
    T[] heap;

    /// <summary>Number of elements in the queue.</summary>
    public int Count { get; private set; }

    public PriorityQueue() : this(null) { }
    public PriorityQueue(int capacity) : this(capacity, null) { }
    public PriorityQueue(IComparer<T> comparer) : this(16, comparer) { }

    /// <param name="capacity">Initial size of the backing array; 0 is allowed.</param>
    /// <param name="comparer">Ordering to use, or null for <see cref="Comparer{T}.Default"/>.</param>
    public PriorityQueue(int capacity, IComparer<T> comparer)
    {
        this.comparer = (comparer == null) ? Comparer<T>.Default : comparer;
        this.heap = new T[capacity];
    }

    /// <summary>Adds <paramref name="v"/>, doubling the backing array when it is full.</summary>
    public void push(T v)
    {
        // Count * 2 is 0 for an empty zero-capacity array, which would leave no room
        // for the element; fall back to a small non-zero size in that case.
        if (Count >= heap.Length) Array.Resize(ref heap, Count == 0 ? 4 : Count * 2);
        heap[Count] = v;
        SiftUp(Count++);
    }

    /// <summary>Removes and returns the greatest element.</summary>
    /// <exception cref="InvalidOperationException">The queue is empty.</exception>
    public T pop()
    {
        var v = top();
        heap[0] = heap[--Count];
        heap[Count] = default(T); // do not keep the moved element reachable from the free slot
        if (Count > 0) SiftDown(0);
        return v;
    }

    /// <summary>Returns the greatest element without removing it.</summary>
    /// <exception cref="InvalidOperationException">The queue is empty.</exception>
    public T top()
    {
        if (Count > 0) return heap[0];
        // The message text means "the priority queue is empty".
        throw new InvalidOperationException("优先队列为空");
    }

    // Shifts smaller ancestors down until v (originally at n) finds its place.
    void SiftUp(int n)
    {
        var v = heap[n];
        for (var n2 = n / 2; n > 0 && comparer.Compare(v, heap[n2]) > 0; n = n2, n2 /= 2) heap[n] = heap[n2];
        heap[n] = v;
    }

    // Shifts the greater child up until v (originally at n) dominates both children.
    void SiftDown(int n)
    {
        var v = heap[n];
        for (var n2 = n * 2; n2 < Count; n = n2, n2 *= 2)
        {
            if (n2 + 1 < Count && comparer.Compare(heap[n2 + 1], heap[n2]) > 0) n2++;
            if (comparer.Compare(v, heap[n2]) >= 0) break;
            heap[n] = heap[n2];
        }
        heap[n] = v;
    }
}
easy.

AlgoKit
I wrote an open source library called AlgoKit, available via NuGet. It contains:
Implicit d-ary heaps (ArrayHeap),
Binomial heaps,
Pairing heaps.
The code has been extensively tested. I definitely recommend you to give it a try.
Example
// Usage example for AlgoKit's PairingHeap<int, string>: entries are added as
// (key, value) pairs and the keys are ordered with the default int comparer.
var comparer = Comparer<int>.Default;
var heap = new PairingHeap<int, string>(comparer);
heap.Add(3, "your");
heap.Add(5, "of");
heap.Add(7, "disturbing.");
heap.Add(2, "find");
heap.Add(1, "I");
heap.Add(6, "faith");
heap.Add(4, "lack");
// Drain the heap, printing the Value of every popped entry.
// NOTE(review): presumably Pop() returns the entry with the smallest key, which would
// print the words in key order 1..7 — confirm against the AlgoKit documentation.
while (!heap.IsEmpty)
Console.WriteLine(heap.Pop().Value);
Why those three heaps?
The optimal choice of implementation is strongly input-dependent — as Larkin, Sen, and Tarjan show in A back-to-basics empirical study of priority queues, arXiv:1403.0252v1 [cs.DS]. They tested implicit d-ary heaps, pairing heaps, Fibonacci heaps, binomial heaps, explicit d-ary heaps, rank-pairing heaps, quake heaps, violation heaps, rank-relaxed weak heaps, and strict Fibonacci heaps.
AlgoKit features three types of heaps that appeared to be most efficient among those tested.
Hint on choice
For a relatively small number of elements, you would likely be interested in using implicit heaps, especially quaternary heaps (implicit 4-ary). In case of operating on larger heap sizes, amortized structures like binomial heaps and pairing heaps should perform better.

A Simple Max Heap Implementation.
https://github.com/bharathkumarms/AlgorithmsMadeEasy/blob/master/AlgorithmsMadeEasy/MaxHeap.cs
using System;
using System.Collections.Generic;
using System.Linq;
namespace AlgorithmsMadeEasy
{
    /// <summary>
    /// Max-heap of ints on a growable array: <see cref="Remove"/> always returns the
    /// largest value currently stored.
    /// </summary>
    class MaxHeap
    {
        private const int InitialCapacity = 10;

        // Per-instance capacity. It used to be static, so one heap growing changed the
        // capacity every other heap believed it had and made them overrun their arrays.
        private int capacity = InitialCapacity;
        private int size = 0;
        int[] items = new int[InitialCapacity];

        private int getLeftChildIndex(int parentIndex) { return 2 * parentIndex + 1; }
        private int getRightChildIndex(int parentIndex) { return 2 * parentIndex + 2; }
        private int getParentIndex(int childIndex) { return (childIndex - 1) / 2; }

        private int getLeftChild(int parentIndex) { return this.items[getLeftChildIndex(parentIndex)]; }
        private int getRightChild(int parentIndex) { return this.items[getRightChildIndex(parentIndex)]; }
        private int getParent(int childIndex) { return this.items[getParentIndex(childIndex)]; }

        private bool hasLeftChild(int parentIndex) { return getLeftChildIndex(parentIndex) < size; }
        private bool hasRightChild(int parentIndex) { return getRightChildIndex(parentIndex) < size; }

        // Every index except the root has a parent. (The previous test,
        // getLeftChildIndex(childIndex) > 0, was true for every index.)
        private bool hasParent(int childIndex) { return childIndex > 0; }

        private void swap(int indexOne, int indexTwo)
        {
            int temp = this.items[indexOne];
            this.items[indexOne] = this.items[indexTwo];
            this.items[indexTwo] = temp;
        }

        // Doubles the backing array when it is full.
        private void hasEnoughCapacity()
        {
            if (this.size == capacity)
            {
                Array.Resize(ref this.items, capacity * 2);
                capacity *= 2;
            }
        }

        /// <summary>Inserts <paramref name="item"/> into the heap.</summary>
        public void Add(int item)
        {
            this.hasEnoughCapacity();
            this.items[size] = item;
            this.size++;
            heapifyUp();
        }

        /// <summary>Removes and returns the largest value.</summary>
        /// <exception cref="InvalidOperationException">The heap is empty.</exception>
        public int Remove()
        {
            if (size == 0)
                throw new InvalidOperationException("Heap is empty");

            int item = this.items[0];
            this.items[0] = this.items[size - 1];
            this.items[this.size - 1] = 0;
            size--;
            heapifyDown();
            return item;
        }

        // Restores the heap property after appending at the end.
        private void heapifyUp()
        {
            int index = this.size - 1;
            while (hasParent(index) && this.items[index] > getParent(index))
            {
                swap(index, getParentIndex(index));
                index = getParentIndex(index);
            }
        }

        // Restores the heap property after replacing the root.
        private void heapifyDown()
        {
            int index = 0;
            while (hasLeftChild(index))
            {
                int bigChildIndex = getLeftChildIndex(index);
                if (hasRightChild(index) && getLeftChild(index) < getRightChild(index))
                {
                    bigChildIndex = getRightChildIndex(index);
                }

                // Equal values need no swap.
                if (this.items[bigChildIndex] <= this.items[index])
                {
                    break;
                }

                swap(bigChildIndex, index);
                index = bigChildIndex;
            }
        }
    }
}
/*
Calling Code:
MaxHeap mh = new MaxHeap();
mh.Add(10);
mh.Add(5);
mh.Add(2);
mh.Add(1);
mh.Add(50);
int maxVal = mh.Remove();
int newMaxVal = mh.Remove();
*/

Use a Java to C# translator on the Java implementation (java.util.PriorityQueue) in the Java Collections framework, or more intelligently use the algorithm and core code and plug it into a C# class of your own making that adheres to the C# Collections framework API for Queues, or at least Collections.

Here is the another implementation from NGenerics team:
NGenerics PriorityQueue

I had the same issue recently and ended up creating a NuGet package for this.
This implements a standard heap-based priority queue. It also has all the usual niceties of the BCL collections: ICollection<T> and IReadOnlyCollection<T> implementation, custom IComparer<T> support, ability to specify an initial capacity, and a DebuggerTypeProxy to make the collection easier to work with in the debugger.
There is also an Inline version of the package which just installs a single .cs file into your project (useful if you want to avoid taking externally-visible dependencies).
More information is available on the github page.

The following implementation of a PriorityQueue uses SortedSet from the System library.
using System;
using System.Collections.Generic;
namespace CDiggins
{
    /// <summary>Minimal priority-queue contract: the item with the greatest key is on top.</summary>
    interface IPriorityQueue<T, K> where K : IComparable<K>
    {
        bool Empty { get; }
        void Enqueue(T x, K key);
        void Dequeue();
        T Top { get; }
    }

    /// <summary>
    /// Priority queue on a <see cref="SortedSet{T}"/>. Each entry carries an insertion
    /// sequence number so that items with equal keys are all kept (a SortedSet drops
    /// elements its comparer considers equal) and are served in insertion order.
    /// </summary>
    class PriorityQueue<T, K> : IPriorityQueue<T, K> where K : IComparable<K>
    {
        SortedSet<Tuple<T, K, long>> set;
        long nextSequence; // increases with every Enqueue

        class Comparer : IComparer<Tuple<T, K, long>> {
            public int Compare(Tuple<T, K, long> x, Tuple<T, K, long> y) {
                int byKey = x.Item2.CompareTo(y.Item2);
                if (byKey != 0)
                    return byKey;
                // Reversed on purpose: among equal keys the earliest entry must be the
                // set's Max so that it is dequeued first.
                return y.Item3.CompareTo(x.Item3);
            }
        }

        // Public: without an access modifier the constructor was private and the class
        // could not be instantiated.
        public PriorityQueue() { set = new SortedSet<Tuple<T, K, long>>(new Comparer()); }

        /// <summary>True when the queue holds no items.</summary>
        public bool Empty { get { return set.Count == 0; } }

        /// <summary>Adds <paramref name="x"/> with priority <paramref name="key"/>.</summary>
        public void Enqueue(T x, K key) { set.Add(Tuple.Create(x, key, nextSequence++)); }

        /// <summary>Removes the top item; does nothing when the queue is empty.</summary>
        public void Dequeue() {
            if (set.Count > 0)
                set.Remove(set.Max);
        }

        /// <summary>The item with the greatest key (earliest inserted among equal keys).</summary>
        /// <exception cref="InvalidOperationException">The queue is empty.</exception>
        public T Top {
            get {
                if (set.Count == 0)
                    throw new InvalidOperationException("The priority queue is empty.");
                return set.Max.Item1;
            }
        }
    }
}

Related

Linq: ForEach item return the number of items needed to get n unique items starting at that item

Lets say I have a list of items:
[a,b,b,a,c,d,a,d,b,c]
and I need to know, for each item, how many items along do I have to traverse till I get n unique items, (and return eg -1, or otherwise indicate if that's not possible)
So here, if n = 4, I would return
[6,5,4,6,5,5,4,-1,-1,-1]
since
a,b,b,a,c,d contains 4 unique elements
b,b,a,c,d contains 4 unique elements
b,a,c,d contains 4 unique elements,
a,c,d,a,d,b contains 4 unique elements
etc.
I used
// For every start index i: group the items from i onwards by value and keep the
// first n groups; when n distinct values exist, return the total number of elements
// in those groups, otherwise -1.
// NOTE(review): GroupBy consumes the whole remainder of the list, so the count covers
// every occurrence of those n values after i, not only the items up to the point where
// the n-th distinct value first appears — this looks inconsistent with the expected
// output quoted in the question.
List.Select((x,i) => {
var range = List.Skip(i).GroupBy(y => y).Take(n);
// range is a deferred query, so it is evaluated here and again in SelectMany below.
if (range.Count() == n)
return range.SelectMany(y => y).Count();
return -1;
});
Although i'm pretty sure this is horribly non-performant.
To try to minimize overhead, I created a ListSpan extension class for managing subparts of a List - something like ArraySegment for List, but (loosely) modeled on Span:
/// <summary>
/// A window of <c>len</c> elements starting at <c>start</c> over an existing
/// <see cref="List{T}"/>. No elements are copied; reads and writes go to the base list.
/// </summary>
public class ListSpan<T> : IEnumerable<T>, IEnumerable {
    List<T> baseList;
    int start;
    int len;

    /// <param name="src">List to view.</param>
    /// <param name="start">Index in <paramref name="src"/> of the first element of the span.</param>
    /// <param name="len">Number of elements, or null for "everything from start to the end".</param>
    /// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">start or len is negative, or start is past the end.</exception>
    /// <exception cref="ArgumentException">start + len exceeds the list's Count.</exception>
    public ListSpan(List<T> src, int start = 0, int? len = null) {
        if (src == null)
            throw new ArgumentNullException(nameof(src));
        if (start < 0 || start > src.Count)
            throw new ArgumentOutOfRangeException(nameof(start));
        if (len < 0)
            throw new ArgumentOutOfRangeException(nameof(len));

        baseList = src;
        this.start = start;
        this.len = len ?? (baseList.Count - start);
        if (this.start + this.len > baseList.Count)
            throw new ArgumentException("start+len > Count for ListSpan");
    }

    /// <summary>Element at offset <paramref name="n"/> from the start of the span.</summary>
    /// <remarks>The offset is not checked against the span length, only by the base list.</remarks>
    public T this[int n]
    {
        get
        {
            return baseList[start + n];
        }
        set
        {
            baseList[start + n] = value;
        }
    }

    /// <summary>Enumerator over the base-list indexes [start, start + len).</summary>
    public class ListSpanEnumerator<Te> : IEnumerator<Te>, IEnumerator {
        int pos;
        List<Te> baseList;
        int first; // base-list index of the first element of the span
        int end;   // base-list index one past the last element of the span
        Te cur = default(Te);

        public ListSpanEnumerator(ListSpan<Te> src) {
            first = src.start;
            pos = first - 1;
            baseList = src.baseList;
            end = src.start + src.len;
        }

        public Te Current => cur;
        object IEnumerator.Current => Current;

        public bool MoveNext() {
            if (++pos < end) {
                cur = baseList[pos];
                return true;
            }
            else {
                cur = default(Te);
                return false;
            }
        }

        // Back to "before the first element of the span". The position used to be set
        // to 0, which is an index into the base list and not the start of the span.
        public void Reset() {
            pos = first - 1;
            cur = default(Te);
        }

        public void Dispose() { }
    }

    public IEnumerator<T> GetEnumerator() => new ListSpanEnumerator<T>(this);
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
/// <summary>Slicing helpers for <see cref="List{T}"/>.</summary>
public static class ListExt {
    /// <summary>
    /// Wraps <paramref name="src"/> in a ListSpan that begins at <paramref name="start"/>
    /// and covers <paramref name="len"/> elements (null: up to the end of the list).
    /// </summary>
    public static ListSpan<T> Slice<T>(this List<T> src, int start = 0, int? len = null) {
        var window = new ListSpan<T>(src, start, len);
        return window;
    }
}
Then I created an extension method to return the distance (in Take terms) required to get n unique items from an IEnumerable:
public static class IEnumerableExt {
    /// <summary>
    /// Counts how many leading items of <paramref name="src"/> must be taken before
    /// <paramref name="n"/> distinct values have been seen.
    /// </summary>
    /// <param name="src">Sequence to scan.</param>
    /// <param name="n">Number of distinct values wanted.</param>
    /// <param name="cmp">Equality used to decide distinctness; null selects the default.</param>
    /// <returns>The 1-based position at which the n-th distinct value appears, or -1 if the sequence ends first.</returns>
    public static int DistanceToUnique<T>(this IEnumerable<T> src, int n, IEqualityComparer<T> cmp = null) {
        IEqualityComparer<T> equality = cmp ?? EqualityComparer<T>.Default;
        var distinct = new HashSet<T>(equality);
        int taken = 0;
        foreach (T item in src) {
            taken++;
            distinct.Add(item);
            if (distinct.Count == n) {
                return taken;
            }
        }
        return -1;
    }
}
Now the answer is relatively straight forward:
var ans = Enumerable.Range(0, src.Count).Select(p => src.Slice(p).DistanceToUnique(n));
Basically I go through each position in the original (src) List and compute the distance to n unique values from that position using a ListSpan of the List starting at that position.
This still isn't terribly efficient in that I am creating a HashSet for every element in the original List and putting all the following elements in it, which means visiting up to k(k+1)/2 elements in total (quadratic in k) for a k element List. Still trying to come up with something really efficient.

Why can't SortedSet be used as a Priority Queue or Min-Heap?

I was attempting to solve the running median problem (on hackerrank) using a sorted set. Only its elements don't appear properly sorted.
See it in action here: http://rextester.com/NGBN25779
/// <summary>
/// Collects numbers and reports their median in two ways: from a sorted copy of all
/// values added (MedianWorking) and from a SortedSet (MedianNotWorking).
/// </summary>
public class RunningMedian{
    // Every value added, in insertion order.
    List<int> list = new List<int>();
    // The same values in sorted order; a SortedSet keeps each distinct value only once.
    SortedSet<int> sorted = new SortedSet<int>();

    /// <summary>Records <paramref name="num"/> in both collections.</summary>
    public void Add(int num){
        list.Add(num);
        sorted.Add(num);
    }

    /// <summary>
    /// Median taken from the SortedSet. Once a duplicate has been added the set holds
    /// fewer values than the list, so the result no longer matches MedianWorking.
    /// </summary>
    public double MedianNotWorking(){
        int[] snapshot = sorted.ToArray();
        return GetMedian(snapshot);
    }

    /// <summary>Median taken from a freshly sorted copy of every value added.</summary>
    public double MedianWorking(){
        int[] copy = list.ToArray();
        Array.Sort(copy);
        return GetMedian(copy);
    }

    /// <summary>
    /// Median of the sorted array <paramref name="arr"/>. The middle index comes from
    /// the number of values added so far, while the odd/even decision comes from the
    /// array length.
    /// </summary>
    public double GetMedian(int[] arr){
        int idx = list.Count / 2;
        bool oddLength = arr.Length % 2 != 0;
        if(oddLength){
            return arr[idx];
        }
        int middlePairSum = arr[idx] + arr[idx-1];
        return (double)middlePairSum / 2;
    }
}
// Reads a count n and then n integers from standard input; after each integer it
// prints the running median (SortedSet-based variant) with one decimal place.
static void Main(String[] args) {
    int count = Convert.ToInt32(Console.ReadLine());
    int[] values = new int[count];
    RunningMedian tracker = new RunningMedian();
    int position = 0;
    while (position < count) {
        values[position] = Convert.ToInt32(Console.ReadLine());
        tracker.Add(values[position]);
        double current = tracker.MedianNotWorking();
        string formatted = current.ToString("F1");
        Console.WriteLine(formatted);
        position++;
    }
}
For the most part the sorted set does work. However at larger input sizes it begins to give wrong answers. It may not be the optimal solution to the problem but I'm curious as to why it fails at all. C# doesn't have a min-heap / priority queue so why can't sorted sets be used as a substitute?
*Edited to include full code from hackerrank.
Here is an input file.
Input
http://textuploader.com/dovni
Expected
http://textuploader.com/dovnb
Output
http://textuploader.com/dovwj
Conflicts appear near the end
Expected
(Skipping 1-364)
54240.0
54576.5
54913.0
54576.5
54240.0
Results
(Skipping 1-364)
54240.0
54576.5
54913.0
54963.0
54576.5
SortedSet collections contain by definition only unique values. However your input file contains the number 21794 twice, which means that the second 21794 entry doesn't get added to your SortedSet. So your sorted set will contain fewer values than your list and your whole algorithm doesn't work anymore.
In general, this could be achieved by defining a custom IComparer for the SortedSet comparison. For the min priority queue it would be something like this:
/// <summary>
/// Bounded priority queue on a <see cref="SortedSet{T}"/>: it keeps at most
/// <c>amount</c> nodes, namely the smallest ones seen so far in
/// <see cref="PriorityComparer{K,V}"/> order (by Val, then by Key).
/// </summary>
public class PriorityQueue<K,V> where K : IComparable
                                where V : IComparable
{
    private SortedSet<Node<K,V>> _set;
    private readonly int _amount;

    /// <param name="amount">Maximum number of nodes kept.</param>
    public PriorityQueue(int amount)
    {
        _set = new SortedSet<Node<K,V>>(new PriorityComparer<K,V>());
        _amount = amount;
    }

    /// <summary>
    /// Adds <paramref name="value"/>. When the queue is full, the current maximum is
    /// replaced only if its Val is greater than the new node's Val; otherwise the new
    /// node is discarded.
    /// </summary>
    public void Add(Node<K,V> value)
    {
        if (_amount > _set.Count)
        {
            _set.Add(value);
            return;
        }

        // CompareTo only guarantees the sign of its result, so test "> 0" rather than
        // "== 1": several built-in types return the numeric difference.
        if (_set.Max.Val.CompareTo(value.Val) > 0)
        {
            _set.Remove(_set.Max);
            _set.Add(value);
        }
    }

    /// <summary>Removes and returns the greatest node.</summary>
    public Node<K,V> ExtractMax()
    {
        var max = _set.Max;
        _set.Remove(max);
        return max;
    }

    /// <summary>Removes and returns the smallest node.</summary>
    public Node<K,V> ExtractMin()
    {
        var min = _set.Min;
        _set.Remove(min);
        return min;
    }

    /// <summary>True when the queue holds no nodes.</summary>
    public bool IsEmpty => _set.Count == 0;
}

/// <summary>Key/value pair stored in the queue; Val is the priority, Key breaks ties.</summary>
public struct Node<K,V> where K : IComparable
                        where V : IComparable
{
    public K Key;
    public V Val;

    public Node(K key, V val)
    {
        Val = val;
        Key = key;
    }
}

/// <summary>Orders nodes by Val and, for equal Val, by Key, so nodes that differ only in Key stay distinct in the set.</summary>
public class PriorityComparer<K,V> : IComparer<Node<K,V>> where K: IComparable
                                                          where V: IComparable
{
    public int Compare(Node<K,V> i, Node<K,V> y)
    {
        var compareresult = i.Val.CompareTo(y.Val);
        if (compareresult == 0)
            return i.Key.CompareTo(y.Key);
        return compareresult;
    }
}

All combination of a list of tuples

I'm practicing some optimization problems and I'm stuck.
I have a list of tuples and I am doing the following:
/// <summary>
/// Total cost of visiting <paramref name="tuples"/> in the given order: for each pair
/// of neighbours, the larger of the absolute differences of their first and second items.
/// </summary>
/// <returns>The summed cost; 0 for fewer than two tuples.</returns>
private static int CalculateMinimumTotalCost(List<Tuple<int, int>> tuples)
{
    int total = 0;
    int pairCount = tuples.Count() - 1;
    for (int k = 0; k < pairCount; k++)
    {
        Tuple<int, int> here = tuples[k];
        Tuple<int, int> next = tuples[k + 1];
        int firstGap = Math.Abs(here.Item1 - next.Item1);
        int secondGap = Math.Abs(here.Item2 - next.Item2);
        total += Math.Max(firstGap, secondGap);
    }
    return total;
}
The idea is that given a list of tuples and this mathematical equation, I need to find the minimum cost. The catch is that the order of the tuples can be rearranged. My job is to find the LEAST costly arrangement of tuples.
So what I would like to do is loop through all possible combination of Tuples and return the combination with the minimum cost.
For example:
(1,2)(1,1)(1,3) = 3
(1,1)(1,2)(1,3) = 2
So in this case, i would return 2 because that arrangement is less costly.
I understand that when there are N tuples, the number of combinations is N!.
How do I get all the combinations possible for a list of tuples?
Thanks!
As other have suggested you should create the Point class:
/// <summary>A mutable point with integer coordinates.</summary>
public partial class Point
{
    /// <summary>Creates a point at (<paramref name="x"/>, <paramref name="y"/>).</summary>
    public Point(int x, int y)
    {
        X = x;
        Y = y;
    }

    /// <summary>Horizontal coordinate.</summary>
    public int X { get; set; }

    /// <summary>Vertical coordinate.</summary>
    public int Y { get; set; }
}
And, let's encapsulate the functions for computing distance and total cost :
public partial class Point
{
    /// <summary>
    /// Distance between two points, defined as the larger of the absolute X
    /// difference and the absolute Y difference.
    /// </summary>
    public static int CalculateDistance(Point p0, Point p1)
    {
        int horizontal = Math.Abs(p0.X - p1.X);
        int vertical = Math.Abs(p0.Y - p1.Y);
        return Math.Max(horizontal, vertical);
    }
}
public static class PointExtensions
{
    /// <summary>
    /// Sum of <c>Point.CalculateDistance</c> over every pair of consecutive points in
    /// <paramref name="source"/>. The sequence is enumerated twice.
    /// </summary>
    public static int GetTotalCost(this IEnumerable<Point> source)
    {
        // Pair each point with its successor by zipping the sequence with itself shifted by one.
        IEnumerable<Point> successors = source.Skip(1);
        IEnumerable<int> legs = source.Zip(successors, (from, to) => Point.CalculateDistance(from, to));
        return legs.Sum();
    }
}
Finally, you will need another extension method to create "all possible combination" :
public static class PermutationExtensions
{
    /// <summary>
    /// Lazily enumerates every ordering of <paramref name="source"/>. Each result is a
    /// fresh array; the source is copied once up front and never modified.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null or empty.</exception>
    public static IEnumerable<IEnumerable<T>> GetPermutations<T>(this IEnumerable<T> source)
    {
        if (source == null || !source.Any())
            throw new ArgumentNullException(nameof(source));

        T[] working = source.ToArray();
        return Permute(working, 0, working.Length - 1);
    }

    // Swap-based recursion: positions before `fixedCount` are settled; each remaining
    // element is tried at `fixedCount` in turn. `last` is the final index.
    private static IEnumerable<IEnumerable<T>> Permute<T>(T[] array, int fixedCount, int last)
    {
        if (fixedCount == last)
        {
            // Snapshot, because `array` keeps being mutated while the caller iterates.
            yield return array.ToArray();
            yield break;
        }

        for (int candidate = fixedCount; candidate <= last; candidate++)
        {
            array.Swap(fixedCount, candidate);
            foreach (var permutation in Permute(array, fixedCount + 1, last))
                yield return permutation; // already a private snapshot
            array.Swap(fixedCount, candidate); // backtrack
        }
    }

    private static void Swap<T>(this T[] array, int i, int j)
    {
        T held = array[j];
        array[j] = array[i];
        array[i] = held;
    }
}
Source from Listing all permutations of a string/integer adapted to be more LINQ-friendly
Usage :
// Usage example: pick the cheapest ordering of three points by trying every permutation.
void Main()
{
    var list = new List<Point>();
    list.Add(new Point(1, 2));
    list.Add(new Point(1, 1));
    list.Add(new Point(1, 3));

    // result: Point[] (3 items) : (1, 1), (1, 2), (1,3)
    list.GetPermutations()
        .OrderBy(candidate => candidate.GetTotalCost())
        .First();
}
EDIT : As #EricLippert pointed out, source.OrderBy(selector).First() has some extra cost. This following extension method deals with this issue :
public static class EnumerableExtensions
{
    /// <summary>
    /// Returns the element of <paramref name="source"/> with the smallest key, in a
    /// single pass and without sorting. When several elements share the smallest key
    /// the first one wins.
    /// </summary>
    /// <param name="source">Sequence to search.</param>
    /// <param name="keySelector">Projects an element to the key that is compared.</param>
    /// <param name="comparer">Key ordering; null selects <see cref="Comparer{T}.Default"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is null, or
    /// <paramref name="source"/> is empty (exception type kept for existing callers).
    /// </exception>
    public static T MinBy<T, TKey>(this IEnumerable<T> source, Func<T, TKey> keySelector, IComparer<TKey> comparer = null)
    {
        // Validate every argument before touching the sequence, so a bad call has no
        // side effects on a lazily evaluated source.
        if (source == null)
            throw new ArgumentNullException(nameof(source));
        if (keySelector == null)
            throw new ArgumentNullException(nameof(keySelector));

        comparer = comparer ?? Comparer<TKey>.Default;

        // The enumerator is disposable and was previously never disposed.
        using (IEnumerator<T> etor = source.GetEnumerator())
        {
            if (!etor.MoveNext())
                throw new ArgumentNullException(nameof(source), "Sequence contains no elements");

            var min = etor.Current;
            var minKey = keySelector(min);
            while (etor.MoveNext())
            {
                var candidate = etor.Current;
                var key = keySelector(candidate);
                if (comparer.Compare(key, minKey) < 0)
                {
                    min = candidate;
                    minKey = key;
                }
            }
            return min;
        }
    }
}
And, we can rewrite the above solution as :
list.GetPermutations().MinBy(x => x.GetTotalCost())
You can change the for loop to Foreach to make it more readable and rather than using index to fetch values.
/// <summary>
/// Total cost of visiting <paramref name="tuples"/> in the given order: for each pair
/// of neighbours, the larger of the absolute differences of their first and second items.
/// </summary>
/// <returns>The summed cost; 0 for an empty or single-element list.</returns>
private static int CalculateMinimumTotalCost(List<Tuple<int, int>> tuples)
{
    int minimumCost = 0;
    bool hasPrevious = false;
    Tuple<int, int> currentTuple = null;

    // Tracking "have we seen an element yet" replaces the earlier tuples.First() call,
    // which threw on an empty list.
    foreach (Tuple<int, int> tuple in tuples)
    {
        if (hasPrevious)
        {
            minimumCost += Math.Max(Math.Abs(currentTuple.Item1 - tuple.Item1), Math.Abs(currentTuple.Item2 - tuple.Item2));
        }
        currentTuple = tuple;
        hasPrevious = true;
    }
    return minimumCost;
}

Why is List<T>.Enumerator faster than my implementation?

I've found myself in a position where I have to roll my own dynamic array implementation, due to various large performance benefits (in my case). However, after creating an enumerator for my version, and comparing the efficiency with the one List uses, I'm a bit bewildered; the List one is approximately 30-40% faster than my version, even though it's much more complex.
Here's the important part of the List enumerator implementation:
// Decompiled excerpt of List<T>'s nested enumerator struct (only part of the type is
// shown, hence the goto labels and the missing interface members).
public struct Enumerator : IEnumerator<T>, IDisposable, IEnumerator
{
private List<T> list;
private int index;
// Copy of list._version taken at construction; compared against the list on every MoveNext.
private int version;
// Element cached by the last successful MoveNext and handed out by Current.
private T current;
internal Enumerator(List<T> list)
{
this.list = list;
this.index = 0;
this.version = list._version;
this.current = default(T);
return;
}
// Fast path: when the version still matches and index is below list._size, cache the
// element and advance. Every other case is delegated to MoveNextRare (not shown here).
public bool MoveNext()
{
List<T> list;
list = this.list;
if (this.version != list._version)
{
goto Label_004A;
}
if (this.index >= list._size)
{
goto Label_004A;
}
this.current = list._items[this.index];
this.index += 1;
// NOTE(review): "return 1" in a bool method looks like a decompiler artifact for "return true".
return 1;
Label_004A:
return this.MoveNextRare();
}
// Returns the cached element; no array access or bounds check happens here.
public T Current
{
get { return this.current; }
}
}
And here's my very barebone version:
/// <summary>
/// Struct enumerator over the live elements of a <see cref="DynamicArray{T}"/>.
/// The backing array and the element count are captured at construction.
/// </summary>
internal struct DynamicArrayEnumerator<T> : IEnumerator<T> where T : class
{
    private readonly T[] internalArray;
    private readonly int lastIndex;
    private int currentIndex;

    internal DynamicArrayEnumerator(DynamicArray<T> dynamicArray)
    {
        internalArray = dynamicArray.internalArray;
        // Stop at the last live element. Using internalArray.Length - 1 walked the whole
        // backing array, including unused slots beyond Count.
        lastIndex = dynamicArray.Count - 1;
        currentIndex = -1;
    }

    /// <summary>Element at the current position; valid only after MoveNext returned true.</summary>
    public T Current
    {
        get { return internalArray[currentIndex]; }
    }

    // Members required by IEnumerator / IDisposable, which the struct declares it implements.
    object System.Collections.IEnumerator.Current
    {
        get { return Current; }
    }

    /// <summary>Advances to the next element; false once the last live element has been passed.</summary>
    public bool MoveNext()
    {
        return (++currentIndex <= lastIndex);
    }

    /// <summary>Moves back to the position before the first element.</summary>
    public void Reset()
    {
        currentIndex = -1;
    }

    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }
}
I know this is micro-optimization, but I'm actually interested in understanding why the List enumerator is so much faster than mine. Any ideas? Thanks!
Edit:
As requested; the DynamicArray class (the relevant parts):
The enumerator is an inner class in this.
/// <summary>
/// Minimal growable-array wrapper for reference types: a backing array plus a count
/// of the slots in use.
/// </summary>
public struct DynamicArray<T> : IEnumerable<T> where T : class
{
    private T[] internalArray;
    private int itemCount;

    /// <summary>Allocates a backing array of <paramref name="count"/> slots; the array starts with no items.</summary>
    public DynamicArray(int count)
    {
        itemCount = 0;
        internalArray = new T[count];
    }

    /// <summary>The backing array itself (not a copy).</summary>
    internal T[] Data => internalArray;

    /// <summary>Number of items in use.</summary>
    public int Count => itemCount;

    // Returning the struct enumerator through the interface type boxes it.
    public IEnumerator<T> GetEnumerator() => new DynamicArrayEnumerator<T>(this);

    IEnumerator IEnumerable.GetEnumerator() => this.GetEnumerator();
}
As for how I'm testing:
// Benchmark driver: enumerates a List<BaseClass> and a DynamicArray<BaseClass> 100 times
// each and reports the average elapsed ticks per run.
List<BaseClass> list = new List<BaseClass>(1000000);
DynamicArray<BaseClass> dynamicArray = new DynamicArray<BaseClass>(1000000);
// Code for filling with data omitted.
int numberOfRuns = 0;
float p1Total = 0;
float p2Total = 0;
while (numberOfRuns < 100)
{
    // Run 1: foreach over the built-in list.
    PerformanceAnalyzer p1 = new PerformanceAnalyzer(() =>
    {
        int u = 0;
        foreach (BaseClass b in list)
        {
            if (b.B > 100) // Some trivial task
                u++;
        }
    });
    p1.ExecuteAndClock();
    p1Total += p1.TotalElapsedTicks;

    // Run 2: foreach over the custom dynamic array.
    PerformanceAnalyzer p2 = new PerformanceAnalyzer(() =>
    {
        int u = 0;
        foreach (BaseClass b in dynamicArray)
        {
            if (b.B > 100) // Some trivial task
                u++;
        }
    });
    p2.ExecuteAndClock();
    p2Total += p2.TotalElapsedTicks;
    numberOfRuns++;
}
// Average over the runs actually executed. The original divided by `totalRuns`, which
// is not declared anywhere in this snippet.
Console.WriteLine("List enumeration: " + p1Total / numberOfRuns + "\n");
Console.WriteLine("Dynamic array enumeration: " + p2Total / numberOfRuns + "\n");
The PerformanceAnalyzer class basically starts a Stopwatch, execute the supplied Action delegate, and then stop the Stopwatch afterwards.
Edit 2 (Quick answer to Ryan Gates):
There's a few reasons why I would want to roll my own, most importantly I need a very fast RemoveAt(int index) method.
Since I don't have to worry about the order of the list elements in my particular case, I can avoid the .Net built-in list's way of doing it:
// Decompiled List<T>.RemoveAt as quoted in the question (goto labels come from the decompiler).
// Removes the element at `index` while keeping the order of the remaining elements.
public void RemoveAt(int index)
{
T local;
// Valid index: skip the throw helper.
if (index < this._size)
{
goto Label_000E;
}
ThrowHelper.ThrowArgumentOutOfRangeException();
Label_000E:
this._size -= 1;
// Removing the last element needs no shifting.
if (index >= this._size)
{
goto Label_0042;
}
// Shift every element after `index` one slot to the left; this copy is what makes
// removal cost proportional to the number of elements behind `index`.
Array.Copy(this._items, index + 1, this._items, index, this._size - index);
Label_0042:
// Clear the now-unused last slot and bump the version that enumerators check.
this._items[this._size] = default(T);
this._version += 1;
return;
}
And instead using something along the lines of:
/// <summary>
/// Removes the element at <paramref name="index"/> in constant time by moving the last
/// element into its place. Element order is not preserved.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not a live index.</exception>
public void RemoveAt(int index)
{
    if (index < 0 || index >= itemCount)
        throw new ArgumentOutOfRangeException(nameof(index));

    // The last live element sits at itemCount - 1, so decrement first. Reading
    // internalArray[itemCount] before the decrement copied the slot one past the end.
    itemCount--;
    internalArray[index] = internalArray[itemCount];
    // Drop the duplicate reference so the moved element is not kept alive twice.
    internalArray[itemCount] = default(T);
}
Potentially saving enormous amounts of time in my case, if, say, the first 1000 elements in a long list have to be removed by index.
Okay, aside from benchmarking problems, here's how you can make your DynamicArray class more like List<T>:
// Pattern-based enumeration: foreach binds to this public method and works with the
// struct enumerator directly, so no boxing and no interface dispatch is involved.
public DynamicArrayEnumerator<T> GetEnumerator() => new DynamicArrayEnumerator<T>(this);

// Explicit interface implementations for callers that only see IEnumerable<T> / IEnumerable;
// here the struct enumerator is converted to the interface type.
IEnumerator<T> IEnumerable<T>.GetEnumerator() => GetEnumerator();

IEnumerator IEnumerable.GetEnumerator() => this.GetEnumerator();
Now, code which knows it's working with a dynamic array can iterate with a DynamicArrayEnumerator<T> without any boxing, and without virtual dispatch. This is exactly what List<T> does. The compiler notices when a type implements the pattern in a custom manner, and will use the types involved instead of the interfaces.
With your current code, you're getting no benefit from creating a struct - because you're boxing it in GetEnumerator().
Try the above change and fix the benchmark to work for longer. I'd expect to see a big difference.

Are there any implementations of multiset for .Net?

I'm looking for a .Net implementation of a multiset. Can anyone recommend a good one?
(A multiset, or bag, is a set that can have duplicate values, and on which you can do set operations: intersection, difference, etc. A shopping cart for instance could be thought of as a multiset because you can have multiple occurrences of the same product.)
I do not know of one; however, you could use a Dictionary for that, in which the value is the quantity of the item. When the item is added a second time, you would increase its value in the dictionary.
An other possibility would be to simply use a List of items, in which you could put duplicates. This might be a better approach for a shopping cart.
Anything calling itself a C# implementation of a multiset should not be based on a Dictionary internally. Dictionaries are hash tables, unordered collections. C++'s sets, multisets, maps, and multimaps are ordered. Internally each is represented as some flavor of a self-balancing binary search tree.
In C# we should then use a SortedDictionary as the basis of our implementation as according to Microsoft's own documentation a SortedDictionary "is a binary search tree with O(log n) retrieval". A basic multiset can be implemented as follows:
/// <summary>
/// A sorted multiset (bag): keeps items in key order and remembers how many
/// times each one was added. Backed by a <see cref="SortedDictionary{TKey,TValue}"/>
/// that maps each distinct item to its occurrence count (always at least 1).
/// </summary>
/// <typeparam name="T">Item type; ordered by the dictionary's default comparer.</typeparam>
public class SortedMultiSet<T> : IEnumerable<T>
{
    // item -> number of occurrences; an entry is removed as soon as its count reaches 0.
    private readonly SortedDictionary<T, int> _dict;

    /// <summary>Creates an empty multiset.</summary>
    public SortedMultiSet()
    {
        _dict = new SortedDictionary<T, int>();
    }

    /// <summary>Creates a multiset containing every element of <paramref name="items"/>.</summary>
    /// <param name="items">Elements to add; duplicates are counted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    public SortedMultiSet(IEnumerable<T> items) : this()
    {
        Add(items);
    }

    /// <summary>Returns true when at least one occurrence of <paramref name="item"/> is stored.</summary>
    public bool Contains(T item)
    {
        return _dict.ContainsKey(item);
    }

    /// <summary>Adds one occurrence of <paramref name="item"/>.</summary>
    public void Add(T item)
    {
        // A single lookup: TryGetValue leaves count at 0 when the item is not present yet.
        int count;
        _dict.TryGetValue(item, out count);
        _dict[item] = count + 1;
    }

    /// <summary>Adds one occurrence for every element of <paramref name="items"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    public void Add(IEnumerable<T> items)
    {
        if (items == null)
        {
            throw new ArgumentNullException(nameof(items));
        }

        foreach (var item in items)
        {
            Add(item);
        }
    }

    /// <summary>Removes one occurrence of <paramref name="item"/>.</summary>
    /// <exception cref="ArgumentException">The item is not in the multiset.</exception>
    public void Remove(T item)
    {
        int count;
        if (!_dict.TryGetValue(item, out count))
        {
            throw new ArgumentException("The item is not present in the multiset.", nameof(item));
        }

        if (count > 1)
        {
            _dict[item] = count - 1;
        }
        else
        {
            // Last occurrence: drop the entry so Contains/enumeration never see a zero count.
            _dict.Remove(item);
        }
    }

    /// <summary>Returns the last (greatest, in sort order) value without removing it.</summary>
    /// <remarks>
    /// Last() here is the LINQ extension method, which enumerates the dictionary
    /// to reach its final entry.
    /// </remarks>
    /// <exception cref="NullReferenceException">The multiset is empty.</exception>
    public T Peek()
    {
        if (_dict.Count == 0)
        {
            // NOTE(review): the exception type is kept only so existing callers that catch it
            // keep working; InvalidOperationException would be the conventional choice.
            throw new NullReferenceException("The multiset is empty.");
        }

        return _dict.Last().Key;
    }

    /// <summary>Returns the last value in the multiset and removes one occurrence of it.</summary>
    /// <exception cref="NullReferenceException">The multiset is empty.</exception>
    public T Pop()
    {
        T item = Peek();
        Remove(item);
        return item;
    }

    /// <summary>Enumerates the items in sort order, repeating each one as often as it was added.</summary>
    public IEnumerator<T> GetEnumerator()
    {
        foreach (var kvp in _dict)
        {
            for (int i = 0; i < kvp.Value; i++)
            {
                yield return kvp.Key;
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return this.GetEnumerator();
    }
}
Another option is to just wrap SortedSet, but instead of storing your type T in it, you store the value tuple (T value, int counter), where counter goes up by 1 with each new instance of value that is inserted. Essentially, you're forcing the values to be distinct. You can efficiently use GetViewBetween() to find the largest value of counter for a particular value, then increment it to get the counter for a newly added value. And unlike the count dictionary solution, you can use GetViewBetween() to replicate the functionality that equal_range, lower_bound, and upper_bound provide in C++. Here is some code showing what I mean:
/// <summary>
/// Sorted multiset built on a SortedSet of (value, counter) tuples. The counter
/// makes every stored occurrence of the same value a distinct set element.
/// </summary>
public class SortedMultiSet<T> : IEnumerable<T>
{
    // Not readonly: GetViewBetween swaps in a range view on the instance it returns.
    private SortedSet<(T value, int counter)> set =
        new SortedSet<(T value, int counter)>();

    // View over every stored occurrence of one value, i.e. (value, 0) .. (value, int.MaxValue).
    private SortedSet<(T value, int counter)> OccurrencesOf(T value)
    {
        return set.GetViewBetween((value, 0), (value, int.MaxValue));
    }

    /// <summary>Adds one more occurrence of <paramref name="value"/>.</summary>
    public void Add(T value)
    {
        var occurrences = OccurrencesOf(value);

        // Continue numbering after the highest counter in use, or start at 0.
        int nextCounter = 0;
        if (occurrences.Count > 0)
        {
            nextCounter = occurrences.Max.counter + 1;
        }

        set.Add((value, nextCounter));
    }

    /// <summary>Removes a single occurrence of <paramref name="value"/>; returns false if there was none.</summary>
    public bool RemoveOne(T value)
    {
        var occurrences = OccurrencesOf(value);
        if (occurrences.Count == 0)
        {
            return false;
        }

        // Remove the occurrence with the highest counter.
        set.Remove(occurrences.Max);
        return true;
    }

    /// <summary>Removes every occurrence of <paramref name="value"/>; returns true if any existed.</summary>
    public bool RemoveAll(T value)
    {
        var occurrences = OccurrencesOf(value);
        bool hadAny = occurrences.Count > 0;
        occurrences.Clear();
        return hadAny;
    }

    /// <summary>
    /// Returns a multiset whose backing set is the view of this one between
    /// (min, 0) and (max, int.MaxValue).
    /// </summary>
    public SortedMultiSet<T> GetViewBetween(T min, T max)
    {
        var range = new SortedMultiSet<T>();
        range.set = set.GetViewBetween((min, 0), (max, int.MaxValue));
        return range;
    }

    /// <summary>Enumerates the stored values in set order, one per stored occurrence.</summary>
    public IEnumerator<T> GetEnumerator()
    {
        foreach (var entry in set)
        {
            yield return entry.value;
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
Now you can write something like this:
// Fill a multiset with a few values, including duplicates.
var multiset = new SortedMultiSet<int>();
int[] samples = { 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 8 };
foreach (int sample in samples)
{
    multiset.Add(sample);
}

// Print every stored occurrence whose value lies between 2 and 7 (both bounds included).
var between2And7 = multiset.GetViewBetween(2, 7);
foreach (int item in between2And7)
{
    Console.Write(item + " "); // Output: 2 2 3 4 5 5 6 7 7
}
In the past, there were some issues where GetViewBetween() ran in time O(output size), rather than time O(log n), but I think those have been resolved. At the time, it would count up nodes to cache the count; it now uses hierarchical counts to perform Count operations efficiently. See this StackOverflow post and this library code.
/// <summary>
/// An unordered multiset (bag) backed by a dictionary that maps each distinct
/// item to the number of times it occurs.
/// </summary>
/// <typeparam name="T">Item type; compared with the dictionary's default equality comparer.</typeparam>
public class Multiset<T>: ICollection<T>
{
    // item -> occurrence count; an entry is removed as soon as its count would reach 0.
    private readonly Dictionary<T, int> data;

    // Sum of all occurrence counts, maintained by Add/Remove/Clear so Count needs no scan.
    private int totalCount;

    /// <summary>Creates an empty multiset.</summary>
    public Multiset()
    {
        data = new Dictionary<T, int>();
    }

    // Wraps an already-built count dictionary (used by Except and Intersection).
    private Multiset(Dictionary<T, int> data)
    {
        this.data = data;
        totalCount = data.Values.Sum();
    }

    /// <summary>Adds one occurrence of <paramref name="item"/>.</summary>
    public void Add(T item)
    {
        // TryGetValue leaves count at 0 when the item is not present yet.
        int count;
        data.TryGetValue(item, out count);
        data[item] = count + 1;
        totalCount++;
    }

    /// <summary>Removes every item.</summary>
    public void Clear()
    {
        data.Clear();
        totalCount = 0;
    }

    /// <summary>
    /// Returns a new multiset holding this multiset's items minus those of
    /// <paramref name="another"/>: each count is reduced by the other's count and
    /// items whose count would drop to zero or below are omitted.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="another"/> is null.</exception>
    public Multiset<T> Except(Multiset<T> another)
    {
        if (another == null)
        {
            throw new ArgumentNullException(nameof(another));
        }

        // Work on a copy so neither operand is modified.
        Dictionary<T, int> remaining = new Dictionary<T, int>(data);
        foreach (KeyValuePair<T, int> kvp in another.data)
        {
            int count;
            if (!remaining.TryGetValue(kvp.Key, out count))
            {
                continue;
            }

            if (count > kvp.Value)
            {
                remaining[kvp.Key] = count - kvp.Value;
            }
            else
            {
                remaining.Remove(kvp.Key);
            }
        }
        return new Multiset<T>(remaining);
    }

    /// <summary>
    /// Returns a new multiset holding the items present in both multisets, each
    /// with the smaller of its two counts.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="another"/> is null.</exception>
    public Multiset<T> Intersection(Multiset<T> another)
    {
        if (another == null)
        {
            throw new ArgumentNullException(nameof(another));
        }

        Dictionary<T, int> newData = new Dictionary<T, int>();
        foreach (KeyValuePair<T, int> kvp in data)
        {
            int otherCount;
            if (another.data.TryGetValue(kvp.Key, out otherCount))
            {
                newData[kvp.Key] = Math.Min(kvp.Value, otherCount);
            }
        }
        return new Multiset<T>(newData);
    }

    /// <summary>Returns true when at least one occurrence of <paramref name="item"/> is stored.</summary>
    public bool Contains(T item)
    {
        return data.ContainsKey(item);
    }

    /// <summary>
    /// Copies every occurrence of every item into <paramref name="array"/>,
    /// starting at <paramref name="arrayIndex"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
    /// <exception cref="ArgumentException">There is not enough room from <paramref name="arrayIndex"/> to the end of the array.</exception>
    public void CopyTo(T[] array, int arrayIndex)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }
        if (arrayIndex < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(arrayIndex));
        }
        // Validate up front so a too-small destination is never left partially written.
        if (array.Length - arrayIndex < totalCount)
        {
            throw new ArgumentException("The destination array is too small to hold the multiset.", nameof(array));
        }

        foreach (KeyValuePair<T, int> kvp in data)
        {
            for (int i = 0; i < kvp.Value; i++)
            {
                array[arrayIndex] = kvp.Key;
                arrayIndex++;
            }
        }
    }

    /// <summary>
    /// Returns the item(s) with the highest occurrence count, or an empty
    /// sequence when the multiset is empty.
    /// </summary>
    public IEnumerable<T> Mode()
    {
        if (data.Count == 0)
        {
            return Enumerable.Empty<T>();
        }
        int modalFrequency = data.Values.Max();
        return data.Where(kvp => kvp.Value == modalFrequency).Select(kvp => kvp.Key);
    }

    /// <summary>Total number of stored occurrences (duplicates included).</summary>
    public int Count
    {
        get
        {
            return totalCount;
        }
    }

    /// <summary>Always false: the multiset can be modified.</summary>
    public bool IsReadOnly
    {
        get
        {
            return false;
        }
    }

    /// <summary>Removes one occurrence of <paramref name="item"/>.</summary>
    /// <returns>True if an occurrence was removed; false if the item was not present.</returns>
    public bool Remove(T item)
    {
        int count;
        if (!data.TryGetValue(item, out count))
        {
            return false;
        }

        if (count == 1)
        {
            data.Remove(item);
        }
        else
        {
            data[item] = count - 1;
        }
        totalCount--;
        return true;
    }

    /// <summary>Enumerates the items, repeating each one as many times as it occurs.</summary>
    public IEnumerator<T> GetEnumerator()
    {
        return new MultisetEnumerator(this);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    // Walks the count dictionary and reports each key as many times as its count says.
    // Nested in a generic class, so it uses the outer T rather than declaring its own.
    private sealed class MultisetEnumerator : IEnumerator<T>
    {
        // Held through the interface (boxed) so MoveNext advances this one instance
        // instead of a copy of the dictionary's struct enumerator.
        private readonly IEnumerator<KeyValuePair<T, int>> baseEnumerator;

        // How many times the current key has been repeated beyond its first report.
        private int index;

        public MultisetEnumerator(Multiset<T> multiset)
        {
            baseEnumerator = multiset.data.GetEnumerator();
            index = 0;
        }

        public T Current
        {
            get
            {
                return baseEnumerator.Current.Key;
            }
        }

        public void Dispose()
        {
            baseEnumerator.Dispose();
        }

        object System.Collections.IEnumerator.Current
        {
            get
            {
                return baseEnumerator.Current.Key;
            }
        }

        public bool MoveNext()
        {
            KeyValuePair<T, int> kvp = baseEnumerator.Current;
            if (index < (kvp.Value - 1))
            {
                // Same key again: it still has repetitions left.
                index++;
                return true;
            }

            // Current key exhausted (or enumeration not started yet): advance to the next entry.
            bool result = baseEnumerator.MoveNext();
            index = 0;
            return result;
        }

        public void Reset()
        {
            baseEnumerator.Reset();
            // Also forget how far into the current key's repetitions we were.
            index = 0;
        }
    }
}
You can use this implementation of a sorted multiset: SortedMultiSet.cs

Categories