Related
Array.Sort in C# is really fast when sorting floats. I need some extra data to go along with those floats, so I made a simple class that implements the IComparable<T> interface. Now all of a sudden Array.Sort is around 3-4 times slower — why is this, and how can I improve the performance?
Demo code:
using System;
using System.Diagnostics;
using System.Linq;
namespace SortTest
{
class Program
{
    // Benchmarks Array.Sort over four representations of the same data:
    // a plain float[], a class implementing IComparable<T>, a struct
    // implementing IComparable<T>, and the keys+items Array.Sort overload.
    static void Main(string[] args)
    {
        int arraySize = 10000; // elements per test array
        int loops = 500;       // repetitions; ticks are summed then averaged
        double normalFloatTime = 0;
        double floatWithIDTime = 0;
        double structTime = 0;
        double arraySortOverloadTime = 0;
        // sanity flags: remain true only if every run produced a correctly sorted array
        bool floatWithIDCorrect = true;
        bool structCorrect = true;
        bool arraySortOverloadCorrect = true;
        //just so we know the program is busy
        Console.WriteLine("Sorting random arrays, this will take some time...");
        Random random = new Random();
        Stopwatch sw = new Stopwatch();
        for (int i = 0; i < loops; i++)
        {
            float[] normalFloatArray = new float[arraySize];
            SortTest[] floatWithIDArray = new SortTest[arraySize];
            SortStruct[] structArray = new SortStruct[arraySize];
            SortTest[] arraySortOverloadArray = new SortTest[arraySize];
            //fill the arrays with identical values so the four sorts are comparable
            for (int j = 0; j < arraySize; j++)
            {
                normalFloatArray[j] = NextFloat(random);
                floatWithIDArray[j] = new SortTest(normalFloatArray[j], j);
                structArray[j] = new SortStruct(normalFloatArray[j], j);
                arraySortOverloadArray[j] = new SortTest(normalFloatArray[j], j);
            }
            // 1) plain float[] baseline
            sw.Reset();
            sw.Start();
            Array.Sort(normalFloatArray);
            sw.Stop();
            normalFloatTime += sw.ElapsedTicks;
            // 2) class implementing IComparable<SortTest>
            sw.Reset();
            sw.Start();
            Array.Sort(floatWithIDArray);
            sw.Stop();
            floatWithIDTime += sw.ElapsedTicks;
            // 3) struct implementing IComparable<SortStruct>
            sw.Reset();
            sw.Start();
            Array.Sort(structArray);
            sw.Stop();
            structTime += sw.ElapsedTicks;
            // 4) keys+items overload.
            // BUGFIX: the keys must be the floats, not the IDs — sorting by ID
            // (which equals the original fill index) left the array in its
            // original order, which is why the sanity check reported failure.
            // The key array is built outside the timed region because in the
            // real use case it already exists.
            float[] sortKeys = arraySortOverloadArray.Select(k => k.SomeFloat).ToArray();
            sw.Reset();
            sw.Start();
            Array.Sort(sortKeys, arraySortOverloadArray);
            sw.Stop();
            arraySortOverloadTime += sw.ElapsedTicks;
            // verify every variant against the plain-float baseline
            for (int k = 0; k < normalFloatArray.Length; k++)
            {
                if (normalFloatArray[k] != floatWithIDArray[k].SomeFloat)
                {
                    floatWithIDCorrect = false;
                }
                if (normalFloatArray[k] != structArray[k].SomeFloat)
                {
                    structCorrect = false;
                }
                if (normalFloatArray[k] != arraySortOverloadArray[k].SomeFloat)
                {
                    arraySortOverloadCorrect = false;
                }
            }
        }
        //calculate averages (ticks per sort of one array)
        double normalFloatAverage = normalFloatTime / loops;
        double floatWithIDAverage = floatWithIDTime / loops;
        double structAverage = structTime / loops;
        double arraySortOverloadAverage = arraySortOverloadTime / loops;
        //print averages
        Console.WriteLine("normalFloatAverage: {0} ticks.\nfloatWithIDAverage: {1} ticks.\nstructAverage: {2} ticks.\narraySortOverloadAverage: {3} ticks.", normalFloatAverage, floatWithIDAverage, structAverage, arraySortOverloadAverage);
        Console.WriteLine("floatWithIDArray has " + (floatWithIDCorrect ? "" : "NOT ") + "been sorted correctly atleast once.");
        Console.WriteLine("structArray has " + (structCorrect ? "" : "NOT ") + "been sorted correctly atleast once.");
        Console.WriteLine("arraySortOverloadArray has " + (arraySortOverloadCorrect ? "" : "NOT ") + "been sorted correctly atleast once.");
        Console.WriteLine("Press enter to exit.");
        //pause so we can see the console
        Console.ReadLine();
    }

    // Returns a random float with a random sign/mantissa in (-1, 1) and an
    // exponent drawn from nearly the full single-precision range
    // (2^-126 .. 2^127). The product is always finite and never NaN.
    static float NextFloat(Random random)
    {
        double mantissa = (random.NextDouble() * 2.0) - 1.0;
        double exponent = Math.Pow(2.0, random.Next(-126, 128));
        return (float)(mantissa * exponent);
    }
}
// A float sort key with an attached payload ID. Ordering is by SomeFloat
// only; ID never participates in the comparison.
class SortTest : IComparable<SortTest>
{
    public float SomeFloat;
    public int ID;

    public SortTest(float f, int id)
    {
        SomeFloat = f;
        ID = id;
    }

    // Hand-coded comparison (faster than float.CompareTo, see discussion above).
    // NOTE(review): NaN keys compare equal to everything here (both < and >
    // are false), which is not a total order; acceptable because NextFloat
    // never produces NaN — confirm if keys can come from elsewhere.
    public int CompareTo(SortTest other)
    {
        // Per the IComparable<T> contract any instance sorts after null.
        // The original dereferenced 'other' unconditionally and would have
        // thrown NullReferenceException on arrays containing null slots.
        if (other == null)
            return 1;
        float f = other.SomeFloat;
        if (SomeFloat < f)
            return -1;
        else if (SomeFloat > f)
            return 1;
        else
            return 0;
    }
}
// Value-type variant of SortTest: a float sort key plus a payload ID.
// Ordering is determined solely by SomeFloat; ID is just carried along.
struct SortStruct : IComparable<SortStruct>
{
    public float SomeFloat;
    public int ID;

    public SortStruct(float f, int id)
    {
        SomeFloat = f;
        ID = id;
    }

    // Hand-rolled three-way comparison on the float key, avoiding the
    // overhead of calling float.CompareTo().
    public int CompareTo(SortStruct other)
    {
        float mine = SomeFloat;
        float theirs = other.SomeFloat;
        if (mine > theirs)
            return 1;
        if (mine < theirs)
            return -1;
        return 0;
    }
}
}
Demo output:
Sorting random arrays, this will take some time...
normalFloatAverage: 3840.998 ticks.
floatWithIDAverage: 12850.672 ticks.
Press enter to exit.
Edit: I updated the code to also sort using a struct and a delegate, as suggested below, there is no difference.
New demo output:
Sorting random arrays, this will take some time...
normalFloatAverage: 3629.092 ticks.
floatWithIDAverage: 12721.622 ticks.
structAverage: 12870.584 ticks.
Press enter to exit.
Edit 2: I have updated my code with some of the suggestions below, making it a struct either has no effect on my pc or I am doing something horribly wrong. I also added some sanity checks.
New demo output (don't let the "atleast once" fool you, it is misphrased):
Sorting random arrays, this will take some time...
normalFloatAverage: 3679.928 ticks.
floatWithIDAverage: 14084.794 ticks.
structAverage: 11725.364 ticks.
arraySortOverloadAverage: 2186.3 ticks.
floatWithIDArray has been sorted correctly atleast once.
structArray has been sorted correctly atleast once.
arraySortOverloadArray has NOT been sorted correctly atleast once.
Press enter to exit.
Edit 3: I have updated the demo code once again with a fix to the overloaded method of Array.Sort. Note that I create and fill test[] outside the Stopwatch because in my case I already have that array available. arraySortOverload is faster in debug mode, and about as fast as the struct method in release mode.
New demo output (RELEASE):
Sorting random arrays, this will take some time...
normalFloatAverage: 2384.578 ticks.
floatWithIDAverage: 6405.866 ticks.
structAverage: 4583.992 ticks.
arraySortOverloadAverage: 4551.104 ticks.
floatWithIDArray has been sorted correctly all the time.
structArray has been sorted correctly all the time.
arraySortOverloadArray has been sorted correctly all the time.
Press enter to exit.
There are two minor ways to speed this up:
Use a struct instead of a class.
Hand-code the CompareTo() instead of using float.CompareTo().
There is also a major way to speed this up for floatWithIDAverage: Use x86 instead of x64. (But this does NOT speed up normalFloatAverage!)
Results before making any changes (for a RELEASE build, not a DEBUG build):
x64:
normalFloatAverage: 2469.86 ticks.
floatWithIDAverage: 6172.564 ticks.
x86:
normalFloatAverage: 3071.544 ticks.
floatWithIDAverage: 6036.546 ticks.
Results after changing SortTest to be a struct:
This change allows the compiler to make a number of optimizations.
x64:
normalFloatAverage: 2307.552 ticks.
floatWithIDAverage: 4214.414 ticks.
x86:
normalFloatAverage: 3054.814 ticks.
floatWithIDAverage: 4541.864 ticks.
Results after changing SortTest.CompareTo() as follows:
// Hand-coded three-way comparison on the float key; avoids the call
// overhead of float.CompareTo() noted in the measurements below.
public int CompareTo(SortTest other)
{
float f = other.SomeFloat;
if (SomeFloat < f)
return -1;
else if (SomeFloat > f)
return 1;
else
return 0;
}
This change removes the overhead of calling float.CompareTo().
x64:
normalFloatAverage: 2323.834 ticks.
floatWithIDAverage: 3721.254 ticks.
x86:
normalFloatAverage: 3087.314 ticks.
floatWithIDAverage: 3074.364 ticks.
Finally, in this specific case, floatWithIDAverage is actually faster than normalFloatAverage.
The difference between x86 and x64 is interesting!
x64 is faster than x86 for normalFloatAverage
x86 is faster than x64 for floatWithIDAverage
Conclusion
Although I can't explain why the x86 version is so much faster than the x64 version for floatWithIDAverage, I have shown a way of speeding it up significantly.
I'll complement the other answers by adding another way to optimize this. Using a struct certainly is essential. It removes lots of pointer following and makes the JIT generate specialized generics code just for this struct.
The JIT is unfortunately sometimes not able to completely remove all generics overhead even when you use a struct. For that reason it can be beneficial to fork the .NET Array.Sort code and hard-code your array item type. This is a brutal thing to do. It's only worth it if your performance requirements are high. I have done this once and it payed off.
I am trying to measure the time between operations.
So I wrote two methods with equal code, but used in different ways.
In the first way I do it like this:
// Times N snapshot lookups with a single shared Stopwatch.
// NOTE(review): this is the buggy version discussed in the answers below —
// the stopwatch is never Reset(), so Start() resumes accumulating and
// Elapsed is cumulative across iterations; and ts.Milliseconds is only the
// 0-999 millisecond *component* of the TimeSpan, not TotalMilliseconds,
// so sub-millisecond precision is thrown away.
private static void calcAverageTimeUid(ISomeObject someObj, int N,ISnapshot _Snapshot)
{
Stopwatch stopWatch = new Stopwatch();
int averageTime = 0;
var uid = someObj.Uid;
for (int i = 0; i < N; i++)
{
stopWatch.Start();
var coll = _Snapshot.GetObject(uid);
stopWatch.Stop();
// Elapsed here is the *total* time so far, not this iteration's time.
TimeSpan ts = stopWatch.Elapsed;
averageTime = averageTime + ts.Milliseconds;
}
averageTime = averageTime / N;
}
And I get an averageTime result of about 500 milliseconds, with N = 1000000 and more.
But then I rewrote this method into two methods: mainCalc, which should contain some other methods, e.g.
to get the average time of uid, id, name and so on.
mainCalc:
// Averages N timed lookups by summing the per-call results.
// NOTE(review): calcAverageTimeUid2 returns whole milliseconds (an int),
// which is 0 for any sub-millisecond call, so this sum — and therefore the
// average — rounds down to 0. That is why the two approaches disagree.
private static void mainCalc(ISomeObject someObj,int N,ISnapshot _Snapshot)
{
int averageTimeUID = 0;
for (int i = 0; i < N; i++)
{
var tmp=calcAverageTimeUid2(someObj,N,_Snapshot);
averageTimeUID+=tmp;
}
averageTimeUID = averageTimeUID / N;
}
And other method:
// Times a single snapshot lookup with a fresh Stopwatch.
// NOTE(review): ts.Milliseconds is the 0-999 millisecond component of the
// TimeSpan; when GetObject takes under 1 ms this returns 0, losing all
// sub-millisecond precision. Returning ticks or the TimeSpan itself would
// preserve it (see the answers below).
private static int calcAverageTimeUid2(ISomeObject someObj,int N,ISnapshot _Snapshot)
{
Stopwatch stopWatch = new Stopwatch();
var prop = someObj.Uid;
stopWatch.Start();
var obj = _Snapshot.GetObject(prop);
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
return ts.Milliseconds;
}
So, I run mainCalc, which runs calcAverageTimeUid2 inside it. And the result of stopWatch = 0 milliseconds!
Is this result wrong or not? I don't understand — which way of using stopWatch is right?
P.S. I deleted one of the redundant Stopwatches.
P.P.S. Thanks to all of you!
Your first routine should be
for (int i = 0; i < N; i++)
{
stopWatch.Start();
var coll = _Snapshot.GetObject(uid);
stopWatch.Stop();
}
averageTime = stopWatch.Elapsed / N;
Note, stopWatch.Start() does not reset the stopwatch back to zero.
Milliseconds is not TotalMilliseconds.
Milliseconds is the entire number of milliseconds of the TimeSpan. Not the total milliseconds which is a double, so you are losing the precision under 1ms.
And why do you return an int, instead of the TimeSpan?
Try this code :
// Averages N timed lookups. Accumulates full TimeSpans (keeping
// sub-millisecond precision) and divides the total ticks by N at the end.
private static void mainCalc(ISomeObject someObj, int N, ISnapshot _Snapshot)
{
    var total = TimeSpan.Zero;
    for (int i = 0; i < N; i++)
    {
        total += calcAverageTimeUid2(someObj, N, _Snapshot);
    }
    var averageTimeUID = new TimeSpan(total.Ticks / N);
}
The other method:
// Times a single snapshot lookup and returns the full elapsed TimeSpan,
// so the caller can average without losing sub-millisecond precision.
private static TimeSpan calcAverageTimeUid2(ISomeObject someObj, int N, ISnapshot _Snapshot)
{
    // Fetch the key before timing so only GetObject is measured.
    var uid = someObj.Uid;
    var timer = new Stopwatch();
    timer.Start();
    var obj = _Snapshot.GetObject(uid);
    timer.Stop();
    return timer.Elapsed;
}
The reason you get different results is because you are rounding the number of milliseconds in different places. In your first method, you use one stopwatch and continuously Start() and Stop() it. Your operation must take less than 1 ms, but when you repeatedly start and stop the same stopwatch, the total number of ticks will still increase. That is why with N=1000000 you got only 500 ms.
In the second method, you start and stop a new stopwatch each time, and return the milliseconds. Since each operation is averaging 500/1000000 = 0.00005 ms, the ticks of the stopwatch will accumulate some small value, but the ElapsedMilliseconds (or Milliseconds of the TimeSpan) will still be 0.
EDIT: To solve your problems, the first loop should use the final Elapsed value of the stopwatch once the loop is complete (like the 2nd example in sgmoore's answer). The second method should return the ticks from the method rather than milliseconds and then calculate the milliseconds from the tick frequency of the stopwatch.
In summary, the first operation you are summing a bunch of values like 0.00005, in the second you are summing a bunch of 0s.
Just now I read some posts about List<T> vs LinkedList<T>, so I decided to benchmark some structures myself. I benchmarked Stack<T>, Queue<T>, List<T> and LinkedList<T> by adding data and removing data to/from the front/end. Here's the benchmark result:
Pushing to Stack... Time used: 7067 ticks
Poping from Stack... Time used: 2508 ticks
Enqueue to Queue... Time used: 7509 ticks
Dequeue from Queue... Time used: 2973 ticks
Insert to List at the front... Time used: 5211897 ticks
RemoveAt from List at the front... Time used: 5198380 ticks
Add to List at the end... Time used: 5691 ticks
RemoveAt from List at the end... Time used: 3484 ticks
AddFirst to LinkedList... Time used: 14057 ticks
RemoveFirst from LinkedList... Time used: 5132 ticks
AddLast to LinkedList... Time used: 9294 ticks
RemoveLast from LinkedList... Time used: 4414 ticks
Code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
namespace Benchmarking
{
// Micro-benchmark comparing add/remove at the front and at the end of
// Stack<int>, Queue<int>, List<int> and LinkedList<int> (100,000 ops each).
static class Collections
{
// Runs every timing section and prints one "Time used: N ticks" line each.
// NOTE(review): rand.Next() is called inside the timed loops, so its cost
// is included in the push/enqueue/insert/add numbers. The read loops use
// dummy++ so the element reads are not trivially dead stores.
public static void run()
{
Random rand = new Random();
Stopwatch sw = new Stopwatch();
Stack<int> stack = new Stack<int>();
Queue<int> queue = new Queue<int>();
List<int> list1 = new List<int>();
List<int> list2 = new List<int>();
LinkedList<int> linkedlist1 = new LinkedList<int>();
LinkedList<int> linkedlist2 = new LinkedList<int>();
int dummy;
// Each section follows the same pattern: Reset the shared stopwatch,
// print a label, time 100,000 operations, print the elapsed ticks.
sw.Reset();
Console.Write("{0,40}", "Pushing to Stack...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
stack.Push(rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "Poping from Stack...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = stack.Pop();
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "Enqueue to Queue...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
queue.Enqueue(rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "Dequeue from Queue...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = queue.Dequeue();
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
// Front-of-List operations shift every existing element on each call,
// which is why these two sections dominate the results.
sw.Reset();
Console.Write("{0,40}", "Insert to List at the front...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
list1.Insert(0, rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "RemoveAt from List at the front...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = list1[0];
list1.RemoveAt(0);
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "Add to List at the end...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
list2.Add(rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "RemoveAt from List at the end...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = list2[list2.Count - 1];
list2.RemoveAt(list2.Count - 1);
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
// The LinkedList sections use the First/Last *properties* (O(1)), not
// LINQ's First()/Last() extension methods — see the discussion below.
sw.Reset();
Console.Write("{0,40}", "AddFirst to LinkedList...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
linkedlist1.AddFirst(rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "RemoveFirst from LinkedList...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = linkedlist1.First.Value;
linkedlist1.RemoveFirst();
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "AddLast to LinkedList...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
linkedlist2.AddLast(rand.Next());
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks", sw.ElapsedTicks);
sw.Reset();
Console.Write("{0,40}", "RemoveLast from LinkedList...");
sw.Start();
for (int i = 0; i < 100000; i++)
{
dummy = linkedlist2.Last.Value;
linkedlist2.RemoveLast();
dummy++;
}
sw.Stop();
Console.WriteLine(" Time used: {0,9} ticks\n", sw.ElapsedTicks);
}
}
}
The differences are so dramatic!
As you can see, the performance of Stack<T> and Queue<T> are fast and comparable, that's expected.
For List<T>, using the front and the end has so much differences! And to my surprise, performance of adding/removing from the end is actually comparable to the performance of Stack<T>.
For LinkedList<T>, manipulating the front is fast (faster than List<T>); manipulating the end is too (the earlier result showing the end as incredibly slow turned out to be a coding error).
So... can any experts account on:
the similarity in performance of using Stack<T> and the end of List<T>,
the differences in using the front and the end of List<T>, and
the reason that using the end of LinkedList<T> is so slow (not applicable as that is a coding error due to the use of Linq's Last(), thanks to CodesInChaos)?
I think I know why List<T> doesn't handle the front so well... because List<T>needs to move the whole list back and fro when doing that. Correct me if I am wrong.
P.S. My System.Diagnostics.Stopwatch.Frequency is 2435947, and the program is targeted to .NET 4 Client Profile and compiled with C# 4.0, on Windows 7 Visual Studio 2010.
Concerning 1:
Stack<T>'s and List<T>'s performance being similar isn't surprising. I'd expect both of them to use arrays with a doubling strategy. This leads to amortized constant-time additions.
You can use List<T> everywhere you can use Stack<T>, but it leads to less expressive code.
Concerning 2:
I think I know why List<T> doesn't handle the front so well... because List<T> needs to move the whole list back and fro when doing that.
That's correct. Inserting/removing elements at the beginning is expensive because it moves all elements. Getting or replacing elements at the beginning on the other hand is cheap.
Concerning 3:
Your slow LinkedList<T>.RemoveLast value is a mistake in your benchmarking code.
Removing or getting the last item of a doubly linked list is cheap. In the case of LinkedList<T> that means that RemoveLast and Last are cheap.
But you weren't using the Last property, but LINQ's extension method Last(). On collections that don't implement IList<T> it iterates the whole list, giving it O(n) runtime.
List<T> is a dynamic over-allocating array (a data structure you'll also see in many other languages' standard libraries). This means it internally uses a "static" array (an array that can't be resized, known as just "array" in .NET) which may be, and often is, larger than the size of the list. Appending then simply increments a counter and uses the next, previously unused, slot of the internal array. The array is only re-allocated (which requires copying all elements) if the internal array becomes too small to accommodate all items. When that happens, the size of the array is increased by a factor (not by a constant amount), usually 2.
This ensures that amortized time complexity (basically, the average time per operation over a long sequence of operations) for appending is O(1) even in the worst case. For adding at the front, no such optimization is feasible (at least not while keeping both random access and O(1) appending at the end). It always has to copy all elements to move them into their new slots (making space for the added element in the first slot). Stack<T> does the same thing, you just don't notice the discrepancy with adding to the front because you only ever operate on one end (the fast one).
Getting the end of a linked list depends a lot on the internals of your list. One can maintain a reference to the last element, but this makes all operations on the list more complicated, and may (I don't have an example at hand) make some operations much more expensive. Lacking such a reference, appending to the end requires walking through all elements of the linked list to find the last node, which is of course awfully slow for lists of nontrivial size.
As pointed out by #CodesInChaos, your linked list manipulation was flawed. The fast retrieval of the end you see now is most likely caused by LinkedList<T> explicitly maintaining a reference to the last node, as mentioned above. Note that getting an element not at either end is still slow.
The speed comes essentially from the number of operations needed to insert, delete, or search an item. You already noticed, that list needs memory transfers.
Stack is a list, that is accessible only at the top element -- and the computer always knows, where it is.
The linked list is another thing: the start of the list is known, thus it's very fast to add or remove from the start -- but finding the last element takes time. Caching the location of the last element OTOH is only worthwhile for addition. For deletion one needs to traverse the complete list minus one element to find the 'hook' or pointer to the last one.
Just looking at the numbers, one can make some educated guesses of the internals of each data structure:
pop from a stack is fast, as expected
push to stack is slower. and it's slower than adding to the end of the list. Why?
apparently the allocation unit size for stack is smaller -- it may only increase the stack size by 100, while growing the list could be done in units of 1000.
A list seems to be a static array. Accessing the list at the front requires memory transfers, that take time in proportion to the list length.
Basic linked list operations shouldn't take that much longer, it's generally only required to
new_item.next = list_start; list_start = new_item; // to add
list_start = list_start.next; // to remove
however, as addLast is so fast, it means that also when adding or deleting to a linked list, one has to update the pointer to the last element also. So there's extra bookkeeping.
Doubly linked lists OTOH make it relatively fast to insert and delete at both ends of the list (I've been informed that a better code uses DLLs), however,
links to previous and next item also double the work for the bookkeeping
the similarity in performance of using Stack and the end of List,
As explained by delnan, they both use a simple array internally, so they behave very similar when working at the end. You could see a stack being a list with just access to the last object.
the differences in using the front and the end of List
You already suspected it correctly. Manipulating the beginning of a list means that the underlying array needs to change. Adding an item usually means that you need to shift all other elements by one; the same goes for removing. If you know that you will be manipulating both ends of a list, you're better off using a linked list.
the reason that using the end of LinkedList is so slow?
Usually, element insertion and deletion for linked lists at any position can be done in constant time, as you just need to change at most two pointers. The problem is just getting to the position. A normal linked list has just a pointer to its first element. So if you want to get to the last element, you need to iterate through all elements. A queue implemented with a linked list usually solves this problem by having an additional pointer to the last element, so adding elements is possible in constant time as well. The more sophisticated data structure would be a double linked list that has both pointers to the first and last element, and where each element also contains a pointer to the next and previous element.
What you should learn about this is that there are many different data structures that are made for a single purpose, which they can handle very efficiently. Choosing the correct structure depends a lot on what you want to do.
I have a Java background and I guess your question relates more to general datastructures than a specific language. Also, I apologize if my statements are incorrect.
1. the similarity in performance of using Stack and the end of List
2. the differences in using the front and the end of List, and
At least in Java, Stacks are implemented using arrays (Apologies if that is not the case with C#. You could refer to the source for the implementation) And same is the case of Lists. Typical with an array, all insertions at the end takes lesser time than at the beginning because the pre-existing values in the array needs to be moved down to accommodate the insertion at the beginning.
Link to Stack.java source and its superclass Vector
3. the reason that using the end of LinkedList is so slow?
LinkedList do not allow random access and have to traverse through the nodes before reaching your insertion point. If you find that the performance is slower for the last nodes, then I suppose the LinkedList implementation should be a singly-linked list. I guess you would want to consider a doubly-linked-list for optimal performance while accessing elements at the end.
http://en.wikipedia.org/wiki/Linked_list
This just improves some of the deficiencies of the previous code, especially the influence of Random and the dummy calculations. Array still tops everything, but the performance of List is impressive, and LinkedList is very good for random insertions.
The sorted results are:
12 array[i]
40 list2[i]
62 FillArray
68 list2.RemoveAt
78 stack.Pop
126 list2.Add
127 queue.Dequeue
159 stack.Push
161 foreach_linkedlist1
191 queue.Enqueue
218 linkedlist1.RemoveFirst
219 linkedlist2.RemoveLast
2470 linkedlist2.AddLast
2940 linkedlist1.AddFirst
The code is:
using System;
using System.Collections.Generic;
using System.Diagnostics;
//
namespace Benchmarking {
//
// Revised micro-benchmark (9,000,000 ops per section): the loop counter is
// inserted instead of Random values so allocation/random cost stays out of
// the timed loops, and raw-array fill/read baselines are added at the end.
static class Collections {
//
// Times each collection operation and prints the elapsed milliseconds.
// NOTE(review): unlike the earlier version, 'dummy' is written but never
// read in the read loops below, so a release-build JIT might eliminate
// those reads — consider adding dummy++ as before; verify before trusting
// the read numbers.
public static void Main() {
const int limit = 9000000;
Stopwatch sw = new Stopwatch();
Stack<int> stack = new Stack<int>();
Queue<int> queue = new Queue<int>();
List<int> list1 = new List<int>();
List<int> list2 = new List<int>();
LinkedList<int> linkedlist1 = new LinkedList<int>();
LinkedList<int> linkedlist2 = new LinkedList<int>();
int dummy;
sw.Reset();
Console.Write( "{0,40} ", "stack.Push");
sw.Start();
for ( int i = 0; i < limit; i++ ) {
stack.Push( i );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "stack.Pop" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
stack.Pop();
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "queue.Enqueue" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
queue.Enqueue( i );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "queue.Dequeue" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
queue.Dequeue();
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
// Front-of-list sections are disabled: at 9,000,000 O(n) shifts they
// would take far too long to run.
//sw.Reset();
//Console.Write( "{0,40} ", "Insert to List at the front..." );
//sw.Start();
//for ( int i = 0; i < limit; i++ ) {
// list1.Insert( 0, i );
//}
//sw.Stop();
//Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
//
//sw.Reset();
//Console.Write( "{0,40} ", "RemoveAt from List at the front..." );
//sw.Start();
//for ( int i = 0; i < limit; i++ ) {
// dummy = list1[ 0 ];
// list1.RemoveAt( 0 );
// dummy++;
//}
//sw.Stop();
//Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "list2.Add" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
list2.Add( i );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "list2.RemoveAt" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
list2.RemoveAt( list2.Count - 1 );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "linkedlist1.AddFirst" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
linkedlist1.AddFirst( i );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "linkedlist1.RemoveFirst" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
linkedlist1.RemoveFirst();
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "linkedlist2.AddLast" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
linkedlist2.AddLast( i );
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "linkedlist2.RemoveLast" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
linkedlist2.RemoveLast();
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
// Fill again
for ( int i = 0; i < limit; i++ ) {
list2.Add( i );
}
sw.Reset();
Console.Write( "{0,40} ", "list2[i]" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
dummy = list2[ i ];
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
// Fill array
// NOTE(review): this section also times the array allocation itself,
// not just the element writes.
sw.Reset();
Console.Write( "{0,40} ", "FillArray" );
sw.Start();
var array = new int[ limit ];
for ( int i = 0; i < limit; i++ ) {
array[ i ] = i;
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
sw.Reset();
Console.Write( "{0,40} ", "array[i]" );
sw.Start();
for ( int i = 0; i < limit; i++ ) {
dummy = array[ i ];
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
// Fill again
for ( int i = 0; i < limit; i++ ) {
linkedlist1.AddFirst( i );
}
sw.Reset();
Console.Write( "{0,40} ", "foreach_linkedlist1" );
sw.Start();
foreach ( var item in linkedlist1 ) {
dummy = item;
}
sw.Stop();
Console.WriteLine( sw.ElapsedMilliseconds.ToString() );
//
Console.WriteLine( "Press Enter to end." );
Console.ReadLine();
}
}
}
I have a requirement in my project (C#, VS2010, .NET 4.0) that a particular for loop must finish within 200 milliseconds. If it doesn't then it has to terminate after this duration without executing the remaining iterations. The loop generally goes for i = 0 to about 500,000 to 700,000 so the total loop time varies.
I have read following questions which are similar but they didn't help in my case:
What is the best way to exit out of a loop after an elapsed time of 30ms in C++
How to execute the loop for specific time
So far I have tried using a Stopwatch object to track the elapsed time but it's not working for me. Here are 2 different methods I have tried so far:
Method 1. Comparing the elapsed time within for loop:
Stopwatch sw = new Stopwatch();
sw.Start();
for (i = 0; i < nEntries; i++) // nEntries is typically more than 500,000
{
// Do some stuff
...
...
...
if (sw.Elapsed > TimeSpan.FromMilliseconds(200))
break;
}
sw.Stop();
This doesn't work because if (sw.Elapsed > TimeSpan.FromMilliseconds(200)) takes more than 200 milliseconds to complete. Hence useless in my case. I am not sure whether TimeSpan.FromMilliseconds() generally takes this long or it's just in my case for some reason.
Method 2. Creating a separate thread to compare time:
Stopwatch sw = new Stopwatch();
sw.Start();
bool bDoExit = false;
int msLimit = 200;
System.Threading.ThreadPool.QueueUserWorkItem((x) =>
{
while (bDoExit == false)
{
if (sw.Elapsed.Milliseconds > msLimit)
{
bDoExit = true;
sw.Stop();
}
System.Threading.Thread.Sleep(10);
}
});
for (i = 0; i < nEntries; i++) // nEntries is typically more than 500,000
{
// Do some stuff
...
...
...
if (bDoExit == true)
break;
}
sw.Stop();
I have some other code in the for loop that prints some statistics. It tells me that in case of Method 2, the for loop definitely breaks before completing all the iterations but the loop timing is still 280-300 milliseconds.
Any suggestions to break a for loop strictly with-in 200 milliseconds or less?
Thanks.
For a faster comparison try comparing
if(sw.ElapsedMilliseconds > 200)
break;
You should do that check in the beggining of your loop and also during the processing, ("// Do some stuff" part of the code) because it is possible, for example, that processing starts at 190 (beginning of the loop), lasts 20 and ends at 210.
You could also measure average execution time of your processing (this is approximate because it relies on average time), this way loop should last 200 milliseconds or less, here is a demo that you can put in a Main method of a Console application and easily modify it for your application:
// Demo: stop a loop before a 200 ms budget is exceeded by predicting the
// next iteration's cost from the average cost of previous iterations.
Stopwatch sw = new Stopwatch();
sw.Start();
string a = String.Empty;
int i;
// sum  = total milliseconds spent inside the loop body so far
// avg  = running average cost of one iteration (the prediction)
decimal sum = 0, avg = 0, beginning = 0, end = 0;
for (i = 0; i < 700000; i++) // nEntries is typically more than 500,000
{
// Timestamp before the body so we can measure this iteration's cost.
beginning = sw.ElapsedMilliseconds;
// Bail out if starting another iteration of average cost would
// overshoot the 200 ms budget.
if (sw.ElapsedMilliseconds + avg > 200)
break;
// Some processing
a += "x";
int s = a.Length * 100;
Thread.Sleep(19);
/////////////
// Fold this iteration's measured cost into the running average.
end = sw.ElapsedMilliseconds;
sum += end - beginning;
avg = sum / (i + 1);
}
sw.Stop();
Console.WriteLine(
"avg:{0}, count:{1}, milliseconds elapsed:{2}", avg, i + 1,
sw.ElapsedMilliseconds);
Console.ReadKey();
Another option would be to use CancellationTokenSource:
// Loop until the source auto-cancels itself, 100 ms after construction.
// NOTE(review): the CancellationTokenSource is never disposed — consider a
// using statement in real code.
CancellationTokenSource source = new CancellationTokenSource(100);
while(!source.IsCancellationRequested)
{
// Do stuff
}
Use the first one — it is simple and has a better chance of being precise than the second one.
Both cases have the same kind of termination condition, so both should behave more or less the same. The second is much more complicated due to its use of threads and Sleep, so I'd use the first one. The second is also much less precise because of the sleeps.
There is absolutely no reason for TimeSpan.FromMilliseconds(200) to take any significant amount of time (nor for calling it in every iteration to do so).
Using cancellation token:
// Loop until the token auto-cancels, 15 seconds after the source is created.
// NOTE(review): the CancellationTokenSource is discarded immediately and never
// disposed — fine for a demo, but dispose it in production code.
var cancellationToken = new CancellationTokenSource(TimeSpan.FromSeconds(15)).Token;
while (!cancellationToken.IsCancellationRequested)
{
//Do stuff...
}
I don't know if this is that exactly, but I think it's worth a try using a System.Timers.Timer:
// Abort the loop after msLimit milliseconds using a one-shot timer that
// flips a flag checked on every iteration.
int msLimit = 200;
int nEntries = 500000;
// NOTE(review): written on the timer's thread-pool thread and read on the
// loop thread; mark it volatile (or use CancellationTokenSource) to
// guarantee visibility — confirm against your target runtime.
bool cancel = false;
System.Timers.Timer t = new System.Timers.Timer();
t.Interval = msLimit;
t.AutoReset = false; // fire once; the flag latches, so repeat firings are pointless
t.Elapsed += (s, e) => cancel = true;
t.Start();
for (int i = 0; i < nEntries; i++)
{
    // do sth
    if (cancel) {
        break;
    }
}
// FIX: the original leaked the timer — stop and dispose it once the loop ends.
t.Stop();
t.Dispose();
I've written code for counting the frequency of each byte in a binary file, using LINQ. The code seems slow when evaluating the LINQ expression, and it seems hard to apply parallelism to this kind of logic. Building the frequency table over a 475 MB file took approximately 1 minute.
class Program
{
// Reads a binary file, then builds a byte -> frequency table with LINQ,
// timing each phase separately.
static void Main(string[] args)
{
    Stopwatch sw = new Stopwatch();
    sw.Start();
    //File Size 478.668 KB
    // FIX: verbatim string prefix is '@' — '#' does not compile.
    byte[] ltext = File.ReadAllBytes(@"D:\Setup.exe");
    sw.Stop();
    Console.WriteLine("Reading File {0}", GetTime(sw));

    // BUG FIX: use Restart() instead of Start(). Start() on a stopped
    // stopwatch resumes the previous measurement, so the reported
    // frequency-table time would also include the file-read time.
    sw.Restart();
    // NOTE(review): the orderby is lost when the result is poured into a
    // Dictionary (unordered); keep it only if you enumerate the query itself.
    Dictionary<byte, int> result = (from i in ltext
                                    group i by i into g
                                    let count = g.Count() // count each group once, not twice
                                    orderby count descending
                                    select new { g.Key, Freq = count })
                                    .ToDictionary(x => x.Key, x => x.Freq);
    sw.Stop();
    Console.WriteLine("Generating Freq Table {0}", GetTime(sw));

    foreach (var i in result)
    {
        Console.WriteLine(i);
    }
    Console.WriteLine(result.Count);
    Console.ReadLine();
}
// Renders a stopwatch's elapsed time as "<m> min <s> sec <ms> ms".
static string GetTime(Stopwatch sw)
{
    var elapsed = sw.Elapsed;
    return String.Format(
        "{0} min {1} sec {2} ms",
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds);
}
I've tried to implement a non-LINQ solution using a few loops, and the performance is about the same. Please, any advice to optimize this? Sorry for my bad English.
This took a bit over a second on a 442MB file on my otherwise poky Dell laptop:
// Count byte frequencies with a direct 256-slot array — no hashing, no LINQ.
// FIX: verbatim string prefix is '@' — '#' does not compile.
byte[] ltext = File.ReadAllBytes(@"c:\temp\bigfile.bin");
var freq = new long[256];
var sw = Stopwatch.StartNew();
foreach (byte b in ltext) {
    freq[b]++;
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Very hard to beat the raw perf of an array.
The following displays the frequency of bytes in descending order in a 465MB file on my machine in under 9 seconds when built in release mode.
Note, I've made it faster by reading the file in 100000 byte blocks (you can experiment with this - 16K blocks made no appreciable difference on my machine). The point is that the inner loop is the one supplying bytes. Calling Stream.ReadByte() is fast but not nearly as fast as indexing a byte in an array.
Also, reading the whole file into memory exerts extreme memory pressure which will hamper performance and will fail completely if the file is large enough.
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
class Program
{
    // Streams a file in 100 KB blocks, tallies byte frequencies into a plain
    // array, then prints them most-frequent-first. Streaming avoids the
    // memory pressure of ReadAllBytes on large files.
    static void Main( string[] args )
    {
        Console.WriteLine( "Reading file..." );
        var sw = Stopwatch.StartNew();
        var frequency = new long[ 256 ];
        // FIX: verbatim string prefix is '@' — '#' does not compile.
        using ( var input = File.OpenRead( @"c:\Temp\TestFile.dat" ) )
        {
            var buffer = new byte[ 100000 ];
            int bytesRead;
            // BUG FIX: loop until Read returns 0 (end of stream). The old
            // condition `bytesRead == buffer.Length` stops early if Read
            // returns a partial block, which the Stream contract permits
            // even before end-of-file.
            while ( ( bytesRead = input.Read( buffer, 0, buffer.Length ) ) > 0 )
            {
                for ( var i = 0; i < bytesRead; i++ )
                    frequency[ buffer[ i ] ]++;
            }
        }
        Console.WriteLine( "Read file in " + sw.ElapsedMilliseconds + "ms" );

        // Pair each byte value with its count, most frequent first.
        var result = frequency.Select( ( f, i ) => new ByteFrequency { Byte = i, Frequency = f } )
            .OrderByDescending( x => x.Frequency );
        foreach ( var byteCount in result )
            Console.WriteLine( byteCount.Byte + " " + byteCount.Frequency );
    }

    // Simple DTO pairing a byte value (0-255) with its occurrence count.
    public class ByteFrequency
    {
        public int Byte { get; set; }
        public long Frequency { get; set; }
    }
}
Why not just
// Tally occurrences of each byte value (0-255) with a direct array index.
var freq = new int[256];
foreach (var b in ltext)
{
    freq[b]++;
}
?