I want to calculate IOPS and FLOPS in a 5-minute test, where each measurement runs for 150 seconds. However, I am getting surprising results; for example, the average IOPS on 16 threads is 53986225 with a standard deviation of 1282739. Am I doing something wrong below? Please also point out any possible optimizations.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
namespace Threads
{
class Program
{
// Count of operations done per thread
private static long[] operations;
// Volatile variable to signal threads to stop
private static volatile bool stop = false;
private const float TestTimeInMinutes = 1f;
private const float SleepInSeconds = 1f;
// Handles used by threads to signal to the caller that they are done
private static ManualResetEvent[] handles;
static void Main(string[] args)
{
// Get optimal number of threads
var tNum = Environment.ProcessorCount;
// Ask user for thread count
while (true)
{
Console.Write($"Please enter number of threads or 0 for default {tNum}: ");
var input = Console.ReadLine();
if (int.TryParse(input, out var _tNum))
{
if (_tNum != 0)
{
tNum = _tNum;
}
break;
}
}
// Initialize operations array
operations = new long[tNum];
// Initialize handles array
handles = new ManualResetEvent[tNum];
// Populate handles
for (var n = 0; n < tNum; n++)
{
handles[n] = new ManualResetEvent(false);
}
long totalIopS = 0;
long totalFlopS = 0;
long IopDev = 0;
long FlopDev = 0;
// Stopwatch to measure time
var stopwatch = new Stopwatch();
for (int i = 0; i < 5; i++)
{
Console.WriteLine($"Test {i+1}");
stopwatch.Restart();
var iop = MeasureIOP(tNum);
Console.WriteLine($"IOPS: {iop/30}\nElapsed: {stopwatch.Elapsed:mm\\:ss\\.fff}");
totalIopS += iop;
IopDev += StandardDeviation(operations);
stopwatch.Restart();
var flop = MeasureFLOP(tNum);
Console.WriteLine($"FLOPS: {flop/30}\nElapsed: {stopwatch.Elapsed:mm\\:ss\\.fff}");
totalFlopS += flop;
FlopDev += StandardDeviation(operations);
}
Console.WriteLine($"Average\nIOPS: {totalIopS/150}\nIOP Standard Deviation: {(int)(IopDev/150)}\nFLOP: {totalFlopS/150}\nFLOP Standard Deviation: {(int)(FlopDev/150)}");
Console.Write("The Test has ended");
var finalInput = Console.ReadLine();
}
private static void FloatOp(object index)
{
float floatOp = 0;
var i = (int) index;
long op = 0;
while (!stop)
{
floatOp = (floatOp + 0.8f)-0.10f*0.2f;
op++;
}
operations[i] = op;
// Signal that the thread is done
handles[i].Set();
}
private static long MeasureFLOP(int tNum)
{
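// With TestTimeInMinutes = 1 and SleepInSeconds = 1 this evaluates to 30,
// i.e. thirty one-second sampling rounds per call.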
const int numberOfTries = (int) ((TestTimeInMinutes / 2 * 60f) / SleepInSeconds);
long flop = 0;
for (var i = 0; i < numberOfTries; i++)
{
stop = false;
for (var threadIndex = 0; threadIndex < tNum; threadIndex++)
{
// Reset the handle
// By resetting the handle, the thread is considered active
handles[threadIndex].Reset();
// Start a thread
ThreadPool.QueueUserWorkItem(FloatOp, threadIndex);
}
Thread.Sleep((int)(SleepInSeconds*1000));
stop = true;
// Wait for all threads to exit
WaitHandle.WaitAll(handles);
// Calculate flops
flop += (int)((operations.Sum()/tNum)/SleepInSeconds);
}
return flop / numberOfTries;
}
private static void IntOp(object index)
{
long intop = 0;
var i = (int) index;
long op = 0;
while (!stop)
{
intop = intop + 1 - 10 * 2;
op++;
}
operations[i] = op;
// Signal that the thread is done
handles[i].Set();
}
private static long MeasureIOP(int tNum)
{
const int numberOfTries = (int) ((TestTimeInMinutes / 2 * 60f) / SleepInSeconds);
long iop = 0;
for (var i = 0; i < numberOfTries; i++)
{
stop = false;
for (var threadIndex = 0; threadIndex < tNum; threadIndex++)
{
// Reset the handle
// By resetting the handle, the thread is considered active
handles[threadIndex].Reset();
ThreadPool.QueueUserWorkItem(IntOp, threadIndex);
}
Thread.Sleep((int)(SleepInSeconds*1000));
stop = true;
// Wait for all threads to exit
WaitHandle.WaitAll(handles);
// Calculate iop
iop += (int)((operations.Sum()/tNum)/SleepInSeconds);
}
return iop / numberOfTries;
}
private static long StandardDeviation(long[] nums)
{
var average = nums.Average();
var sum = nums.Sum(n => Math.Pow(n - average, 2));
return (long)Math.Sqrt(sum / nums.Length);
}
}
}
Related
I have an application that calculates the prime numbers up to the user's input. So if the user enters 10 in the console, it shows every prime number from 0 to 10. If I enter something like 10000000 it takes a long time before it shows every prime number, so I want to split the work across 4 threads, so that each thread does 1/4 of the total range: the first thread does 0 to 250000, the second thread does 250000 to 500000, etc. This is my code so far. What it does right now is use one thread to calculate the primes up to the number the user enters, and at the end I sum the values in the array.
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
namespace WeekOpdr__5
{
internal class Program
{
static int[] deel1 = new int[10000];
static int n;
static int startNumber;
static int secondNumber;
static void Main(string[] args)
{
Stopwatch sw = new Stopwatch();
while (true)
{
Console.WriteLine("type a number in: ");
n = Convert.ToInt32(Console.ReadLine());
int start1 = 2;
int threadCount = 4;
var threads = new Thread[threadCount];
sw.Start();
for (int i = 0; i < threadCount; i++)
{
int range = n / threadCount * 2;
Console.WriteLine(range);
secondNumber = n / 2;
int start2 = start1;
threads[i] = new Thread(() => PrimeNumbers(startNumber, range ));
//threads[i] = new Thread(() => PrimeNumbers(secondNumber, n));
threads[i].Start();
}
for (int i = 0; i < threadCount; i++)
{
threads[i].Join();
}
Console.WriteLine($"The Prime Numbers between 0 and {n} are : ");
sw.Restart();
ReturnN();
SumN();
long timeElapsed = sw.ElapsedMilliseconds;
Console.WriteLine($"\nthe total time is {timeElapsed} ms");
}
static void PrimeNumbers(int startNumber, int n)
{
int position = 0;
for (int i = startNumber; i <= n; i++)
{
bool primeDetected = true;
for (int j = 2; j <= i / 2; j++)
{
if (i % j == 0)
{
primeDetected = false;
break;
}
}
if (primeDetected && i != 1)
{
deel1[position++] = i;
}
}
}
static void ReturnN()
{
foreach (int i in deel1)
{
Console.Write($"{i} ");
}
}
static void SumN()
{
int sum = deel1.Sum();
Console.WriteLine($"\nDe som van de priemgetallen tussen 2 en {n}: {sum}");
}
}
}
}
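For reference, here is a minimal sketch of the partitioning described in the question: one thread per quarter of the range, each writing into its own result list. The class and method names are illustrative, not from the original code, and the bound is kept small so the trial-division approach finishes quickly.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;

internal static class PartitionedPrimes
{
    private static void Main()
    {
        const int n = 100_000;        // small bound for a quick run; the question uses much larger values
        const int threadCount = 4;
        var results = new List<int>[threadCount];   // one result list per thread, so no shared writes
        var threads = new Thread[threadCount];

        for (int t = 0; t < threadCount; t++)
        {
            int index = t;                                   // copy to avoid closing over the loop variable
            int start = Math.Max(2, index * n / threadCount + 1);
            int end = (index + 1) * n / threadCount;
            threads[t] = new Thread(() => results[index] = PrimesInRange(start, end));
            threads[t].Start();
        }

        foreach (var thread in threads)
            thread.Join();

        var primes = results.SelectMany(list => list).ToList();
        Console.WriteLine($"{primes.Count} primes up to {n}, sum = {primes.Sum(p => (long)p)}");
    }

    // Trial division, the same basic test as the question's PrimeNumbers method.
    private static List<int> PrimesInRange(int start, int end)
    {
        var primes = new List<int>();
        for (int i = start; i <= end; i++)
        {
            bool isPrime = i >= 2;
            for (int j = 2; j <= i / 2; j++)
            {
                if (i % j == 0) { isPrime = false; break; }
            }
            if (isPrime) primes.Add(i);
        }
        return primes;
    }
}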
I made the following C# Console App:
class Program
{
static RNGCryptoServiceProvider rng = new RNGCryptoServiceProvider();
public static ConcurrentDictionary<int, int> StateCount { get; set; }
static int length = 1000000000;
static void Main(string[] args)
{
StateCount = new ConcurrentDictionary<int, int>();
for (int i = 0; i < 3; i++)
{
StateCount.AddOrUpdate(i, 0, (k, v) => 0);
}
Console.WriteLine("Processors: " + Environment.ProcessorCount);
Console.WriteLine("Starting...");
Console.WriteLine();
Timer t = new Timer(1000);
t.Elapsed += T_Elapsed;
t.Start();
Stopwatch sw = new Stopwatch();
sw.Start();
Parallel.For(0, length, (i) =>
{
var rand = GetRandomNumber();
int newState = 0;
if(rand < 0.3)
{
newState = 0;
}
else if (rand < 0.6)
{
newState = 1;
}
else
{
newState = 2;
}
StateCount.AddOrUpdate(newState, 0, (k, v) => v + 1);
});
sw.Stop();
t.Stop();
Console.WriteLine();
Console.WriteLine("Total time: " + sw.Elapsed.TotalSeconds);
Console.ReadKey();
}
private static void T_Elapsed(object sender, ElapsedEventArgs e)
{
int total = 0;
for (int i = 0; i < 3; i++)
{
if(StateCount.TryGetValue(i, out int value))
{
total += value;
}
}
int percent = (int)Math.Round((total / (double)length) * 100);
Console.Write("\r" + percent + "%");
}
public static double GetRandomNumber()
{
var bytes = new Byte[8];
rng.GetBytes(bytes);
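// Integer-dividing the 64 random bits by 2^11 keeps only the top 53 bits,
// and dividing by 2^53 then scales them to a uniform double in [0, 1).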
var ul = BitConverter.ToUInt64(bytes, 0) / (1 << 11);
Double randomDouble = ul / (Double)(1UL << 53);
return randomDouble;
}
}
Before running this, the Task Manager reported <2% CPU usage (across all runs and machines).
I ran it on a machine with a Ryzen 3800X. The output was:
Processors: 16
Total time: 209.22
The speed reported in the Task Manager while it ran was ~4.12 GHz.
I ran it on a machine with an i7-7820HK. The output was:
Processors: 8
Total time: 213.09
The speed reported in the Task Manager while it ran was ~3.45 GHz.
I modified Parallel.For to include the processor count (Parallel.For(0, length, new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount }, (i) => {code});). The outputs were:
3800X: 16 - 158.58 # ~4.13
7820HK: 8 - 210.49 # ~3.40
There's something to be said about Parallel.For not natively distinguishing the Ryzen's processors vs cores, but setting that aside, even here the Ryzen performance is still significantly poorer than would be expected (only ~25% faster despite double the cores/logical processors, a higher clock speed, and larger L1-L3 caches). Can anyone explain why?
Edit: Following a couple of comments, I made some changes to my code. See below:
static int length = 1000;
static void Main(string[] args)
{
StateCount = new ConcurrentDictionary<int, int>();
for (int i = 0; i < 3; i++)
{
StateCount.AddOrUpdate(i, 0, (k, v) => 0);
}
var procCount = Environment.ProcessorCount;
Console.WriteLine("Processors: " + procCount);
Console.WriteLine("Starting...");
Console.WriteLine();
List<double> times = new List<double>();
Stopwatch sw = new Stopwatch();
for (int m = 0; m < 10; m++)
{
sw.Restart();
Parallel.For(0, length, new ParallelOptions() { MaxDegreeOfParallelism = procCount }, (i) =>
{
for (int j = 0; j < 1000000; j++)
{
var rand = GetRandomNumber();
int newState = 0;
if (rand < 0.3)
{
newState = 0;
}
else if (rand < 0.6)
{
newState = 1;
}
else
{
newState = 2;
}
StateCount.AddOrUpdate(newState, 0, (k, v) => v + 1);
}
});
sw.Stop();
Console.WriteLine("Total time: " + sw.Elapsed.TotalSeconds);
times.Add(sw.Elapsed.TotalSeconds);
}
Console.WriteLine();
var avg = times.Average();
var variance = times.Select(x => (x - avg) * (x - avg)).Sum() / times.Count;
var stdev = Math.Sqrt(variance);
Console.WriteLine("Average time: " + avg + " +/- " + stdev);
Console.ReadKey();
Console.ReadKey();
}
The outside loop is 1,000 instead of 1,000,000,000, so there are "only" 1,000 parallel "tasks." Within each parallel "task," however, there's now a loop of 1,000,000 actions, so the act of "getting the task" or whatever should have a much smaller effect on the total. I also loop the whole thing 10 times and get the average + standard deviation. Output:
Ryzen 3800X: 158.531 +/- 0.429 # ~4.13
i7-7820HK: 202.159 +/- 2.538 # ~3.48
Even here, the Ryzen's twice as many threads and 0.60 GHz higher clock only bring the total time for the operation down to roughly three-quarters of the i7's.
My task is to sum the numbers in some range, and to achieve that I have to use threads to split up the computation.
I divided the range into parts and used a thread for each part.
public class ParallelCalc
{
public long resultLong;
private Thread[] threads;
private List<long> list = new List<long>();
public long MaxNumber { get; set; }
public int ThreadsNumber { get; set; }
public event CalcFinishedEventHandler finished;
public ParallelCalc(long MaxNumber, int ThreadsNumber)
{
this.MaxNumber = MaxNumber;
this.ThreadsNumber = ThreadsNumber;
this.threads = new Thread[ThreadsNumber];
}
public void Start()
{
Stopwatch sw = new Stopwatch();
for (int i = 0; i < ThreadsNumber; i++)
{
threads[i] = new Thread(() => Sum(((MaxNumber / ThreadsNumber) * i) + 1,
MaxNumber / ThreadsNumber * (i + 1)));
if (i == ThreadsNumber - 1)
{
threads[i] = new Thread(() => Sum(((MaxNumber / ThreadsNumber) * i) + 1,
MaxNumber));
}
sw.Start();
threads[i].Start();
}
while (threads.All(t => t.IsAlive));
sw.Stop();
finished?.Invoke(this,
new CalcFinishedEventArgs()
{
Result = list.Sum(),
Time = sw.ElapsedMilliseconds
});
}
private void Sum(long startNumber, long endnumber)
{
long result = 0;
for (long i = startNumber; i <= endnumber; i++)
{
result += i;
}
list.Add(result);
}
}
The result should be the sum of the numbers; however, it is incorrect, because the threads add their partial results to the list asynchronously. Please point out the error.
There is more than one thing wrong here, brace yourself...
Start creates a Stopwatch sw, but you call sw.Start on every iteration of the loop. Start it only once.
If i == ThreadsNumber - 1 evaluates to true, you throw away the Thread you just created and let it go to garbage. I fail to grasp why...
(MaxNumber / ThreadsNumber) * (i + 1) WHEN i == ThreadsNumber - 1
=
(MaxNumber / ThreadsNumber) * (ThreadsNumber - 1 + 1)
=
(MaxNumber / ThreadsNumber) * (ThreadsNumber)
=
MaxNumber
Do you have rounding problems? Rewrite like this:
((i + 1) * MaxNumber) / ThreadsNumber
By dividing last, you avoid the rounding problem.
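For example (made-up numbers, just to show the integer-division difference for the last thread):
long MaxNumber = 10;
long ThreadsNumber = 3;
long lastIndexPlusOne = 3;   // i + 1 for the last thread (i == ThreadsNumber - 1)

// Dividing first truncates before multiplying, so the last range stops short of MaxNumber:
Console.WriteLine((MaxNumber / ThreadsNumber) * lastIndexPlusOne);   // (10 / 3) * 3 = 9

// Dividing last keeps the full value for the last thread:
Console.WriteLine((lastIndexPlusOne * MaxNumber) / ThreadsNumber);   // (3 * 10) / 3 = 10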
You are spin waiting on the threads with while (threads.All(t => t.IsAlive));. You could just as well use Thread.Join, or better yet, let the threads notify you when they are done.
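For instance, a minimal Thread.Join version of the wait (just a sketch; the full rewrite below uses the notification approach instead):
foreach (var thread in threads)
{
    thread.Join();   // blocks until this particular thread has finished
}
sw.Stop();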
The ranges in the lambdas close over i. You need to be careful with for loops and lambda expressions in C#.
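A small illustration of the capture problem (an illustrative snippet, not part of the original code):
// Broken: every lambda captures the same loop variable 'i',
// so the threads may all observe i == 4 by the time they run.
for (int i = 0; i < 4; i++)
{
    new Thread(() => Console.WriteLine(i)).Start();
}

// Fixed: copy the loop variable into a local; each lambda gets its own copy.
for (int i = 0; i < 4; i++)
{
    int index = i;
    new Thread(() => Console.WriteLine(index)).Start();   // prints 0, 1, 2, 3 (in some order)
}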
List<T> is not thread safe. I would suggest using a simple array (you know the number of threads after all) and having each thread store its result only at the position that corresponds to it.
You have not considered what would happen if a second call to Start happens before the first one finishes.
So, we will have an array for the output:
var output = new long[ThreadsNumber];
And one for the Threads:
var threads = new Thread[ThreadsNumber];
Hmm, almost like we should create a class.
We will have the stopwatch:
var sw = new Stopwatch();
Let us start it once:
sw.Start();
Now a for to create the Threads:
for (var i = 0; i < ThreadsNumber; i++)
{
// ...
}
Have a copy of i to prevent problems:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
// ...
}
Compute the range for the current thread:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
// ...
}
We need to write Sum in such way that we can store the output in the array:
private void Sum(long startNumber, long endNumber, int index)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
output[index] = result;
}
Hmm... wait, there is a better way...
private static void Sum(long startNumber, long endNumber, out long output)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
output = result;
}
Hmm... no, we can do better...
private static long Sum(long startNumber, long endNumber)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
return result;
}
Create the Thread
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread(() => output[index] = Sum(start, end));
// ...
}
And start the Thread:
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread(() => {output[index] = Sum(start, end);});
threads[i].Start();
}
Are we really going to wait on these?
Think, think...
We keep track of how many threads are pending... and when they are all done, we call the event (and stop the Stopwatch).
var pendingThreads = ThreadsNumber;
// ...
for (var i = 0; i < ThreadsNumber; i++)
{
// ...
threads[i] = new Thread
(
() =>
{
output[index] = Sum(start, end);
if (Interlocked.Decrement(ref pendingThreads) == 0)
{
sw.Stop();
finished?.Invoke
(
this,
new CalcFinishedEventArgs()
{
Result = output.Sum(),
Time = sw.ElapsedMilliseconds
}
);
}
}
);
// ...
}
Let us bring it all together:
void Main()
{
var pc = new ParallelCalc(20, 5);
pc.Finished += (sender, args) =>
{
Console.WriteLine(args);
};
pc.Start();
}
public class CalcFinishedEventArgs : EventArgs
{
public long Result {get; set;}
public long Time {get; set;}
}
public class ParallelCalc
{
public long MaxNumber { get; set; }
public int ThreadsNumber { get; set; }
public event EventHandler<CalcFinishedEventArgs> Finished;
public ParallelCalc(long MaxNumber, int ThreadsNumber)
{
this.MaxNumber = MaxNumber;
this.ThreadsNumber = ThreadsNumber;
}
public void Start()
{
var output = new long[ThreadsNumber];
var threads = new Thread[ThreadsNumber];
var pendingThreads = ThreadsNumber;
var sw = new Stopwatch();
sw.Start();
for (var i = 0; i < ThreadsNumber; i++)
{
var index = i;
var start = 1 + (i * MaxNumber) / ThreadsNumber;
var end = ((i + 1) * MaxNumber) / ThreadsNumber;
threads[i] = new Thread
(
() =>
{
output[index] = Sum(start, end);
if (Interlocked.Decrement(ref pendingThreads) == 0)
{
sw.Stop();
Finished?.Invoke
(
this,
new CalcFinishedEventArgs()
{
Result = output.Sum(),
Time = sw.ElapsedMilliseconds
}
);
}
}
);
threads[i].Start();
}
}
private static long Sum(long startNumber, long endNumber)
{
long result = 0;
for (long i = startNumber; i <= endNumber; i++)
{
result += i;
}
return result;
}
}
Output:
Result
210
Time
0
That is too fast... let me input:
var pc = new ParallelCalc(2000000000, 5);
pc.Finished += (sender, args) =>
{
Console.WriteLine(args);
};
pc.Start();
Output:
Result
2000000001000000000
Time
773
And that is correct.
And yes, this code takes care of the case of calling Start multiple times. Notice that it creates a new array for the output and a new array of threads each time. That way, it does not trip over itself.
I leave error handling to you. Hints: MaxNumber / ThreadsNumber -> division by zero, (i + 1) * MaxNumber -> overflow, not to mention output.Sum() -> overflow.
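A minimal sketch of what that error handling could look like, limited to the hints above (the messages and the up-front checked multiplication are illustrative choices, not part of the original answer):
public ParallelCalc(long MaxNumber, int ThreadsNumber)
{
    // Division-by-zero hint: reject non-positive thread counts and bounds.
    if (ThreadsNumber <= 0)
        throw new ArgumentOutOfRangeException(nameof(ThreadsNumber), "Need at least one thread.");
    if (MaxNumber < 1)
        throw new ArgumentOutOfRangeException(nameof(MaxNumber), "Need a positive upper bound.");

    // Overflow hint: the largest multiplier used in Start() is ThreadsNumber itself,
    // so check MaxNumber * ThreadsNumber once here instead of letting it wrap silently later.
    _ = checked(MaxNumber * ThreadsNumber);

    this.MaxNumber = MaxNumber;
    this.ThreadsNumber = ThreadsNumber;
}
Note that output.Sum() over a long[] already throws an OverflowException when the total exceeds long.MaxValue, so it needs no extra guard.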
Instead of printing o and -, I want to print the number of o and - on the console. However, I have no idea how to get how many times a loop spins in a thread before the thread is switched out for another one.
If there were events on a thread, something like OnLeaving and OnEntering, I could get the number of spins in a given time slice. Unfortunately, there are no such events.
class Program
{
public const int N = 1000;
static void Main(string[] args)
{
ThreadStart ts = DoWork;
Thread t = new Thread(ts);
t.Start();
for (int x = 0; x < N; x++)
{
Console.Write('o');
}
t.Join();
}
private static void DoWork()
{
for (int x = 0; x < N; x++)
{
Console.Write('-');
}
}
}
Could you show me how to handle this scenario?
To reiterate: AbhayDixit's comment gives you a cycle counter. You just need to add a locking mechanism and reset the counters after a context switch.
I have modified your code to include a cycle counter. Note that I have increased N significantly. Otherwise, one thread will just run through its 1000 iterations at once - because you no longer have a Write() instruction to slow it down.
class Program
{
public const int N = 1000000;
private static object _l = new object();
private static int _i = 0;
private static int _j = 0;
static void Main(string[] args)
{
ThreadStart ts = DoWork;
Thread t = new Thread(ts);
t.Start();
for (int x = 0; x < N; x++)
{
lock (_l)
{
// j-thread has run?
if (_j > 0)
{
// print and reset j
Console.Write("j{0} ", _j);
_j = 0;
}
_i++;
}
}
t.Join();
// print remaining cycles
// one of the threads will have run after the other has finished and
// hence not been printed and reset.
if (_i > 0)
Console.Write("i{0} ", _i);
if (_j > 0)
Console.Write("j{0} ", _j);
Console.ReadKey();
}
private static void DoWork()
{
for (int x = 0; x < N; x++)
{
lock (_l)
{
// i-thread has run?
if (_i > 0)
{
// print and reset i
Console.Write("i{0} ", _i);
_i = 0;
}
_j++;
}
}
}
}
I've taken the code example from Stack Overflow question Disruptor.NET example and modified it to "measure" time. Full listing is below:
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;
using Disruptor.Dsl;
namespace DisruptorTest
{
public sealed class ValueEntry
{
public long Value { get; set; }
public ValueEntry()
{
Console.WriteLine("New ValueEntry created");
}
}
public class ValueAdditionHandler : IEventHandler<ValueEntry>
{
public void OnNext(ValueEntry data, long sequence, bool endOfBatch)
{
Program.sw.Stop();
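// Stopwatch.Frequency is ticks per second, so Frequency / 1,000,000 gives
// ticks per microsecond; dividing the elapsed ticks by that yields microseconds.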
long microseconds = Program.sw.ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
Console.WriteLine("elapsed microseconds = " + microseconds);
Console.WriteLine("Event handled: Value = {0} (processed event {1}", data.Value, sequence);
}
}
class Program
{
public static Stopwatch sw = Stopwatch.StartNew();
private static readonly Random _random = new Random();
private static readonly int _ringSize = 16; // Must be multiple of 2
static void Main(string[] args)
{
var disruptor = new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), _ringSize, TaskScheduler.Default);
disruptor.HandleEventsWith(new ValueAdditionHandler());
var ringBuffer = disruptor.Start();
while (true)
{
var valueToSet = _random.Next();
long sequenceNo = ringBuffer.Next();
ValueEntry entry = ringBuffer[sequenceNo];
entry.Value = valueToSet;
sw.Restart();
ringBuffer.Publish(sequenceNo);
Console.WriteLine("Published entry {0}, value {1}", sequenceNo, entry.Value);
Thread.Sleep(1000);
}
}
}
}
And the output is:
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
New ValueEntry created
Published entry 0, value 1510145842
elapsed microseconds = 2205
Event handled: Value = 1510145842 (processed event 0
Published entry 1, value 1718075893
elapsed microseconds = 85
Event handled: Value = 1718075893 (processed event 1
Published entry 2, value 1675907645
elapsed microseconds = 32
Event handled: Value = 1675907645 (processed event 2
Published entry 3, value 1563009446
elapsed microseconds = 75
Event handled: Value = 1563009446 (processed event 3
Published entry 4, value 1782914062
elapsed microseconds = 34
Event handled: Value = 1782914062 (processed event 4
Published entry 5, value 1516398244
elapsed microseconds = 50
Event handled: Value = 1516398244 (processed event 5
Published entry 6, value 76829327
elapsed microseconds = 50
Event handled: Value = 76829327 (processed event 6
So it takes about 50 microseconds to pass data from one thread to another. But it is not fast at all! "The current version of the Disruptor can do ~50 ns between threads at a rate of 1 million messages per second." So my results are 1000 times slower than expected.
What's wrong with my example and how do I achieve 50 ns speed?
I've modified the program above and now get a 1 microsecond delay, which is much better. However, I am still waiting for a response from Disruptor pattern experts. I'm looking for an example which proves that I can actually pass data in 50 ns.
Also, I wrote the same test using BlockingCollection and got 14 microseconds on average, which confirms that Disruptor is faster:
Using BlockingCollection:
average = 14 minimum = 0 0-5 = 890558, 5-10 = 1773781, 10-30 = 6900128, >30 = 435433
Using Disruptor:
average = 0 minimum = 0 0-5 = 9908469, 5-10 = 64464, 10-30 = 19902, >30 = 7065
BlockingCollection code:
using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace DisruptorTest
{
public sealed class ValueEntry
{
public int Value { get; set; }
public ValueEntry()
{
// Console.WriteLine("New ValueEntry created");
}
}
//public class ValueAdditionHandler : IEventHandler<ValueEntry>
//{
// public void OnNext(ValueEntry data, long sequence, bool endOfBatch)
// {
// long microseconds = Program.sw[data.Value].ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
// Program.results[data.Value] = microseconds;
// //Console.WriteLine("elapsed microseconds = " + microseconds);
// //Console.WriteLine("Event handled: Value = {0} (processed event {1}", data.Value, sequence);
// }
//}
class Program
{
public const int length = 10000000;
public static Stopwatch[] sw = new Stopwatch[length];
public static long[] results = new long[length];
static BlockingCollection<ValueEntry> dataItems = new BlockingCollection<ValueEntry>(150);
static void Main(string[] args)
{
for (int i = 0; i < length; i++)
{
sw[i] = Stopwatch.StartNew();
}
// A simple blocking consumer with no cancellation.
Task.Factory.StartNew(() =>
{
while (!dataItems.IsCompleted)
{
ValueEntry ve = null;
try
{
ve = dataItems.Take();
long microseconds = sw[ve.Value].ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
results[ve.Value] = microseconds;
//Console.WriteLine("elapsed microseconds = " + microseconds);
//Console.WriteLine("Event handled: Value = {0} (processed event {1}", ve.Value, ve.Value);
}
catch (InvalidOperationException) { }
}
}, TaskCreationOptions.LongRunning);
for (int i = 0; i < length; i++)
{
var valueToSet = i;
ValueEntry entry = new ValueEntry();
entry.Value = valueToSet;
sw[i].Restart();
dataItems.Add(entry);
//Console.WriteLine("Published entry {0}, value {1}", valueToSet, entry.Value);
//Thread.Sleep(1000);
}
// Wait until all events are delivered
Thread.Sleep(5000);
long average = 0;
long minimum = 10000000000;
int firstFive = 0;
int fiveToTen = 0;
int tenToThirty = 0;
int moreThenThirty = 0;
// Do not count first 100 items because they could be extremely slow
for (int i = 100; i < length; i++)
{
average += results[i];
if (results[i] < minimum)
{
minimum = results[i];
}
if (results[i] < 5)
{
firstFive++;
}
else if (results[i] < 10)
{
fiveToTen++;
}
else if (results[i] < 30)
{
tenToThirty++;
} else
{
moreThenThirty++;
}
}
average /= (length - 100);
Console.WriteLine("average = {0} minimum = {1} 0-5 = {2}, 5-10 = {3}, 10-30 = {4}, >30 = {5}", average, minimum, firstFive, fiveToTen, tenToThirty, moreThenThirty);
}
}
}
Disruptor code:
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;
using Disruptor.Dsl;
namespace DisruptorTest
{
public sealed class ValueEntry
{
public int Value { get; set; }
public ValueEntry()
{
// Console.WriteLine("New ValueEntry created");
}
}
public class ValueAdditionHandler : IEventHandler<ValueEntry>
{
public void OnNext(ValueEntry data, long sequence, bool endOfBatch)
{
long microseconds = Program.sw[data.Value].ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
Program.results[data.Value] = microseconds;
//Console.WriteLine("elapsed microseconds = " + microseconds);
//Console.WriteLine("Event handled: Value = {0} (processed event {1}", data.Value, sequence);
}
}
class Program
{
public const int length = 10000000;
public static Stopwatch[] sw = new Stopwatch[length];
public static long[] results = new long[length];
private static readonly Random _random = new Random();
private static readonly int _ringSize = 1024; // Must be multiple of 2
static void Main(string[] args)
{
for (int i = 0; i < length; i++)
{
sw[i] = Stopwatch.StartNew();
}
var disruptor = new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), _ringSize, TaskScheduler.Default);
disruptor.HandleEventsWith(new ValueAdditionHandler());
var ringBuffer = disruptor.Start();
for (int i = 0; i < length; i++)
{
var valueToSet = i;
long sequenceNo = ringBuffer.Next();
ValueEntry entry = ringBuffer[sequenceNo];
entry.Value = valueToSet;
sw[i].Restart();
ringBuffer.Publish(sequenceNo);
//Console.WriteLine("Published entry {0}, value {1}", sequenceNo, entry.Value);
//Thread.Sleep(1000);
}
// wait until all events are delivered
Thread.Sleep(5000);
long average = 0;
long minimum = 10000000000;
int firstFive = 0;
int fiveToTen = 0;
int tenToThirty = 0;
int moreThenThirty = 0;
// Do not count first 100 items because they could be extremely slow
for (int i = 100; i < length; i++)
{
average += results[i];
if (results[i] < minimum)
{
minimum = results[i];
}
if (results[i] < 5)
{
firstFive++;
}
else if (results[i] < 10)
{
fiveToTen++;
}
else if (results[i] < 30)
{
tenToThirty++;
}
else
{
moreThenThirty++;
}
}
average /= (length - 100);
Console.WriteLine("average = {0} minimum = {1} 0-5 = {2}, 5-10 = {3}, 10-30 = {4}, >30 = {5}", average, minimum, firstFive, fiveToTen, tenToThirty, moreThenThirty);
}
}
}
Here, I fixed your code:
using System;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;
using Disruptor.Dsl;
namespace DisruptorTest
{
public sealed class ValueEntry
{
public int Value { get; set; }
public ValueEntry()
{
// Console.WriteLine("New ValueEntry created");
}
}
class Program
{
public const int length = 1000000;
public static Stopwatch sw;
private static readonly Random _random = new Random();
private static readonly int _ringSize = 1024; // Must be multiple of 2
static void Main(string[] args)
{
sw = Stopwatch.StartNew();
var disruptor = new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), _ringSize, TaskScheduler.Default);
var ringBuffer = disruptor.Start();
for (int i = 0; i < length; i++)
{
var valueToSet = i;
long sequenceNo = ringBuffer.Next();
ValueEntry entry = ringBuffer[sequenceNo];
entry.Value = valueToSet;
ringBuffer.Publish(sequenceNo);
//Console.WriteLine("Published entry {0}, value {1}", sequenceNo, entry.Value);
//Thread.Sleep(1000);
}
var elapsedMicroseconds = sw.ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
// wait until all events are delivered
Thread.Sleep(10000);
double average = elapsedMicroseconds / (double)length;
Console.WriteLine("average = " + average + " microseconds per item");
}
}
}
This should correctly test how long it takes for each item.
I read the BlockingCollection code. You add many Console.WriteLine calls in the Disruptor version but none in the BlockingCollection one, and Console.WriteLine is slow; it takes a lock internally.
Your RingBufferSize is also too small, which affects performance; it should be 1024 or larger.
And while (!dataItems.IsCompleted) may be a problem: the BlockingCollection is not always in the adding state, so this can cause the consumer thread to end early.
Task.Factory.StartNew(() => {
while (!dataItems.IsCompleted)
{
ValueEntry ve = null;
try
{
ve = dataItems.Take();
long microseconds = sw[ve.Value].ElapsedTicks / (Stopwatch.Frequency / (1000L * 1000L));
results[ve.Value] = microseconds;
//Console.WriteLine("elapsed microseconds = " + microseconds);
//Console.WriteLine("Event handled: Value = {0} (processed event {1}", ve.Value, ve.Value);
}
catch (InvalidOperationException) { }
}
}, TaskCreationOptions.LongRunning);
for (int i = 0; i < length; i++)
{
var valueToSet = i;
ValueEntry entry = new ValueEntry();
entry.Value = valueToSet;
sw[i].Restart();
dataItems.Add(entry);
//Console.WriteLine("Published entry {0}, value {1}", valueToSet, entry.Value);
//Thread.Sleep(1000);
}
I have rewritten your code. Disruptor is 10x faster than BlockingCollection with multiple producers (10 parallel producers), and 2x faster than BlockingCollection with a single producer:
using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;
using Disruptor.Dsl;
using NUnit.Framework;
namespace DisruptorTest.Ds
{
public sealed class ValueEntry
{
internal int Id { get; set; }
}
class MyHandler : IEventHandler<ValueEntry>
{
public void OnEvent(ValueEntry data, long sequence, bool endOfBatch)
{
}
}
[TestFixture]
public class DisruptorPerformanceTest
{
private volatile bool collectionAddEnded;
private int producerCount = 10;
private int runCount = 1000000;
private int RingBufferAndCapacitySize = 1024;
[TestCase()]
public async Task TestBoth()
{
for (int i = 0; i < 1; i++)
{
foreach (var rs in new int[] {64, 512, 1024, 2048 /*,4096,4096*2*/})
{
Console.WriteLine($"RingBufferAndCapacitySize:{rs}, producerCount:{producerCount}, runCount:{runCount} of {i}");
RingBufferAndCapacitySize = rs;
await DisruptorTest();
await BlockingCollectionTest();
}
}
}
[TestCase()]
public async Task BlockingCollectionTest()
{
var sw = new Stopwatch();
BlockingCollection<ValueEntry> dataItems = new BlockingCollection<ValueEntry>(RingBufferAndCapacitySize);
sw.Start();
collectionAddEnded = false;
// A simple blocking consumer with no cancellation.
var task = Task.Factory.StartNew(() =>
{
while (!collectionAddEnded && !dataItems.IsCompleted)
{
//if (!dataItems.IsCompleted && dataItems.TryTake(out var ve))
if (dataItems.TryTake(out var ve))
{
}
}
}, TaskCreationOptions.LongRunning);
var tasks = new Task[producerCount];
for (int t = 0; t < producerCount; t++)
{
tasks[t] = Task.Run(() =>
{
for (int i = 0; i < runCount; i++)
{
ValueEntry entry = new ValueEntry();
entry.Id = i;
dataItems.Add(entry);
}
});
}
await Task.WhenAll(tasks);
collectionAddEnded = true;
await task;
sw.Stop();
Console.WriteLine($"BlockingCollectionTest Time:{sw.ElapsedMilliseconds/1000d}");
}
[TestCase()]
public async Task DisruptorTest()
{
var disruptor =
new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), RingBufferAndCapacitySize, TaskScheduler.Default,
producerCount > 1 ? ProducerType.Multi : ProducerType.Single, new BlockingWaitStrategy());
disruptor.HandleEventsWith(new MyHandler());
var _ringBuffer = disruptor.Start();
Stopwatch sw = Stopwatch.StartNew();
sw.Start();
var tasks = new Task[producerCount];
for (int t = 0; t < producerCount; t++)
{
tasks[t] = Task.Run(() =>
{
for (int i = 0; i < runCount; i++)
{
long sequenceNo = _ringBuffer.Next();
_ringBuffer[sequenceNo].Id = 0;
_ringBuffer.Publish(sequenceNo);
}
});
}
await Task.WhenAll(tasks);
disruptor.Shutdown();
sw.Stop();
Console.WriteLine($"DisruptorTest Time:{sw.ElapsedMilliseconds/1000d}s");
}
}
}
BlockingCollectionTest with a shared ValueEntry instance (no new ValueEntry() in for loop)
RingBufferAndCapacitySize:64, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:16.962s
BlockingCollectionTest Time:18.399
RingBufferAndCapacitySize:512, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:6.101s
BlockingCollectionTest Time:19.526
RingBufferAndCapacitySize:1024, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:2.928s
BlockingCollectionTest Time:20.25
RingBufferAndCapacitySize:2048, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:2.448s
BlockingCollectionTest Time:20.649
BlockingCollectionTest creating a new ValueEntry() in the for loop:
RingBufferAndCapacitySize:64, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:27.374s
BlockingCollectionTest Time:21.955
RingBufferAndCapacitySize:512, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:5.011s
BlockingCollectionTest Time:20.127
RingBufferAndCapacitySize:1024, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:2.877s
BlockingCollectionTest Time:22.656
RingBufferAndCapacitySize:2048, producerCount:10, runCount:1000000 of 0
DisruptorTest Time:2.384s
BlockingCollectionTest Time:23.567
https://www.cnblogs.com/darklx/p/11755686.html