I've built something similar to a web crawler to create a report on the 1,000+ web services I need to manage, so I created a TPL Dataflow pipeline to handle getting and processing the data.
The pipeline I imagined looks a little like this (sorry for my paint skills :D):
I had already created an implementation, and everything worked fine until I started the pipeline as a whole. I fed 500 objects into the pipeline as input and expected the program to run for a little while, but it stopped executing after moving on to the execution block.
After checking the flow of the program, it seemed to me that completion propagated too fast to the dispose block.
I created a small sample project with the same pipeline to check whether the problem was my implementation of the input classes or the pipeline itself. The sample code is this:
public class Job
{
public int Ticker { get; set; }
public Type Type { get; }
public Job(Type type)
{
Type = type;
}
public Task Prepare()
{
Console.WriteLine("Preparing");
Ticker = 0;
return Task.CompletedTask;
}
public Task Tick()
{
Console.WriteLine("Ticking");
Ticker++;
return Task.CompletedTask;
}
public bool IsCommitable()
{
Console.WriteLine("Trying to commit");
return IsFinished() || ( Ticker != 0 && Ticker % 100000 == 0);
}
public bool IsFinished()
{
Console.WriteLine("Trying to finish");
return Ticker == 1000000;
}
public void IntermediateCleanUp()
{
Console.WriteLine("intermediate Cleanup");
Ticker = Ticker - 120;
}
public void finalCleanUp()
{
Console.WriteLine("Final Cleanup");
Ticker = -1;
}
}
This is my input class, which is fed into the preparation block.
public class Dataflow
{
private TransformBlock<Job, Job> _preparationsBlock;
private BufferBlock<Job> _balancerBlock;
private readonly ExecutionDataflowBlockOptions _options = new ExecutionDataflowBlockOptions
{
BoundedCapacity = 4
};
private readonly DataflowLinkOptions _linkOptions = new DataflowLinkOptions { PropagateCompletion = true };
private TransformBlock<Job, Job> _typeATickBlock;
private TransformBlock<Job, Job> _typeBTickBlock;
private TransformBlock<Job, Job> _writeBlock;
private TransformBlock<Job, Job> _intermediateCleanupBlock;
private ActionBlock<Job> _finalCleanupBlock;
public async Task Process()
{
CreateBlocks();
ConfigureBlocks();
for (int i = 0; i < 500; i++)
{
await _preparationsBlock.SendAsync(new Job(i % 2 == 0 ? Type.A : Type.B));
}
_preparationsBlock.Complete();
await Task.WhenAll(_preparationsBlock.Completion, _finalCleanupBlock.Completion);
}
private void CreateBlocks()
{
_preparationsBlock = new TransformBlock<Job, Job>(async job =>
{
await job.Prepare();
return job;
}, _options);
_balancerBlock = new BufferBlock<Job>(_options);
_typeATickBlock = new TransformBlock<Job, Job>(async job =>
{
await job.Tick();
return job;
}, _options);
_typeBTickBlock = new TransformBlock<Job, Job>(async job =>
{
await job.Tick();
await job.Tick();
return job;
}, _options);
_writeBlock = new TransformBlock<Job, Job>(job =>
{
Console.WriteLine(job.Ticker);
return job;
}, _options);
_finalCleanupBlock = new ActionBlock<Job>(job => job.finalCleanUp(), _options);
_intermediateCleanupBlock = new TransformBlock<Job, Job>(job =>
{
job.IntermediateCleanUp();
return job;
}, _options);
}
private void ConfigureBlocks()
{
_preparationsBlock.LinkTo(_balancerBlock, _linkOptions);
_balancerBlock.LinkTo(_typeATickBlock, _linkOptions, job => job.Type == Type.A);
_balancerBlock.LinkTo(_typeBTickBlock, _linkOptions, job => job.Type == Type.B);
_typeATickBlock.LinkTo(_typeATickBlock, _linkOptions, job => !job.IsCommitable());
_typeATickBlock.LinkTo(_writeBlock, _linkOptions, job => job.IsCommitable());
_typeBTickBlock.LinkTo(_typeBTickBlock, _linkOptions, job => !job.IsCommitable());
_writeBlock.LinkTo(_intermediateCleanupBlock, _linkOptions, job => !job.IsFinished());
_writeBlock.LinkTo(_finalCleanupBlock, _linkOptions, job => job.IsFinished());
_intermediateCleanupBlock.LinkTo(_typeATickBlock, _linkOptions, job => job.Type == Type.A);
}
}
This is my Dataflow pipeline, representing my "artwork" above :D.
All of this is executed by my Scheduler, which is started in Program.cs:
public class Scheduler
{
private readonly Timer _timer;
private readonly Dataflow _flow;
public Scheduler(int intervall)
{
_timer = new Timer(intervall);
_flow = new Dataflow();
}
public void Start()
{
_timer.AutoReset = false;
_timer.Elapsed += _timer_Elapsed;
_timer.Start();
}
private async void _timer_Elapsed(object sender, ElapsedEventArgs e)
{
try
{
_timer.Stop();
Console.WriteLine("Timer stopped");
await _flow.Process().ConfigureAwait(false);
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
}
finally
{
Console.WriteLine("Timer started again.");
_timer.Start();
}
}
}
class Program
{
static void Main(string[] args)
{
var scheduler = new Scheduler(1000);
scheduler.Start();
Console.ReadKey();
}
}
The console output I am getting is:
Timer stopped
Preparing
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Ticking
Trying to commit
Trying to finish
Trying to commit
Trying to finish
It seems like the program has stopped working at that point, because I am not hitting any breakpoints or getting any further. I think all my blocks have already received a completion signal and therefore stop accepting new items. So my question is: how do I manage the completion signal so that the pipeline only finishes when there is no more work to do?
The main issue with your flow is the feedback loop to your tick block. This causes two problems.
Back Pressure
Completion Flow
First: Back Pressure
When _typeATickBlock is linked back on itself, it stops accepting messages once it has reached its capacity, in your case 4. That means once it has three messages in its output buffer and one being processed, it stops accepting and passing messages. You can see this by adding the following line to the block:
Console.WriteLine($"Tick Block {_typeATickBlock.InputCount}/{_typeATickBlock.OutputCount}");
It will output:
Tick Block 0/3
To fix this you can add any buffering block, a BufferBlock or a TransformBlock. The key is that buffer's bounded capacity. In your case every single message will need to be rerouted back to the tick block, so the capacity needs to match the volume of messages in flight at any given time, in this case 500.
_printingBuffer = new TransformBlock<Job, Job>(job =>
{
Console.WriteLine($"{_printingBuffer.InputCount}/{_printingBuffer.OutputCount}");
return job;
}, new ExecutionDataflowBlockOptions() { BoundedCapacity = 500 });
In your real code you may not know that value, and Unbounded may be your best option to avoid deadlocking your pipeline, but you can tune this value based on your incoming volume.
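If the in-flight volume is unknown, an explicitly unbounded buffer option is a one-liner; a minimal sketch (assuming the other defaults are fine for you):
// DataflowBlockOptions.Unbounded (-1) removes the capacity limit entirely.
var feedbackOptions = new ExecutionDataflowBlockOptions
{
    BoundedCapacity = DataflowBlockOptions.Unbounded
};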
Second: Completion Flow
With a feedback loop in your pipeline, completion propagation becomes more difficult than simply setting the link options. Once completion hits the tick block, it stops accepting all messages, even the ones that still need to be processed. To avoid this you need to hold propagation until all messages have passed through the loop. First you stop propagation just before the tick block, then check the buffers on each block that participates in the loop. Once all buffers are empty, propagate completion (and fault) to the block.
_balancerBlock.Completion.ContinueWith(tsk =>
{
while (!_typeATickBlock.Completion.IsCompleted)
{
if (_printingBuffer.InputCount == 0 && _printingBuffer.OutputCount == 0
&& _typeATickBlock.InputCount == 0 && _typeATickBlock.OutputCount == 0)
{
_typeATickBlock.Complete();
}
}
});
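The continuation above spins in a tight loop while it waits. As a variation of my own (not part of the original answer), the same check can be polled asynchronously so it doesn't burn a core:
_balancerBlock.Completion.ContinueWith(async tsk =>
{
    // Task.Delay yields the thread between checks instead of spinning.
    while (_printingBuffer.InputCount > 0 || _printingBuffer.OutputCount > 0
        || _typeATickBlock.InputCount > 0 || _typeATickBlock.OutputCount > 0)
    {
        await Task.Delay(100);
    }
    _typeATickBlock.Complete();
});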
Last
Your complete ConfigureBlocks with the completion setup and the buffer inserted should look something like this. Note that I'm only propagating completion, not faults, here, and that I removed the Type B branch.
private void ConfigureBlocks()
{
_preparationsBlock.LinkTo(_balancerBlock, _linkOptions);
_balancerBlock.LinkTo(_typeATickBlock, job => job.Type == Type.A);
_balancerBlock.Completion.ContinueWith(tsk =>
{
while (!_typeATickBlock.Completion.IsCompleted)
{
if (_printingBuffer.InputCount == 0 && _printingBuffer.OutputCount == 0
&& _typeATickBlock.InputCount == 0 && _typeATickBlock.OutputCount == 0)
{
_typeATickBlock.Complete();
}
}
});
_typeATickBlock.LinkTo(_printingBuffer, job => !job.IsCommitable());
_printingBuffer.LinkTo(_typeATickBlock);
_typeATickBlock.LinkTo(_writeBlock, _linkOptions, job => job.IsCommitable());
_writeBlock.LinkTo(_intermediateCleanupBlock, _linkOptions, job => !job.IsFinished());
_writeBlock.LinkTo(_finalCleanupBlock, _linkOptions, job => job.IsFinished());
_intermediateCleanupBlock.LinkTo(_typeATickBlock, _linkOptions, job => job.Type == Type.A);
}
I wrote a blog post a while back (the blog is no longer active) about handling completion with feedback loops; it may provide some more help. It can still be retrieved from the Wayback Machine:
Finding Completion in a Complex Flow: Feedback Loops
Related
I have a Windows service that consumes a messaging system to fetch messages. I have also created a callback mechanism with the help of the Timer class, which checks for a message to fetch and process after a fixed interval. Previously, the service processed messages one by one, but I want the processing to run in parallel as messages arrive. So if the first message arrives, it should be processed on one task, and even if that processing has not finished, when the configured interval elapses (the callback is working now) the next message should be picked up and processed on a different task.
Below is my code:
Task.Factory.StartNew(() =>
{
Subsriber<Message> subsriber = new Subsriber<Message>()
{
Interval = 1000
};
subsriber.Callback(Process, m => m != null);
});
public static void Process(Message message)
{
if (message != null)
{
// Processing logic
}
else
{
}
}
But using Task.Factory I am not able to control the number of tasks running in parallel. How can I configure the number of tasks, so that messages run on whichever task is available?
Update:
Updated my above code to add multiple tasks
Below is the code:
private static void Main()
{
try
{
int taskCount = 5;
Task.Factory.StartNewAsync(() =>
{
Subsriber<Message> consumer = new Subsriber<Message>()
{
Interval = 1000
};
consumer.CallBack(Process, msg => msg != null);
}, taskCount);
Console.ReadLine();
}
catch (Exception e)
{
Console.WriteLine(e.Message);
}
public static void StartNewAsync(this TaskFactory target, Action action, int taskCount)
{
var tasks = new Task[taskCount];
for (int i = 0; i < taskCount; i++)
{
tasks[i] = target.StartNew(action);
}
}
public static void Process(Message message)
{
if (message != null)
{
}
else
{ }
}
}
I think what you're looking for would result in quite a large sample, so I'm just demonstrating how you would do this with ActionBlock<T>. There are still a lot of unknowns, so I left the sample as a skeleton you can build on. In the sample, the ActionBlock handles and processes all your messages in parallel as they're received from your messaging system.
public class Processor
{
private readonly IMessagingSystem _messagingSystem;
private readonly ActionBlock<Message> _handler;
private bool _pollForMessages;
public Processor(IMessagingSystem messagingSystem)
{
_messagingSystem = messagingSystem;
_handler = new ActionBlock<Message>(msg => Process(msg), new ExecutionDataflowBlockOptions()
{
MaxDegreeOfParallelism = 5 //or any configured value
});
}
public async Task Start()
{
_pollForMessages = true;
while (_pollForMessages)
{
var msg = await _messagingSystem.ReceiveMessageAsync();
await _handler.SendAsync(msg);
}
}
public void Stop()
{
_pollForMessages = false;
}
private void Process(Message message)
{
//handle message
}
}
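From a host, the skeleton might be wired up like this (a sketch; the messagingSystem instance and the shutdown flow are placeholders, not part of the skeleton above):
// Hypothetical host code: start polling, then stop on shutdown.
var processor = new Processor(messagingSystem);
var polling = processor.Start(); // begins the receive/dispatch loop
// ... later, when shutting down:
processor.Stop();
await polling; // let the loop finish its current receive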
More Examples and Ideas
OK, sorry, I'm short on time, but here's the general idea/skeleton of what I was thinking as an alternative.
To be honest, though, I think ActionBlock<T> is the better option, since so much is done for you. Its only limit is that you can't dynamically scale the amount of work it will do at once, although I think that limit can be quite high. Doing it this way instead gives you more control, or a kind of dynamic number of running tasks, but you'll have to do a lot of things manually. For example, if you want to limit the number of tasks running at a time, you'd have to implement a queueing system (something ActionBlock handles for you) and then maintain it. It depends on how many messages you're receiving and how fast your process handles them.
You'll have to check it out and think about how it applies to your use case, as some of the details around the ConcurrentBag idea are a little sketchily implemented on my side.
So the idea behind what I've thrown together here is that you can start any number of tasks, add to the running tasks, or cancel tasks individually by using the collection.
The main thing, I think, is making the method that the callback runs fire off a thread that does the work, instead of subscribing within a separate thread.
I used Task.Factory.StartNew as you did, but stored the returned Task in an object (TaskInfo) that also has its CancellationTokenSource and its Id (assigned externally) as properties, and then added that to a collection of TaskInfo, which is a property on the class this is all part of:
Updated: to avoid this being too confusing, I've just updated the code that was here previously.
You'll have to update bits of it and fill in the blanks, for example with whatever you have in place of my HeartbeatController and the few events that get called, since they're beyond the scope of the question, but the idea is the same.
public class TaskContainer
{
private ConcurrentBag<TaskInfo> Tasks;
public TaskContainer(){
Tasks = new ConcurrentBag<TaskInfo>();
}
//entry point
//UPDATED
public void StartAndMonitor(int processorCount)
{
for (int i = 0; i <= processorCount; i++)
{
Processor task = new Processor(i); // ProcessorId is set via the constructor
CreateProcessorTask(task);
}
this.IsRunning = true;
MonitorTasks();
}
private void CreateProcessorTask(Processor processor)
{
CancellationTokenSource cancellationTokenSource = new CancellationTokenSource();
Task taskInstance = Task.Factory.StartNew(
() => processor.Start(cancellationTokenSource.Token)
);
//bind status update event
processor.ProcessorStatusUpdated += ReportProcessorProcess;
Tasks.Add(new TaskInfo() // the collection is a ConcurrentBag<TaskInfo>
{
ProcessorId = processor.ProcessorId,
Task = taskInstance,
CancellationTokenSource = cancellationTokenSource
});
}
//this method gets called once but the HeartbeatController gets an action as a param that it then
//executes on a timer. I haven't included that but you get the idea
//This method also checks for tasks that have stopped and restarts them if the manifest call says they should be running.
//Will also start any new tasks included in the manifest and stop any that aren't included in the manifest.
internal void MonitorTasks()
{
HeartbeatController.Beat(() =>
{
HeartBeatHappened?.Invoke(this, null);
List<int> tasksToStart = new List<int>();
//this is an api call or whatever drives your config that says what tasks must be running.
var newManifest = this.GetManifest(Properties.Settings.Default.ResourceId);
//task Removed Check - If a Processor is removed from the task pool, cancel it if running and remove it from the Tasks List.
List<int> instanceIds = new List<int>();
newManifest.Processors.ForEach(x => instanceIds.Add(x.ProcessorId));
var removed = Tasks.Select(x => x.ProcessorId).ToList().Except(instanceIds).ToList();
if (removed.Count() > 0)
{
foreach (var extaskId in removed)
{
var task = Tasks.FirstOrDefault(x => x.ProcessorId == extaskId);
task.CancellationTokenSource?.Cancel();
}
}
foreach (var newtask in newManifest.Processors)
{
var oldtask = Tasks.FirstOrDefault(x => x.ProcessorId == newtask.ProcessorId);
//Existing task check
if (oldtask != null && oldtask.Task != null)
{
if (!oldtask.Task.IsCanceled && (oldtask.Task.IsCompleted || oldtask.Task.IsFaulted))
{
var ex = oldtask.Task.Exception;
tasksToStart.Add(oldtask.ProcessorId);
continue;
}
}
else //New task Check
tasksToStart.Add(newtask.ProcessorId);
}
foreach (var item in tasksToStart)
{
var taskToRemove = Tasks.FirstOrDefault(x => x.ProcessorId == item);
if (taskToRemove != null)
// ConcurrentBag<T> has no Remove, so rebuild the bag without this entry
Tasks = new ConcurrentBag<TaskInfo>(Tasks.Where(x => x.ProcessorId != item));
var task = newManifest.Processors.FirstOrDefault(x => x.ProcessorId == item);
if (task != null)
{
CreateProcessorTask(task);
}
}
});
}
}
//UPDATED
public class Processor{
private int ProcessorId;
private Subsriber<Message> subsriber;
public Processor(int processorId) => ProcessorId = processorId;
public void Start(CancellationToken token)
{
subsriber = new Subsriber<Message>() // assign the field instead of shadowing it with a local
{
Interval = 1000
};
subsriber.Callback(Process, m => m != null);
}
private void Process()
{
//do work
}
}
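As a rough usage sketch (hypothetical; the manifest/heartbeat plumbing still has to exist on your side):
// Spin up five monitored processor tasks and let the heartbeat keep them alive.
var container = new TaskContainer();
container.StartAndMonitor(5);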
Hope this gives you an idea of how else you can approach your problem and that I didn't miss the point :).
Update
To use events to report progress or which tasks are processing, I'd extract them into their own class with subscribe methods on it; when creating a new instance of that class, assign the event to a handler in the parent class, which can then update your UI or do whatever you want with that info.
So the content of Process() would look more like this:
Processor processor = new Processor();
Task task = Task.Factory.StartNew(() => processor.ProcessMessage(cancellationTokenSource.CancellationToken));
processor.StatusUpdated += ReportProcess;
I have a product dropdown; selecting a product connects to a websocket and gets the feed messages for that product. Once the (1) feed messages start coming, I have to (2) get the order book and then (3) process the feed messages. So the first and last tasks run asynchronously. For this I have written the following code:
void OnReceivingFeedMessage()
{
concurrentQueue.Enqueue(message);
if (!messageStreamStarted) // only first time get order book
{
messageStreamStarted = true;
GetOrderBookData();
}
}
private void GetOrderBookData()
{
MarketData m = new MarketData();
ProductOrderBook p = m.GetProductOrderBook(productId);
bidsList = p.bids;
asksList = p.asks;
isOrderBookUpdated = true;
Task task3 = Task.Run(() => KickStartToProcessQueue());
}
private void KickStartToProcessQueue()
{
while (threadProcessQueueExist)
{
int recordCountNew = concurrentQueue.Count();
if (recordCountNew != 0)
{
if (isOrderBookUpdated)
{
ProcessQueueMessages();
}
}
}
}
private void ProcessQueueMessages()
{
if (!concurrentQueue.IsEmpty)
{
string jsonString;
while (concurrentQueue.TryDequeue(out jsonString))
{
// have to insert the record in existing order book
}
}
}
This works perfectly the first time. But when I change the product and reconnect, things mess up and the data is not processed properly. This is the code that runs when the product's selected index changes:
private void CloseAndReconnectToGetWebsocketFeed()
{
w.CloseWebsocketConnection();
messageStreamStarted = false;
isOrderBookUpdated = false;
ConcurrentQueue<string> wssMessagesQueue = new ConcurrentQueue<string>();
concurrentQueue = wssMessagesQueue;
ConnectAndGetWebsocketFeedMessages(); // this calls OnReceivingFeedMessage
}
I am new to multi-threading, so I am not sure whether I need to use lock, async/await, or something else. What am I doing wrong in the above code?
It runs fine the first time, but the moment I change the product and do the same processing again, it starts giving problems. Can someone please advise how I can clear all the resources before repeating the same steps?
I think you are writing unnecessarily complicated code. I'm not 100% sure what your problem is, but here are some things that might help you.
Use a BlockingCollection<T>
With that class, you can block your consumer thread until new messages come in. Here's a simple example of how it works:
BlockingCollection<string> collection = new BlockingCollection<string>(new ConcurrentQueue<string>());
Task t = Task.Run(() =>
{
while (collection.TryTake(out string item, Timeout.Infinite))
{
Console.WriteLine($"Started reading {item}...");
Thread.Sleep(1000); //simulate intense work
Console.WriteLine($"Done reading {item}");
}
});
while (true)
{
//This could be your OnReceivingFeedMessage()
string input = Console.ReadLine();
if (input == "stop")
{
Console.WriteLine("Stopping...");
collection.CompleteAdding();
break;
}
else
{
collection.Add(input);
}
}
t.Wait();
The task t will wait until there are items in collection. When items are "received" (here simply via console input), they are added to the collection.
Dispatch new tasks to work on the input
Pretty simple:
while (true)
{
string item = Console.ReadLine();
Task.Run(() =>
{
Console.WriteLine($"Started reading {item}...");
Thread.Sleep(1000); //simulate intense work
Console.WriteLine($"Done reading {item}");
});
}
This also has the advantage (or disadvantage) that the tasks all run in parallel. That means you can't rely on the order in which they are processed, but they will finish much faster.
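If you need to cap how many of those tasks run at once, a SemaphoreSlim works as a throttle. A minimal sketch of that variation, assuming it runs inside an async method (the limit of 4 is an arbitrary choice of mine):
SemaphoreSlim throttle = new SemaphoreSlim(4); // at most 4 workers at a time
while (true)
{
    string item = Console.ReadLine();
    await throttle.WaitAsync(); // waits here once 4 tasks are already running
    _ = Task.Run(() =>
    {
        try
        {
            Console.WriteLine($"Started reading {item}...");
            Thread.Sleep(1000); //simulate intense work
            Console.WriteLine($"Done reading {item}");
        }
        finally
        {
            throttle.Release(); // free a slot for the next item
        }
    });
}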
By the way: Both of these approaches have the advantage that you don't have busy waiting. From your question:
while (threadProcessQueueExist)
{
int recordCountNew = concurrentQueue.Count();
if (recordCountNew != 0)
{
if (isOrderBookUpdated)
{
ProcessQueueMessages();
}
}
}
This code busy-waits as long as nothing is in your queue, meaning that one core of your processor sits at very high load without actually doing anything. That is considered bad practice.
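Applied to your order-book code, the polling loop could become a blocking consumer. A sketch, assuming the queue holds the raw JSON strings (messages is a hypothetical replacement for your concurrentQueue field):
// Hypothetical replacement for KickStartToProcessQueue: sleeps until items arrive.
private BlockingCollection<string> messages = new BlockingCollection<string>(new ConcurrentQueue<string>());

private void KickStartToProcessQueue()
{
    // TryTake blocks until an item is added or CompleteAdding() is called.
    while (messages.TryTake(out string jsonString, Timeout.Infinite))
    {
        // insert the record into the existing order book here
    }
}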
I use SharpPcap to receive UDP packets. When a packet arrives, I use a background thread to process it, like this:
private void BackgroundThread()
{
while (!BackgroundThreadStop)
{
bool shouldSleep = true;
lock (QueueLock)
{
if (PacketQueue.Count != 0)
{
shouldSleep = false;
}
}
if (shouldSleep)
{
System.Threading.Thread.Sleep(250);
}
else // should process the queue
{
//List<RawCapture> ourQueue;
List<UdpPacket> ourQueue;
lock (QueueLock)
{
// swap queues, giving the capture callback a new one
ourQueue = PacketQueue;
PacketQueue = new List<UdpPacket>();
}
//I write my repeatable work in here
Task.Factory.StartNew(() =>
{
ProcePacket(ourQueue);
});
if (statisticsUiNeedsUpdate)
{
UpdateCaptureStatistics();
statisticsUiNeedsUpdate = false;
}
}
}
}
Here ProcePacket(ourQueue) is a method that takes a long time to finish.
My program runs for a while, then the CPU usage reaches almost 98% and memory grows very large; eventually the program dies.
I think the problem is that I create too many tasks, but how can I do this repeatable work with only one task?
I have a producer-consumer application in WPF. The following runs after I click a button:
private async void Start_Click(object sender, RoutedEventArgs e)
{
try
{
// set up data
var producer = Producer();
var consumer = Consumer();
await Task.WhenAll(producer, consumer);
// need log the results in Summary method
Summary();
}
}
The Summary method is a void one; I assume that is proper.
private void Summary(){}
async Task Producer(){ await something }
async Task Consumer(){ await something }
EDIT:
My question is: in the Summary() method I have to use the values calculated by the tasks, but the Consumer task is a long-running process. The program runs Summary quickly, before the updated values are available, so it uses the initial values.
My thought:
await Task.WhenAll(producer, consumer);
Summary();
EDIT2: 11:08 AM 11/05/2014
private void Summary()
{
myFail = 100 - mySuccess;
_dataContext.MyFail = myFail; // update window upon property changed
async Task Consumer()
{
try
{
Dictionary<string, string> dict = new Dictionary<string, string>();
var executionDataflowBlockOptions = new ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism = 5,
CancellationToken = cToken
};
var c = new ActionBlock<T>(
t=>
{
if (cToken.IsCancellationRequested)
return;
dict = Do(t, cToken);
if(dict["Success"] == "Success")
mySuccess++;
The current problem is that mySuccess always has its initial value in the Summary method.
You can use the ContinueWith method to execute Summary after both producer and consumer have finished:
Task.WhenAll(producer, consumer)
.ContinueWith(continuation => Summary());
EDIT 1
It seems that you are misusing the Producer/Consumer pattern.
The producer is supposed to produce the values and shovel them into one end of a communication pipe. On the other end of the pipe, the consumer consumes the values as they become available. In other words, the consumer waits for the producer to produce some value and to put the value in the pipe and for the value to arrive at the end of the pipe.
Usually this involves some sort of signaling mechanism where the producer signals (awakes) the consumer whenever a value has been created.
In your case, you don't have a signaling mechanism, and I strongly suspect that your producer is generating only one value. If the latter is the case, you can just return a value from the "producer".
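In that single-value case, "just return a value" might look like this sketch (Data, ProduceValueAsync, and Consume are placeholder names, not from your code):
// Hypothetical: the "producer" collapses into an async method with a result.
async Task<Data> ProducerAsync()
{
    return await ProduceValueAsync();
}
// ... at the call site:
var produced = await ProducerAsync();
Consume(produced);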
If, however, your producer is creating more than one value, you can use the BlockingCollection<T> class to send values from the producer to the consumer.
In your Producer class, get a reference to the pipe and put data into it:
public class Producer
{
private BlockingCollection<Data> _pipe;
public void Start()
{
while(!done)
{
var value = ProduceValue();
_pipe.Add(value);
}
// Signal the consumer that we're finished
_pipe.CompleteAdding();
}
}
In the Consumer class wait for the values to arrive and process each one:
public class Consumer
{
private BlockingCollection<Data> _pipe;
public void Start()
{
foreach(var value in _pipe.GetConsumingEnumerable())
{
// GetConsumingEnumerable will block until a value arrives and
// will exit when producer calls CompleteAdding()
Process(value);
}
}
}
Having the above in place you can use ContinueWith or await on the WhenAll method to run the Summary.
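Wiring the two together might look like the following sketch (the constructors taking the shared pipe are assumed, since they're omitted above):
// Hypothetical wiring: both sides share one BlockingCollection as the pipe.
var pipe = new BlockingCollection<Data>();
var producer = new Producer(pipe);
var consumer = new Consumer(pipe);
var producing = Task.Run(() => producer.Start());
var consuming = Task.Run(() => consumer.Start());
await Task.WhenAll(producing, consuming);
Summary(); // runs only after both sides have finished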
EDIT 2
As promised in the comments I have analyzed the code you've posted on MSDN Forum. There are several problems in the code.
First of all and the simplest one to fix is that you're not incrementing the counter in a thread-safe manner. An increment (value++) is not an atomic operation so you should be careful when incrementing shared fields. An easy way to do this is:
Interlocked.Increment(ref evenNumber);
Now, the actual problems in your code:
As I mentioned earlier, the consumer does not know when the producer has finished producing the values. So, after the producer exits the for block it should signal that it has finished. The consumer waits for the finish signal of the producer; otherwise it will wait forever for the next value but there won't be one.
You are linking the BufferBlock to the consumer block, which starts executing, but you're not waiting for the consumer block to finish; you're only waiting half a second and then exiting the consumer method, leaving the worker threads of the consumer block to do their work in vain.
As a consequence, your Report method executes before the processing is finished, outputting the value of the evenNumber counter at the moment the method executes, not after all processing has completed.
Below is the edited code with some comments:
class Program
{
public static BufferBlock<int> m_Queue = new BufferBlock<int>(new DataflowBlockOptions { BoundedCapacity = 1000 });
private static int evenNumber;
static void Main(string[] args)
{
var producer = Producer();
var consumer = Consumer();
Task.WhenAll(producer, consumer).Wait();
Report();
}
static void Report()
{
Console.WriteLine("There are {0} even numbers", evenNumber);
Console.Read();
}
static async Task Producer()
{
for (int i = 0; i < 500; i++)
{
// Send a value to the consumer and wait for the value to be processed
await m_Queue.SendAsync(i);
}
// Signal the consumer that there will be no more values
m_Queue.Complete();
}
static async Task Consumer()
{
var executionDataflowBlockOptions = new ExecutionDataflowBlockOptions
{
MaxDegreeOfParallelism = 4
};
var consumerBlock = new ActionBlock<int>(x =>
{
int j = DoWork(x);
if (j % 2 == 0)
// Increment the counter in a thread-safe way
Interlocked.Increment(ref evenNumber);
}, executionDataflowBlockOptions);
// Link the buffer to the consumer
using (m_Queue.LinkTo(consumerBlock, new DataflowLinkOptions { PropagateCompletion = true }))
{
// Wait for the consumer to finish.
// This method will exit after all the data from the buffer was processed.
await consumerBlock.Completion;
}
}
static int DoWork(int x)
{
Thread.Sleep(100);
return x;
}
}
I have a list of work items that need to be processed in order. Sometimes the list will be empty, sometimes it will have a thousand items. Only one can be processed at a time, and in order. Currently I am doing the following, which looks stupid to me because I am using Thread.Sleep in the consumer task to wait 100 ms before checking whether the list is empty. Is this the standard way to do it, or am I completely wrong?
public class WorkItem
{
}
public class WorkerClass
{
CancellationTokenSource cts = new CancellationTokenSource();
CancellationToken ct = new CancellationToken();
List<WorkItem> listOfWorkItems = new List<WorkItem>();
public void start()
{
Task producerTask = new Task(() => producerMethod(ct), ct);
Task consumerTask = new Task(() => consumerMethod(ct), ct);
producerTask.Start();
consumerTask.Start();
}
public void producerMethod(CancellationToken _ct)
{
while (!_ct.IsCancellationRequested)
{
//Sleep random amount of time
Random r = new Random();
Thread.Sleep(r.Next(100, 1000));
WorkItem w = new WorkItem();
listOfWorkItems.Add(w);
}
}
public void consumerMethod(CancellationToken _ct)
{
while (!_ct.IsCancellationRequested)
{
if (listOfWorkItems.Count == 0)
{
//Sleep small small amount of time to avoid continuously polling this if statement
Thread.Sleep(100);
continue;
}
//Process first item
doWorkOnWorkItem(listOfWorkItems[0]);
//Remove from list
listOfWorkItems.RemoveAt(0);
}
}
public void doWorkOnWorkItem(WorkItem w)
{
// Do work here - synchronous to execute in order (10ms to 5min execution time)
}
}
Advice greatly appreciated.
Thanks
Use BlockingCollection. It does non-busy waits.
See https://stackoverflow.com/a/5108487/56778 for a simple example. Or http://www.informit.com/guides/content.aspx?g=dotnet&seqNum=821 for a bit more detail.
You can use the BlockingCollection<T> class.
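Applied to your WorkerClass, a minimal sketch of the change (my adaptation, not tested against your real workload): the shared list becomes a BlockingCollection, and the consumer's sleep loop disappears.
BlockingCollection<WorkItem> workItems = new BlockingCollection<WorkItem>();

public void producerMethod(CancellationToken _ct)
{
    Random r = new Random();
    while (!_ct.IsCancellationRequested)
    {
        Thread.Sleep(r.Next(100, 1000)); // simulate random arrival times
        workItems.Add(new WorkItem());
    }
    workItems.CompleteAdding(); // lets the consumer's loop end cleanly
}

public void consumerMethod(CancellationToken _ct)
{
    // Blocks while empty, preserves FIFO order, and exits after CompleteAdding,
    // so the consumer no longer needs to poll the token or sleep.
    foreach (WorkItem w in workItems.GetConsumingEnumerable())
    {
        doWorkOnWorkItem(w);
    }
}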