This is my code
// Demo entry point: builds four Thing items, starts one parent task that spawns one
// child task per item, waits for the parent task, then prints the accumulated total.
static void Main(string[] args)
{
List<Thing> collection = new List<Thing>
{
new Thing { IntProp = 1, BoolProp = true },
new Thing { IntProp = 1, BoolProp = true },
new Thing { IntProp = 2, BoolProp = true },
new Thing { IntProp = 1, BoolProp = false }
};
// Shared accumulator: captured by every child-task lambda below.
int number = 0;
var task = Task.Factory.StartNew<bool>(() =>
{
// Tasks started through this factory are created with AttachedToParent.
TaskFactory ts = new TaskFactory(TaskCreationOptions.AttachedToParent, TaskContinuationOptions.ExecuteSynchronously);
foreach (var item in collection)
{
if (item.BoolProp)
{
// NOTE(review): `number += ...` is an unsynchronized read-modify-write on a variable
// shared by all child tasks; concurrent updates can overwrite each other.
ts.StartNew(() =>
number += GetNum1(item.IntProp));
}
else
{
// NOTE(review): same unsynchronized update of `number` as in the branch above.
ts.StartNew(() =>
number += GetNum2(item.IntProp));
}
}
return true;
});
task.Wait();
Console.WriteLine(number);
}
Here are the definitions of GetNum1 and GetNum2:
/// <summary>
/// Simulates a long-running job by spinning through one billion empty iterations,
/// then returns the constant 10.
/// </summary>
/// <param name="num">Not read by the method.</param>
/// <returns>Always 10.</returns>
static int GetNum1(int num)
{
    const int iterations = 1000000000;
    int spin = 0;
    while (spin < iterations)
    {
        spin++; // simulate some job
    }
    return 10;
}
/// <summary>
/// Simulates a shorter job by spinning through five hundred million empty iterations,
/// then returns the constant 3.
/// </summary>
/// <param name="num">Not read by the method.</param>
/// <returns>Always 3.</returns>
static int GetNum2(int num)
{
    const int iterations = 500000000;
    int spin = 0;
    while (spin < iterations)
    {
        spin++; // simulate some job
    }
    return 3;
}
and here is the Thing class
/// <summary>
/// Plain data holder used by the sample: one integer and one boolean, both freely settable.
/// </summary>
class Thing
{
    /// <summary>Integer payload of the item.</summary>
    public int IntProp { get; set; }

    /// <summary>Boolean flag of the item.</summary>
    public bool BoolProp { get; set; }
}
Basically, what I am doing is creating a collection of Thing objects. Then I create a single parent task which will have several child tasks (which it should wait for, I guess).
There is a number variable which is incremented by each child task by the amount returned from the GetNum1 or GetNum2 method (10 or 3). I expect the code above to output 33 (10 + 10 + 10 + 3), but 10 is output instead, because only the first child task is awaited. If I put a breakpoint in the code and go step by step, then the output is correct. Why does this happen? Does it have something to do with the foreach loop inside the parent task? Please do not start asking questions like "why do you need this" and "there is no need for that"; this is just example code.
The parent task is in fact waiting for (not "awaiting") the child tasks. Your problem is that the code is accessing the number variable from multiple threads without synchronization:
// Gate object: every update and the final read of `number` happen while holding it.
var mutex = new object();
int number = 0;
var task = Task.Factory.StartNew<bool>(() =>
{
    // Child tasks created through this factory are attached to the parent task.
    var childFactory = new TaskFactory(TaskCreationOptions.AttachedToParent, TaskContinuationOptions.ExecuteSynchronously);
    foreach (var item in collection)
    {
        // The worker is chosen here, in the parent task, exactly as the if/else did.
        Func<int, int> compute;
        if (item.BoolProp)
        {
            compute = GetNum1;
        }
        else
        {
            compute = GetNum2;
        }

        childFactory.StartNew(() =>
        {
            // Do the slow computation outside the lock; hold the lock only for the addition.
            var value = compute(item.IntProp);
            lock (mutex)
            {
                number += value;
            }
        });
    }
    return true;
});
task.Wait();
lock (mutex)
{
    Console.WriteLine(number);
}
Recommended reading: Parallel Tasks and Dynamic Task Parallelism.
Related
I'm writing a Console Application in C# that takes an array of videos and transcode it as long a new GPU is free to use.
The machine where the app will run has two GPUs. But I'm really having a hard time how to build this up.
The method that does the job is FireTranscode()
/// <summary>
/// Claims one of the two GPUs, reports the video being transcoded on it, and marks the
/// GPU as free again.
/// </summary>
/// <param name="counter">Row index into <c>transcodeArray</c> of the video to report.</param>
private void FireTranscode(int counter)
{
    // NOTE(review): a Random created per call may repeat sequences on runtimes that seed
    // from the clock; consider a shared instance.
    Random rand = new Random();
    int gpu;
    lock (thisLock)
    {
        // GPU 0 is used when PickGPU(0) reports true, otherwise GPU 1.
        gpu = PickGPU(0) == true ? 0 : 1;
        GPU[gpu] = false;
        if (gpu == 0) { gpuZero += 1; } else { gpuOne += 1; }
        // NOTE(review): this sleep runs while the lock is held, so callers are serialized
        // for its duration — confirm that is intended for the simulated work.
        Thread.Sleep(rand.Next(1, 5));
        // NOTE(review): `videos` is also the loop bound in ManageTrancode; decrementing it
        // here while that loop increments its counter shortens the run — verify.
        videos -= 1;
    }
    try
    {
        Console.WriteLine($"Transconding on {gpu} using thread: {Thread.CurrentThread.ManagedThreadId} {transcodeArray[counter, 2]}");
    }
    finally
    {
        // Release under the same lock that guards the claim above, and do it even when
        // the body throws, so a failed transcode never leaves the GPU marked busy.
        lock (thisLock)
        {
            GPU[gpu] = true;
        }
    }
}
and it's triggered by ManageTranscode()
/// <summary>
/// Schedules one FireTranscode task per video, waiting for a running task to finish
/// whenever both GPUs are marked busy.
/// </summary>
/// <remarks>
/// Kept as <c>async void</c> so existing callers compile unchanged; exceptions thrown
/// here cannot be observed by the caller.
/// </remarks>
private async void ManageTrancode()
{
    counter = 0;
    while (counter < videos)
    {
        if (GPU[0] == false && GPU[1] == false)
        {
            // Both GPUs are busy: wait for one task, drop it from the list so the next
            // WhenAny does not return immediately, then retry the SAME video.
            var finished = await Task.WhenAny(transcodeList);
            transcodeList.Remove(finished);
            continue;
        }

        // Copy the index: the lambda would otherwise capture the shared `counter`
        // and could observe a later value by the time the task runs.
        int videoIndex = counter;
        transcodeList.Add(Task.Factory.StartNew(() => FireTranscode(videoIndex)));
        counter++;
    }
}
It is supposed to call FireTranscode with the counter parameter 40 times asynchronously (40 being the value of the videos variable), and in case both GPUs (static Dictionary<int, bool> GPU = new Dictionary<int, bool> { { 0, true }, { 1, true } };) are in use (= false), it should wait until any task finishes and frees a GPU for use (= true).
I'm trying to learn how to use it correctly and I would appreciate some tips and help how to achieve this.
Thank you.
You can simplify your logic and also make it more extensible in terms of available GPUs by using the code below. It uses SemaphoreSlim (also mentioned by @Poul Bak), which limits the degree of parallelism to the count you configure.
Also, I've refactored your code to have GPU as class (you can use Struct too).
// Lock taken by GetAndLockGPU and ReleaseGPU around every change of GPU.IsAvailable.
private object lockObj = new object();
// All GPUs known to this class; each entry carries its own availability flag.
private List<GPU> availableGPUs = new List<GPU>() { /* initialize GPUs here */};
// Number of configured GPUs (regardless of whether they are currently available).
private int AvailableGPUCount { get { return availableGPUs.Count; } }
/// <summary>
/// Starts one transcode per video while never running more transcodes at once than
/// there are configured GPUs, and returns after all of them have finished.
/// </summary>
/// <remarks>
/// Kept as <c>async void</c> so existing callers compile unchanged.
/// </remarks>
private async void ManageTrancode()
{
    int maxThread = AvailableGPUCount;
    SemaphoreSlim lockSlim = new SemaphoreSlim(maxThread, maxThread);
    var inFlight = new List<Task>();
    for (int counter = 0; counter < videos; counter++)
    {
        // Wait for a free slot, then start the transcode WITHOUT awaiting it here;
        // awaiting inside the loop would run the videos strictly one after another.
        await lockSlim.WaitAsync();
        inFlight.Add(TrancodeThenReleaseAsync(lockSlim));
    }
    await Task.WhenAll(inFlight);
}

// Runs one transcode and always hands the semaphore slot back, even on failure.
private async Task TrancodeThenReleaseAsync(SemaphoreSlim lockSlim)
{
    try
    {
        await Trancode();
    }
    finally
    {
        lockSlim.Release();
    }
}
/// <summary>
/// Claims an available GPU, runs the transcode on it, and releases the GPU afterwards.
/// </summary>
private async Task Trancode()
{
    GPU gpu = GetAndLockGPU();
    try
    {
        // await <<Run actual trancode here>>
    }
    finally
    {
        // Release in finally so a failing transcode does not leave the GPU marked busy.
        ReleaseGPU(gpu.Id);
    }
}
/// <summary>
/// Finds the first GPU whose IsAvailable flag is set, clears the flag, and returns it.
/// The search and the flag change happen under <c>lockObj</c>.
/// </summary>
/// <returns>The GPU that was just claimed.</returns>
/// <exception cref="InvalidOperationException">No GPU is currently available.</exception>
private GPU GetAndLockGPU()
{
    lock (lockObj)
    {
        GPU claimed = availableGPUs.First(candidate => candidate.IsAvailable);
        claimed.IsAvailable = false;
        return claimed;
    }
}
/// <summary>
/// Marks the GPU with the given id as available again, under <c>lockObj</c>.
/// </summary>
/// <param name="gpuId">Id of the GPU to release.</param>
/// <exception cref="InvalidOperationException">No GPU with that id exists.</exception>
private void ReleaseGPU(int gpuId)
{
    lock (lockObj)
    {
        GPU released = availableGPUs.First(candidate => candidate.Id == gpuId);
        released.IsAvailable = true;
    }
}
/// <summary>
/// One GPU slot: an identifier plus a flag saying whether it can be claimed.
/// </summary>
private class GPU
{
    /// <summary>True while the GPU can be claimed; starts out true.</summary>
    public bool IsAvailable { get; set; } = true;

    /// <summary>Identifier used to look the GPU up when releasing it.</summary>
    public int Id { get; set; }
}
I have a fairly high throughput on a message counter (tens of thousands per second), and looking for an efficient way of getting the count without putting locks everywhere or ideally not locking on each message count when i am giving an update every 10 seconds.
Use of immutable counter object
I am using an immutable counter class:
/// <summary>
/// Immutable snapshot of message counts; a new instance is created for every change.
/// </summary>
public class Counter
{
    /// <summary>Number of quotes counted in this snapshot.</summary>
    public readonly int Quotes;

    /// <summary>Number of trades counted in this snapshot.</summary>
    public readonly int Trades;
    // and some other counter fields snipped

    /// <summary>Creates a snapshot holding the given counts.</summary>
    public Counter(int quotes, int trades)
    {
        this.Quotes = quotes;
        this.Trades = trades;
    }
}
And would update this on each message process loop:
/// <summary>
/// Processes message batches and keeps an immutable Counter that a 10-second timer
/// reads and resets.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>counter</c> field is read and written by both the timer handler
/// and ProcessMessages without volatile, locks, or interlocked operations, so an update
/// made between the handler's read and its reset can be lost.
/// NOTE(review): the timer is created but never started in the visible code.
/// </remarks>
class MyProcessor
{
    System.Timers.Timer timer;
    Counter counter = new Counter(0,0);

    public MyProcessor()
    {
        // update every 10 seconds
        this.timer = new System.Timers.Timer(10000);
        timer.Elapsed += (sender, e) => {
            // Take ONE snapshot so both rates come from the same Counter instance.
            var snapshot = this.counter;
            var quotesPerSecond = snapshot.Quotes / 10.0;
            var tradesPerSecond = snapshot.Trades / 10.0;
            this.counter = new Counter(0,0);
        };
    }

    /// <summary>Handles one batch and republishes the counter.</summary>
    /// <param name="messages">The batch to process.</param>
    public void ProcessMessages(Messages messages)
    {
        foreach(var message in messages) { /* */ }
        var oldCounter = counter;
        this.counter = new Counter(oldCounter.Quotes, oldCounter.Trades);
    }
}
I have lots of counters (not all shown), so would mean a lot of individual Interlocked.Increment calls on individual counter fields.
The only other way I can think of is lock every single run of ProcessMessages (which will be extensive) and heavy for something which is a utility as opposed to critical where the program would crash.
Is it possible to use an immutable counter object in this fashion without hard interlocking/thread mechanisms when we only need to update once every 10 seconds?
Flag check idea to avoid locks
Could the timer thread set a flag for the ProcessMessages to check and if it sees it set, start the count from zero again, i.e.
/* snipped the MyProcessor class, same as before */
System.Timers.Timer timer;
Counter counter = new Counter(0,0);
// Set by the timer handler to ask ProcessMessages to restart counting from zero.
ManualResetEvent reset = new ManualResetEvent(false);

public MyProcessor()
{
    // update every 10 seconds
    this.timer = new System.Timers.Timer(10000);
    timer.Elapsed += (sender, e) => {
        // Take ONE snapshot so both rates come from the same Counter instance.
        var snapshot = this.counter;
        var quotesPerSecond = snapshot.Quotes / 10.0;
        var tradesPerSecond = snapshot.Trades / 10.0;
        // log
        this.reset.Set();
    };
}
// this should be called every second with a heartbeat message posted to queue
/// <summary>
/// Adds the batch's quote and trade counts to the counter, restarting from zero first
/// when the timer has signalled a reset.
/// </summary>
/// <param name="messages">The batch whose Quotes and Trades are counted.</param>
public void ProcessMessages(Messages messages)
{
    Counter baseline;
    if (reset.WaitOne(0) == true)
    {
        // The timer has reported the previous totals: start a fresh period.
        reset.Reset();
        baseline = new Counter(0, 0);
    }
    else
    {
        baseline = this.counter;
    }

    // The current batch is counted in both cases, so no message is dropped on reset.
    this.counter = new Counter(
        baseline.Quotes + messages.Quotes.Count,
        baseline.Trades + messages.Trades.Count);
}
/* end of MyProcessor class */
This would work; however, the update "stalls" when message processing comes to a halt (although the throughput is very high, it does pause for a number of hours at night, and ideally it should show the actual value rather than the last value).
One way around this would be to post a heartbeat message to the MyProcessor.ProcessMessages() every second to force an internal update of the message counters and subsequent reset when the reset ManualResetEvent is set.
Here are three new methods for your Counter class. One for reading the latest value from a specific location, one for updating safely a specific location, and one for creating easily a new Counter based on an existing one:
/// <summary>
/// Returns the Counter currently stored in <paramref name="counter"/>, read through an
/// interlocked compare-exchange whose comparand and replacement are both null.
/// </summary>
/// <param name="counter">The storage location to read.</param>
/// <returns>The value found in the location.</returns>
public static Counter Read(ref Counter counter)
    => Interlocked.CompareExchange(ref counter, null, null);
/// <summary>
/// Replaces the Counter stored in <paramref name="counter"/> with the result of
/// <paramref name="updateFactory"/>, retrying until the swap succeeds against the value
/// the factory was given.
/// </summary>
/// <param name="counter">The storage location to update.</param>
/// <param name="updateFactory">
/// Builds the replacement from the observed value; may be invoked more than once.
/// </param>
public static void Update(ref Counter counter, Func<Counter, Counter> updateFactory)
{
    Counter expected = counter;
    for (;;)
    {
        Counter replacement = updateFactory(expected);
        Counter actual = Interlocked.CompareExchange(ref counter, replacement, expected);
        if (actual == expected)
        {
            // The location still held the value we started from, so the swap took effect.
            return;
        }

        // Another writer changed the location first; retry from what it now contains.
        expected = actual;
    }
}
/// <summary>
/// Creates a new Counter whose counts are this instance's counts plus the given deltas.
/// This instance is not modified.
/// </summary>
/// <param name="quotesDelta">Amount added to Quotes.</param>
/// <param name="tradesDelta">Amount added to Trades.</param>
/// <returns>The new Counter.</returns>
public Counter Add(int quotesDelta, int tradesDelta)
    => new Counter(Quotes + quotesDelta, Trades + tradesDelta);
Usage example:
// Fetch the Counter currently stored in the field via Counter.Read.
Counter latest = Counter.Read(ref this.counter);
// Swap the field for a Counter with one more quote and one more trade via Counter.Update.
Counter.Update(ref this.counter, existing => existing.Add(1, 1));
Accessing the MyProcessor.counter field directly by multiple threads concurrently is not thread-safe, because it's neither volatile nor protected by a lock. The above methods are safe to use because they are accessing the field through interlocked operations.
I wanted to update everyone with what I had come up with, the counter updates were pushed within the thread itself.
Everything is driven by the DequeueThread loop, and specifically this.queue.ReceiveAsync(TimeSpan.FromSeconds(UpdateFrequencySeconds)) function.
This will either return an item from the queue, process it and update the counters, or time out and then update the counters. There are no other threads involved: everything, including updating the message rate, is done within this thread.
In summary, nothing runs in parallel (in terms of dequing the packet), it is fetching the items one at a time and processing it and the counters thereafter. Then finally looping back to process the next item in the queue.
This removes the need for synchronisation:
/// <summary>
/// Accumulates four message counts and periodically hands them to a callback, after
/// which the counts start again from zero. Contains no synchronization of its own.
/// </summary>
internal class Counter
{
    private readonly Action<int,int,int,int> updateCallback;
    private DateTimeOffset nextUpdate;
    private int quotes;
    private int trades;
    private int aggregates;
    private int statuses;

    /// <summary>Creates a counter that reports through <paramref name="updateCallback"/>.</summary>
    /// <param name="updateCallback">Receives quotes, trades, aggregates, statuses (in that order).</param>
    /// <param name="updateEvery">Seconds added to the current time to schedule the next report.</param>
    public Counter(Action<int,int,int,int> updateCallback, double updateEvery)
    {
        this.updateCallback = updateCallback;
        this.UpdateEvery = updateEvery;
    }

    /// <summary>Reporting interval in seconds, as passed to the constructor.</summary>
    public double UpdateEvery { get; private set; }

    /// <summary>Adds to the quote count.</summary>
    public void AddQuotes(int count)
    {
        this.quotes += count;
    }

    /// <summary>Adds to the trade count.</summary>
    public void AddTrades(int count)
    {
        this.trades += count;
    }

    /// <summary>Adds to the aggregate count.</summary>
    public void AddAggregates(int count)
    {
        this.aggregates += count;
    }

    /// <summary>Adds to the status count.</summary>
    public void AddStatuses(int count)
    {
        this.statuses += count;
    }

    /// <summary>
    /// Reports and resets the counts when the scheduled time has passed; otherwise does nothing.
    /// </summary>
    public void Poll()
    {
        if (DateTimeOffset.UtcNow <= this.nextUpdate)
        {
            return; // not due yet
        }

        // post the stats, and reset
        this.updateCallback(this.quotes, this.trades, this.aggregates, this.statuses);
        this.quotes = this.trades = this.aggregates = this.statuses = 0;
        this.nextUpdate = DateTimeOffset.UtcNow.AddSeconds(this.UpdateEvery);
    }
}
/// <summary>
/// Single-consumer worker: byte payloads are queued through <see cref="Add"/>, and one
/// dequeue loop deserializes them, dispatches their contents, and maintains throughput
/// counters that are logged periodically.
/// </summary>
public class DeserializeWorker
{
    private readonly BufferBlock<byte[]> queue = new BufferBlock<byte[]>();
    private readonly IPolygonDeserializer polygonDeserializer;
    private readonly ILogger<DeserializeWorker> logger;
    private readonly Counter counter;
    const double UpdateFrequencySeconds = 5.0;
    long maxBacklog = 0;

    /// <summary>Creates the worker.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="polygonDeserializer"/> is null.</exception>
    public DeserializeWorker(IPolygonDeserializer polygonDeserializer, ILogger<DeserializeWorker> logger)
    {
        this.polygonDeserializer = polygonDeserializer ?? throw new ArgumentNullException(nameof(polygonDeserializer));
        this.logger = logger;
        this.counter = new Counter(ProcesCounterUpdateCallback, UpdateFrequencySeconds);
    }

    /// <summary>Queues one payload for processing.</summary>
    public void Add(byte[] data)
    {
        this.queue.Post(data);
    }

    /// <summary>Starts the dequeue loop and returns a task that completes when the loop has stopped.</summary>
    public Task Run(CancellationToken stoppingToken)
    {
        return Task
            .Factory
            .StartNew(
                async () => await DequeueThread(stoppingToken),
                stoppingToken,
                TaskCreationOptions.LongRunning,
                TaskScheduler.Default)
            .Unwrap();
    }

    // Receives items one at a time (or times out), then polls the counters after every
    // iteration so statistics are still reported while the queue is idle.
    private async Task DequeueThread(CancellationToken stoppingToken)
    {
        while (stoppingToken.IsCancellationRequested == false)
        {
            try
            {
                var item = await this.queue.ReceiveAsync(TimeSpan.FromSeconds(UpdateFrequencySeconds), stoppingToken);
                await ProcessAsync(item);
            }
            catch (TimeoutException)
            {
                // this is ok, timeout expired
            }
            catch (OperationCanceledException)
            {
                // Covers TaskCanceledException as well as its base type.
                break; // task cancelled, break from loop
            }
            catch (Exception e)
            {
                this.logger.LogError(e.ToString());
            }
            UpdateCounters();
        }
        await StopAsync();
    }

    /// <summary>Completes the queue and waits for it to finish.</summary>
    protected async Task StopAsync()
    {
        this.queue.Complete();
        // The block's Completion task does not finish while items are still buffered.
        // Nothing consumes the queue once the loop has exited, so discard leftovers
        // here; otherwise this await could hang forever.
        this.queue.TryReceiveAll(out _);
        await this.queue.Completion;
    }

    /// <summary>Logs every status whose Result is not "success".</summary>
    protected void ProcessStatuses(IEnumerable<Status> statuses)
    {
        Parallel.ForEach(statuses, (current) =>
        {
            if (current.Result != "success")
                this.logger.LogInformation($"{current.Result}: {current.Message}");
        });
    }

    /// <summary>Dispatches each message of one kind (body elided in the sample).</summary>
    protected void ProcessMessages<T>(IEnumerable<T> messages)
    {
        Parallel.ForEach(messages, (current) =>
        {
            // serialize by type T
            // dispatch
        });
    }

    // Deserializes one payload, fans its four collections out to worker tasks, records
    // the counts, and waits for the workers. All failures are logged and swallowed.
    async Task ProcessAsync(byte[] item)
    {
        try
        {
            using (var memoryStream = new MemoryStream(item))
            {
                var message = await this.polygonDeserializer.DeserializeAsync(memoryStream);
                var messagesTask = Task.Run(() => ProcessStatuses(message.Statuses));
                var quotesTask = Task.Run(() => ProcessMessages(message.Quotes));
                var tradesTask = Task.Run(() => ProcessMessages(message.Trades));
                var aggregatesTask = Task.Run(() => ProcessMessages(message.Aggregates));
                this.counter.AddStatuses(message.Statuses.Count);
                this.counter.AddQuotes(message.Quotes.Count);
                this.counter.AddTrades(message.Trades.Count);
                this.counter.AddAggregates(message.Aggregates.Count);
                // Await instead of blocking the thread with Task.WaitAll inside an async method.
                await Task.WhenAll(messagesTask, quotesTask, aggregatesTask, tradesTask);
            }
        }
        catch (Exception e)
        {
            this.logger.LogError(e.ToString());
        }
    }

    // Tracks the largest backlog seen and lets the counter report if it is due.
    void UpdateCounters()
    {
        var currentCount = this.queue.Count;
        if (currentCount > this.maxBacklog)
            this.maxBacklog = currentCount;
        this.counter.Poll();
    }

    // Invoked by the counter with the totals for the elapsed period; logs per-second rates.
    void ProcesCounterUpdateCallback(int quotes, int trades, int aggregates, int statuses)
    {
        var updateFrequency = this.counter.UpdateEvery;
        logger.LogInformation(
            $"Queue current {this.queue.Count} (max {this.maxBacklog }), {quotes / updateFrequency} quotes/sec, {trades / updateFrequency} trades/sec, {aggregates / updateFrequency} aggregates/sec, {statuses / updateFrequency} status/sec");
    }
}
When creating a Task, is it possible to record the parameters that were used to start the task.
Take the following as an example (just a prototype, it's not real!).
/// <summary>
/// Prototype: starts ten tasks that each call myFunc with a value from 1 to 10, waits
/// for all of them, and writes each result to the debug output.
/// </summary>
static void Main(string[] args)
{
    ICollection<Task<int>> taskList = new List<Task<int>>();

    // Create a set of tasks
    int i = 1;
    while (i <= 10)
    {
        // Per-iteration copy so each lambda sees its own input value.
        var local_i = i;
        var t = new Task<int>(() => myFunc(local_i));
        t.Start();
        taskList.Add(t);
        i++;
    }

    // Wait for all the tasks to complete.
    Task<int>[] pending = taskList.ToArray();
    Task.WaitAll(pending);

    // Output the results
    foreach (var tsk in taskList)
    {
        // the "???" should be the input value to the task
        System.Diagnostics.Debug.WriteLine("Input: ??? - Result: "+tsk.Result);
    }
}
/// <summary>Returns the square of <paramref name="i"/>.</summary>
static int myFunc(int i)
{
    int squared = i * i;
    return squared;
}
When the results are output, I want to know what input variable was provided to myFunc() that produced the result
Besides returning a Tuple with both values, you can also make taskList an ICollection<Tuple<int, Task<int>>> and store the parameter there. To make it simpler, you might create your own class for that:
/// <summary>
/// Pairs a task with the parameter value it was started with.
/// </summary>
/// <typeparam name="T">Type of both the task's result and the stored parameter.</typeparam>
class TaskInfo<T>
{
    /// <summary>The input value associated with the task.</summary>
    public T Parameter { get; set; }

    /// <summary>The task itself.</summary>
    public Task<T> Task { get; set; }
}
And then
// Each entry keeps a task together with the input it was started with.
var taskList = new List<TaskInfo<int>>();
...
// TaskInfo is generic, so the type argument must be spelled out here.
taskList.Add(new TaskInfo<int> { Task = t, Parameter = local_i });
If you can change myFunc change the return type, so it will return the input and result as a Tuple.
If you can't you could use a Dictionary or List<Tuple<input,Task>> to store the input along with the task (instead of your ICollection)
I have a batch block in TPL Dataflow and have several source blocks linked to the batch block. However, the number of those blocks changes dynamically, and thus so does the size of the batches. The problem is that the batch size must be supplied when the BatchBlock is initialized, and I don't see a way to adjust it later on. Any ideas how to get around this? Is the only way to unlink (dispose all links to and from the batch block), re-initialize the batch block with a new batch size, and then link again? I could do that, but how do I ensure that old batches and new batches do not get mixed up?
For example if I had 2 transform blocks stream to batch block and now have an additional transform block and want to increase batch size to 3, how do I make sure that all previous batches prior to the increase were processed to ensure synched behavior? Point is that all transform blocks get the exact identical item and the outputs of those transform blocks should be batched in the way that only those outputs are batched that match identical inputs.
Here a sample how I want it to be:
Constant stream of ints to transform blocks:
1,2,3, [point where batch size is increased],4,5,...
Let transform blocks output what they got in like 1 => 1
So batchblock should output like this :
[1,1], [2,2], [3,3], [change of batch size], [4,4,4], [5,5,5],...
Here my current code:
// Sample pipeline: a broadcast block feeds two CoreLogic transform blocks, whose outputs
// are grouped by a batch block (size 2), merged by a transform block into FinalObject
// lists, and finally drained by an action block. Start() posts data and times the run.
public class Test
{
private Stopwatch watch;
private BroadcastBlock<List<InputObject>> tempBCB;
private BatchBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> batchBlock;
private TransformBlock<Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>[], List<FinalObject>> transformBlock;
private ActionBlock<List<FinalObject>> justToFlushTransformBlock;
private CoreLogic core1;
private CoreLogic core2;
// Builds all blocks and links them together.
public Test()
{
// The cloning function returns the same list instance it is given.
tempBCB = new BroadcastBlock<List<InputObject>>(input => input);
//here batch size = 2
batchBlock = new BatchBlock<Tuple<List<InputObject>,Dictionary<int,IntermediateObject>>>(2, new GroupingDataflowBlockOptions { Greedy = false });
transformBlock = new TransformBlock<Tuple<List<InputObject>,Dictionary<int,IntermediateObject>>[],List<FinalObject>>(array =>
{
// The input list is taken from the first tuple of the batch only.
List<InputObject> inputObjects = array[0].Item1;
List<FinalObject> ret = inputObjects.ConvertAll(x => new FinalObject(x));
foreach (var tuple in array)
{
//iterate over each individual object
foreach (var dictionary in tuple.Item2)
{
// The dictionary key is used as the index into the result list.
ret[dictionary.Key].outputList.Add(dictionary.Value);
}
}
return ret;
}, new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded });
justToFlushTransformBlock = new ActionBlock<List<FinalObject>>(list =>
{
//just in order to accept items from the transformBlock output queue
});
//Generate 2 CoreLogic objects
core1 = new CoreLogic();
core2 = new CoreLogic();
//linking
tempBCB.LinkTo(core1.transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
tempBCB.LinkTo(core2.transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
// These two links do not propagate completion; Start() completes batchBlock explicitly.
core1.transformBlock.LinkTo(batchBlock);
core2.transformBlock.LinkTo(batchBlock);
batchBlock.LinkTo(transformBlock, new DataflowLinkOptions { PropagateCompletion = true });
transformBlock.LinkTo(justToFlushTransformBlock, new DataflowLinkOptions { PropagateCompletion = true });
}
// Posts 30 lists of growing size, completes the pipeline, blocks until transformBlock
// has completed, prints the elapsed time, and then waits for a line on the console.
public void Start()
{
const int numberChunks = 30;
watch = new Stopwatch();
watch.Start();
for (int j = 1; j <= numberChunks; j++)
{
// Chunk j contains 10000 * j input objects.
int collectionSize = 10000 * j;
List<InputObject> collection = new List<InputObject>(collectionSize);
for (int i = 0; i < collectionSize; i++)
{
collection.Add(new InputObject(i));
}
tempBCB.Post(collection);
}
tempBCB.Complete();
// Complete the batch block only after BOTH CoreLogic blocks have completed.
Task.WhenAll(core1.transformBlock.Completion, core2.transformBlock.Completion).ContinueWith(_ =>
{
batchBlock.Complete();
});
transformBlock.Completion.Wait();
watch.Stop();
Console.WriteLine("Elapsed time (in milliseconds): " + watch.ElapsedMilliseconds);
Console.ReadLine();
}
}
/// <summary>
/// Wraps a transform block that pairs each incoming list with a freshly generated
/// dictionary of 10000 IntermediateObject values keyed 0..9999.
/// </summary>
public class CoreLogic
{
    /// <summary>The transform block; linked into the pipeline by the owner.</summary>
    public TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>> transformBlock;

    /// <summary>Creates the transform block.</summary>
    public CoreLogic()
    {
        const int numberIntermediateObjects = 10000;
        transformBlock = new TransformBlock<List<InputObject>, Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>>(input =>
        {
            //please ignore the fact that `input` is not utilized here, the point is to generate a collection of IntermediateObject and return
            // The final size is known up front, so reserve it once instead of growing repeatedly.
            var ret = new Dictionary<int, IntermediateObject>(numberIntermediateObjects);
            for (int i = 0; i < numberIntermediateObjects; i++)
            {
                ret.Add(i, new IntermediateObject(i));
            }
            return new Tuple<List<InputObject>, Dictionary<int, IntermediateObject>>(input, ret);
        });
    }
}
/// <summary>
/// Holds a single integer and supports shallow cloning, both strongly typed and
/// through <see cref="ICloneable"/>.
/// </summary>
public class InputObject : ICloneable
{
    /// <summary>Creates an instance holding <paramref name="value"/>.</summary>
    public InputObject(int value)
    {
        value1 = value;
    }

    /// <summary>The value supplied at construction.</summary>
    public int value1 { get; private set; }

    /// <summary>Returns a member-wise copy of this instance.</summary>
    public InputObject Clone()
    {
        object copy = this.MemberwiseClone();
        return (InputObject)copy;
    }

    // Explicit interface implementation; forwards to the typed Clone().
    object ICloneable.Clone()
    {
        return this.Clone();
    }
}
/// <summary>Holds a single integer that is fixed at construction.</summary>
public class IntermediateObject
{
    /// <summary>Creates an instance holding <paramref name="value"/>.</summary>
    public IntermediateObject(int value)
    {
        value1 = value;
    }

    /// <summary>The value supplied at construction.</summary>
    public int value1 { get; private set; }
}
/// <summary>
/// Result item: one input object together with the intermediate objects collected for it.
/// </summary>
public class FinalObject
{
    /// <summary>Intermediate objects gathered for <see cref="input"/>; starts empty.</summary>
    public List<IntermediateObject> outputList = new List<IntermediateObject>();

    /// <summary>Creates a result for <paramref name="input"/> with an empty output list.</summary>
    public FinalObject(InputObject input)
    {
        this.input = input;
    }

    /// <summary>The input object this result belongs to.</summary>
    public InputObject input { get; private set; }
}
/// <summary>Helpers for copying collections.</summary>
public static class Cloning
{
    /// <summary>
    /// Returns a new list containing a clone of every element of <paramref name="original"/>,
    /// in the same order.
    /// </summary>
    /// <typeparam name="TValue">Element type; must implement <see cref="ICloneable"/>.</typeparam>
    /// <param name="original">The list to copy.</param>
    /// <returns>A new list of cloned elements.</returns>
    public static List<TValue> CloneListCloneValues<TValue>(List<TValue> original) where TValue : ICloneable
    {
        return original.ConvertAll(element => (TValue)element.Clone());
    }
}
My program should run a maximum of N jobs at a time. If more jobs need to run, they are held in temporary storage; after one of the currently running jobs completes, I pick another trigger based on how many times it has failed to start and on its priority, and then fire its job.
In the initialization phase, I create, for example, 5 jobs with 5 corresponding triggers and add them to the scheduler. Everything is fine until the second job is running, but then TriggerComplete of the trigger listener does not fire to pick up another job to run. Could you please tell me where I'm going wrong?
/// <summary>
/// Trigger listener that limits how many jobs run at once. Triggers that fire while the
/// limit is reached are vetoed and remembered; when a trigger completes, one of the
/// remembered triggers is chosen by failure count and priority and its job is started.
/// </summary>
public class CrawlerTriggerListener : ITriggerListener
{
    private int maxConcurrentCrawling = 1;
    private int currentCount = 0;
    // Guards currentCount and fireDic.
    private object syncLock = new object();
    // Bookkeeping per trigger name, filled lazily on the first TriggerFired call.
    private Dictionary<string, MissfiredInfo> fireDic = new Dictionary<string, MissfiredInfo>();

    public string Name { get { return "listener"; } }

    /// <summary>
    /// On the first call, records every trigger in group "triggerGroup" with its priority.
    /// </summary>
    public void TriggerFired(Trigger trigger, JobExecutionContext context)
    {
        // Same lock as the other callbacks, which read this dictionary under it.
        lock (syncLock)
        {
            if (fireDic.Count != 0)
            {
                return;
            }

            IScheduler sched = context.Scheduler;
            string[] triggerNameList = sched.GetTriggerNames("triggerGroup");
            foreach (string triggerName in triggerNameList)
            {
                MissfiredInfo missedInfo = new MissfiredInfo();
                missedInfo.TriggerName = triggerName;
                missedInfo.Priority = sched.GetTrigger(triggerName, "triggerGroup").Priority;
                fireDic.Add(triggerName, missedInfo);
            }
        }
    }

    /// <summary>
    /// Allows execution while fewer than the maximum number of jobs are running;
    /// otherwise vetoes it and records the failed attempt.
    /// </summary>
    /// <returns>True to veto the execution, false to allow it.</returns>
    public bool VetoJobExecution(Trigger trigger, JobExecutionContext context)
    {
        lock (syncLock)
        {
            // A trigger that was not recorded by TriggerFired has no bookkeeping entry;
            // look it up defensively instead of throwing KeyNotFoundException.
            MissfiredInfo info;
            fireDic.TryGetValue(trigger.Name, out info);

            bool allowed = currentCount < maxConcurrentCrawling;
            if (allowed)
            {
                currentCount++;
            }

            if (info != null)
            {
                info.LastFireTime = DateTime.UtcNow;
                info.FailCount = allowed ? 0 : info.FailCount + 1;
            }

            return !allowed;
        }
    }

    public void TriggerMisfired(Trigger trigger) { }

    /// <summary>
    /// Frees one slot and, if any trigger has failed attempts, starts the job of the
    /// candidate with the lowest score (FailCount * 73 + Priority).
    /// </summary>
    public void TriggerComplete(Trigger trigger, JobExecutionContext context, SchedulerInstruction triggerInstructionCode)
    {
        lock (syncLock)
        {
            currentCount--;
            var validCandidate = new Dictionary<string, int>();
            foreach (KeyValuePair<string, MissfiredInfo> fireDicItem in fireDic)
            {
                if (fireDicItem.Value.FailCount > 0)
                {
                    validCandidate.Add(fireDicItem.Key, fireDicItem.Value.FailCount * 73 + fireDicItem.Value.Priority);
                }
            }

            if (validCandidate.Count > 0)
            {
                // NOTE(review): ascending order selects the candidate with the FEWEST
                // failures; confirm this is the intended selection rule.
                var sorted = (from entry in validCandidate orderby entry.Value ascending select entry);
                string triggerName = sorted.First().Key;
                fireDic[triggerName].LastFireTime = DateTime.UtcNow;
                fireDic[triggerName].FailCount = 0;
                string jobName = context.Scheduler.GetTrigger(triggerName, "triggerGroup").JobName;
                // NOTE(review): the slot is counted here AND VetoJobExecution counts again
                // if it is invoked for the trigger created by TriggerJob — verify against
                // the scheduler's behavior, as that could veto the job started here.
                currentCount++;
                context.Scheduler.TriggerJob(jobName, "jobGroup");
            }
        }
    }
}
Okay, so again, I'm not sure where you are instantiating the TriggerListener, but you might want to verify that you are adding the TriggerListener to the Scheduler.
http://quartznet.sourceforge.net/tutorial/lesson_7.html
See that the scheduler instance has a method for "adding" (or registering) listeners. If you don't do that, the events will never fire.