I'm experimenting with locks that don't require atomic instructions. Peterson's algorithm seemed like the simplest place to start. However, with enough iterations, something goes wrong.
Code:
public class Program
{
private static volatile int _i = 0;
public static void Main(string[] args)
{
for (int i = 0; i < 1000; i++)
{
RunTest();
}
Console.Read();
}
private static void RunTest()
{
_i = 0;
var lockType = new PetersonLock();
var t1 = new Thread(() => Inc(0, lockType));
var t2 = new Thread(() => Inc(1, lockType));
t1.Start();
t2.Start();
t1.Join();
t2.Join();
Console.WriteLine(_i);
}
private static void Inc(int pid, ILock lockType)
{
try
{
for (int i = 0; i < 1000000; i++)
{
lockType.Request(pid);
_i++;
lockType.Release(pid);
}
}
catch (Exception ex)
{
Console.WriteLine(ex);
}
}
}
public interface ILock
{
void Request(int pid);
void Release(int pid);
}
public class NoLock : ILock
{
public void Request(int pid) { }
public void Release(int pid) { }
}
public class StandardLock : ILock
{
private object _sync = new object();
public void Request(int pid)
{
Monitor.Enter(_sync);
}
public void Release(int pid)
{
Monitor.Exit(_sync);
}
}
public class PetersonLock : ILock
{
private volatile bool[] _wantsCs = new bool[2];
private volatile int _turn;
public void Request(int pid)
{
int j = pid == 1 ? 0 : 1;
_wantsCs[pid] = true;
_turn = j;
while (_wantsCs[j] && _turn == j)
{
Thread.Sleep(0);
}
}
public void Release(int pid)
{
_wantsCs[pid] = false;
}
}
When I run this, I consistently get < 2,000,000. What's going on?
The problem here is these two statements:
_wantsCs[pid] = true;
_turn = j;
The memory model of .NET and C# allows these two writes to be reordered.
To force them to not be reordered, add a memory barrier between them:
_wantsCs[pid] = true;
Thread.MemoryBarrier();
_turn = j;
and you will get the expected output every time.
Note that this very problem is described on the Wikipedia page for Peterson's Algorithm in the note section (shortened down here, go read the article for the full notes):
Notes
...
Most modern CPUs reorder memory accesses to improve execution efficiency (see memory ordering for types of reordering allowed). Such processors invariably give some way to force ordering in a stream of memory accesses, typically through a memory barrier instruction. Implementation of Peterson's and related algorithms on processors which reorder memory accesses generally requires use of such operations to work correctly to keep sequential operations from happening in an incorrect order.
(my emphasis)
Related
I have a class like this:
class Test {
const int capacity = 100_0000;
private HashSet<int> set = new HashSet<int>(capacity);
public Test() { this.Reassign(); }
private void Reassign() {
var thread = new Thread(() => {
while (true) {
set = new HashSet<int>(capacity);
Thread.Sleep(10);
}
});
thread.Start();
}
public void Add(int val) {
set.Add(val);
}
}
The add method is frequently called.
var test = new Test();
Task.Run(() => {
for (int i = 0; i < 100000000; i++) {
test.Add(i);
Thread.SpinWait(5000);
}
});
It doesn't matter whether the data is successfully stored in the set, in this case, is the behavior of the add method consistent with the following code?
class LockTest {
const int capacity = 100_0000;
private HashSet<int> set = new HashSet<int>(capacity);
public LockTest() { this.Reassign(); }
private void Reassign() {
var thread = new Thread(() => {
while (true) {
lock (set) set = new HashSet<int>(capacity);
Thread.Sleep(10);
}
});
thread.Start();
}
public void Add(int val) {
lock (set) set.Add(val);
}
}
In my tests, even if there is no lock statement, there is no error. But I don't know what happens at the moment of set = new HashSet<int>(capacity);.
If you change the code to this, what do you think will happen? My guess is that the number you print out will sometimes be the same and other time different. When it is the same, lock(set) has worked, but otherwise it hasn't.
class LockTest {
const int capacity = 100_0000;
private int setCnt = 0;
private object set = new object();
public LockTest() { this.Reassign(); }
private void Reassign() {
var thread = new Thread(() => {
while (true) {
lock (set) setCnt++;
Thread.Sleep(10);
}
});
thread.Start();
}
public void Add(int val) {
lock (set) Console.WriteLine($"{setCnt}");
}
}
I'm experimenting with threading in C#, and I've created the following class as a result. I've tried to avoid any cases of race conditions, yet a deadlock occurs on use.
The class uses two different locks, one spinlock for straightforward operations, and additionally a Monitor lock to wait in case no object is ready. I originally used EventWaitHandle, but discovered that race conditions were inevitable due to WaitOne/Set precedence.
Note that Monitor.Pulse could not precede Monitor.Wait, so what else could cause a deadlock? In the case where 5 threads use a TestPool class with a capacity of 4, the deadlock always occurs at SpinLock at an irregular moment.
internal class TestPool<T> where T : class
{
private int capacity;
private int unitPos;
private int waitUnitPos;
private int waitCount;
private int lockState;
private object lockObj;
private T[] units;
private Func<T> unitFactory;
public TestPool(int capacity, Func<T> unitFactory)
{
this.lockObj = new object();
this.unitFactory = unitFactory;
Init(capacity);
}
public T Fetch()
{
T unit;
Lock();
unit = (unitPos != capacity) ? units[unitPos++] : Wait();
Unlock();
return unit;
}
public void Store(T unit)
{
Lock();
if (waitCount == 0)
{
units[--unitPos] = unit;
}
else
{
Pulse(unit);
}
Unlock();
}
private T Wait()
{
waitCount++;
lock (lockObj)
{
Unlock();
Monitor.Wait(lockObj);
Lock();
return units[--waitUnitPos];
}
}
private void Pulse(T unit)
{
waitCount--;
units[waitUnitPos++] = unit;
lock (lockObj)
{
Monitor.Pulse(lockObj);
}
}
private void Lock()
{
if (Interlocked.CompareExchange(ref lockState, 1, 0) != 0)
{
SpinLock();
}
}
private void SpinLock()
{
SpinWait spinWait = new SpinWait();
do
{
spinWait.SpinOnce();
}
while (Interlocked.CompareExchange(ref lockState, 1, 0) != 0);
}
private void Unlock()
{
Interlocked.Exchange(ref lockState, 0);
}
private void Init(int capacity)
{
T[] tx = new T[capacity];
for (int i = 0; i < capacity; i++)
{
tx[i] = unitFactory.Invoke();
}
units = tx;
this.capacity = capacity;
}
}
Fixed it. I had to place the following code outside the Monitor lock.
Lock();
return units[--waitUnitPos];
class MultiThreading
{
public class ThreadClass
{
public string InputString { get; private set; }
public int StartPos { get; private set; }
public List<SearchAlgorithm.CandidateStr> completeModels;
public List<SearchAlgorithm.CandidateStr> partialModels;
public ThreadClass(string s, int sPos)
{
InputString = s;
StartPos = sPos;
completeModels = new List<SearchAlgorithm.CandidateStr>();
partialModels = new List<SearchAlgorithm.CandidateStr>();
}
public void Run(int strandID)
{
Thread t = new Thread(() => this._run(strandID));
t.Start();
}
private void _run(int strandID)
{
SearchAlgorithm.SearchInOneDirection(strandID, ref this.completeModels, ref this.partialModels);
}
public static void CombineResult(
List<ThreadClass> tc,
out List<SearchAlgorithm.CandidateStr> combinedCompleteModels,
out List<SearchAlgorithm.CandidateStr> combinedPartialModels)
{
// combine the result
}
}
}
class Program
{
static void Main(string s, int strandID)
{
int lenCutoff = 10000;
if (s.Length > lenCutoff)
{
var threads = new List<MultiThreading.ThreadClass>();
for (int i = 0; i <= s.Length; i += lenCutoff)
{
threads.Add(new MultiThreading.ThreadClass(s.Substring(i, lenCutoff), i));
threads[threads.Count - 1].Run(strandID);
}
**// How can I wait till all thread in threads list to finish?**
}
}
}
My question is how can I wait till all threads in "threads" list to finish and then do the call CombineResult method?
Thanks
You can add a List<Thread> structure to keep record of all your threads
private List<Thread> threads = new List<Thread>();
Then populate the list with the threads
public void Run(int strandID)
{
Thread t = new Thread(() => this._run(strandID));
t.Start();
threads.Add(t);
}
Lastly, you can have a method that calls Join for each thread in a list. It is usually a good practise to have a timeout delay so your program will not block forever (in case there is a bug in a thread)
public void WaitAll(List<Thread> threads, int maxWaitingTime)
{
foreach (var thread in threads)
{
thread.Join(maxWaitingTime); //throws after timeout expires
}
}
An alternative would be to use a Task class and to call Task.WaitAll
Have ThreadClass expose a means to join the thread:
private Thread nativeThread;
public void Run(int strandID)
{
nativeThread = new Thread(() => this._run(strandID));
nativeThread.Start();
}
public void Join()
{
nativeThread.Join();
}
And then use ThreadClass.Join on each thread after you start them:
var threads = new List<ThreadClass>();
for (int i = 0; i <= s.Length; i += lenCutoff)
{
threads.Add(new ThreadClass(s.Substring(i, lenCutoff), i));
threads[threads.Count - 1].Run(strandID);
}
// Waits for each thread to finish in succession
threads.ForEach(t => t.Join());
Or, throw away your ThreadClass and enjoy System.Threading.Tasks:
// e.g. class Models { Complete; Partial; }
// private Models Search(string source, int offset, int length, int ID)
var tasks = new List<Task<Models>>(
from x in Enumerable.Range(0, s.Length / lenCutoff)
select Task.Factory.StartNew<Models>(
() => Search(s, x, lenCutoff, strandID));
);
// private Models CombineResults(IEnumerable<Models> results)
var combine = Task.Factory.ContinueWhenAll<Models>(
tasks.ToArray(),
ts => CombineResults(ts.Select(t => t.Result)));
combine.Wait();
Models combinedModels = combine.Result;
Well I see the answer has already been chosen, but I had already started on writing a solution using System.Threading.Tasks as proposed by several others. Rather than using LINQ, I have tried to match the structure of the original code as much as possible:
class SearchClass
{
public String InputString { get; private set; }
public int StartPos { get; private set; }
public List<string> completeModels;
public List<string> partialModels;
public SearchClass(string s, int sPos)
{
InputString = s;
StartPos = sPos;
completeModels = new List<string>();
partialModels = new List<string>();
}
public void Run(int strandID)
{
// SearchAlgorithm.SearchInOneDirection(...);
}
// public static void CombineResult(...){ };
}
class Program
{
static void Main(string s, int strandID)
{
int lenCutoff = 10000;
if (s.Length > lenCutoff)
{
var searches = new List<SearchClass>();
var tasks = new List<System.Threading.Tasks.Task>();
for (int i = 0; i < s.Length; i += lenCutoff)
{
SearchClass newSearch = new SearchClass(s.Substring(i, lenCutoff), i);
searches.Add(newSearch);
tasks.Add(System.Threading.Tasks.Task.Factory.StartNew(()=>newSearch.Run(strandID)));
}
System.Threading.Tasks.Task.WaitAll(tasks.ToArray());
// Combine the result
}
}
}
I'm attempting to make my simple C# graphics library multi-threaded. However, after the introduction of this code:
/* foreach (IAffector affector in affectorLookup.Values)
affector.Update(timestep); */
taskManager.Value = timestep; taskManager.Start();
foreach (IAffector affector in affectorLookup.Values)
taskManager.AddToQueue(affector.Update);
taskManager.StopWhenDone();
taskManager.Wait();
the simulation starts experiencing sharp lag-spikes, which seem to originate in TaskHandler.Run (I can't tell for sure, because adding the previous code makes my code profiler ignore anything outside TaskHandler.Run).
The task manager:
public class TaskManager
{
public delegate void MethodDel(float timestep);
private Queue<MethodDel> queue;
private List<TaskHandler> handlers;
private float value;
public float Value
{
get
{
return value;
}
set
{
this.value = value;
}
}
public TaskManager()
{
this.queue = new Queue<MethodDel>();
this.handlers = new List<TaskHandler>(System.Environment.ProcessorCount);
for (int t = 0; t < this.handlers.Capacity; ++t)
this.handlers.Add(new TaskHandler(this));
this.value = 0;
}
public void Start()
{
foreach (var handler in handlers)
handler.Start();
}
public void Stop()
{
lock (queue)
queue.Clear();
foreach (var handler in handlers)
handler.StopWhenDone();
}
public void StopWhenDone()
{
foreach (var handler in handlers)
handler.StopWhenDone();
}
public void AddToQueue(MethodDel method)
{
lock (queue)
queue.Enqueue(method);
}
public bool GetFromQueue(out MethodDel method)
{
lock (queue)
{
if (queue.Count == 0) { method = null; return false; }
method = queue.Dequeue();
return true;
}
}
public int GetQueueCount()
{
return queue.Count;
}
internal void Wait()
{
// Have to wait for them one at a time because the main thread is STA.
WaitHandle[] waitHandles = new WaitHandle[1];
// for (int t = 0; t < handlers.Count; ++t)
// waitHandles[t] = handlers[t].WaitHandle;
// WaitHandle.WaitAll(waitHandles);
for (int t = 0; t < handlers.Count; ++t)
{ waitHandles[0] = handlers[t].WaitHandle; WaitHandle.WaitAll(waitHandles); }
}
}
And the task handler:
public class TaskHandler
{
private TaskManager manager;
private Thread thread;
private bool stopWhenDone;
private ManualResetEvent waitHandle;
public ManualResetEvent WaitHandle
{
get
{
return waitHandle;
}
}
public TaskHandler(TaskManager manager)
{
this.manager = manager;
}
public void Start()
{
waitHandle = new ManualResetEvent(false);
stopWhenDone = false;
thread = new Thread(Run);
thread.IsBackground = true;
thread.SetApartmentState(ApartmentState.MTA);
thread.Start();
}
public void StopWhenDone()
{
this.stopWhenDone = true;
}
// Possible source of slowdown
private void Run()
{
TaskManager.MethodDel curMethod;
while (!stopWhenDone || manager.GetQueueCount() > 0)
{
if (manager.GetFromQueue(out curMethod))
{
curMethod(manager.Value);
}
}
waitHandle.Set();
}
}
Starting a thread is a heavy operation. Not sure if it's as heavy as you are experiencing, but that could be it. Also, having all your processing run parallel can be putting a big strain on your system with possibly little benefit...
I'm going to venture that the spikes have something to do with waitHandle.Set();
I like the overall design, but I have not used WaitHandle before, so I am unsure how this interacts with your design.
Having some issue with Threadpooling here that I need some help with please. I am trying to write a Generator, and I need to allow users generate up to 10,000 lines with the code below. Problem with this is the line
WaitHandle.WaitAll(doneEvents);
Can only handle 64 WaitAll at a time, How can I best apply thread pooling to my code in this case?
public void GenerateInsertStatements(int iRequiredRows)
{
// One event is used for each row object
ManualResetEvent[] doneEvents = new ManualResetEvent[iRequiredRows];
Row[] rows = new Row[iRequiredRows];
for (int i = 0; i < iRequiredRows; i++)
{
doneEvents[i] = new ManualResetEvent(false);
Row row = new Row(this.Name, this.TableColumns, doneEvents[i]);
rows[i] = row;
ThreadPool.QueueUserWorkItem(row.ThreadPoolCallback, i);
}
WaitHandle.WaitAll(doneEvents);
using (sr = new StreamWriter(this.Name + ".sql"))
{
for(int i=0; i<rows.Length; i++)
{
WriteStatementToFile(i, rows[i].GeneratedInsertStatement);
}
}
}
Thanks in advance
I would use just one WaitHandle and one int. Like:
int done_when_zero; // This is a field of the class
ManualResetEvent evt = new ManualResetEvent (false); // Field
...
done_when_zero = iRequiredRows; // This goes before the loop
...
evt.WaitOne (); // this goes after the loop
evt.Reset (); // Prepare for next execution if needed
And then, at the end of ThreadPoolCallback:
if (Interlocked.Decrement (ref done_when_zero)) <= 0)
evt.Set ();
As it was already suggested using a counter and a single ManualResetEvent should work fine for you. Below is ThreadPoolWait class taken from .NET Matters: ThreadPoolWait and HandleLeakTracer (see Figure 3 Better Implementation of ThreadPoolWait for more info)
public class ThreadPoolWait : IDisposable
{
private int _remainingWorkItems = 1;
private ManualResetEvent _done = new ManualResetEvent(false);
public void QueueUserWorkItem(WaitCallback callback)
{
QueueUserWorkItem(callback, null);
}
public void QueueUserWorkItem(WaitCallback callback, object state)
{
ThrowIfDisposed();
QueuedCallback qc = new QueuedCallback();
qc.Callback = callback;
qc.State = state;
lock (_done) _remainingWorkItems++;
ThreadPool.QueueUserWorkItem(new WaitCallback(HandleWorkItem), qc);
}
public bool WaitOne() { return WaitOne(-1, false); }
public bool WaitOne(TimeSpan timeout, bool exitContext)
{
return WaitOne((int)timeout.TotalMilliseconds, exitContext);
}
public bool WaitOne(int millisecondsTimeout, bool exitContext)
{
ThrowIfDisposed();
DoneWorkItem();
bool rv = _done.WaitOne(millisecondsTimeout, exitContext);
lock (_done)
{
if (rv)
{
_remainingWorkItems = 1;
_done.Reset();
}
else _remainingWorkItems++;
}
return rv;
}
private void HandleWorkItem(object state)
{
QueuedCallback qc = (QueuedCallback)state;
try { qc.Callback(qc.State); }
finally { DoneWorkItem(); }
}
private void DoneWorkItem()
{
lock (_done)
{
--_remainingWorkItems;
if (_remainingWorkItems == 0) _done.Set();
}
}
private class QueuedCallback
{
public WaitCallback Callback;
public object State;
}
private void ThrowIfDisposed()
{
if (_done == null) throw new ObjectDisposedException(GetType().Name);
}
public void Dispose()
{
if (_done != null)
{
((IDisposable)_done).Dispose();
_done = null;
}
}
}
Probably not the most efficient solution, but it should work regardless of the 64 wait handles limit :
for(int i = 0; i < iRequiredRows; i++)
doneEvents[i].WaitOne();