Web Scraping with WebBrowser and Single Thread Apartment C# - c#

This is a question about a slight variation on Noseratio's code in this question: [link]How to cancel Task await after a timeout period
I am able to build his code exactly as-is to create a Console application that returns the URL and OuterHtml of each of the three web pages specified in the code. However, when I put the same code in a WinForms application, the only output I get is
URL:
http://example.com
That means that the code does not display the OuterHtml of the first page, and it does not display the URL or OuterHtml of either of the other two pages. What I would like to know is what about WinForms breaks the code and how to get around it.
I expected that since the code still creates a new STA, which creates a new thread, it should not matter that I am using a WinForm. I am using VS2013 Ultimate, .NET 4.5.1.
The WinForm consists of a single button. Clicking it is handled by private void button1_Click(object sender, EventArgs e), which has a body identical to Main in Noseratio's code.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Win32;
namespace WebScraperAsync005
{
/// <summary>
/// Single-button form from the question. Clicking the button scrapes three pages
/// with a WebBrowser that lives on a separate STA thread (MessageLoopApartment).
/// </summary>
public partial class Form1 : Form
{
// main logic
/// <summary>
/// Navigates to each URL in turn and writes its address and rendered HTML to the console.
/// </summary>
/// <param name="urls">Addresses to visit, in order.</param>
/// <param name="token">Cancels the whole run; each navigation also gets its own 30-second limit.</param>
static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
{
using (var apartment = new MessageLoopApartment())
{
// create WebBrowser inside MessageLoopApartment
var webBrowser = apartment.Invoke(() => new WebBrowser());
try
{
foreach (var url in urls)
{
Console.WriteLine("URL:\n" + url);
// cancel in 30s or when the main token is signalled
var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token);
navigationCts.CancelAfter((int)TimeSpan.FromSeconds(30).TotalMilliseconds);
var navigationToken = navigationCts.Token;
// run the navigation task inside MessageLoopApartment
// (this await has no ConfigureAwait(false), so it resumes on the caller's synchronization context)
string html = await apartment.Run(() =>
webBrowser.NavigateAsync(url, navigationToken), navigationToken);
Console.WriteLine("HTML:\n" + html);
}
}
finally
{
// dispose of WebBrowser inside MessageLoopApartment
apartment.Invoke(() => webBrowser.Dispose());
}
}
}
/// <summary>Initializes the designer-generated components.</summary>
public Form1()
{
InitializeComponent();
}
/// <summary>
/// Click handler whose body is the console sample's Main: starts the scrape and
/// blocks until it finishes, reporting any failure on the console before exiting.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
try
{
WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5
// overall limit of 3 minutes for the whole run
var cts = new CancellationTokenSource((int)TimeSpan.FromMinutes(3).TotalMilliseconds);
var task = ScrapSitesAsync(
new[] { "http://example.com", "http://example.org", "http://example.net" },
cts.Token);
// NOTE: this blocks the UI thread. The continuation of the await inside ScrapSitesAsync
// is queued to this same thread, so it cannot run while Wait() is blocking - in a
// WinForms app this is a deadlock (only the first "URL:" line is ever printed).
task.Wait();
Console.WriteLine("Press Enter to exit...");
Console.ReadLine();
}
catch (Exception ex)
{
// unwrap nested AggregateExceptions to report the innermost cause
while (ex is AggregateException && ex.InnerException != null)
ex = ex.InnerException;
Console.WriteLine(ex.Message);
Environment.Exit(-1);
}
}
}
/// <summary>
/// WebBrowserExt - WebBrowser extensions
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public static class WebBrowserExt
{
    // delay, in milliseconds, between two consecutive snapshots of the page's HTML
    const int POLL_DELAY = 500;

    /// <summary>
    /// Navigates to <paramref name="url"/>, awaits DocumentCompleted, then polls the root
    /// element's OuterHtml every POLL_DELAY milliseconds until the browser is not busy
    /// and two consecutive snapshots are identical.
    /// </summary>
    /// <param name="webBrowser">The browser to navigate.</param>
    /// <param name="url">The address to load.</param>
    /// <param name="token">Cancels both the wait for DocumentCompleted and the polling loop.</param>
    /// <returns>The OuterHtml of the page's html element.</returns>
    /// <exception cref="OperationCanceledException">The token was signalled before the page settled.</exception>
    public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
    {
        // navigate and await DocumentCompleted
        var tcs = new TaskCompletionSource<bool>();
        WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

        using (token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: true))
        {
            webBrowser.DocumentCompleted += handler;
            try
            {
                webBrowser.Navigate(url);
                await tcs.Task; // wait for DocumentCompleted
            }
            finally
            {
                // always unsubscribe so the handler does not fire for later navigations
                webBrowser.DocumentCompleted -= handler;
            }
        }

        // get the root element
        var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

        // poll the current HTML for changes asynchronously
        var html = documentElement.OuterHtml;
        while (true)
        {
            // wait asynchronously, this will throw if cancellation requested
            await Task.Delay(POLL_DELAY, token);

            // continue polling if the WebBrowser is still busy
            if (webBrowser.IsBusy)
                continue;

            var htmlNow = documentElement.OuterHtml;
            if (html == htmlNow)
                break; // no changes detected, end the poll loop

            html = htmlNow;
        }

        // consider the page fully rendered
        token.ThrowIfCancellationRequested();
        return html;
    }

    // enable HTML5 (assuming we're running IE10+)
    // more info: https://stackoverflow.com/a/18333982/1768303
    /// <summary>
    /// Writes the current executable's file name, with the DWORD value 10000, under the
    /// per-user FEATURE_BROWSER_EMULATION registry key. Does nothing unless
    /// LicenseManager.UsageMode is Runtime.
    /// </summary>
    public static void SetFeatureBrowserEmulation()
    {
        if (System.ComponentModel.LicenseManager.UsageMode != System.ComponentModel.LicenseUsageMode.Runtime)
            return;

        var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
        // verbatim string: the backslashes in the registry path must not be treated as escapes
        Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
            appName, 10000, RegistryValueKind.DWord);
    }
}
/// <summary>
/// MessageLoopApartment
/// Owns an STA thread that pumps messages through Application.Run and exposes a
/// TaskScheduler for executing work serially on that thread
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public class MessageLoopApartment : IDisposable
{
    Thread _thread; // the STA thread
    TaskScheduler _taskScheduler; // the STA thread's task scheduler

    /// <summary>Scheduler bound to the STA thread; null after Dispose.</summary>
    public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

    /// <summary>Starts the STA thread and blocks until its task scheduler is available.</summary>
    public MessageLoopApartment()
    {
        var schedulerSource = new TaskCompletionSource<TaskScheduler>();

        // body of the STA thread: hook the first Application.Idle, then pump messages
        ParameterizedThreadStart pumpMessages = unused =>
        {
            EventHandler onFirstIdle = null;
            onFirstIdle = (sender, args) =>
            {
                // one-shot: by the time Idle fires the message loop is running
                // and the SynchronizationContext has been installed
                Application.Idle -= onFirstIdle;
                schedulerSource.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
            };
            Application.Idle += onFirstIdle;
            Application.Run();
        };

        _thread = new Thread(pumpMessages);
        _thread.SetApartmentState(ApartmentState.STA);
        _thread.IsBackground = true;
        _thread.Start();

        // wait for the STA thread to hand over its scheduler
        _taskScheduler = schedulerSource.Task.Result;
    }

    /// <summary>Stops the message loop and joins the STA thread; later calls do nothing.</summary>
    public void Dispose()
    {
        var scheduler = _taskScheduler;
        if (scheduler == null)
            return;

        _taskScheduler = null;

        // Application.ExitThread() has to run on the STA thread itself
        var exitTask = Task.Factory.StartNew(
            () => Application.ExitThread(),
            CancellationToken.None,
            TaskCreationOptions.None,
            scheduler);
        exitTask.Wait();

        _thread.Join();
        _thread = null;
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks until it completes.</summary>
    public void Invoke(Action action)
    {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        pending.Wait();
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks for its result.</summary>
    public TResult Invoke<TResult>(Func<TResult> action)
    {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        return pending.Result;
    }

    /// <summary>Queues <paramref name="action"/> to the STA thread without blocking.</summary>
    public Task Run(Action action, CancellationToken token)
    {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues a function to the STA thread; the returned task carries its result.</summary>
    public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token)
    {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues an asynchronous action to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task Run(Func<Task> action, CancellationToken token)
    {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }

    /// <summary>Queues an asynchronous function to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token)
    {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }
}
}

The code you linked was designed for use inside a console app or Windows service. When you use it from a WinForms app, you're experiencing a deadlock caused by task.Wait() here:
var task = ScrapSitesAsync(
new[] { "http://example.com", "http://example.org", "http://example.net" },
cts.Token);
task.Wait();
To understand the nature of this deadlock, refer to Stephen Cleary's "Don't Block on Async Code".
The easiest workaround (but not the right one) might be to add ConfigureAwait(false) here:
// run the navigation task inside MessageLoopApartment
string html = await apartment.Run(() =>
webBrowser.NavigateAsync(url, navigationToken), navigationToken).ConfigureAwait(false);
However, the right fix would be to get rid of MessageLoopApartment altogether: it's redundant for a WinForms app, which already has its own message loop. Then, use await task instead of task.Wait().
This way, the code actually gets simpler:
namespace WebScraperAsync005
{
/// <summary>
/// Single-button form that scrapes a fixed list of pages with a WebBrowser
/// without blocking the UI thread.
/// </summary>
public partial class Form1 : Form
{
    // main logic
    /// <summary>
    /// Navigates to each URL in turn and writes its address and rendered HTML to the console.
    /// Intended to be awaited from the UI thread (e.g. a button handler).
    /// </summary>
    /// <param name="urls">Addresses to visit, in order.</param>
    /// <param name="token">Cancels the whole run; each navigation also gets its own 30-second limit.</param>
    static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
    {
        using (var webBrowser = new WebBrowser())
        {
            foreach (var url in urls)
            {
                Console.WriteLine("URL:\n" + url);

                // cancel in 30s or when the main token is signalled;
                // disposed per iteration so the timer and the link to the main token are released
                using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token))
                {
                    navigationCts.CancelAfter(TimeSpan.FromSeconds(30));

                    // run the navigation directly on the calling thread's message loop
                    string html = await webBrowser.NavigateAsync(url, navigationCts.Token);
                    Console.WriteLine("HTML:\n" + html);
                }
            }
        }
    }

    /// <summary>Initializes the designer-generated components.</summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Starts the scrape asynchronously; the button stays disabled while it runs and
    /// the outcome (completion or error message) is shown in a message box.
    /// </summary>
    private async void button1_Click(object sender, EventArgs e)
    {
        this.button1.Enabled = false;
        try
        {
            WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

            // overall limit of 3 minutes for the whole run
            using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
            {
                await ScrapSitesAsync(
                    new[] { "http://example.com", "http://example.org", "http://example.net" },
                    cts.Token);
            }
            MessageBox.Show("Completed.");
        }
        catch (Exception ex)
        {
            // unwrap nested AggregateExceptions to report the innermost cause
            while (ex is AggregateException && ex.InnerException != null)
                ex = ex.InnerException;
            MessageBox.Show(ex.Message);
        }
        finally
        {
            // re-enable the button even if reporting the result throws
            this.button1.Enabled = true;
        }
    }
}
/// <summary>
/// WebBrowserExt - WebBrowser extensions
/// by Noseratio - http://stackoverflow.com/a/22262976/1768303
/// </summary>
public static class WebBrowserExt
{
    // delay, in milliseconds, between two consecutive snapshots of the page's HTML
    const int POLL_DELAY = 500;

    /// <summary>
    /// Navigates to <paramref name="url"/>, awaits DocumentCompleted, then polls the root
    /// element's OuterHtml every POLL_DELAY milliseconds until the browser is not busy
    /// and two consecutive snapshots are identical.
    /// </summary>
    /// <param name="webBrowser">The browser to navigate.</param>
    /// <param name="url">The address to load.</param>
    /// <param name="token">Cancels both the wait for DocumentCompleted and the polling loop.</param>
    /// <returns>The OuterHtml of the page's html element.</returns>
    /// <exception cref="OperationCanceledException">The token was signalled before the page settled.</exception>
    public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
    {
        // navigate and await DocumentCompleted
        var tcs = new TaskCompletionSource<bool>();
        WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

        using (token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: true))
        {
            webBrowser.DocumentCompleted += handler;
            try
            {
                webBrowser.Navigate(url);
                await tcs.Task; // wait for DocumentCompleted
            }
            finally
            {
                // always unsubscribe so the handler does not fire for later navigations
                webBrowser.DocumentCompleted -= handler;
            }
        }

        // get the root element
        var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

        // poll the current HTML for changes asynchronously
        var html = documentElement.OuterHtml;
        while (true)
        {
            // wait asynchronously, this will throw if cancellation requested
            await Task.Delay(POLL_DELAY, token);

            // continue polling if the WebBrowser is still busy
            if (webBrowser.IsBusy)
                continue;

            var htmlNow = documentElement.OuterHtml;
            if (html == htmlNow)
                break; // no changes detected, end the poll loop

            html = htmlNow;
        }

        // consider the page fully rendered
        token.ThrowIfCancellationRequested();
        return html;
    }

    // enable HTML5 (assuming we're running IE10+)
    // more info: http://stackoverflow.com/a/18333982/1768303
    /// <summary>
    /// Writes the current executable's file name, with the DWORD value 10000, under the
    /// per-user FEATURE_BROWSER_EMULATION registry key. Does nothing unless
    /// LicenseManager.UsageMode is Runtime.
    /// </summary>
    public static void SetFeatureBrowserEmulation()
    {
        if (System.ComponentModel.LicenseManager.UsageMode != System.ComponentModel.LicenseUsageMode.Runtime)
            return;

        var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
        // verbatim string: the backslashes in the registry path must not be treated as escapes
        Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
            appName, 10000, RegistryValueKind.DWord);
    }
}
}

Related

How to cancel Task without exception?

I need to execute a kind of LongRunning task after a delay.
Each Task can be cancelled. I prefer TPL with cancellationToken.
Since my task is long running and before starting a task it has to be placed in dictionary I have to use new Task(). But I've faced different behavior - when task is created using new Task() after Cancel() it throws TaskCanceledException whereas a task created with Task.Run doesn't throw an exception.
Generally I need to recognize the difference and not get TaskCanceledException.
It's my code:
/// <summary>
/// Schedules actions to run after a delay and lets callers cancel a scheduled
/// action through the id returned at scheduling time.
/// </summary>
internal sealed class Worker : IDisposable
{
// scheduled work, keyed by cancellation id: the started task and the source that cancels it
private readonly IDictionary<Guid, (Task task, CancellationTokenSource cts)> _tasks =
new Dictionary<Guid, (Task task, CancellationTokenSource cts)>();
/// <summary>
/// Variant built with the Task constructor: registers the task in the dictionary
/// first, then starts it on the default scheduler.
/// </summary>
/// <param name="action">Work to run once the delay has elapsed.</param>
/// <param name="waitBeforeExecute">Delay before <paramref name="action"/> runs.</param>
/// <param name="cancellationId">Receives the id to pass to Abort.</param>
public void ExecuteAfter(Action action, TimeSpan waitBeforeExecute, out Guid cancellationId)
{
var cts = new CancellationTokenSource();
// NOTE: the Task constructor takes an Action, so this async lambda is compiled as
// async void. The Task therefore finishes at the first await, and an exception thrown
// after that point (e.g. by a cancelled Task.Delay) is not captured by the Task.
var task = new Task(async () =>
{
await Task.Delay(waitBeforeExecute, cts.Token);
action();
}, cts.Token, TaskCreationOptions.LongRunning);
cancellationId = Guid.NewGuid();
_tasks.Add(cancellationId, (task, cts));
task.Start(TaskScheduler.Default);
}
/// <summary>
/// Variant built with Task.Run, which accepts the async lambda as a Func&lt;Task&gt;;
/// the task is already running when it is stored in the dictionary.
/// </summary>
/// <param name="action">Work to run once the delay has elapsed.</param>
/// <param name="waitBeforeExecute">Delay before <paramref name="action"/> runs.</param>
/// <param name="cancellationId">Receives the id to pass to Abort.</param>
public void ExecuteAfter2(Action action, TimeSpan waitBeforeExecute, out Guid cancellationId)
{
var cts = new CancellationTokenSource();
cancellationId = Guid.NewGuid();
_tasks.Add(cancellationId, (Task.Run(async () =>
{
await Task.Delay(waitBeforeExecute, cts.Token);
action();
}, cts.Token), cts));
}
/// <summary>
/// Cancels the work registered under <paramref name="cancellationId"/>, removes it
/// from the dictionary and disposes its resources. Unknown ids are ignored.
/// </summary>
public void Abort(Guid cancellationId)
{
if (_tasks.TryGetValue(cancellationId, out var value))
{
value.cts.Cancel();
// the task is deliberately not awaited here (the wait is commented out)
//value.task.Wait();
_tasks.Remove(cancellationId);
Dispose(value.cts);
Dispose(value.task);
}
}
/// <summary>
/// Disposes every remaining cancellation source and task and clears the dictionary.
/// Pending work is not cancelled here.
/// </summary>
public void Dispose()
{
if (_tasks.Count > 0)
{
foreach (var t in _tasks)
{
Dispose(t.Value.cts);
Dispose(t.Value.task);
}
_tasks.Clear();
}
}
/// <summary>
/// Best-effort dispose: ignores null and swallows any exception thrown by obj.Dispose().
/// </summary>
private static void Dispose(IDisposable obj)
{
if (obj == null)
{
return;
}
try
{
obj.Dispose();
}
catch (Exception ex)
{
// intentionally ignored; logging is commented out
//Log.Exception(ex);
}
}
}
/// <summary>
/// Console driver for the question: schedules an action 10 seconds out,
/// aborts it after 3 seconds, then waits for a key press.
/// </summary>
internal class Program
{
    private static void Main(string[] args)
    {
        Action printDots = () => Console.WriteLine("......");
        Console.WriteLine("Started");

        using (var worker = new Worker())
        {
            // ExecuteAfter is the Task-constructor variant; ExecuteAfter2 (Task.Run) is the alternative
            worker.ExecuteAfter(printDots, TimeSpan.FromSeconds(10), out var scheduledId);

            // abort well before the 10-second delay elapses
            Thread.Sleep(TimeSpan.FromSeconds(3));
            worker.Abort(scheduledId);
        }

        Console.WriteLine("Enter to exit");
        Console.ReadKey();
    }
}
UPD:
This approach also works without exception
/// <summary>
/// Variant built with Task.Factory.StartNew and LongRunning on the default scheduler.
/// The task stored in the dictionary is the outer task returned by StartNew.
/// </summary>
/// <param name="action">Work to run once the delay has elapsed.</param>
/// <param name="waitBeforeExecute">Delay before <paramref name="action"/> runs.</param>
/// <param name="cancellationId">Receives the id to pass to Abort.</param>
public void ExecuteAfter3(Action action, TimeSpan waitBeforeExecute, out Guid cancellationId)
{
    var cts = new CancellationTokenSource();
    cancellationId = Guid.NewGuid();

    // the async lambda binds to Func<Task>, so StartNew yields a Task<Task>
    Func<Task> delayedAction = async () =>
    {
        await Task.Delay(waitBeforeExecute, cts.Token);
        action();
    };
    var outerTask = Task.Factory.StartNew(
        delayedAction, cts.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);

    _tasks.Add(cancellationId, (outerTask, cts));
}
The reason for the inconsistent behavior is a fundamentally incorrect use of an async delegate in the first case. The Task constructors do not accept a Func<Task>, so an asynchronous delegate passed to a constructor is always interpreted as async void, not async Task. If an exception is raised in an async Task method, it is caught and placed on the returned Task object. That is not true for an async void method: there the exception simply propagates out of the method to the synchronization context and is treated as an unhandled exception (you can familiarize yourself with the details in this Stephen Cleary article). So here is what happens when the constructor is used: a task that is supposed to initiate the asynchronous flow is created and started. Once it reaches the point where Task.Delay(...) returns a promise, the task completes and has no further relationship to anything that happens in the Task.Delay continuation (you can easily check this in the debugger by setting a breakpoint on value.cts.Cancel(): the task object in the _tasks dictionary has the status RanToCompletion, even though the task delegate is essentially still running). When cancellation is requested, the exception is raised inside the Task.Delay method and, because there is no promise object to capture it, it is propagated to the app domain.
In case of Task.Run the situation is different because there are overloads of this method which are able to accept Func<Task> or Func<Task<T>> and unwrap the tasks internally in order to return underlying promise instead of wrapped task which ensures proper task object inside the _tasks dictionary and proper error handling.
The third scenario, although it doesn't throw an exception, is only partially correct. Unlike Task.Run, Task.Factory.StartNew doesn't unwrap the underlying task to return the promise, so the task stored in _tasks is just the wrapper task, as in the constructor case (again, you can check its state with the debugger). It does, however, understand Func<Task> parameters, so the asynchronous delegate has an async Task signature, which at least allows the exception to be handled and stored in the underlying task. To get this underlying task with Task.Factory.StartNew, you need to unwrap the task yourself with the Unwrap() extension method.
Task.Factory.StartNew isn't considered a best practice for creating tasks because of certain dangers related to its use (see there). It can, however, be used with some caveats if you need to apply specific options such as LongRunning, which cannot be applied directly with Task.Run.
I don't know why I got down votes here but it's inspired me to update my answer.
UPDATED
My full approach:
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApp4
{
internal class Program
{
/// <summary>
/// Schedules three delayed actions (5, 10 and 15 seconds), immediately cancels
/// the third, and keeps the worker alive until a key is pressed.
/// </summary>
private static void Main(string[] args)
{
    using (var delayedWorker = new DelayedWorker())
    {
        Guid firstId, secondId, thirdId;

        delayedWorker.ProcessWithDelay(() => Console.WriteLine("100"), TimeSpan.FromSeconds(5), out firstId);
        delayedWorker.ProcessWithDelay(() => Console.WriteLine("200"), TimeSpan.FromSeconds(10), out secondId);
        delayedWorker.ProcessWithDelay(() => Console.WriteLine("300"), TimeSpan.FromSeconds(15), out thirdId);

        // only the last scheduled action is aborted
        Cancel_3(delayedWorker, thirdId);

        Console.ReadKey();
    }
}
/// <summary>
/// Aborts the given delayed action from a thread-pool thread and blocks until
/// the abort call has returned.
/// </summary>
private static void Cancel_3(DelayedWorker delayedWorker, Guid cancellationId_3)
{
    Action abort = () => { delayedWorker.Abort(cancellationId_3); };
    var abortTask = Task.Run(abort);
    abortTask.Wait();
}
/// <summary>
/// Runs actions after a delay on dedicated (LongRunning) tasks. Each scheduled action
/// gets an id that can be passed to Abort; finished entries remove themselves through
/// a continuation.
/// </summary>
internal sealed class DelayedWorker : IDisposable
{
// held while an action executes (Process) and for the whole of Abort
private readonly object _locker = new object();
// guards the _disposing flag and the self-removal done by task continuations
private readonly object _disposeLocker = new object();
// scheduled work keyed by cancellation id
// NOTE(review): this Dictionary is touched under different locks (or none) by
// ProcessWithDelay, Abort, the continuation and Dispose - confirm that is safe
private readonly IDictionary<Guid, (Task task, CancellationTokenSource cts)> _tasks = new Dictionary<Guid, (Task task, CancellationTokenSource cts)>();
// set once Dispose has started; continuations then stop removing entries themselves
private bool _disposing;
/// <summary>
/// Schedules <paramref name="action"/> to run after <paramref name="waitBeforeExecute"/>.
/// </summary>
/// <param name="action">Work to execute once the delay has elapsed.</param>
/// <param name="waitBeforeExecute">Delay before the action runs.</param>
/// <param name="cancellationId">Receives the id to pass to Abort.</param>
public void ProcessWithDelay(Action action, TimeSpan waitBeforeExecute, out Guid cancellationId)
{
Console.WriteLine("Creating delayed action...");
// tempCts owns the source until it has been stored in _tasks; see the finally block
CancellationTokenSource tempCts = null;
CancellationTokenSource cts = null;
try
{
// local copy: an out parameter cannot be captured by the lambda below
var id = cancellationId = Guid.NewGuid();
tempCts = new CancellationTokenSource();
cts = tempCts;
var task = new Task(() => { Process(action, waitBeforeExecute, cts); }, TaskCreationOptions.LongRunning);
_tasks.Add(cancellationId, (task, cts));
// ownership transferred to _tasks; the finally block must not dispose it any more
tempCts = null;
// when the task ends, drop its entry unless Dispose is already cleaning up
task.ContinueWith(t =>
{
lock (_disposeLocker)
{
if (!_disposing)
{
TryRemove(id);
}
}
}, TaskContinuationOptions.ExecuteSynchronously);
Console.WriteLine($"Created(cancellationId: {cancellationId})");
task.Start(TaskScheduler.Default);
}
finally
{
// only reached with a non-null tempCts if something threw before the source was stored
if (tempCts != null)
{
tempCts.Dispose();
}
}
}
/// <summary>
/// Task body: blocks for the delay (or until cancellation is signalled), then runs
/// the action under _locker unless cancellation was requested.
/// </summary>
private void Process(Action action, TimeSpan waitBeforeExecute, CancellationTokenSource cts)
{
Console.WriteLine("Starting delayed action...");
// returns early when the token is cancelled, otherwise after the timeout
cts.Token.WaitHandle.WaitOne(waitBeforeExecute);
if (cts.Token.IsCancellationRequested)
{
return;
}
lock (_locker)
{
Console.WriteLine("Performing action...");
action();
}
}
/// <summary>
/// Cancels the delayed action registered under <paramref name="cancellationId"/>
/// and waits for its task to finish.
/// </summary>
/// <returns>
/// false when the task had already completed; true when it was cancelled - and also
/// when the id is not found.
/// </returns>
public bool Abort(Guid cancellationId)
{
Console.WriteLine($"Aborting(cancellationId: {cancellationId})...");
lock (_locker)
{
if (_tasks.TryGetValue(cancellationId, out var value))
{
if (value.task.IsCompleted)
{
Console.WriteLine("too late");
return false;
}
value.cts.Cancel();
// NOTE(review): this waits while holding _locker, and Process takes _locker after its
// cancellation check - looks like the two could block each other if the delay
// elapses just before Abort runs; confirm
value.task.Wait();
Console.WriteLine("Aborted");
return true;
}
Console.WriteLine("Either too late or wrong cancellation id");
// NOTE(review): returns true although nothing was aborted - confirm this is intended
return true;
}
}
/// <summary>Removes and disposes the entry for <paramref name="id"/>, if present.</summary>
private void TryRemove(Guid id)
{
if (_tasks.TryGetValue(id, out var value))
{
Remove(id, value.task, value.cts);
}
}
/// <summary>Removes the entry from the dictionary and disposes its source and task.</summary>
private void Remove(Guid id, Task task, CancellationTokenSource cts)
{
_tasks.Remove(id);
Dispose(cts);
Dispose(task);
}
/// <summary>
/// Marks the worker as disposing, then cancels, waits for and disposes every
/// remaining task before clearing the dictionary.
/// </summary>
public void Dispose()
{
lock (_disposeLocker)
{
_disposing = true;
}
if (_tasks.Count > 0)
{
foreach (var t in _tasks)
{
t.Value.cts.Cancel();
t.Value.task.Wait();
Dispose(t.Value.cts);
Dispose(t.Value.task);
}
_tasks.Clear();
}
}
/// <summary>
/// Best-effort dispose: ignores null and swallows any exception thrown by obj.Dispose().
/// </summary>
private static void Dispose(IDisposable obj)
{
if (obj == null)
{
return;
}
try
{
obj.Dispose();
}
catch (Exception ex)
{
// intentionally ignored
//log ex
}
}
}
}
}

InvalidComObjectException when using GeckoWebBrowser

I am using GeckoWebBrowser from GeckoFx45 to extract the source of a webpage after it is fully loaded.
When running my code only once everything works just fine, but when it is run multiple times an InvalidComObjectException is thrown:
System.Runtime.InteropServices.InvalidComObjectException:
'COM object that has been separated from its underlying RCW cannot be used.'
Code:
/// <summary>
/// Loads <paramref name="url"/> in a GeckoWebBrowser created on a fresh
/// MessageLoopApartment thread and returns the document element's OuterHtml once
/// DocumentCompleted fires. Returns null when waiting for the page throws.
/// </summary>
/// <param name="url">The address to load.</param>
/// <param name="cancellationToken">Passed to MessageLoopApartment.Run when queuing the work.</param>
public static async Task<string> LoadDomSourceAsync(string url, CancellationToken cancellationToken) {
    using (var apartment = new MessageLoopApartment()) {
        return await apartment.Run(async () => {
            using (var browser = new GeckoWebBrowser()) {
                var sourceReady = new TaskCompletionSource<string>();
                try {
                    // success: publish the rendered markup of the root element
                    browser.DocumentCompleted += (sender, args) => {
                        var root = (GeckoHtmlHtmlElement)browser.Document.DocumentElement;
                        sourceReady.TrySetResult(root.OuterHtml);
                    };
                    // failure: surface navigation errors through the same task
                    browser.NavigationError += (sender, args) => {
                        sourceReady.TrySetException(new GeckoNetworkException());
                    };

                    browser.Navigate(url);
                    return await sourceReady.Task;
                } catch (Exception error) {
                    sourceReady.TrySetException(error);
                    return null;
                } finally {
                    browser.Stop();
                }
            }
        }, cancellationToken);
    }
}
MessageLoopApartment:
using System;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace StreamLinkExtract {
/// <summary>
/// MessageLoopApartment
/// Owns an STA thread that pumps messages through Application.Run and exposes a
/// TaskScheduler for executing work serially on that thread
/// by Noseratio - http://stackoverflow.com/a/22262976/1768303
/// </summary>
public class MessageLoopApartment : IDisposable {
    Thread _thread; // the STA thread
    TaskScheduler _taskScheduler; // the STA thread's task scheduler

    /// <summary>Scheduler bound to the STA thread; null after Dispose.</summary>
    public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

    /// <summary>Starts the STA thread and blocks until its task scheduler is available.</summary>
    public MessageLoopApartment() {
        var schedulerSource = new TaskCompletionSource<TaskScheduler>();

        // body of the STA thread: hook the first Application.Idle, then pump messages
        ParameterizedThreadStart pumpMessages = unused => {
            EventHandler onFirstIdle = null;
            onFirstIdle = (sender, args) => {
                // one-shot: by the time Idle fires the message loop is running
                // and the SynchronizationContext has been installed
                Application.Idle -= onFirstIdle;
                schedulerSource.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
            };
            Application.Idle += onFirstIdle;
            Application.Run();
        };

        _thread = new Thread(pumpMessages);
        _thread.SetApartmentState(ApartmentState.STA);
        _thread.IsBackground = true;
        _thread.Start();

        // wait for the STA thread to hand over its scheduler
        _taskScheduler = schedulerSource.Task.Result;
    }

    /// <summary>Stops the message loop and joins the STA thread; later calls do nothing.</summary>
    public void Dispose() {
        var scheduler = _taskScheduler;
        if (scheduler == null)
            return;

        _taskScheduler = null;

        // Application.ExitThread() has to run on the STA thread itself
        var exitTask = Task.Factory.StartNew(
            () => Application.ExitThread(),
            CancellationToken.None,
            TaskCreationOptions.None,
            scheduler);
        exitTask.Wait();

        _thread.Join();
        _thread = null;
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks until it completes.</summary>
    public void Invoke(Action action) {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        pending.Wait();
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks for its result.</summary>
    public TResult Invoke<TResult>(Func<TResult> action) {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        return pending.Result;
    }

    /// <summary>Queues <paramref name="action"/> to the STA thread without blocking.</summary>
    public Task Run(Action action, CancellationToken token) {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues a function to the STA thread; the returned task carries its result.</summary>
    public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token) {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues an asynchronous action to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task Run(Func<Task> action, CancellationToken token) {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }

    /// <summary>Queues an asynchronous function to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token) {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }
}
}

Use threadpool to limit max number of threads - Attempted to read or write protected memory error

I am using some scraping code by Noseratio found here: https://stackoverflow.com/a/22262976/3499115. He wrote it to scrape a list of URLs, but I am using it to render only one URL at a time, inside another web crawler (an MVC controller) that I am using. I call this code each time I find a specific type of link, and it appears that doing this many times is causing me to run out of memory. Maybe a solution would be to use a thread pool and limit the maximum number of threads, but how would I apply that to this code? Here is the web crawler code that calls the WebBrowser code:
/// <summary>
/// Renders <paramref name="url"/> with the WebBrowser-based scraper and parses the
/// resulting markup. Blocks the calling thread until the scrape finishes or fails.
/// </summary>
/// <param name="url">The address to render.</param>
/// <returns>The parsed document, or null when scraping failed (the error message is written to the console).</returns>
public static HtmlDocument renderJavascript(string url)
{
    HtmlDocument doc = new HtmlDocument();
    // using webBrowserScraper
    try
    {
        WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

        // overall limit of 3 minutes; disposed so the source's timer is released on
        // every call - this method runs once per matching link
        using (var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3)))
        {
            var task = WebBrowserScraper.ScrapeSitesAsync(
                url,
                cts.Token);
            task.Wait();
            doc.LoadHtml(task.Result);
            return doc;
        }
    }
    catch (Exception ex)
    {
        // unwrap nested AggregateExceptions to report the innermost cause
        while (ex is AggregateException && ex.InnerException != null)
            ex = ex.InnerException;
        Console.WriteLine(ex.Message);
    }
    return null;
}
And the WebBrowser code (I just changed the parameter to a single string in the ScrapeSitesAsync function):
using System;
using using System.Linq;
using System.Text;
using Microsoft.Win32;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace Abot.Demo
{
/// <summary>
/// Scrapes a single page with a WebBrowser hosted on its own STA message-loop thread.
/// </summary>
public class WebBrowserScraper
{
    // by Noseratio - https://stackoverflow.com/a/22262976/1768303
    // main logic
    /// <summary>
    /// Creates a MessageLoopApartment and a WebBrowser inside it, navigates to
    /// <paramref name="url"/> and returns the rendered HTML.
    /// </summary>
    /// <param name="url">The address to render.</param>
    /// <param name="token">Cancels the scrape; the navigation is additionally limited to 10 seconds.</param>
    /// <returns>The OuterHtml reported by WebBrowserExt.NavigateAsync.</returns>
    public static async Task<string> ScrapeSitesAsync(string url, CancellationToken token)
    {
        using (var apartment = new MessageLoopApartment())
        {
            // create WebBrowser inside MessageLoopApartment
            var webBrowser = apartment.Invoke(() => new WebBrowser());
            try
            {
                Console.WriteLine("WebBrowser URL:\n" + url);

                // cancel in 10s or when the main token is signalled;
                // disposed so the timer and the link to the main token are released on every call
                using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token))
                {
                    navigationCts.CancelAfter(TimeSpan.FromSeconds(10));
                    var navigationToken = navigationCts.Token;

                    // run the navigation task inside MessageLoopApartment
                    string html = await apartment.Run(() =>
                        webBrowser.NavigateAsync(url, navigationToken), navigationToken);

                    Console.WriteLine("Scrape complete for URL:\n" + url);
                    return html;
                }
            }
            finally
            {
                // dispose of WebBrowser inside MessageLoopApartment
                apartment.Invoke(() => webBrowser.Dispose());
            }
        }
    }
}
/// <summary>
/// WebBrowserExt - WebBrowser extensions
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public static class WebBrowserExt
{
    // delay, in milliseconds, between two consecutive snapshots of the page's HTML
    const int POLL_DELAY = 500;

    /// <summary>
    /// Navigates to <paramref name="url"/>, awaits DocumentCompleted, then polls the root
    /// element's OuterHtml every POLL_DELAY milliseconds until the browser is not busy
    /// and two consecutive snapshots are identical.
    /// </summary>
    /// <param name="webBrowser">The browser to navigate.</param>
    /// <param name="url">The address to load.</param>
    /// <param name="token">
    /// Cancels the wait for DocumentCompleted (also stopping the browser) and the polling loop.
    /// </param>
    /// <returns>The OuterHtml of the page's html element.</returns>
    /// <exception cref="OperationCanceledException">The token was signalled before the page settled.</exception>
    public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
    {
        // navigate and await DocumentCompleted
        var tcs = new TaskCompletionSource<bool>();
        WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

        // on cancellation: stop the pending navigation, then cancel the wait
        using (token.Register(
            () => { webBrowser.Stop(); tcs.TrySetCanceled(); },
            useSynchronizationContext: true))
        {
            webBrowser.DocumentCompleted += handler;
            try
            {
                webBrowser.Navigate(url);
                await tcs.Task; // wait for DocumentCompleted
            }
            finally
            {
                // always unsubscribe so the handler does not fire for later navigations
                webBrowser.DocumentCompleted -= handler;
            }
        }

        // get the root element
        var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

        // poll the current HTML for changes asynchronously
        var html = documentElement.OuterHtml;
        while (true)
        {
            // wait asynchronously, this will throw if cancellation requested
            await Task.Delay(POLL_DELAY, token);

            // continue polling if the WebBrowser is still busy
            if (webBrowser.IsBusy)
                continue;

            var htmlNow = documentElement.OuterHtml;
            if (html == htmlNow)
                break; // no changes detected, end the poll loop

            html = htmlNow;
        }

        // consider the page fully rendered
        token.ThrowIfCancellationRequested();
        return html;
    }

    // enable HTML5 (assuming we're running IE10+)
    // more info: https://stackoverflow.com/a/18333982/1768303
    /// <summary>
    /// Writes the current executable's file name, with the DWORD value 10000, under the
    /// per-user FEATURE_BROWSER_EMULATION registry key. Does nothing unless
    /// LicenseManager.UsageMode is Runtime.
    /// </summary>
    public static void SetFeatureBrowserEmulation()
    {
        if (System.ComponentModel.LicenseManager.UsageMode != System.ComponentModel.LicenseUsageMode.Runtime)
            return;

        var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
        // verbatim string: the backslashes in the registry path must not be treated as escapes
        Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
            appName, 10000, RegistryValueKind.DWord);
    }
}
/// <summary>
/// MessageLoopApartment
/// Owns an STA thread that pumps messages through Application.Run and exposes a
/// TaskScheduler for executing work serially on that thread
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public class MessageLoopApartment : IDisposable
{
    Thread _thread; // the STA thread
    TaskScheduler _taskScheduler; // the STA thread's task scheduler

    /// <summary>Scheduler bound to the STA thread; null after Dispose.</summary>
    public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

    /// <summary>Starts the STA thread and blocks until its task scheduler is available.</summary>
    public MessageLoopApartment()
    {
        var schedulerSource = new TaskCompletionSource<TaskScheduler>();

        // body of the STA thread: hook the first Application.Idle, then pump messages
        ParameterizedThreadStart pumpMessages = unused =>
        {
            EventHandler onFirstIdle = null;
            onFirstIdle = (sender, args) =>
            {
                // one-shot: by the time Idle fires the message loop is running
                // and the SynchronizationContext has been installed
                Application.Idle -= onFirstIdle;
                schedulerSource.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
            };
            Application.Idle += onFirstIdle;
            Application.Run();
        };

        _thread = new Thread(pumpMessages);
        _thread.SetApartmentState(ApartmentState.STA);
        _thread.IsBackground = true;
        _thread.Start();

        // wait for the STA thread to hand over its scheduler
        _taskScheduler = schedulerSource.Task.Result;
    }

    /// <summary>Stops the message loop and joins the STA thread; later calls do nothing.</summary>
    public void Dispose()
    {
        var scheduler = _taskScheduler;
        if (scheduler == null)
            return;

        _taskScheduler = null;

        // Application.ExitThread() has to run on the STA thread itself
        var exitTask = Task.Factory.StartNew(
            () => Application.ExitThread(),
            CancellationToken.None,
            TaskCreationOptions.None,
            scheduler);
        exitTask.Wait();

        _thread.Join();
        _thread = null;
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks until it completes.</summary>
    public void Invoke(Action action)
    {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        pending.Wait();
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks for its result.</summary>
    public TResult Invoke<TResult>(Func<TResult> action)
    {
        var pending = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        return pending.Result;
    }

    /// <summary>Queues <paramref name="action"/> to the STA thread without blocking.</summary>
    public Task Run(Action action, CancellationToken token)
    {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues a function to the STA thread; the returned task carries its result.</summary>
    public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token)
    {
        var queued = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return queued;
    }

    /// <summary>Queues an asynchronous action to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task Run(Func<Task> action, CancellationToken token)
    {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }

    /// <summary>Queues an asynchronous function to the STA thread; the returned task is the unwrapped inner task.</summary>
    public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token)
    {
        var nested = Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        return nested.Unwrap();
    }
}
}
One solution is to use SemaphoreSlim to maintain a limited pool of WebBrowser objects to scrape web sites in parallel. It also makes sense to share the common message loop for all WebBrowser instances.
Here is how it can be implemented, based on my console web scraper code you linked. The new part is the WebBrowserPool class (warning: only slightly tested):
using Microsoft.Win32;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace AsyncWebBrowserScraper
{
/// <summary>
/// Console test driver: scrapes a fixed list of URLs through a WebBrowserPool
/// limited to two parallel browsers and prints each page's HTML.
/// </summary>
class Program
{
    // by Noseratio - https://stackoverflow.com/a/23819021/1768303

    /// <summary>
    /// Starts one scrape per URL through the pool, awaits all of them, then prints
    /// every URL followed by its HTML.
    /// </summary>
    /// <param name="urls">Addresses to scrape; used as dictionary keys.</param>
    /// <param name="token">Global cancellation, handed to the pool.</param>
    static async Task ScrapeSitesAsync(string[] urls, CancellationToken token)
    {
        using (var pool = new WebBrowserPool(maxParallel: 2, token: token))
        {
            // cancel each site in 30s or when the main token is signalled
            var timeout = (int)TimeSpan.FromSeconds(30).TotalMilliseconds;

            // start all scrapes up front, keyed by URL
            var results = urls.ToDictionary(
                url => url, url => pool.ScrapeSiteAsync(url, timeout));

            await Task.WhenAll(results.Values);

            // every task has completed by now, so reading Result does not block
            foreach (var entry in results)
            {
                Console.WriteLine("URL:\n" + entry.Key);
                string html = entry.Value.Result;
                Console.WriteLine("HTML:\n" + html);
            }
        }
    }

    /// <summary>
    /// Entry point: runs the scrape with a 3-minute overall limit and blocks until it
    /// finishes; on failure prints the innermost exception message and exits with -1.
    /// </summary>
    static void Main(string[] args)
    {
        try
        {
            WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

            var sites = new[]
            {
                "http://example.com",
                "http://example.org",
                "http://example.net",
                "http://www.bing.com",
                "http://www.google.com"
            };
            var cts = new CancellationTokenSource(TimeSpan.FromMinutes(3));

            ScrapeSitesAsync(sites, cts.Token).Wait();

            Console.WriteLine("Press Enter to exit...");
            Console.ReadLine();
        }
        catch (Exception ex)
        {
            // unwrap nested AggregateExceptions to report the innermost cause
            var cause = ex;
            while (cause is AggregateException && cause.InnerException != null)
                cause = cause.InnerException;

            Console.WriteLine(cause.Message);
            Environment.Exit(-1);
        }
    }
}
/// <summary>
/// WebBrowserPool - the pool of WebBrowser objects sharing the same message loop
/// </summary>
public class WebBrowserPool : IDisposable
{
    MessageLoopApartment _apartment; // a WinForms STA thread with message loop
    readonly SemaphoreSlim _semaphore; // regulate available browsers
    readonly Queue<WebBrowser> _browsers; // the pool of available browsers
    readonly HashSet<Task> _pendingTasks; // keep track of pending tasks for proper cancellation
    readonly CancellationTokenSource _cts; // global cancellation (for Dispose)

    /// <summary>
    /// Creates the apartment thread and <paramref name="maxParallel"/> WebBrowser
    /// instances on it.
    /// </summary>
    /// <param name="maxParallel">Number of browsers, i.e. the maximum number of concurrent navigations; must be at least 1.</param>
    /// <param name="token">Cancels every navigation started through this pool.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxParallel"/> is less than 1.</exception>
    public WebBrowserPool(int maxParallel, CancellationToken token)
    {
        if (maxParallel < 1)
        {
            // ArgumentOutOfRangeException derives from ArgumentException, so
            // existing catch blocks keep working while the parameter is now
            // reported as ParamName instead of as the message text.
            throw new ArgumentOutOfRangeException(
                "maxParallel", maxParallel, "At least one WebBrowser is required.");
        }

        _cts = CancellationTokenSource.CreateLinkedTokenSource(token);
        _apartment = new MessageLoopApartment();
        _semaphore = new SemaphoreSlim(maxParallel);
        _browsers = new Queue<WebBrowser>();
        _pendingTasks = new HashSet<Task>();

        // init the pool of WebBrowser objects (created on the apartment thread)
        _apartment.Invoke(() =>
        {
            while (--maxParallel >= 0)
                _browsers.Enqueue(new WebBrowser());
        });
    }

    /// <summary>
    /// Navigate to a site and get a snapshot of its DOM HTML.
    /// </summary>
    /// <param name="url">The address to navigate to.</param>
    /// <param name="timeout">Milliseconds allowed once a browser has been acquired, or Timeout.Infinite.</param>
    /// <param name="token">Optional per-call cancellation, combined with the pool-wide token.</param>
    /// <exception cref="ObjectDisposedException">The pool has already been disposed.</exception>
    public async Task<string> ScrapeSiteAsync(string url, int timeout, CancellationToken token = default(CancellationToken))
    {
        if (_apartment == null)
            throw new ObjectDisposedException(this.GetType().Name);

        // A linked source stays registered with both parent tokens until it is
        // disposed, so scope it to this call; otherwise every scrape leaks a
        // registration (and possibly a timer) for the lifetime of the pool.
        using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token, _cts.Token))
        {
            var combinedToken = navigationCts.Token;

            // we have a limited number of WebBrowser objects available, so await the semaphore
            await _semaphore.WaitAsync(combinedToken);
            try
            {
                // the timeout clock starts only after a browser slot is available
                if (timeout != Timeout.Infinite)
                    navigationCts.CancelAfter(timeout);

                // run the main logic on the STA thread
                return await _apartment.Run(async () =>
                {
                    // acquire the 1st available WebBrowser from the pool
                    var webBrowser = _browsers.Dequeue();
                    try
                    {
                        var task = webBrowser.NavigateAsync(url, combinedToken);
                        _pendingTasks.Add(task); // register the pending task
                        try
                        {
                            return await task;
                        }
                        finally
                        {
                            // unregister the completed task
                            _pendingTasks.Remove(task);
                        }
                    }
                    finally
                    {
                        // return the WebBrowser to the pool
                        _browsers.Enqueue(webBrowser);
                    }
                }, combinedToken);
            }
            finally
            {
                _semaphore.Release();
            }
        }
    }

    /// <summary>
    /// Cancels and waits for all pending navigations, disposes of the browsers on
    /// the apartment thread and shuts the apartment down. Calling it again is a no-op.
    /// </summary>
    public void Dispose()
    {
        // Dispose must tolerate repeated calls rather than throw
        if (_apartment == null)
            return;

        try
        {
            try
            {
                // cancel and wait for all pending tasks
                _cts.Cancel();
                var task = _apartment.Run(() => Task.WhenAll(_pendingTasks.ToArray()));
                try
                {
                    task.Wait();
                }
                catch
                {
                    // cancellation is the expected outcome here; anything else is a real error
                    if (!task.IsCanceled)
                        throw;
                }
            }
            finally
            {
                // Dispose of WebBrowser objects on their own thread and WAIT for it:
                // the apartment is shut down right after, so the disposal must not
                // be left as an unobserved queued task.
                _apartment.Invoke(() =>
                {
                    while (_browsers.Any())
                        _browsers.Dequeue().Dispose();
                });
            }
        }
        finally
        {
            _apartment.Dispose();
            _apartment = null;
        }
    }
}
/// <summary>
/// WebBrowserExt - WebBrowser extensions
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public static class WebBrowserExt
{
    // delay, in milliseconds, between two successive snapshots of the page HTML
    const int POLL_DELAY = 500;

    /// <summary>
    /// Navigates to <paramref name="url"/>, awaits DocumentCompleted, then polls
    /// the root element's OuterHtml until two successive snapshots are equal
    /// while the browser is not busy.
    /// </summary>
    /// <param name="webBrowser">The browser to drive.</param>
    /// <param name="url">The address to load.</param>
    /// <param name="token">Stops the navigation and cancels the returned task when signalled.</param>
    /// <returns>The last OuterHtml snapshot of the root element.</returns>
    public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
    {
        // navigate and await DocumentCompleted
        var tcs = new TaskCompletionSource<bool>();
        WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

        // on cancellation: stop the browser and cancel the pending await
        using (token.Register(
            () => { webBrowser.Stop(); tcs.TrySetCanceled(); },
            useSynchronizationContext: true))
        {
            webBrowser.DocumentCompleted += handler;
            try
            {
                webBrowser.Navigate(url);
                await tcs.Task; // wait for DocumentCompleted
            }
            finally
            {
                webBrowser.DocumentCompleted -= handler;
            }
        }

        // get the root element
        var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

        // poll the current HTML for changes asynchronously
        var html = documentElement.OuterHtml;
        while (true)
        {
            // wait asynchronously, this will throw if cancellation requested
            await Task.Delay(POLL_DELAY, token);

            // continue polling if the WebBrowser is still busy
            if (webBrowser.IsBusy)
                continue;

            var htmlNow = documentElement.OuterHtml;
            if (html == htmlNow)
                break; // no changes detected, end the poll loop

            html = htmlNow;
        }

        // consider the page fully rendered
        token.ThrowIfCancellationRequested();
        return html;
    }

    /// <summary>
    /// Enable HTML5 (assuming we're running IE10+) by writing a
    /// FEATURE_BROWSER_EMULATION value of 10000 for the current executable under
    /// HKEY_CURRENT_USER. Does nothing outside of runtime usage mode.
    /// More info: https://stackoverflow.com/a/18333982/1768303
    /// </summary>
    public static void SetFeatureBrowserEmulation()
    {
        if (System.ComponentModel.LicenseManager.UsageMode != System.ComponentModel.LicenseUsageMode.Runtime)
            return;

        var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);

        // verbatim string (@"...") so the backslashes in the key path are literal
        Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
            appName, 10000, RegistryValueKind.DWord);
    }
}
/// <summary>
/// MessageLoopApartment
/// A dedicated STA thread running a message pump; work is queued to it
/// through a task scheduler captured on that thread.
/// by Noseratio - https://stackoverflow.com/a/22262976/1768303
/// </summary>
public class MessageLoopApartment : IDisposable
{
    Thread _thread; // the STA thread
    TaskScheduler _taskScheduler; // the STA thread's task scheduler

    /// <summary>The scheduler bound to the STA thread (null once disposed).</summary>
    public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

    /// <summary>
    /// Starts the STA thread and blocks until its task scheduler is available.
    /// </summary>
    public MessageLoopApartment()
    {
        var schedulerSource = new TaskCompletionSource<TaskScheduler>();

        _thread = new Thread(unused =>
        {
            // The first Application.Idle proves the message loop is running and
            // the SynchronizationContext is installed; capture the scheduler
            // there, exactly once.
            EventHandler onFirstIdle = null;
            onFirstIdle = (sender, args) =>
            {
                Application.Idle -= onFirstIdle;
                schedulerSource.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
            };
            Application.Idle += onFirstIdle;

            Application.Run();
        });
        _thread.SetApartmentState(ApartmentState.STA);
        _thread.IsBackground = true;
        _thread.Start();

        // wait here until the STA thread has published its scheduler
        _taskScheduler = schedulerSource.Task.Result;
    }

    /// <summary>
    /// Shuts the STA thread down: runs Application.ExitThread() on it, then
    /// joins it. Subsequent calls do nothing.
    /// </summary>
    public void Dispose()
    {
        var scheduler = _taskScheduler;
        if (scheduler == null)
            return;

        _taskScheduler = null;

        // Application.ExitThread() has to execute on the STA thread itself
        Task exitRequest = Task.Factory.StartNew(
            () => Application.ExitThread(),
            CancellationToken.None,
            TaskCreationOptions.None,
            scheduler);
        exitRequest.Wait();

        _thread.Join();
        _thread = null;
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread and blocks until it is done.</summary>
    public void Invoke(Action action)
    {
        Task scheduled = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        scheduled.Wait();
    }

    /// <summary>Runs <paramref name="action"/> on the STA thread, blocks and returns its result.</summary>
    public TResult Invoke<TResult>(Func<TResult> action)
    {
        Task<TResult> scheduled = Task.Factory.StartNew(
            action, CancellationToken.None, TaskCreationOptions.None, _taskScheduler);
        return scheduled.Result;
    }

    /// <summary>Queues <paramref name="action"/> to the STA thread without blocking.</summary>
    public Task Run(Action action, CancellationToken token = default(CancellationToken))
    {
        Task scheduled = Task.Factory.StartNew(
            action, token, TaskCreationOptions.None, _taskScheduler);
        return scheduled;
    }

    /// <summary>Queues a value-returning <paramref name="action"/> to the STA thread without blocking.</summary>
    public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token = default(CancellationToken))
    {
        Task<TResult> scheduled = Task.Factory.StartNew(
            action, token, TaskCreationOptions.None, _taskScheduler);
        return scheduled;
    }

    /// <summary>Queues an asynchronous <paramref name="action"/> to the STA thread; the returned task tracks the task it produces.</summary>
    public Task Run(Func<Task> action, CancellationToken token = default(CancellationToken))
    {
        Task<Task> outer = Task.Factory.StartNew(
            action, token, TaskCreationOptions.None, _taskScheduler);
        return outer.Unwrap();
    }

    /// <summary>Queues an asynchronous, value-returning <paramref name="action"/> to the STA thread; the returned task yields the inner result.</summary>
    public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token = default(CancellationToken))
    {
        Task<Task<TResult>> outer = Task.Factory.StartNew(
            action, token, TaskCreationOptions.None, _taskScheduler);
        return outer.Unwrap();
    }
}
}

Timeout an async method implemented with TaskCompletionSource

I have a blackbox object that exposes a method to kick off an async operation, and an event fires when the operation is complete. I have wrapped that into a Task<OpResult> BlackBoxOperationAysnc() method using TaskCompletionSource - that works well.
However, in that async wrapper I'd like to manage completing the async call with a timeout error if the event is not received after a given timeout. Currently I manage it with a timer as:
// Wraps the black box's event-based operation in a Task. A one-shot timer
// completes the task with OpResult.Timeout if the completion event has not
// arrived within timeoutMs.
public Task<OpResult> BlackBoxOperationAysnc() {
    // the completion source must carry the same result type the method returns
    var tcs = new TaskCompletionSource<OpResult>();
    const int timeoutMs = 20000;
    Timer timer = new Timer(_ => tcs.TrySetResult(OpResult.Timeout),
        null, timeoutMs, Timeout.Infinite);
    EventHandler<EndOpEventArgs> eventHandler = (sender, args) => {
        // ...
        tcs.TrySetResult(OpResult.BlarBlar);
    };
    blackBox.EndAsyncOpEvent += eventHandler;
    // Whichever side completes the task first, release the timer and detach the
    // handler. Capturing `timer` here also keeps it reachable: a Timer that
    // nothing references may be garbage-collected before it ever fires.
    tcs.Task.ContinueWith(_ => {
        timer.Dispose();
        blackBox.EndAsyncOpEvent -= eventHandler;
    });
    blackBox.StartAsyncOp();
    return tcs.Task;
}
Is that the only way to manage a timeout? Is there some way to do it without setting up my own timer? I couldn't see any timeout support built into TaskCompletionSource.
You could use CancellationTokenSource with timeout. Use it together with your TaskCompletionSource like this.
E.g.:
// Wraps the black box's event-based operation in a Task that is cancelled
// when the completion event has not arrived within timeoutMs.
public Task<OpResult> BlackBoxOperationAysnc() {
    // the completion source must carry the same result type the method returns
    var tcs = new TaskCompletionSource<OpResult>();
    const int timeoutMs = 20000;
    var ct = new CancellationTokenSource(timeoutMs);
    ct.Token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: false);
    EventHandler<EndOpEventArgs> eventHandler = (sender, args) => {
        // ...
        tcs.TrySetResult(OpResult.BlarBlar);
    };
    blackBox.EndAsyncOpEvent += eventHandler;
    // once the task has completed either way, detach the handler and release
    // the timer owned by the cancellation source
    tcs.Task.ContinueWith(_ => {
        blackBox.EndAsyncOpEvent -= eventHandler;
        ct.Dispose();
    });
    blackBox.StartAsyncOp();
    return tcs.Task;
}
Updated, here's a complete functional example:
using System;
using System.ComponentModel;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApplication
{
public class Program
{
    /// <summary>
    /// .NET 4.5/C# 5.0: convert EAP pattern into TAP pattern with timeout.
    /// </summary>
    /// <param name="state">User state handed to the operation and echoed back in the completion args.</param>
    /// <param name="token">Cancels the returned task when signalled.</param>
    /// <param name="timeout">Milliseconds before the task is cancelled, or Timeout.Infinite for no limit.</param>
    /// <returns>The completion event arguments of the operation.</returns>
    public async Task<AsyncCompletedEventArgs> BlackBoxOperationAsync(
        object state,
        CancellationToken token,
        int timeout = Timeout.Infinite)
    {
        var tcs = new TaskCompletionSource<AsyncCompletedEventArgs>();
        using (var cts = CancellationTokenSource.CreateLinkedTokenSource(token))
        {
            // prepare the timeout
            if (timeout != Timeout.Infinite)
            {
                cts.CancelAfter(timeout);
            }

            // Handle completion. The event and the cancellation callback can race,
            // so every transition uses the Try* form: the loser of the race is
            // ignored instead of throwing InvalidOperationException on its thread.
            AsyncCompletedEventHandler handler = (sender, args) =>
            {
                if (args.Cancelled)
                    tcs.TrySetCanceled();
                else if (args.Error != null)
                    tcs.TrySetException(args.Error);
                else
                    tcs.TrySetResult(args);
            };

            this.BlackBoxOperationCompleted += handler;
            try
            {
                using (cts.Token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: false))
                {
                    // forward the caller's state rather than dropping it
                    this.StartBlackBoxOperation(state);
                    return await tcs.Task.ConfigureAwait(continueOnCapturedContext: false);
                }
            }
            finally
            {
                this.BlackBoxOperationCompleted -= handler;
            }
        }
    }

    // emulate async operation
    AsyncCompletedEventHandler BlackBoxOperationCompleted = delegate { };

    // Completes after one second on a thread-pool thread and raises the completion handler.
    void StartBlackBoxOperation(object state)
    {
        ThreadPool.QueueUserWorkItem(s =>
        {
            Thread.Sleep(1000);
            this.BlackBoxOperationCompleted(this, new AsyncCompletedEventArgs(error: null, cancelled: false, userState: state));
        }, state);
    }

    // test: the first call (1200 ms limit) completes, the second (900 ms limit) is cancelled
    static void Main()
    {
        try
        {
            new Program().BlackBoxOperationAsync(null, CancellationToken.None, 1200).Wait();
            Console.WriteLine("Completed.");
            new Program().BlackBoxOperationAsync(null, CancellationToken.None, 900).Wait();
        }
        catch (Exception ex)
        {
            // unwrap to the innermost cause, but never step onto a null InnerException
            while (ex is AggregateException && ex.InnerException != null)
                ex = ex.InnerException;
            Console.WriteLine(ex.Message);
        }
        Console.ReadLine();
    }
}
}
A .NET 4.0/C# 4.0 version can be found here; it takes advantage of the compiler-generated IEnumerator state machine.
You could use an extension for Task from here (https://stackoverflow.com/a/22078975/2680660) which also uses CancellationTokenSource.
With slight modification:
/// <summary>
/// Awaits <paramref name="task"/> for at most <paramref name="timeout"/>.
/// </summary>
/// <returns>The result of <paramref name="task"/> when it completes first.</returns>
/// <exception cref="TimeoutException">The delay elapsed before the task completed.</exception>
public static async Task<TResult> TimeoutAfter<TResult>(this Task<TResult> task, TimeSpan timeout)
{
    using (var delayCancellation = new CancellationTokenSource())
    {
        Task delay = Task.Delay(timeout, delayCancellation.Token);
        Task winner = await Task.WhenAny(task, delay);

        if (winner != task)
        {
            throw new TimeoutException($"{nameof(TimeoutAfter)}: The operation has timed out after {timeout:mm\\:ss}");
        }

        // the task won the race: stop the pending delay
        delayCancellation.Cancel();

        // await (rather than read Result) so a faulted task rethrows its own exception
        return await task;
    }
}
// Wraps the black box's event-based operation in a Task and limits it to
// 20 seconds through the TimeoutAfter extension.
public Task<OpResult> BlackBoxOperationAysnc()
{
    // the completion source must carry the same result type the method returns
    var tcs = new TaskCompletionSource<OpResult>();
    EventHandler<EndOpEventArgs> eventHandler = (sender, args) => {
        // ...
        tcs.TrySetResult(OpResult.BlarBlar);
    };
    blackBox.EndAsyncOpEvent += eventHandler;
    blackBox.StartAsyncOp();
    return tcs.Task.TimeoutAfter(TimeSpan.FromSeconds(20));
}

Implementing a timeout in c#

I am new to c#; I have mainly done Java.
I want to implement a timeout something along the lines:
// Sketch of the desired behaviour: keep calling tryMethod() and give up with a
// TimeoutException after roughly 5000 time units.
int now= Time.now();
while(true)
{
tryMethod();
// NOTE(review): `now` is read once before the loop and never refreshed, so
// `now > now+5000` cannot become true as written (barring integer overflow);
// presumably the current time was meant to be compared against start + 5000.
if(now > now+5000) throw new TimeoutException();
}
How can I implement this in C#? Thanks!
One possible way would be:
// Call tryMethod() repeatedly; once more than 5000 ms have elapsed on the
// stopwatch, abort with a TimeoutException.
var elapsed = Stopwatch.StartNew();
for (;;)
{
    tryMethod();
    if (elapsed.ElapsedMilliseconds > 5000)
    {
        throw new TimeoutException();
    }
}
However you currently have no way to break out of your loop. I would recommend having tryMethod return a bool and change it to:
// Retry tryMethod() until it reports success; abort with a TimeoutException
// once more than 5000 ms have elapsed on the stopwatch.
var elapsed = Stopwatch.StartNew();
while (true)
{
    if (tryMethod())
    {
        break;
    }
    if (elapsed.ElapsedMilliseconds > 5000)
    {
        throw new TimeoutException();
    }
}
The question is quite old, but yet another option.
// Keep calling tryMethod() until the 5000 ms limit of the token source has
// passed, then report the timeout on the calling thread.
using (CancellationTokenSource cts = new CancellationTokenSource(5000))
{
    while (!cts.IsCancellationRequested)
    {
        tryMethod();
    }

    // Throw here rather than from a callback registered on the token: such a
    // callback runs on the thread that performs the cancellation (the timer
    // thread), so an exception thrown there never reaches this loop.
    throw new TimeoutException();
}
Technically, you should also propagate the CancellationToken into tryMethod() to interrupt it gracefully.
Working demo: (note I had to remove the exception throwing behavior as .netfiddle doesn't like it.)
https://dotnetfiddle.net/WjRxyk
I think you could do this with a timer and a delegate, my example code is below:
using System;
using System.Timers;
class Program
{
    // delegate type used to start tryMethod asynchronously
    public delegate void tm();

    // Starts tryMethod through an asynchronous delegate call and arms a one-shot
    // 5000 ms timer whose handler completes the call and signals the timeout.
    static void Main(string[] args)
    {
        var t = new tm(tryMethod);

        // keep the IAsyncResult: EndInvoke needs it to identify the pending call
        IAsyncResult pending = t.BeginInvoke(null, null);

        var timer = new Timer();
        timer.Interval = 5000;
        // fire once only; EndInvoke must not be called twice for the same call
        timer.AutoReset = false;
        // subscribe before starting so the handler is in place when the timer runs
        timer.Elapsed += (sender, e) => timer_Elapsed(t, pending);
        timer.Start();

        // NOTE(review): Main returns immediately after arming the timer; confirm
        // that the process stays alive long enough for the handler to run.
    }

    // Timer handler: waits for the asynchronous call to finish via EndInvoke,
    // then throws TimeoutException.
    // NOTE(review): an exception thrown from an Elapsed handler may not reach
    // the code that started the timer - confirm against the Timer documentation.
    static void timer_Elapsed(tm p, IAsyncResult pending)
    {
        p.EndInvoke(pending);
        throw new TimeoutException();
    }

    // the operation being timed
    static void tryMethod()
    {
        Console.WriteLine("FooBar");
    }
}
You have tryMethod; you create a delegate that points at tryMethod and start that delegate asynchronously. A timer with an Interval of 5000 ms then passes the delegate to its elapsed handler (which works because a delegate is a reference type, not a value type), and once the 5000 ms have elapsed, the handler calls the EndInvoke method on the delegate.
As long as tryMethod() doesn't block this should do what you want:
Not safe for daylight savings time or changing time zones when mobile:
// Call tryMethod() repeatedly and abort with a TimeoutException once more than
// 5000 ms of local time (DateTime.Now) separate the current reading from the start.
DateTime startTime = DateTime.Now;
for (;;)
{
    tryMethod();
    TimeSpan elapsed = DateTime.Now - startTime;
    if (elapsed.TotalMilliseconds > 5000)
    {
        throw new TimeoutException();
    }
}
Timezone and daylight savings time safe versions:
// Call tryMethod() repeatedly and abort with a TimeoutException once more than
// 5000 ms of UTC time (DateTime.UtcNow) separate the current reading from the start.
DateTime startTime = DateTime.UtcNow;
for (;;)
{
    tryMethod();
    TimeSpan elapsed = DateTime.UtcNow - startTime;
    if (elapsed.TotalMilliseconds > 5000)
    {
        throw new TimeoutException();
    }
}
(.NET 3.5 or higher required for DateTimeOffset.)
// Call tryMethod() repeatedly and abort with a TimeoutException once more than
// 5000 ms separate the current DateTimeOffset.Now reading from the start.
DateTimeOffset startTime = DateTimeOffset.Now;
for (;;)
{
    tryMethod();
    TimeSpan elapsed = DateTimeOffset.Now - startTime;
    if (elapsed.TotalMilliseconds > 5000)
    {
        throw new TimeoutException();
    }
}
Using Tasks for custom timeout on Async method
Here is my implementation of a custom class with a method that wraps a task so that it has a timeout.
/// <summary>
/// Runs an asynchronous operation with a timeout: when the operation has not
/// finished within the limit, <see cref="ActionOnTimeout"/> is invoked, which
/// by default cancels the supplied token source and throws NoInternetException.
/// </summary>
public class TaskWithTimeoutWrapper
{
    // set once the wrapped operation has run to completion
    protected volatile bool taskFinished = false;

    /// <summary>
    /// Runs <paramref name="taskFunc"/> and returns its result as soon as it is
    /// available, or fails through <see cref="ActionOnTimeout"/> when
    /// <paramref name="millisecondsToTimeout"/> elapse first.
    /// </summary>
    /// <param name="millisecondsToTimeout">Time limit in milliseconds.</param>
    /// <param name="taskFunc">Factory for the operation to run.</param>
    /// <param name="cancellationTokenSource">Optional source to cancel on timeout.</param>
    public async Task<T> RunWithCustomTimeoutAsync<T>(int millisecondsToTimeout, Func<Task<T>> taskFunc, CancellationTokenSource cancellationTokenSource = null)
    {
        this.taskFinished = false;
        Task<T> work = this.RunTaskFuncWrappedAsync<T>(taskFunc);
        await this.WaitForWorkOrTimeoutAsync(work, millisecondsToTimeout, cancellationTokenSource);
        // only reached when the work completed (or is completing) in time
        return await work;
    }

    /// <summary>
    /// Runs <paramref name="taskFunc"/> and completes as soon as it does, or
    /// fails through <see cref="ActionOnTimeout"/> when
    /// <paramref name="millisecondsToTimeout"/> elapse first.
    /// </summary>
    /// <param name="millisecondsToTimeout">Time limit in milliseconds.</param>
    /// <param name="taskFunc">Factory for the operation to run.</param>
    /// <param name="cancellationTokenSource">Optional source to cancel on timeout.</param>
    public async Task RunWithCustomTimeoutAsync(int millisecondsToTimeout, Func<Task> taskFunc, CancellationTokenSource cancellationTokenSource = null)
    {
        this.taskFinished = false;
        Task work = this.RunTaskFuncWrappedAsync(taskFunc);
        await this.WaitForWorkOrTimeoutAsync(work, millisecondsToTimeout, cancellationTokenSource);
        // propagates the operation's own exception, if any
        await work;
    }

    // Races the work against a cancellable delay. Waiting for BOTH (Task.WhenAll)
    // would make every call last the full timeout even when the work finishes
    // early, and would delay the timeout error until the work itself ends.
    private async Task WaitForWorkOrTimeoutAsync(Task work, int millisecondsToTimeout, CancellationTokenSource cancellationTokenSource)
    {
        using (var delayCancellation = new CancellationTokenSource())
        {
            Task delay = Task.Delay(millisecondsToTimeout, delayCancellation.Token);
            if (await Task.WhenAny(work, delay) == work)
            {
                // finished in time: release the pending delay timer
                delayCancellation.Cancel();
                return;
            }
        }

        // the delay elapsed first; throws unless the work finished in the meantime
        this.ActionOnTimeout(cancellationTokenSource);
    }

    // Waits for the timeout and then runs the timeout action. No longer used by
    // RunWithCustomTimeoutAsync; kept so derived classes that call it still compile.
    protected async Task DelayToTimeoutAsync(int millisecondsToTimeout, CancellationTokenSource cancellationTokenSource)
    {
        await Task.Delay(millisecondsToTimeout);
        this.ActionOnTimeout(cancellationTokenSource);
    }

    // Generic counterpart of DelayToTimeoutAsync; yields default(T) when no timeout error is raised.
    protected async Task<T> DelayToTimeoutAsync<T>(int millisecondsToTimeout, CancellationTokenSource cancellationTokenSource)
    {
        await this.DelayToTimeoutAsync(millisecondsToTimeout, cancellationTokenSource);
        return default(T);
    }

    /// <summary>
    /// Called when the time limit has elapsed. If the operation has not finished,
    /// cancels <paramref name="cancellationTokenSource"/> (when supplied) and
    /// throws NoInternetException; otherwise does nothing.
    /// </summary>
    protected virtual void ActionOnTimeout(CancellationTokenSource cancellationTokenSource)
    {
        if (!this.taskFinished)
        {
            cancellationTokenSource?.Cancel();
            throw new NoInternetException();
        }
    }

    // Runs the operation and records its completion in taskFinished.
    protected async Task RunTaskFuncWrappedAsync(Func<Task> taskFunc)
    {
        await taskFunc.Invoke();
        this.taskFinished = true;
    }

    // Runs the operation, records its completion in taskFinished and returns its result.
    protected async Task<T> RunTaskFuncWrappedAsync<T>(Func<Task<T>> taskFunc)
    {
        var result = await taskFunc.Invoke();
        this.taskFinished = true;
        return result;
    }
}
Then you can call it like this:
await new TaskWithTimeoutWrapper().RunWithCustomTimeoutAsync(10000, () => this.MyTask());
or
var myResult = await new TaskWithTimeoutWrapper().RunWithCustomTimeoutAsync(10000, () => this.MyTaskThatReturnsMyResult());
And you can add a cancellation token if you want to cancel the running async task if it gets to timeout.
Hope it helps
Another way I like to do it:
/// <summary>
/// Runs an action on its own thread and reports a timeout when the action does
/// not finish within the wait limit.
/// </summary>
public class TimeoutAction
{
    // Possible values of _outcome. Exactly one thread moves it away from Pending.
    private const int Pending = 0;
    private const int Succeeded = 1;
    private const int TimedOut = 2;

    private Thread ActionThread { get; set; }
    private Thread TimeoutThread { get; set; }
    private AutoResetEvent ThreadSynchronizer { get; set; }
    private int _outcome = Pending;

    /// <summary>
    /// Prepares the worker thread and the timeout thread; nothing runs until
    /// <see cref="Start"/> is called.
    /// </summary>
    /// <param name="waitLimit">in ms</param>
    /// <param name="action">delegate action</param>
    public TimeoutAction(int waitLimit, Action action)
    {
        ThreadSynchronizer = new AutoResetEvent(false);
        ActionThread = new Thread(new ThreadStart(delegate
        {
            action.Invoke();
            Settle(Succeeded);
        }));
        TimeoutThread = new Thread(new ThreadStart(delegate
        {
            Thread.Sleep(waitLimit);
            Settle(TimedOut);
        }));
    }

    // Records the outcome if none has been recorded yet and wakes Start().
    // The atomic compare-and-swap guarantees a single winner, so the event is
    // signalled exactly once and the losing thread never touches the handle
    // after Start() has closed it.
    private void Settle(int outcome)
    {
        if (Interlocked.CompareExchange(ref _outcome, outcome, Pending) == Pending)
        {
            ThreadSynchronizer.Set();
        }
    }

    /// <summary>
    /// If the action takes longer than the wait limit, this will throw a TimeoutException
    /// </summary>
    public void Start()
    {
        ActionThread.Start();
        TimeoutThread.Start();
        try
        {
            ThreadSynchronizer.WaitOne();
        }
        finally
        {
            // release the wait handle on the timeout path as well
            ThreadSynchronizer.Close();
        }

        // atomic read of the recorded outcome
        if (Interlocked.CompareExchange(ref _outcome, Pending, Pending) != Succeeded)
        {
            throw new TimeoutException();
        }
    }
}
// Run methodToTimeoutAfter10Seconds() on the thread pool and stop waiting for
// it once the 10-second limit of the token source has passed.
CancellationTokenSource cts = new CancellationTokenSource();
cts.CancelAfter(10000);
try
{
    Task task = Task.Run(() => { methodToTimeoutAfter10Seconds(); }, cts.Token);

    // completes when the token is cancelled, so WhenAny can observe the timeout
    TaskCompletionSource<bool> tcs = new TaskCompletionSource<bool>();
    using (cts.Token.Register(s => ((TaskCompletionSource<bool>)s).TrySetResult(true), tcs))
    {
        if (task != await Task.WhenAny(task, tcs.Task))
        {
            throw new OperationCanceledException(cts.Token);
        }
    }

    // The task has completed at this point. Await it (instead of Wait()) so that
    // cancellation and failures surface as their own exception types rather than
    // wrapped in an AggregateException, which would land in the generic handler.
    await task;

    /* Rest of the code goes here */
}
catch (OperationCanceledException)
{
    // also covers TaskCanceledException, which derives from it
    Console.WriteLine("Timeout");
}
catch (Exception)
{
    Console.WriteLine("Other exceptions");
}
finally
{
    cts.Dispose();
}
Using the mature library Polly, it can be implemented with the optimistic (and thus CancellationToken-based) timeout strategy as follows:
// Build a timeout policy with the optimistic strategy.
// NOTE(review): 60 is presumably the timeout in seconds - confirm against the Polly documentation.
AsyncTimeoutPolicy policy = Policy.TimeoutAsync(60, TimeoutStrategy.Optimistic);
// Execute myTask through the policy, handing it the token the policy supplies
// (`cancel`); CancellationToken.None is passed as the caller's own token.
await policy.ExecuteAsync(async cancel => await myTask(cancel), CancellationToken.None);
myTask(cancel) should have the signature Func<CancellationToken, Task>, e.g. async Task MyTask(CancellationToken token) {...}

Categories