I've got a stream of tokens that are produced very quickly and a processer that is relatively slow. The tokens are of three sub-types and I would prefer them to processed by their priority. So, I would like the tokens to be buffered after they've been produced and are waiting to be processed and have that buffer sorted by priority.
Here're my classes:
public enum Priority
{
High = 3,
Medium = 2,
Low = 1
}
public class Base : IComparable<Base>
{
public int Id { get; set; }
public int CompareTo(Base other)
{
return Id.CompareTo(other.Id);
}
}
public class Foo : Base { }
public class Bar : Base { }
public class Baz : Base { }
public class Token : IComparable<Token>
{
private readonly string _toString;
public Foo Foo { get; }
public Bar Bar { get; }
public Baz Baz { get; }
public Priority Priority =>
Baz == null
? Bar == null
? Priority.High
: Priority.Medium
: Priority.Low;
public int CompareTo(Token other)
{
if (Priority > other.Priority)
{
return -1;
}
if (Priority < other.Priority)
{
return 1;
}
switch (Priority)
{
case Priority.High:
return Foo.CompareTo(other.Foo);
case Priority.Medium:
return Bar.CompareTo(other.Bar);
case Priority.Low:
return Baz.CompareTo(other.Baz);
default:
throw new ArgumentOutOfRangeException();
}
}
public override string ToString()
{
return _toString;
}
public Token(Foo foo)
{
_toString = $"{nameof(Foo)}:{foo.Id}";
Foo = foo;
}
public Token(Foo foo, Bar bar) : this(foo)
{
_toString += $":{nameof(Bar)}:{bar.Id}";
Bar = bar;
}
public Token(Foo foo, Baz baz) : this(foo)
{
_toString += $":{nameof(Baz)}:{baz.Id}";
Baz = baz;
}
}
And here is my producer code:
var random = new Random();
var bazId = 0;
var barId = 0;
var fooTokens = (from id in Observable.Interval(TimeSpan.FromSeconds(1))
.Select(Convert.ToInt32)
.Take(3)
select new Token(new Foo { Id = id }))
.Publish();
var barTokens = (from fooToken in fooTokens
from id in Observable.Range(0, random.Next(5, 10))
.Select(_ => Interlocked.Increment(ref barId))
select new Token(fooToken.Foo, new Bar { Id = id }))
.Publish();
var bazTokens = (from barToken in barTokens
from id in Observable.Range(0, random.Next(1, 5))
.Select(_ => Interlocked.Increment(ref bazId))
select new Token(barToken.Foo, new Baz { Id = id }))
.Publish();
var tokens = bazTokens.Merge(barTokens)
.Merge(fooTokens)
.Do(dt =>
{
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
// Subscription
bazTokens.Connect();
barTokens.Connect();
fooTokens.Connect();
However I'm a bit stuck as to how to buffer and sort the tokens. If I do this, the tokens appear to be produced and consumed at the same time, which suggests that there's some buffering going on behind the scenes, but I can't control it.
tokens.Subscribe(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
If I use a TPL Dataflow ActionBlock, I can see the tokens being produced correctly and processed correctly, but I'm still not sure how to do the sorting.
var proc = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
tokens.Subscribe(dt => proc.Post(dt));
Any ideas or pointers where to go next would be appreciated!
Update:
I got something to work. I added a helper to clean up the code for displaying the test data:
private static void Display(Token dt, ConsoleColor col, int? wait = null)
{
if (wait.HasValue)
{
Thread.Sleep(TimeSpan.FromMilliseconds(wait.Value));
}
Console.ForegroundColor = col;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
}
I added a SortedSet:
var set = new SortedSet<Token>();
var tokens = bazTokens
.Merge(barTokens)
.Merge(fooTokens)
.Do(dt => Display(dt, ConsoleColor.Red));
tokens.Subscribe(dt => set.Add(dt));
And I also added a consumer, although I'm not a fan of my implementation:
var source = new CancellationTokenSource();
Task.Run(() =>
{
while (!source.IsCancellationRequested)
{
var dt = set.FirstOrDefault();
if (dt == null)
{
continue;
}
if (set.Remove(dt))
{
Display(dt, ConsoleColor.Green, 250);
}
}
}, source.Token);
So, now I'm getting exactly the results I'm looking for, but a) I'm not happy with the while polling and b) If I want multiple consumers, I'm going to run into race conditions. So, I'm still looking for better implementations if anyone has one!
The container you want is a priority queue, unfortunately there is no implementation in the .net runtime (there is in the c++ stl/cli but priority_queue is not made available to other languages from that).
There are existing non-MS containers that fill this role, you would need to search and look at the results to pick one that meets your needs.
Using Dataflow you can filter the tokens such that each priority level goes down a different path in your pipeline. The tokens are filtered through the use of a predicate on each priority typed link. Then it's up to you how you want to give preference based on priority.
Sorting:
var highPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
var midPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
var lowPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
var proc = new BufferBlock<Token>();
proc.LinkTo(highPriority, dt => dt.Priority == Priority.High);
proc.LinkTo(midPriority, dt => dt.Priority == Priority.Medium);
proc.LinkTo(lowPriority, dt => dt.Priority == Priority.Low);
tokens.Subscribe(dt => proc.Post(dt));
One way to give preference to higher priority items would be to allow more than the default sequential processing. You can do that by setting the MaxDegreeOfParallelism for each priority block.
Giving Preference:
var highPriOptions = new DataflowLinkOptions(){MaxDegreeOfParallelism = 3}
var highPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
}, highPriOptions);
var midPriOptions = new DataflowLinkOptions(){MaxDegreeOfParallelism = 2}
var midPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
}, midPriOptions);
var lowPriority = new ActionBlock<Token>(dt =>
{
Thread.Sleep(TimeSpan.FromMilliseconds(250));
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine($"{DateTime.Now:mm:ss.fff}:{dt}");
});
var proc = new BufferBlock<Token>();
proc.LinkTo(highPriority, dt => dt.Priority == Priority.High);
proc.LinkTo(midPriority, dt => dt.Priority == Priority.Medium);
proc.LinkTo(lowPriority, dt => dt.Priority == Priority.Low);
tokens.Subscribe(dt => proc.Post(dt));
These samples are by no means complete but should at least give you the idea.
Okay, so I used a normal lock for accessing the SortedSet, then increased the number of consumers and it seems to be working fine, so although I've not been able to come up with a full RX or a split RX / TPL DataFlow solution, this now does what I want, so I'll just show the changes I made in addition to the update in the original question and leave it there.
var set = new SortedSet<Token>();
var locker = new object();
var tokens = bazTokens
.Merge(barTokens)
.Merge(fooTokens)
.Do(dt => Display(dt, ConsoleColor.Red));
tokens.Subscribe(dt =>
{
lock (locker)
{
set.Add(dt);
}
});
for (var i = 0; i < Environment.ProcessorCount; i++)
{
Task.Run(() =>
{
while (!source.IsCancellationRequested)
{
Token dt;
lock (locker)
{
dt = set.FirstOrDefault();
}
if (dt == null)
{
continue;
}
bool removed;
lock (locker)
{
removed = set.Remove(dt);
}
if (removed)
{
Display(dt, ConsoleColor.Green, 750);
}
}
}, source.Token);
}
Thank you to the people who posted solutions, I appreciate the time you spent.
I think the conundrum here is that what you seem to be really after is the results of a pull model, based on fast, hot, push sources. What you seem to want is the "highest" priority yet received, but the question is "received by what?" If you had multiple subscribers, operating at different paces, they could each have their own view of what "highest" was.
So the way I see it is that you want to merge the sources into a kind of reactive, prioritized (sorted) queue, from which you pull results when the observer is ready.
I approached that by using a signal back to the Buffer, saying "my one observer is now ready to see the state of the prioritized list". This is achieved by using the Buffer overload that takes in an observable closing signal. That buffer contains the new list of elements received, which I just merge into the last list, sans 'highest'.
The code is just demo knocked up code for the purposes of this question - there are probably bugs:
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Concurrency;
using System.Reactive.Linq;
using System.Reactive.Subjects;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace RxTests
{
class Program
{
static void Main(string[] args)
{
var p = new Program();
p.TestPrioritisedBuffer();
Console.ReadKey();
}
void TestPrioritisedBuffer()
{
var source1 = Observable.Interval(TimeSpan.FromSeconds(1)).Do((source) => Console.WriteLine("Source1:"+source));
var source2 = Observable.Interval(TimeSpan.FromSeconds(5)).Scan((x,y)=>(x+100)).Do((source) => Console.WriteLine("Source2:" + source)); ;
BehaviorSubject<bool> closingSelector = new BehaviorSubject<bool>(true);
var m = Observable.Merge(source1, source2).
Buffer(closingSelector).
Select(s => new { list =s.ToList(), max=(long)0 }).
Scan((x, y) =>
{
var list = x.list.Union(y.list).OrderBy(k=>k);
var max = list.LastOrDefault();
var res = new
{
list = list.Take(list.Count()-1).ToList(),
max= max
};
return res;
}
).
Do((sorted) => Console.WriteLine("Sorted max:" + sorted.max + ". Priority queue length:" + sorted.list.Count)).
ObserveOn(Scheduler.Default); //observe on other thread
m.Subscribe((v)=> { Console.WriteLine("Observed: "+v.max); Thread.Sleep(3000); closingSelector.OnNext(true); }) ;
}
}
}
Related
I must be doing something fundamentally wrong here. I'm trying to get a "More Like This" query working in a search engine project we have that uses Elastic Search. The idea is that the CMS can write tags (like categories) to the page in a Meta tag or something, and we would read those into Elastic and use them to drive a "more like this" search based upon an input document id.
So if the input document has tags of catfish, chicken, goat I would expect Elastic Search to find other documents that share those tags and not return ones for racecar and airplane.
I've built a proof of concept console app by:
Getting a local Elastic Search 6.6.1 instance running in Docker by following the instructions on https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
Creating a new .NET Framework 4.6.1 Console App
Adding the NuGet packages for NEST 6.5.0 and ElasticSearch.Net 6.5.0
Then I created a new elastic index that contains objects (Type "MyThing") that have a "Tags" property. This tag is a random comma-delimited set of words from a set of possible values. I've inserted anywhere from 100 to 5000 items in the index in testing. I've tried more and fewer possible words in the set.
No matter what I try the MoreLikeThis query never returns anything, and I don't understand why.
Query that isn't returning results:
var result = EsClient.Search<MyThing>(s => s
.Index(DEFAULT_INDEX)
.Query(esQuery =>
{
var mainQuery = esQuery
.MoreLikeThis(mlt => mlt
.Include(true)
.Fields(f => f.Field(ff => ff.Tags, 5))
.Like(l => l.Document(d => d.Id(id)))
);
return mainQuery;
}
Full "program.cs" source:
using Nest;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Test_MoreLikeThis_ES6
{
class Program
{
public class MyThing
{
public string Tags { get; set; }
}
const string ELASTIC_SERVER = "http://localhost:9200";
const string DEFAULT_INDEX = "my_index";
const int NUM_RECORDS = 1000;
private static Uri es_node = new Uri(ELASTIC_SERVER);
private static ConnectionSettings settings = new ConnectionSettings(es_node).DefaultIndex(DEFAULT_INDEX);
private static ElasticClient EsClient = new ElasticClient(settings);
private static Random rnd = new Random();
static void Main(string[] args)
{
Console.WriteLine("Rebuild index? (y):");
var answer = Console.ReadLine().ToLower();
if (answer == "y")
{
RebuildIndex();
for (int i = 0; i < NUM_RECORDS; i++)
{
AddToIndex();
}
}
Console.WriteLine("");
Console.WriteLine("Getting a Thing...");
var aThingId = GetARandomThingId();
Console.WriteLine("");
Console.WriteLine("Looking for something similar to document with id " + aThingId);
Console.WriteLine("");
Console.WriteLine("");
GetMoreLikeAThing(aThingId);
}
private static string GetARandomThingId()
{
var firstdocQuery = EsClient
.Search<MyThing>(s =>
s.Size(1)
.Query(q => {
return q.FunctionScore(fs => fs.Functions(fn => fn.RandomScore(rs => rs.Seed(DateTime.Now.Ticks).Field("_seq_no"))));
})
);
if (!firstdocQuery.IsValid || firstdocQuery.Hits.Count == 0) return null;
var hit = firstdocQuery.Hits.First();
Console.WriteLine("Found a thing with id '" + hit.Id + "' and tags: " + hit.Source.Tags);
return hit.Id;
}
private static void GetMoreLikeAThing(string id)
{
var result = EsClient.Search<MyThing>(s => s
.Index(DEFAULT_INDEX)
.Query(esQuery =>
{
var mainQuery = esQuery
.MoreLikeThis(mlt => mlt
.Include(true)
.Fields(f => f.Field(ff => ff.Tags, 5))
.Like(l => l.Document(d => d.Id(id)))
);
return mainQuery;
}
));
if (result.IsValid)
{
if (result.Hits.Count > 0)
{
Console.WriteLine("These things are similar:");
foreach (var hit in result.Hits)
{
Console.WriteLine(" " + hit.Id + " : " + hit.Source.Tags);
}
}
else
{
Console.WriteLine("No similar things found.");
}
}
else
{
Console.WriteLine("There was an error running the ES query.");
}
Console.WriteLine("");
Console.WriteLine("Enter (y) to get another thing, or anything else to exit");
var y = Console.ReadLine().ToLower();
if (y == "y")
{
var aThingId = GetARandomThingId();
GetMoreLikeAThing(aThingId);
}
Console.WriteLine("");
Console.WriteLine("Any key to exit...");
Console.ReadKey();
}
private static void RebuildIndex()
{
var existsResponse = EsClient.IndexExists(DEFAULT_INDEX);
if (existsResponse.Exists) //delete existing mapping (and data)
{
EsClient.DeleteIndex(DEFAULT_INDEX);
}
var rebuildResponse = EsClient.CreateIndex(DEFAULT_INDEX, c => c.Settings(s => s.NumberOfReplicas(1).NumberOfShards(5)));
var response2 = EsClient.Map<MyThing>(m => m.AutoMap());
}
private static void AddToIndex()
{
var myThing = new MyThing();
var tags = new List<string> {
"catfish",
"tractor",
"racecar",
"airplane",
"chicken",
"goat",
"pig",
"horse",
"goose",
"duck"
};
var randNum = rnd.Next(0, tags.Count);
//get randNum random tags
var rand = tags.OrderBy(o => Guid.NewGuid().ToString()).Take(randNum);
myThing.Tags = string.Join(", ", rand);
var ir = new IndexRequest<MyThing>(myThing);
var indexResponse = EsClient.Index(ir);
Console.WriteLine("Index response: " + indexResponse.Id + " : " + string.Join(" " , myThing.Tags));
}
}
}
The issue here is that the default min_term_freq value of 2 will never be satisfied for any of the terms of the prototype document because all documents contain only each tag (term) once. If you drop min_term_freq to 1, you'll get results. Might also want to set min_doc_freq to 1 too, and combine with a query that excludes the prototype document.
Here's an example to play with
const string ELASTIC_SERVER = "http://localhost:9200";
const string DEFAULT_INDEX = "my_index";
const int NUM_RECORDS = 1000;
private static readonly Random _random = new Random();
private static readonly IReadOnlyList<string> Tags =
new List<string>
{
"catfish",
"tractor",
"racecar",
"airplane",
"chicken",
"goat",
"pig",
"horse",
"goose",
"duck"
};
private static ElasticClient _client;
private static void Main()
{
var pool = new SingleNodeConnectionPool(new Uri(ELASTIC_SERVER));
var settings = new ConnectionSettings(pool)
.DefaultIndex(DEFAULT_INDEX);
_client = new ElasticClient(settings);
Console.WriteLine("Rebuild index? (y):");
var answer = Console.ReadLine().ToLower();
if (answer == "y")
{
RebuildIndex();
AddToIndex();
}
Console.WriteLine();
Console.WriteLine("Getting a Thing...");
var aThingId = GetARandomThingId();
Console.WriteLine();
Console.WriteLine("Looking for something similar to document with id " + aThingId);
Console.WriteLine();
Console.WriteLine();
GetMoreLikeAThing(aThingId);
}
public class MyThing
{
public List<string> Tags { get; set; }
}
private static string GetARandomThingId()
{
var firstdocQuery = _client
.Search<MyThing>(s =>
s.Size(1)
.Query(q => q
.FunctionScore(fs => fs
.Functions(fn => fn
.RandomScore(rs => rs
.Seed(DateTime.Now.Ticks)
.Field("_seq_no")
)
)
)
)
);
if (!firstdocQuery.IsValid || firstdocQuery.Hits.Count == 0) return null;
var hit = firstdocQuery.Hits.First();
Console.WriteLine($"Found a thing with id '{hit.Id}' and tags: {string.Join(", ", hit.Source.Tags)}");
return hit.Id;
}
private static void GetMoreLikeAThing(string id)
{
var result = _client.Search<MyThing>(s => s
.Index(DEFAULT_INDEX)
.Query(esQuery => esQuery
.MoreLikeThis(mlt => mlt
.Include(true)
.Fields(f => f.Field(ff => ff.Tags))
.Like(l => l.Document(d => d.Id(id)))
.MinTermFrequency(1)
.MinDocumentFrequency(1)
) && !esQuery
.Ids(ids => ids
.Values(id)
)
)
);
if (result.IsValid)
{
if (result.Hits.Count > 0)
{
Console.WriteLine("These things are similar:");
foreach (var hit in result.Hits)
{
Console.WriteLine($" {hit.Id}: {string.Join(", ", hit.Source.Tags)}");
}
}
else
{
Console.WriteLine("No similar things found.");
}
}
else
{
Console.WriteLine("There was an error running the ES query.");
}
Console.WriteLine();
Console.WriteLine("Enter (y) to get another thing, or anything else to exit");
var y = Console.ReadLine().ToLower();
if (y == "y")
{
var aThingId = GetARandomThingId();
GetMoreLikeAThing(aThingId);
}
Console.WriteLine();
Console.WriteLine("Any key to exit...");
}
private static void RebuildIndex()
{
var existsResponse = _client.IndexExists(DEFAULT_INDEX);
if (existsResponse.Exists) //delete existing mapping (and data)
{
_client.DeleteIndex(DEFAULT_INDEX);
}
var rebuildResponse = _client.CreateIndex(DEFAULT_INDEX, c => c
.Settings(s => s
.NumberOfShards(1)
)
.Mappings(m => m
.Map<MyThing>(mm => mm.AutoMap())
)
);
}
private static void AddToIndex()
{
var bulkAllObservable = _client.BulkAll(GetMyThings(), b => b
.RefreshOnCompleted()
.Size(1000));
var waitHandle = new ManualResetEvent(false);
Exception exception = null;
var bulkAllObserver = new BulkAllObserver(
onNext: r =>
{
Console.WriteLine($"Indexed page {r.Page}");
},
onError: e =>
{
exception = e;
waitHandle.Set();
},
onCompleted: () => waitHandle.Set());
bulkAllObservable.Subscribe(bulkAllObserver);
waitHandle.WaitOne();
if (exception != null)
{
throw exception;
}
}
private static IEnumerable<MyThing> GetMyThings()
{
for (int i = 0; i < NUM_RECORDS; i++)
{
var randomTags = Tags.OrderBy(o => Guid.NewGuid().ToString())
.Take(_random.Next(0, Tags.Count))
.OrderBy(t => t)
.ToList();
yield return new MyThing { Tags = randomTags };
}
}
And here's an example output
Found a thing with id 'Ugg9LGkBPK3n91HQD1d5' and tags: airplane, goat
These things are similar:
4wg9LGkBPK3n91HQD1l5: airplane, goat
9Ag9LGkBPK3n91HQD1l5: airplane, goat
Vgg9LGkBPK3n91HQD1d5: airplane, goat, goose
sQg9LGkBPK3n91HQD1d5: airplane, duck, goat
lQg9LGkBPK3n91HQD1h5: airplane, catfish, goat
9gg9LGkBPK3n91HQD1l5: airplane, catfish, goat
FQg9LGkBPK3n91HQD1p5: airplane, goat, goose
Jwg9LGkBPK3n91HQD1p5: airplane, goat, goose
Fwg9LGkBPK3n91HQD1d5: airplane, duck, goat, tractor
Kwg9LGkBPK3n91HQD1d5: airplane, goat, goose, horse
I have following code:
public class Batcher<TPayload> : IBatcher<TPayload>
{
private static readonly BufferBlock<BatchElement<TPayload>> BufferBlock = new BufferBlock<BatchElement<TPayload>>(new DataflowBlockOptions
{
EnsureOrdered = true
});
private readonly TransformBlock<BatchElement<TPayload>, BatchElement<TPayload>> BufferInterceptor;
private readonly TransformBlock<BatchElement<TPayload>, BatchElement<TPayload>> TimeoutInterceptor;
public EventsBatcher(int size, int interval, IMagicService magicService, ILogger<Batcher<TPayload, TStrategy>> logger)
{
BufferInterceptor =
new TransformBlock<BatchElement<TPayload>, BatchElement<TPayload>>(x =>
{
logger.LogInformation($"Get a message with value: {x}");
return x;
});
TimeoutInterceptor =
new TransformBlock<BatchElement<TPayload>, BatchElement<TPayload>>(x =>
{
logger.LogInformation($"Move out from transformation block with a value: {x}");
return x;
});
var batchBlock = new BatchBlock<BatchElement<TPayload>>(size, new GroupingDataflowBlockOptions()
{
EnsureOrdered = true
});
var timer = new Timer(async _ =>
{
try
{
batchBlock.TriggerBatch();
var data = await batchBlock.ReceiveAsync();
if (!data.Any() && data.SomeLogic())
return;
await magicService.PushMessageAsync(batchElement.Payload);
}
catch (Exception e)
{
logger.LogError($"Error occurs while trying to invoke action on batch", e);
}
}, null, 0, 500);
var timeoutBlock = new TransformBlock<BatchElement<TPayload>, BatchElement<TPayload>>(v =>
{
timer.Change(interval, Timeout.Infinite);
return v;
});
TimeoutInterceptor.LinkTo(batchBlock);
timeoutBlock.LinkTo(TimeoutInterceptor);
BufferInterceptor.LinkTo(timeoutBlock);
BufferBlock.LinkTo(BufferInterceptor);
}
public async Task<Result<Unit>> SendAsync(BatchElement<TPayload> msg, CancellationToken token = new CancellationToken())
{
try
{
var result = await BufferBlock.SendAsync(msg, token);
return result
? ResultFactory.CreateSuccess()
: ResultFactory.CreateFailure<Unit>("Message was refused by queue");
}
catch (Exception e)
{
return ResultFactory.CreateFailure<Unit>(e.Message);
}
}
}
Which responsibility is to evaluate somehow data every x milliseconds. I try to write unit tests to that to be sure that everything works fine. Those tests are here:
public class BatcherTests
{
public EventsBatcher<int> Initialize(Dictionary<DateTime, int> output)
{
var busMock = new Mock<IMagicService>();
busMock.Setup(x => x.PushMessageAsync(It.IsAny<int>()))
.Callback<Data>((data) =>
{
output.Add(DateTime.Now, data);
}).Returns(Task.CompletedTask);
var loggerMock = new Mock<ILogger<Batcher<int>>>();
return new Batcher<int>(
2,
5000,
busMock.Object,
loggerMock.Object
);
}
[Fact]
public async Task Batcher_ShouldRemoveDuplicatedMessages()
{
var output = new Dictionary<DateTime, int>();
var batcher = Initialize(output);
var first = await batcher.SendAsync(new MockEvent { Payload = 1 });
var second = await batcher.SendAsync(new MockEvent { Payload = 1 });
(first.IsSuccess && second.IsSuccess).ShouldBeTrue();
while (output.Count != 2)
{
}
output.Count.ShouldBe(2);
output.First().Value.ShouldBe(1);
output.Last().Value.ShouldBe(1);
output.Clear();
}
[Fact]
public async Task Batcher_WhenSizeIsSetTo2AndWeSend3Items_ReturnTwoBatchedItemsWithDateIntervalPlusMinus5000msAndAllSendRequestsEndsWithSuccess()
{
var output = new Dictionary<DateTime, int>();
var batcher = Initialize(output);
var first = await batcher.SendAsync(new MockEvent { Payload = 1 });
var second = await batcher.SendAsync(new MockEvent { Payload = 1 });
var third = await batcher.SendAsync(new MockEvent { Payload = 1 });
(first.IsSuccess && second.IsSuccess && third.IsSuccess).ShouldBeTrue();
while (output.Count != 2) //never ends because there are already two elements in output dictionary
{
}
output.Count.ShouldBe(2);
output.First().Value.ShouldBe(2);
output.Last().Value.ShouldBe(1);
var interval = (output.Last().Key - output.First().Key).TotalSeconds;
(interval >= 4.5d && interval <= 5.5d).ShouldBeTrue();
output.Clear();
}
}
But the strange thing is that when I run them separately they end up with a success status. But when I run them all together one of them seems to stuck. This is because a dictionary which is passed to a logic method has 2 elements inside while starting a test. I don't see here a possibility of shared context since stub class is created at the beginning of test cases, the same with a dictionary. Is there something that I missing? I also try to split those test cases to separe classes but the same behavior occurs.
There is shared stated, but it is not in the test (directly).
Your BufferBlock is declared as static in the class Batcher<TPayload>. There is your shared state.
private static readonly BufferBlock<BatchElement<TPayload>> BufferBlock = new BufferBlock<BatchElement<TPayload>>(new DataflowBlockOptions
{
EnsureOrdered = true
});
When multiple tests are executed that shared block is linked to the other blocks multiple times.
I have a class:
public class ShipmentInformation
{
public string OuterNo { get; set; }
public long Start { get; set; }
public long End { get; set; }
}
I have a List<ShipmentInformation> variable called Results.
I then do:
List<ShipmentInformation> FinalResults = new List<ShipmentInformation>();
var OuterNumbers = Results.GroupBy(x => x.OuterNo);
foreach(var item in OuterNumbers)
{
var orderedData = item.OrderBy(x => x.Start);
ShipmentInformation shipment = new ShipmentInformation();
shipment.OuterNo = item.Key;
shipment.Start = orderedData.First().Start;
shipment.End = orderedData.Last().End;
FinalResults.Add(shipment);
}
The issue I have now is that within each grouped item I have various ShipmentInformation but the Start number may not be sequential by x. x can be 300 or 200 based on a incoming parameter. To illustrate I could have
Start = 1, End = 300
Start = 301, End = 600
Start = 601, End = 900
Start = 1201, End = 1500
Start = 1501, End = 1800
Because I have this jump I cannot use the above loop to create an instance of ShipmentInformation and take the first and last item in orderedData to use their data to populate that instance.
I would like some way of identifying a jump by 300 or 200 and creating an instance of ShipmentInformation to add to FinalResults where the data is sequnetial.
Using the above example I would have 2 instances of ShipmentInformation with a Start of 1 and an End of 900 and another with a Start of 1201 and End of 1800
Try the following:
private static IEnumerable<ShipmentInformation> Compress(IEnumerable<ShipmentInformation> shipments)
{
var orderedData = shipments.OrderBy(s => s.OuterNo).ThenBy(s => s.Start);
using (var enumerator = orderedData.GetEnumerator())
{
ShipmentInformation compressed = null;
while (enumerator.MoveNext())
{
var current = enumerator.Current;
if (compressed == null)
{
compressed = current;
continue;
}
if (compressed.OuterNo != current.OuterNo || compressed.End < current.Start - 1)
{
yield return compressed;
compressed = current;
continue;
}
compressed.End = current.End;
}
if (compressed != null)
{
yield return compressed;
}
}
}
Useable like so:
var finalResults = Results.SelectMany(Compress).ToList();
If you want something that probably has terrible performance and is impossible to understand, but only uses out-of-the box LINQ, I think this might do it.
var orderedData = item.OrderBy(x => x.Start);
orderedData
.SelectMany(x =>
Enumerable
.Range(x.Start, 1 + x.End - x.Start)
.Select(n => new { time = n, info = x))
.Select((x, i) => new { index = i, time = x.time, info = x.info } )
.GroupBy(t => t.time - t.info)
.Select(g => new ShipmentInformation {
OuterNo = g.First().Key,
Start = g.First().Start(),
End = g.Last().End });
My brain hurts.
(Edit for clarity: this just replaces what goes inside your foreach loop. You can make it even more horrible by putting this inside a Select statement to replace the foreach loop, like in rich's answer.)
How about this?
List<ShipmentInfo> si = new List<ShipmentInfo>();
si.Add(new ShipmentInfo(orderedData.First()));
for (int index = 1; index < orderedData.Count(); ++index)
{
if (orderedData.ElementAt(index).Start ==
(si.ElementAt(si.Count() - 1).End + 1))
{
si[si.Count() - 1].End = orderedData.ElementAt(index).End;
}
else
{
si.Add(new ShipmentInfo(orderedData.ElementAt(index)));
}
}
FinalResults.AddRange(si);
Another LINQ solution would be to use the Except extension method.
EDIT: Rewritten in C#, includes composing the missing points back into Ranges:
class Program
{
static void Main(string[] args)
{
Range[] l_ranges = new Range[] {
new Range() { Start = 10, End = 19 },
new Range() { Start = 20, End = 29 },
new Range() { Start = 40, End = 49 },
new Range() { Start = 50, End = 59 }
};
var l_flattenedRanges =
from l_range in l_ranges
from l_point in Enumerable.Range(l_range.Start, 1 + l_range.End - l_range.Start)
select l_point;
var l_min = 0;
var l_max = l_flattenedRanges.Max();
var l_allPoints =
Enumerable.Range(l_min, 1 + l_max - l_min);
var l_missingPoints =
l_allPoints.Except(l_flattenedRanges);
var l_lastRange = new Range() { Start = l_missingPoints.Min(), End = l_missingPoints.Min() };
var l_missingRanges = new List<Range>();
l_missingPoints.ToList<int>().ForEach(delegate(int i)
{
if (i > l_lastRange.End + 1)
{
l_missingRanges.Add(l_lastRange);
l_lastRange = new Range() { Start = i, End = i };
}
else
{
l_lastRange.End = i;
}
});
l_missingRanges.Add(l_lastRange);
foreach (Range l_missingRange in l_missingRanges) {
Console.WriteLine("Start = " + l_missingRange.Start + " End = " + l_missingRange.End);
}
Console.ReadKey(true);
}
}
class Range
{
public int Start { get; set; }
public int End { get; set; }
}
I want to process something using parallel loop like this :
public void FillLogs(IEnumerable<IComputer> computers)
{
Parallel.ForEach(computers, cpt=>
{
cpt.Logs = cpt.GetRawLogs().ToList();
});
}
Ok, it works fine. But How to do if I want the FillLogs method return an IEnumerable ?
public IEnumerable<IComputer> FillLogs(IEnumerable<IComputer> computers)
{
Parallel.ForEach(computers, cpt=>
{
cpt.Logs = cpt.GetRawLogs().ToList();
yield return cpt // KO, don't work
});
}
EDIT
It seems not to be possible... but I use something like this :
public IEnumerable<IComputer> FillLogs(IEnumerable<IComputer> computers)
{
return computers.AsParallel().Select(cpt => cpt);
}
But where I put the cpt.Logs = cpt.GetRawLogs().ToList(); instruction
Short version - no, that isn't possible via an iterator block; the longer version probably involves synchronized queue/dequeue between the caller's iterator thread (doing the dequeue) and the parallel workers (doing the enqueue); but as a side note - logs are usually IO-bound, and parallelising things that are IO-bound often doesn't work very well.
If the caller is going to take some time to consume each, then there may be some merit to an approach that only processes one log at a time, but can do that while the caller is consuming the previous log; i.e. it begins a Task for the next item before the yield, and waits for completion after the yield... but that is again, pretty complex. As a simplified example:
static void Main()
{
foreach(string s in Get())
{
Console.WriteLine(s);
}
}
static IEnumerable<string> Get() {
var source = new[] {1, 2, 3, 4, 5};
Task<string> outstandingItem = null;
Func<object, string> transform = x => ProcessItem((int) x);
foreach(var item in source)
{
var tmp = outstandingItem;
// note: passed in as "state", not captured, so not a foreach/capture bug
outstandingItem = new Task<string>(transform, item);
outstandingItem.Start();
if (tmp != null) yield return tmp.Result;
}
if (outstandingItem != null) yield return outstandingItem.Result;
}
static string ProcessItem(int i)
{
return i.ToString();
}
I don't want to be offensive, but maybe there is a lack of understanding. Parallel.ForEach means that the TPL will run the foreach according to the available hardware in several threads. But that means, that ii is possible to do that work in parallel! yield return gives you the opportunity to get some values out of a list (or what-so-ever) and give them back one-by-one as they are needed. It prevents of the need to first find all items matching the condition and then iterate over them. That is indeed a performance advantage, but can't be done in parallel.
Although the question is old I've managed to do something just for fun.
class Program
{
static void Main(string[] args)
{
foreach (var message in GetMessages())
{
Console.WriteLine(message);
}
}
// Parallel yield
private static IEnumerable<string> GetMessages()
{
int total = 0;
bool completed = false;
var batches = Enumerable.Range(1, 100).Select(i => new Computer() { Id = i });
var qu = new ConcurrentQueue<Computer>();
Task.Run(() =>
{
try
{
Parallel.ForEach(batches,
() => 0,
(item, loop, subtotal) =>
{
Thread.Sleep(1000);
qu.Enqueue(item);
return subtotal + 1;
},
result => Interlocked.Add(ref total, result));
}
finally
{
completed = true;
}
});
int current = 0;
while (current < total || !completed)
{
SpinWait.SpinUntil(() => current < total || completed);
if (current == total) yield break;
current++;
qu.TryDequeue(out Computer computer);
yield return $"Completed {computer.Id}";
}
}
}
public class Computer
{
public int Id { get; set; }
}
Compared to Koray's answer this one really uses all the CPU cores.
You can use the following extension method
public static class ParallelExtensions
{
public static IEnumerable<T1> OrderedParallel<T, T1>(this IEnumerable<T> list, Func<T, T1> action)
{
var unorderedResult = new ConcurrentBag<(long, T1)>();
Parallel.ForEach(list, (o, state, i) =>
{
unorderedResult.Add((i, action.Invoke(o)));
});
var ordered = unorderedResult.OrderBy(o => o.Item1);
return ordered.Select(o => o.Item2);
}
}
use like:
public void FillLogs(IEnumerable<IComputer> computers)
{
cpt.Logs = computers.OrderedParallel(o => o.GetRawLogs()).ToList();
}
Hope this will save you some time.
How about
Queue<string> qu = new Queue<string>();
bool finished = false;
Task.Factory.StartNew(() =>
{
Parallel.ForEach(get_list(), (item) =>
{
string itemToReturn = heavyWorkOnItem(item);
lock (qu)
qu.Enqueue(itemToReturn );
});
finished = true;
});
while (!finished)
{
lock (qu)
while (qu.Count > 0)
yield return qu.Dequeue();
//maybe a thread sleep here?
}
Edit:
I think this is better:
public static IEnumerable<TOutput> ParallelYieldReturn<TSource, TOutput>(this IEnumerable<TSource> source, Func<TSource, TOutput> func)
{
ConcurrentQueue<TOutput> qu = new ConcurrentQueue<TOutput>();
bool finished = false;
AutoResetEvent re = new AutoResetEvent(false);
Task.Factory.StartNew(() =>
{
Parallel.ForEach(source, (item) =>
{
qu.Enqueue(func(item));
re.Set();
});
finished = true;
re.Set();
});
while (!finished)
{
re.WaitOne();
while (qu.Count > 0)
{
TOutput res;
if (qu.TryDequeue(out res))
yield return res;
}
}
}
Edit2: I agree with the short No answer. This code is useless; you cannot break the yield loop.
Using Rx, I desire pause and resume functionality in the following code:
How to implement Pause() and Resume() ?
static IDisposable _subscription;
static void Main(string[] args)
{
Subscribe();
Thread.Sleep(500);
// Second value should not be shown after two seconds:
Pause();
Thread.Sleep(5000);
// Continue and show second value and beyond now:
Resume();
}
static void Subscribe()
{
var list = new List<int> { 1, 2, 3, 4, 5 };
var obs = list.ToObservable();
_subscription = obs.SubscribeOn(Scheduler.NewThread).Subscribe(p =>
{
Console.WriteLine(p.ToString());
Thread.Sleep(2000);
},
err => Console.WriteLine("Error"),
() => Console.WriteLine("Sequence Completed")
);
}
static void Pause()
{
// Pseudocode:
//_subscription.Pause();
}
static void Resume()
{
// Pseudocode:
//_subscription.Resume();
}
Rx Solution?
I believe I could make it work with some kind of Boolean field gating combined with thread locking (Monitor.Wait and Monitor.Pulse)
But is there an Rx operator or some other reactive shorthand to achieve the same aim?
Here's a reasonably simple Rx way to do what you want. I've created an extension method called Pausable that takes a source observable and a second observable of boolean that pauses or resumes the observable.
public static IObservable<T> Pausable<T>(
this IObservable<T> source,
IObservable<bool> pauser)
{
return Observable.Create<T>(o =>
{
var paused = new SerialDisposable();
var subscription = Observable.Publish(source, ps =>
{
var values = new ReplaySubject<T>();
Func<bool, IObservable<T>> switcher = b =>
{
if (b)
{
values.Dispose();
values = new ReplaySubject<T>();
paused.Disposable = ps.Subscribe(values);
return Observable.Empty<T>();
}
else
{
return values.Concat(ps);
}
};
return pauser.StartWith(false).DistinctUntilChanged()
.Select(p => switcher(p))
.Switch();
}).Subscribe(o);
return new CompositeDisposable(subscription, paused);
});
}
It can be used like this:
var xs = Observable.Generate(
0,
x => x < 100,
x => x + 1,
x => x,
x => TimeSpan.FromSeconds(0.1));
var bs = new Subject<bool>();
var pxs = xs.Pausable(bs);
pxs.Subscribe(x => { /* Do stuff */ });
Thread.Sleep(500);
bs.OnNext(true);
Thread.Sleep(5000);
bs.OnNext(false);
Thread.Sleep(500);
bs.OnNext(true);
Thread.Sleep(5000);
bs.OnNext(false);
It should be fairly easy for you to put this in your code with the Pause & Resume methods.
Here it is as an application of IConnectableObservable that I corrected slightly for the newer api (original here):
public static class ObservableHelper {
public static IConnectableObservable<TSource> WhileResumable<TSource>(Func<bool> condition, IObservable<TSource> source) {
var buffer = new Queue<TSource>();
var subscriptionsCount = 0;
var isRunning = System.Reactive.Disposables.Disposable.Create(() => {
lock (buffer)
{
subscriptionsCount--;
}
});
var raw = Observable.Create<TSource>(subscriber => {
lock (buffer)
{
subscriptionsCount++;
if (subscriptionsCount == 1)
{
while (buffer.Count > 0) {
subscriber.OnNext(buffer.Dequeue());
}
Observable.While(() => subscriptionsCount > 0 && condition(), source)
.Subscribe(
v => { if (subscriptionsCount == 0) buffer.Enqueue(v); else subscriber.OnNext(v); },
e => subscriber.OnError(e),
() => { if (subscriptionsCount > 0) subscriber.OnCompleted(); }
);
}
}
return isRunning;
});
return raw.Publish();
}
}
Here is my answer. I believe there may be a race condition around pause resume, however this can be mitigated by serializing all activity onto a scheduler. (favor Serializing over synchronizing).
using System;
using System.Reactive.Concurrency;
using System.Reactive.Disposables;
using System.Reactive.Linq;
using System.Reactive.Subjects;
using Microsoft.Reactive.Testing;
using NUnit.Framework;
namespace StackOverflow.Tests.Q7620182_PauseResume
{
[TestFixture]
public class PauseAndResumeTests
{
[Test]
public void Should_pause_and_resume()
{
//Arrange
var scheduler = new TestScheduler();
var isRunningTrigger = new BehaviorSubject<bool>(true);
Action pause = () => isRunningTrigger.OnNext(false);
Action resume = () => isRunningTrigger.OnNext(true);
var source = scheduler.CreateHotObservable(
ReactiveTest.OnNext(0.1.Seconds(), 1),
ReactiveTest.OnNext(2.0.Seconds(), 2),
ReactiveTest.OnNext(4.0.Seconds(), 3),
ReactiveTest.OnNext(6.0.Seconds(), 4),
ReactiveTest.OnNext(8.0.Seconds(), 5));
scheduler.Schedule(TimeSpan.FromSeconds(0.5), () => { pause(); });
scheduler.Schedule(TimeSpan.FromSeconds(5.0), () => { resume(); });
//Act
var sut = Observable.Create<IObservable<int>>(o =>
{
var current = source.Replay();
var connection = new SerialDisposable();
connection.Disposable = current.Connect();
return isRunningTrigger
.DistinctUntilChanged()
.Select(isRunning =>
{
if (isRunning)
{
//Return the current replayed values.
return current;
}
else
{
//Disconnect and replace current.
current = source.Replay();
connection.Disposable = current.Connect();
//yield silence until the next time we resume.
return Observable.Never<int>();
}
})
.Subscribe(o);
}).Switch();
var observer = scheduler.CreateObserver<int>();
using (sut.Subscribe(observer))
{
scheduler.Start();
}
//Assert
var expected = new[]
{
ReactiveTest.OnNext(0.1.Seconds(), 1),
ReactiveTest.OnNext(5.0.Seconds(), 2),
ReactiveTest.OnNext(5.0.Seconds(), 3),
ReactiveTest.OnNext(6.0.Seconds(), 4),
ReactiveTest.OnNext(8.0.Seconds(), 5)
};
CollectionAssert.AreEqual(expected, observer.Messages);
}
}
}
It just works:
class SimpleWaitPulse
{
static readonly object _locker = new object();
static bool _go;
static void Main()
{ // The new thread will block
new Thread (Work).Start(); // because _go==false.
Console.ReadLine(); // Wait for user to hit Enter
lock (_locker) // Let's now wake up the thread by
{ // setting _go=true and pulsing.
_go = true;
Monitor.Pulse (_locker);
}
}
static void Work()
{
lock (_locker)
while (!_go)
Monitor.Wait (_locker); // Lock is released while we’re waiting
Console.WriteLine ("Woken!!!");
}
}
Please, see How to Use Wait and Pulse for more details