I have written a simple "latency simulator" which works, but at times, messages are delayed for longer than the time specified. I need help to ensure that messages are delayed for the correct amount of time.
The main problem, I believe, is that I am using Thread.Sleep(x), whose accuracy depends on various factors, mainly the clock interrupt rate; this gives Thread.Sleep() a resolution of roughly 15ms. Further, intensive tasks will demand more CPU time and will occasionally result in a delay greater than the one requested. If you are not familiar with the resolution issues of Thread.Sleep, you can read these SO posts: here, here and here.
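To see the resolution issue in isolation, here is a minimal sketch (mine, not part of the simulator) that times a batch of Thread.Sleep(1) calls; with the default Windows timer resolution each call typically takes around 15 ms, not 1 ms:

using System;
using System.Diagnostics;
using System.Threading;

class SleepResolutionDemo
{
    static void Main()
    {
        const int iterations = 50;
        var sw = Stopwatch.StartNew();
        for (int i = 0; i < iterations; i++)
            Thread.Sleep(1); // request 1 ms; the OS rounds up to the next clock interrupt
        sw.Stop();
        Console.WriteLine("Average per Sleep(1): {0:F2} ms",
            sw.Elapsed.TotalMilliseconds / iterations);
    }
}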
This is my LatencySimulator:
public class LatencySimulatorResult: EventArgs
{
public int messageNumber { get; set; }
public byte[] message { get; set; }
}
public class LatencySimulator
{
private int messageNumber;
private int latency = 0;
private int processedMessageCount = 0;
public event EventHandler messageReady;
public void Delay(byte[] message, int delay)
{
latency = delay;
var result = new LatencySimulatorResult();
result.message = message;
result.messageNumber = messageNumber;
if (latency == 0)
{
if (messageReady != null)
messageReady(this, result);
}
else
{
ThreadPool.QueueUserWorkItem(ThreadPoolCallback, result);
}
Interlocked.Increment(ref messageNumber);
}
private void ThreadPoolCallback(object threadContext)
{
// Note: 'latency' is a shared field, so this sleeps for whatever the most
// recent call to Delay() set, not necessarily this message's delay.
Thread.Sleep(latency);
var next = (LatencySimulatorResult)threadContext;
// Busy-wait (spin) until it is this message's turn, to preserve ordering.
var ready = next.messageNumber == processedMessageCount + 1;
while (ready == false)
{
ready = next.messageNumber == processedMessageCount + 1;
}
if (messageReady != null)
messageReady(this, next);
Interlocked.Increment(ref processedMessageCount);
}
}
To use it, you create a new instance and bind to the event handler:
var latencySimulator = new LatencySimulator();
latencySimulator.messageReady += MessageReady;
You then call latencySimulator.Delay(someBytes, someDelay);
When a message has finished being delayed, the event is fired and you can then process the delayed message.
It is important that the order in which messages are added is maintained. I cannot have them coming out the other end of the latency simulator in some random order.
Here is a test program to use the latency simulator and to see how long messages have been delayed for:
private static LatencySimulator latencySimulator;
private static ConcurrentDictionary<int, PendingMessage> pendingMessages;
private static List<long> measurements;
static void Main(string[] args)
{
var results = TestLatencySimulator();
var anomalies = results.Result.Where(x=>x > 32).ToList();
foreach (var result in anomalies)
{
Console.WriteLine(result);
}
Console.ReadLine();
}
static async Task<List<long>> TestLatencySimulator()
{
latencySimulator = new LatencySimulator();
latencySimulator.messageReady += MessageReady;
var numberOfMeasurementsMax = 1000;
pendingMessages = new ConcurrentDictionary<int, PendingMessage>();
measurements = new List<long>();
var sendTask = Task.Factory.StartNew(() =>
{
for (var i = 0; i < numberOfMeasurementsMax; i++)
{
var message = new Message { Id = i };
pendingMessages.TryAdd(i, new PendingMessage() { Id = i });
latencySimulator.Delay(Serialize(message), 30);
Thread.Sleep(50);
}
});
//Spin some tasks up to simulate high CPU usage
Task.Factory.StartNew(() => { FindPrimeNumber(100000); });
Task.Factory.StartNew(() => { FindPrimeNumber(100000); });
Task.Factory.StartNew(() => { FindPrimeNumber(100000); });
sendTask.Wait();
return measurements;
}
static long FindPrimeNumber(int n)
{
int count = 0;
long a = 2;
while (count < n)
{
long b = 2;
int prime = 1;// to check if found a prime
while (b * b <= a)
{
if (a % b == 0)
{
prime = 0;
break;
}
b++;
}
if (prime > 0)
{
count++;
}
a++;
}
return (--a);
}
private static void MessageReady(object sender, EventArgs e)
{
LatencySimulatorResult result = (LatencySimulatorResult)e;
var message = (Message)Deserialize(result.message);
if (pendingMessages.ContainsKey(message.Id) != true) return;
pendingMessages[message.Id].stopwatch.Stop();
measurements.Add(pendingMessages[message.Id].stopwatch.ElapsedMilliseconds);
}
static object Deserialize(byte[] arrBytes)
{
using (var memStream = new MemoryStream())
{
var binForm = new BinaryFormatter();
memStream.Write(arrBytes, 0, arrBytes.Length);
memStream.Seek(0, SeekOrigin.Begin);
var obj = binForm.Deserialize(memStream);
return obj;
}
}
static byte[] Serialize<T>(T obj)
{
BinaryFormatter bf = new BinaryFormatter();
using (var ms = new MemoryStream())
{
bf.Serialize(ms, obj);
return ms.ToArray();
}
}
If you run this code, you will see that about 5% of the messages are delayed for more than the expected 30ms. In fact, some are as high as 60ms. Without any background tasks or high CPU usage, the simulator behaves as expected.
I need them all to be 30ms (or as close as possible) - I do not want some arbitrary 50-60ms delays.
Can anyone suggest how I can refactor this code so that I can achieve the desired result, but without the use of Thread.Sleep() and with as little CPU overhead as possible?
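One possible direction, sketched under assumptions (the class and member names below are mine, and this is not a drop-in replacement for the class above): a single dedicated consumer thread drains a queue in FIFO order, so ordering is preserved by construction, and each message's due time is computed from a Stopwatch. The wait sleeps coarsely while far from the deadline and spin-waits only for the final stretch, trading a little CPU for precision.

using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Threading;

public class PrecisionDelayQueue
{
    private struct Pending
    {
        public byte[] Message;
        public long DueTicks; // Stopwatch ticks at which the message is due
    }

    private readonly BlockingCollection<Pending> _queue = new BlockingCollection<Pending>();
    private readonly Stopwatch _clock = Stopwatch.StartNew();

    public event Action<byte[]> MessageReady;

    public PrecisionDelayQueue()
    {
        new Thread(ConsumeLoop) { IsBackground = true }.Start();
    }

    public void Delay(byte[] message, int delayMs)
    {
        long due = _clock.ElapsedTicks + delayMs * Stopwatch.Frequency / 1000;
        _queue.Add(new Pending { Message = message, DueTicks = due }); // FIFO: order in == order out
    }

    private void ConsumeLoop()
    {
        // Stay ~20 ms away from the deadline while using Sleep(1), because a
        // 1 ms sleep can last a full timer interval (~15 ms).
        long sleepMargin = Stopwatch.Frequency / 50;
        foreach (var item in _queue.GetConsumingEnumerable())
        {
            while (_clock.ElapsedTicks < item.DueTicks)
            {
                long remaining = item.DueTicks - _clock.ElapsedTicks;
                if (remaining > sleepMargin)
                    Thread.Sleep(1);      // coarse wait while far from the deadline
                else
                    Thread.SpinWait(100); // precise wait for the final stretch
            }
            var handler = MessageReady;
            if (handler != null)
                handler(item.Message);
        }
    }
}

This still uses a coarse Sleep(1) where accuracy does not matter; for a fully sleep-free wait the same loop can spin the whole time, at the cost of keeping one core busy.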
I would like to decode X10 code on the Raspberry Pi using C# on Windows 10 IoT, but I have no experience with RF decoding, so this is new territory for me.
I came across this post and I tried to convert it into C# code, but I had no success. Does anyone know how to decode this X10 code correctly using C#, or can someone point me to the right protocol specifications?
Here is the code I am currently using, however the ValueChanged Event is not called.
public static void Sniff(uint gpioPinNumb)
{
Task.Run(() => {
using (GpioPin pin = GpioController.GetDefault().OpenPin((int)gpioPinNumb, GpioSharingMode.Exclusive))
{
pin.SetDriveMode(GpioPinDriveMode.Input);
Stopwatch sw = Stopwatch.StartNew();
long elapsedMicrons = 0;
int[] states = new int[67];
int[] durations = new int[67];
uint changeCount = 0;
bool lockPassed = false;
bool isLock = false;
pin.ValueChanged += (GpioPin sender, GpioPinValueChangedEventArgs args) =>
{
elapsedMicrons = sw.ElapsedTicks / 10;
sw.Restart();
//Debug.WriteLine(elapsedMicrons);
if (elapsedMicrons > 25000 && !lockPassed && !isLock)
{
//X10 lock started
changeCount = 0;
durations[changeCount++] = (int)elapsedMicrons;
isLock = true;
Debug.WriteLine("Lock Started");
Debug.WriteLine("");
}
else if (isLock)
{
if (changeCount >= durations.Length)
{
isLock = false;
changeCount = 0;
Debug.WriteLine("===============================");
for (int i = 0; i < durations.Length; i++)
{
Debug.Write(durations[i]);
Debug.Write(" ");
}
Debug.WriteLine("");
}
else
{
durations[changeCount++] = (int)elapsedMicrons;
}
}
};
}
});
}
Here is the code I am currently using, however the ValueChanged Event is not called.
This issue is caused by the using statement: in C#, using results in a call to Dispose() (which is the same as Close()) as soon as the block is exited, so the pin goes out of scope and is released before any ValueChanged event can fire.
For safety you can move pin out of the Sniff method like this:
private static GpioPin pin;
public static void Sniff(uint gpioPinNumb)
{
Task.Run(() => {
pin = GpioController.GetDefault().OpenPin((int)gpioPinNumb, GpioSharingMode.Exclusive);
pin.SetDriveMode(GpioPinDriveMode.Input);
...
...
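As a side note on the timing in the question's handler: sw.ElapsedTicks / 10 only yields microseconds when Stopwatch.Frequency happens to be 10 MHz. A small, frequency-independent helper (my naming, not from the original post):

static long TicksToMicroseconds(long ticks)
{
    // Fine for short intervals; ticks * 1000000 can overflow for very long ones.
    return ticks * 1000000L / System.Diagnostics.Stopwatch.Frequency;
}

// usage inside the ValueChanged handler:
// elapsedMicrons = TicksToMicroseconds(sw.ElapsedTicks);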
This is a bit of a doozy and it's been a while since I worked with C#, so bear with me:
I'm running a jruby script to iterate through 900 files (5 MB - 1500 MB in size) to figure out how many dupes STILL exist within these (already uniq'd) files. I had little luck with awk.
My latest idea was to insert them into a local MongoDB instance like so:
db.collection('hashes').update({ :_id => hash }, { $inc: { count: 1 } }, { upsert: true })
... so that later I could just query it like db.collection.where({ count: { $gt: 1 } }) to get all the dupes.
This is working great except it's been over 24 hours and at the time of writing I'm at 72,532,927 Mongo entries and growing.
I think Ruby's .each_line is bottlenecking the IO hardcore.
So what I'm thinking now is compiling a C# program which fires up a thread PER EACH FILE and inserts the line (md5 hash) into a Redis list.
From there, I could have another compiled C# program simply pop the values off and ignore the save if the count is 1.
So the questions are:
Will using a compiled file reader and multithreading the file reads significantly improve performance?
Is using Redis even necessary? With a tremendous amount of AWS memory, could I not just use the threads to fill some sort of a list atomically and proceed from there? (See the sketch after this question.)
Thanks in advance.
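For what it is worth, that last idea could look like the following hedged sketch in C# (the directory path is a placeholder, and it assumes the distinct hashes fit in memory): each file is read on its own worker, and occurrences are counted atomically in a ConcurrentDictionary; any key with a count above 1 is a duplicate.

using System;
using System.Collections.Concurrent;
using System.IO;
using System.Linq;
using System.Threading.Tasks;

class DupeCounter
{
    static void Main()
    {
        var counts = new ConcurrentDictionary<string, int>();
        var files = Directory.EnumerateFiles(@"D:\hashes", "*.txt"); // placeholder location

        Parallel.ForEach(files, file =>
        {
            foreach (var line in File.ReadLines(file))        // streams lines, no full load
                counts.AddOrUpdate(line, 1, (_, c) => c + 1); // atomic per-key update
        });

        foreach (var dupe in counts.Where(kv => kv.Value > 1))
            Console.WriteLine("{0} x{1}", dupe.Key, dupe.Value);
    }
}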
Updated
New solution. The main idea is to calculate a dummy hash (just the sum of all chars in the string) of each line and store it in Dictionary<ulong, List<LinePosition>> _hash2LinePositions. Multiple lines can produce the same hash, which is handled by the List in the dictionary value: when the hashes are the same, we read the strings back from the streams and compare them. LinePosition is used for storing info about a line: its position in the stream and its length. I don't have files as huge as yours, but my tests show that it works. Here is the full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
public class Solution
{
struct LinePosition
{
public long Start;
public long Length;
public LinePosition(long start, long count)
{
Start = start;
Length = count;
}
public override string ToString()
{
return string.Format("Start: {0}, Length: {1}", Start, Length);
}
}
class TextFileHasher : IDisposable
{
readonly Dictionary<ulong, List<LinePosition>> _hash2LinePositions;
readonly Stream _stream;
bool _isDisposed;
public HashSet<ulong> Hashes { get; private set; }
public string Name { get; private set; }
public TextFileHasher(string name, Stream stream)
{
Name = name;
_stream = stream;
_hash2LinePositions = new Dictionary<ulong, List<LinePosition>>();
Hashes = new HashSet<ulong>();
}
public override string ToString()
{
return Name;
}
public void CalculateFileHash()
{
int readByte = -1;
ulong dummyLineHash = 0;
// Line start position in file
long startPosition = 0;
while ((readByte = _stream.ReadByte()) != -1) {
// Read until new line
if (readByte == '\r' || readByte == '\n') {
// If there was data
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - 1 - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
else {
// Was it new line ?
if (dummyLineHash == 0)
startPosition = _stream.Position - 1;
// Calculate dummy hash
dummyLineHash += (uint)readByte;
}
}
if (dummyLineHash != 0) {
// Add line hash and line position to the dict
AddToDictAndHash(dummyLineHash, startPosition, _stream.Position - startPosition);
// Reset line hash
dummyLineHash = 0;
}
}
public List<LinePosition> GetLinePositions(ulong hash)
{
return _hash2LinePositions[hash];
}
public List<string> GetDuplicates()
{
List<string> duplicates = new List<string>();
foreach (var key in _hash2LinePositions.Keys) {
List<LinePosition> linesPos = _hash2LinePositions[key];
if (linesPos.Count > 1) {
duplicates.AddRange(FindExactDuplicates(linesPos));
}
}
return duplicates;
}
public void Dispose()
{
if (_isDisposed)
return;
_stream.Dispose();
_isDisposed = true;
}
private void AddToDictAndHash(ulong hash, long start, long count)
{
List<LinePosition> linesPosition;
if (!_hash2LinePositions.TryGetValue(hash, out linesPosition)) {
linesPosition = new List<LinePosition>() { new LinePosition(start, count) };
_hash2LinePositions.Add(hash, linesPosition);
}
else {
linesPosition.Add(new LinePosition(start, count));
}
Hashes.Add(hash);
}
public byte[] GetLineAsByteArray(LinePosition prevPos)
{
long len = prevPos.Length;
byte[] lineBytes = new byte[len];
_stream.Seek(prevPos.Start, SeekOrigin.Begin);
_stream.Read(lineBytes, 0, (int)len);
return lineBytes;
}
private List<string> FindExactDuplicates(List<LinePosition> linesPos)
{
List<string> duplicates = new List<string>();
linesPos.Sort((x, y) => x.Length.CompareTo(y.Length));
LinePosition prevPos = linesPos[0];
for (int i = 1; i < linesPos.Count; i++) {
if (prevPos.Length == linesPos[i].Length) {
var prevLineArray = GetLineAsByteArray(prevPos);
var thisLineArray = GetLineAsByteArray(linesPos[i]);
if (prevLineArray.SequenceEqual(thisLineArray)) {
var line = System.Text.Encoding.Default.GetString(prevLineArray);
duplicates.Add(line);
}
#if false
string prevLine = System.Text.Encoding.Default.GetString(prevLineArray);
string thisLine = System.Text.Encoding.Default.GetString(thisLineArray);
Console.WriteLine("PrevLine: {0}\r\nThisLine: {1}", prevLine, thisLine);
StringBuilder sb = new StringBuilder();
sb.Append(prevPos);
sb.Append(" is '");
sb.Append(prevLine);
sb.Append("'. ");
sb.AppendLine();
sb.Append(linesPos[i]);
sb.Append(" is '");
sb.Append(thisLine);
sb.AppendLine("'. ");
sb.Append("Equals => ");
sb.Append(prevLine.CompareTo(thisLine) == 0);
Console.WriteLine(sb.ToString());
#endif
}
else {
prevPos = linesPos[i];
}
}
return duplicates;
}
}
public static void Main(String[] args)
{
List<TextFileHasher> textFileHashers = new List<TextFileHasher>();
string text1 = "abc\r\ncba\r\nabc";
TextFileHasher tfh1 = new TextFileHasher("Text1", new MemoryStream(System.Text.Encoding.Default.GetBytes(text1)));
tfh1.CalculateFileHash();
textFileHashers.Add(tfh1);
string text2 = "def\r\ncba\r\nwet";
TextFileHasher tfh2 = new TextFileHasher("Text2", new MemoryStream(System.Text.Encoding.Default.GetBytes(text2)));
tfh2.CalculateFileHash();
textFileHashers.Add(tfh2);
string text3 = "def\r\nbla\r\nwat";
TextFileHasher tfh3 = new TextFileHasher("Text3", new MemoryStream(System.Text.Encoding.Default.GetBytes(text3)));
tfh3.CalculateFileHash();
textFileHashers.Add(tfh3);
List<string> totalDuplicates = new List<string>();
Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>> totalHashes = new Dictionary<ulong, Dictionary<TextFileHasher, List<LinePosition>>>();
textFileHashers.ForEach(tfh => {
foreach(var dummyHash in tfh.Hashes) {
Dictionary<TextFileHasher, List<LinePosition>> tfh2LinePositions = null;
if (!totalHashes.TryGetValue(dummyHash, out tfh2LinePositions))
totalHashes[dummyHash] = new Dictionary<TextFileHasher, List<LinePosition>>() { { tfh, tfh.GetLinePositions(dummyHash) } };
else {
List<LinePosition> linePositions = null;
if (!tfh2LinePositions.TryGetValue(tfh, out linePositions))
tfh2LinePositions[tfh] = tfh.GetLinePositions(dummyHash);
else
linePositions.AddRange(tfh.GetLinePositions(dummyHash));
}
}
});
HashSet<TextFileHasher> alreadyGotDuplicates = new HashSet<TextFileHasher>();
foreach(var hash in totalHashes.Keys) {
var tfh2LinePositions = totalHashes[hash];
var tfh = tfh2LinePositions.Keys.FirstOrDefault();
// Get duplicates in the TextFileHasher itself
if (tfh != null && !alreadyGotDuplicates.Contains(tfh)) {
totalDuplicates.AddRange(tfh.GetDuplicates());
alreadyGotDuplicates.Add(tfh);
}
if (tfh2LinePositions.Count <= 1) {
continue;
}
// Algo to get duplicates in more than 1 TextFileHashers
var tfhs = tfh2LinePositions.Keys.ToArray();
for (int i = 0; i < tfhs.Length; i++) {
var tfh1Positions = tfhs[i].GetLinePositions(hash);
for (int j = i + 1; j < tfhs.Length; j++) {
var tfh2Positions = tfhs[j].GetLinePositions(hash);
for (int k = 0; k < tfh1Positions.Count; k++) {
var tfh1Pos = tfh1Positions[k];
var tfh1ByteArray = tfhs[i].GetLineAsByteArray(tfh1Pos);
for (int m = 0; m < tfh2Positions.Count; m++) {
var tfh2Pos = tfh2Positions[m];
if (tfh1Pos.Length != tfh2Pos.Length)
continue;
var tfh2ByteArray = tfhs[j].GetLineAsByteArray(tfh2Pos);
if (tfh1ByteArray.SequenceEqual(tfh2ByteArray)) {
var line = System.Text.Encoding.Default.GetString(tfh1ByteArray);
totalDuplicates.Add(line);
}
}
}
}
}
}
Console.WriteLine();
if (totalDuplicates.Count > 0) {
Console.WriteLine("Total number of duplicates: {0}", totalDuplicates.Count);
Console.WriteLine("#######################");
totalDuplicates.ForEach(x => Console.WriteLine("{0}", x));
Console.WriteLine("#######################");
}
// Free resources
foreach (var tfh in textFileHashers)
tfh.Dispose();
}
}
If you have tons of RAM... you guys are overthinking it...
var fileLines = File.ReadAllLines(@"c:\file.csv").Distinct();
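If memory is tighter, a streaming variant of the same idea (a sketch; the path is a placeholder): File.ReadLines enumerates lazily, and HashSet<string>.Add returns false for a value it has already seen, which flags the duplicate without loading the whole file.

var seen = new HashSet<string>();
foreach (var line in File.ReadLines(@"c:\file.csv"))
    if (!seen.Add(line))
        Console.WriteLine("dupe: " + line);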
I have a console app that is making HTTP queries and adding/updating products in my database according to response. Some fail and need to be retried a few times.
The way I came up with was to use a dictionary to store the product ID and a Task. Then I can check all the task results and re-run.
This is working but it strikes me as inefficient. Tasks are not being re-created until all tasks have finished. It would be more efficient if they were immediately restarted but I can't figure out how to do this. Also every retry involves a query to the database as only the ID is stored.
I made a small app that shows how I am currently retrying failed requests.
Can someone suggest a more efficient method for retrying?
class Program
{
private static void Main(string[] args)
{
HttpQuery m = new HttpQuery();
var task = Task.Run(() => m.Start());
Task.WaitAll(task);
Console.WriteLine("Finished");
Console.ReadLine();
}
}
class HttpQuery
{
public async Task Start()
{
// dictionary where key represent reference to something that needs to be processed and bool whether it has completed or not
ConcurrentDictionary<int, Task<bool>> monitor = new ConcurrentDictionary<int, Task<bool>>();
// start async tasks.
Console.WriteLine("starting first try");
for (int i = 0; i < 1000; i++)
{
Console.Write(i+",");
monitor[i] = this.Query(i);
}
// wait for completion
await Task.WhenAll(monitor.Values.ToArray());
Console.WriteLine();
// start retries
// number of retries per query
int retries = 10;
int count = 0;
// check if max retries exceeded or all completed
while (count < retries && monitor.Any(x => x.Value.Result == false))
{
// make list of numbers that failed
List<int> retryList = monitor.Where(x => x.Value.Result == false).Select(x => x.Key).ToList();
Console.WriteLine("starting try number: " + (count+1) + ", Processing: " + retryList.Count);
// create list of tasks to wait for
List<Task<bool>> toWait = new List<Task<bool>>();
foreach (var i in retryList)
{
Console.Write(i + ",");
monitor[i] = this.Query(i);
toWait.Add(monitor[i]);
}
// wait for completion
await Task.WhenAll(toWait.ToArray());
Console.WriteLine();
count++;
}
Console.WriteLine("ended");
Console.ReadLine();
}
public async Task<bool> Query(int i)
{
// simulate a http request that may or may not fail
Random r = new Random();
int delay = i * r.Next(1, 10);
await Task.Delay(delay);
if (r.Next(0,2) == 1)
{
return true;
}
else
{
return false;
}
}
}
You can create another method and wrap all this ugly retry logic in it. All of that ugly code goes away :)
public async Task Start()
{
const int MaxNumberOfTries = 10;
List<Task<bool>> tasks = new List<Task<bool>>();
for (int i = 0; i < 1000; i++)
{
tasks.Add(this.QueryWithRetry(i, MaxNumberOfTries));
}
await Task.WhenAll(tasks);
}
public async Task<bool> QueryWithRetry(int i, int numOfTries)
{
int tries = 0;
bool result;
do
{
result = await Query(i);
tries++;
} while (!result && tries < numOfTries);
return result;
}
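If the endpoint also benefits from a pause between attempts, the same wrapper extends naturally. A sketch with exponential backoff (the delays are illustrative, not tuned):

public async Task<bool> QueryWithRetryAndBackoff(int i, int numOfTries)
{
    int tries = 0;
    bool result;
    do
    {
        result = await Query(i);
        tries++;
        if (!result && tries < numOfTries)
            await Task.Delay(TimeSpan.FromMilliseconds(100 * Math.Pow(2, tries))); // 200 ms, 400 ms, ...
    } while (!result && tries < numOfTries);
    return result;
}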
I am trying to create a kind of framework that simplifies the process of writing object-interaction algorithms (one object, many clients/algorithms).
For example, I want to implement an algorithm that does some very simple job and then waits in a loop for some condition to be met:
public void MakeVerySimpleJob() { }
public void AsyncLoop()
{ // real algorithm can be more complex, but job is always very simple
while (true)
{
MakeVerySimpleJob();
WakeUpCondition = "As fast as u can!";
JobComplete.Set();
WakeUp.WaitOne();
}
}
void main()
{
Thread MyThread = new Thread(AsyncLoop);
MyThread.Start();
var w = new System.Diagnostics.Stopwatch(); w.Start();
for (int i = 0; i < 100000; i++)
{
// waitin for thread
JobComplete.WaitOne();
// ok we did it
WakeUpCondition = null;
WakeUp.Set();
}
w.Stop();
}
AutoResetEvent JobComplete = new AutoResetEvent(false);
AutoResetEvent WakeUp = new AutoResetEvent(false);
Unfortunately it takes about 500 ms to do 100,000 simple jobs.
OK, multithreading is not acceptable in my case, but I don't want to force users to write algorithms in this manner:
// invoke it again and again
public void PseudoAsyncLoop()
{
if (CurrentState == 1)
{
MakeVerySimpleJob();
CurrentState = 2;
return;
}
else if (CurrentState == some_state)
{
}
}
int CurrentState = 0;
So I looked at enumerators. With enumerators, users can implement their own algorithms in a traditional style:
public IEnumerable<bool> PseudoAsyncLoop()
{
while (true)
{
MakeVerySimpleJob();
WakeUpCondition = "As fast as u can!";
yield return true;
}
}
public string WakeUpCondition { get; private set; }
void main()
{
var MyLoop = PseudoAsyncLoop();
var LoopEnumerator = MyLoop.GetEnumerator();
var w = new System.Diagnostics.Stopwatch(); w.Start();
for(int i = 0; i < 100000; i ++)
{
LoopEnumerator.MoveNext();
// ok we did it
WakeUpCondition = null;
}
w.Stop();
}
Now it takes about 3 ms, great. But I think something is wrong with all that...
My questions are:
Am I heading in the right direction?
How do professional programmers solve these types of problems?
Is there maybe some way to optimize the multithreaded version?
I don't completely understand what you are doing or why, but if this is actually representative of your code, then you can speed it up by using one of the -Slim synchronization primitives. There is no AutoResetEventSlim, but you can use a SemaphoreSlim instead:
private readonly SemaphoreSlim JobComplete = new SemaphoreSlim(0, 1);
private readonly SemaphoreSlim WakeUp = new SemaphoreSlim(0, 1);
private void AsyncLoop()
{
while (true)
{
MakeVerySimpleJob();
WakeUpCondition = "As fast as u can!";
JobComplete.Release();
WakeUp.Wait();
}
}
private void main()
{
Thread MyThread = new Thread(AsyncLoop);
MyThread.Start();
for (int i = 0; i < 100000; i++)
{
JobComplete.Wait();
WakeUpCondition = null;
WakeUp.Release();
}
}
This results in about 5.5× faster execution on my machine.
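The gain comes from where the wait happens: the -Slim primitives spin briefly in user mode before falling back to a kernel wait, so in a tight ping-pong handoff like this one most iterations avoid the kernel transition that every AutoResetEvent wait pays for.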
Problem: In the code presented, I am trying to index ~26,000 geo-fences (polygons). I have tried many combinations of 1) inserting the data and 2) indexing the data; some are not presented in the code for brevity. I have tried importing all the data with a pre-built index; importing the data and then creating the index; and creating the index, then importing the data in chunks while waiting for a non-stale index. In all cases it eats up all the memory and fails (by fail I mean I waited 30 minutes for the indexing process to complete). One interesting observation from the partial inserts with a pre-built index: it always starts to eat up all the memory at around ~1,790 entries.
Environment: I have tested this on 2 machines: 1) 4 GB RAM, 2.56 GHz dual-core CPU; 2) 12 GB RAM, i7 CPU. I used Visual Studio 2012 (which should not be important), targeting .NET 4.0 with RavenDB build 2230. The only external library used here (other than the RavenDB client libraries) was NetTopologySuite (NuGet).
Side Note and Background: I do my spatial calculations using 2 in-memory data structures: a KD-tree and an R-tree with static data (no insert/delete/update). Now a new step has been added to my project which needs to update/insert some data twice a day; because of that, I searched for a replacement to remove the housekeeping burden of this part, and I came up with the SQLite R-Tree module. It is fast enough (using some parallel programming, plus it is a mostly read-only database). So far RavenDB is not much of a choice (strategic decisions, etc.), but I wanted to play with it, and it was disappointing. To be precise, I love RavenDB 960 and I am really angry with RavenDB 2230.
I have used the in-memory trees and tested SQLite with ~3,000,000 polygons (again, to be precise, "bounding boxes"), yet failed to use RavenDB for ~26,000 polygons. I really hope that I am doing something badly wrong.
The code:
namespace MyApp
{
static partial class Program
{
static void Main(string[] args)
{
Initialization();
InitializeIndexes();
// MODE 1:
// OneTimeImport(-1, 1024, true);
// MODE 2:
// OneTimeImport(-1, 1024, false);
// MODE 3:
// PartialImport(64); and PartialImport(32); and PartialImport(16);
}
static void PartialImport(int len)
{
bool finished = false;
int index = 0;
// read finished from file
if (finished)
{
return;
}
// read index from file, if not exists then set it to 0
index = OneTimeImport(index, len, true);
if (index == 0)
{
finished = true;
}
// write index and finished to file
return;
}
static int OneTimeImport(int start, int len, bool waitForIndexing)
{
var counter = start < 0 ? 0 : start;
using (var reader = new StreamReader(@"D:\PATH\DATA.TXT", Encoding.UTF8))
using (var session = Store.OpenSession())
{
session.Advanced.MaxNumberOfRequestsPerSession = int.MaxValue - 1;
if (len < 0) len = 16;
IEnumerable<string> lines = null;
if (start < 0) { lines = reader.Lines(); }
else { lines = reader.Lines().Skip(start).Take(len); }
foreach (var line in lines)
{
var parts = line.Split(new string[] { "|" }, StringSplitOptions.None);
var lm = new Area();
lm.Description = parts[5];
var poly = string.Empty;
if (!string.IsNullOrWhiteSpace(parts[8]))
{
try
{
var list = new List<Coordinate>();
var pairs = parts[8].Split(';');
foreach (var p in pairs)
{
var coor = p.Split(',');
var lon = coor[0].ToDouble();
var lat = coor[1].ToDouble();
list.Add(new Coordinate(lon, lat));
}
if (list.Count > 0)
{
list.Add(list[0]);
var ring = new LinearRing(list.ToArray());
poly = new Polygon(ring).ToString();
}
}
catch { } // some logging
}
lm.Polygon = poly;
session.Store(lm);
counter++;
if (counter % len == 0)
{
session.SaveChanges();
if (waitForIndexing) while (Store.DatabaseCommands.GetStatistics().StaleIndexes.Length > 0) Thread.Sleep(10);
}
Trace.WriteLine(counter);
}
session.SaveChanges();
if (waitForIndexing) while (Store.DatabaseCommands.GetStatistics().StaleIndexes.Length > 0) Thread.Sleep(500);
}
return counter;
}
static void Initialization()
{
Store = new DocumentStore { ConnectionStringName = "LoxConnectionString" };
Store.Initialize();
}
static void InitializeIndexes()
{
IndexCreation.CreateIndexes(typeof(Program).Assembly, Store);
}
public static IEnumerable<string> Lines(this StreamReader reader)
{
string line = null;
while ((line = reader.ReadLine()) != null) yield return line;
}
public static double ToDouble(this string s)
{
double d = 0;
double.TryParse(s, out d);
return d;
}
public static DocumentStore Store { get; private set; }
}
public class Area
{
public string Id { get; set; }
public string Description { get; set; }
public string Polygon { get; set; }
}
public class AreaIndex : AbstractIndexCreationTask<Area>
{
public AreaIndex()
{
Map = docs => from doc in docs
select new
{
doc.Description,
_ = SpatialGenerate("AreaPolygon", doc.Polygon)
};
}
}
}
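For completeness, querying an index like AreaIndex from the RavenDB 2.x client would look roughly like the sketch below. This is hedged: the WKT point is a placeholder, and RelatesToShape/SpatialRelation (from Raven.Abstractions.Indexing) may differ slightly between builds.

// Find areas whose polygon intersects a given point, using the spatial
// field that AreaIndex generates ("AreaPolygon").
using (var session = Program.Store.OpenSession())
{
    var matches = session.Query<Area, AreaIndex>()
        .Customize(c => c.RelatesToShape("AreaPolygon",
                                         "POINT (-122.33 47.61)", // placeholder WKT
                                         SpatialRelation.Intersects))
        .ToList();
}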