Mono SIMD worsening performance? - c#

Benchmark Code:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Mono.Simd;
using MathNet.Numerics.LinearAlgebra.Single;
namespace XXX {
/// <summary>Unit-conversion helpers for <see cref="TimeSpan"/>.</summary>
public static class TimeSpanExtensions {
    /// <summary>Expresses an interval in nanoseconds.</summary>
    /// <param name="timeSpan">Interval to convert.</param>
    /// <returns>The interval's total milliseconds multiplied by one million.</returns>
    public static double TotalNanoseconds(this TimeSpan timeSpan) {
        const double nanosecondsPerMillisecond = 1000000.0;
        double milliseconds = timeSpan.TotalMilliseconds;
        return milliseconds * nanosecondsPerMillisecond;
    }
}
/// <summary>Benchmark whose measured operation is a single <c>Vector4f</c> addition.</summary>
public sealed class SimdBenchmark : Benchmark {
    // Operands are created once; every Do() call adds the same two vectors.
    private Vector4f lhs = new Vector4f(1.0f, 2.0f, 3.0f, 4.0f);
    private Vector4f rhs = new Vector4f(1.0f, 2.0f, 3.0f, 4.0f);
    // Result sink; nothing in this class reads it back.
    private Vector4f sum;

    /// <summary>Adds the two operand vectors and stores the result in a field.</summary>
    public override void Do() {
        sum = lhs + rhs;
    }
}
/// <summary>Benchmark whose measured operation is a single <c>DenseVector</c> addition.</summary>
public sealed class MathNetBenchmark : Benchmark {
    // Operands are created once; every Do() call adds the same two vectors.
    private DenseVector lhs = new DenseVector(new float[] { 1.0f, 2.0f, 3.0f, 4.0f });
    private DenseVector rhs = new DenseVector(new float[] { 1.0f, 2.0f, 3.0f, 4.0f });
    // Result sink; nothing in this class reads it back.
    private DenseVector sum;

    /// <summary>Adds the two operand vectors and stores the result in a field.</summary>
    public override void Do() {
        sum = lhs + rhs;
    }
}
/// <summary>Benchmark whose measured operation is a single addition of the hand-written <c>Vector4</c> struct.</summary>
public sealed class DefaultBenchmark : Benchmark {
    // Operands are created once; every Do() call adds the same two vectors.
    private Vector4 lhs = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);
    private Vector4 rhs = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);
    // Result sink; nothing in this class reads it back.
    private Vector4 sum;

    /// <summary>Adds the two operand vectors and stores the result in a field.</summary>
    public override void Do() {
        sum = lhs + rhs;
    }
}
/// <summary>Benchmark whose measured operation is a single scalar <c>float</c> addition.</summary>
public sealed class SimpleBenchmark : Benchmark {
    private float lhs = 1.0f;
    private float rhs = 2.0f;
    // Result sink; nothing in this class reads it back.
    private float sum;

    /// <summary>Adds the two scalars and stores the result in a field.</summary>
    public override void Do() {
        sum = lhs + rhs;
    }
}
/// <summary>Benchmark that measures an arbitrary caller-supplied delegate.</summary>
public sealed class DelegateBenchmark : Benchmark {
    private readonly Action _action;

    /// <param name="action">Delegate invoked once per <see cref="Do"/> call.</param>
    public DelegateBenchmark(Action action) {
        this._action = action;
    }

    /// <summary>Invokes the wrapped delegate.</summary>
    public override void Do() {
        _action.Invoke();
    }
}
/// <summary>
/// Base class for micro-benchmarks. Enumerating an instance yields an endless
/// sequence of timings, one per call to <see cref="Do"/>.
/// </summary>
public abstract class Benchmark : IEnumerable<TimeSpan> {
    /// <summary>The operation being measured.</summary>
    public abstract void Do();

    /// <summary>
    /// Runs <see cref="Do"/> once untimed, forces a garbage collection, then
    /// yields the stopwatch reading of each subsequent <see cref="Do"/> call.
    /// The sequence never ends; callers must bound it (e.g. with <c>Take</c>).
    /// </summary>
    public IEnumerator<TimeSpan> GetEnumerator() {
        Do();                           // Warm-up run, not measured.
        GC.Collect();                   // Collect garbage before measuring.
        GC.WaitForPendingFinalizers();  // Let pending finalizers finish first.
        Stopwatch timer = new Stopwatch();
        for (;;) {
            timer.Reset();
            timer.Start();
            Do();
            timer.Stop();
            TimeSpan sample = timer.Elapsed;
            yield return sample;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() {
        return GetEnumerator();
    }
}
/// <summary>Plain four-component float vector with component-wise addition.</summary>
public struct Vector4 {
    float x;
    float y;
    float z;
    float w;

    /// <summary>Creates a vector from its four components.</summary>
    public Vector4(float x, float y, float z, float w) {
        this.x = x;
        this.y = y;
        this.z = z;
        this.w = w;
    }

    /// <summary>Returns the component-wise sum of the two operands.</summary>
    public static Vector4 operator +(Vector4 v1, Vector4 v2) {
        float sumX = v1.x + v2.x;
        float sumY = v1.y + v2.y;
        float sumZ = v1.z + v2.z;
        float sumW = v1.w + v2.w;
        return new Vector4(sumX, sumY, sumZ, sumW);
    }
}
/// <summary>Runs the four benchmarks and prints each one's mean timing.</summary>
class MainClass {
    // Mean, in nanoseconds, of the first 1000 samples the benchmark yields.
    static double AverageNanoseconds(Benchmark benchmark) {
        return benchmark.Take(1000).Average(sample => sample.TotalNanoseconds());
    }

    public static void Main(string[] args) {
        // All four benchmarks run before anything is printed.
        double simdNs = AverageNanoseconds(new SimdBenchmark());
        double simpleNs = AverageNanoseconds(new SimpleBenchmark());
        double defaultNs = AverageNanoseconds(new DefaultBenchmark());
        double mathNetNs = AverageNanoseconds(new MathNetBenchmark());

        foreach (double ns in new[] { simdNs, simpleNs, defaultNs, mathNetNs }) {
            Console.WriteLine(ns + " ns");
        }
    }
}
}
Environment Setup:
Windows 7 / Mono 2.10.8 / MonoDevelop 2.8.5
MonoDevelop Setup:
Tools > Options > .NET Runtimes > Mono 2.10.8 (Default)
Project > Options > Build > General > Target framework > Mono / .NET 4.0
Project > Options > Build > Compiler > General Options > Enable optimizations
Project > Options > Build > Compiler > General Options > Platform target > x86
Project > Options > Run > General > Parameters > -O=simd
Results:
94.4 ns
29.7 ns
49.9 ns
231595.2 ns

I would suspect your benchmark infrastructure first.
A couple of points might be:
You are using a `Stopwatch` to time single operations - it doesn't have the resolution
Your timings include a virtual function call
Your sample size ( 1000 ) is too small

These are my results:
1608.8 ns
1554.9 ns
1582.5 ns
(without MathNET, although it is not important here). The OS is Ubuntu 10.10 (32-bit) with Mono 2.10.7. At this point you may consider filing a bug report against the Windows version of Mono. But:
I think this is not the right way to benchmark SIMD operations, because of the overhead of the benchmark's own mechanisms.
For example look on this primitive test based on your Vector4 class.
// Number of accumulating additions performed in each timed loop.
const int count = 100000;
var simdVector = new Vector4f(1, 2, 3, 4);
var simdResult = simdVector;
// First measurement: `count` in-place additions on Vector4f values.
var sw = Stopwatch.StartNew();
for(var i = 0; i < count; i++)
{
simdResult += simdVector;
}
sw.Stop();
// Prints the elapsed time followed by the accumulated vector.
Console.WriteLine("SIMD result: {0} {1}", sw.Elapsed, simdResult);
// Second measurement: the same loop on the hand-written Vector4 struct.
// NOTE: this stopwatch is started before usualVector/usualResult are created,
// so their construction falls inside the timed span (unlike the first loop).
sw = Stopwatch.StartNew();
var usualVector = new Vector4(1, 2, 3, 4);
var usualResult = usualVector;
for(var i = 0; i < count; i++)
{
usualResult += usualVector;
}
sw.Stop();
// Prints the elapsed time followed by the accumulated vector.
Console.WriteLine("Usual result: {0} {1}", sw.Elapsed, usualResult);
On my machine results are:
SIMD result: 00:00:00.0005802 <100001, 200002, 300003, 400004>
Usual result: 00:00:00.0029598 <100001, 200002, 300003, 400004>
So something definitely different than your tests. So you may think that SIMD operations are that factor faster - but benchmarking is not that easy. There are many reasons for the upper loop being faster in this configuration. These reasons can be discussed on another occasion.
Nevertheless, it is certain that a SIMD instruction is faster than a couple of scalar adds in a row. What you should check is whether SIMD instructions are really emitted.
On Linux, one can check generated assembly (in the meaning of target processor's assembly, not the mono assembly ;)) using mono -v -v. Nevertheless I am not sure if it works on usual Windows system as it is probably using disas from GCC (you may have more luck using cygwin). By reading such assembly you can check whether SIMD operations are really emitted.
For example, by examining assembly generated for the above pasted program one can find that it uses addps instruction in its SIMD loop which is what we are looking for here.
Oh, and for the completeness here is output with SIMD disabled:
$ mono --optimize=-simd SimdTest.exe
SIMD result: 00:00:00.0027111 <100001, 200002, 300003, 400004>
Usual result: 00:00:00.0026127 <100001, 200002, 300003, 400004>
which is not so important as the generated assembly, containing no SIMD operations.
Hope this was helpful.

Well, I've managed to modify my benchmark code to make it more robust and completely unbiased. In other words:
First, as we discussed with Nicholas - measuring single operation might give distorted results. Moreover, as the frequency of Stopwatch is 10 million - it means that ticks occur every 100 ns. So considering this fact, previous results look rather bizarre. Therefore, in order to mitigate this issue, I decided to test 1000 operations rather than 1 at time.
Second, I'm not completely sure, but I guess that previous benchmark implementation was subjected to intensive caching, since on every iteration sums were computed between the same vectors (their components never changed). The only straightforward solution I see is to simply rebuild vectors with random components before every test.
The respective benchmark implementation is:
/// <summary>Unit-conversion helpers for <see cref="TimeSpan"/>.</summary>
public static class TimeSpanExtensions {
    /// <summary>Expresses an interval in nanoseconds.</summary>
    /// <param name="timeSpan">Interval to convert.</param>
    /// <returns>The interval's total milliseconds multiplied by one million.</returns>
    public static double TotalNanoseconds(this TimeSpan timeSpan) {
        const double nanosecondsPerMillisecond = 1000000.0;
        double milliseconds = timeSpan.TotalMilliseconds;
        return milliseconds * nanosecondsPerMillisecond;
    }
}
/// <summary>Single-precision helpers for <see cref="Random"/>.</summary>
public static class RandomExtensions {
    /// <summary>Returns the generator's next double, narrowed to <c>float</c>.</summary>
    public static float NextFloat(this Random random) {
        double sample = random.NextDouble();
        return (float)sample;
    }

    /// <summary>Returns the next float scaled by <c>(max - min)</c> and offset by <c>min</c>.</summary>
    /// <param name="random">Generator to draw from.</param>
    /// <param name="min">Offset added to the scaled sample.</param>
    /// <param name="max">Value whose distance from <paramref name="min"/> is the scale factor.</param>
    public static float NextFloat(this Random random, float min, float max) {
        float unit = random.NextFloat();
        return unit * (max - min) + min;
    }
}
/// <summary>Benchmark that adds 1000 pairs of <c>Vector4f</c> values per measured run.</summary>
public sealed class SimdBenchmark : Benchmark {
    private const int Length = 1000;

    private readonly Vector4f[] lhs = new Vector4f[Length];
    private readonly Vector4f[] rhs = new Vector4f[Length];
    private readonly Vector4f[] sums = new Vector4f[Length];

    /// <summary>Refills both operand arrays with freshly generated random vectors.</summary>
    public override void Begin() {
        Random rng = new Random();
        for (int slot = 0; slot < Length; ++slot) {
            lhs[slot] = new Vector4f(rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat());
            rhs[slot] = new Vector4f(rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat());
        }
    }

    /// <summary>The measured work: element-wise addition of the two arrays.</summary>
    public override void Do() {
        for (int slot = 0; slot < Length; ++slot) {
            sums[slot] = lhs[slot] + rhs[slot];
        }
    }

    /// <summary>Nothing to tear down.</summary>
    public override void End() {
    }
}
/// <summary>Benchmark that adds 1000 pairs of <c>DenseVector</c> values per measured run.</summary>
public sealed class MathNetBenchmark : Benchmark {
    private const int Length = 1000;

    private readonly DenseVector[] lhs = new DenseVector[Length];
    private readonly DenseVector[] rhs = new DenseVector[Length];
    private readonly DenseVector[] sums = new DenseVector[Length];

    /// <summary>Refills both operand arrays with freshly generated random four-element vectors.</summary>
    public override void Begin() {
        Random rng = new Random();
        for (int slot = 0; slot < Length; ++slot) {
            lhs[slot] = new DenseVector(new float[] { rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat() });
            rhs[slot] = new DenseVector(new float[] { rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat() });
        }
    }

    /// <summary>The measured work: element-wise addition of the two arrays.</summary>
    public override void Do() {
        for (int slot = 0; slot < Length; ++slot) {
            sums[slot] = lhs[slot] + rhs[slot];
        }
    }

    /// <summary>Nothing to tear down.</summary>
    public override void End() {
    }
}
/// <summary>Benchmark that adds 1000 pairs of hand-written <c>Vector4</c> values per measured run.</summary>
public sealed class DefaultBenchmark : Benchmark {
    private const int Length = 1000;

    private readonly Vector4[] lhs = new Vector4[Length];
    private readonly Vector4[] rhs = new Vector4[Length];
    private readonly Vector4[] sums = new Vector4[Length];

    /// <summary>Refills both operand arrays with freshly generated random vectors.</summary>
    public override void Begin() {
        Random rng = new Random();
        for (int slot = 0; slot < Length; ++slot) {
            lhs[slot] = new Vector4(rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat());
            rhs[slot] = new Vector4(rng.NextFloat(), rng.NextFloat(), rng.NextFloat(), rng.NextFloat());
        }
    }

    /// <summary>The measured work: element-wise addition of the two arrays.</summary>
    public override void Do() {
        for (int slot = 0; slot < Length; ++slot) {
            sums[slot] = lhs[slot] + rhs[slot];
        }
    }

    /// <summary>Nothing to tear down.</summary>
    public override void End() {
    }
}
/// <summary>Benchmark that adds 1000 pairs of scalar floats per measured run.</summary>
public sealed class SimpleBenchmark : Benchmark {
    private const int Length = 1000;

    private readonly float[] lhs = new float[Length];
    private readonly float[] rhs = new float[Length];
    private readonly float[] sums = new float[Length];

    /// <summary>Refills both operand arrays with freshly generated random values.</summary>
    public override void Begin() {
        Random rng = new Random();
        for (int slot = 0; slot < Length; ++slot) {
            lhs[slot] = rng.NextFloat();
            rhs[slot] = rng.NextFloat();
        }
    }

    /// <summary>The measured work: element-wise addition of the two arrays.</summary>
    public override void Do() {
        for (int slot = 0; slot < Length; ++slot) {
            sums[slot] = lhs[slot] + rhs[slot];
        }
    }

    /// <summary>Nothing to tear down.</summary>
    public override void End() {
    }
}
/// <summary>Benchmark that measures an arbitrary caller-supplied delegate.</summary>
public sealed class DelegateBenchmark : Benchmark {
    private readonly Action _action;

    /// <param name="action">Delegate invoked once per <see cref="Do"/> call.</param>
    public DelegateBenchmark(Action action) {
        this._action = action;
    }

    /// <summary>No per-run setup.</summary>
    public override void Begin() {
    }

    /// <summary>Invokes the wrapped delegate.</summary>
    public override void Do() {
        _action.Invoke();
    }

    /// <summary>No per-run teardown.</summary>
    public override void End() {
    }
}
/// <summary>
/// Base class for micro-benchmarks with per-run setup and teardown.
/// Enumerating an instance yields an endless sequence of timings, one per
/// <see cref="Do"/> call; <see cref="Begin"/> and <see cref="End"/> run
/// outside the timed span.
/// </summary>
public abstract class Benchmark : IEnumerable<TimeSpan> {
    /// <summary>Per-run setup, executed before the stopwatch starts.</summary>
    public abstract void Begin();

    /// <summary>The operation being measured.</summary>
    public abstract void Do();

    /// <summary>Per-run teardown, executed after the stopwatch stops.</summary>
    public abstract void End();

    /// <summary>
    /// Performs one untimed Begin/Do/End cycle, then repeatedly: sets up,
    /// forces a garbage collection, times one <see cref="Do"/> call, tears down
    /// and yields the elapsed time. The sequence never ends on its own.
    /// </summary>
    public IEnumerator<TimeSpan> GetEnumerator() {
        // Warm-up cycle, not measured.
        Begin();
        Do();
        End();

        Stopwatch timer = new Stopwatch();
        for (;;) {
            Begin();
            GC.Collect();                   // Collect garbage before measuring.
            GC.WaitForPendingFinalizers();  // Let pending finalizers finish first.
            timer.Reset();
            timer.Start();
            Do();
            timer.Stop();
            End();
            TimeSpan sample = timer.Elapsed;
            yield return sample;
        }
    }

    IEnumerator IEnumerable.GetEnumerator() {
        return GetEnumerator();
    }
}
/// <summary>Plain four-component float vector with component-wise addition.</summary>
public struct Vector4 {
    float x;
    float y;
    float z;
    float w;

    /// <summary>Creates a vector from its four components.</summary>
    public Vector4(float x, float y, float z, float w) {
        this.x = x;
        this.y = y;
        this.z = z;
        this.w = w;
    }

    /// <summary>Returns the component-wise sum of the two operands.</summary>
    public static Vector4 operator +(Vector4 v1, Vector4 v2) {
        float sumX = v1.x + v2.x;
        float sumY = v1.y + v2.y;
        float sumZ = v1.z + v2.z;
        float sumW = v1.w + v2.w;
        return new Vector4(sumX, sumY, sumZ, sumW);
    }
}
/// <summary>Runs the four benchmarks and prints each one's mean timing.</summary>
class MainClass {
    // Mean, in nanoseconds, of the first 1000 samples the benchmark yields.
    static double AverageNanoseconds(Benchmark benchmark) {
        return benchmark.Take(1000).Average(sample => sample.TotalNanoseconds());
    }

    public static void Main(string[] args) {
        // All four benchmarks run before anything is printed.
        double simdNs = AverageNanoseconds(new SimdBenchmark());
        double simpleNs = AverageNanoseconds(new SimpleBenchmark());
        double defaultNs = AverageNanoseconds(new DefaultBenchmark());
        double mathNetNs = AverageNanoseconds(new MathNetBenchmark());

        foreach (double ns in new[] { simdNs, simpleNs, defaultNs, mathNetNs }) {
            Console.WriteLine(ns + " ns");
        }
    }
}
Results:
3203.9 ns
2677.4 ns
20138.4 ns
597581060.7 ns
I think this confirms that SIMD is active, because SimdBenchmark comes close to SimpleBenchmark (as intended by SIMD technology) and is much better than DefaultBenchmark (again, as implied by SIMD technology).
Moreover, the results seem consistent with those of konrad.kruczynski, because the ratio between SimdBenchmark (3203.9) and DefaultBenchmark (20138.4) is about 6 and the ratio between simdVector (5802) and usualVector (29598) is also about 6.
Anyway 2 questions still remain:
Why playing with "-O=simd" / "-O=-simd" has no effect. Is it deprecated? Is SIMD automatically engaged?
How could Stopwatch with 100 ns ticks give previous results (94.4, 29.7, 49.9), which are obviously lower than 100 ns?

Related

clrobj(<class name>) does not have llvm when passing array of struct to GPU Kernel (ALEA Library)

I am getting the "Fody/Alea.CUDA: clrobj(cGPU) does not have llvm" build error for a code in which I try to pass an array of struct to the NVIDIA Kernel using ALEA library. Here is a simplified version of my code. I removed the output gathering functionality in order to keep the code simple. I just need to be able to send the array of struct to the GPU for the moment.
using Alea.CUDA;
using Alea.CUDA.Utilities;
using Alea.CUDA.IL;
namespace GPUProgramming
{
// Element type of the array handed to TestModule; Kernel2 reads both fields
// of each element through a deviceptr<cGPU>.
// NOTE(review): declared as a class, i.e. a reference type — confirm whether
// the GPU toolchain accepts reference types as deviceptr element types.
public class cGPU
{
public int Slice;
public float FloatValue;
}
/// <summary>
/// GPU module with one kernel that stages the fields of each input element
/// into per-block shared arrays, plus two host-side launch helpers.
/// </summary>
[AOTCompile(AOTOnly = true)]
public class TestModule : ILGPUModule
{
    // Threads per block; also the length of the shared staging arrays.
    const int blockSize = 64;

    public TestModule(GPUModuleTarget target) : base(target)
    {
    }

    /// <summary>Copies <c>Slice</c> and <c>FloatValue</c> of one element per thread into shared memory.</summary>
    /// <param name="Data">Device pointer to the input elements.</param>
    /// <param name="n">Number of elements reachable through <paramref name="Data"/>.</param>
    [Kernel]
    public void Kernel2(deviceptr<cGPU> Data, int n)
    {
        var start = blockIdx.x * blockDim.x + threadIdx.x;
        int ind = threadIdx.x;
        var sharedSlice = __shared__.Array<int>(blockSize);
        var sharedFloatValue = __shared__.Array<float>(blockSize);
        if (ind < n && start < n)
        {
            sharedSlice[ind] = Data[start].Slice;
            sharedFloatValue[ind] = Data[start].FloatValue;
        }
        // The barrier must be reached by every thread of the block, so it sits
        // outside the bounds check: threads past the end of the data would
        // otherwise skip it while their neighbours wait.
        Intrinsic.__syncthreads();
    }

    /// <summary>Launches <see cref="Kernel2"/> on data that already lives on the device.</summary>
    /// <param name="Data">Device pointer to the input elements.</param>
    /// <param name="n">Number of elements.</param>
    /// <param name="NumOfBlocks">Grid size in blocks of <c>blockSize</c> threads.</param>
    public void Test2(deviceptr<cGPU> Data, int n, int NumOfBlocks)
    {
        var GridDim = new dim3(NumOfBlocks, 1);
        var BlockDim = new dim3(blockSize, 1);
        try
        {
            var lp = new LaunchParam(GridDim, BlockDim);
            GPULaunch(Kernel2, lp, Data, n);
        }
        catch (CUDAInterop.CUDAException x)
        {
            // Best effort: report the CUDA error code and carry on.
            var code = x.Data0;
            System.Console.WriteLine("ErrorCode = {0}", code);
        }
    }

    /// <summary>Copies <paramref name="Data"/> to the device and launches the kernel over it.</summary>
    /// <param name="Data">Host array to upload; the device copy is released when the call returns.</param>
    public void Test2(cGPU[] Data)
    {
        int NumOfBlocks = Common.divup(Data.Length, blockSize);
        using (var d_Slice = GPUWorker.Malloc(Data))
        {
            try
            {
                // Delegates to the device-pointer overload above (the listing
                // previously called an undefined Test_Kernel2).
                Test2(d_Slice.Ptr, Data.Length, NumOfBlocks);
            }
            catch (CUDAInterop.CUDAException x)
            {
                System.Console.WriteLine("ErrorCode = {0}", x.Data0);
            }
        }
    }
}
}
Your data type is a class, which is a reference type. Try using a struct instead. Reference types don't fit the GPU well, since each instance requires a small separate allocation on the heap.

C# Error in constructor of a class

I try to randomize the values if they are not changed, but it won't let me use the randomizer in the constructor and it gives an error when I use my other function.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Randomizer {
// Apartment as posted in the question; this listing does not compile.
class Apartment {
public int height;
public int bas;
public int hasPent;
// Stores the three arguments, then tries to create the rooms array and call finCon.
public Apartment(int b = 100, int h = 100, int p = 100) {
height = h;
bas = b;
hasPent = p;
// NOTE: a declaration carrying an access modifier is not allowed inside a
// method or constructor body; C# only permits `public` on class members.
public Room[,,] rooms = new Room[bas, bas, height];
finCon(bas, height, hasPent, rooms);
}
// Replaces each argument that equals 100 with a random number.
// NOTE: only the local parameter copies b, h and p are assigned; no field
// of the class is written here, and `ro` is not used.
void finCon(int b, int h, int p, Room[,,] ro) {
Random r = new Random();
if (b==100) {
b = r.Next(2,4);
}
if (h==100) {
h = r.Next(4,15);
}
if (p==100) {
p = r.Next(0,20);
}
}
}
/// <summary>Placeholder room type; holds one private integer initialised to 37.</summary>
class Room {
    private int some = 37;
}
// Entry point as posted in the question.
class Program {
static void Main(string[] args)
{
Apartment ap = new Apartment();
// NOTE: in the Apartment listing above, finCon has no access modifier
// (so it is private) and `rooms` is not declared as a class member.
ap.finCon(ap.bas,ap.height,ap.hasPent,ap.rooms);
// Prints bas immediately followed by height, with no separator.
Console.WriteLine("{0}{1}",ap.bas,ap.height);
}
}
}
Errors:
(1:1) A namespace cannot directly contain members such as fields or methods
(16:25) } expected
(18:13) Method must have a return type
(18:23) Identifier expected
(18:31) Identifier expected
(18:40) Identifier expected
(18:47) Identifier expected
(21:9) A namespace cannot directly contain members such as fields or methods
(21:47) Identifier expected
(21:48) Identifier expected
(21:51) Expected class, delegate, enum, interface, or struct
(22:28) Expected class, delegate, enum, interface, or struct
(33:5) Type or namespace definition, or end-of-file expected
(46:1) Type or namespace definition, or end-of-file expected
I've made it compile:
namespace Randomizer
{
/// <summary>
/// Apartment whose dimensions are randomised whenever the caller leaves an
/// argument at the sentinel value 100.
/// </summary>
public class Apartment
{
    // Sentinel meaning "not chosen by the caller; pick a random value".
    private const int Unset = 100;

    // One shared generator, so apartments created in quick succession do not
    // all start from the same time-based seed.
    private static readonly Random Rng = new Random();

    public int height;
    public int bas;
    public int hasPent;
    public Room[,,] rooms;

    /// <summary>Creates an apartment; any argument left at 100 is randomised.</summary>
    /// <param name="b">Base size, or 100 for a random value in [2, 4).</param>
    /// <param name="h">Height, or 100 for a random value in [4, 15).</param>
    /// <param name="p">Penthouse value, or 100 for a random value in [0, 20).</param>
    public Apartment(int b = 100, int h = 100, int p = 100)
    {
        finCon(b, h, p, null);
    }

    /// <summary>
    /// Resolves the sentinel arguments, stores the results in <c>bas</c>,
    /// <c>height</c> and <c>hasPent</c>, and makes sure <c>rooms</c> has the
    /// matching bas × bas × height shape. Previously the random values were
    /// written only to the by-value parameters and therefore discarded.
    /// </summary>
    /// <param name="b">Base size, or 100 to randomise.</param>
    /// <param name="h">Height, or 100 to randomise.</param>
    /// <param name="p">Penthouse value, or 100 to randomise.</param>
    /// <param name="ro">Unused; kept so existing callers keep compiling.</param>
    public void finCon(int b, int h, int p, Room[,,] ro)
    {
        bas = (b == Unset) ? Rng.Next(2, 4) : b;
        height = (h == Unset) ? Rng.Next(4, 15) : h;
        hasPent = (p == Unset) ? Rng.Next(0, 20) : p;

        // (Re)allocate only when the array is missing or its shape is stale.
        bool shapeMatches = rooms != null
            && rooms.GetLength(0) == bas
            && rooms.GetLength(1) == bas
            && rooms.GetLength(2) == height;
        if (!shapeMatches)
        {
            rooms = new Room[bas, bas, height];
        }
    }
}
/// <summary>Placeholder room type.</summary>
public class Room
{
    int some = 37;
}
/// <summary>Console entry point: builds one apartment and prints two of its fields.</summary>
class Program
{
    static void Main(string[] args)
    {
        var apartment = new Apartment();
        apartment.finCon(apartment.bas, apartment.height, apartment.hasPent, apartment.rooms);

        // bas and height are printed back to back, with no separator.
        int baseSize = apartment.bas;
        int floors = apartment.height;
        Console.WriteLine("{0}{1}", baseSize, floors);
    }
}
}
Your problem is trying to declare a field (with an access modifier) inside the constructor, which C# does not allow; the field has to be declared at class level. I've made all classes public too.
Hope it will help you.

Unexpected behavior of static variable initialization

I'm not much familiar with WinRT. I'm encountering an unexpected behavior. I've a static variable _Verses that is initialized in static constructor of class. So expected behavior is _Verses will be initialized before first reference to static method as explained in When is a static constructor called in C#?
But when I call a static async function LoadData (WinRT) I got exception.
Object Reference not set to an instance of object.
My Code is:
// Holder for the static verse table, as posted in the question.
// NOTE(review): the `class` keyword is missing from this declaration as posted.
public VerseCollection
{
public const int TotalVerses = 6236;
static Verse[] _Verses;
// Static constructor: allocates the verse table.
static VerseCollection()
{
_Verses = new Verse[TotalVerses];
}
// Reads TotalVerses records from the file: each record is an Int32 word
// count followed by that many strings. wId accumulates the index of the
// first word of the next verse.
// NOTE(review): `async void` gives the caller nothing to await, so the
// caller cannot tell when (or whether) loading has finished.
internal static async void LoadData(StorageFile file)
{
using (var reader = new BinaryReader(await file.OpenStreamForReadAsync()))
{
int wId = 0;
for (int i = 0; i < VerseCollection.TotalVerses; i++)
{
var retValue = new string[reader.ReadInt32()];
for (int j = 0; j < retValue.Length; j++)
retValue[j] = reader.ReadString();
_Verses[i] = new Verse(i, wId, retValue);
wId += _Verses[i].Words.Count;
}
}
}
}
// Abbreviated Book type, as posted in the question.
// NOTE(review): the `class` keyword is missing, and CreateInstance is declared
// to return Task<Book> but this excerpt contains no return statement.
public Book
{
public static async Task<Book> CreateInstance()
{
// Awaits the file lookup, then starts LoadData; LoadData itself is not awaited.
VerseCollection.LoadData(await DigitalQuranDirectories.Data.GetFileAsync("quran-uthmani.bin"));
}
}
I call the function CreateInstance as:
// Call site: awaits Book.CreateInstance() and discards the result.
// Being `async void`, this method cannot itself be awaited by its caller.
async void DoInit()
{
await DigitalQuran.Book.CreateInstance();
}
Same code is working in desktop but not working for WinRT. Full Code of Book Class for Desktop is here and for VerseCollection class is here
EDIT:
Complete code is here
// Book excerpt from the question's "complete code".
public class Book : VerseSpan
{
// Creates the shared instance, starts the two loaders and returns the instance.
// NOTE(review): both loaders are `async void` in this listing, so neither
// call below is awaited; execution continues while they may still be running.
public static async Task<Book> CreateInstance()
{
_Instance = new Book();
VerseCollection.LoadData(await DigitalQuranDirectories.Data.GetFileAsync("quran-uthmani.bin"));
PrivateStorage.LoadQuranObjectsFromMetadata();
// Some Other Operations too
return _Instance;
}
}
// VerseCollection excerpt from the question's "complete code".
public class VerseCollection
{
// Verse table, allocated by a static field initializer.
static Verse[] _Verses = new Verse[TotalVerses];
// Reads TotalVerses records from the file: each record is an Int32 word
// count followed by that many strings. wId accumulates the index of the
// first word of the next verse.
// NOTE(review): `async void` gives the caller nothing to await.
internal static async void LoadData(StorageFile file)
{
using (var reader = new BinaryReader(await file.OpenStreamForReadAsync()))
{
int wId = 0;
for (int i = 0; i < VerseCollection.TotalVerses; i++)
{
var retValue = new string[reader.ReadInt32()];
for (int j = 0; j < retValue.Length; j++)
retValue[j] = reader.ReadString();
_Verses[i] = new Verse(i, wId, retValue);
wId += _Verses[i].Words.Count;
}
}
}
}
/// <summary>One verse: a one-based global number plus its word collection.</summary>
public class Verse
{
    /// <param name="number">Zero-based verse index; stored incremented by one.</param>
    /// <param name="firstWordIndex">Index of this verse's first word, passed to the word collection.</param>
    /// <param name="words">Texts of the verse's words.</param>
    public Verse(int number, int firstWordIndex, string[] words)
    {
        int oneBasedNumber = number + 1;
        GlobalNumber = oneBasedNumber;

        // Built after GlobalNumber is set, because the collection receives `this`.
        var wordCollection = new WordCollection(firstWordIndex, words, this);
        Words = wordCollection;
    }
}
/// <summary>
/// View over a contiguous run of words; the words and their texts are kept
/// in two static tables shared by all collections.
/// </summary>
public class WordCollection : ReadOnlyCollection<Word>
{
    public const int TotalWords = 77878;

    // Static tables written by the internal constructor below.
    static Word[] _Words = new Word[TotalWords];
    static string[] _WordsText = new string[TotalWords];

    /// <summary>Creates a view of <paramref name="count"/> words starting at <paramref name="startIndex"/>.</summary>
    public WordCollection(int startIndex, int count)
        : base(count)
    {
        this.startIndex = startIndex;
    }

    /// <summary>
    /// Creates the view and registers one <c>Word</c> and its text in the
    /// static tables for every entry of <paramref name="words"/>.
    /// </summary>
    internal WordCollection(int startId, string[] words, Verse verse)
        : this(startId, words.Length)
    {
        int end = words.Length + startId;
        for (int id = startId; id < end; id++)
        {
            _Words[id] = new Word(id, verse);
            // words is zero-based, while ids start at startId.
            _WordsText[id] = words[id - startId];
        }
    }
}
// Abstract base for the collections in this listing (excerpt: the Count
// member and the IEnumerable<T> implementation are not shown here).
public abstract class ReadOnlyCollection<T> : IEnumerable<T>
{
// Stores the element count supplied by the derived class.
public ReadOnlyCollection(int count)
{
Count = count;
}
}
// Loads chapter and part metadata from the ".metadata" file.
public class PrivateStorage
{
// Opens ".metadata" and feeds the chapter records, then the part records,
// to their collections; both are read from the same reader, in that order.
// NOTE(review): `async void` gives the caller nothing to await.
internal static async void LoadQuranObjectsFromMetadata()
{
using (var reader = new BinaryReader(await (await DigitalQuranDirectories.Data.GetFileAsync(".metadata")).OpenStreamForReadAsync()))
{
/* 1 */ ChapterCollection.LoadData(EnumerateChapters(reader));
/* 2 */ PartCollection.LoadData(EnumerateParts(reader));
/* Some other tasks */
}
}
// Iterator yielding ChapterCollection.TotalChapters records. Each record is
// read from the reader only when the consumer advances the enumerator.
static IEnumerator<ChapterMeta> EnumerateChapters(BinaryReader reader)
{
for (int i = 0; i < ChapterCollection.TotalChapters; i++)
{
yield return new ChapterMeta()
{
StartVerse = reader.ReadInt32(),
VerseCount = reader.ReadInt32(),
BowingCount = reader.ReadInt32(),
Name = reader.ReadString(),
EnglishName = reader.ReadString(),
TransliteratedName = reader.ReadString(),
RevelationPlace = (RevelationPlace)reader.ReadByte(),
RevelationOrder = reader.ReadInt32()
};
}
}
// Iterator yielding PartCollection.TotalParts records. Each record is read
// from the reader only when the consumer advances the enumerator.
static IEnumerator<PartMeta> EnumerateParts(BinaryReader reader)
{
for (int i = 0; i < PartCollection.TotalParts; i++)
{
yield return new PartMeta()
{
StartVerse = reader.ReadInt32(),
VerseCount = reader.ReadInt32(),
ArabicName = reader.ReadString(),
TransliteratedName = reader.ReadString()
};
}
}
}
/// <summary>Holds the static table of all chapters.</summary>
public class ChapterCollection : ReadOnlyCollection<Chapter>
{
    public const int TotalChapters = 114;
    static Chapter[] _Chapters = new Chapter[TotalChapters];

    /// <summary>Fills the chapter table from the first <c>TotalChapters</c> items of the enumerator.</summary>
    /// <param name="e">Source of chapter metadata, positioned before its first item.</param>
    /// <exception cref="System.InvalidOperationException">
    /// The enumerator ends before <c>TotalChapters</c> items were read.
    /// </exception>
    internal static void LoadData(IEnumerator<ChapterMeta> e)
    {
        for (int i = 0; i < TotalChapters; i++)
        {
            // Current is only meaningful after a successful MoveNext(); fail
            // loudly instead of building chapters from stale metadata.
            if (!e.MoveNext())
            {
                throw new System.InvalidOperationException(
                    "Chapter metadata ended after " + i + " of " + TotalChapters + " entries.");
            }
            _Chapters[i] = new Chapter(i, e.Current);
        }
    }
}
/// <summary>Holds the static table of all parts.</summary>
public class PartCollection : ReadOnlyCollection<Part>
{
    public const int TotalParts = 30;
    static Part[] _Parts = new Part[TotalParts];

    /// <summary>Fills the part table from the first <c>TotalParts</c> items of the enumerator.</summary>
    /// <param name="e">Source of part metadata, positioned before its first item.</param>
    /// <exception cref="System.InvalidOperationException">
    /// The enumerator ends before <c>TotalParts</c> items were read.
    /// </exception>
    internal static void LoadData(IEnumerator<PartMeta> e)
    {
        for (int i = 0; i < TotalParts; i++)
        {
            // Current is only meaningful after a successful MoveNext(); fail
            // loudly instead of building parts from stale metadata.
            if (!e.MoveNext())
            {
                throw new System.InvalidOperationException(
                    "Part metadata ended after " + i + " of " + TotalParts + " entries.");
            }
            _Parts[i] = new Part(i, e.Current);
        }
    }
}
When I run the code with debugger no exception is raised. Further After exception visual studio shows some times in class VerseCollection in function LoadData on line _Verses[i] = new Verse(i, wId, retValue); (_Verses is null) and some times in class ChapterCollection in Function LoadData on line _Chapters[i] = new Chapter(i, e.Current); (_Chapters is null)
There was an issue with the asynchronous calls. File reading is an asynchronous operation in WinRT, and an async method with a void return type cannot be awaited. So the next instructions execute without waiting for the previous call to complete, because it keeps running as a separate task. This leads to the NullReferenceException.
I managed to solve my problems by changing return type of all async operations from void to Task and called them with await like in the code below.
/// <summary>Book excerpt after the fix: both loaders are awaited.</summary>
public class Book : VerseSpan
{
    /// <summary>
    /// Creates the shared instance, waits for the verse data and then the
    /// metadata to finish loading, and returns the instance.
    /// </summary>
    public static async Task<Book> CreateInstance()
    {
        _Instance = new Book();

        var verseFile = await DigitalQuranDirectories.Data.GetFileAsync("quran-uthmani.bin");
        await VerseCollection.LoadData(verseFile);

        await PrivateStorage.LoadQuranObjectsFromMetadata();
        // Some Other Operations too
        return _Instance;
    }
}
/// <summary>VerseCollection excerpt after the fix: loading returns an awaitable task.</summary>
public class VerseCollection
{
    // Verse table, allocated by a static field initializer.
    static Verse[] _Verses = new Verse[TotalVerses];

    /// <summary>
    /// Reads <c>TotalVerses</c> records from the file. Each record is an Int32
    /// word count followed by that many strings.
    /// </summary>
    /// <param name="file">Binary file holding the verse records.</param>
    internal static async Task LoadData(StorageFile file)
    {
        var stream = await file.OpenStreamForReadAsync();
        using (var reader = new BinaryReader(stream))
        {
            // Index of the first word of the verse about to be read.
            int nextWordId = 0;
            for (int verseIndex = 0; verseIndex < VerseCollection.TotalVerses; verseIndex++)
            {
                var words = new string[reader.ReadInt32()];
                for (int w = 0; w < words.Length; w++)
                {
                    words[w] = reader.ReadString();
                }

                _Verses[verseIndex] = new Verse(verseIndex, nextWordId, words);
                nextWordId += _Verses[verseIndex].Words.Count;
            }
        }
    }
}
/// <summary>PrivateStorage excerpt after the fix: loading returns an awaitable task.</summary>
public class PrivateStorage
{
    /// <summary>Opens the ".metadata" file for reading and processes its contents.</summary>
    internal static async Task LoadQuranObjectsFromMetadata()
    {
        var metadataFile = await DigitalQuranDirectories.Data.GetFileAsync(".metadata");
        var stream = await metadataFile.OpenStreamForReadAsync();
        using (var reader = new BinaryReader(stream))
        {
            /* Some tasks */
        }
    }
}
Because it is running on Desktop but not WinRT, it leads me to believe there is an issue with your asynchronous call. Because you are doing this asynchronously, there is no guarantee that the constructor (static or not) will have finished running before the call to LoadData. Make sure that your constructor has finished executing before calling the LoadData function, and this should give you consistent behaviour.

.Net Lists and maximum number of elements

So I am developing an append-only 64-bit-ish List and Dictionary, and I've run into a memory error. I figured I would at some point, but not at 64 MBs. I find that somewhat unexpected, and am curious if someone could explain to me why it's running into an issue at 64 MBs.
My test for my new List class is simply an attempt to create and load 8 GBs worth of bools into the List. I figured they'd suck up only ~1 bit each, so I'd get some good metrics / precision for testing my program.
Here is the output from VS:
- this {OrganicCodeDesigner.DynamicList64Tail<bool>} OrganicCodeDesigner.DynamicList64Tail<bool>
Count 536870912 ulong
- data Count = 536870912 System.Collections.Generic.List<bool>
- base {"Exception of type 'System.OutOfMemoryException' was thrown."} System.SystemException {System.OutOfMemoryException}
- base {"Exception of type 'System.OutOfMemoryException' was thrown."} System.Exception {System.OutOfMemoryException}
+ Data {System.Collections.ListDictionaryInternal} System.Collections.IDictionary {System.Collections.ListDictionaryInternal}
HelpLink null string
+ InnerException null System.Exception
Message "Exception of type 'System.OutOfMemoryException' was thrown." string
Source "mscorlib" string
StackTrace " at System.Collections.Generic.Mscorlib_CollectionDebugView`1.get_Items()" string
+ TargetSite {T[] get_Items()} System.Reflection.MethodBase {System.Reflection.RuntimeMethodInfo}
+ Static members
+ Non-Public members
- Raw View
Capacity 536870912 int
Count 536870912 int
- Static members
+ Non-Public members
- Non-Public members
+ _items {bool[536870912]} bool[]
_size 536870912 int
_syncRoot null object
_version 536870912 int
System.Collections.Generic.ICollection<T>.IsReadOnly false bool
System.Collections.ICollection.IsSynchronized false bool
System.Collections.ICollection.SyncRoot {object} object
System.Collections.IList.IsFixedSize false bool
System.Collections.IList.IsReadOnly false bool
item false bool
- Type variables
T bool bool
And here are the classes I am currently working on:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace OrganicCodeDesigner
{
/// <summary>
/// Leaf of the 64-bit list: adapts a single <see cref="List{T}"/> to the
/// ulong-indexed <c>iList64</c> interface.
/// </summary>
public class DynamicList64Tail<T> : iList64<T>
{
    private List<T> data;

    public DynamicList64Tail()
    {
        data = new List<T>();
    }

    /// <summary>Appends an item to the underlying list.</summary>
    public void Add(T item)
    {
        data.Add(item);
    }

    /// <summary>Removes every item.</summary>
    public void Clear()
    {
        data.Clear();
    }

    /// <summary>Reports whether the item is present.</summary>
    public bool Contains(T item)
    {
        return data.Contains(item);
    }

    /// <summary>Returns the index of the first occurrence, or null when absent.</summary>
    public ulong? IndexOf(T item)
    {
        // One scan instead of Contains() followed by IndexOf().
        int position = data.IndexOf(item);
        if (position < 0)
        {
            return null;
        }
        return (ulong)position;
    }

    /// <summary>Gets or sets the element at the given index.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The index is not below <see cref="Count"/>.</exception>
    public T this[ulong index]
    {
        get
        {
            return data[ToListIndex(index)];
        }
        set
        {
            data[ToListIndex(index)] = value;
        }
    }

    /// <summary>Number of stored items.</summary>
    public ulong Count
    {
        get { return (ulong)data.Count; }
    }

    // Validates before narrowing: a plain (int) cast wraps for indices of
    // 2^32 and above and would silently address the wrong element.
    private int ToListIndex(ulong index)
    {
        if (index >= (ulong)data.Count)
        {
            throw new ArgumentOutOfRangeException("index");
        }
        return (int)index;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
namespace OrganicCodeDesigner
{
// #todo: Create IList64, with 64-bit longs in mind.
// #todo: Create BigIntegerList, which may supersede this one.
/// <summary>
/// Append-oriented list addressed by ulong indices. Items live in a list of
/// child lists: <c>DynamicList64Tail</c> leaves at depth 0, nested
/// <c>DynamicList64</c> instances at greater depths.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Add</c>, <c>IndexOf</c> and the indexer all treat every
/// child as holding exactly Int32.MaxValue items, whatever the depth. That
/// looks inconsistent with nested children at depth &gt; 0 — confirm the
/// intended per-child capacity before relying on depths above 0.
/// </remarks>
public class DynamicList64<T> : iList64<T>
{
    private List<iList64<T>> data;
    private ulong count = 0;
    private ulong depth = 0;

    /// <summary>Creates an empty depth-0 list with one empty leaf.</summary>
    public DynamicList64()
    {
        data = new List<iList64<T>>() { new DynamicList64Tail<T>()};
        count = 0;
    }

    /// <summary>Creates an empty list of the given depth with one empty child.</summary>
    public DynamicList64(ulong depth)
    {
        this.depth = depth;
        if (depth == 0)
        {
            data = new List<iList64<T>>() { new DynamicList64Tail<T>() };
        }
        else
        {
            depth -= 1;
            data = new List<iList64<T>>() { new DynamicList64<T>(depth) };
        }
    }

    // Wraps an existing child list; used by Add when this level grows deeper.
    // NOTE(review): count is set to Int32.MaxValue rather than derived from
    // the wrapped children — confirm this is intended.
    internal DynamicList64(List<iList64<T>> data, ulong depth)
    {
        this.data = data;
        this.depth = depth;
        this.count = Int32.MaxValue;
    }

    /// <summary>Appends an item, adding a child or a level when the current one is full.</summary>
    public void Add(T item)
    {
        if (data.Count >= Int32.MaxValue)
        {
            //#todo: Do switch operation, whereby this {depth, List l} becomes this {depth + 1, List.Add(List l), count = 1}, and the new object becomes {depth, List l, count = max}
            DynamicList64<T> newDynamicList64 = new DynamicList64<T>(this.data, this.depth);
            this.data = new List<iList64<T>>() { newDynamicList64 };
            this.count = 0;
            this.depth += 1;
        }
        if(data[data.Count-1].Count >= Int32.MaxValue) {
            if (depth == 0)
            {
                data.Add(new DynamicList64Tail<T>());
            }
            else
            {
                data.Add(new DynamicList64<T>(depth - 1));
            }
        }
        data[data.Count - 1].Add(item);
        count++;
    }

    /// <summary>Drops all content and returns to an empty depth-0 list.</summary>
    public void Clear()
    {
        data.Clear();
        data = new List<iList64<T>>() { new DynamicList64Tail<T>() };
        count = 0;
        depth = 0;
    }

    /// <summary>Reports whether any child contains the item.</summary>
    public bool Contains(T item)
    {
        foreach(iList64<T> l in data) {
            if(l.Contains(item)) {
                return true;
            }
        }
        return false;
    }

    /// <summary>Returns the index of the first occurrence, or null when absent.</summary>
    public ulong? IndexOf(T item)
    {
        for (int i = 0; i < data.Count; i++ )
        {
            // One scan per child: IndexOf already reports absence as null,
            // so a preceding Contains() only doubled the work.
            ulong? inner = data[i].IndexOf(item);
            if (inner.HasValue)
            {
                return (ulong)(((ulong)i * (ulong)(Int32.MaxValue)) + inner.Value);
            }
        }
        return null;
    }

    /// <summary>Gets or sets the element at the given index.</summary>
    public T this[ulong index]
    {
        get
        {
            return data[(int)(index / Int32.MaxValue)][index % Int32.MaxValue];
        }
        set
        {
            data[(int)(index / Int32.MaxValue)][index % Int32.MaxValue] = value;
        }
    }

    /// <summary>Number of items added through this instance.</summary>
    public ulong Count
    {
        get { return this.count; }
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace OrganicCodeDesigner
{
// Minimal list contract that uses ulong for indices and counts.
public interface iList64<T>
{
// Appends an item.
void Add(T item);
// Removes all items.
void Clear();
// Reports whether the item is present.
bool Contains(T item);
// Index of the item; nullable so that "not found" can be reported as null.
ulong? IndexOf(T item);
// Element access by ulong index.
T this[ulong index] { get; set;}
// Number of items.
ulong Count { get; }
}
}
And the test program's code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using OrganicCodeDesigner;
namespace OrganicCodeDesignerListDictionaryTest
{
/// <summary>Test form with one button per container under test.</summary>
public partial class MainForm : Form
{
    public MainForm()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Fills a <c>DynamicList64&lt;bool&gt;</c> with two seed values followed by
    /// 68,719,476,736 alternating booleans, then reports success.
    /// </summary>
    private void Button_TestList_Click(object sender, EventArgs e)
    {
        var list = new DynamicList64<bool>();
        list.Add(true);
        list.Add(false);

        bool flag = true;
        ulong i = 0;
        while (i < 68719476736)
        {
            flag = !flag;
            list.Add(flag);
            //if(i%4096==0) {
            //TextBox_Output.Text += "List now contains " + i + "\r";
            //}
            i++;
        }

        TextBox_Output.Text += "Successfully added all the bits.\r";
    }

    /// <summary>Dictionary test; not implemented.</summary>
    private void Button_TestDictionary_Click(object sender, EventArgs e)
    {
    }
}
}
Perhaps you can spot the error?
Perhaps you can spot the error?
I think the error is here:
I figured they'd suck up only ~1 bit each, so I'd get some good metrics / precision for testing my program.
A bool takes one byte, not one bit - so you've drastically underestimated the size of your list. You're actually running into an error with 512MB of bools. As Reed Copsey is editing a little faster than me - I suspect the list is trying to increase its size by allocating an array 2x its current size [i.e. a 1GB array] and that this is running into some .NET limitations.
This is probably a good time to start implementing your splitting logic.
There are limits to the size of an array in .NET. Even if you are running on 64bit platforms, and set gcAllowVeryLargeObjects (in .NET 4.5), you are still limited to 2,146,435,071 items max in a single dimension of the array.
(In pre-4.5, you are limited by 2gb for a single object, no matter how many entries it contains.)
That being said, a bool is represented by one byte, not one bit, so this will be quite a bit larger than you're expecting. Even so, you still only have 536,870,912 items in your list when this fails, so theoretically, on a 64-bit system with enough memory, the next allocation for growing the list should still be within the limits. However, this requires the program to successfully allocate a single, contiguous chunk of memory large enough for the requested data (which will be 2x the size of the last chunk).

Factory Initializer + Singleton

I wrote a non-static Generic class instantiator for my Abstract Factory design, and use Singleton approach to make sure that only 1 instance of the instantiator will be initialized for every client request.
/// <summary>
/// Singleton that creates the client-specific implementation of
/// <typeparamref name="T"/>, chosen by the "ClientCode" value in the current
/// HTTP session.
/// </summary>
/// <remarks>
/// The singleton holds no per-request state: the client code and all derived
/// names are locals of <see cref="CreateInstance"/>. Keeping them in fields of
/// the shared instance would let concurrent requests for different clients
/// overwrite each other's values.
/// </remarks>
public sealed class FactoryInstantiator<T> where T: class
{
    private static readonly FactoryInstantiator<T> _instance = new FactoryInstantiator<T>();

    /// <summary>The single shared instantiator for <typeparamref name="T"/>.</summary>
    public static FactoryInstantiator<T> Instance
    {
        get
        {
            return _instance;
        }
    }

    private FactoryInstantiator() { }

    /// <summary>
    /// Builds a type name from the dot-separated parts of
    /// <c>typeof(T).ToString()</c> with the session's client code inserted,
    /// and creates an instance of that type.
    /// </summary>
    /// <returns>A new instance of the resolved type, cast to <typeparamref name="T"/>.</returns>
    public T CreateInstance()
    {
        // Read per call, so each request uses its own session value.
        string client = HttpContext.Current.Session["ClientCode"].ToString();

        // NOTE(review): assumes the full type name has at least three
        // dot-separated parts — confirm against the real namespaces.
        string[] splitClassName = typeof(T).ToString().Split('.');
        string className = splitClassName[2];
        string assemblyName = splitClassName[0] + "." + client + "." + splitClassName[1];
        string fullyQualifiedClassName = assemblyName + "." + className;

        return (T)Activator.CreateInstance(Type.GetType(fullyQualifiedClassName + "," + assemblyName));
    }
}
I abstracted the the whole namespace for each Client
namespace InventorySuite.Factory.BusinessLogic
{
// abstract factory
/// <summary>Abstract base that every client-specific invoice class derives from.</summary>
public abstract class InvoiceFactory
{
    /// <summary>Implemented by each client's concrete invoice.</summary>
    // An abstract member cannot declare a body (compiler error CS0500).
    public abstract void Set();
}
}
namespace InventorySuite.Client1.BusinessLogic
{
    /// <summary>Concrete invoice class for Client1.</summary>
    // The base type lives in InventorySuite.Factory.BusinessLogic, which is not an
    // enclosing namespace of this one, so it is referenced by its full name.
    public class Invoice : InventorySuite.Factory.BusinessLogic.InvoiceFactory
    {
        /// <summary>Client1 implementation of the abstract Set operation (currently empty).</summary>
        public override void Set() { }
    }
}
namespace InventorySuite.Client2.BusinessLogic
{
    /// <summary>Concrete invoice class for Client2.</summary>
    // The base type lives in InventorySuite.Factory.BusinessLogic, which is not an
    // enclosing namespace of this one, so it is referenced by its full name.
    public class Invoice : InventorySuite.Factory.BusinessLogic.InvoiceFactory
    {
        /// <summary>Client2 implementation of the abstract Set operation (currently empty).</summary>
        public override void Set() { }
    }
}
/// <summary>
/// Page load handler that resolves and invokes the invoice implementation twice:
/// once with the session client code set to "Client1" and once with "Client2".
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // First pass: store the client code in the session, then resolve that client's invoice.
    Session.Add("ClientCode", "Client1");
    InvoiceFactory invoiceForClient = FactoryInstantiator<InvoiceFactory>.Instance.CreateInstance();
    invoiceForClient.Set();

    // Second pass: switch the client code and resolve again through the same singleton.
    Session["ClientCode"] = "Client2";
    invoiceForClient = FactoryInstantiator<InvoiceFactory>.Instance.CreateInstance();
    invoiceForClient.Set();
}
It works well and I have already tested it, but my question is about its efficiency/performance hit, since I use reflection here, and whether the Singleton approach has multi-threading issues (afaik, the singleton instance will be shared by all clients). I would also appreciate any other approach to this. Thanks.
You will not have any multi-threading issue since you're creating a new instance every time.
About the performance. You can measure the time creating 100 instances:
// Time 100 factory calls. Stopwatch is used instead of Environment.TickCount because
// TickCount only advances in steps of the system timer (typically 10-16 ms), so a loop
// this short would usually be reported as 0.
System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
for (int i = 0; i < 100; i++)
{
    Session["ClientCode"] = "Client2";
    clientInvoice = FactoryInstantiator<InvoiceFactory>.Instance.CreateInstance();
    clientInvoice.Set();
}
timer.Stop();
// Whole milliseconds; read timer.Elapsed.TotalMilliseconds when a fractional value is needed.
long timeCreate100Instances = timer.ElapsedMilliseconds;
Using reflection, the performance hit resides in loading the assembly. I think that in your case the class you're loading is in the same DLL, so you will not experience any performance issue either.
Otherwise, you can cache the Assembly objects in a Hashtable/Dictionary in your CreateInstance() method.
Using Richard's and Daniel's suggestions, I was able to reduce the performance hit of reflection by using caching. I therefore conclude that reflection really has huge performance issues.
/// <summary>
/// Returns the client-specific implementation of <typeparamref name="T"/>, creating it
/// through reflection on a cache miss and keeping it in the ASP.NET cache for one minute.
/// </summary>
/// <remarks>
/// The cached object itself is returned, so every caller that hits the cache before the
/// entry expires receives the same instance. All intermediate names are locals and the
/// client code is read from the session here, so no state is kept on the shared singleton.
/// </remarks>
/// <returns>The cached or newly created instance, cast to <typeparamref name="T"/>.</returns>
/// <exception cref="InvalidOperationException">The session has no "ClientCode" value.</exception>
/// <exception cref="TypeLoadException">The client-specific type could not be found.</exception>
public T CreateInstance()
{
    object clientCode = HttpContext.Current.Session["ClientCode"];
    if (clientCode == null)
    {
        throw new InvalidOperationException("Session value \"ClientCode\" is not set.");
    }

    string[] nameParts = typeof(T).ToString().Split('.');
    string className = nameParts[2];
    string assemblyName = nameParts[0] + "." + clientCode.ToString() + "." + nameParts[1];
    string fullyQualifiedClassName = assemblyName + "." + className;

    // Read the cache exactly once: a null check followed by a second lookup can
    // return null when the entry expires between the two reads.
    object cached = HttpContext.Current.Cache[fullyQualifiedClassName];
    if (cached != null)
    {
        return (T)cached;
    }

    string assemblyQualifiedName = fullyQualifiedClassName + "," + assemblyName;
    Type clientType = Type.GetType(assemblyQualifiedName);
    if (clientType == null)
    {
        throw new TypeLoadException("Could not load type '" + assemblyQualifiedName + "'.");
    }

    T obj = (T)Activator.CreateInstance(clientType);

    // UtcNow rather than Now: absolute expirations based on local time can be
    // thrown off by daylight-saving changes. TimeSpan.Zero means no sliding expiration.
    HttpContext.Current.Cache.Insert(fullyQualifiedClassName, obj, null, DateTime.UtcNow.AddMinutes(1), TimeSpan.Zero);
    return obj;
}
/// <summary>
/// Page load handler that times three ways of obtaining an invoice object and writes
/// each elapsed time in milliseconds to the response, one per line.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    InvoiceFactory inv;

    // normal instantiation
    Stopwatch globalTimer = Stopwatch.StartNew();
    for (int x = 0; x <= 10000; x++)
    {
        inv = new InventorySuite.Client1.BusinessLogic.Invoice();
    }
    globalTimer.Stop();
    Response.Write(globalTimer.ElapsedMilliseconds + "<BR>");
    //result 0ms

    // using singleton factory w/o caching
    // NOTE(review): this line needs an accessible constructor on FactoryInstantiator<T>;
    // if the constructor is private, go through FactoryInstantiator<InvoiceFactory>.Instance
    // instead - confirm which variant was actually measured.
    globalTimer = Stopwatch.StartNew();
    for (int x = 0; x <= 10000; x++)
    {
        inv = new FactoryInstantiator<InvoiceFactory>().CreateInstance();
    }
    globalTimer.Stop();
    Response.Write(globalTimer.ElapsedMilliseconds + "<BR>");
    //result 129ms

    // using singleton factory w/ caching
    // A fresh stopwatch is required here: without it the stopped timer from the previous
    // measurement is read again and this loop is never timed.
    globalTimer = Stopwatch.StartNew();
    for (int x = 0; x <= 10000; x++)
    {
        inv = FactoryInstantiator<InvoiceFactory>.Instance.CreateInstance();
    }
    globalTimer.Stop();
    Response.Write(globalTimer.ElapsedMilliseconds + "<BR>");
    //result 21ms
}
Loading the assemblies in Session State to solve multi-threading issue.
/// <summary>
/// Returns the client-specific implementation of <typeparamref name="T"/>, creating it
/// through reflection on first use and keeping it in a dictionary stored in the session
/// under "ASSEMBLIES".
/// </summary>
/// <remarks>
/// The session entry is read as <c>Dictionary&lt;string, T&gt;</c>; if the stored object is
/// not a dictionary for this exact <typeparamref name="T"/>, it is replaced by a new, empty one.
/// All intermediate names are locals and the client code is read from the session here,
/// so no state is kept on the shared singleton.
/// </remarks>
/// <returns>The stored or newly created instance.</returns>
/// <exception cref="InvalidOperationException">The session has no "ClientCode" value.</exception>
/// <exception cref="TypeLoadException">The client-specific type could not be found.</exception>
public T CreateInstance()
{
    object clientCode = HttpContext.Current.Session["ClientCode"];
    if (clientCode == null)
    {
        throw new InvalidOperationException("Session value \"ClientCode\" is not set.");
    }

    string[] nameParts = typeof(T).ToString().Split('.');
    string className = nameParts[2];
    string assemblyName = nameParts[0] + "." + clientCode.ToString() + "." + nameParts[1];
    string fullyQualifiedClassName = assemblyName + "." + className;

    var assemblies = HttpContext.Current.Session["ASSEMBLIES"] as Dictionary<string, T>;
    if (assemblies == null)
    {
        assemblies = new Dictionary<string, T>();
        HttpContext.Current.Session.Add("ASSEMBLIES", assemblies);
    }

    // TryGetValue instead of the indexer: the dictionary may already exist without an
    // entry for this class (e.g. after the client code changed), and the indexer would
    // then throw KeyNotFoundException.
    T obj;
    if (!assemblies.TryGetValue(fullyQualifiedClassName, out obj) || obj == null)
    {
        string assemblyQualifiedName = fullyQualifiedClassName + "," + assemblyName;
        Type clientType = Type.GetType(assemblyQualifiedName);
        if (clientType == null)
        {
            throw new TypeLoadException("Could not load type '" + assemblyQualifiedName + "'.");
        }

        obj = (T)Activator.CreateInstance(clientType);
        assemblies[fullyQualifiedClassName] = obj;
    }
    return obj;
}

Categories