In order to parallelize and accelerate calculations I"ve splitted one long for-cycle by two short for-cycles using TPL, these parts are called PointGenerator and PointGenerator2 in my class bellow:
class CalcPiTPL
{
int n;
int totalCounter;
int counter1;
int counter2;
double aPi;
public StringBuilder Msg; // diagonstic message
Stopwatch stopWatch = new Stopwatch();
public void Init(int aN)
{
stopWatch.Start();
n = aN; // save total calculate-iterations amount
aPi = -1; // flag, if no any calculate-iteration has been completed
Msg = new StringBuilder("No any calculate-iteration has been completed");
}
public void Run()
{
if (n < 1)
{
Msg = new StringBuilder("Invalid N-value");
return;
}
Task[] tasks = new Task[2];
tasks[0] = Task.Factory.StartNew((obj) => { PointGenerator((int)obj); }, n);
tasks[1] = Task.Factory.StartNew((obj) => { PointGenerator2((int)obj); }, n);
Task.WaitAll(tasks[0], tasks[1]);
totalCounter = counter1 + counter2;
aPi = 4.0 * ((double)totalCounter / (double)n); // to calculate approximate Pi - value
Console.WriteLine(aPi);
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine("RunTime " + elapsedTime);
}
public double Done()
{
if (aPi > 0)
{
Msg = new StringBuilder("Calculates has been completed successful");
return aPi; // return gotten value
}
else
{
return 0; // no result
}
}
public void PointGenerator(int n)//FIRST PART OF ONE BIG FOR-CYCLE
{
double s = 0.125;
double sP = s / (n / 2);
double x = Math.Sqrt(sP);
for (double cX = 0; cX <= 0.25; cX += x)
{
for (double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
counter1++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
}
public void PointGenerator2(int n)//SECOND PART OF ONE BIG FOR-CYCLE
{
double s = 0.125;
double sP = s / (n / 2);
double x = Math.Sqrt(sP);
for (double cX = 0.25; cX <= 0.5; cX += x)
{
for (double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
counter2++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
}
}
And this is the same class without Tasks using(TPL), it has one long for-cycle:
class TCalcPi//unparallel calculating method
{
int N;
int n_0;
double aPi;
public StringBuilder Msg; // diagnostic message
Stopwatch stopWatch = new Stopwatch();
public void Init(int aN)
{
stopWatch.Start();
N = aN; // save total calculate-iterations amount
aPi = -1; // flag, if no any calculate-iteration has been completed
Msg = new StringBuilder("No any calculate-iteration has been completed");
}
public void Run()
{
if (N < 1)
{
Msg = new StringBuilder("Invalid N - value");
return;
}
double s = 0.25;
double sP = s / N;
double x = Math.Sqrt(sP);
for (double cX = 0; cX <= 0.5; cX += x)//ONE LONG FOR-CYCLE
{
for(double cY = 0; cY <= 0.5; cY += x)
{
if (((cX - 0.5) * (cX - 0.5) + (cY - 0.5) * (cY - 0.5)) < 0.25)
{
n_0++; // coordinate in a circle! mark it by incrementing N_0
}
}
}
aPi = 4.0 * ((double)n_0 / (double)N); // to calculate approximate Pi - value
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds / 10);
Console.WriteLine("RunTime " + elapsedTime);
}
public double Done()
{
if (aPi > 0)
{
Msg = new StringBuilder("Calculates has been completed successful");
return aPi; // return gotten value
}
else
{
return 0; // no result
}
}
}
But unparallelized-class works faster, than parallelized(using TPL)class. How to fix it?
counter1 and counter2 most likely sit on the same cache line because they are adjacent in memory. This causes False Sharing. Probably, you are incrementing those counters very often. This pings the cacheline between the L1's of two cores for every alternation in time between the counters.
Separate them. As a proof of concept, like this:
int counter1;
long padding0, p1, p2, p3, p4, p5, p6, p7; //64 bytes padding
int counter2;
Let's hope the JIT does not reorder the fields. Maybe you need to use StructLayout.
Alternatively, make the counters local variables. Stack variables only have false sharing by extreme coincidence.
Related
I'm going through another programmers code and see the following function (and several variations, are called inside a massive loop and in the critical path) and I'm wondering if the c# compiler is smart enough to optimize it:
public double SomeFunction(double p_lower, double p_upper) {
double result = 0.0;
int low = (int)Math.Ceiling(p_lower);
int high = (int)Math.Ceiling(p_upper);
for( int i = low; i <= high; ++i){
double dbl_low;
double dbl_high;
if (i == low && i == high) {
dbl_low = p_lower; // corrected from low in original post
dbl_high = p_upper; // corrected from high original post
} else if (i == low) {
dbl_low = p_lower;
dbl_high = i;
} else if (i == high) {
dbl_low = i - 1;
dbl_high = p_upper;
} else {
dbl_low = i - 1;
dbl_high = i;
}
if (dbl_low != dbl_high) {
result += f(dbl_low,dbl_high);
}
}
return result;
}
What this function does is clear, the range from p_lower to p_upper is split up three parts:
Fraction up to the first integer, steps of 1 until the last integer, fraction from last integer to p_upper and call a function on those intervals.
The first condition is the edge case where the both lower and upper are within the same unit interval (correction from original)
My instinct (from when I learned to program and compilers were horrible) would be to rewrite the code as this:
public double SomeFunction2(double p_lower, double p_upper) {
if(p_upper < p_lower){
return 0.0;
}
double result = 0.0;
double low = Math.Ceiling(p_lower);
double high = Math.Ceiling(p_upper);
/// edge case
if (Math.Abs(low - high) < 0.00001) {
return Math.Abs(p_upper-p_lower)< 0.00001? 0.0 : f(p_lower, p_upper);
}
/// first fraction
result += Math.Abs(low - p_lower)< 0.00001? 0.0 : f(p_lower, low);
/// whole intervals
for( double i = low + 1.0; i < high; ++i){ // < instead of <=
result += f(i-1.0, i);
}
/// add last fraction and return
return result + f(high - 1.0, p_upper);
}
This way, there is not a whole cascade of conditional statements that is evaluated every loop, the first of which will always be false after the first, the second will always be true except for the final one. In fact there is no conditional in the loop, since the last condition has been incorporated in the loop range.
The loop counter is a double which should not be an issue since the range for low and high is 0.0 ... 120.0 all of which are exact as a double.
Am I wasting my time and does the compiler handle all this and is all I gain some readability?
I changed your second function a bit to improve readability:
using System;
using System.Diagnostics;
class Program
{
static void Main()
{
Stopwatch sw = new Stopwatch();
const int COUNT = 10000000;
double[] lowers = new double[COUNT];
double[] uppers = new double[COUNT];
double[] result = new double[COUNT];
double[] result2 = new double[COUNT];
Random random = new Random();
for (int i = 0; i < COUNT; i++)
{
lowers[i] = Math.Round(random.NextDouble() * 60.0, 2);
uppers[i] = lowers[i] + Math.Round(random.NextDouble() * 40.0,2);
}
sw.Start();
for (int i = 0; i < COUNT; i++)
{
result[i] = SomeFunction(lowers[i], uppers[i]);
}
sw.Stop();
Console.WriteLine("Elapsed Time for SomeFunction is {0} ms", sw.ElapsedMilliseconds);
sw.Reset();
sw.Start();
for (int i = 0; i < COUNT; i++)
{
result2[i] = SomeFunction2(lowers[i], uppers[i]);
}
sw.Stop();
Console.WriteLine("Elapsed Time for SomeFunction2 is {0} ms", sw.ElapsedMilliseconds);
for (int i = 0; i < COUNT; i++)
{
if (result[i] != result2[i])
{
Console.WriteLine("i: {0}",i);
}
}
}
public static double SomeFunction(double p_lower, double p_upper) {
double result = 0.0;
int low = (int)Math.Ceiling(p_lower);
int high = (int)Math.Ceiling(p_upper);
for(int i = low; i <= high; ++i){
double dbl_low;
double dbl_high;
if (i == low && i == high) {
dbl_low = p_lower;
dbl_high = p_upper;
} else if (i == low) {
dbl_low = p_lower;
dbl_high = i;
} else if (i == high) {
dbl_low = i - 1;
dbl_high = p_upper;
} else {
dbl_low = i - 1;
dbl_high = i;
}
if (dbl_low != dbl_high) {
result += f(dbl_low,dbl_high);
}
}
return result;
}
public static double SomeFunction2(double p_lower, double p_upper) {
double result = 0.0;
if (p_upper <= p_lower) {
return result;
}
double low = Math.Ceiling(p_lower);
double high = Math.Ceiling(p_upper);
/// edge case
if (high == low) {
return f(p_lower, p_upper);
}
/// first fraction
if (low > p_lower) {
result += f(p_lower, low);
}
/// whole intervals
for (int i = (int)low + 1; i < high; ++i){
result += f(i-1.0, i);
}
/// add last fraction and return
return result + f(high - 1.0, p_upper);
}
// Simple function f(a,b) for test purpose
public static double f(double a, double b)
{
return a + b;
}
}
Running this several times gave me:
3680 ms / 1863 ms -> 49%
2362 ms / 1441 ms -> 39%
3175 ms / 2030 ms -> 36%
2956 ms / 1531 ms -> 48%
So it stays quite close in terms of performance
The answer is that the latest MS c# compiler does not optimize this code fully.
I added large arrays of random numbers to Rafalon's program as a crude benchmark.
With this simple addition function the time difference is ~1950ms for SomeFunction and ~1160ms for Somefunction2. A 40% reduction in execution time by simply moving conditionals out of the loop.
Thanks to the people pointing out the error in transcribing the original functions and pointing out there were errors/I had misunderstood part of the original function I managed to get a new function that passes all our unit tests.
using System;
using System.Diagnostics;
class Program
{
static void Main()
{
Stopwatch sw = new Stopwatch();
const int COUNT = 10000000;
double[] lowers = new double[COUNT];
double[] uppers = new double[COUNT];
double[] result = new double[COUNT];
double[] result2 = new double[COUNT];
double sumerror = 0.0;
Random random = new Random();
for (int i = 0; i < COUNT; i++)
{
lowers[i] = Math.Round(random.NextDouble() * 60.0, 2);
uppers[i] = lowers[i] + Math.Round(random.NextDouble() * 40.0,2);
}
sw.Start();
for (int i = 0; i < COUNT; i++)
{
result[i] = SomeFunction(lowers[i], uppers[i]);
}
sw.Stop();
Console.WriteLine("Elapsed Time for SomeFunction is {0} ms", sw.ElapsedMilliseconds);
sw.Reset();
sw.Start();
for (int i = 0; i < COUNT; i++)
{
result2[i] = SomeFunction2(lowers[i], uppers[i]);
}
sw.Stop();
Console.WriteLine("Elapsed Time for SomeFunction2 is {0} ms", sw.ElapsedMilliseconds);
for (int i = 0; i < COUNT; i++)
{
sumerror += (result[i] - result2[i]);
if (Math.Abs(result[i] - result2[i])> 0.0001)
{
Console.WriteLine("i: {0}",i);
}
}
Console.WriteLine(sumerror); // should be zero, and we now use the results so no optimizing everything away.
}
public static double SomeFunction(double p_lower, double p_upper) {
double result = 0.0;
int low = (int)Math.Ceiling(p_lower);
int high = (int)Math.Ceiling(p_upper);
for(int i = low; i <= high; ++i){
double dbl_low;
double dbl_high;
if (i == low && i == high) {
dbl_low = p_lower;
dbl_high = p_upper;
} else if (i == low) {
dbl_low = p_lower;
dbl_high = i;
} else if (i == high) {
dbl_low = i - 1;
dbl_high = p_upper;
} else {
dbl_low = i - 1;
dbl_high = i;
}
if (dbl_low != dbl_high) {
result += f(dbl_low,dbl_high);
}
}
return result;
}
public static double SomeFunction2(double p_lower, double p_upper) {
double result = 0.0;
double low = Math.Ceiling(p_lower);
double high = Math.Ceiling(p_upper);
/// edge case
if (Math.Abs(high - low) < 0.00001) {
return Math.Abs(p_upper-p_lower)< 0.00001? 0.0 : f(p_lower, p_upper);
}
/// first fraction
result += Math.Abs(low - p_lower)< 0.00001? 0.0 : f(p_lower, low);
/// whole intervals
for( int i = (int)low + 1; i < high; ++i){
result += f(i-1.0, i);
}
/// add last fraction and return
return result + f(high - 1.0, p_upper);
}
// Simple function f(a,b) for test purpose
public static double f(double a, double b)
{
return a + b;
}
}
I got an array of data voltages and I want to get the RMS value from the FFT that has been applied before to that data. I've seen that RMS in time domain should be equal to RMS(fft) / sqrt(nFFT) from Parseval's Theorem, but gives me different results. I'm using these functions:
1)FFT
public static VectorDPoint FFT(double[] trama, double samplingFreq)
{
double fs = samplingFreq; // Sampling frequency
double t1 = 1 / fs; // Sample time
int l = trama.Length; // Length of signal
// Time vector
//Vector t = Normal(0, l, 1) * t1;
//// Values vector
//Vector y = new Vector(trama);
// We just use half of the data as the other half is simetric. The middle is found in NFFT/2 + 1
int nFFT = (int)Math.Pow(2, NextPow2(l));
if (nFFT > 655600)
{ }
// Create complex array for FFT transformation. Use 0s for imaginary part
Complex[] samples = new Complex[nFFT];
for (int i = 0; i < nFFT; i++)
{
if (i >= trama.Length)
{
samples[i] = new MathNet.Numerics.Complex(0, 0);
}
else
{
samples[i] = new MathNet.Numerics.Complex(trama[i], 0);
}
}
ComplexFourierTransformation fft = new ComplexFourierTransformation(TransformationConvention.Matlab);
fft.TransformForward(samples);
ComplexVector s = new ComplexVector(samples);
s = s / l;
Vector f = (fs / 2.0) * Linspace(0, 1, (nFFT / 2) + 1);
VectorDPoint result = new VectorDPoint();
for (int i = 0; i < (nFFT / 2) + 1; i++)
{
result.Add(new DPoint(f[i], 2 * s[i].Modulus));
}
s = null;
f = null;
samples = null;
return result;
2) RMS
public static double RMSCalculate(double[] channelValues, int samplesNumber, double sampleRate, DateTime currentDate)
{
double[] times = new double[channelValues.Length];
double sampleTime = 0.0;
double period = 0;
times[0] = currentDate.Second + currentDate.Millisecond / 1000.0;
sampleTime = 1 / sampleRate; //s
// Limited samples
for (int i = 1; i < channelValues.Length; i++)
{
times[i] = times[i - 1] + sampleTime;
}
DPoint RMSValues = new DPoint();
RMSValues.Y = 0;
if (channelValues.Length == 1)
{
double x = channelValues[0];
double y = channelValues[0];
RMSValues = new DPoint(x, Math.Abs(y));
}
else
{
for (int i = 0; i < times.Length - 1; i++)
{
period = 0;
if (i + 1 < times.Length)
{
RMSValues.Y += channelValues[i + 1] * channelValues[i + 1] * (times[i + 1] - times[i]);
}
}
period = times[times.Length - 1] - times[0];
RMSValues.Y = RMSValues.Y / period;
RMSValues.Y = Math.Sqrt(RMSValues.Y);
}
return RMSValues.Y;
}
My Issue
Hey, so I'm making this simple calculation to find the sum of sins between 0 and 100 degrees(as I use it as a benchmark for my systems), the calculation isn't the problem my issue is that I am new to Cudafy and I am unsure on how to properly pass in and return values so that it can be printed off here is my code:
Code
public const int N = 33 * 1024;
public const int threadsPerBlock = 256;
public const int blocksPerGrid = 32;
public static void Main()
{
Stopwatch watch = new Stopwatch();
watch.Start();
string Text = "";
int iterations = 1000000;
CudafyModule km = CudafyTranslator.Cudafy();
GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
gpu.LoadModule(km);
double[] dev_Value = gpu.Allocate<double>();
gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations,dev_Value);
double Value;
gpu.CopyFromDevice(dev_Value, out Value);
watch.Stop();
Text = watch.Elapsed.TotalSeconds.ToString();
Console.WriteLine("The process took a total of: " + Text + " Seconds");
Console.WriteLine(Value);
Console.Read();
gpu.FreeAll();
}
[Cudafy]
public static void SumOfSines(GThread thread,int iterations,double [] Value)
{
double total = new double();
double degAsRad = Math.PI / 180.0;
for (int i = 0; i < iterations; i++)
{
total = 0.0;
for (int z = 1; z < 101; z++)
{
double angle = (double)z * degAsRad;
total += Math.Sin(angle);
}
}
Value[0] = total;
}
The value that I am trying to extract from the CUDAfy part is the total and then print it off aswell as printing the time for the benchmarking. If anyone could post advice it would be very much appreciated (also any suggestions for making rid of any useless lines or unefficient pieces would also be good).
Doesn't matter I found the answer but I'll post it here:
public const int N = 33 * 1024;
public const int threadsPerBlock = 256;
public const int blocksPerGrid = 32;
public static void Main()
{
Stopwatch watch = new Stopwatch();
watch.Start();
CudafyModule km = CudafyTranslator.Cudafy();
GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
gpu.LoadModule(km);
string Text = "";
int iterations = 1000000;
double Value;
double[] dev_Value = gpu.Allocate<double>(iterations * sizeof(double));
gpu.Launch(blocksPerGrid, threadsPerBlock).SumOfSines(iterations, dev_Value);
gpu.CopyFromDevice(dev_Value, out Value);
watch.Stop();
Text = watch.Elapsed.TotalSeconds.ToString();
Console.WriteLine("The process took a total of: " + Text + " Seconds");
Console.WriteLine(Value);
Console.Read();
gpu.FreeAll();
}
[Cudafy]
public static void SumOfSines(GThread thread, int _iterations, double[] Value)
{
int threadID = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
int numThreads = thread.blockDim.x * thread.gridDim.x;
if (threadID < _iterations){
for (int i = threadID; i < _iterations; i += numThreads)
{
double _degAsRad = Math.PI / 180;
Value[i] = 0.0;
for (int a = 0; a < 100; a++)
{
double angle = (double)a * _degAsRad;
Value[i] += Math.Sin(angle);
}
}
}
}
-Jack
I am trying to generate white noise on a sinewave but there is a pause in my signal that should not be there.I am not sure why.
I have the write function on a different thread than generator.
here is my code:
public class whitenoise :AudioTrack
{
public whitenoise (int min1) : base(Stream.Music,44100, ChannelOut.Stereo, Encoding.Pcm16bit,793800,AudioTrackMode.Stream)
{
Frequency = 1000;
Amplitude = 1;
n2 = 0;
min = min1;
buffer = new short[793800];
}
public double Frequency { get; set; }
public double Amplitude { get; set; }
static int stopped;
static int n2;
int min;
short[] buffer;
Random rnd1;
void write1()
{
while(n2 < min )
{
Write (buffer, 0,793800);
}
}
public void play()
{
rnd1 = new System.Random();
stopped = 0;
gen(buffer);
ThreadPool.QueueUserWorkItem(o=>write1());
Play();
while (n2 < min)
{
gen (buffer);
//Write (buffer, 0, 793800);
}
stopped = 1;
}
public void gen(short[] buffer)
{
for (int n = 0; n <793800; n++) {
if (Frequency == 0) {
float temp1 = (float)(Amplitude * (2 * rnd1.NextDouble () - 1));
buffer [n] = (short)(temp1 * short.MaxValue);
} else {
float w = (float)( Math.Sin ( n2 * Math.PI * Frequency / 44100D));
float temp1 = (float)(w * Amplitude * (2 * rnd1.NextDouble () - 1));
buffer [n] = (short)(temp1 * short.MaxValue);
}
n2++;
}
}
public void stop ()
{
n2 = (min);
stopped = 1;
}
public int getstopped()
{
return stopped;
}
}
why is it not working?
why is there a pause ?
the gen is making the signal on a different thread
and both functions can use the buffer array
UPDate :
ok so while I was troubleshooting this problem, I learned that write() is called like 4 to 3 more times than gen(). SO, I need to make gen faster, I tried lowering the buffer size and I learned the loses I can go is without having problems in the sound is 44100, but gen is still too slow. so I tried this :
public void gen(short[] buffer)
{
for (int n = 0; n <44100; n++) {
if (Frequency == 0) {
float temp1 = (float)(Amplitude * (2 * rnd1.NextDouble () - 1));
buffer [n] = (short)(temp1 * short.MaxValue);
} else {
float temp1 = (float)( Math.Sin ( n2 * SinMath) * Amplitude * (2 * rnd1.NextDouble () - 1));
buffer [n] = (short)(temp1 * short.MaxValue);
}
n2++;
}
}
SinMath does this Math.PI * Frequency / 44100D somewhere else
but it is still not fast enough
any ideas ?
similarly to the question: Inverted beta in MySQL I need to use the BetaInv function inside a SQL Server stored procedure.
function is described here: Excel's BETAINV
is anybody aware of anything similar in TSQL or would you wrap it in a CLR .NET managed SQL user defined function?
I really need to use it within the stored procedure and not as post executing code in the C# side after data has been retrieved with the stored procedure because I should keep all logic on the db server for better reuse.
can I assume that a .NET managed udf running in the SQL Server would perform as fast as a normal native TSQL function?
Thanks!
I've in the end implemented the whole function myself, here the source code in case somebody needs it:
public static class UDFs
{
private const int MAXIT = 100;
private const double EPS = 0.0000003;
private const double FPMIN = 1.0E-30;
[SqlFunction(Name = "BetaInv", DataAccess = DataAccessKind.Read)]
public static SqlDouble BetaInv(SqlDouble p, SqlDouble alpha, SqlDouble beta, SqlDouble A, SqlDouble B)
{
return InverseBeta(p.Value, alpha.Value, beta.Value, A.Value, B.Value);
}
private static double InverseBeta(double p, double alpha, double beta, double A, double B)
{
double x = 0;
double a = 0;
double b = 1;
double precision = Math.Pow(10, -6); // converge until there is 6 decimal places precision
while ((b - a) > precision)
{
x = (a + b) / 2;
if (IncompleteBetaFunction(x, alpha, beta) > p)
{
b = x;
}
else
{
a = x;
}
}
if ((B > 0) && (A > 0))
{
x = x * (B - A) + A;
}
return x;
}
private static double IncompleteBetaFunction(double x, double a, double b)
{
double bt = 0;
if (x <= 0.0)
{
return 0;
}
if (x >= 1)
{
return 1;
}
bt = System.Math.Exp(Gammln(a + b) - Gammln(a) - Gammln(b) + a * System.Math.Log(x) + b * System.Math.Log(1.0 - x));
if (x < ((a + 1.0) / (a + b + 2.0)))
{
// Use continued fraction directly.
return (bt * betacf(a, b, x) / a);
}
else
{
// Use continued fraction after making the symmetry transformation.
return (1.0 - bt * betacf(b, a, 1.0 - x) / b);
}
}
private static double betacf(double a, double b, double x)
{
int m, m2;
double aa, c, d, del, h, qab, qam, qap;
qab = a + b; // These q’s will be used in factors that occur in the coe.cients (6.4.6).
qap = a + 1.0;
qam = a - 1.0;
c = 1.0; // First step of Lentz’s method.
d = 1.0 - qab * x / qap;
if (System.Math.Abs(d) < FPMIN)
{
d = FPMIN;
}
d = 1.0 / d;
h = d;
for (m = 1; m <= MAXIT; ++m)
{
m2 = 2 * m;
aa = m * (b - m) * x / ((qam + m2) * (a + m2));
d = 1.0 + aa * d; //One step (the even one) of the recurrence.
if (System.Math.Abs(d) < FPMIN)
{
d = FPMIN;
}
c = 1.0 + aa / c;
if (System.Math.Abs(c) < FPMIN)
{
c = FPMIN;
}
d = 1.0 / d;
h *= d * c;
aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
d = 1.0 + aa * d; // Next step of the recurrence (the odd one).
if (System.Math.Abs(d) < FPMIN)
{
d = FPMIN;
}
c = 1.0 + aa / c;
if (System.Math.Abs(c) < FPMIN)
{
c = FPMIN;
}
d = 1.0 / d;
del = d * c;
h *= del;
if (System.Math.Abs(del - 1.0) < EPS)
{
// Are we done?
break;
}
}
if (m > MAXIT)
{
return 0;
}
else
{
return h;
}
}
public static double Gammln(double xx)
{
double x, y, tmp, ser;
double[] cof = new double[] { 76.180091729471457, -86.505320329416776, 24.014098240830911, -1.231739572450155, 0.001208650973866179, -0.000005395239384953 };
y = xx;
x = xx;
tmp = x + 5.5;
tmp -= (x + 0.5) * System.Math.Log(tmp);
ser = 1.0000000001900149;
for (int j = 0; j <= 5; ++j)
{
y += 1;
ser += cof[j] / y;
}
return -tmp + System.Math.Log(2.5066282746310007 * ser / x);
}
}
}
as you can see in the code, the SqlFunction is calling the InverseBeta private method which does the job using couple of other methods.
results are the same of Excel.BetaInv up to 5 or 6 digits after comma.