I'm trying to transform a function to make it execute in parallel instead of sequential in C#, but I'm not sure what I'm doing wrong:
// sequential
static void Romberg(double a, double b, int n, double[,] R)
{
int i, j, k;
double h, sum;
h = b - a;
R[0, 0] = (h / 2) * (f(a) + f(b));
for (i = 1; i <= n; i++)
{
h = h / 2;
sum = 0;
for (k = 1; k <= (Math.Pow(2.0, i) - 1); k += 2)
{
sum += f(a + k * h);
}
R[i, 0] = R[i - 1, 0] / 2 + sum * h;
for (j = 1; j <= i; j++)
{
R[i, j] = R[i, j - 1] + (R[i, j - 1] - R[i - 1, j - 1]) / (Math.Pow(4.0, j) - 1);
}
}
}
// parallel
static void RombergCP(double a, double b, int n, double[,] R)
{
int i,j, k;
double h, sum;
h = b - a;
R[0, 0] = (h / 2) * (f(a) + f(b));
Parallel.For(0, n, options, i =>
{
h = h / 2;
sum = 0;
for (k = 1; k <= (Math.Pow(2.0, i) - 1); k += 2)
{
sum += f(a + k * h);
};
R[i, 0] = R[i - 1, 0] / 2 + sum * h;
for (j = 1; j <= i; j++)
{
R[i, j] = R[i, j - 1] + (R[i, j - 1] - R[i - 1, j - 1]) / (Math.Pow(4.0, j) - 1);
}
});
}
The error I'm getting is that "i" cannot be declared because it would give a different meaning to "i", which is used in a "parent or current" scope.
I tried renaming it to i2 in the parallel function but it gives the same error. Thanks in advance!
Remove the declaration of int i at the very top. It is declared by the lambda below.
A couple of issues:
declare variables in the smallest scope possible.
Your outer loop goes from for (i = 1; i <= n; i++) to Parallel.For(0, n, options, ...), that means R[i-1, ...] will throw in the Parallel version.
h = h / 2; is not thread-safe.
// parallel
static void RombergCP(double a, double b, int n, double[,] R)
{
//int i,j, k;
//double h, sum;
double h0 = b - a;
R[0, 0] = (h0 / 2) * (f(a) + f(b));
Parallel.For(1, n, options, i => // start at 1
{
//h = h / 2;
double h = (b - a) / Math.Pow(2, i); // derive from i
double sum = 0;
for (int k = 1; k <= (Math.Pow(2.0, i) - 1); k += 2) // keep k local
...
Related
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 2 years ago.
Improve this question
I wrote two programs to calculate the inverse of a matrix using Gaussian elimination, the first program was in C# and the second in CUDA C++. The two programs follow exactly the same procedure and gives the same final results. However, when I checked the values within the intermediate steps, I found slightly different values, less than 1e-5 relative error.
Here is a part of each code of both programs.
C#
int i, j, i1, n, y, z;
double[,] M = new double[n, n];
double[,] inv = new double[n, n];
for (i = 0; i < n; i++)
inv[i, i] = 1;
for (i = 0; i < n; i++)
{
for (j = i + 1; j < n; j++)
M[i, j] /= M[i, i];
for (j = 0; j < n; j++)
inv[i, j] /= M[i, i];
if (i != n - 1)
{
for (i1 = i + 1; i1 < n; i1++)
if (Math.Abs(M[i1, i]) >= 1e-9)
{
for (j = i + 1; j < n; j++)
M[i1, j] -= M[i1, i] * M[i, j];
for (j = 0; j < n; j++)
inv[i1, j] -= M[i1, i] * inv[i, j];
}
f = new StreamWriter("CPU.csv");
for (y = 0; y < n; y++)
{
for (z = 0; z < n; z++)
f.Write(M[y, z].ToString() + ",");
for (z = 0; z < n; z++)
f.Write(ans[y, z].ToString() + ",");
f.WriteLine();
}
f.Close();
}
}
for (i = n - 1; i > 0; i--)
{
for (i1 = 0; i1 < i; i1++)
if (Math.Abs(M[i1, i]) >= 1e-9)
for (j = 0; j < n; j++)
inv[i1, j] -= M[i1, i] * inv[i, j];
}
CUDA C++
int i, j;
double v;
double* d_A, * d_B, * d_v, * Z;
size = n * n * sizeof(double);
cudaMalloc(&d_A, size);
cudaMemcpy(d_A, A, size, cudaMemcpyHostToDevice);
cudaMalloc(&d_B, size);
cudaMalloc(&d_v, sizeof(double));
Z = new double[n * n];
Unity <<<1, n>>> (d_B, n);
cudaDeviceSynchronize();
for (i = 0; i < n; i++)
{
GetVal <<<1, 1>>> (d_A, i * (n + 1), d_v);
cudaMemcpy(&v, d_v, sizeof(double), cudaMemcpyDeviceToHost);
if (i != n - 1)
DivideRow <<<1, n - i - 1>>> (d_A, i * (n + 1) + 1, n - i - 1, v);
DivideRow <<<1, n>>> (d_B, i * n, n, v);
cudaDeviceSynchronize();
cudaMemcpy(Z, d_A, size, cudaMemcpyDeviceToHost);
cudaMemcpy(B, d_B, size, cudaMemcpyDeviceToHost);
if (i != n - 1)
{
dim3 GridA(1, 1);
dim3 BlockA(n - i - 1, n - i - 1);
dim3 GridB(1, 1);
dim3 BlockB(n - i - 1, n);
ModifyRow <<<GridA, BlockA>>> (d_A, i, i, i + 1, n - i - 1, n - i - 1);
ModifyRow <<<GridB, BlockB>>> (d_A, n, i, i, d_B, i + 1, 0, n - i - 1, n);
cudaDeviceSynchronize();
cudaMemcpy(Z, d_A, size, cudaMemcpyDeviceToHost);
cudaMemcpy(B, d_B, size, cudaMemcpyDeviceToHost);
myfile.open("GPU.csv");
for (x = 0; x < n; x++)
{
for (y = 0; y < n; y++)
myfile << Z[x * n + y] << ",";
for (y = 0; y < n; y++)
myfile << B[x * n + y] << ",";
myfile << "\n";
}
myfile.close();
}
}
cudaFree(d_v);
for (i = n - 1; i > 0; i--)
{
dim3 GridB(1, 1);
dim3 BlockB(i, n);
ModifyRow <<<GridB, BlockB>>> (d_A, n, i, i, d_B, 0, 0, i, n);
cudaDeviceSynchronize();
cudaMemcpy(Z, d_A, size, cudaMemcpyDeviceToHost);
cudaMemcpy(B, d_B, size, cudaMemcpyDeviceToHost);
}
cudaMemcpy(B, d_B, size, cudaMemcpyDeviceToHost);
cudaFree(d_A);
cudaFree(d_B);
I compared the values in CPU.csv and GPU.csv files, and found these differences.
What could be the reason for this? Do the calculations in CUDA C++ have less precision than C#?
From the NVIDIA documentation (about 2/3 of the way down):
The consequence [of rounding] is that different math libraries cannot be expected to compute exactly the same result for a given input. This applies to GPU programming as well. Functions compiled for the GPU will use the NVIDIA CUDA math library implementation while functions compiled for the CPU will use the host compiler math library implementation (e.g., glibc on Linux). Because these implementations are independent and neither is guaranteed to be correctly rounded, the results will often differ slightly.
Tells you all you need to know, really.
What algorithm is the following?
What I understand from the following source code is:
dir is the direction of FFT: forward=1, inverse=-1.
x is the real part
y is the imaginary part
What is m here?
If x = {1, 1, 1, 1, 0, 0, 0, 0}, and, y = {0,0,0,0,0,0,0,0,0}, what would be the value of m?
//Inplace 1D FFT
public static void FFT1D(int dir, int m, ref double[] x, ref double[] y)
{
long nn, i, i1, j, k, i2, l, l1, l2;
double c1, c2, tx, ty, t1, t2, u1, u2, z;
/* Calculate the number of points */
nn = 1;
for (i = 0; i < m; i++)
nn *= 2;
/* Do the bit reversal */
i2 = nn >> 1;
j = 0;
for (i = 0; i < nn - 1; i++)
{
if (i < j)
{
tx = x[i];
ty = y[i];
x[i] = x[j];
y[i] = y[j];
x[j] = tx;
y[j] = ty;
}
k = i2;
while (k <= j)
{
j -= k;
k >>= 1;
}
j += k;
}
/* Compute the FFT */
c1 = -1.0;
c2 = 0.0;
l2 = 1;
for (l = 0; l < m; l++)
{
l1 = l2;
l2 <<= 1;
u1 = 1.0;
u2 = 0.0;
for (j = 0; j < l1; j++)
{
for (i = j; i < nn; i += l2)
{
i1 = i + l1;
t1 = u1 * x[i1] - u2 * y[i1];
t2 = u1 * y[i1] + u2 * x[i1];
x[i1] = x[i] - t1;
y[i1] = y[i] - t2;
x[i] += t1;
y[i] += t2;
}
z = u1 * c1 - u2 * c2;
u2 = u1 * c2 + u2 * c1;
u1 = z;
}
c2 = Math.Sqrt((1.0 - c1) / 2.0);
if (dir == 1)
c2 = -c2;
c1 = Math.Sqrt((1.0 + c1) / 2.0);
}
/* Scaling for forward transform */
if (dir == 1)
{
for (i = 0; i < nn; i++)
{
x[i] /= (double)nn;
y[i] /= (double)nn;
}
}
}
The implementation of the FFT you have posted is limited to inputs of size 2m. Here m thus indirectly specify the size of the FFT block size. So, for your example with x = {1,1,1,1,0,0,0,0} and y={1,1,1,1,0,0,0,0} being arrays of size 8=23, m would be equal to 3.
Note that there are no additional checks for the size of the arrays x and y so make sure they are at least that size.
My algorithm output is wrong. I tried many solutions, but nothing comes out.
View my result.
SourceImage
I'm sorry Lena
for (int x = 1; x < fimage.Bitmap.Width - 1; x++)
{
for (int y = 1; y < fimage.Bitmap.Height - 1; y++)
{
double sumX = 0, sumY = 0, sum = 0;
for ( int i = -1; i <= 1; i++ )
{
for ( int j = -1; j <= 1; j++ )
{
sumX += fimage[y + i, x + j].R * kernel1[i + 1, j + 1];
sumY += fimage[y + i, x + j].R * kernel2[i + 1, j + 1];
}
}
sum = Math.Sqrt(sumX * sumX + sumY * sumY);
sum = sum > 255 ? 255 : sum < 0 ? 0 : sum;
fimage[x, y] = Color.FromArgb((byte)sum, (byte)sum, (byte)sum);
}
}
Two things that are fishy:
for ( int j = -1; j <= 1; j++ )
{
sumX += fimage[y + i, x + j].R * kernel1[i + 1, j + 1];
sumY += fimage[y + i, x + j].R * kernel2[i + 1, j + 1];
}
You only respect the red component of the image here, why is that?
The other major thing is changing the input picture while iterating through it:
sum = Math.Sqrt(sumX * sumX + sumY * sumY);
sum = sum > 255 ? 255 : sum < 0 ? 0 : sum;
fimage[x, y] = Color.FromArgb((byte)sum, (byte)sum, (byte)sum);
You should save the value to a different output image (create a new Bitmap(fimage.Width, fimage.Height) with the dimensions from the source image). That could explain the weird diagonal symmetry in your picture, where there's basically a symteric copy of the other side.
Can you guys help me converting this C# code to Objective-C?
I don't have a clue about C#/Visual Studio!
public static class BezierSpline
{
public static void GetCurveControlPoints(Point[] knots,
out Point[] firstControlPoints, out Point[] secondControlPoints)
{
int n = knots.Length - 1;
// Calculate first Bezier control points
// Right hand side vector
double[] rhs = new double[n];
// Set right hand side X values
for (int i = 1; i < n - 1; ++i)
rhs[i] = 4 * knots[i].X + 2 * knots[i + 1].X;
rhs[0] = knots[0].X + 2 * knots[1].X;
rhs[n - 1] = (8 * knots[n - 1].X + knots[n].X) / 2.0;
// Get first control points X-values
double[] x = GetFirstControlPoints(rhs);
// Set right hand side Y values
for (int i = 1; i < n - 1; ++i)
rhs[i] = 4 * knots[i].Y + 2 * knots[i + 1].Y;
rhs[0] = knots[0].Y + 2 * knots[1].Y;
rhs[n - 1] = (8 * knots[n - 1].Y + knots[n].Y) / 2.0;
// Get first control points Y-values
double[] y = GetFirstControlPoints(rhs);
// Fill output arrays.
firstControlPoints = new Point[n];
secondControlPoints = new Point[n];
for (int i = 0; i < n; ++i)
{
// First control point
firstControlPoints[i] = new Point(x[i], y[i]);
// Second control point
if (i < n - 1)
secondControlPoints[i] = new Point(2 * knots
[i + 1].X - x[i + 1], 2 *
knots[i + 1].Y - y[i + 1]);
else
secondControlPoints[i] = new Point((knots
[n].X + x[n - 1]) / 2,
(knots[n].Y + y[n - 1]) / 2);
}
}
private static double[] GetFirstControlPoints(double[] rhs)
{
int n = rhs.Length;
double[] x = new double[n]; // Solution vector.
double[] tmp = new double[n]; // Temp workspace.
double b = 2.0;
x[0] = rhs[0] / b;
for (int i = 1; i < n; i++) // Decomposition and forward substitution.
{
tmp[i] = 1 / b;
b = (i < n - 1 ? 4.0 : 3.5) - tmp[i];
x[i] = (rhs[i] - x[i - 1]) / b;
}
for (int i = 1; i < n; i++)
x[n - i - 1] -= tmp[n - i] * x[n - i]; // Backsubstitution.
return x;
}
}
thanks.
double[] tmp = new double[n];
tmp is an array of length n. Each value is not initialized explicitly, but it is implicitly set to the default value of the double type, which is 0. So tmp is an n length array of zeros. {0,0,0,0,0, ... 0}
I've done a fft to get fundamental frequency in real time and to implement high and low pass filters.
Now I want to be able to record to a .wav file after I apply a filter.
First I'll have to invert the fft and that is my question.
What are the steps to do this?
I use the FFT defined in this project.
Here is the code for it:
using System;
using System.Collections.Generic;
using System.Text;
namespace SoundLog
{
public class FourierTransform
{
static private int n, nu;
static private int BitReverse(int j)
{
int j2;
int j1 = j;
int k = 0;
for (int i = 1; i <= nu; i++)
{
j2 = j1 / 2;
k = 2 * k + j1 - 2 * j2;
j1 = j2;
}
return k;
}
static public double[] FFT(ref double[] x)
{
// Assume n is a power of 2
n = x.Length;
nu = (int)(Math.Log(n) / Math.Log(2));
int n2 = n / 2;
int nu1 = nu - 1;
double[] xre = new double[n];
double[] xim = new double[n];
double[] magnitude = new double[n2];
double[] decibel = new double[n2];
double tr, ti, p, arg, c, s;
for (int i = 0; i < n; i++)
{
xre[i] = x[i];
xim[i] = 0.0f;
}
int k = 0;
for (int l = 1; l <= nu; l++)
{
while (k < n)
{
for (int i = 1; i <= n2; i++)
{
p = BitReverse(k >> nu1);
arg = 2 * (double)Math.PI * p / n;
c = (double)Math.Cos(arg);
s = (double)Math.Sin(arg);
tr = xre[k + n2] * c + xim[k + n2] * s;
ti = xim[k + n2] * c - xre[k + n2] * s;
xre[k + n2] = xre[k] - tr;
xim[k + n2] = xim[k] - ti;
xre[k] += tr;
xim[k] += ti;
k++;
}
k += n2;
}
k = 0;
nu1--;
n2 = n2 / 2;
}
k = 0;
int r;
while (k < n)
{
r = BitReverse(k);
if (r > k)
{
tr = xre[k];
ti = xim[k];
xre[k] = xre[r];
xim[k] = xim[r];
xre[r] = tr;
xim[r] = ti;
}
k++;
}
for (int i = 0; i < n / 2; i++)
//magnitude[i] = (float)(Math.Sqrt((xre[i] * xre[i]) + (xim[i] * xim[i])));
decibel[i] = 10.0 * Math.Log10((float)(Math.Sqrt((xre[i] * xre[i]) + (xim[i] * xim[i]))));
//return magnitude;
return decibel;
}
}
}
There are so many really good fft implementations around such as FFTW that I highly recommend using one. They come with ifft as well. Yours, as implemented, will be excruciatingly slow.