I want to convert a YUV stream into RGB bytes such that they can be displayed through a WPF Image.
For each frame, the Y, U and V values are stored in separate arrays y, u and v; the chroma width and height are half of the respective luma dimensions. I create Mats from them like this:
byte[] y = (byte[])yData;
byte[] u = (byte[])uData;
byte[] v = (byte[])vData;
var ym = new Mat(new[] { lumaHeight, lumaWidth }, MatType.CV_8UC1, y, new long[] { lumaStride });
var um = new Mat(new[] { chromaWidth, chromaHeight }, MatType.CV_8UC1, u, new long[] { chromaStride});
var vm = new Mat(new[] { chromaWidth, chromaHeight }, MatType.CV_8UC1, v, new long[] { chromaStride});
I then use the following code to pass the data to OpenCV:
var combinedSource = new[] { ym, um, vm };
var m = new Mat();
var src = InputArray.Create(combinedSource);
var @out = OutputArray.Create(m);
Cv2.CvtColor(src, @out, ColorConversionCodes.YUV2BGR);
ImageData = @out.GetMat().ToBytes();
But I receive the error: {"!_src.empty()"}
The yuv arrays are definitely not empty.
I try another method using:
var combinedOut = new Mat(new[] { lumaHeight, lumaWidth }, MatType.CV_8UC3);
Cv2.Merge(combinedSource, combinedOut);
var bgra = combinedOut.CvtColor(ColorConversionCodes.YUV2BGR);
ImageData = bgra.ToBytes();
But receive the error {"mv[i].size == mv[0].size && mv[i].depth() == depth"}
Why do I receive these errors and what is the correct way to convert?
Following this post I was able to come to the following solution:
private byte[] YuvToRgbOpenCv(object luma, object chroma, object yData, object uData, object vData)
{
int[] lumaArray = (int[])luma;
int[] chromaArray = (int[])chroma;
int lumaWidth = lumaArray[0];
int lumaHeight = lumaArray[1];
int chromaWidth = chromaArray[0];
int chromaHeight = chromaArray[1];
byte[] y = (byte[])yData;
byte[] u = (byte[])uData;
byte[] v = (byte[])vData;
var ym = new Mat(new[] { lumaHeight, lumaWidth }, MatType.CV_8UC1, y);
var um = new Mat(new[] { chromaHeight, chromaWidth }, MatType.CV_8UC1, u);
var vm = new Mat(new[] { chromaHeight, chromaWidth }, MatType.CV_8UC1, v);
var umResized = um.Resize(new OpenCvSharp.Size(lumaWidth, lumaHeight), 0, 0, InterpolationFlags.Nearest);
var vmResized = vm.Resize(new OpenCvSharp.Size(lumaWidth, lumaHeight), 0, 0, InterpolationFlags.Nearest);
var yuvMat = new Mat();
var resizedChannels = new[] { ym, umResized, vmResized };
Cv2.Merge(resizedChannels, yuvMat);
var bgr = yuvMat.CvtColor(ColorConversionCodes.YUV2BGR);
var result = bgr.ToBytes();
return result;
}
I had to resize the U and V planes to match the dimensions of Y, because Cv2.Merge (and the three-channel YUV2BGR conversion) requires all channels to have the same size and depth.
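To actually display the converted frame through a WPF Image (the original goal), one option is to wrap raw BGR24 pixels in a BitmapSource. This is only a minimal sketch: it assumes bgrBytes holds tightly packed raw BGR pixels of size lumaWidth x lumaHeight (note that Mat.ToBytes() encodes the image, PNG by default, rather than returning raw pixels, as far as I can tell, so you would extract the raw data from the bgr Mat instead), that it runs on the UI thread, and that myImage is a hypothetical Image element in the XAML.
// using System.Windows.Media; using System.Windows.Media.Imaging;
// Sketch: wrap raw, tightly packed BGR24 bytes in a BitmapSource and show them.
int stride = lumaWidth * 3;                       // bytes per row, no padding assumed
BitmapSource source = BitmapSource.Create(
    lumaWidth, lumaHeight,
    96, 96,                                       // DPI
    PixelFormats.Bgr24,
    null,                                         // no palette
    bgrBytes,                                     // hypothetical raw BGR buffer
    stride);
myImage.Source = source;                          // myImage: hypothetical <Image x:Name="myImage"/>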
Why don't you try converting by looping over the source and output arrays yourself? For this method to work you have to combine the Y, U and V arrays into one interleaved array and then pass it to the following function:
private static unsafe void YUV2RGBManaged(byte[] YUVData, byte[] RGBData, int width, int height)
{
fixed(byte* pRGBs = RGBData, pYUVs = YUVData)
{
for (int r = 0; r < height; r++)
{
byte* pRGB = pRGBs + r * width * 3;
byte* pYUV = pYUVs + r * width * 2;
//process two pixels at a time
for (int c = 0; c < width; c += 2)
{
int C1 = pYUV[1] - 16;
int C2 = pYUV[3] - 16;
int D = pYUV[2] - 128;
int E = pYUV[0] - 128;
int R1 = (298 * C1 + 409 * E + 128) >> 8;
int G1 = (298 * C1 - 100 * D - 208 * E + 128) >> 8;
int B1 = (298 * C1 + 516 * D + 128) >> 8;
int R2 = (298 * C2 + 409 * E + 128) >> 8;
int G2 = (298 * C2 - 100 * D - 208 * E + 128) >> 8;
int B2 = (298 * C2 + 516 * D + 128) >> 8;
#if true
//check for overflow
//unsurprisingly this takes the bulk of the time.
pRGB[0] = (byte)(R1 < 0 ? 0 : R1 > 255 ? 255 : R1);
pRGB[1] = (byte)(G1 < 0 ? 0 : G1 > 255 ? 255 : G1);
pRGB[2] = (byte)(B1 < 0 ? 0 : B1 > 255 ? 255 : B1);
pRGB[3] = (byte)(R2 < 0 ? 0 : R2 > 255 ? 255 : R2);
pRGB[4] = (byte)(G2 < 0 ? 0 : G2 > 255 ? 255 : G2);
pRGB[5] = (byte)(B2 < 0 ? 0 : B2 > 255 ? 255 : B2);
#else
pRGB[0] = (byte)(R1);
pRGB[1] = (byte)(G1);
pRGB[2] = (byte)(B1);
pRGB[3] = (byte)(R2);
pRGB[4] = (byte)(G2);
pRGB[5] = (byte)(B2);
#endif
pRGB += 6;
pYUV += 4;
}
}
}
}
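Note that this function indexes its input as interleaved data: within every four-byte group it reads luma at offsets 1 and 3 and chroma at offsets 0 and 2 (the offset-0 value feeds the 409/208 term, the offset-2 value the 100/516 term). Here is a rough sketch of how the planar y, u and v arrays from the question could be packed into that layout before the call; it assumes tightly packed planes (no stride padding) and even dimensions, and is untested.
// Sketch only: repack planar 4:2:0 data into the interleaved layout read above.
byte[] packed = new byte[lumaWidth * lumaHeight * 2];
for (int row = 0; row < lumaHeight; row++)
{
    int chromaRow = row / 2;
    for (int col = 0; col < lumaWidth; col += 2)
    {
        int dst = (row * lumaWidth + col) * 2;
        int chromaIdx = chromaRow * chromaWidth + col / 2;
        packed[dst + 0] = v[chromaIdx];                 // read as E (409/208 term)
        packed[dst + 1] = y[row * lumaWidth + col];     // first luma sample (C1)
        packed[dst + 2] = u[chromaIdx];                 // read as D (100/516 term)
        packed[dst + 3] = y[row * lumaWidth + col + 1]; // second luma sample (C2)
    }
}
byte[] rgb = new byte[lumaWidth * lumaHeight * 3];      // output is 3 bytes per pixel (R, G, B)
YUV2RGBManaged(packed, rgb, lumaWidth, lumaHeight);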
Related
I'm working on image processing in C#.
I have a performance problem with large-offset memory access in C#: it is significantly slower than accessing the same amount of memory at zero offset. In C++ the difference was not as great as in C#.
Can you please tell me why my code has this problem? Also, are there any solutions?
Source
using System;
using System.Runtime.InteropServices;
using System.Diagnostics;
using System.Numerics;
namespace Test
{
class Program
{
unsafe static void Main(string[] args)
{
var width = 8000;
var height = 8000;
// var data = new Vector4[height * width]; <- a similar problem occurs
var data = (Vector4*)Marshal.AllocHGlobal(height * width * sizeof(Vector4));
var data2 = (Vector4*)Marshal.AllocHGlobal(height * width * sizeof(Vector4));
// MATRIX
float m11 = .7297023F, m12 = 0, m13 = 0, m14 = 0, m21 = 0, m22 = .6109577F,
m23 = 0, m24 = 0, m31 = 0, m33 = .597218F, m32 = 0, m34 = 0, m41 = 0, m42 = 0,
m43 = 0, m44 = 1F, m51 = .105F, m52 = .145F, m53 = .155F, m54 = 0;
var sw = new Stopwatch();
sw.Start();
for (int y = 0; y < height; ++y)
{
var offset = width * y;
for (int x = 0; x < width; ++x)
{
// Slow ( 600ms )
ref var sData = ref data[offset + x];
ref var dData = ref data2[offset + x];
// Fast ( 200ms )
// ref var sData = ref data[x];
// ref var dData = ref data2[x];
float b = sData.X;
float g = sData.Y;
float r = sData.Z;
float a = sData.W;
dData.X = (b * m11) + (g * m21) + (r * m31) + (a * m41) + m51;
dData.Y = (b * m12) + (g * m22) + (r * m32) + (a * m42) + m52;
dData.Z = (b * m13) + (g * m23) + (r * m33) + (a * m43) + m53;
dData.W = (b * m14) + (g * m24) + (r * m34) + (a * m44) + m54;
}
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Marshal.FreeHGlobal((IntPtr)data);
Marshal.FreeHGlobal((IntPtr)data2);
}
}
}
When using a managed array pointer:
var array1 = new Vector4[width * height];
var array2 = new Vector4[width * height];
fixed (Vector4* data = &array1[0])
fixed (Vector4* data2 = &array2[0])
for (int y = 0; y < height; ++y)
{
for (int x = 0; x < width; ++x)
{
// Slow ( 600ms )
ref var sData = ref data[width * y + x];
ref var dData = ref data2[width * y + x];
Offsetting the pointer in the outer loop (a little improvement):
for (int y = 0; y < height; ++y)
{
var offsetData1 = data + width * y;
var offsetData2 = data2 + width * y;
for (int x = 0; x < width; ++x)
{
// Slow ( 470ms )
ref var sData = ref offsetData1[x];
ref var dData = ref offsetData2[x];
C++ ver
#include <iostream>
#include <chrono>
struct Vector4 {
float X = 0;
float Y = 0;
float Z = 0;
float W = 0;
};
int main()
{
long width = 8000;
long height = 8000;
auto buffer = new Vector4[width * height];
auto buffer2 = new Vector4[width * height];
// MATRIX
float m11 = .7297023F, m12 = 0, m13 = 0, m14 = 0, m21 = 0, m22 = .6109577F,
m23 = 0, m24 = 0, m31 = 0, m33 = .597218F, m32 = 0, m34 = 0, m41 = 0, m42 = 0,
m43 = 0, m44 = 1, m51 = .105F, m52 = .145F, m53 = .155F, m54 = 0;
std::chrono::system_clock::time_point start, end;
start = std::chrono::system_clock::now();
for (int y = 0; y < height; ++y)
{
int offset = width * y;
for (int x = 0; x < width; ++x)
{
Vector4& sData = buffer[offset + x];
Vector4& dData = buffer2[offset + x];
float b = sData.X;
float g = sData.Y;
float r = sData.Z;
float a = sData.W;
dData.X = (b * m11) + (g * m21) + (r * m31) + (a * m41) + m51;
dData.Y = (b * m12) + (g * m22) + (r * m32) + (a * m42) + m52;
dData.Z = (b * m13) + (g * m23) + (r * m33) + (a * m43) + m53;
dData.W = (b * m14) + (g * m24) + (r * m34) + (a * m44) + m54;
}
}
end = std::chrono::system_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << elapsed << "\n";
delete[] buffer;
delete[] buffer2;
}
Benchmarks
Lang | Description                              | Time (ms)
C#   | Zero offset pointer                      | 600
C#   | Offset pointer                           | 200
C++  | Zero offset pointer                      | 190
C++  | Offset pointer                           | 260
C#   | Offsetting the pointer in the outer loop | 370
C#   | Managed array pointer with offset        | 990
Other info
IL in SharpLab
CPU: Intel Core i7-6700k
Memory: DDR4 16GB
OS: Windows 10 20H2
Runtime: .NET 5
Lang ver: C# 9
Platform: X64
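One more workaround worth trying for the loop above (a sketch, not from the question): walk the two pointers directly instead of recomputing width * y + x for every element, so no per-element index arithmetic is emitted at all. The names match the question's code; the math is unchanged.
// Sketch: linear pointer walk over the whole buffer (requires the same unsafe context).
Vector4* s = data;
Vector4* d = data2;
Vector4* end = data + (long)width * height;
while (s < end)
{
    float b = s->X, g = s->Y, r = s->Z, a = s->W;
    d->X = (b * m11) + (g * m21) + (r * m31) + (a * m41) + m51;
    d->Y = (b * m12) + (g * m22) + (r * m32) + (a * m42) + m52;
    d->Z = (b * m13) + (g * m23) + (r * m33) + (a * m43) + m53;
    d->W = (b * m14) + (g * m24) + (r * m34) + (a * m44) + m54;
    s++;
    d++;
}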
Currently I am developing a real-time low-pass filter in C# using the DirectSound API. The problem I've encountered is that various implementations of low-pass filters don't seem to work with my echo implementation. At the beginning of the filtering I have an array of bytes (read) that is read from the capture buffer. The values of this array are between 0 and 255.
When I subject the array to the low-pass filtering algorithm I obtain values that fall between 0 and 1, and as a result the play array is silent (zeros only). I attach the relevant code.
I imagine that this approach is wrong and that the input data should probably look different, since the low-pass algorithm seems to kill it completely. Any ideas? Thanks in advance.
Updated relevant code after KillaKem's comments:
//buffer format
var format = new WaveFormat
{
SamplesPerSecond = 44100,
BitsPerSample = 16,
Channels = 2,
FormatTag = WaveFormatTag.Pcm
};
//reading the buffer
byte[] read = (byte[])_dwCapBuffer.Read(offset, typeof(byte), LockFlag.None, _dwOutputBufferSize);
byte[] play = new byte [read.Length];
byte[] readL= new byte [read.Length/2];
byte[] readR= new byte [read.Length/2];
byte[] playL = new byte[read.Length / 2];
byte[] playR = new byte[read.Length / 2];
float[] readLfloat = new float[read.Length / 4];
float[] readRfloat = new float[read.Length / 4];
float[] playLfloat = new float[read.Length / 4];
float[] playRfloat = new float[read.Length / 4];
//dividing into channels and casting to float
for(int i = 0; i<read.Length; i=i+1)
{
if (i % 4 == 0)
{
readL[(int)i / 2] = read[i];
}
if(i%4==1)
{
readL[(int)i/2]=read[i];
readLfloat[(int)i / 4] = (((short)(read[i-1] << 8 | read[i])) / 32768f);
}
if (i % 4 == 2)
{
readR[(int)i / 2] = read[i];
}
if (i % 4 == 3)
{
readR[(int)i / 2] = read[i];
readRfloat[(int)i / 4] = (((short)(read[i - 1] << 8 | read[i])) / 32768f);
}
}
//filter coefficients
float frequency = 1000f;
float sampleRate = (float)_dwCapBuffer.Format.SamplesPerSecond;
float resonance = 0.5f;
float c = 1.0f / (float)Math.Tan(Math.PI * frequency / sampleRate);
float a0 = 1.0f / (1.0f + resonance * c + c * c);
float a1 = 2f * a0;
float a2 = a0;
float b1 = 2.0f * (1.0f - c * c) * a0;
float b2 = (1.0f - resonance * c + c * c) * a0;
//filtering
for(int i = 0; i < readLfloat.Length; i++)
{
float readCurrSample = readLfloat[i];
float playCurrSample = playLfloat[i];
float filtered=readCurrSample;
float readOneSample;
float readTwoSample;
float playOneSample;
float playTwoSample;
if (i ==0)
{
filtered = ((float)a0 * readCurrSample) + ((float)a1 * savelastRead) + ((float)a2 * saveprelastRead) - ((float)b1 * savelastPlay) - ((float)b2 * saveprelastPlay);
}
else if (i==1)
{
readOneSample = readLfloat[i-1];
playOneSample = playLfloat[i-1];
filtered = ((float)a0 * readCurrSample) + ((float)a1 * readOneSample) + ((float)a2 * savelastRead) - ((float)b1 * playOneSample) - ((float)b2 * savelastPlay);
}
else
{
readOneSample = readLfloat[i - 1];
playOneSample = playLfloat[i - 1];
readTwoSample = readLfloat[i - 2];
playTwoSample = playLfloat[i - 2];
filtered = ((float)a0 * readCurrSample) + ((float)a1 * readOneSample) + ((float)a2 * readTwoSample) - ((float)b1 * playOneSample) - ((float)b2 * playTwoSample);
}
if (i == readL.Length - 4)
{
saveprelastPlay = playCurrSample;
saveprelastRead = readCurrSample;
}
if (i == readL.Length-2)
{
savelastPlay = playCurrSample;
savelastRead = readCurrSample;
}
if (filtered > 1 || filtered < -1)
{
int x = 0;
}
playLfloat[i] = filtered;
}
playRfloat = playLfloat; //ignoring Right channel operations
//Recasting to bytes array
for (int i = 0; i < read.Length; i = i + 1)
{
if (i % 4 == 1)
{
byte[] bytes;
bytes = BitConverter.GetBytes((short)(playLfloat[(int)(i-1)/4] * 32768f));
read[i] = bytes[0];
read[i - 1] = bytes[1];
}
if (i % 4 == 3)
{
byte[] bytes;
bytes = BitConverter.GetBytes((short)(playRfloat[(int)(i - 1) / 4] * 32768f));
read[i] = bytes[0];
read[i - 1] = bytes[1];
}
}
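For reference, a biquad low-pass of this form is normally written with two input and two output history values that are updated on every sample and carried across buffer boundaries, rather than re-seeded only at i == 0 and i == 1. Below is a minimal sketch using the same a0..b2 coefficients computed above; x1, x2, y1 and y2 are assumed to be fields (initially 0) that persist between captured buffers.
// Minimal stateful biquad sketch: x1/x2 are the two previous inputs,
// y1/y2 the two previous outputs; keeping them as fields preserves state per buffer.
for (int i = 0; i < readLfloat.Length; i++)
{
    float x0 = readLfloat[i];
    float y0 = a0 * x0 + a1 * x1 + a2 * x2 - b1 * y1 - b2 * y2;
    playLfloat[i] = y0;
    x2 = x1; x1 = x0;
    y2 = y1; y1 = y0;
}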
I have a weird problem. I have been searching the internet for a good and fast Gaussian blur algorithm, and I finally found one!
So I tried it in a new project, and it worked just fine.
But when I imported the class into my main project and tried it there, I got a System.OverflowException. I find it really weird that it works in one project but not in another.
Here's the algorithm:
(I am calling the function in almost the same way. I tried calling it in exactly the same way, but I still got the exception.)
public static void FastBlur(Bitmap SourceImage, int radius)
{
var rct = new Rectangle(0, 0, SourceImage.Width, SourceImage.Height);
var dest = new int[rct.Width * rct.Height];
var source = new int[rct.Width * rct.Height];
var bits = SourceImage.LockBits(rct, ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
Marshal.Copy(bits.Scan0, source, 0, source.Length);
SourceImage.UnlockBits(bits);
if (radius < 1) return;
int w = rct.Width;
int h = rct.Height;
int wm = w - 1;
int hm = h - 1;
int wh = w * h;
int div = radius + radius + 1;
var r = new int[wh];
var g = new int[wh];
var b = new int[wh];
int rsum, gsum, bsum, x, y, i, p1, p2, yi;
var vmin = new int[max(w, h)];
var vmax = new int[max(w, h)];
var dv = new int[256 * div];
for (i = 0; i < 256 * div; i++)
{
dv[i] = (i / div);
}
int yw = yi = 0;
for (y = 0; y < h; y++)
{ // blur horizontal
rsum = gsum = bsum = 0;
for (i = -radius; i <= radius; i++)
{
int p = source[yi + min(wm, max(i, 0))];
rsum += (p & 0xff0000) >> 16;
gsum += (p & 0x00ff00) >> 8;
bsum += p & 0x0000ff;
}
for (x = 0; x < w; x++)
{
r[yi] = dv[rsum];
g[yi] = dv[gsum];
b[yi] = dv[bsum];
if (y == 0)
{
vmin[x] = min(x + radius + 1, wm);
vmax[x] = max(x - radius, 0);
}
p1 = source[yw + vmin[x]];
p2 = source[yw + vmax[x]];
rsum += ((p1 & 0xff0000) - (p2 & 0xff0000)) >> 16;
gsum += ((p1 & 0x00ff00) - (p2 & 0x00ff00)) >> 8;
bsum += (p1 & 0x0000ff) - (p2 & 0x0000ff);
yi++;
}
yw += w;
}
for (x = 0; x < w; x++)
{ // blur vertical
rsum = gsum = bsum = 0;
int yp = -radius * w;
for (i = -radius; i <= radius; i++)
{
yi = max(0, yp) + x;
rsum += r[yi];
gsum += g[yi];
bsum += b[yi];
yp += w;
}
yi = x;
for (y = 0; y < h; y++)
{
dest[yi] = (int)(0xff000000u | (uint)(dv[rsum] << 16) | (uint)(dv[gsum] << 8) | (uint)dv[bsum]); // <--- Here's where I get the exception
if (x == 0)
{
vmin[y] = min(y + radius + 1, hm) * w;
vmax[y] = max(y - radius, 0) * w;
}
p1 = x + vmin[y];
p2 = x + vmax[y];
rsum += r[p1] - r[p2];
gsum += g[p1] - g[p2];
bsum += b[p1] - b[p2];
yi += w;
}
}
// copy back to image
var bits2 = SourceImage.LockBits(rct, ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
Marshal.Copy(dest, 0, bits2.Scan0, dest.Length);
SourceImage.UnlockBits(bits2);
}
Here's how I call the function:
public Bitmap DownloadBlurredImage(List<string> uris, int blurradius)
{
Uri urlUri = new Uri(uris.ElementAt(0));
if (RandomSelect == true)
{
Random r = new Random();
urlUri = new Uri(uris.ElementAt(r.Next(0, uris.Count - 1)));
}
WebRequest webRequest = WebRequest.CreateDefault(urlUri);
webRequest.ContentType = "image/jpeg";
WebResponse webResponse = webRequest.GetResponse();
Stream mystream;
if ((mystream = webResponse.GetResponseStream()) != null)
{
Bitmap b = new Bitmap(mystream);
imageFilters.FastBlur(b, 10); //<--- here's where I'm calling
return b;
}
else
{
return null;
}
}
Does anyone know why this is acting up? I myself have no clue.
You're using signed ints to hold unsigned data - if the MSB is set, the conversion to int overflows. Use unsigned ints (for example for dest), or surround the expression in an unchecked { } block. This is also why the same code works in one project but not another: the failing project presumably has "Check for arithmetic overflow/underflow" enabled in its build settings.
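A minimal sketch of that fix applied to the failing line (just one way to do it):
// unchecked() stops the int conversion from throwing even when
// "Check for arithmetic overflow/underflow" is enabled for the project.
dest[yi] = unchecked((int)(0xff000000u | (uint)(dv[rsum] << 16) | (uint)(dv[gsum] << 8) | (uint)dv[bsum]));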
I am trying to calculate values in a one-dimensional array; here is my code.
When I click "Detect", a threshold should run over my image, from i = 0 to the image height and from j = 0 to the image width:
public void detektieren_Click(object sender, RoutedEventArgs e)
{
for (i = 0; i < bitmap.Height; i++)
{
for (j = 0; j < bitmap.Width; j++)
{
stride = bitmap.PixelWidth * (bitmap.Format.BitsPerPixel / 8);
data = new byte[stride * bitmap.PixelHeight];
bitmap.CopyPixels(data, stride, 0);
index = i * stride + 4 * j;
Now accessing the ARGB data:
byte A = data[index + 3];
byte R = data[index + 2];
byte G = data[index + 1];
byte B = data[index];
After the threshold, if any pixels meet the condition R=0, G=0 and B=255:
if (Convert.ToInt32(R) == 0 && Convert.ToInt32(G) == 0 && Convert.ToInt32(B) == 255)
{
// Create a writer and open the file:
StreamWriter Messdaten;
if (!File.Exists("C:/Users/.../Messdaten.csv"))
{
Messdaten = new StreamWriter("C:/Users/.../Messdaten.csv");
}
else
{
Messdaten = File.AppendText("C:/Users/.../Messdaten.csv");
}
// Write to the file:
Messdaten.WriteLine(j + ";" + i);
// Close the stream:
Messdaten.Close();
for (y = 0; y < bitmap.Height; y++)
{
for (x = 0; x < bitmap.Width; x++)
{
double x_mw = 0; double y_mw = 0;
int[] x_array = new int[(int)bitmap.Width];
int[] y_array = new int[(int)bitmap.Height];
x_array[x] = j;
x_mw = x_array.Average();
y_array[y] = i;
y_mw = y_array.Average();
xy_coord.Content = (int) x_mw + ";" + (int) y_mw;
}
}
}
}
}
}
Everything works perfectly with the CSV file: I can detect a pixel (e.g. blue with R=0, G=0, B=255). But I also want to copy the data of each pixel into an array, and that part doesn't deliver what I want: it doesn't calculate the average of the blue pixel coordinates (i.e. the centroid of the blue pixel scatter), it just shows x_mw = 0 and y_mw = 0. What did I do wrong?
After some modifications it works. This is the code:
public void detektieren_Click(object sender, RoutedEventArgs e)
{
int x_sum = 0; int y_sum = 0; int x_count = 0; int y_count = 0; int x_mw; int y_mw;
int[] x_array = new int[(int)bitmap.Width];
int[] y_array = new int[(int)bitmap.Height];
int[] x_array_copy = new int[(int)bitmap.Width];
int[] y_array_copy = new int[(int)bitmap.Height];
stride = bitmap.PixelWidth * (bitmap.Format.BitsPerPixel / 8);
data = new byte[stride * bitmap.PixelHeight];
bitmap.CopyPixels(data, stride, 0);
for (i = 0; i < (int) bitmap.Height; i++)
{
for (j = 0; j < (int) bitmap.Width; j++)
{
index = i * stride + 4 * j;
byte A = data[index + 3];
byte R = data[index + 2];
byte G = data[index + 1];
byte B = data[index];
if (Convert.ToInt32(R) == 0 && Convert.ToInt32(G) == 0 && Convert.ToInt32(B) == 255)
{
x_array[j] = j;
x_count++;
x_array_copy[j] = x_array_copy[j] + j;
x_sum = (int) x_array_copy.Sum();
x_mw = x_sum / x_count;
y_array[i] = i;
y_count++;
y_array_copy[i] = y_array_copy[i] + i;
y_sum = (int) y_array_copy.Sum();
y_mw = y_sum / y_count;
xy_coord.Content = x_mw + ";" + y_mw;
}
}
}
}
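The same centroid can also be accumulated without the intermediate arrays. A sketch using the same loop variables and condition as above (assuming at least one matching pixel exists):
// Sketch: accumulate the centroid of matching pixels directly.
long xSum = 0, ySum = 0;
int count = 0;
for (i = 0; i < (int)bitmap.Height; i++)
{
    for (j = 0; j < (int)bitmap.Width; j++)
    {
        index = i * stride + 4 * j;
        if (data[index + 2] == 0 && data[index + 1] == 0 && data[index] == 255) // R=0, G=0, B=255
        {
            xSum += j;
            ySum += i;
            count++;
        }
    }
}
if (count > 0)
    xy_coord.Content = (int)(xSum / count) + ";" + (int)(ySum / count);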
I have a byte array with YUV420 data:
byte[] yuv420;//yuv data
How can I convert this to an Image<Bgr, byte>?
I found a math formula to convert it to RGB and then to an Image<Bgr, byte>, but it is very slow. Is there a faster way to convert it?
There is a color conversion in Emgu for this (the COLOR_CONVERSION enum member CV_YUV2RGB, "Convert YUV color to RGB"), but I cannot understand how to use it. Can anyone help?
static Bitmap ConvertYUV2RGB(byte[] yuvFrame, byte[] rgbFrame, int width, int height)
{
int uIndex = width * height;
int vIndex = uIndex + ((width * height) >> 2);
int gIndex = width * height;
int bIndex = gIndex * 2;
int temp = 0;
//the image is pic1; int r, g, b are converted from the binary RGB colour data
Bitmap bm = new Bitmap(width, height);
int r = 0;
int g = 0;
int b = 0;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
// R component
temp = (int)(yuvFrame[y * width + x] + (yuvFrame[vIndex + (y / 2) * (width / 2) + x / 2] - 128) * YUV2RGB_CONVERT_MATRIX[0, 2]);
rgbFrame[y * width + x] = (byte)(temp < 0 ? 0 : (temp > 255 ? 255 : temp));
// G component
temp = (int)(yuvFrame[y * width + x] + (yuvFrame[uIndex + (y / 2) * (width / 2) + x / 2] - 128) * YUV2RGB_CONVERT_MATRIX[1, 1] + (yuvFrame[vIndex + (y / 2) * (width / 2) + x / 2] - 128) * YUV2RGB_CONVERT_MATRIX[1, 2]);
rgbFrame[gIndex + y * width + x] = (byte)(temp < 0 ? 0 : (temp > 255 ? 255 : temp));
// B component
temp = (int)(yuvFrame[y * width + x] + (yuvFrame[uIndex + (y / 2) * (width / 2) + x / 2] - 128) * YUV2RGB_CONVERT_MATRIX[2, 1]);
rgbFrame[bIndex + y * width + x] = (byte)(temp < 0 ? 0 : (temp > 255 ? 255 : temp));
Color c = Color.FromArgb(rgbFrame[y * width + x], rgbFrame[gIndex + y * width + x], rgbFrame[bIndex + y * width + x]);
bm.SetPixel(x, y, c);
}
}
return bm;
}
static double[,] YUV2RGB_CONVERT_MATRIX = new double[3, 3] { { 1, 0, 1.4022 }, { 1, -0.3456, -0.7145 }, { 1, 1.771, 0 } };
static byte clamp(float input)
{
if (input < 0) input = 0;
if (input > 255) input = 255;
return (byte)Math.Abs(input);
}
You are in luck, because I solved exactly this issue before. There are some links in the code for more info.
In general, always try to use pointers when doing image processing and avoid calling functions in nested loops. In my code the overflow check (the clamping) is by far the slowest part, but unfortunately it is needed (try switching it off using the pre-processor switch).
I have to say, though, that in the end I never used this function because it was just too slow; I opted to implement it in C++ and call it from C# using P/Invoke.
private static unsafe void YUV2RGBManaged(byte[] YUVData, byte[] RGBData, int width, int height)
{
//returned pixel format is 2yuv - i.e. luminance, y, is represented for every pixel and the u and v are alternated
//like this (where Cb = u, Cr = v)
//Y0 Cb Y1 Cr Y2 Cb Y3
/*http://msdn.microsoft.com/en-us/library/ms893078.aspx
*
* C = Y - 16
D = U - 128
E = V - 128
R = clip(( 298 * C + 409 * E + 128) >> 8)
G = clip(( 298 * C - 100 * D - 208 * E + 128) >> 8)
B = clip(( 298 * C + 516 * D + 128) >> 8)
* here are a whole bunch more formats for doing this...
* http://stackoverflow.com/questions/3943779/converting-to-yuv-ycbcr-colour-space-many-versions
*/
fixed(byte* pRGBs = RGBData, pYUVs = YUVData)
{
for (int r = 0; r < height; r++)
{
byte* pRGB = pRGBs + r * width * 3;
byte* pYUV = pYUVs + r * width * 2;
//process two pixels at a time
for (int c = 0; c < width; c += 2)
{
int C1 = pYUV[1] - 16;
int C2 = pYUV[3] - 16;
int D = pYUV[2] - 128;
int E = pYUV[0] - 128;
int R1 = (298 * C1 + 409 * E + 128) >> 8;
int G1 = (298 * C1 - 100 * D - 208 * E + 128) >> 8;
int B1 = (298 * C1 + 516 * D + 128) >> 8;
int R2 = (298 * C2 + 409 * E + 128) >> 8;
int G2 = (298 * C2 - 100 * D - 208 * E + 128) >> 8;
int B2 = (298 * C2 + 516 * D + 128) >> 8;
#if true
//check for overflow
//unsurprisingly this takes the bulk of the time.
pRGB[0] = (byte)(R1 < 0 ? 0 : R1 > 255 ? 255 : R1);
pRGB[1] = (byte)(G1 < 0 ? 0 : G1 > 255 ? 255 : G1);
pRGB[2] = (byte)(B1 < 0 ? 0 : B1 > 255 ? 255 : B1);
pRGB[3] = (byte)(R2 < 0 ? 0 : R2 > 255 ? 255 : R2);
pRGB[4] = (byte)(G2 < 0 ? 0 : G2 > 255 ? 255 : G2);
pRGB[5] = (byte)(B2 < 0 ? 0 : B2 > 255 ? 255 : B2);
#else
pRGB[0] = (byte)(R1);
pRGB[1] = (byte)(G1);
pRGB[2] = (byte)(B1);
pRGB[3] = (byte)(R2);
pRGB[4] = (byte)(G2);
pRGB[5] = (byte)(B2);
#endif
pRGB += 6;
pYUV += 4;
}
}
}
}
and in case you decide to implement this in C++:
void YUV2RGB(void *yuvDataIn,void *rgbDataOut, int w, int h, int outNCh)
{
const int ch2 = 2 * outNCh;
unsigned char* pRGBs = (unsigned char*)rgbDataOut;
unsigned char* pYUVs = (unsigned char*)yuvDataIn;
for (int r = 0; r < h; r++)
{
unsigned char* pRGB = pRGBs + r * w * outNCh;
unsigned char* pYUV = pYUVs + r * w * 2;
//process two pixels at a time
for (int c = 0; c < w; c += 2)
{
int C1 = pYUV[1] - 16;
int C2 = pYUV[3] - 16;
int D = pYUV[2] - 128;
int E = pYUV[0] - 128;
int R1 = (298 * C1 + 409 * E + 128) >> 8;
int G1 = (298 * C1 - 100 * D - 208 * E + 128) >> 8;
int B1 = (298 * C1 + 516 * D + 128) >> 8;
int R2 = (298 * C2 + 409 * E + 128) >> 8;
int G2 = (298 * C2 - 100 * D - 208 * E + 128) >> 8;
int B2 = (298 * C2 + 516 * D + 128) >> 8;
//unsurprisingly this takes the bulk of the time.
pRGB[0] = (unsigned char)(R1 < 0 ? 0 : R1 > 255 ? 255 : R1);
pRGB[1] = (unsigned char)(G1 < 0 ? 0 : G1 > 255 ? 255 : G1);
pRGB[2] = (unsigned char)(B1 < 0 ? 0 : B1 > 255 ? 255 : B1);
pRGB[3] = (unsigned char)(R2 < 0 ? 0 : R2 > 255 ? 255 : R2);
pRGB[4] = (unsigned char)(G2 < 0 ? 0 : G2 > 255 ? 255 : G2);
pRGB[5] = (unsigned char)(B2 < 0 ? 0 : B2 > 255 ? 255 : B2);
pRGB += ch2;
pYUV += 4;
}
}
}
The biggest offender in that code is the use of Bitmap.SetPixel; it is very slow to do this on every inner loop iteration. Instead, use a byte array to store your RGB values and once it is filled, copy it into a bitmap as a single step.
Secondly, understand that y, u and v are bytes, and so can only have 256 possible values. It is therefore perfectly feasible to build lookup tables for r, g and b, so you don't have to perform any computations in your inner loop.
Finally, if you really want performance you'll have to write this in C++ using pointer arithmetic and compile with all optimizations on. This loop is also a very good candidate for a parallel for since every iteration operates on independent data. It is also possible to optimize this further with SSE intrinsics, converting several pixels per instruction.
Hopefully this should get you started.
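One way to realize the lookup-table suggestion (a sketch only; the helper and its table names are made up here): precompute the per-byte contributions of Y, U and V once, so the inner loop is reduced to additions, shifts and clamps.
// Hypothetical helper: per-byte lookup tables for the fixed-point terms used above
// (C = 298*(Y-16)+128, 409*E, -100*D, -208*E, 516*D).
static class YuvTables
{
    public static readonly int[] CY = new int[256];
    public static readonly int[] RV = new int[256];
    public static readonly int[] GU = new int[256];
    public static readonly int[] GV = new int[256];
    public static readonly int[] BU = new int[256];

    static YuvTables()
    {
        for (int i = 0; i < 256; i++)
        {
            CY[i] = 298 * (i - 16) + 128;
            RV[i] = 409 * (i - 128);
            GU[i] = -100 * (i - 128);
            GV[i] = -208 * (i - 128);
            BU[i] = 516 * (i - 128);
        }
    }
}
// Per pixel (y, u, v are the raw bytes):
//   R = clip((YuvTables.CY[y] + YuvTables.RV[v]) >> 8)
//   G = clip((YuvTables.CY[y] + YuvTables.GU[u] + YuvTables.GV[v]) >> 8)
//   B = clip((YuvTables.CY[y] + YuvTables.BU[u]) >> 8)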
I just found an old piece of code which might help you: YUV conversion using OpenCvSharp.
(Disclaimer: I removed some unnecessary code and haven't tested this!)
IplImage yuvImage = new IplImage(w, h, BitDepth.U8, 3);
IplImage rgbImage = new IplImage(w, h, BitDepth.U8, 3);
Cv.CvtColor(yuvImage, rgbImage, ColorConversion.CrCbToBgr);
To answer your other question - to convert a byte[] to a Bitmap, use this:
int w= 100;
int h = 200;
int ch = 3;
byte[] imageData = new byte[w*h*ch]; //you image data here
Bitmap bitmap = new Bitmap(w,h,PixelFormat.Format24bppRgb);
BitmapData bmData = bitmap.LockBits(new System.Drawing.Rectangle(0, 0, bitmap.Width, bitmap.Height), ImageLockMode.ReadWrite, bitmap.PixelFormat);
IntPtr pNative = bmData.Scan0;
Marshal.Copy(imageData,0,pNative,w*h*ch);
bitmap.UnlockBits(bmData);
A faster variant, with two fewer multiplications and two fewer additions per pixel:
private static unsafe void YUV2RGBManaged(byte[] YUVData, byte[] RGBData, int width, int height)
{
//returned pixel format is 2yuv - i.e. luminance, y, is represented for every pixel and the u and v are alternated
//like this (where Cb = u, Cr = v)
//Y0 Cb Y1 Cr Y2 Cb Y3
/*http://msdn.microsoft.com/en-us/library/ms893078.aspx
*
C = 298 * (Y - 16) + 128
D = U - 128
E = V - 128
R = clip(( C + 409 * E) >> 8)
G = clip(( C - 100 * D - 208 * E) >> 8)
B = clip(( C + 516 * D ) >> 8)
* here are a whole bunch more formats for doing this...
* http://stackoverflow.com/questions/3943779/converting-to-yuv-ycbcr-colour-space-many-versions
*/
fixed(byte* pRGBs = RGBData, pYUVs = YUVData)
{
for (int r = 0; r < height; r++)
{
byte* pRGB = pRGBs + r * width * 3;
byte* pYUV = pYUVs + r * width * 2;
//process two pixels at a time
for (int c = 0; c < width; c += 2)
{
int C1 = 298 * (pYUV[1] - 16) + 128;
int C2 = 298 * (pYUV[3] - 16) + 128;
int D = pYUV[2] - 128;
int E = pYUV[0] - 128;
int R1 = (C1 + 409 * E) >> 8;
int G1 = (C1 - 100 * D - 208 * E) >> 8;
int B1 = (C1 + 516 * D) >> 8;
int R2 = (C2 + 409 * E) >> 8;
int G2 = (C2 - 100 * D - 208 * E) >> 8;
int B2 = (C2 + 516 * D) >> 8;
//check for overflow
//unsurprisingly this takes the bulk of the time.
pRGB[0] = (byte)(R1 < 0 ? 0 : R1 > 255 ? 255 : R1);
pRGB[1] = (byte)(G1 < 0 ? 0 : G1 > 255 ? 255 : G1);
pRGB[2] = (byte)(B1 < 0 ? 0 : B1 > 255 ? 255 : B1);
pRGB[3] = (byte)(R2 < 0 ? 0 : R2 > 255 ? 255 : R2);
pRGB[4] = (byte)(G2 < 0 ? 0 : G2 > 255 ? 255 : G2);
pRGB[5] = (byte)(B2 < 0 ? 0 : B2 > 255 ? 255 : B2);
pRGB += 6;
pYUV += 4;
}
}
}
}