I'm using UdpClient to receive an RTP stream from phone calls through the Avaya DMCC SDK, and I would like to play this stream through the computer's speakers. After a lot of searching I've only found solutions that save the stream to a file and then play the file, but I need to play the stream without saving it: I'd like to send audio to the speakers as I receive it.
public void StartClient()
{
// Create new UDP client. The IP end point tells us which IP is sending the data
client = new UdpClient(port);
endPoint = new IPEndPoint(System.Net.IPAddress.Any, port);
selectedCodec = new MuLawChatCodec();
waveOut = new WaveOut();
waveProvider = new BufferedWaveProvider(selectedCodec.RecordFormat);
waveOut.Init(waveProvider);
waveOut.Play();
listening = true;
listenerThread = new Thread(ReceiveCallback);
listenerThread.Start();
}
private void ReceiveCallback()
{
// Begin looking for the next packet
while (listening)
{
// Receive packet
byte[] packet = client.Receive(ref endPoint);
// Packet header (bit offsets per RFC 3550)
int version = GetRTPValue(packet, 0, 1);
int padding = GetRTPValue(packet, 2, 2);
int extension = GetRTPValue(packet, 3, 3);
int csrcCount = GetRTPValue(packet, 4, 7);
int marker = GetRTPValue(packet, 8, 8);
int payloadType = GetRTPValue(packet, 9, 15);
int sequenceNum = GetRTPValue(packet, 16, 31);
int timestamp = GetRTPValue(packet, 32, 63);
int ssrcId = GetRTPValue(packet, 64, 95);
int csrcId = (csrcCount == 0) ? -1 : GetRTPValue(packet, 96, 95 + 32 * csrcCount);
// The extension header, if present, starts right after the CSRC list
int extHeader = (extension == 0) ? -1 : GetRTPValue(packet, 96 + 32 * csrcCount, 127 + 32 * csrcCount);
// The payload offset is in BYTES: 12-byte fixed header plus 4 bytes per CSRC
// (an extension header, if present, would also need to be skipped here)
int payloadIndex = 12 + 4 * csrcCount;
byte[] payload = new byte[packet.Length - payloadIndex];
Buffer.BlockCopy(packet, payloadIndex, payload, 0, packet.Length - payloadIndex);
byte[] decoded = selectedCodec.Decode(payload, 0, payload.Length);
}
}
private int GetRTPValue(byte[] packet, int startBit, int endBit)
{
// Values in the RTP header are big endian, so walk the bits MSB-first
int result = 0;
for (int i = startBit; i <= endBit; i++)
{
int byteIndex = i / 8;
int bitShift = 7 - (i % 8);
result = (result << 1) | ((packet[byteIndex] >> bitShift) & 1);
}
return result;
}
I now successfully have audio from the call playing over the speakers: I add a byte[] containing just the RTP payload to NAudio's BufferedWaveProvider.
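In code, the missing step is just one call after decoding (a minimal sketch, using the waveProvider and selectedCodec fields initialized in StartClient above):
// After extracting the payload bytes in the receive loop:
byte[] decoded = selectedCodec.Decode(payload, 0, payload.Length);
waveProvider.AddSamples(decoded, 0, decoded.Length); // queue PCM for the soundcard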
There's a demo of how to play audio received over the network included with the NAudio source code (see the Network Chat demo in the NAudioDemo project). Basically, use an AcmStream to decode the audio, then put it into a BufferedWaveProvider that the soundcard is playing from.
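A minimal sketch of that approach (AcmStream lives in NAudio.Wave.Compression; the 8 kHz mono mu-law input format here is an assumption, adjust it to match your stream):
// Decode mu-law to 16-bit PCM with ACM, then queue it for playback
var acm = new AcmStream(WaveFormat.CreateMuLawFormat(8000, 1), new WaveFormat(8000, 16, 1));
var provider = new BufferedWaveProvider(new WaveFormat(8000, 16, 1));
var output = new WaveOut();
output.Init(provider);
output.Play();
// For each RTP payload received:
Buffer.BlockCopy(payload, 0, acm.SourceBuffer, 0, payload.Length);
int sourceBytesConverted;
int converted = acm.Convert(payload.Length, out sourceBytesConverted);
provider.AddSamples(acm.DestBuffer, 0, converted);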
I am trying to write user configuration data (Data Page 55, 0x37) to a smart fitness device (a bicycle trainer) via FE-C over Bluetooth. Whether I use different values or the maximum values, I do not notice any change in resistance. Is there a mistake in how the data is processed? The second method (private static byte[] CreateFECUserConfiguration()) returns a byte array which is written to the device.
private async Task WriteUserConfiguration(GattCharacteristic characteristic)
{
DataWriter writer = new DataWriter();
byte[] bytes = CreateFECUserConfiguration();
writer.WriteBytes(bytes);
var valResult = await characteristic.WriteValueAsync(writer.DetachBuffer());
if (valResult == GattCommunicationStatus.Success)
{
Debug.WriteLine("Write UserConfiguration Successful");
}
}
// create values for testing, will be provided by the user later on
private static byte[] CreateFECUserConfiguration()
{
byte[] bytes = new byte[13]; // size of message
UInt16 userWeight = (ushort)(655.34 / 0.01); // 0-655.34
byte[] userWeightBytes = BitConverter.GetBytes(userWeight);
byte bicycleWheelDiameterOffset = 10; // 0-10, 0.5 byte
UInt16 bicycleWeight = 50 * 20; // 0 – 50, * 20, 1.5 byte
// start merging bicycle wheel diameter offset and bicycle weight + putting them in the right order
byte[] tempWheelDiameterOffset = new byte[1] { bicycleWheelDiameterOffset };
BitArray bicycleWheelDiameterOffsetBits = new BitArray(tempWheelDiameterOffset);
byte[] testbicycleWeightBytes = BitConverter.GetBytes(bicycleWeight);
BitArray testBicycleWeight = new BitArray(testbicycleWeightBytes);
bool[] tempBicycleWeightPartTwo = new bool[8] { testBicycleWeight[4], testBicycleWeight[5], testBicycleWeight[6], testBicycleWeight[7], testBicycleWeight[8], testBicycleWeight[9], testBicycleWeight[10], testBicycleWeight[11] };
BitArray bicycleWeightBitsTwo = new BitArray(tempBicycleWeightPartTwo);
bool[] mergeBitsAsBools = new bool[8] { testBicycleWeight[0], testBicycleWeight[1], testBicycleWeight[2], testBicycleWeight[3], bicycleWheelDiameterOffsetBits[0], bicycleWheelDiameterOffsetBits[1], bicycleWheelDiameterOffsetBits[2], bicycleWheelDiameterOffsetBits[3] };
BitArray tempMergeWheelDiameterOffsetPlusBicycleWeight = new BitArray(mergeBitsAsBools);
byte[] wheelDiameterOffsetPlusBicycleWeight = new byte[1];
byte[] bicycleWeightByteTwo = new byte[1];
tempMergeWheelDiameterOffsetPlusBicycleWeight.CopyTo(wheelDiameterOffsetPlusBicycleWeight, 0);
bicycleWeightBitsTwo.CopyTo(bicycleWeightByteTwo, 0);
//end merging
byte bicycleWheelDiameter = (byte)(0.5 / 0.01); // 0 – 2.54m
byte gearRatio = (byte)(1 / 0.03); // 0.03 – 7.65
bytes[0] = 0xA4;
bytes[1] = 0x09; // length
bytes[2] = 0x4F; // message type
bytes[3] = 0x05; // channel
bytes[4] = 0x37; // Page Number 55
bytes[5] = userWeightBytes[0]; // User Weight LSB
bytes[6] = userWeightBytes[1]; // User Weight MSB
bytes[7] = 0xFF; // Reserved for future use
bytes[8] = wheelDiameterOffsetPlusBicycleWeight[0]; // Bicycle Wheel Diameter Offset (0.5 byte) + Bicycle Weight LSN (probably a typo in the documentation -> LSB?) (0.5 byte)
bytes[9] = bicycleWeightByteTwo[0]; // Bicycle Weight MSB
bytes[10] = bicycleWheelDiameter; // Bicycle Wheel Diameter
bytes[11] = gearRatio; // Gear Ratio
bytes[12] = ComputeChecksum(bytes); // Method to calculate the checksum
return bytes;
}
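One thing worth double-checking is the nibble order in byte 8: the BitArray code above ends up with the bicycle weight LSN in the low nibble and the wheel diameter offset in the high nibble. A much shorter sketch using plain bit operations, assuming (per my reading of the page 55 layout; please verify against the FE-C profile document) that the offset occupies the low nibble and the weight LSN the high nibble:
// Hypothetical sketch: pack the shared byte and the weight MSB directly.
// Assumes wheel diameter offset = low nibble, bicycle weight LSN = high nibble.
ushort bicycleWeightRaw = 50 * 20; // weight in 0.05 kg units (12 bits)
byte wheelDiameterOffsetRaw = 10; // 0-10 (4 bits)
bytes[8] = (byte)(((bicycleWeightRaw & 0x0F) << 4) | (wheelDiameterOffsetRaw & 0x0F));
bytes[9] = (byte)(bicycleWeightRaw >> 4); // remaining 8 bits of the weight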
Ant+ FE-C Data Page 55 (0x37)
Ant+ message format structure
Is there a way to split a wav file in C# on WP8? I'd like to save a sample of a wav file, starting at, for example, 00:30 and ending at 00:40.
Maybe using some kind of stream or buffer, but then I'd have to know when to start/finish copying the stream to another wav file.
How can I do this?
On my blog I posted about this, starting with this post (http://csharp-tricks-en.blogspot.de/2011/03/read-in-wave-files.html) about reading a wave file, with the next two posts building on it. Hope that helps!
Best
Oliver
Since a wav file's header can vary from 44 bytes (the standard size) to over 100 bytes, you'll first need to determine the exact size of the header and read some important meta information before you can start chopping up your wav file(s).
So you need to know the following meta data about your wav file(s),
Sample rate,
Bit depth,
Channel count,
Audio data format (whether samples are PCM or Floating-Point)
Header size.
Before continuing, I'd recommend reading the format of a wav file header first so you'll have a better idea of what the code is doing.
So first we need to get our meta info,
private void ReadMetaData(FileStream stream, out bool isFloatingPoint, out int channelCount, out int sampleRate, out int bitDepth, out int headerSize)
{
var headerBytes = new byte[200];
// Read the first 200 bytes, enough to cover any common header.
stream.Position = 0;
stream.Read(headerBytes, 0, 200);
// The audio data starts 8 bytes after the "data" chunk ID (4-byte ID + 4-byte size).
headerSize = new string(Encoding.ASCII.GetChars(headerBytes)).IndexOf("data") + 8;
// Audio format code: 3 = IEEE float, 1 = PCM.
isFloatingPoint = BitConverter.ToUInt16(new byte[] { headerBytes[20], headerBytes[21] }, 0) == 3;
channelCount = BitConverter.ToUInt16(new byte[] { headerBytes[22], headerBytes[23] }, 0);
sampleRate = (int)BitConverter.ToUInt32(new byte[] { headerBytes[24], headerBytes[25], headerBytes[26], headerBytes[27] }, 0);
bitDepth = BitConverter.ToUInt16(new byte[] { headerBytes[34], headerBytes[35] }, 0);
}
Once we have this data we can calculate where we need to start and stop reading the file. To calculate the start and end byte indexes we do,
var byteDepth = bitDepth / 8; // bytes per sample
var startIndex = (int)(start.TotalSeconds * sampleRate * byteDepth * channelCount);
var endIndex = (int)(end.TotalSeconds * sampleRate * byteDepth * channelCount);
start and end are TimeSpans indicating where cropping should start and stop.
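For the 00:30 to 00:40 sample from the question, that would be:
var start = TimeSpan.FromSeconds(30);
var end = TimeSpan.FromSeconds(40);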
We can now read the bytes from our file using our newly calculated info, if you're using a FileStream you would do the following,
var newBytes = new byte[endIndex - startIndex];
myStream.Position = headerSize + startIndex; // Add headerSize to position to make sure we don't read the header.
myStream.Read(newBytes, 0, newBytes.Length);
Then all you have to do is write a wav header to the destination file along with the newly extracted audio. Putting this all together, you should end up with something like this,
private void CropWavFile(string inputFilePath, string outputFilePath, TimeSpan start, TimeSpan end)
{
var stream = new FileStream(inputFilePath, FileMode.Open);
// FileMode.Create truncates any existing (longer) output file.
var newStream = new FileStream(outputFilePath, FileMode.Create);
var isFloatingPoint = false;
var sampleRate = 0;
var bitDepth = 0;
var channelCount = 0;
var headerSize = 0;
// Get meta info
ReadMetaData(stream, out isFloatingPoint, out channelCount, out sampleRate, out bitDepth, out headerSize);
// Calculate where we need to start and stop reading.
var startIndex = (int)(start.TotalSeconds * sampleRate * (bitDepth / 8) * channelCount);
var endIndex = (int)(end.TotalSeconds * sampleRate * (bitDepth / 8) * channelCount);
var bytesCount = endIndex - startIndex;
var newBytes = new byte[bytesCount];
// Read audio data.
stream.Position = startIndex + headerSize;
stream.Read(newBytes, 0, bytesCount);
// Write the wav header and our newly extracted audio to the new wav file.
WriteMetaData(newStream, isFloatingPoint, (ushort)channelCount, (ushort)bitDepth, sampleRate, newBytes.Length / (bitDepth / 8));
newStream.Write(newBytes, 0, newBytes.Length);
stream.Dispose();
newStream.Dispose();
}
private void WriteMetaData(FileStream stream, bool isFloatingPoint, ushort channels, ushort bitDepth, int sampleRate, int totalSampleCount)
{
stream.Position = 0;
// RIFF header.
// Chunk ID.
stream.Write(Encoding.ASCII.GetBytes("RIFF"), 0, 4);
// Chunk size.
stream.Write(BitConverter.GetBytes(((bitDepth / 8) * totalSampleCount) + 36), 0, 4);
// Format.
stream.Write(Encoding.ASCII.GetBytes("WAVE"), 0, 4);
// Sub-chunk 1.
// Sub-chunk 1 ID.
stream.Write(Encoding.ASCII.GetBytes("fmt "), 0, 4);
// Sub-chunk 1 size.
stream.Write(BitConverter.GetBytes(16), 0, 4);
// Audio format (floating point (3) or PCM (1)). Any other format indicates compression.
stream.Write(BitConverter.GetBytes((ushort)(isFloatingPoint ? 3 : 1)), 0, 2);
// Channels.
stream.Write(BitConverter.GetBytes(channels), 0, 2);
// Sample rate.
stream.Write(BitConverter.GetBytes(sampleRate), 0, 4);
// Byte rate.
stream.Write(BitConverter.GetBytes(sampleRate * channels * (bitDepth / 8)), 0, 4);
// Block align.
stream.Write(BitConverter.GetBytes((ushort)(channels * (bitDepth / 8))), 0, 2);
// Bits per sample.
stream.Write(BitConverter.GetBytes(bitDepth), 0, 2);
// Sub-chunk 2.
// Sub-chunk 2 ID.
stream.Write(Encoding.ASCII.GetBytes("data"), 0, 4);
// Sub-chunk 2 size.
stream.Write(BitConverter.GetBytes((bitDepth / 8) * totalSampleCount), 0, 4);
}
plus the ReadMetaData method shown earlier.
Following the wave file standard to record audio data with Kinect, I wrote a WriteWavHeader() method which works perfectly for 1 channel (PCM, sample rate: 16 kHz, 16 bits/sample). The problem is that when I try to record in stereo (nChannels = 2) the track seems to play at double speed! Any ideas what I am doing wrong?
static void WriteWavHeader(Stream stream, int dataLength)
{
using (var memStream = new MemoryStream(64))
{
int cbFormat = 16; //sizeof(WAVEFORMATEX)
WAVEFORMATEX format = new WAVEFORMATEX()
{
//Subchunk1Size == 16 for PCM.
wFormatTag = 1,
//NumChannels : Mono = 1, Stereo = 2, etc.
nChannels = 2,
//SampleRate 8000, 44100
nSamplesPerSec = 16000,
//ByteRate == SampleRate * NumChannels * BitsPerSample/8
nAvgBytesPerSec = 32000*2,
//BlockAlign == NumChannels * BitsPerSample/8
nBlockAlign = 2*2,
//BitsPerSample 8 bits = 8, 16 bits = 16, etc.
wBitsPerSample = 16
};
using (var binarywriter = new BinaryWriter(memStream))
{
//RIFF header
WriteString(memStream, "RIFF");
binarywriter.Write(dataLength+8 + cbFormat+8 + 4);
WriteString(memStream, "WAVE");
WriteString(memStream, "fmt ");
binarywriter.Write(cbFormat);
//WAVEFORMATEX
binarywriter.Write(format.wFormatTag);
binarywriter.Write(format.nChannels);
binarywriter.Write(format.nSamplesPerSec);
binarywriter.Write(format.nAvgBytesPerSec);
binarywriter.Write(format.nBlockAlign);
binarywriter.Write(format.wBitsPerSample);
//data header
WriteString(memStream, "data");
binarywriter.Write(dataLength);
memStream.WriteTo(stream);
}
}
}
And the recording method:
public void RecordAudio()
{
//Subchunk2Size == NumSamples * NumChannels * BitsPerSample/8
int recordingLength = 5 *16000* 2 *2;
byte[] buffer = new byte[1024];
using (FileStream _fileStream = new FileStream("c:\\kinectAudio.wav", FileMode.Create))
{
WriteWavHeader(_fileStream, recordingLength);
int count, totalCount = 0;
//Start capturing audio
using (Stream audioStream = this.sensor.AudioSource.Start())
{
while ((count = audioStream.Read(buffer, 0, buffer.Length)) > 0 && totalCount < recordingLength)
{
_fileStream.Write(buffer, 0, count);
totalCount += count;
}
}
//write the real wav header
long prePosition = _fileStream.Position;
_fileStream.Seek(0, SeekOrigin.Begin);
WriteWavHeader(_fileStream, totalCount);
_fileStream.Seek(prePosition, SeekOrigin.Begin);
_fileStream.Flush();
}
}
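A note on the double-speed symptom: the Kinect microphone array delivers a single-channel 16 kHz, 16-bit PCM stream, so a header that claims stereo makes players pair consecutive mono samples into stereo frames and finish in half the time. A sketch of the header values matching what the sensor actually delivers (assuming the default Kinect audio format):
// Header fields for the Kinect's native audio format (16 kHz, 16-bit, mono).
// nChannels must describe the data actually written to the file.
WAVEFORMATEX format = new WAVEFORMATEX()
{
wFormatTag = 1, // PCM
nChannels = 1, // the Kinect audio stream is mono
nSamplesPerSec = 16000,
nAvgBytesPerSec = 16000 * 1 * 2, // SampleRate * NumChannels * BitsPerSample/8
nBlockAlign = 1 * 2, // NumChannels * BitsPerSample/8
wBitsPerSample = 16
};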
I'm currently trying to do pitch shifting of a wave file using this algorithm
https://sites.google.com/site/mikescoderama/pitch-shifting
Here is my code, which uses the above implementation, but with no luck. The output wave file seems to be corrupted or invalid.
The code is quite simple, except for the pitch shift algorithm :)
It loads a wave file and reads its data into a byte[] array.
It "normalizes" the byte data into the -1.0f to 1.0f range (as requested by the creator of the pitch shift algorithm).
It applies the pitch shift algorithm and then converts the normalized data back into a byte[] array.
Finally it saves a wave file with the same header as the original wave file and the pitch-shifted data.
Am I missing something?
static void Main(string[] args)
{
// Read the wave file data bytes
byte[] waveheader = null;
byte[] wavedata = null;
using (BinaryReader reader = new BinaryReader(File.OpenRead("sound.wav")))
{
// Read first 44 bytes (header);
waveheader= reader.ReadBytes(44);
// Read data
wavedata = reader.ReadBytes((int)reader.BaseStream.Length - 44);
}
short nChannels = BitConverter.ToInt16(waveheader, 22);
int sampleRate = BitConverter.ToInt32(waveheader, 24);
short bitRate = BitConverter.ToInt16(waveheader, 34);
// Normalized data store. Store values in the format -1.0 to 1.0
float[] in_data = new float[wavedata.Length / 2];
// Normalize wave data into -1.0 to 1.0 values
using(BinaryReader reader = new BinaryReader(new MemoryStream(wavedata)))
{
for (int i = 0; i < in_data.Length; i++)
{
if(bitRate == 16)
in_data[i] = reader.ReadInt16() / 32768f;
if (bitRate == 8)
in_data[i] = (reader.ReadByte() - 128) / 128f;
}
}
//PitchShifter.PitchShift(1f, in_data.Length, (long)1024, (long)32, sampleRate, in_data);
// Backup wave data
byte[] copydata = new byte[wavedata.Length];
Array.Copy(wavedata, copydata, wavedata.Length);
// Revert data to byte format
Array.Clear(wavedata, 0, wavedata.Length);
using (BinaryWriter writer = new BinaryWriter(new MemoryStream(wavedata)))
{
for (int i = 0; i < in_data.Length; i++)
{
if(bitRate == 16)
writer.Write((short)(in_data[i] * 32768f));
if (bitRate == 8)
writer.Write((byte)((in_data[i] * 128f) + 128));
}
}
// Compare new wavedata with copydata
if (wavedata.SequenceEqual(copydata))
{
Console.WriteLine("Data has no changes");
}
else
{
Console.WriteLine("Data has changed!");
}
// Save modified wavedata
string targetFilePath = "sound_low.wav";
if (File.Exists(targetFilePath))
File.Delete(targetFilePath);
using (BinaryWriter writer = new BinaryWriter(File.OpenWrite(targetFilePath)))
{
writer.Write(waveheader);
writer.Write(wavedata);
}
Console.ReadLine();
}
The algorithm here works fine:
https://sites.google.com/site/mikescoderama/pitch-shifting
My mistake was in how I was reading the wave header and wave data. Here is the fully working code.
WARNING: this code works only for PCM 16-bit (stereo/mono) waves. It can easily be adapted to work with PCM 8-bit.
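For the 8-bit case, the per-sample conversion would look roughly like this (a sketch; PCM 8-bit stores unsigned bytes centered on 128, one byte per sample, so the read/write loops advance one byte at a time):
// Convert one 8-bit PCM sample to the -1..1 range
static float ByteToNormalized_8(byte b)
{
return (b - 128) / 128f;
}
// Convert a normalized sample back to one 8-bit PCM byte
static byte NormalizedToByte_8(float value)
{
return (byte)((value * 127f) + 128);
}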
static void Main(string[] args)
{
// Read header, data and channels as separated data
// Normalized data stores. Store values in the format -1.0 to 1.0
byte[] waveheader = null;
byte[] wavedata = null;
int sampleRate = 0;
float[] in_data_l = null;
float[] in_data_r = null;
GetWaveData("sound.wav", out waveheader, out wavedata, out sampleRate, out in_data_l, out in_data_r);
//
// Apply Pitch Shifting
//
if(in_data_l != null)
PitchShifter.PitchShift(2f, in_data_l.Length, (long)1024, (long)10, sampleRate, in_data_l);
if(in_data_r != null)
PitchShifter.PitchShift(2f, in_data_r.Length, (long)1024, (long)10, sampleRate, in_data_r);
//
// Time to save the processed data
//
// Backup wave data
byte[] copydata = new byte[wavedata.Length];
Array.Copy(wavedata, copydata, wavedata.Length);
GetWaveData(in_data_l, in_data_r, ref wavedata);
//
// Check if data actually changed
//
bool noChanges = true;
for (int i = 0; i < wavedata.Length; i++)
{
if (wavedata[i] != copydata[i])
{
noChanges = false;
Console.WriteLine("Data has changed!");
break;
}
}
if(noChanges)
Console.WriteLine("Data has no changes");
// Save modified wavedata
string targetFilePath = "sound_low.wav";
if (File.Exists(targetFilePath))
File.Delete(targetFilePath);
using (BinaryWriter writer = new BinaryWriter(File.OpenWrite(targetFilePath)))
{
writer.Write(waveheader);
writer.Write(wavedata);
}
Console.ReadLine();
}
// Returns left and right float arrays. 'right' will be null if sound is mono.
public static void GetWaveData(string filename, out byte[] header, out byte[] data, out int sampleRate, out float[] left, out float[] right)
{
byte[] wav = File.ReadAllBytes(filename);
// Determine if mono or stereo
int channels = wav[22]; // Forget byte 23 as 99.999% of WAVs are 1 or 2 channels
// Get sample rate
sampleRate = BitConverter.ToInt32(wav, 24);
int pos = 12;
// Skip chunks until we find the "data" chunk ID (ASCII 'd' 'a' 't' 'a' = 0x64 0x61 0x74 0x61)
while(!(wav[pos]==100 && wav[pos+1]==97 && wav[pos+2]==116 && wav[pos+3]==97)) {
pos += 4;
int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
pos += 4 + chunkSize;
}
pos += 4;
int subchunk2Size = BitConverter.ToInt32(wav, pos);
pos += 4;
// Pos is now positioned to start of actual sound data.
int samples = subchunk2Size / 2; // 2 bytes per sample (16 bit sound mono)
if (channels == 2)
samples /= 2; // 4 bytes per sample (16 bit stereo)
// Allocate memory (right will be null if only mono sound)
left = new float[samples];
if (channels == 2)
right = new float[samples];
else
right = null;
header = new byte[pos];
Array.Copy(wav, header, pos);
data = new byte[subchunk2Size];
Array.Copy(wav, pos, data, 0, subchunk2Size);
// Write to float array/s:
int i = 0;
// Stop at the END of the data chunk; comparing pos against subchunk2Size alone
// would drop the last header-sized run of samples.
int dataEnd = pos + subchunk2Size;
while (pos < dataEnd)
{
left[i] = BytesToNormalized_16(wav[pos], wav[pos + 1]);
pos += 2;
if (channels == 2)
{
right[i] = BytesToNormalized_16(wav[pos], wav[pos + 1]);
pos += 2;
}
i++;
}
}
// Return byte data from left and right float data. Ignore right when sound is mono
public static void GetWaveData(float[] left, float[] right, ref byte[] data)
{
// Calculate k
// This value will be used to convert float to Int16
// We are not using Int16.Max to avoid peaks due to overflow conversions
float k = (float)Int16.MaxValue / left.Select(x => Math.Abs(x)).Max();
// Revert data to byte format
Array.Clear(data, 0, data.Length);
int dataLength = left.Length;
int byteId = -1;
using (BinaryWriter writer = new BinaryWriter(new MemoryStream(data)))
{
for (int i = 0; i < dataLength; i++)
{
byte byte1 = 0;
byte byte2 = 0;
byteId++;
NormalizedToBytes_16(left[i], k, out byte1, out byte2);
writer.Write(byte1);
writer.Write(byte2);
if (right != null)
{
byteId++;
NormalizedToBytes_16(right[i], k, out byte1, out byte2);
writer.Write(byte1);
writer.Write(byte2);
}
}
}
}
// Convert two bytes to one float in the range -1 to 1
static float BytesToNormalized_16(byte firstByte, byte secondByte)
{
// convert two bytes to one short (little endian)
short s = (short)((secondByte << 8) | firstByte);
// convert to range from -1 to (just below) 1
return s / 32768f;
}
// Convert a float value into two bytes (use k as conversion value and not Int16.MaxValue to avoid peaks)
static void NormalizedToBytes_16(float value, float k, out byte firstByte, out byte secondByte)
{
short s = (short)(value * k);
firstByte = (byte)(s & 0x00FF);
secondByte = (byte)(s >> 8);
}
Sorry to revive this, but I tried that PitchShifter class and, while it works, I get crackles in the audio when pitching down (0.5f). Did you work out a way around that?
I am trying to encode recorded audio using Nspeex, transfer it over the internet, and decode it on the other end. I am doing all this on Windows Phone 7/8. I use the following code to encode and decode, but when decoding I do not get back a result that I can play. Can anyone provide encoding and decoding code that works with WP7/8 recorded audio?
private static Microphone mic = Microphone.Default;
private static byte[] EncodeSpeech(byte[] buf, int len)
{
BandMode mode = GetBandMode(mic.SampleRate);
SpeexEncoder encoder = new SpeexEncoder(mode);
// set encoding quality to lowest (which will generate the smallest size in the fastest time)
encoder.Quality = 1;
int inDataSize = len / 2;
// convert to short array
short[] data = new short[inDataSize];
int sampleIndex = 0;
for (int index = 0; index < len; index += 2, sampleIndex++)
{
data[sampleIndex] = BitConverter.ToInt16(buf, index);
}
// note: the number of samples per frame must be a multiple of encoder.FrameSize
inDataSize = inDataSize - inDataSize % encoder.FrameSize;
var encodedData = new byte[len];
int encodedBytes = encoder.Encode(data, 0, inDataSize, encodedData, 0, len);
if (encodedBytes != 0)
{
// each chunk is laid out as follows:
// | 4-byte total chunk size | 4-byte encoded buffer size | <encoded-bytes> |
byte[] inDataSizeBuf = BitConverter.GetBytes(inDataSize);
byte[] sizeBuf = BitConverter.GetBytes(encodedBytes + inDataSizeBuf.Length);
byte[] returnBuf = new byte[encodedBytes + sizeBuf.Length + inDataSizeBuf.Length];
sizeBuf.CopyTo(returnBuf, 0);
inDataSizeBuf.CopyTo(returnBuf, sizeBuf.Length);
Array.Copy(encodedData, 0, returnBuf, sizeBuf.Length + inDataSizeBuf.Length, encodedBytes);
return returnBuf;
}
else
return buf;
}
private byte[] DecodeSpeech(byte[] buf)
{
BandMode mode = GetBandMode(mic.SampleRate);
SpeexDecoder decoder = new SpeexDecoder(mode);
byte[] inDataSizeBuf = new byte[4];
byte[] sizeBuf = new byte[4];
byte[] encodedBuf = new byte[buf.Length - 8];
Array.Copy(buf, 0, sizeBuf, 0, 4);
Array.Copy(buf, 4, inDataSizeBuf, 0, 4);
Array.Copy(buf, 8, encodedBuf, 0, buf.Length - 8);
int inDataSize = BitConverter.ToInt32(inDataSizeBuf, 0);
int size = BitConverter.ToInt32(sizeBuf, 0);
short[] decodedBuf = new short[inDataSize];
int decodedSize = decoder.Decode(encodedBuf, 0, encodedBuf.Length, decodedBuf, 0, false);
byte[] returnBuf = new byte[inDataSize * 2];
for (int index = 0; index < decodedBuf.Length; index++)
{
byte[] temp = BitConverter.GetBytes(decodedBuf[index]);
Array.Copy(temp, 0, returnBuf, index * 2, 2);
}
return returnBuf;
}
private static BandMode GetBandMode(int sampleRate)
{
if (sampleRate <= 8000)
return BandMode.Narrow;
if (sampleRate <= 16000)
return BandMode.Wide;
return BandMode.UltraWide;
}
I think your problem may be that you are newing up a new SpeexEncoder every time you want to encode audio. You should try making it a member of your class and re-using it.
Looking at the code for Nspeex, I noticed that SpeexEncoder uses NbEncoder for the narrow band. That class appears to keep a history of previous audio data in order to perform the encoding, which means output produced by different encoder instances would not fit together.
private static Microphone mic = Microphone.Default;
private static SpeexEncoder encoder = CreateEncoder();
private static SpeexEncoder CreateEncoder()
{
BandMode mode = GetBandMode(mic.SampleRate);
SpeexEncoder encoder = new SpeexEncoder(mode);
// set encoding quality to lowest (which will generate the smallest size in the fastest time)
encoder.Quality = 1;
return encoder;
}
private static byte[] EncodeSpeech(byte[] buf, int len)
{
int inDataSize = len / 2;
...