I was creating a proof of concept for playing sound from Discord voice channels.
I'm using this NAudio providers for mixing sounds:
private readonly OpusDecoder _opusDecoder = OpusDecoder.Create(SamplingRate, Channels);
private readonly WaveFormat _waveFormat = new(SamplingRate, Channels);
private readonly MixingSampleProvider _mixing;
private readonly Dictionary<uint, BufferedWaveProvider> _bufferedWaveProviders = new();
private event Action<byte[]> UdpMessageReceived;
public VoiceConnection(Client client)
{
_client = client;
_cts = new CancellationTokenSource();
SendNonce[0] = 0x80;
SendNonce[1] = 0x78;
_mixing = new MixingSampleProvider(WaveFormat.CreateIeeeFloatWaveFormat(SamplingRate, Channels))
{
ReadFully = true,
};
_outputPlayer = new WaveOut();
_outputPlayer.Init(_mixing);
}
So, an application now listens for the udp connection in this method:
private void StartListeningLoop()
=> Task.Run(() =>
{
Console.WriteLine("Started listening to udp voice channel...");
while (_udp.Client.Connected)
{
try
{
if (_udp.Available <= 0) continue;
var received = _udp.Receive(ref _endPoint);
UdpMessageReceived?.Invoke(received);
}
catch (Exception ex)
{
Console.WriteLine(ex);
throw;
}
}
Console.WriteLine("Udp connection was closed");
});
After that I use this method to handle each packet that is received and add them to the buffered stream for each specific user speaking:
private void HandleUdpMessage(byte[] received)
{
if (received[0] != 0x90 && received[0] != 0x80)
{
return;
}
var nonce = new byte[24];
received.SubArray(0, 12).CopyTo(nonce, 0);
var rawData = SecretBox.Open(received.SubArray(12), nonce, _secretKey);
var hasExtension = Convert.ToBoolean(received[0] & 0b10000);
var cc = received[0] & 0b1111;
if (cc > 0)
{
rawData = rawData.SubArray(cc * 4);
}
if (hasExtension)
{
var l = rawData[2] << 8 | rawData[3];
rawData = rawData.SubArray(4 + l * 4);
}
var ssrcBytes = nonce.SubArray(8, 12);
ssrcBytes = ssrcBytes.Reverse().ToArray();
var ssrc = BitConverter.ToUInt32(ssrcBytes);
_bufferedWaveProviders.TryGetValue(ssrc, out var provider);
using var memoryStream = new MemoryStream();
var decoded = new short[FrameSize * Channels * sizeof(short)];
_opusDecoder.Decode(rawData, 0, rawData.Length, decoded, 0, (int)FrameSize);
foreach (var #short in decoded)
{
var bytes = BitConverter.GetBytes(#short);
memoryStream.Write(bytes, 0, sizeof(short));
}
var decodedBuffer = memoryStream.GetBuffer();
provider?.AddSamples(decodedBuffer, 0, decoded.Length);
}
It was working well for one person speaking, but when 2 or more people start to speak at the same time the sound becomes too noisy and sound more like static interference than voice.
I've tried using multiple WaveOut (one per user), but it didn't help.
Am I doing something completely wrong here? Because now I have no idea why is it happening
Related
I am doing some performance tests on ZeroMQ in order to compare it with others like RabbitMQ and ActiveMQ.
In my broadcast tests and to avoid "The Dynamic Discovery Problem" as referred by ZeroMQ documentation I have used a proxy. In my scenario, I am using 50 concurrent publishers each one sending 500 messages with 1ms delay between sends. Each message is then read by 50 subscribers. And as I said I am losing messages, each of the subscribers should receive a total of 25000 messages and they are each receiving between 5000 and 10000 messages only.
I am using Windows and C# .Net client clrzmq4 (4.1.0.31).
I have already tried some solutions that I found on other posts:
I have set linger to TimeSpan.MaxValue
I have set ReceiveHighWatermark to 0 (as it is presented as infinite, but I have tried also Int32.MaxValue)
I have set checked for slow start receivers, I made receivers start some seconds before publishers
I had to make sure that no garbage collection is made to the socket instances (linger should do it but to make sure)
I have a similar scenario (with similar logic) using NetMQ and it works fine. The other scenario does not use security though and this one does (and that's also the reason why I use clrzmq in this one because I need client authentication with certificates that is not yet possible on NetMQ).
EDIT:
public class MCVEPublisher
{
public void publish(int numberOfMessages)
{
string topic = "TopicA";
ZContext ZContext = ZContext.Create();
ZSocket publisher = new ZSocket(ZContext, ZSocketType.PUB);
//Security
// Create or load certificates
ZCert serverCert = Main.GetOrCreateCert("publisher");
var actor = new ZActor(ZContext, ZAuth.Action, null);
actor.Start();
// send CURVE settings to ZAuth
actor.Frontend.Send(new ZFrame("VERBOSE"));
actor.Frontend.Send(new ZMessage(new List<ZFrame>()
{ new ZFrame("ALLOW"), new ZFrame("127.0.0.1") }));
actor.Frontend.Send(new ZMessage(new List<ZFrame>()
{ new ZFrame("CURVE"), new ZFrame(".curve") }));
publisher.CurvePublicKey = serverCert.PublicKey;
publisher.CurveSecretKey = serverCert.SecretKey;
publisher.CurveServer = true;
publisher.Linger = TimeSpan.MaxValue;
publisher.ReceiveHighWatermark = Int32.MaxValue;
publisher.Connect("tcp://127.0.0.1:5678");
Thread.Sleep(3500);
for (int i = 0; i < numberOfMessages; i++)
{
Thread.Sleep(1);
var update = $"{topic} {"message"}";
using (var updateFrame = new ZFrame(update))
{
publisher.Send(updateFrame);
}
}
//just to make sure it does not end instantly
Thread.Sleep(60000);
//just to make sure publisher is not garbage collected
ulong Affinity = publisher.Affinity;
}
}
public class MCVESubscriber
{
private ZSocket subscriber;
private List<string> prints = new List<string>();
public void read()
{
string topic = "TopicA";
var context = new ZContext();
subscriber = new ZSocket(context, ZSocketType.SUB);
//Security
ZCert serverCert = Main.GetOrCreateCert("xpub");
ZCert clientCert = Main.GetOrCreateCert("subscriber");
subscriber.CurvePublicKey = clientCert.PublicKey;
subscriber.CurveSecretKey = clientCert.SecretKey;
subscriber.CurveServer = true;
subscriber.CurveServerKey = serverCert.PublicKey;
subscriber.Linger = TimeSpan.MaxValue;
subscriber.ReceiveHighWatermark = Int32.MaxValue;
// Connect
subscriber.Connect("tcp://127.0.0.1:1234");
subscriber.Subscribe(topic);
while (true)
{
using (var replyFrame = subscriber.ReceiveFrame())
{
string messageReceived = replyFrame.ReadString();
messageReceived = Convert.ToString(messageReceived.Split(' ')[1]);
prints.Add(messageReceived);
}
}
}
public void PrintMessages()
{
Console.WriteLine("printing " + prints.Count);
}
}
public class Main
{
static void Main(string[] args)
{
broadcast(500, 50, 50, 30000);
}
public static void broadcast(int numberOfMessages, int numberOfPublishers, int numberOfSubscribers, int timeOfRun)
{
new Thread(() =>
{
using (var context = new ZContext())
using (var xsubSocket = new ZSocket(context, ZSocketType.XSUB))
using (var xpubSocket = new ZSocket(context, ZSocketType.XPUB))
{
//Security
ZCert serverCert = GetOrCreateCert("publisher");
ZCert clientCert = GetOrCreateCert("xsub");
xsubSocket.CurvePublicKey = clientCert.PublicKey;
xsubSocket.CurveSecretKey = clientCert.SecretKey;
xsubSocket.CurveServer = true;
xsubSocket.CurveServerKey = serverCert.PublicKey;
xsubSocket.Linger = TimeSpan.MaxValue;
xsubSocket.ReceiveHighWatermark = Int32.MaxValue;
xsubSocket.Bind("tcp://*:5678");
//Security
serverCert = GetOrCreateCert("xpub");
var actor = new ZActor(ZAuth.Action0, null);
actor.Start();
// send CURVE settings to ZAuth
actor.Frontend.Send(new ZFrame("VERBOSE"));
actor.Frontend.Send(new ZMessage(new List<ZFrame>()
{ new ZFrame("ALLOW"), new ZFrame("127.0.0.1") }));
actor.Frontend.Send(new ZMessage(new List<ZFrame>()
{ new ZFrame("CURVE"), new ZFrame(".curve") }));
xpubSocket.CurvePublicKey = serverCert.PublicKey;
xpubSocket.CurveSecretKey = serverCert.SecretKey;
xpubSocket.CurveServer = true;
xpubSocket.Linger = TimeSpan.MaxValue;
xpubSocket.ReceiveHighWatermark = Int32.MaxValue;
xpubSocket.Bind("tcp://*:1234");
using (var subscription = ZFrame.Create(1))
{
subscription.Write(new byte[] { 0x1 }, 0, 1);
xpubSocket.Send(subscription);
}
Console.WriteLine("Intermediary started, and waiting for messages");
// proxy messages between frontend / backend
ZContext.Proxy(xsubSocket, xpubSocket);
Console.WriteLine("end of proxy");
//just to make sure it does not end instantly
Thread.Sleep(60000);
//just to make sure xpubSocket and xsubSocket are not garbage collected
ulong Affinity = xpubSocket.Affinity;
int ReceiveHighWatermark = xsubSocket.ReceiveHighWatermark;
}
}).Start();
Thread.Sleep(5000); //to make sure proxy started
List<MCVESubscriber> Subscribers = new List<MCVESubscriber>();
for (int i = 0; i < numberOfSubscribers; i++)
{
MCVESubscriber ZeroMqSubscriber = new MCVESubscriber();
new Thread(() =>
{
ZeroMqSubscriber.read();
}).Start();
Subscribers.Add(ZeroMqSubscriber);
}
Thread.Sleep(10000);//to make sure all subscribers started
for (int i = 0; i < numberOfPublishers; i++)
{
MCVEPublisher ZeroMqPublisherBroadcast = new MCVEPublisher();
new Thread(() =>
{
ZeroMqPublisherBroadcast.publish(numberOfMessages);
}).Start();
}
Thread.Sleep(timeOfRun);
foreach (MCVESubscriber Subscriber in Subscribers)
{
Subscriber.PrintMessages();
}
}
public static ZCert GetOrCreateCert(string name, string curvpath = ".curve")
{
ZCert cert;
string keyfile = Path.Combine(curvpath, name + ".pub");
if (!File.Exists(keyfile))
{
cert = new ZCert();
Directory.CreateDirectory(curvpath);
cert.SetMeta("name", name);
cert.Save(keyfile);
}
else
{
cert = ZCert.Load(keyfile);
}
return cert;
}
}
This code also produces the expected number of messages when security is disabled, but when turned on it doesn't.
Does someone know another thing to check? Or has it happened to anyone before?
Thanks
I've built a windows service that subscribes to around 10,000 stock tickers in real-time using ClientWebSocket. If I subscribe to 1,000 tickers I receive all the data points as I should (receiving few hundred messages a second), as soon as I get up to 2,000 tickers I don't seem to be receiving the data I should be, 10,000 (receiving thousands of messages a second) its even worse. I've run comparison reports and it looks like I'm losing up to 60% of the packets. I've talked to polygon (the provider of the real-time data) about this issue and they claim their Socket is a firehose and everything that should go out, goes out, and that none of their other clients are complaining. So the only logical thing here would to be to assume its my code, or some limitation. Maybe it's the Task portion of the Receive method? Maybe window's has a max task limitation and I'm exceeding it.
I've also tested this on a high powered dedicated server with 10gb connection so it doesnt seem to be a connection or hardware limitation.
I've also by passed my BlockingCollection cache and the problem still persisted.
Hopefully one of you has some insight, thank you!
Here's my code:
public static ConcurrentDictionary<string, TradeObj> TradeFeed = new ConcurrentDictionary<string, TradeObj>();
public static ConcurrentDictionary<string, QuoteObj> QuoteFeed = new ConcurrentDictionary<string, QuoteObj>();
public static ConcurrentDictionary<string, AggObj> AggFeed = new ConcurrentDictionary<string, AggObj>();
public static BlockingCollection<byte[]> packets = new BlockingCollection<byte[]>();
private static void Start(string[] args)
{
try
{
Polygon.StartSub();
int HowManyConsumers = 2;
for (int i = 0; i < HowManyConsumers; i++)
{
Task.Factory.StartNew(Polygon.ConsumePackets);
}
} catch(Exception e)
{
Console.WriteLine(e.Message);
}
Console.ReadKey();
}
public static async Task StartSub()
{
do
{
using (var socket = new ClientWebSocket())
try
{
// socket.Options.KeepAliveInterval = TimeSpan.Zero;
var Connection = "wss://socket.polygon.io/stocks";
await socket.ConnectAsync(new Uri(Connection), CancellationToken.None);
Console.WriteLine("Websocket opened to Polygon.");
await Send(socket, "{\"action\":\"auth\",\"params\":\""+ConfigurationManager.AppSettings["PolygonAPIToken"]+"\"}");
List<List<string>> batches = new List<List<string>>();
for (int i = 0; i < FeedCache.Tickers.Count(); i += 500)
{
var tempList = new List<string>();
tempList.AddRange(FeedCache.Tickers.Skip(i).Take(500));
batches.Add(tempList);
}
int bNum = 0;
string[] quoteStrings = new string[batches.Count()];
foreach (var tList in batches)
{
var tQuery = "";
tQuery = tQuery + "T." + string.Join(",T.", tList.ToArray());
tQuery = tQuery + ",A." + string.Join(",A.", tList.ToArray());
tQuery = tQuery + ",Q." + string.Join(",Q.", tList.ToArray());
quoteStrings[bNum] = tQuery;
bNum++;
}
for (int i = 0; i < quoteStrings.Count(); i++)
{
string SubscribeString = "{\"action\":\"subscribe\",\"params\":\"" + quoteStrings[i] + "\"}";
await Send(socket, SubscribeString);
}
await Receive(socket);
}
catch (Exception ex)
{
Console.WriteLine($"ERROR - {ex.Message}");
Console.WriteLine(ex.ToString());
}
} while (true);
}
static async Task Send(ClientWebSocket socket, string data)
{
var segment = new ArraySegment<byte>(Encoding.UTF8.GetBytes(data));
await socket.SendAsync(segment, WebSocketMessageType.Text, true, CancellationToken.None);
}
static async Task Receive(ClientWebSocket socket)
{
do {
WebSocketReceiveResult result;
var buffer = new ArraySegment<byte>(new byte[2000]);
using (var ms = new MemoryStream())
{
do
{
result = await socket.ReceiveAsync(buffer, CancellationToken.None);
ms.Write(buffer.Array, buffer.Offset, result.Count);
} while (!result.EndOfMessage);
if (result.MessageType == WebSocketMessageType.Close)
{
await socket.CloseOutputAsync(WebSocketCloseStatus.NormalClosure, "Closed in server by the client", CancellationToken.None);
Console.WriteLine("Socket disconnecting, trying to reconnect.");
await StartSub();
}
else
{
packets.Add(ms.ToArray());
}
}
} while (true);
}
public static async void ConsumePackets()
{
foreach (var buffer in packets.GetConsumingEnumerable())
{
using (var ms = new MemoryStream(buffer))
{
ms.Seek(0, SeekOrigin.Begin);
using (var reader = new StreamReader(ms, Encoding.UTF8))
{
var data = await reader.ReadToEndAsync();
try
{
var j = JArray.Parse(data);
if (j != null)
{
string id = (string)j[0]["ev"];
switch (id)
{
case "T":
AddOrUpdateTrade((string)j[0]["sym"], j);
break;
case "Q":
AddOrUpdateQuote((string)j[0]["sym"], j);
break;
case "A":
AddOrUpdateAgg((string)j[0]["sym"], j);
break;
}
}
}
catch (Exception e)
{
Console.WriteLine(e.ToString());
}
}
}
}
}
public static void AddOrUpdateTrade(string ticker, JArray data)
{
TradeFeed.AddOrUpdate(ticker, new TradeObj {
LastPrice = (double)data[0]["p"],
TradeCount = 1
}, (key, existingVal) =>
{
return new TradeObj {
LastPrice = (double)data[0]["p"],
TradeCount = existingVal.TradeCount + 1,
PriceDirection = (double)data[0]["p"] < existingVal.LastPrice ? "D" : "U"
};
});
}
public static void AddOrUpdateAgg(string ticker, JArray data)
{
AggFeed.AddOrUpdate(ticker, new AggObj
{
TickVolume = (long)data[0]["v"],
VolumeShare = (long)data[0]["av"],
OpenPrice = (double)data[0]["op"],
TickAverage = (double)data[0]["a"],
VWAP = (double)data[0]["vw"],
TickClosePrice = (double)data[0]["c"],
TickHighPrice = (double)data[0]["h"],
TickLowPrice = (double)data[0]["l"],
TickOpenPrice = (double)data[0]["o"]
}, (key, existingVal) =>
{
return new AggObj
{
TickVolume = (long)data[0]["v"],
VolumeShare = (long)data[0]["av"],
OpenPrice = (double)data[0]["op"],
TickAverage = (double)data[0]["a"],
VWAP = (double)data[0]["vw"],
TickClosePrice = (double)data[0]["c"],
TickHighPrice = (double)data[0]["h"],
TickLowPrice = (double)data[0]["l"],
TickOpenPrice = (double)data[0]["o"]
};
});
}
public static void AddOrUpdateQuote(string ticker, JArray data)
{
QuoteFeed.AddOrUpdate(ticker, new QuoteObj
{
BidPrice = (double)data[0]["bp"],
BidSize = (double)data[0]["bs"],
AskPrice = (double)data[0]["ap"],
AskSize = (double)data[0]["as"]
}, (key, existingVal) =>
{
return new QuoteObj
{
BidPrice = (double)data[0]["bp"],
BidSize = (double)data[0]["bs"],
AskPrice = (double)data[0]["ap"],
AskSize = (double)data[0]["as"]
};
});
}
I have successfully used Grpc in Unity and sent request to Dialog flow and received response. You can check the details here
However the whole returned result is the following only
{ "queryResult": { "languageCode": "ja" } }
The expected response id, query text, etc are not returned.
When testing in console.dialogflow.com I get the following result
{
"responseId": "cdf8003e-6599-4a28-9314-f4462c36e21b",
"queryResult": {
"queryText": "おはようございます",
"speechRecognitionConfidence": 0.92638445,
"languageCode": "ja"
}
}
However when I tried in console.dialogflow.com and didn't say anything I got
{ "queryResult": { "languageCode": "ja" } }
So perhaps the InputAudio encoding is wrong somehow.
Here's how I do it
var serializedByteArray = convertToBytes(samples);
request.InputAudio = Google.Protobuf.ByteString.CopyFrom(serializedByteArray);
And convert to bytes is like the following
public static byte[] convertToBytes(float[] audio)
{
List<byte> bytes = new List<byte>();
foreach (float audioI in audio) {
bytes.AddRange(BitConverter.GetBytes(audioI));
}
return bytes.ToArray();
}
The audio source is define as follows where sampleRate is 16000
audioSource.clip = Microphone.Start(null, true, 30, sampleRate);
I made sure to set sample rate hz properly.
queryInput.AudioConfig.SampleRateHertz = sampleRate;
Edit:
I have logged the recorded bytes from unity to a file (have all the bytes streamed appended together) and have written a console application to test the binary generated but using DetectIntent rather than streaming detect intent.
GoogleCredential credential = GoogleCredential.FromJson(privateKey);
var url = "dialogflow.googleapis.com";
Grpc.Core.Channel channel = new Grpc.Core.Channel(url, credential.ToChannelCredentials());
var client = SessionsClient.Create(channel);
CallOptions options = new CallOptions();
DetectIntentRequest detectIntentRequest = new DetectIntentRequest();
detectIntentRequest.Session = "projects/projectid/agent/sessions/" + "detectIntent";
QueryInput queryInput = new QueryInput();
queryInput.AudioConfig = new InputAudioConfig();
queryInput.AudioConfig.LanguageCode = "ja";
queryInput.AudioConfig.SampleRateHertz = sampleRate;//must be between 8khz and 48khz
queryInput.AudioConfig.AudioEncoding = AudioEncoding.Linear16;
detectIntentRequest.QueryInput = queryInput;
detectIntentRequest.InputAudio = Google.Protobuf.ByteString.CopyFrom(File.ReadAllBytes("D:\\temp\\audio.bytes"));
var response = client.DetectIntent(detectIntentRequest);
Console.WriteLine(response.ToString());
Console.WriteLine(response.ResponseId);
Console.Read();
I still get this (and empty response.ResponseId)
{ "queryResult": { "languageCode": "ja" } }
Thanks for advance.
Finally found the answer. The way I converted the datasource float to linear16 byte array was obviously wrong. Here's the code that worked
Credits to that post on unity forum.
https://forum.unity.com/threads/writing-audiolistener-getoutputdata-to-wav-problem.119295/#post-899142
public static byte[] convertToBytes(float[] dataSource)
{
var intData = new Int16[dataSource.Length];
//converting in 2 steps : float[] to Int16[], //then Int16[] to Byte[]
var bytesData = new Byte[dataSource.Length * 2];
//bytesData array is twice the size of
//dataSource array because a float converted in Int16 is 2 bytes.
var rescaleFactor = 32767; //to convert float to Int16
for (var i = 0; i < dataSource.Length; i++)
{
intData[i] = (short)(dataSource[i] * rescaleFactor);
var byteArr = new byte[2];
byteArr = BitConverter.GetBytes(intData[i]);
byteArr.CopyTo(bytesData, i * 2);
}
return bytesData;
}
I'm trying to get the Bing Speech API to work in C# via WebSockets. I've looked through the implementation in Javascript here and have been following the protocol instructions here, but I've come up against a complete brick wall. I can't use the existing C# service because I'm running in a Linux container, so I need to use an implementation on .net Core. Annoyingly, the existing service is closed-source!
I can connect to the web socket successfully, but I can't ever get the server to respond to my connection. I'm expecting to receive a turn.start text message from the server, but I get booted off the server as soon as I've sent a few bytes of an audio file. I know the audio file is in the right format because I've got it directly from the C# service sample here.
I feel like I’ve exhausted the options here. The only thing I can think of now is that I’m not sending the audio chunks correctly. Currently, I’m just sending the audio file in consecutive 4096 bytes. I know the first audio message contains the RIFF header which is only 36 bytes, and then I'm just sending this along with the next (4096-36) bytes.
Here is my code in full. You should just be able to run it as a .net core or .net framework console application, and will need an audio file and an API key.
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Net.WebSockets;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApp3
{
class Program
{
static void Main(string[] args)
{
Task.Run(async () =>
{
var bingService = new BingSpeechToTextService();
var audioFilePath = #"FILEPATH GOES HERE";
var authenticationKey = #"BING AUTHENTICATION KEY GOES HERE";
await bingService.RegisterJob(audioFilePath, authenticationKey);
}).Wait();
}
}
public class BingSpeechToTextService
{
/* #region Private Static Methods */
private static async Task Receiving(ClientWebSocket client)
{
var buffer = new byte[128];
while (true)
{
var result = await client.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
var res = Encoding.UTF8.GetString(buffer, 0, result.Count);
if (result.MessageType == WebSocketMessageType.Text)
{
Console.WriteLine(Encoding.UTF8.GetString(buffer, 0, result.Count));
}
else if (result.MessageType == WebSocketMessageType.Close)
{
Console.WriteLine($"Closing ... reason {client.CloseStatusDescription}");
var description = client.CloseStatusDescription;
//await client.CloseOutputAsync(WebSocketCloseStatus.NormalClosure, "", CancellationToken.None);
break;
}
else
{
Console.WriteLine("Other result");
}
}
}
/* #endregion Private Static Methods */
/* #region Public Static Methods */
public static UInt16 ReverseBytes(UInt16 value)
{
return (UInt16)((value & 0xFFU) << 8 | (value & 0xFF00U) >> 8);
}
/* #endregion Public Static Methods */
/* #region Interface: 'Unscrypt.Bing.SpeechToText.Client.Api.IBingSpeechToTextJobService' Methods */
public async Task<int?> RegisterJob(string audioFilePath, string authenticationKeyStr)
{
var authenticationKey = new BingSocketAuthentication(authenticationKeyStr);
var token = authenticationKey.GetAccessToken();
/* #region Connect web socket */
var cws = new ClientWebSocket();
var connectionId = Guid.NewGuid().ToString("N");
var lang = "en-US";
cws.Options.SetRequestHeader("X-ConnectionId", connectionId);
cws.Options.SetRequestHeader("Authorization", "Bearer " + token);
Console.WriteLine("Connecting to web socket.");
var url = $"wss://speech.platform.bing.com/speech/recognition/interactive/cognitiveservices/v1?format=simple&language={lang}";
await cws.ConnectAsync(new Uri(url), new CancellationToken());
Console.WriteLine("Connected.");
/* #endregion*/
/* #region Receiving */
var receiving = Receiving(cws);
/* #endregion*/
/* #region Sending */
var sending = Task.Run(async () =>
{
/* #region Send speech.config */
dynamic speechConfig =
new
{
context = new
{
system = new
{
version = "1.0.00000"
},
os = new
{
platform = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
name = "Browser",
version = ""
},
device = new
{
manufacturer = "SpeechSample",
model = "SpeechSample",
version = "1.0.00000"
}
}
};
var requestId = Guid.NewGuid().ToString("N");
var speechConfigJson = JsonConvert.SerializeObject(speechConfig, Formatting.None);
StringBuilder outputBuilder = new StringBuilder();
outputBuilder.Append("path:speech.config\r\n"); //Should this be \r\n
outputBuilder.Append($"x-timestamp:{DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffK")}\r\n");
outputBuilder.Append($"content-type:application/json\r\n");
outputBuilder.Append("\r\n\r\n");
outputBuilder.Append(speechConfigJson);
var strh = outputBuilder.ToString();
var encoded = Encoding.UTF8.GetBytes(outputBuilder.ToString());
var buffer = new ArraySegment<byte>(encoded, 0, encoded.Length);
if (cws.State != WebSocketState.Open) return;
Console.WriteLine("Sending speech.config");
await cws.SendAsync(buffer, WebSocketMessageType.Text, true, new CancellationToken());
Console.WriteLine("Sent.");
/* #endregion*/
/* #region Send audio parts. */
var fileInfo = new FileInfo(audioFilePath);
var streamReader = fileInfo.OpenRead();
for (int cursor = 0; cursor < fileInfo.Length; cursor++)
{
outputBuilder.Clear();
outputBuilder.Append("path:audio\r\n");
outputBuilder.Append($"x-requestid:{requestId}\r\n");
outputBuilder.Append($"x-timestamp:{DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffK")}\r\n");
outputBuilder.Append($"content-type:audio/x-wav");
var headerBytes = Encoding.ASCII.GetBytes(outputBuilder.ToString());
var headerbuffer = new ArraySegment<byte>(headerBytes, 0, headerBytes.Length);
var str = "0x" + (headerBytes.Length).ToString("X");
var headerHeadBytes = BitConverter.GetBytes((UInt16)headerBytes.Length);
var isBigEndian = !BitConverter.IsLittleEndian;
var headerHead = !isBigEndian ? new byte[] { headerHeadBytes[1], headerHeadBytes[0] } : new byte[] { headerHeadBytes[0], headerHeadBytes[1] };
//Audio should be pcm 16kHz, 16bps mono
var byteLen = 8192 - headerBytes.Length - 2;
var fbuff = new byte[byteLen];
streamReader.Read(fbuff, 0, byteLen);
var arr = headerHead.Concat(headerBytes).Concat(fbuff).ToArray();
var arrSeg = new ArraySegment<byte>(arr, 0, arr.Length);
Console.WriteLine($"Sending data from {cursor}");
if (cws.State != WebSocketState.Open) return;
cursor += byteLen;
var end = cursor >= fileInfo.Length;
await cws.SendAsync(arrSeg, WebSocketMessageType.Binary, true, new CancellationToken());
Console.WriteLine("Data sent");
var dt = Encoding.ASCII.GetString(arr);
}
await cws.SendAsync(new ArraySegment<byte>(), WebSocketMessageType.Binary, true, new CancellationToken());
streamReader.Dispose();
/* #endregion*/
{
var startWait = DateTime.UtcNow;
while ((DateTime.UtcNow - startWait).TotalSeconds < 30)
{
await Task.Delay(1);
}
if (cws.State != WebSocketState.Open) return;
}
});
/* #endregion*/
/* #region Wait for tasks to complete */
await Task.WhenAll(sending, receiving);
if (sending.IsFaulted)
{
var err = sending.Exception;
throw err;
}
if (receiving.IsFaulted)
{
var err = receiving.Exception;
throw err;
}
/* #endregion*/
return null;
}
/* #endregion Interface: 'Unscrypt.Bing.SpeechToText.Client.Api.IBingSpeechToTextJobService' Methods */
public class BingSocketAuthentication
{
public static readonly string FetchTokenUri = "https://api.cognitive.microsoft.com/sts/v1.0";
private string subscriptionKey;
private string token;
private Timer accessTokenRenewer;
//Access token expires every 10 minutes. Renew it every 9 minutes.
private const int RefreshTokenDuration = 9;
public BingSocketAuthentication(string subscriptionKey)
{
this.subscriptionKey = subscriptionKey;
this.token = FetchToken(FetchTokenUri, subscriptionKey).Result;
// renew the token on set duration.
accessTokenRenewer = new Timer(new TimerCallback(OnTokenExpiredCallback),
this,
TimeSpan.FromMinutes(RefreshTokenDuration),
TimeSpan.FromMilliseconds(-1));
}
public string GetAccessToken()
{
return this.token;
}
private void RenewAccessToken()
{
this.token = FetchToken(FetchTokenUri, this.subscriptionKey).Result;
Console.WriteLine("Renewed token.");
}
private void OnTokenExpiredCallback(object stateInfo)
{
try
{
RenewAccessToken();
}
catch (Exception ex)
{
Console.WriteLine(string.Format("Failed renewing access token. Details: {0}", ex.Message));
}
finally
{
try
{
accessTokenRenewer.Change(TimeSpan.FromMinutes(RefreshTokenDuration), TimeSpan.FromMilliseconds(-1));
}
catch (Exception ex)
{
Console.WriteLine(string.Format("Failed to reschedule the timer to renew access token. Details: {0}", ex.Message));
}
}
}
private async Task<string> FetchToken(string fetchUri, string subscriptionKey)
{
using (var client = new HttpClient())
{
client.DefaultRequestHeaders.Add("Ocp-Apim-Subscription-Key", subscriptionKey);
UriBuilder uriBuilder = new UriBuilder(fetchUri);
uriBuilder.Path += "/issueToken";
var result = await client.PostAsync(uriBuilder.Uri.AbsoluteUri, null);
Console.WriteLine("Token Uri: {0}", uriBuilder.Uri.AbsoluteUri);
return await result.Content.ReadAsStringAsync();
}
}
}
}
}
I knew it was going to be simple.
After a frustrating few hours of coding, I've found the problem. I've been forgetting to send a request id along with the speech.config call.
Related to my other question except now I try async hoping it would fix the issues. It doesn't.
I'm trying to create a simple SOCKS5 server. I set my browser (firefox) to use this program as a SOCKS5. The idea is a program connects to the proxy server, give it information required and the server just simply reads/writes data from one connection to the other. This one simply does that and doesn't log nor filter anything. It is dead simple but because of the CPU issue and the fact it takes several seconds to connect to a site after you hit a few pages makes it completely unusable. How on earth is this eating up so much CPU? And why does it take a long time to connect to a site? Both async and sync suffer from this
using System;
using System.Collections.Concurrent;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net.Sockets;
using System.Timers;
using System.IO;
using System.Net;
using System.Threading;
namespace ProxyTest
{
class Program
{
static ManualResetEvent tcpClientConnected =new ManualResetEvent(false);
static void Main(string[] args)
{
var s2 = new TcpListener(9998);
s2.Start();
Task.Run(() =>
{
while (true)
{
tcpClientConnected.Reset();
s2.BeginAcceptTcpClient(Blah, s2);
tcpClientConnected.WaitOne();
}
});
while (true)
System.Threading.Thread.Sleep(10000000);
}
static void Blah(IAsyncResult ar)
{
try
{
Console.WriteLine("Connection");
TcpListener listener = (TcpListener)ar.AsyncState;
using (var socketin = listener.EndAcceptTcpClient(ar))
{
tcpClientConnected.Set();
var ns1 = socketin.GetStream();
var r1 = new BinaryReader(ns1);
var w1 = new BinaryWriter(ns1);
if (false)
{
var s3 = new TcpClient();
s3.Connect("127.0.0.1", 9150);
var ns3 = s3.GetStream();
var r3 = new BinaryReader(ns3);
var w3 = new BinaryWriter(ns3);
while (true)
{
while (ns1.DataAvailable)
{
var b = ns1.ReadByte();
w3.Write((byte)b);
//Console.WriteLine("1: {0}", b);
}
while (ns3.DataAvailable)
{
var b = ns3.ReadByte();
w1.Write((byte)b);
Console.WriteLine("2: {0}", b);
}
}
}
{
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1))
return;
var c = r1.ReadByte();
for (int i = 0; i < c; ++i)
r1.ReadByte();
w1.Write((byte)5);
w1.Write((byte)0);
}
{
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1))
return;
if (r1.ReadByte() != 0)
return;
}
byte[] ipAddr = null;
string hostname = null;
var type = r1.ReadByte();
switch (type)
{
case 1:
ipAddr = r1.ReadBytes(4);
break;
case 3:
hostname = Encoding.ASCII.GetString(r1.ReadBytes(r1.ReadByte()));
break;
case 4:
throw new Exception();
}
var nhport = r1.ReadInt16();
var port = IPAddress.NetworkToHostOrder(nhport);
var socketout = new TcpClient();
if (hostname != null)
socketout.Connect(hostname, port);
else
socketout.Connect(new IPAddress(ipAddr), port);
w1.Write((byte)5);
w1.Write((byte)0);
w1.Write((byte)0);
w1.Write(type);
switch (type)
{
case 1:
w1.Write(ipAddr);
break;
case 2:
w1.Write((byte)hostname.Length);
w1.Write(Encoding.ASCII.GetBytes(hostname), 0, hostname.Length);
break;
}
w1.Write(nhport);
var buf1 = new byte[4096];
var buf2 = new byte[4096];
var ns2 = socketout.GetStream();
var r2 = new BinaryReader(ns2);
var w2 = new BinaryWriter(ns2);
Task.Run(() =>
{
var re = new ManualResetEvent(false);
while (true)
{
re.Reset();
ns1.BeginRead(buf1, 0, buf1.Length, ReadCallback, new A() { buf = buf1, thisSocket = socketin, otherSocket = socketout, thisStream = ns1, otherStream = ns2, re=re });
re.WaitOne();
}
});
Task.Run(() =>
{
var re = new ManualResetEvent(false);
while (true)
{
re.Reset();
ns2.BeginRead(buf2, 0, buf2.Length, ReadCallback, new A() { buf = buf2, thisSocket = socketout, otherSocket = socketin, thisStream = ns2, otherStream = ns1, re = re });
re.WaitOne();
}
});
while (true)
{
if (socketin.Connected == false)
return;
Thread.Sleep(100);
}
}
}
catch { }
}
class A { public byte[] buf; public TcpClient thisSocket, otherSocket; public NetworkStream thisStream, otherStream; public ManualResetEvent re;};
static void ReadCallback(IAsyncResult ar)
{
try
{
var a = (A)ar.AsyncState;
var ns1 = a.thisStream;
var len = ns1.EndRead(ar);
a.otherStream.Write(a.buf, 0, len);
a.re.Set();
}
catch
{
}
}
}
}
Caveat: I had to adjust things slightly since I'm not using 4.5.
Task.Run() --> new Thread().Start()
You are using far too many threads.
Simply attempting to load this question in stackoverflow caused 30+ threads to spawn, which reproduces the behavior seen using Task.Run().
With your code cut down to a single thread per connection, my CPU usage is hovering around 0%. Everything loads quickly.
using System;
using System.Collections.Generic;
using System.Collections.Concurrent;
using System.ComponentModel;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net.Sockets;
using System.Timers;
using System.IO;
using System.Net;
using System.Threading;
namespace SOCKS5
{
static class Program
{
static void Main()
{
var s2 = new TcpListener(9998);
s2.Start();
while (true)
{
if (s2.Pending())
{
Thread test = new Thread(() =>
{
using (TcpClient client = s2.AcceptTcpClient())
{
Blah(client);
}
});
test.Start();
}
Thread.Sleep(10);
}
}
static void Blah(TcpClient listener)
{
try
{
Console.WriteLine("Connection");
//TcpListener listener = (TcpListener)ar.AsyncState;
//tcpClientConnected.Set();
var ns1 = listener.GetStream();
var r1 = new BinaryReader(ns1);
var w1 = new BinaryWriter(ns1);
if (false)
{
var s3 = new TcpClient();
s3.Connect("127.0.0.1", 9150);
var ns3 = s3.GetStream();
var r3 = new BinaryReader(ns3);
var w3 = new BinaryWriter(ns3);
while (true)
{
while (ns1.DataAvailable)
{
var b = ns1.ReadByte();
w3.Write((byte)b);
//Console.WriteLine("1: {0}", b);
}
while (ns3.DataAvailable)
{
var b = ns3.ReadByte();
w1.Write((byte)b);
Console.WriteLine("2: {0}", b);
}
}
}
{
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1))
return;
var c = r1.ReadByte();
for (int i = 0; i < c; ++i)
r1.ReadByte();
w1.Write((byte)5);
w1.Write((byte)0);
}
{
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1))
return;
if (r1.ReadByte() != 0)
return;
}
byte[] ipAddr = null;
string hostname = null;
var type = r1.ReadByte();
switch (type)
{
case 1:
ipAddr = r1.ReadBytes(4);
break;
case 3:
hostname = Encoding.ASCII.GetString(r1.ReadBytes(r1.ReadByte()));
break;
case 4:
throw new Exception();
}
var nhport = r1.ReadInt16();
var port = IPAddress.NetworkToHostOrder(nhport);
var socketout = new TcpClient();
if (hostname != null)
socketout.Connect(hostname, port);
else
socketout.Connect(new IPAddress(ipAddr), port);
w1.Write((byte)5);
w1.Write((byte)0);
w1.Write((byte)0);
w1.Write(type);
switch (type)
{
case 1:
w1.Write(ipAddr);
break;
case 2:
w1.Write((byte)hostname.Length);
w1.Write(Encoding.ASCII.GetBytes(hostname), 0, hostname.Length);
break;
}
w1.Write(nhport);
var buf1 = new byte[4096];
var buf2 = new byte[4096];
var ns2 = socketout.GetStream();
DateTime last = DateTime.Now;
while ((DateTime.Now - last).TotalMinutes < 5.0)
{
if (ns1.DataAvailable)
{
int size = ns1.Read(buf1, 0, buf1.Length);
ns2.Write(buf1, 0, size);
last = DateTime.Now;
}
if (ns2.DataAvailable)
{
int size = ns2.Read(buf2, 0, buf2.Length);
ns1.Write(buf2, 0, size);
last = DateTime.Now;
}
Thread.Sleep(10);
}
}
catch { }
finally
{
try
{
listener.Close();
}
catch (Exception) { }
}
}
}
}
Edit:
This ended up being kinda fun to mess with.
After routing Firefox traffic through this for a few hours, some observations.
Never noticed a regular pattern to determine when to close connections. Letting threads terminate after they've been idle for 5 minutes (no rx/tx) keeps the thread count fairly low. It's a pretty safe bound that allows services such as gmail chat to keep functioning.
For some reason, the program would occasionally not receive requests from the browser, which would report a timeout. No notification of a missed request in the program, nothing. Only noticed when browsing stackoverflow. Still haven't figured that one out.
There are a few things going on here!
The async calls are all called synchronous style. As in, the thread that starts the operation calls a WaitOne - this basically just makes it equivalent to a synchonous call, no different.
Sleep loops are bad.
A sleep(1) loop will respond quickly but use some CPU, a sleep(1000) loop will respond slowly but use less CPU.
Having a dozen threads in a sleep loop doesn't use much CPU, but if the number of threads keeps increasing, CPU usage will become significant.
The best way is to use async calls instead of polling.
Lots of tasks running loops. Without guaranteed exit paths these cause the thread count to skyrocket.
If you are forwarding data from socket A to socket B, you need to act when either socket is closed: cease forwarding, ensure that pending writes complete and close the sockets.
The current implementation doesn't properly ensure both forwarding tasks are closed if one closes, and the technique of starting a task then blocking on a manual reset event can fail if the task gets an exception prior to setting the event. Both cases leave a task running ad infinitum.
Checking Socket.Connected seems like an obvious thing to do but in practice this is just a cache of the whether the last IO operation encoundered a disconnect.
I prefer to act on "zero recv"s which are your first notification of a disconnect.
I knocked up a quick async version of your original synchronous routine using PowerThreading via NuGet (this is a way of doing async routines prior to framework 4.5).
This works using TcpListener with zero cpu usage and very low number of threads.
This can be done in vanilla c# using async/await... I just don't know how yet :)
using System;
using System.Collections.Generic;
using System.Text;
namespace AeProxy
{
using System.IO;
using System.Net;
using System.Net.Sockets;
using System.Threading;
// Need to install Wintellect.Threading via NuGet for this:
using Wintellect.Threading.AsyncProgModel;
class Program
{
static void Main(string[] args)
{
var ae = new AsyncEnumerator() {SyncContext = null};
var mainOp = ae.BeginExecute(ListenerFiber(ae), null, null);
// block until main server is finished
ae.EndExecute(mainOp);
}
static IEnumerator<int> ListenerFiber(AsyncEnumerator ae)
{
var listeningServer = new TcpListener(IPAddress.Loopback, 9998);
listeningServer.Start();
while (!ae.IsCanceled())
{
listeningServer.BeginAcceptTcpClient(ae.End(0, listeningServer.EndAcceptTcpClient), null);
yield return 1;
if (ae.IsCanceled()) yield break;
var clientSocket = listeningServer.EndAcceptTcpClient(ae.DequeueAsyncResult());
var clientAe = new AsyncEnumerator() { SyncContext = null };
clientAe.BeginExecute(
ClientFiber(clientAe, clientSocket),
ar =>
{
try
{
clientAe.EndExecute(ar);
}
catch { }
}, null);
}
}
static long clients = 0;
static IEnumerator<int> ClientFiber(AsyncEnumerator ae, TcpClient clientSocket)
{
Console.WriteLine("ClientFibers ++{0}", Interlocked.Increment(ref clients));
try
{
// original code to do handshaking and connect to remote host
var ns1 = clientSocket.GetStream();
var r1 = new BinaryReader(ns1);
var w1 = new BinaryWriter(ns1);
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1)) yield break;
var c = r1.ReadByte();
for (int i = 0; i < c; ++i) r1.ReadByte();
w1.Write((byte)5);
w1.Write((byte)0);
if (!(r1.ReadByte() == 5 && r1.ReadByte() == 1)) yield break;
if (r1.ReadByte() != 0) yield break;
byte[] ipAddr = null;
string hostname = null;
var type = r1.ReadByte();
switch (type)
{
case 1:
ipAddr = r1.ReadBytes(4);
break;
case 3:
hostname = Encoding.ASCII.GetString(r1.ReadBytes(r1.ReadByte()));
break;
case 4:
throw new Exception();
}
var nhport = r1.ReadInt16();
var port = IPAddress.NetworkToHostOrder(nhport);
var socketout = new TcpClient();
if (hostname != null) socketout.Connect(hostname, port);
else socketout.Connect(new IPAddress(ipAddr), port);
w1.Write((byte)5);
w1.Write((byte)0);
w1.Write((byte)0);
w1.Write(type);
switch (type)
{
case 1:
w1.Write(ipAddr);
break;
case 3:
w1.Write((byte)hostname.Length);
w1.Write(Encoding.ASCII.GetBytes(hostname), 0, hostname.Length);
break;
}
w1.Write(nhport);
using (var ns2 = socketout.GetStream())
{
var forwardAe = new AsyncEnumerator() { SyncContext = null };
forwardAe.BeginExecute(
ForwardingFiber(forwardAe, ns1, ns2), ae.EndVoid(0, forwardAe.EndExecute), null);
yield return 1;
if (ae.IsCanceled()) yield break;
forwardAe.EndExecute(ae.DequeueAsyncResult());
}
}
finally
{
Console.WriteLine("ClientFibers --{0}", Interlocked.Decrement(ref clients));
}
}
private enum Operation { OutboundWrite, OutboundRead, InboundRead, InboundWrite }
const int bufsize = 4096;
static IEnumerator<int> ForwardingFiber(AsyncEnumerator ae, NetworkStream inputStream, NetworkStream outputStream)
{
while (!ae.IsCanceled())
{
byte[] outputRead = new byte[bufsize], outputWrite = new byte[bufsize];
byte[] inputRead = new byte[bufsize], inputWrite = new byte[bufsize];
// start off output and input reads.
// NB ObjectDisposedExceptions can be raised here when a socket is closed while an async read is in progress.
outputStream.BeginRead(outputRead, 0, bufsize, ae.End(1, ar => outputStream.EndRead(ar)), Operation.OutboundRead);
inputStream.BeginRead(inputRead, 0, bufsize, ae.End(1, ar => inputStream.EndRead(ar)), Operation.InboundRead);
var pendingops = 2;
while (!ae.IsCanceled())
{
// wait for the next operation to complete, the state object passed to each async
// call can be used to find out what completed.
if (pendingops == 0) yield break;
yield return 1;
if (!ae.IsCanceled())
{
int byteCount;
var latestEvent = ae.DequeueAsyncResult();
var currentOp = (Operation)latestEvent.AsyncState;
if (currentOp == Operation.InboundRead)
{
byteCount = inputStream.EndRead(latestEvent);
if (byteCount == 0)
{
pendingops--;
outputStream.Close();
continue;
}
Array.Copy(inputRead, outputWrite, byteCount);
outputStream.BeginWrite(outputWrite, 0, byteCount, ae.EndVoid(1, outputStream.EndWrite), Operation.OutboundWrite);
inputStream.BeginRead(inputRead, 0, bufsize, ae.End(1, ar => inputStream.EndRead(ar)), Operation.InboundRead);
}
else if (currentOp == Operation.OutboundRead)
{
byteCount = outputStream.EndRead(latestEvent);
if (byteCount == 0)
{
pendingops--;
inputStream.Close();
continue;
}
Array.Copy(outputRead, inputWrite, byteCount);
inputStream.BeginWrite(inputWrite, 0, byteCount, ae.EndVoid(1, inputStream.EndWrite), Operation.InboundWrite);
outputStream.BeginRead(outputRead, 0, bufsize, ae.End(1, ar => outputStream.EndRead(ar)), Operation.OutboundRead);
}
else if (currentOp == Operation.InboundWrite)
{
inputStream.EndWrite(latestEvent);
}
else if (currentOp == Operation.OutboundWrite)
{
outputStream.EndWrite(latestEvent);
}
}
}
}
}
}
}
In this line...
while (true)
System.Threading.Thread.Sleep(10000000);
Will not be better to replace it by a simple:
Console.ReadKey();
is the only CPU consumption thing I see.
Also, as a suggestion, you should limit the number of incoming connections and use a Thread pool pattern (in a queue or something).
You should take a look at Overlapped I/O.
One Thread per connection maybe works fine but in general it's bad.
You should use async versions of TcpClient methods instead of spawning threads.