Lazily transform data in C# Stream - c#

I read from a stream that contains a CRLF every X bytes (for me X is 2033 because the file is generated with bcp; I set X to 4 in the sample code). I would like to transform the stream into another stream without these CRLFs. The new stream will be deserialized as XML.
I can do it easily, and it works correctly, in this way:
using System.IO;
using System.Text;
using System.Xml.Linq;
/// <summary>
/// Copies a text stream into memory while dropping the two characters that follow
/// every full block of <see cref="BcpChunkSize"/> characters.
/// </summary>
public class CREliminator
{
    /// <summary>Number of payload characters between two separators.</summary>
    public const int BcpChunkSize = 4;

    /// <summary>
    /// Reads <paramref name="reader"/> to its end and returns a rewound in-memory stream
    /// holding the payload characters only.
    /// </summary>
    /// <param name="reader">Source text; consumed until <c>EndOfStream</c> is true.</param>
    /// <returns>A <see cref="MemoryStream"/> positioned at 0.</returns>
    public Stream Run(StreamReader reader)
    {
        var output = new MemoryStream();
        var writer = new StreamWriter(output);
        var payload = new char[BcpChunkSize];
        var separator = new char[2];

        while (true)
        {
            int charsRead = reader.ReadBlock(payload, 0, BcpChunkSize);
            writer.Write(payload, 0, charsRead);
            // Flush every block so the bytes are in the MemoryStream before it is returned.
            writer.Flush();

            // Only a full block is followed by a separator; the two characters are
            // read and discarded without being inspected.
            if (charsRead == BcpChunkSize)
            {
                reader.ReadBlock(separator, 0, 2);
            }

            if (reader.EndOfStream)
            {
                break;
            }
        }

        output.Position = 0;
        return output;
    }
}
public class UnitTest1
{
    /// <summary>
    /// Feeds an XML document broken up by CRLF after every 4 characters through
    /// <c>CREliminator</c> and checks that the result parses back to the unbroken document.
    /// </summary>
    [Fact]
    public void Test1()
    {
        // arrange
        const string lineBreak = "\r\n";
        var chunked = $"<doc{lineBreak}umen{lineBreak}t>A<{lineBreak}/doc{lineBreak}umen{lineBreak}t>";
        var expected = chunked.Replace(lineBreak, "");
        var source = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(chunked)));

        // act
        var filtered = new CREliminator().Run(source);

        // assert
        var document = XDocument.Load(filtered);
        Assert.Equal(expected, document.ToString());
    }
}
But this code loads the whole stream into memory before returning the new stream.
My question is: how can I do this lazily? I mean, processing the source stream only as some consumer reads from the new stream.
Thanks.

Only Stream.Read method needs to be implemented:
Read from source stream in chunks
Skip 2 bytes after every chunk
using System.Buffers;
using System.Text;
// Demo: build a sample in which every 4 payload bytes are followed by CR LF,
// then read it back through the filtering stream and print both versions.
var utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
var sourceString = string.Concat(Enumerable.Repeat("Foo \r\nBar \r\nBaz!\r\n", 10));
Console.WriteLine("Source: " + sourceString);

using var stream = new MemoryStream(utf8NoBom.GetBytes(sourceString));
using var filter = new CrLfFilteringStream(stream, 4);
using var reader = new StreamReader(filter, utf8NoBom);

Console.WriteLine("Result: " + reader.ReadToEnd());
/// <summary>
/// Read-only, forward-only stream that lazily drops the two bytes (CR LF) which follow
/// every <c>chunkSize</c> bytes of the wrapped stream.
/// </summary>
/// <remarks>
/// The two separator bytes are skipped without being inspected. The wrapped stream is
/// not disposed by this class; only the pooled working buffer is returned.
/// </remarks>
public class CrLfFilteringStream : Stream
{
    private const int SeparatorLength = 2;

    private readonly Stream _stream;
    private readonly int _chunkSize;
    // Rented buffer: [0, _chunkSize) holds the current chunk, the following
    // SeparatorLength bytes are scratch space for the discarded separator.
    private readonly byte[] _chunk;
    private int _chunkPosition;
    private int _chunkLength;
    private bool _sourceExhausted;
    private bool _disposed;

    /// <summary>Creates a filter over <paramref name="stream"/>.</summary>
    /// <param name="stream">Source stream containing payload chunks separated by two bytes.</param>
    /// <param name="chunkSize">Number of payload bytes between separators; must be positive.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is not positive.</exception>
    public CrLfFilteringStream(Stream stream, int chunkSize)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), "Chunk size must be positive.");
        }

        _chunkSize = chunkSize;
        _chunk = ArrayPool<byte>.Shared.Rent(checked(chunkSize + SeparatorLength));
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> payload bytes into <paramref name="buffer"/>
    /// starting at <paramref name="offset"/>. Keeps reading the source until the request
    /// is satisfied or the source ends.
    /// </summary>
    /// <returns>Number of bytes copied; 0 at end of stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("Offset and count exceed the buffer length.");
        }
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(CrLfFilteringStream));
        }

        var copied = 0;
        while (copied < count)
        {
            if (_chunkPosition == _chunkLength && !FillChunk())
            {
                break;
            }

            // Never hand out more than what is left in the current chunk.
            var available = Math.Min(_chunkLength - _chunkPosition, count - copied);
            Array.Copy(_chunk, _chunkPosition, buffer, offset + copied, available);
            _chunkPosition += available;
            copied += available;
        }

        return copied;
    }

    // Loads the next chunk and skips the separator after it. Returns false once no payload is left.
    private bool FillChunk()
    {
        _chunkPosition = 0;
        _chunkLength = 0;
        if (_sourceExhausted)
        {
            return false;
        }

        _chunkLength = ReadFully(0, _chunkSize);
        if (_chunkLength < _chunkSize)
        {
            // A short chunk can only be the last one; no separator follows it.
            _sourceExhausted = true;
        }
        else if (ReadFully(_chunkSize, SeparatorLength) < SeparatorLength)
        {
            _sourceExhausted = true;
        }

        return _chunkLength > 0;
    }

    // Stream.Read may return fewer bytes than requested, so loop until the request
    // is satisfied or the source reports end of stream.
    private int ReadFully(int offset, int count)
    {
        var total = 0;
        while (total < count)
        {
            var read = _stream.Read(_chunk, offset + total, count - total);
            if (read <= 0)
            {
                break;
            }
            total += read;
        }
        return total;
    }

    public override void Flush() => throw new NotSupportedException();
    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
    public override void SetLength(long value) => throw new NotSupportedException();
    public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => throw new NotSupportedException();
    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }

    protected override void Dispose(bool disposing)
    {
        // Returning the same array to the pool twice would corrupt the pool,
        // so guard against repeated Dispose calls.
        if (!_disposed)
        {
            _disposed = true;
            ArrayPool<byte>.Shared.Return(_chunk);
        }
        base.Dispose(disposing);
    }
}
This code rents a single chunk-sized buffer from ArrayPool and does not allocate anything else (besides CrLfFilteringStream instance).

Related

ByteStream source in SharpDX.MediaFoundation hanging

I have been trying to get a custom audio stream to work with SharpDX.MediaFoundation.
To this end I have wrapped my audio object in a class that implements System.IO.Stream as follows:
/// <summary>
/// Seekable, read-only stream that presents an audio object as a WAV file:
/// a 44-byte RIFF/WAVE header followed by the samples as 32-bit floats.
/// </summary>
public class AudioReaderWaveStream : System.IO.Stream
{
    private const int HeaderSize = 44;
    private const int BytesPerSample = 4;
    // Sentinel meaning "no device open".
    private const ulong InvalidHandle = 0xffffffff;

    byte[] waveHeader = new byte[HeaderSize];
    AudioCore.IAudioReader reader = null;
    ulong readHandle = InvalidHandle;
    long readPosition = 0;

    /// <summary>Opens the reader device of <paramref name="content"/> and pre-builds the WAV header.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="content"/> does not implement IAudioReader.</exception>
    public AudioReaderWaveStream(AudioCore.CEditedAudio content)
    {
        if (content == null)
            throw new ArgumentNullException("content");
        reader = content as AudioCore.IAudioReader;
        if (reader == null)
            throw new ArgumentException("Content must implement AudioCore.IAudioReader.", "content");

        readHandle = reader.OpenDevice();
        int sampleRate = 0;
        short channels = 0;
        content.GetFormat(out sampleRate, out channels);
        System.IO.MemoryStream memStream = new System.IO.MemoryStream(waveHeader);
        using (System.IO.BinaryWriter bw = new System.IO.BinaryWriter(memStream))
        {
            bw.Write("RIFF".ToCharArray());
            bw.Write((Int32)Length - 8);            // RIFF chunk size
            bw.Write("WAVE".ToCharArray());
            bw.Write("fmt ".ToCharArray());
            bw.Write((Int32)16);                    // fmt chunk size
            bw.Write((Int16)3);                     // format tag 3 = IEEE float
            // NOTE(review): the header always declares 1 channel and the 'channels'
            // value returned by GetFormat is unused; confirm the reader delivers mono data.
            bw.Write((Int16)1);                     // channel count
            bw.Write((Int32)sampleRate);
            bw.Write((Int32)sampleRate * 4);        // bytes per second
            bw.Write((Int16)4);                     // block align
            bw.Write((Int16)32);                    // bits per sample
            bw.Write("data".ToCharArray());
            bw.Write((Int32)reader.GetSampleCount() * 4);
        }
    }

    /// <summary>Closes the reader device once; safe to call repeatedly.</summary>
    protected override void Dispose(bool disposing)
    {
        if (readHandle != InvalidHandle)
        {
            reader.CloseDevice(readHandle);
            // Reset to the same sentinel that is tested above so that a second
            // Dispose does not close the device again.
            readHandle = InvalidHandle;
        }
        base.Dispose(disposing);
    }

    ~AudioReaderWaveStream()
    {
        // Finalizers must use the Dispose(false) path.
        Dispose(false);
    }

    public override bool CanRead
    {
        get
        {
            return true;
        }
    }

    public override bool CanSeek
    {
        get
        {
            return true;
        }
    }

    public override bool CanWrite
    {
        get
        {
            return false;
        }
    }

    public override long Length
    {
        get
        {
            // Number of float samples + header of 44 bytes.
            return (reader.GetSampleCount() * BytesPerSample) + HeaderSize;
        }
    }

    public override long Position
    {
        get
        {
            return readPosition;
        }
        set
        {
            readPosition = value;
        }
    }

    public override void Flush()
    {
        // Read-only stream: nothing to flush.
    }

    /// <summary>
    /// Reads header bytes and/or sample bytes starting at the current position.
    /// </summary>
    /// <returns>The number of bytes actually copied; 0 once the end of the stream is reached.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (count <= 0)
            return 0;

        // Never report more data than the stream holds, and return 0 at the end
        // so that consumers can detect end of stream.
        long remaining = Length - Position;
        if (remaining <= 0)
            return 0;
        if (count > remaining)
            count = (int)remaining;

        int total = 0;
        if (Position < HeaderSize)
        {
            int headerCount = (int)Math.Min(count, HeaderSize - Position);
            Array.Copy(waveHeader, Position, buffer, offset, headerCount);
            offset += headerCount;
            Position += headerCount;
            count -= headerCount;
            total += headerCount;
        }

        if (count > 0)
        {
            // Round up so a request that is not a multiple of 4 bytes is still fully served.
            float[] readBuffer = new float[(count + BytesPerSample - 1) / BytesPerSample];
            // NOTE(review): the seek offset is passed in bytes, as in the original code;
            // confirm whether IAudioReader.Seek expects bytes or samples.
            reader.Seek(readHandle, Position - HeaderSize);
            reader.ReadAudio(readHandle, readBuffer);
            // Array.Copy cannot convert float[] to byte[]; copy the raw bytes instead.
            Buffer.BlockCopy(readBuffer, 0, buffer, offset, count);
            Position += count;
            total += count;
        }

        return total;
    }

    /// <summary>Moves the read position; for SeekOrigin.End the offset is added to Length.</summary>
    public override long Seek(long offset, System.IO.SeekOrigin origin)
    {
        if (origin == System.IO.SeekOrigin.Begin)
        {
            readPosition = offset;
        }
        else if (origin == System.IO.SeekOrigin.Current)
        {
            readPosition += offset;
        }
        else
        {
            // Offsets relative to the end are normally negative, so add them.
            readPosition = Length + offset;
        }
        return readPosition;
    }

    public override void SetLength(long value)
    {
        throw new NotImplementedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotImplementedException();
    }
}
I then take this object and create a source resolver using it as follows:
// Wrap the managed stream in a Media Foundation byte stream, then ask a
// source resolver to create a media source from it.
var sdxByteStream = new ByteStream( ARWS );
var resolver = new SharpDX.MediaFoundation.SourceResolver();
var source = (ComObject)resolver.CreateObjectFromStream( sdxByteStream, "File.wav", SourceResolverFlags.MediaSource );
However, every time I do this it hangs on the CreateObjectFromStream call. I've had a look inside SharpDX to see what's going on, and it seems the actual hang occurs when it calls the underlying interface through CreateObjectFromByteStream. I've also looked to see what data is actually read from the byte stream. It reads the first 16 bytes, which include the 'RIFF', the RIFF size, the 'WAVE' and the 'fmt '. Then nothing else.
Has anyone got any ideas what I could be doing wrong? I've tried all sorts of combinations of the SourceResolverFlags but nothing seems to make any difference. It just hangs.
It does remind me somewhat of interthread marshalling but all the media foundation calls are made from the same thread so I don't think its that. I'm also fairly sure that MediaFoundation uses free threading so this shouldn't be a problem anyway.
Has anyone any idea what I could possibly be doing wrong?
Thanks!
Ok I have come up with a solution to this. It looks like I may be having a COM threading issue. The read happens in a thread and that thread was calling back to the main thread which the function was called from.
So I used the async version of the call and perform an Application.DoEvents() to hand across control where necessary.
// Start the asynchronous resolve, then pump the message loop until the
// callback has produced the media source.
IUnknown cancel = null;
var cb = new Callback( resolver );
var flags = (int)(SourceResolverFlags.MediaSource | SourceResolverFlags.ByteStream);
resolver.BeginCreateObjectFromByteStream( sdxByteStream, "File.wav", flags, null, out cancel, cb, null );

// The cancellation cookie is not used, so release it straight away.
if ( cancel != null )
{
    cancel.Dispose();
}

while ( cb.MediaSource == null )
{
    System.Windows.Forms.Application.DoEvents();
}

var mediaSource = cb.MediaSource;
I really hate COM's threading model ...

The datastream was too long [duplicate]

When I'm trying to write very large amount of data (list with 300 000 rows and more) to memory stream using CsvHelper, it throws the exception "System.IO.IOException: Stream was too long.".
Data class is rather big and has ~30 properties, consequently each record in the file would have ~30 columns.
This is the actual writing code where exception throws (by the way this code is based on that answer of CsvHelper lib's author):
// Serialize all records as CSV into memory and hand back the raw bytes.
using (var memoryStream = new MemoryStream())
using (var streamWriter = new StreamWriter(memoryStream, encoding ?? Encoding.ASCII))
{
    var configuration = GetConfiguration(delimiter, mappingClassType, mappingActions);
    var csvWriter = new CsvWriter(streamWriter, configuration);

    // data is IEnumerable<T> and has more than 300k records
    csvWriter.WriteRecords(data);

    // Push buffered text into the MemoryStream before snapshotting it.
    streamWriter.Flush();
    return memoryStream.ToArray();
}
Then I save the resulted bytes array into the file.
// Write the whole byte array to filePath in a single call.
File.WriteAllBytes(filePath, resultedBytesArray);
Please note that the same code works well when I write 100 000 records to the file (in that case the file is about 1GB in size). By the way, my goal is to write more than 600 000 data records.
This is the relevant part of the stack trace related to this issue.
Stream was too long.|System.IO.IOException: Stream was too long.
at System.IO.MemoryStream.Write(Byte[] buffer, Int32 offset, Int32 count)
at System.IO.StreamWriter.Flush(Boolean flushStream, Boolean flushEncoder)
at System.IO.StreamWriter.Write(Char[] buffer, Int32 index, Int32 count)
at CsvHelper.CsvWriter.NextRecord() in C:\Users\Josh\Projects\CsvHelper\src\CsvHelper\CsvWriter.cs:line 290
at CsvHelper.CsvWriter.WriteRecords(IEnumerable records) in C:\Users\Josh\Projects\CsvHelper\src\CsvHelper\CsvWriter.cs:line 490
at FileExport.Csv.CsvDocument.Create[T](IEnumerable`1 data, String delimiter, Encoding encoding, Type mappingClassType, IDictionary`2 mappingActions) in d:\Dev\DrugDevExport\FileExport\Csv\CsvDocument.cs:line 33
As far as I can see, the basic way to achieve my goal and avoid this issue is to split my list of data into a few parts and then concatenate the results, but is there perhaps an obvious and easy solution that does not need significant refactoring (like increasing the default stream/buffer size, etc.)?
Also keep in mind, that I've also applied two possible solutions in order to prevent "Out Of Memory" objects exception.
got rid of 2GB limitation for objects (from here https://stackoverflow.com/a/20912869) Yes, I'm running on x64 OS with 32GB RAM.
set up x64 "Platform target" in the build settings section (from here https://stackoverflow.com/a/22592876)
Thanks in advance.
You can address this limitation of 2GB by writing your own MemoryStream :
/// <summary>
/// In-memory stream backed by a list of fixed-size pages instead of one contiguous
/// array, so its total size is not limited by the maximum size of a single byte array.
/// </summary>
/// <remarks>
/// The position can never be moved past <see cref="Length"/>; writing at the end grows the stream.
/// </remarks>
class HugeMemoryStream : Stream
{
    #region Fields
    private const int PAGE_SIZE = 1024000;
    private const int ALLOC_STEP = 1024;
    private byte[][] _streamBuffers;
    private int _pageCount = 0;
    private long _allocatedBytes = 0;
    private long _position = 0;
    private long _length = 0;
    #endregion Fields

    #region Internals
    // Number of pages needed to hold 'length' bytes (rounded up).
    private int GetPageCount(long length)
    {
        int pageCount = (int)(length / PAGE_SIZE) + 1;
        if ((length % PAGE_SIZE) == 0)
            pageCount--;
        return pageCount;
    }

    // Grows the page table by ALLOC_STEP slots, keeping the existing pages.
    private void ExtendPages()
    {
        if (_streamBuffers == null)
        {
            _streamBuffers = new byte[ALLOC_STEP][];
        }
        else
        {
            byte[][] streamBuffers = new byte[_streamBuffers.Length + ALLOC_STEP][];
            Array.Copy(_streamBuffers, streamBuffers, _streamBuffers.Length);
            _streamBuffers = streamBuffers;
        }
        _pageCount = _streamBuffers.Length;
    }

    // Makes sure pages exist for the first 'value' bytes and raises _length to at least 'value'.
    private void AllocSpaceIfNeeded(long value)
    {
        if (value < 0)
            throw new InvalidOperationException("AllocSpaceIfNeeded < 0");
        if (value == 0)
            return;
        int currentPageCount = GetPageCount(_allocatedBytes);
        int neededPageCount = GetPageCount(value);
        while (currentPageCount < neededPageCount)
        {
            if (currentPageCount == _pageCount)
                ExtendPages();
            _streamBuffers[currentPageCount++] = new byte[PAGE_SIZE];
        }
        _allocatedBytes = (long)currentPageCount * PAGE_SIZE;
        value = Math.Max(value, _length);
        if (_position > (_length = value))
            _position = _length;
    }

    // Shared argument validation for Read and Write.
    private static void CheckRange(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (offset < 0)
            throw new ArgumentOutOfRangeException("offset");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count");
        if (buffer.Length - offset < count)
            throw new ArgumentException("Offset and count exceed the buffer length.");
    }
    #endregion Internals

    #region Stream
    public override bool CanRead => true;
    public override bool CanSeek => true;
    public override bool CanWrite => true;
    public override long Length => _length;

    /// <summary>Current position; must stay within [0, Length].</summary>
    public override long Position
    {
        get { return _position; }
        set
        {
            if (value > _length)
                throw new InvalidOperationException("Position > Length");
            else if (value < 0)
                throw new InvalidOperationException("Position < 0");
            else
                _position = value;
        }
    }

    public override void Flush() { }

    /// <summary>Copies up to <paramref name="count"/> bytes from the current position.</summary>
    /// <returns>Number of bytes copied; 0 at end of stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        CheckRange(buffer, offset, count);

        int currentPage = (int)(_position / PAGE_SIZE);
        int currentOffset = (int)(_position % PAGE_SIZE);
        int currentLength = PAGE_SIZE - currentOffset;
        long startPosition = _position;
        if (startPosition + count > _length)
            count = (int)(_length - startPosition);
        while (count != 0 && _position < _length)
        {
            if (currentLength > count)
                currentLength = count;
            Array.Copy(_streamBuffers[currentPage++], currentOffset, buffer, offset, currentLength);
            offset += currentLength;
            _position += currentLength;
            count -= currentLength;
            // Every page after the first is read from its start.
            currentOffset = 0;
            currentLength = PAGE_SIZE;
        }
        return (int)(_position - startPosition);
    }

    /// <summary>
    /// Moves the position. For <see cref="SeekOrigin.End"/> the offset is added to
    /// <see cref="Length"/>, so it is zero or negative for positions inside the stream.
    /// </summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        switch (origin)
        {
            case SeekOrigin.Begin:
                break;
            case SeekOrigin.Current:
                offset += _position;
                break;
            case SeekOrigin.End:
                offset = _length + offset;
                break;
            default:
                throw new ArgumentOutOfRangeException("origin");
        }
        return Position = offset;
    }

    /// <summary>Grows or shrinks the stream; bytes exposed by growing read as zero.</summary>
    public override void SetLength(long value)
    {
        if (value < 0)
            throw new InvalidOperationException("SetLength < 0");
        if (value == 0)
        {
            _streamBuffers = null;
            _allocatedBytes = _position = _length = 0;
            _pageCount = 0;
            return;
        }
        int currentPageCount = GetPageCount(_allocatedBytes);
        int neededPageCount = GetPageCount(value);
        // Removes unused buffers if decreasing stream length
        if (currentPageCount > neededPageCount)
        {
            while (currentPageCount > neededPageCount)
                _streamBuffers[--currentPageCount] = null;
            // Keep the bookkeeping in sync with the released pages; otherwise a
            // later Write would assume they still exist and hit a null page.
            _allocatedBytes = (long)currentPageCount * PAGE_SIZE;
        }
        if (value < _length)
        {
            // Zero the cut-off tail of the last kept page so that growing the
            // stream again never exposes stale data.
            int tailOffset = (int)(value % PAGE_SIZE);
            if (tailOffset != 0)
                Array.Clear(_streamBuffers[neededPageCount - 1], tailOffset, PAGE_SIZE - tailOffset);
            _length = value;
        }
        AllocSpaceIfNeeded(value);
        if (_position > (_length = value))
            _position = _length;
    }

    /// <summary>Writes at the current position, allocating pages and extending the length as needed.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        CheckRange(buffer, offset, count);

        int currentPage = (int)(_position / PAGE_SIZE);
        int currentOffset = (int)(_position % PAGE_SIZE);
        int currentLength = PAGE_SIZE - currentOffset;
        AllocSpaceIfNeeded(_position + count);
        while (count != 0)
        {
            if (currentLength > count)
                currentLength = count;
            Array.Copy(buffer, offset, _streamBuffers[currentPage++], currentOffset, currentLength);
            offset += currentLength;
            _position += currentLength;
            count -= currentLength;
            currentOffset = 0;
            currentLength = PAGE_SIZE;
        }
    }
    #endregion Stream
}
using ICSharpCode.SharpZipLib.GZip;
using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
// HugeMemoryStream test: decompress a gzip text file into a HugeMemoryStream,
// then decompress it a second time and compare it line by line with the copy.
string filename = @"gzip-filename.gz";
HugeMemoryStream ms = new HugeMemoryStream();

// Pass 1: copy every line of the decompressed file into the memory stream.
// The writer leaves 'ms' open so it can be read back afterwards.
using (StreamWriter sw = new StreamWriter(ms, Encoding.UTF8, 16384, true))
using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
using (GZipInputStream gzipStream = new GZipInputStream(fs))
using (StreamReader sr = new StreamReader(gzipStream, Encoding.UTF8, false, 16384, true))
{
    for (string line = sr.ReadLine(); line != null; line = sr.ReadLine())
        sw.WriteLine(line);
}

ms.Seek(0, SeekOrigin.Begin);

// Pass 2: both sources must yield exactly the same sequence of lines.
using (StreamReader srm = new StreamReader(ms, Encoding.UTF8, false, 16384, true))
using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
using (GZipInputStream gzipStream = new GZipInputStream(fs))
using (StreamReader sr = new StreamReader(gzipStream, Encoding.UTF8, false, 16384, true))
{
    string line1;
    while ((line1 = sr.ReadLine()) != null)
    {
        string line2 = srm.ReadLine();
        if (line1 != line2)
            throw new InvalidDataException();
    }

    // The copy must not contain anything beyond the last line of the original.
    if (srm.ReadLine() != null)
        throw new InvalidDataException();
}
Many thanks to Spender: as he mentioned in the comment below the question, the problem is fixed by replacing MemoryStream with FileStream and writing the data directly into the file.
In my case it was pointless to write the data to a MemoryStream and then copy it again into the file. Thanks to him again for opening my eyes to that fact.
My fixed code below.
// Stream the CSV straight into the target file instead of buffering it in memory.
using (var fileStream = File.Create(path))
using (var streamWriter = new StreamWriter(fileStream, encoding ?? Encoding.ASCII))
{
    var configuration = GetConfiguration(delimiter, mappingClassType, mappingActions);
    var csvWriter = new CsvWriter(streamWriter, configuration);
    csvWriter.WriteRecords(data);
}
Now it works with any amount of input data.

Serialize and deserialize char(s)

I have a list of chars in my class. Serialization and deserialization work as expected, except when the list contains a char that (I think) needs a byte order mark to be described, for example the char with code 56256. I created the simple test below to demonstrate the problem.
// Round-trips a single char through Json.NET over a UTF-8 stream and expects
// the same code unit to come back.
[Test]
public void Utf8CharSerializeAndDeserializeShouldEqual()
{
    const UInt16 charCode = 56256;
    var original = (char)charCode;

    using (var ms = new MemoryStream())
    {
        // leaveOpen: true keeps 'ms' usable after the writer is disposed.
        using (var writer = new StreamWriter(ms, Encoding.UTF8, 1024, true))
        {
            new JsonSerializer().Serialize(writer, original);
        }

        ms.Position = 0;

        using (var reader = new StreamReader(ms, true))
        using (var jsonReader = new JsonTextReader(reader))
        {
            var roundTripped = new JsonSerializer().Deserialize<char>(jsonReader);
            Console.WriteLine($"{(int)original}, {(int)roundTripped}");
            Assert.AreEqual(original, roundTripped);
            Assert.AreEqual((int)original, (int)roundTripped);
        }
    }
}
The test passes when the char code does not need a BOM. For example, 65 (A) passes this test.
Your problem is unrelated to Json.NET. Your problem is that U+DBC0 (decimal 56256) is an invalid unicode character, and, as explained in the documentation, the Encoding.UTF8 used by your StreamWriter will not encode such a character:
Encoding.UTF8 returns a UTF8Encoding object that uses replacement fallback to replace each string that it can't encode and each byte that it can't decode with a question mark ("?") character.
To confirm this, if you replace Encoding.UTF8 with new UTF8Encoding(true, true) in your test example, you will get the following exception:
EncoderFallbackException: Unable to translate Unicode character \uDBC0 at index 1 to specified code page.
If you are going to try to serialize invalid Unicode char values, you're going to need to manually encode them as, e.g., a byte array using the following:
/// <summary>
/// Converts chars to and from their raw 16-bit value as two bytes (low byte first),
/// without going through any text encoding.
/// </summary>
public static partial class TextExtensions
{
    // Splits the 16-bit value of 'c' into its low and high byte.
    static void ToBytesWithoutEncoding(char c, out byte lower, out byte upper)
    {
        int codeUnit = c;
        lower = unchecked((byte)(codeUnit & 0xFF));
        upper = unchecked((byte)(codeUnit >> 8));
    }

    /// <summary>Returns the two bytes of <paramref name="c"/>, low byte first.</summary>
    public static byte[] ToByteArrayWithoutEncoding(this char c)
    {
        var pair = new byte[2];
        ToBytesWithoutEncoding(c, out pair[0], out pair[1]);
        return pair;
    }

    /// <summary>
    /// Returns two bytes per char of <paramref name="list"/> (low byte first),
    /// or null when <paramref name="list"/> is null.
    /// </summary>
    public static byte[] ToByteArrayWithoutEncoding(this ICollection<char> list)
    {
        if (list == null)
        {
            return null;
        }

        var result = new byte[checked(list.Count * 2)];
        var index = 0;
        foreach (var ch in list)
        {
            ToBytesWithoutEncoding(ch, out result[index], out result[index + 1]);
            index += 2;
        }
        return result;
    }

    /// <summary>Decodes the char stored at the start of <paramref name="bytes"/>.</summary>
    public static char ToCharWithoutEncoding(this byte[] bytes)
    {
        return ToCharWithoutEncoding(bytes, 0);
    }

    /// <summary>
    /// Decodes the char stored at <paramref name="position"/>. Bytes that lie past the
    /// end of the array count as zero; a null array yields the default char.
    /// </summary>
    public static char ToCharWithoutEncoding(this byte[] bytes, int position)
    {
        if (bytes == null)
        {
            return default(char);
        }

        int codeUnit = 0;
        if (position < bytes.Length)
        {
            codeUnit |= bytes[position];
        }
        if (position + 1 < bytes.Length)
        {
            codeUnit |= bytes[position + 1] << 8;
        }
        return (char)codeUnit;
    }

    /// <summary>
    /// Decodes consecutive byte pairs into chars; a trailing single byte becomes the low
    /// byte of a final char. Returns null when <paramref name="bytes"/> is null.
    /// </summary>
    public static List<char> ToCharListWithoutEncoding(this byte[] bytes)
    {
        if (bytes == null)
        {
            return null;
        }

        var result = new List<char>(bytes.Length / 2 + bytes.Length % 2);
        var position = 0;
        while (position < bytes.Length)
        {
            result.Add(ToCharWithoutEncoding(bytes, position));
            position += 2;
        }
        return result;
    }
}
Then modify your test method as follows:
// Runs the round-trip check for the char with code 56256.
public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed()
{
    Utf8JsonCharSerializeAndDeserializeShouldEqualFixed((char)56256);
}

// Serializes the raw bytes of 'utfChar' (instead of the char itself) as JSON,
// reads them back and expects the identical char.
public void Utf8JsonCharSerializeAndDeserializeShouldEqualFixed(char utfChar)
{
    byte[] data;
    using (var output = new MemoryStream())
    {
        using (var writer = new StreamWriter(output, new UTF8Encoding(true, true), 1024))
        {
            new JsonSerializer().Serialize(writer, utfChar.ToByteArrayWithoutEncoding());
        }
        data = output.ToArray();
    }

    using (var input = new MemoryStream(data))
    using (var reader = new StreamReader(input, true))
    using (var jsonReader = new JsonTextReader(reader))
    {
        byte[] rawBytes = new JsonSerializer().Deserialize<byte[]>(jsonReader);
        char deserializedChar = rawBytes.ToCharWithoutEncoding();
        //Console.WriteLine(string.Format("{0}, {1}", utfChar, deserializedChar));
        Assert.AreEqual(utfChar, deserializedChar);
        Assert.AreEqual((int)utfChar, (int)deserializedChar);
    }
}
Or, if you have a List<char> property in some container class, you can create the following converter:
/// <summary>
/// Json.NET converter that stores a <c>List&lt;char&gt;</c> as a byte array
/// (two raw bytes per char) instead of as text.
/// </summary>
public class CharListConverter : JsonConverter
{
    /// <summary>Handles exactly <c>List&lt;char&gt;</c>.</summary>
    public override bool CanConvert(Type objectType)
    {
        return typeof(List<char>) == objectType;
    }

    /// <summary>Reads a byte array and turns it back into a list of chars; a JSON null yields null.</summary>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        if (reader.TokenType == JsonToken.Null)
        {
            return null;
        }

        byte[] raw = serializer.Deserialize<byte[]>(reader);
        return raw.ToCharListWithoutEncoding();
    }

    /// <summary>Writes the chars as their raw bytes.</summary>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        var chars = (ICollection<char>)value;
        serializer.Serialize(writer, chars.ToByteArrayWithoutEncoding());
    }
}
And apply it as follows:
/// <summary>
/// Example container whose <see cref="Characters"/> list is (de)serialized
/// through <c>CharListConverter</c>.
/// </summary>
public class RootObject
{
// Route this property through CharListConverter instead of the default char handling.
[JsonConverter(typeof(CharListConverter))]
public List<char> Characters { get; set; }
}
In both cases Json.NET will encode the byte array as Base64.

Returning database results as a stream

I have a function that returns database query results. These results have got very large, and I now would like to pass them as a stream, so that the client can start to process them quicker, and memory usage is less. But I don't really know how to do this, the following function works, but what I want to know how to change it so that it starts to stream upon reading from the first table.
// Serializes the results of all four table queries, one after another, into a
// single in-memory stream and returns it rewound to the start.
// NOTE(review): BinaryFormatter is unsafe for untrusted data and has been removed
// from recent .NET versions; consider another serializer.
public Stream GetResults()
{
    var buffer = new MemoryStream();
    var formatter = new BinaryFormatter();

    formatter.Serialize(buffer, GetItemsFromTable1());
    formatter.Serialize(buffer, GetItemsFromTable2());
    formatter.Serialize(buffer, GetItemsFromTable3());
    formatter.Serialize(buffer, GetItemsFromTable4());

    buffer.Position = 0;
    return buffer;
}
You could write a custom Stream implementation which functions as a pipe. If you then moved your GetItemsFromTable() method calls into a background task, the client could start reading results from the stream immediately.
In my solution below I'm using a circular buffer as a backing store for the pipe stream. Memory usage will be reduced only if the client consumes data fast enough. But even in the worst-case scenario it shouldn't use more memory than your current solution. If memory usage is a bigger priority for you than execution speed, then your stream could potentially block write calls until space is available. My solution below does not block writes; it expands the capacity of the circular buffer so that the background thread can continue filling data without delays.
The GetResults method might look like this:
// Starts producing the results on a background thread and immediately returns
// the read end of the pipe.
public Stream GetResults()
{
    // Begin filling the pipe with data on a background thread
    var pipeStream = new CircularBufferPipeStream();
    Task.Run(() => WriteResults(pipeStream));
    // Return pipe stream for immediate usage by client
    // Note: client is responsible for disposing of the stream after reading all data!
    return pipeStream;
}

// Runs on background thread, filling circular buffer with data
void WriteResults(CircularBufferPipeStream stream)
{
    try
    {
        IFormatter formatter = new BinaryFormatter();
        formatter.Serialize(stream, GetItemsFromTable1());
        formatter.Serialize(stream, GetItemsFromTable2());
        formatter.Serialize(stream, GetItemsFromTable3());
        formatter.Serialize(stream, GetItemsFromTable4());
    }
    finally
    {
        // Always signal "no more data", even when a query or the serialization
        // fails; otherwise the reader would block forever waiting for more bytes.
        stream.CloseWritePort();
    }
}
And the circular buffer stream:
/// <summary>
/// Stream that acts as a pipe by supporting reading and writing simultaneously from different threads.
/// Read calls will block until data is available or the CloseWritePort() method has been called.
/// Read calls consume bytes in the circular buffer immediately so that more space is available for writes into the circular buffer.
/// Writes do not block; the capacity of the circular buffer will be expanded as needed to write the entire block of data at once.
/// </summary>
class CircularBufferPipeStream : Stream
{
    const int DefaultCapacity = 1024;
    byte[] _buffer;
    bool _writePortClosed = false;
    object _readWriteSyncRoot = new object();
    // Number of unread bytes currently stored in _buffer.
    int _length;
    ManualResetEvent _dataAddedEvent;
    // Index in _buffer of the oldest unread byte.
    int _start = 0;

    public CircularBufferPipeStream(int initialCapacity = DefaultCapacity)
    {
        _buffer = new byte[initialCapacity];
        _length = 0;
        _dataAddedEvent = new ManualResetEvent(false);
    }

    /// <summary>
    /// Marks the end of the data: once the buffered bytes are consumed, Read returns 0
    /// instead of blocking.
    /// </summary>
    public void CloseWritePort()
    {
        lock (_readWriteSyncRoot)
        {
            _writePortClosed = true;
            // The event is null once the stream has been disposed; nobody is waiting then.
            if (_dataAddedEvent != null)
                _dataAddedEvent.Set();
        }
    }

    public override bool CanRead { get { return true; } }
    public override bool CanWrite { get { return true; } }
    public override bool CanSeek { get { return false; } }
    public override void Flush() { }
    public override long Length { get { throw new NotImplementedException(); } }
    public override long Position
    {
        get { throw new NotImplementedException(); }
        set { throw new NotImplementedException(); }
    }
    public override long Seek(long offset, SeekOrigin origin) { throw new NotImplementedException(); }
    public override void SetLength(long value) { throw new NotImplementedException(); }

    /// <summary>
    /// Blocks until at least one byte is available or the write port is closed, then
    /// copies up to <paramref name="count"/> buffered bytes.
    /// </summary>
    /// <returns>Number of bytes copied; 0 when <paramref name="count"/> is 0 or the pipe is closed and empty.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        CheckRange(buffer, offset, count);
        // A zero-byte request can never make progress in the loop below
        // (it would spin forever while data is buffered), so answer it directly.
        if (count == 0)
            return 0;

        ManualResetEvent dataAdded = _dataAddedEvent;
        if (dataAdded == null)
            throw new ObjectDisposedException("CircularBufferPipeStream");

        int bytesRead = 0;
        while (bytesRead == 0)
        {
            bool waitForData = false;
            lock (_readWriteSyncRoot)
            {
                if (_length != 0)
                    bytesRead = ReadDirect(buffer, offset, count);
                else if (_writePortClosed)
                    break;
                else
                {
                    // Reset inside the lock: a writer can only Set after this point,
                    // so the wake-up cannot be lost.
                    dataAdded.Reset();
                    waitForData = true;
                }
            }
            if (waitForData)
                dataAdded.WaitOne();
        }
        return bytesRead;
    }

    // Copies out up to 'count' buffered bytes in two steps: the part up to the end of
    // the backing array, then the wrapped-around part at its beginning.
    // The caller must hold _readWriteSyncRoot.
    private int ReadDirect(byte[] buffer, int offset, int count)
    {
        int readTailCount = Math.Min(Math.Min(_buffer.Length - _start, count), _length);
        Array.Copy(_buffer, _start, buffer, offset, readTailCount);
        _start += readTailCount;
        _length -= readTailCount;
        if (_start == _buffer.Length)
            _start = 0;
        int readHeadCount = Math.Min(Math.Min(_buffer.Length - _start, count - readTailCount), _length);
        if (readHeadCount > 0)
        {
            Array.Copy(_buffer, _start, buffer, offset + readTailCount, readHeadCount);
            _start += readHeadCount;
            _length -= readHeadCount;
        }
        return readTailCount + readHeadCount;
    }

    /// <summary>Appends the bytes without blocking, growing the buffer when it is too small.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        CheckRange(buffer, offset, count);
        ManualResetEvent dataAdded = _dataAddedEvent;
        if (dataAdded == null)
            throw new ObjectDisposedException("CircularBufferPipeStream");
        // Nothing to store; this also avoids a modulo by zero on a zero-capacity buffer.
        if (count == 0)
            return;

        lock (_readWriteSyncRoot)
        {
            // expand capacity as needed
            if (count + _length > _buffer.Length)
            {
                var expandedBuffer = new byte[Math.Max(_buffer.Length * 2, count + _length)];
                // ReadDirect linearizes the unread bytes into the new array and
                // returns how many there were.
                _length = ReadDirect(expandedBuffer, 0, _length);
                _start = 0;
                _buffer = expandedBuffer;
            }
            int startWrite = (_start + _length) % _buffer.Length;
            int writeTailCount = Math.Min(_buffer.Length - startWrite, count);
            Array.Copy(buffer, offset, _buffer, startWrite, writeTailCount);
            startWrite += writeTailCount;
            _length += writeTailCount;
            if (startWrite == _buffer.Length)
                startWrite = 0;
            int writeHeadCount = count - writeTailCount;
            if (writeHeadCount > 0)
            {
                Array.Copy(buffer, offset + writeTailCount, _buffer, startWrite, writeHeadCount);
                _length += writeHeadCount;
            }
        }
        dataAdded.Set();
    }

    // Shared argument validation for Read and Write.
    private static void CheckRange(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (offset < 0)
            throw new ArgumentOutOfRangeException("offset");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count");
        if (buffer.Length - offset < count)
            throw new ArgumentException("Offset and count exceed the buffer length.");
    }

    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (_dataAddedEvent != null)
            {
                _dataAddedEvent.Dispose();
                _dataAddedEvent = null;
            }
        }
        base.Dispose(disposing);
    }
}
try
// Serializes the results of the four table queries into one in-memory stream
// and rewinds it so the caller reads from the beginning.
public Stream GetResults()
{
    var buffer = new MemoryStream();
    var formatter = new BinaryFormatter();

    formatter.Serialize(buffer, GetItemsFromTable1());
    formatter.Serialize(buffer, GetItemsFromTable2());
    formatter.Serialize(buffer, GetItemsFromTable3());
    formatter.Serialize(buffer, GetItemsFromTable4());

    // Without rewinding, a reader would start at the end and find no data.
    buffer.Seek(0L, SeekOrigin.Begin);
    return buffer;
}
why the changes?
remove using, because your stream gets disposed once it leaves the using-block. disposing the stream means you cannot use it anymore
seek to the beginning of the stream. if you start reading from the stream without seeking to its beginning, you would start to deserialize/ read from its end; but unfortunately there is no content behind the end of the stream
however, I don't see how using a MemoryStream reduces memory usage. I would suggest chaining it into a DeflateStream or a FileStream to reduce RAM-usage
hope this helps

AES OFB encryption for RijndaelManaged

I need to communicate from a C# application to another application via encrypted messages in OFB mode. I know that RijndaelManaged does not have support for AES OFB mode. Is there anybody more experienced than me aware of any other way to encrypt/decrypt using OFB mode?
The following stream implements OFB by using a key stream generated by a zero-fed CBC cipher stream.
/// <summary>
/// Stream that applies OFB mode on top of a block cipher: every byte passing through
/// is XORed with a key stream obtained by CBC-encrypting an endless run of zero bytes.
/// </summary>
/// <remarks>
/// The constructor switches the supplied algorithm to CBC mode without padding.
/// The parent stream is not disposed by this class.
/// </remarks>
public class OFBStream : Stream
{
	private const int BLOCKS = 16;
	private const int EOS = 0; // the goddess of dawn is found at the end of the stream
	private Stream parent;
	private ICryptoTransform encryptor;
	private CryptoStream cbcStream;
	private CryptoStreamMode mode;
	private byte[] keyStreamBuffer;
	private int keyStreamBufferOffset;
	private byte[] readWriteBuffer;

	/// <param name="parent">Stream that is read from (Read mode) or written to (Write mode).</param>
	/// <param name="algo">Block cipher with key and IV already set; its Mode and Padding are changed.</param>
	/// <param name="mode">Whether this stream is used for reading or for writing.</param>
	public OFBStream (Stream parent, SymmetricAlgorithm algo, CryptoStreamMode mode)
	{
		if (parent == null)
			throw new ArgumentNullException ("parent");
		if (algo == null)
			throw new ArgumentNullException ("algo");

		if (algo.Mode != CipherMode.CBC)
			algo.Mode = CipherMode.CBC;
		if (algo.Padding != PaddingMode.None)
			algo.Padding = PaddingMode.None;
		this.parent = parent;
		this.encryptor = algo.CreateEncryptor ();
		this.cbcStream = new CryptoStream (new ZeroStream (), encryptor, CryptoStreamMode.Read);
		this.mode = mode;
		// BlockSize is expressed in bits, so this buffer is larger than 16 blocks;
		// what matters is that its length is a whole number of cipher blocks.
		keyStreamBuffer = new byte[algo.BlockSize * BLOCKS];
		readWriteBuffer = new byte[keyStreamBuffer.Length];
	}

	/// <summary>Reads from the parent stream and XORs the bytes with the key stream.</summary>
	/// <returns>Number of bytes placed in <paramref name="buffer"/>; 0 at end of stream.</returns>
	public override int Read (byte[] buffer, int offset, int count)
	{
		if (!CanRead) {
			throw new NotSupportedException ();
		}

		int toRead = Math.Min (count, readWriteBuffer.Length);
		int read = parent.Read (readWriteBuffer, 0, toRead);
		if (read == EOS)
			return EOS;

		for (int i = 0; i < read; i++) {
			// NOTE could be optimized (branches for each byte)
			if (keyStreamBufferOffset % keyStreamBuffer.Length == 0) {
				FillKeyStreamBuffer ();
				keyStreamBufferOffset = 0;
			}
			buffer [offset + i] = (byte)(readWriteBuffer [i]
				^ keyStreamBuffer [keyStreamBufferOffset++]);
		}

		return read;
	}

	/// <summary>XORs the bytes with the key stream and writes the result to the parent stream.</summary>
	public override void Write (byte[] buffer, int offset, int count)
	{
		if (!CanWrite) {
			throw new NotSupportedException ();
		}

		int readWriteBufferOffset = 0;
		for (int i = 0; i < count; i++) {
			if (keyStreamBufferOffset % keyStreamBuffer.Length == 0) {
				FillKeyStreamBuffer ();
				keyStreamBufferOffset = 0;
			}

			// Hand the staging buffer to the parent whenever it is full.
			if (readWriteBufferOffset == readWriteBuffer.Length) {
				parent.Write (readWriteBuffer, 0, readWriteBufferOffset);
				readWriteBufferOffset = 0;
			}

			readWriteBuffer [readWriteBufferOffset++] = (byte)(buffer [offset + i]
				^ keyStreamBuffer [keyStreamBufferOffset++]);
		}

		if (readWriteBufferOffset > 0) {
			parent.Write (readWriteBuffer, 0, readWriteBufferOffset);
		}
	}

	// Refills the whole key stream buffer from the CBC stream.
	private void FillKeyStreamBuffer ()
	{
		// A single Read call is allowed to return fewer bytes than requested,
		// so keep reading until the buffer is complete.
		int filled = 0;
		while (filled < keyStreamBuffer.Length) {
			int read = cbcStream.Read (keyStreamBuffer, filled, keyStreamBuffer.Length - filled);
			if (read == EOS)
				throw new InvalidOperationException ("Implementation error: could not read all bytes from CBC stream");
			filled += read;
		}
	}

	public override bool CanRead {
		get { return mode == CryptoStreamMode.Read; }
	}

	public override bool CanWrite {
		get { return mode == CryptoStreamMode.Write; }
	}

	public override void Flush ()
	{
		// should never have to be flushed, implementation empty
	}

	public override bool CanSeek {
		get { return false; }
	}

	public override long Seek (long offset, System.IO.SeekOrigin origin)
	{
		throw new NotSupportedException ();
	}

	public override long Position {
		get { throw new NotSupportedException (); }
		set { throw new NotSupportedException (); }
	}

	public override long Length {
		get { throw new NotSupportedException (); }
	}

	public override void SetLength (long value)
	{
		throw new NotSupportedException ();
	}

	protected override void Dispose (bool disposing)
	{
		if (disposing && encryptor != null) {
			// Release the transform created in the constructor.
			encryptor.Dispose ();
			encryptor = null;
		}
		base.Dispose (disposing);
	}
}
Additional class ZeroStream required by OFBStream
/// <summary>
/// Endless read-only stream whose every byte is zero. Only reading is supported.
/// </summary>
class ZeroStream : System.IO.Stream
{
	/// <summary>Fills the requested range with zeros and reports it as fully read.</summary>
	public override int Read (byte[] buffer, int offset, int count)
	{
		Array.Clear (buffer, offset, count);
		return count;
	}

	public override bool CanRead {
		get { return true; }
	}

	public override bool CanSeek {
		get { return false; }
	}

	public override bool CanWrite {
		get { return false; }
	}

	public override void Flush ()
	{
		// nothing is buffered, nothing to flush
	}

	public override long Length {
		get { throw new NotSupportedException (); }
	}

	public override long Position {
		get { throw new NotSupportedException (); }
		set { throw new NotSupportedException (); }
	}

	public override long Seek (long offset, System.IO.SeekOrigin origin)
	{
		throw new NotSupportedException ();
	}

	public override void SetLength (long value)
	{
		throw new NotSupportedException ();
	}

	public override void Write (byte[] buffer, int offset, int count)
	{
		throw new NotSupportedException ();
	}
}
And you can use it as I do for a test vector:
// NIST CAVP test vector F.4.1: OFB-AES128.Encrypt from NIST SP 800-38A
// Encrypts the test-vector plaintext through OFBStream and prints the result as hex.
var aes = new RijndaelManaged {
	Key = FromHex ("2b7e151628aed2a6abf7158809cf4f3c"),
	IV = FromHex ("000102030405060708090A0B0C0D0E0F")
};
var plainText = FromHex (
	"6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710");
var testVectorStream = new MemoryStream (plainText);
var testOFBStream = new OFBStream (testVectorStream, aes, CryptoStreamMode.Read);
var cipherTextStream = new MemoryStream ();
testOFBStream.CopyTo (cipherTextStream);
Console.WriteLine (ToHex (cipherTextStream.ToArray ()));
Note that the stream handling has not been fully tested (yet).

Categories