Can I decompress and deserialize a file using streams? - c#

My application serializes an object using Json.Net, compresses the resulting JSON, then saves this to file. Additionally the application can load an object from one of these files. These objects can be tens of Mb in size and I'm concerned about memory usage, due to the way the existing code creates large strings and byte arrays:-
public void Save(MyClass myObject, string filename)
{
var json = JsonConvert.SerializeObject(myObject);
var bytes = Compress(json);
File.WriteAllBytes(filename, bytes);
}
public MyClass Load(string filename)
{
var bytes = File.ReadAllBytes(filename);
var json = Decompress(bytes);
var myObject = JsonConvert.DeserializeObject<MyClass>(json);
}
private static byte[] Compress(string s)
{
var bytes = Encoding.Unicode.GetBytes(s);
using (var ms = new MemoryStream())
{
using (var gs = new GZipStream(ms, CompressionMode.Compress))
{
gs.Write(bytes, 0, bytes.Length);
gs.Close();
return ms.ToArray();
}
}
}
private static string Decompress(byte[] bytes)
{
using (var msi = new MemoryStream(bytes))
{
using (var mso = new MemoryStream())
{
using (var gs = new GZipStream(msi, CompressionMode.Decompress))
{
gs.CopyTo(mso);
return Encoding.Unicode.GetString(mso.ToArray());
}
}
}
}
I was wondering if the Save/Load methods could be replaced with streams? I've found examples of using streams with Json.Net but am struggling to get my head around how to fit in the additional compression stuff.

JsonSerializer has methods to serialize from a JsonTextReader and to a StreamWriter, both of which can be created on top of any sort of stream, including a GZipStream. Using them, you can create the following extension methods:
public static partial class JsonExtensions
{
// Buffer sized as recommended by Bradley Grainger, https://faithlife.codes/blog/2012/06/always-wrap-gzipstream-with-bufferedstream/
// But anything smaller than 85,000 bytes should be OK, since objects larger than that go on the large object heap. See:
// https://learn.microsoft.com/en-us/dotnet/standard/garbage-collection/large-object-heap
const int BufferSize = 8192;
// Disable writing of BOM as per https://datatracker.ietf.org/doc/html/rfc8259#section-8.1
static readonly Encoding DefaultEncoding = new UTF8Encoding(false);
public static void SerializeToFileCompressed(object value, string path, JsonSerializerSettings settings = null)
{
using (var fs = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Read))
SerializeCompressed(value, fs, settings);
}
public static void SerializeCompressed(object value, Stream stream, JsonSerializerSettings settings = null)
{
using (var compressor = new GZipStream(stream, CompressionMode.Compress))
using (var writer = new StreamWriter(compressor, DefaultEncoding, BufferSize))
{
var serializer = JsonSerializer.CreateDefault(settings);
serializer.Serialize(writer, value);
}
}
public static T DeserializeFromFileCompressed<T>(string path, JsonSerializerSettings settings = null)
{
using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
return DeserializeCompressed<T>(fs, settings);
}
public static T DeserializeCompressed<T>(Stream stream, JsonSerializerSettings settings = null)
{
using (var compressor = new GZipStream(stream, CompressionMode.Decompress))
using (var reader = new StreamReader(compressor))
using (var jsonReader = new JsonTextReader(reader))
{
var serializer = JsonSerializer.CreateDefault(settings);
return serializer.Deserialize<T>(jsonReader);
}
}
}
See Performance Tips: Optimize Memory Usage in the Json.NET documentation.

For those looking for an idea how to use the extensions from #dbc in uwp apps, I modified the code to this - where the StorageFile is a file you have access to write to.
public static async void SerializeToFileCompressedAsync(object value, StorageFile file, JsonSerializerSettings settings = null)
{
using (var stream = await file.OpenStreamForWriteAsync())
SerializeCompressed(value, stream, settings);
}
public static void SerializeCompressed(object value, Stream stream, JsonSerializerSettings settings = null)
{
using (var compressor = new GZipStream(stream, CompressionMode.Compress))
using (var writer = new StreamWriter(compressor))
{
var serializer = JsonSerializer.CreateDefault(settings);
serializer.Serialize(writer, value);
}
}
public static async Task<T> DeserializeFromFileCompressedAsync<T>(StorageFile file, JsonSerializerSettings settings = null)
{
using (var stream = await file.OpenStreamForReadAsync())
return DeserializeCompressed<T>(stream, settings);
}
public static T DeserializeCompressed<T>(Stream stream, JsonSerializerSettings settings = null)
{
using (var compressor = new GZipStream(stream, CompressionMode.Decompress))
using (var reader = new StreamReader(compressor))
using (var jsonReader = new JsonTextReader(reader))
{
var serializer = JsonSerializer.CreateDefault(settings);
return serializer.Deserialize<T>(jsonReader);
}
}

Related

Reading from XML file in memory using XmlSerializer

I use code below to open zip archive in memory.
using (var leagueFile = File.OpenRead(openFileDialog.FileName))
using (var package = new ZipArchive(leagueFile, ZipArchiveMode.Read))
{
foreach (var team in package.Entries)
{
if (team.Name.EndsWith(".xml"))
{
_xmlHandler.Import<Player>(team.FullName, Encoding.UTF8);
//...
}
}
}
When I try to deserialize with my Import<T>() method, app crashes due to file wasn't found.
public T Import<T>(string fileName, Encoding encoding) where T : class, new()
{
var serializer = new XmlSerializer(typeof(T));
serializer.UnknownNode += serializer_UnknownNode;
serializer.UnknownAttribute += serializer_UnknownAttribute;
var reader = XmlReader.Create(new StreamReader(fileName, encoding));
var po = (T)serializer.Deserialize(reader);
return po;
}
The problem is that app is searching for fileName in the bin directory of application. Not in stream (?) of zip archive. Is there a way to do this with XmlSerializer class?
Modify your code as below:
using (var leagueFile = File.OpenRead(openFileDialog.FileName))
using (var package = new ZipArchive(leagueFile, ZipArchiveMode.Read))
{
foreach (var team in package.Entries)
{
if (team.Name.EndsWith(".xml"))
{
using(var xmlStream = team.Open())
{
_xmlHandler.Import<Player>(xmlStream, Encoding.UTF8);
//...
}
}
}
}
public T Import<T>(Stream input, Encoding encoding) where T : class, new()
{
var serializer = new XmlSerializer(typeof(T));
serializer.UnknownNode += serializer_UnknownNode;
serializer.UnknownAttribute += serializer_UnknownAttribute;
var reader = XmlReader.Create(input);
var po = (T)serializer.Deserialize(reader);
return po;
}

C# JSON Deserialize into local variables

I have something like this:
public class MyClass_T
{
public string filename;
public string filename_txt;
public int version = 1;
public double A;
public double B;
....and so on with about 100 more variables
}
I've written the data to a file in JSON format with
public bool readFormatFile(string filename)
{
JsonSerializer serializer = new JsonSerializer();
serializer.NullValueHandling = NullValueHandling.Ignore;
using (FileStream fs = new FileStream(filename, FileMode.Create, System.IO.FileAccess.Write, FileShare.Read))
{
using (StreamWriter sw = new StreamWriter(fs))
{
using (JsonWriter writer = new JsonTextWriter(sw))
{
serializer.Serialize(writer, this);
}
}
}
}
Now I want to deserialize it. I know I can do this:
public bool writeFormatFile(string filename)
{
MyClass_T MC = new MyClass_T();
using (FileStream fs = new FileStream(filename, FileMode.Open, System.IO.FileAccess.Read, FileShare.Read))
{
using (StreamReader sr = new StreamReader(fs))
{
JsonSerializer serializer = new JsonSerializer();
serializer.NullValueHandling = NullValueHandling.Ignore;
serializer.MissingMemberHandling = MissingMemberHandling.Ignore;
MC = (MyClass_T)serializer.Deserialize(sr, typeof(MyClass_T));
}
}
}
Note that readFormatFile and writeFormatFile are part of MyClass_T. I need to get the values back into my local variables without having to do a bunch of
filename = MC.filename;
filename_txt = MC.filename_txt;
version = MC.version;
A = MC.A;
B = MC.B;
...and so on for the 100 or so variables.
Thoughts and ideas on how to proceed on this?
Like many people have made clear, it is not a very good idea to deserialize a JSON object to local variables, however if you can not create a class for the object, you can use the dynamic object type, this will allow you to deserialize the JSON into an object whilst removing the need to add any strongly types models/classes.
all you need to do is cast the deserialized result to a dynamic as you would any other object, e.g.
dynamic myObject = (dynamic)serializer.Deserialize(sr, typeof(dynamic));
this will allow you to then access myObject as you would any other object e.g.
myObject.filename for example.
i would also like to reiterate that, you really should be using a class for this as it makes code a lot more predictable, maintainable and clean in general
Instead of using an instance 'ReadFormatFile' method, you could add a static method 'ReadFormatFile' to the 'MyClass_T' class that returns the 'MyClass_T' instance that is returned from the serializer. And then you could use that method instead of calling the 'ReadFormatFile' method (or using 'new' to instantiate a 'MyClass_T' instance that is populated with the information that was previously serialized to the file).
Consider the following class:
class Demo
{
public double A;
public double B;
public void WriteFormatFile(string filename)
{
JsonSerializer serializer = new JsonSerializer();
serializer.NullValueHandling = NullValueHandling.Ignore;
using (FileStream fs = new FileStream(filename, FileMode.Create, System.IO.FileAccess.Write, FileShare.Read))
{
using (StreamWriter sw = new StreamWriter(fs))
{
using (JsonWriter writer = new JsonTextWriter(sw))
{
serializer.Serialize(writer, this);
}
}
}
}
public static Demo ReadFormatFile(string filename)
{
using (FileStream fs = new FileStream(filename, FileMode.Open, System.IO.FileAccess.Read, FileShare.Read))
{
using (StreamReader sr = new StreamReader(fs))
{
JsonSerializer serializer = new JsonSerializer();
serializer.NullValueHandling = NullValueHandling.Ignore;
serializer.MissingMemberHandling = MissingMemberHandling.Ignore;
return (Demo)serializer.Deserialize(sr, typeof(Demo));
}
}
}
}
and its use elsewhere:
const string filename = "demo.json";
var d = new Demo();
d.A = 2;
d.B = 3;
d.WriteFormatFile(filename);
d.A = 4;
// replace d.ReadFormatFile(filename); with the following line
d = Demo.ReadFormatFile(filename);
Console.WriteLine(d.A);
The output will be '2', the restored value for the 'A' field.

Is there a format which can store tabular data in lesser size than excel?

I have already tried out keeping in text file(Obviosuly it takes more space as it is not customized to hold tables as an excel is). I also tried keeping serialized data(c#, just an fyi). There wasn't any benefit in either cases.
Since most 'tabular' data will be text, you'll get large gains in compressing this before saving (using GZip and BsonWriter as an example):
public static byte[] Compress(object entity)
{
using (var stream = new MemoryStream())
{
using (var zipStream = new GZipStream(stream, CompressionLevel.Optimal))
{
using (var writer = new BsonWriter(zipStream))
{
var serializer = new JsonSerializer();
serializer.Serialize(writer, entity);
}
}
return stream.ToArray();
}
}
** Update **
Protobuf is far superior to both XmlSerializer and BinaryFormatter in terms of serialized entity size and speed. Try using this to serialize your entities before compressing:
public static byte[] Compress(object entity)
{
using (var stream = new MemoryStream())
{
using (var zipStream = new GZipStream(stream, CompressionLevel.Optimal))
{
Serializer.Serialize(stream, entity);
}
return stream.ToArray();
}
}

Compress and decompress a Stream with Compression.DeflateStream

I am trying to compress and decompress a Stream using Compression.DeflateStream. Compressing seems to work correctly since the code below compresses my Stream to a 110 bytes long array. However, reading the decompressed Stream results in an empty string.
class Program
{
static void Main(string[] args)
{
// Compress a random string value
string value = Path.GetRandomFileName();
byte[] compressedBytes;
using (var writer = new StreamWriter(new MemoryStream()))
{
writer.Write(value);
writer.Flush();
writer.BaseStream.Position = 0;
compressedBytes = Compress(writer.BaseStream);
}
// Decompress compressed bytes
Stream decompressedStream = Decompress(compressedBytes);
// here already applies: decompressedStream.Length == 0
using (var reader = new StreamReader(decompressedStream))
{
string decompressedValue = reader.ReadToEnd();
if (value == decompressedValue)
Console.WriteLine("Success");
else
Console.WriteLine("Failed");
}
}
private static byte[] Compress(Stream input)
{
using (var compressStream = new MemoryStream())
using (var compressor = new DeflateStream(compressStream, CompressionMode.Compress))
{
input.CopyTo(compressor);
return compressStream.ToArray();
}
}
private static Stream Decompress(byte[] input)
{
var output = new MemoryStream();
using (var compressStream = new MemoryStream(input))
using (var decompressor = new DeflateStream(compressStream, CompressionMode.Decompress))
decompressor.CopyTo(output);
output.Position = 0;
return output;
}
}
Can anyone help me on this one?
Many thanks.
Fix your Compress function:
private static byte[] Compress(Stream input)
{
using(var compressStream = new MemoryStream())
using(var compressor = new DeflateStream(compressStream, CompressionMode.Compress))
{
input.CopyTo(compressor);
compressor.Close();
return compressStream.ToArray();
}
}
compressed stream was not flushed before returning the resulting byte array.
All those answers are far away from ideal form because all of you forgot that "using" disposing and closing streams its means that additional Close() is not needed. I think that ideal code will be like this:
public static class CompressionHelper
{
public static byte[] Compress(byte[] data)
{
byte[] compressArray = null;
try
{
using (MemoryStream memoryStream = new MemoryStream())
{
using (DeflateStream deflateStream = new DeflateStream(memoryStream, CompressionMode.Compress))
{
deflateStream.Write(data, 0, data.Length);
}
compressArray = memoryStream.ToArray();
}
}
catch (Exception exception)
{
// do something !
}
return compressArray;
}
public static byte[] Decompress(byte[] data)
{
byte[] decompressedArray = null;
try
{
using (MemoryStream decompressedStream = new MemoryStream())
{
using (MemoryStream compressStream = new MemoryStream(data))
{
using (DeflateStream deflateStream = new DeflateStream(compressStream, CompressionMode.Decompress))
{
deflateStream.CopyTo(decompressedStream);
}
}
decompressedArray = decompressedStream.ToArray();
}
}
catch (Exception exception)
{
// do something !
}
return decompressedArray;
}
}
Try closing the streams:
class Program
{
static void Main(string[] args)
{
// Compress a random string value
string value = DateTime.Now.ToLongTimeString();
byte[] compressedBytes;
using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(value)))
{
compressedBytes = Compress(stream);
}
// Decompress compressed bytes
using (var decompressedStream = Decompress(compressedBytes))
using (var reader = new StreamReader(decompressedStream))
{
string decompressedValue = reader.ReadToEnd();
if (value == decompressedValue)
Console.WriteLine("Success");
else
Console.WriteLine("Failed");
}
}
public static byte[] Compress(Stream input)
{
using (var compressedStream = new MemoryStream())
using (var zipStream = new GZipStream(compressedStream, CompressionMode.Compress))
{
input.CopyTo(zipStream);
zipStream.Close();
return compressedStream.ToArray();
}
}
public static Stream Decompress(byte[] data)
{
var output = new MemoryStream();
using(var compressedStream = new MemoryStream(data))
using(var zipStream = new GZipStream(compressedStream, CompressionMode.Decompress))
{
zipStream.CopyTo(output);
zipStream.Close();
output.Position = 0;
return output;
}
}
}

Compression/Decompression string with C#

I am newbie in .net. I am doing compression and decompression string in C#. There is a XML and I am converting in string and after that I am doing compression and decompression.There is no compilation error in my code except when I decompression my code and return my string, its returning only half of the XML.
Below is my code, please correct me where I am wrong.
Code:
class Program
{
public static string Zip(string value)
{
//Transform string into byte[]
byte[] byteArray = new byte[value.Length];
int indexBA = 0;
foreach (char item in value.ToCharArray())
{
byteArray[indexBA++] = (byte)item;
}
//Prepare for compress
System.IO.MemoryStream ms = new System.IO.MemoryStream();
System.IO.Compression.GZipStream sw = new System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Compress);
//Compress
sw.Write(byteArray, 0, byteArray.Length);
//Close, DO NOT FLUSH cause bytes will go missing...
sw.Close();
//Transform byte[] zip data to string
byteArray = ms.ToArray();
System.Text.StringBuilder sB = new System.Text.StringBuilder(byteArray.Length);
foreach (byte item in byteArray)
{
sB.Append((char)item);
}
ms.Close();
sw.Dispose();
ms.Dispose();
return sB.ToString();
}
public static string UnZip(string value)
{
//Transform string into byte[]
byte[] byteArray = new byte[value.Length];
int indexBA = 0;
foreach (char item in value.ToCharArray())
{
byteArray[indexBA++] = (byte)item;
}
//Prepare for decompress
System.IO.MemoryStream ms = new System.IO.MemoryStream(byteArray);
System.IO.Compression.GZipStream sr = new System.IO.Compression.GZipStream(ms,
System.IO.Compression.CompressionMode.Decompress);
//Reset variable to collect uncompressed result
byteArray = new byte[byteArray.Length];
//Decompress
int rByte = sr.Read(byteArray, 0, byteArray.Length);
//Transform byte[] unzip data to string
System.Text.StringBuilder sB = new System.Text.StringBuilder(rByte);
//Read the number of bytes GZipStream red and do not a for each bytes in
//resultByteArray;
for (int i = 0; i < rByte; i++)
{
sB.Append((char)byteArray[i]);
}
sr.Close();
ms.Close();
sr.Dispose();
ms.Dispose();
return sB.ToString();
}
static void Main(string[] args)
{
XDocument doc = XDocument.Load(#"D:\RSP.xml");
string val = doc.ToString(SaveOptions.DisableFormatting);
val = Zip(val);
val = UnZip(val);
}
}
My XML size is 63KB.
The code to compress/decompress a string
public static void CopyTo(Stream src, Stream dest) {
byte[] bytes = new byte[4096];
int cnt;
while ((cnt = src.Read(bytes, 0, bytes.Length)) != 0) {
dest.Write(bytes, 0, cnt);
}
}
public static byte[] Zip(string str) {
var bytes = Encoding.UTF8.GetBytes(str);
using (var msi = new MemoryStream(bytes))
using (var mso = new MemoryStream()) {
using (var gs = new GZipStream(mso, CompressionMode.Compress)) {
//msi.CopyTo(gs);
CopyTo(msi, gs);
}
return mso.ToArray();
}
}
public static string Unzip(byte[] bytes) {
using (var msi = new MemoryStream(bytes))
using (var mso = new MemoryStream()) {
using (var gs = new GZipStream(msi, CompressionMode.Decompress)) {
//gs.CopyTo(mso);
CopyTo(gs, mso);
}
return Encoding.UTF8.GetString(mso.ToArray());
}
}
static void Main(string[] args) {
byte[] r1 = Zip("StringStringStringStringStringStringStringStringStringStringStringStringStringString");
string r2 = Unzip(r1);
}
Remember that Zip returns a byte[], while Unzip returns a string. If you want a string from Zip you can Base64 encode it (for example by using Convert.ToBase64String(r1)) (the result of Zip is VERY binary! It isn't something you can print to the screen or write directly in an XML)
The version suggested is for .NET 2.0, for .NET 4.0 use the MemoryStream.CopyTo.
IMPORTANT: The compressed contents cannot be written to the output stream until the GZipStream knows that it has all of the input (i.e., to effectively compress it needs all of the data). You need to make sure that you Dispose() of the GZipStream before inspecting the output stream (e.g., mso.ToArray()). This is done with the using() { } block above. Note that the GZipStream is the innermost block and the contents are accessed outside of it. The same goes for decompressing: Dispose() of the GZipStream before attempting to access the data.
according to
this snippet
i use this code and it's working fine:
using System;
using System.IO;
using System.IO.Compression;
using System.Text;
namespace CompressString
{
internal static class StringCompressor
{
/// <summary>
/// Compresses the string.
/// </summary>
/// <param name="text">The text.</param>
/// <returns></returns>
public static string CompressString(string text)
{
byte[] buffer = Encoding.UTF8.GetBytes(text);
var memoryStream = new MemoryStream();
using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Compress, true))
{
gZipStream.Write(buffer, 0, buffer.Length);
}
memoryStream.Position = 0;
var compressedData = new byte[memoryStream.Length];
memoryStream.Read(compressedData, 0, compressedData.Length);
var gZipBuffer = new byte[compressedData.Length + 4];
Buffer.BlockCopy(compressedData, 0, gZipBuffer, 4, compressedData.Length);
Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gZipBuffer, 0, 4);
return Convert.ToBase64String(gZipBuffer);
}
/// <summary>
/// Decompresses the string.
/// </summary>
/// <param name="compressedText">The compressed text.</param>
/// <returns></returns>
public static string DecompressString(string compressedText)
{
byte[] gZipBuffer = Convert.FromBase64String(compressedText);
using (var memoryStream = new MemoryStream())
{
int dataLength = BitConverter.ToInt32(gZipBuffer, 0);
memoryStream.Write(gZipBuffer, 4, gZipBuffer.Length - 4);
var buffer = new byte[dataLength];
memoryStream.Position = 0;
using (var gZipStream = new GZipStream(memoryStream, CompressionMode.Decompress))
{
gZipStream.Read(buffer, 0, buffer.Length);
}
return Encoding.UTF8.GetString(buffer);
}
}
}
}
With the advent of .NET 4.0 (and higher) with the Stream.CopyTo() methods, I thought I would post an updated approach.
I also think the below version is useful as a clear example of a self-contained class for compressing regular strings to Base64 encoded strings, and vice versa:
public static class StringCompression
{
/// <summary>
/// Compresses a string and returns a deflate compressed, Base64 encoded string.
/// </summary>
/// <param name="uncompressedString">String to compress</param>
public static string Compress(string uncompressedString)
{
byte[] compressedBytes;
using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
{
using (var compressedStream = new MemoryStream())
{
// setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
// this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
// although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
{
uncompressedStream.CopyTo(compressorStream);
}
// call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
compressedBytes = compressedStream.ToArray();
}
}
return Convert.ToBase64String(compressedBytes);
}
/// <summary>
/// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
/// </summary>
/// <param name="compressedString">String to decompress.</param>
public static string Decompress(string compressedString)
{
byte[] decompressedBytes;
var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));
using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
{
using (var decompressedStream = new MemoryStream())
{
decompressorStream.CopyTo(decompressedStream);
decompressedBytes = decompressedStream.ToArray();
}
}
return Encoding.UTF8.GetString(decompressedBytes);
}
}
Here’s another approach using the extension methods technique to extend the String class to add string compression and decompression. You can drop the class below into an existing project and then use thusly:
var uncompressedString = "Hello World!";
var compressedString = uncompressedString.Compress();
and
var decompressedString = compressedString.Decompress();
To wit:
public static class Extensions
{
/// <summary>
/// Compresses a string and returns a deflate compressed, Base64 encoded string.
/// </summary>
/// <param name="uncompressedString">String to compress</param>
public static string Compress(this string uncompressedString)
{
byte[] compressedBytes;
using (var uncompressedStream = new MemoryStream(Encoding.UTF8.GetBytes(uncompressedString)))
{
using (var compressedStream = new MemoryStream())
{
// setting the leaveOpen parameter to true to ensure that compressedStream will not be closed when compressorStream is disposed
// this allows compressorStream to close and flush its buffers to compressedStream and guarantees that compressedStream.ToArray() can be called afterward
// although MSDN documentation states that ToArray() can be called on a closed MemoryStream, I don't want to rely on that very odd behavior should it ever change
using (var compressorStream = new DeflateStream(compressedStream, CompressionLevel.Fastest, true))
{
uncompressedStream.CopyTo(compressorStream);
}
// call compressedStream.ToArray() after the enclosing DeflateStream has closed and flushed its buffer to compressedStream
compressedBytes = compressedStream.ToArray();
}
}
return Convert.ToBase64String(compressedBytes);
}
/// <summary>
/// Decompresses a deflate compressed, Base64 encoded string and returns an uncompressed string.
/// </summary>
/// <param name="compressedString">String to decompress.</param>
public static string Decompress(this string compressedString)
{
byte[] decompressedBytes;
var compressedStream = new MemoryStream(Convert.FromBase64String(compressedString));
using (var decompressorStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
{
using (var decompressedStream = new MemoryStream())
{
decompressorStream.CopyTo(decompressedStream);
decompressedBytes = decompressedStream.ToArray();
}
}
return Encoding.UTF8.GetString(decompressedBytes);
}
}
I like #fubo's answer the best but I think this is much more elegant.
This method is more compatible because it doesn't manually store the length up front.
Also I've exposed extensions to support compression for string to string, byte[] to byte[], and Stream to Stream.
public static class ZipExtensions
{
public static string CompressToBase64(this string data)
{
return Convert.ToBase64String(Encoding.UTF8.GetBytes(data).Compress());
}
public static string DecompressFromBase64(this string data)
{
return Encoding.UTF8.GetString(Convert.FromBase64String(data).Decompress());
}
public static byte[] Compress(this byte[] data)
{
using (var sourceStream = new MemoryStream(data))
using (var destinationStream = new MemoryStream())
{
sourceStream.CompressTo(destinationStream);
return destinationStream.ToArray();
}
}
public static byte[] Decompress(this byte[] data)
{
using (var sourceStream = new MemoryStream(data))
using (var destinationStream = new MemoryStream())
{
sourceStream.DecompressTo(destinationStream);
return destinationStream.ToArray();
}
}
public static void CompressTo(this Stream stream, Stream outputStream)
{
using (var gZipStream = new GZipStream(outputStream, CompressionMode.Compress))
{
stream.CopyTo(gZipStream);
gZipStream.Flush();
}
}
public static void DecompressTo(this Stream stream, Stream outputStream)
{
using (var gZipStream = new GZipStream(stream, CompressionMode.Decompress))
{
gZipStream.CopyTo(outputStream);
}
}
}
This is an updated version for .NET 4.5 and newer using async/await and IEnumerables:
public static class CompressionExtensions
{
public static async Task<IEnumerable<byte>> Zip(this object obj)
{
byte[] bytes = obj.Serialize();
using (MemoryStream msi = new MemoryStream(bytes))
using (MemoryStream mso = new MemoryStream())
{
using (var gs = new GZipStream(mso, CompressionMode.Compress))
await msi.CopyToAsync(gs);
return mso.ToArray().AsEnumerable();
}
}
public static async Task<object> Unzip(this byte[] bytes)
{
using (MemoryStream msi = new MemoryStream(bytes))
using (MemoryStream mso = new MemoryStream())
{
using (var gs = new GZipStream(msi, CompressionMode.Decompress))
{
// Sync example:
//gs.CopyTo(mso);
// Async way (take care of using async keyword on the method definition)
await gs.CopyToAsync(mso);
}
return mso.ToArray().Deserialize();
}
}
}
public static class SerializerExtensions
{
public static byte[] Serialize<T>(this T objectToWrite)
{
using (MemoryStream stream = new MemoryStream())
{
BinaryFormatter binaryFormatter = new BinaryFormatter();
binaryFormatter.Serialize(stream, objectToWrite);
return stream.GetBuffer();
}
}
public static async Task<T> _Deserialize<T>(this byte[] arr)
{
using (MemoryStream stream = new MemoryStream())
{
BinaryFormatter binaryFormatter = new BinaryFormatter();
await stream.WriteAsync(arr, 0, arr.Length);
stream.Position = 0;
return (T)binaryFormatter.Deserialize(stream);
}
}
public static async Task<object> Deserialize(this byte[] arr)
{
object obj = await arr._Deserialize<object>();
return obj;
}
}
With this you can serialize everything BinaryFormatter supports, instead only of strings.
Edit:
In case, you need take care of Encoding, you could just use Convert.ToBase64String(byte[])...
Take a look at this answer if you need an example!
For those who still getting The magic number in GZip header is not correct. Make sure you are passing in a GZip stream. ERROR
and if your string was zipped using php you'll need to do something like:
public static string decodeDecompress(string originalReceivedSrc) {
byte[] bytes = Convert.FromBase64String(originalReceivedSrc);
using (var mem = new MemoryStream()) {
//the trick is here
mem.Write(new byte[] { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 }, 0, 8);
mem.Write(bytes, 0, bytes.Length);
mem.Position = 0;
using (var gzip = new GZipStream(mem, CompressionMode.Decompress))
using (var reader = new StreamReader(gzip)) {
return reader.ReadToEnd();
}
}
}
We can reduce code complexity by using StreamReader and StreamWriter rather than manually converting strings to byte arrays. Three streams is all you need:
public static byte[] Zip(string uncompressed)
{
byte[] ret;
using (var outputMemory = new MemoryStream())
{
using (var gz = new GZipStream(outputMemory, CompressionLevel.Optimal))
{
using (var sw = new StreamWriter(gz, Encoding.UTF8))
{
sw.Write(uncompressed);
}
}
ret = outputMemory.ToArray();
}
return ret;
}
public static string Unzip(byte[] compressed)
{
string ret = null;
using (var inputMemory = new MemoryStream(compressed))
{
using (var gz = new GZipStream(inputMemory, CompressionMode.Decompress))
{
using (var sr = new StreamReader(gz, Encoding.UTF8))
{
ret = sr.ReadToEnd();
}
}
}
return ret;
}
For .net6 cross platform Compression/Decompression string with C# using SharpZipLib library. Test for ubuntu(18.0.x) and windows.
#region helper
private byte[] Zip(string text)
{
if (text == null)
return null;
byte[] ret;
using (var outputMemory = new MemoryStream())
{
using (var gz = new GZipStream(outputMemory, CompressionLevel.Optimal))
{
using (var sw = new StreamWriter(gz, Encoding.UTF8))
{
sw.Write(text);
}
}
ret = outputMemory.ToArray();
}
return ret;
}
private string Unzip(byte[] bytes)
{
string ret = null;
using (var inputMemory = new MemoryStream(bytes))
{
using (var gz = new GZipStream(inputMemory, CompressionMode.Decompress))
{
using (var sr = new StreamReader(gz, Encoding.UTF8))
{
ret = sr.ReadToEnd();
}
}
}
return ret;
}
#endregion

Categories