uncompressed file is bigger than original file in GZIP - c#

i'm using the following function to compress(thanks to http://www.dotnetperls.com/):
public static void CompressStringToFile(string fileName, string value)
{
// A.
// Write string to temporary file.
string temp = Path.GetTempFileName();
File.WriteAllText(temp, value);
// B.
// Read file into byte array buffer.
byte[] b;
using (FileStream f = new FileStream(temp, FileMode.Open))
{
b = new byte[f.Length];
f.Read(b, 0, (int)f.Length);
}
// C.
// Use GZipStream to write compressed bytes to target file.
using (FileStream f2 = new FileStream(fileName, FileMode.Create))
using (GZipStream gz = new GZipStream(f2, CompressionMode.Compress, false))
{
gz.Write(b, 0, b.Length);
}
}
and for decompress:
static byte[] Decompress(byte[] gzip)
{
// Create a GZIP stream with decompression mode.
// ... Then create a buffer and write into while reading from the GZIP stream.
using (GZipStream stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
{
const int size = 4096;
byte[] buffer = new byte[size];
using (MemoryStream memory = new MemoryStream())
{
int count = 0;
do
{
count = stream.Read(buffer, 0, size);
if (count > 0)
{
memory.Write(buffer, 0, count);
}
}
while (count > 0);
return memory.ToArray();
}
}
}
so my goal is actually compress log files and than to decompress them in memory and compare the uncompressed file to the original file in order to check that the compression succeeded and i'm able to open the compressed file successfuly.
the problem is that the uncompressed file is most of the time bigger than the original file and my compare check is failing altough the compression probably succeeded.
any idea why ?
btw here how i compare the uncompressed file to the original file:
static bool FileEquals(byte[] file1, byte[] file2)
{
if (file1.Length == file2.Length)
{
for (int i = 0; i < file1.Length; i++)
{
if (file1[i] != file2[i])
{
return false;
}
}
return true;
}
return false;
}

Try this method to compress a file:
public static byte[] Compress(byte[] raw)
{
using (MemoryStream memory = new MemoryStream())
{
using (GZipStream gzip = new GZipStream(memory,
CompressionMode.Compress, true))
{
gzip.Write(raw, 0, raw.Length);
}
return memory.ToArray();
}
}
}
And this to decompress :
static byte[] Decompress(byte[] gzip)
{
// Create a GZIP stream with decompression mode.
// ... Then create a buffer and write into while reading from the GZIP stream.
using (GZipStream stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
{
const int size = 4096;
byte[] buffer = new byte[size];
using (MemoryStream memory = new MemoryStream())
{
int count = 0;
do
{
count = stream.Read(buffer, 0, size);
if (count > 0)
{
memory.Write(buffer, 0, count);
}
}
while (count > 0);
return memory.ToArray();
}
}
}
}
Tell me if it worked.
Goodluck.

Think you'd be better off with the simplest API call, try Stream.CopyTo(). I can't find the error in your code. If I was working on it, I'd probably make sure everything is getting flushed properly.. can't recall if GZipStream is going to flush its output to FileStream when the using block closes.. but then you are also saying that the final file is larger, not smaller.
Anyhow, best policy in my experience.. don't rewrite gotcha prone code when you don't need to. At least you tested it ;)

Related

Extract a bytearray variable using C# DotNetZip

I have a C# function that receives a compressed ByteArray as a parameter.
I need to EXTRACT this byteArray and send the resulting uncompressed byteArray to another function.
I need help extracting zipBytes to unzippedBytes please see below PSEUDO CODE:
SOLUTION using Zlib.net!
byte[] receiveZipByte (byte[] zipBytes)
{
MemoryStream oInStream = new MemoryStream(pZFileData);
ZInputStream oZInstream = new ZInputStream(oInStream);
MemoryStream oOutStream = new MemoryStream();
byte[] buffer = new byte[2000];
int len;
while ((len = oZInstream.read(buffer, 0, 2000)) > 0)
{
oOutStream.Write(buffer, 0, len);
}
byte[] pFileData = oOutStream.ToArray();
oZInstream.Close();
oOutStream.Close();
return unzippedBytes;
}
If what you're trying to do is decompress data that has been compressed using zlib, I would suggest using the ZLib.NET library: http://www.componentace.com/zlib_.NET.htm
It would depend on what you want to do with the data; but, here's an example of using a Stream with DotNetZip:
using (var input = new ZipInputStream(new MemoryStream(zipBytes)))
{
ZipEntry e;
while ((e = input.GetNextEntry()) != null)
{
if (e.IsDirectory) continue;
using (var output = File.Open(e.FileName, FileMode.Create, FileAccess.ReadWrite))
{
while ((n = input.Read(buffer, 0, buffer.Length)) > 0)
{
output.Write(buffer, 0, n);
}
}
}
}

C# decode (decompress) Deflate data of PDF File

I would like to decompress in C# some DeflateCoded data (PDF extracted).
Unfortunately I got every time the exception "Found invalid data while decoding.".
But the data are valid.
private void Decompress()
{
FileStream fs = new FileStream(#"S:\Temp\myFile.bin", FileMode.Open);
//First two bytes are irrelevant
fs.ReadByte();
fs.ReadByte();
DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Decompress);
StreamToFile(d_Stream, #"S:\Temp\myFile1.txt", FileMode.OpenOrCreate);
d_Stream.Close();
fs.Close();
}
private static void StreamToFile(Stream inputStream, string outputFile, FileMode fileMode)
{
if (inputStream == null)
throw new ArgumentNullException("inputStream");
if (String.IsNullOrEmpty(outputFile))
throw new ArgumentException("Argument null or empty.", "outputFile");
using (FileStream outputStream = new FileStream(outputFile, fileMode, FileAccess.Write))
{
int cnt = 0;
const int LEN = 4096;
byte[] buffer = new byte[LEN];
while ((cnt = inputStream.Read(buffer, 0, LEN)) != 0)
outputStream.Write(buffer, 0, cnt);
}
}
Does anyone has some ideas?
Thanks.
I added this for test data:-
private static void Compress()
{
FileStream fs = new FileStream(#"C:\Temp\myFile.bin", FileMode.Create);
DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Compress);
for (byte n = 0; n < 255; n++)
d_Stream.WriteByte(n);
d_Stream.Close();
fs.Close();
}
Modified Decompress like this:-
private static void Decompress()
{
FileStream fs = new FileStream(#"C:\Temp\myFile.bin", FileMode.Open);
//First two bytes are irrelevant
// fs.ReadByte();
// fs.ReadByte();
DeflateStream d_Stream = new DeflateStream(fs, CompressionMode.Decompress);
StreamToFile(d_Stream, #"C:\Temp\myFile1.txt", FileMode.OpenOrCreate);
d_Stream.Close();
fs.Close();
}
Ran it like this:-
static void Main(string[] args)
{
Compress();
Decompress();
}
And got no errors.
I conclude that either the first two bytes are relevant (Obviously they are with my particular test data.) or
that your data has a problem.
Can we have some of your test data to play with?
(Obviously don't if it's sensitive)
private static string decompress(byte[] input)
{
byte[] cutinput = new byte[input.Length - 2];
Array.Copy(input, 2, cutinput, 0, cutinput.Length);
var stream = new MemoryStream();
using (var compressStream = new MemoryStream(cutinput))
using (var decompressor = new DeflateStream(compressStream, CompressionMode.Decompress))
decompressor.CopyTo(stream);
return Encoding.Default.GetString(stream.ToArray());
}
Thank you user159335 and user1011394 for bringing me on the right track! Just pass all bytes of the stream to input of above function. Make sure the bytecount is the same as the length specified.
All you need to do is use GZip instead of Deflate. Below is the code I use for the content of the stream… endstream section in a PDF document:
using System.IO.Compression;
public void DecompressStreamData(byte[] data)
{
int start = 0;
while ((this.data[start] == 0x0a) | (this.data[start] == 0x0d)) start++; // skip trailling cr, lf
byte[] tempdata = new byte[this.data.Length - start];
Array.Copy(data, start, tempdata, 0, data.Length - start);
MemoryStream msInput = new MemoryStream(tempdata);
MemoryStream msOutput = new MemoryStream();
try
{
GZipStream decomp = new GZipStream(msInput, CompressionMode.Decompress);
decomp.CopyTo(msOutput);
}
catch (Exception e)
{
MessageBox.Show(e.Message);
}
}
None of the solutions worked for me on Deflate attachments in a PDF/A-3 document. Some research showed that .NET DeflateStream does not support compressed streams with a header and trailer as per RFC1950.
Error message for reference: The archive entry was compressed using an unsupported compression method.
The solution is to use an alternative library SharpZipLib
Here is a simple method that successfully decoded a Deflate attachment from a PDF/A-3 file for me:
public static string SZLDecompress(byte[] data) {
var outputStream = new MemoryStream();
using var compressedStream = new MemoryStream(data);
using var inputStream = new InflaterInputStream(compressedStream);
inputStream.CopyTo(outputStream);
outputStream.Position = 0;
return Encoding.Default.GetString(outputStream.ToArray());
}

C# Trying to replace a byte while using MemoryStream class

I get a text file from a mainframe and sometimes there are some 0x0D injected into the middle of the text lines.
The previos programmer created a method using the FileStream class. This method works fine but is taking around 30 minutes to go thru the entire file.
My thought was to pass the text lines that are needed (about 25 lines) to a method to decrease the processing time.
I've been working with the MemoryStream class but am having issue where it does not find the 0x0D control code.
Here is the current FileStream method:
private void ReplaceFileStream(string strInputFile)
{
FileStream fileStream = new FileStream(strInputFile, FileMode.Open, FileAccess.ReadWrite);
byte filebyte;
while (fileStream.Position < fileStream.Length)
{
filebyte = (byte)fileStream.ReadByte();
if (filebyte == 0x0D)
{
filebyte = 0x20;
fileStream.Position = fileStream.Position - 1;
fileStream.WriteByte(filebyte);
}
}
fileStream.Close();
}
and here is the MemoryStream method:
private void ReplaceMemoryStream(string strInputLine)
{
byte[] byteArray = Encoding.ASCII.GetBytes(strInputLine);
MemoryStream fileStream = new MemoryStream(byteArray);
byte filebyte;
while (fileStream.Position < fileStream.Length)
{
filebyte = (byte)fileStream.ReadByte();
if (filebyte == 0x0D)
{
filebyte = 0x20;
fileStream.Position = fileStream.Position - 1;
fileStream.WriteByte(filebyte);
}
}
fileStream.Close();
}
As I have not used the MemoryStream class before am not that familar with it. Any tips or ideas?
I don't know the size of your files, but if they are small enough that you can load the whole thing in memory at once, then you could do something like this:
private void ReplaceFileStream(string strInputFile)
{
byte[] fileBytes = File.ReadAllBytes(strInputFile);
bool modified = false;
for(int i=0; i < fileBytes.Length; ++i)
{
if (fileByte[i] == 0x0D)
{
fileBytes[i] = 0x20;
modified = true;
}
}
if (modified)
{
File.WriteAllBytes(strInputFile, fileBytes);
}
}
If you can't read the whole file in at once, then you should switch to a buffered reading type of setup, here is an example that reads from the file, writes to a temp file, then in the end copies the temp file over the original file. This should yield better performance then reading a file one byte at a time:
private void ReplaceFileStream(string strInputFile)
{
string tempFile = Path.GetTempFileName();
try
{
using(FileStream input = new FileStream(strInputFile,
FileMode.Open, FileAccess.Read))
using(FileStream output = new FileStream(tempFile,
FileMode.Create, FileAccess.Write))
{
byte[] buffer = new byte[4096];
bytesRead = input.Read(buffer, 0, 4096);
while(bytesRead > 0)
{
for(int i=0; i < bytesRead; ++i)
{
if (buffer[i] == 0x0D)
{
buffer[i] = 0x20;
}
}
output.Write(buffer, 0, bytesRead);
bytesRead = input.Read(buffer, 0, 4096);
}
output.Flush();
}
File.Copy(tempFile, strInputFile);
}
finally
{
if (File.Exists(tempFile))
{
File.Delete(tempFile);
}
}
}
if your replacement code does not find the 0x0D in the stream and the previous method with the FileStream does it, I think it could be because of the Encoding you are using to get the bytes of the file, you can try with some other encoding types.
otherwise your code seems to be fine, I would use a using around the MemoryStream to be sure it gets closed and disposed, something like this:
using(var fileStream = new MemoryStream(byteArray))
{
byte filebyte;
// your while loop...
}
looking at your code I am not 100% sure the changes you make to the memory stream will be persisted; Actually I think that if you do not save it after the changes, your changes will be lost. I can be wrong in this but you should test and see, if it does not save you should use StreamWriter to save it after the changes.

Problem with C# Decompression

Have some data in a sybase image type column that I want to use in a C# app. The data has been compressed by Java using the java.util.zip package. I wanted to test that I could decompress the data in C#. So I wrote a test app that pulls it out of the database:
byte[] bytes = (byte[])reader.GetValue(0);
This gives me a compressed byte[] of 2479 length.
Then I pass this to a seemingly standard C# decompression method:
public static byte[] Decompress(byte[] gzBuffer)
{
MemoryStream ms = new MemoryStream();
int msgLength = BitConverter.ToInt32(gzBuffer, 0);
ms.Write(gzBuffer, 4, gzBuffer.Length - 4);
byte[] buffer = new byte[msgLength];
ms.Position = 0;
GZipStream zip = new GZipStream(ms, CompressionMode.Decompress);
zip.Read(buffer, 0, buffer.Length);
return buffer;
}
The value for msgLength is 1503501432 which seems way out of range. The original document should be in the range of 5K -50k. Anyway when I use that value to create "buffer" not surprisingly I get an OutOfMemoryException.
What is happening?
Jim
The Java compress method is as follows:
public byte[] compress(byte[] bytes) throws Exception {
byte[] results = new byte[bytes.length];
Deflater deflator = new Deflater();
deflater.setInput(bytes);
deflater.finish();
int len = deflater.deflate(results);
byte[] out = new byte[len];
for(int i=0; i<len; i++) {
out[i] = results[i];
}
return(out);
}
As I cant see your java code, I can only guess you are compressing your data to a zip file stream. Therefore it will obviously fail if you are trying to decompress that stream with a gzip decompression in c#. Either you change your java code to a gzip compression (Example here at the bottom of the page), or you decompress the zip file stream in c# with an appropriate library (e.g. SharpZipLib).
Update
Ok now, I see you are using deflate for the compression in java. So, obviously you have to use the same algorithm in c#: System.IO.Compression.DeflateStream
public static byte[] Decompress(byte[] buffer)
{
using (MemoryStream ms = new MemoryStream(buffer))
using (Stream zipStream = new DeflateStream(ms,
CompressionMode.Decompress, true))
{
int initialBufferLength = buffer.Length * 2;
byte[] buffer = new byte[initialBufferLength];
bool finishedExactly = false;
int read = 0;
int chunk;
while (!finishedExactly &&
(chunk = zipStream.Read(buffer, read, buffer.Length - read)) > 0)
{
read += chunk;
if (read == buffer.Length)
{
int nextByte = zipStream.ReadByte();
// End of Stream?
if (nextByte == -1)
{
finishedExactly = true;
}
else
{
byte[] newBuffer = new byte[buffer.Length * 2];
Array.Copy(buffer, newBuffer, buffer.Length);
newBuffer[read] = (byte)nextByte;
buffer = newBuffer;
read++;
}
}
}
if (!finishedExactly)
{
byte[] final = new byte[read];
Array.Copy(buffer, final, read);
buffer = final;
}
}
return buffer;
}

compressing and decomressing in .net endsup with an decompressed array of zero's

I'm trying to compress and decompress a memory stream to send it over an tcp connection.
In the following code snap I do do the decompressing right after compressing to get it working first.
What ever I do I end up with a devompressed buffer wit all zero's and in the line
int read = Decompress.Read(buffie, 0, buffie.Length);
it seems that 0 bytes are read.
Does anyone has a clue what is wrong?
bytesRead = ms.Read(buf, 0, i);
MemoryStream partialMs = new MemoryStream();
GZipStream gZip = new GZipStream(partialMs, CompressionMode.Compress);
gZip.Write(buf, 0, buf.Length);
partialMs.Position = 0;
byte[] compressedBuf = new byte[partialMs.Length];
partialMs.Read(compressedBuf, 0, (int)partialMs.Length);
partialMs.Close();
byte[] gzBuffer = new byte[compressedBuf.Length + 4];
System.Buffer.BlockCopy(compressedBuf, 0, gzBuffer, 4, compressedBuf.Length);
System.Buffer.BlockCopy(BitConverter.GetBytes(buf.Length), 0, gzBuffer, 0, 4);
using (MemoryStream mems = new MemoryStream())
{
int msgLength = BitConverter.ToInt32(gzBuffer, 0);
byte[] buffie = new byte[msgLength];
mems.Write(gzBuffer, 4, gzBuffer.Length - 4);
mems.Flush();
mems.Position = 0;
using (GZipStream Decompress = new GZipStream(mems, CompressionMode.Decompress, true))
{
int read = Decompress.Read(buffie, 0, buffie.Length);
Decompress.Close();
}
}
Your implementation could use some work. there seems to be some confusion as to which streams should be used where. here is a working example to get you started..
see user content at the bottom of this MSDN page
var original = new byte[65535];
var compressed = GZipTest.Compress(original);
var decompressed = GZipTest.Decompress(compressed);
using System.IO;
using System.IO.Compression;
public class GZipTest
{
public static byte[] Compress(byte[] uncompressedBuffer)
{
using (var ms = new MemoryStream())
{
using (var gzip = new GZipStream(ms, CompressionMode.Compress, true))
{
gzip.Write(uncompressedBuffer, 0, uncompressedBuffer.Length);
}
byte[] compressedBuffer = ms.ToArray();
return compressedBuffer;
}
}
public static byte[] Decompress(byte[] compressedBuffer)
{
using (var gzip = new GZipStream(new MemoryStream(compressedBuffer), CompressionMode.Decompress))
{
byte[] uncompressedBuffer = ReadAllBytes(gzip);
return uncompressedBuffer;
}
}
private static byte[] ReadAllBytes(Stream stream)
{
var buffer = new byte[4096];
using (var ms = new MemoryStream())
{
int bytesRead = 0;
do
{
bytesRead = stream.Read(buffer, 0, buffer.Length);
if (bytesRead > 0)
{
ms.Write(buffer, 0, bytesRead);
}
} while (bytesRead > 0);
return ms.ToArray();
}
}
}
You're not closing the GzipStream you're writing to, so it's probably all buffered. I suggest you close it when you're done writing your data.
By the way, you can get the data out of a MemoryStream much more easily than your current code: use MemoryStream.ToArray.

Categories