C# Split a file into two byte arrays

Because the maximum size of a byte array is 2 GB, let's say I have a larger file and I need to convert it to byte arrays. Since I can't hold the whole file in a single array, how should I split it into two?
I tried:
long length = new System.IO.FileInfo(@"c:\a.mp4").Length;
int chunkSize = Convert.ToInt32(length / 2);
byte[] part2;
FileStream fileStream = new FileStream(filepath, FileMode.Open, FileAccess.Read);
try
{
    part2 = new byte[chunkSize]; // create buffer
    fileStream.Read(part2, 0, chunkSize);
}
finally
{
    fileStream.Close();
}
byte[] part3;
fileStream = new FileStream(filepath, FileMode.Open, FileAccess.Read);
try
{
    part3 = new byte[chunkSize]; // create buffer
    fileStream.Read(part3, 5, (int)(length - (long)chunkSize));
}
finally
{
    fileStream.Close();
}
but it's not working.
Any ideas?

You can use a FileStream to read, in chunks, a file that is too large to fit into a single byte array:
const int max = 1024 * 1024;

public void ReadALargeFile(string file, int start = 0)
{
    FileStream fileStream = new FileStream(file, FileMode.Open, FileAccess.Read);
    using (fileStream)
    {
        byte[] buffer = new byte[max];
        fileStream.Seek(start, SeekOrigin.Begin);
        // The second argument to Read is the offset into the buffer, so it stays 0.
        int bytesRead = fileStream.Read(buffer, 0, max);
        while (bytesRead > 0)
        {
            DoSomething(buffer, bytesRead);
            bytesRead = fileStream.Read(buffer, 0, max);
        }
    }
}
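DoSomething is whatever per-chunk processing you need. As a hypothetical illustration (the output file name is made up), it could append each chunk to a part file:
// Hypothetical example of DoSomething: append each chunk to an output file.
// Only the first 'bytesRead' bytes of the buffer are valid.
private static void DoSomething(byte[] buffer, int bytesRead)
{
    using (var output = new FileStream("part1.bin", FileMode.Append, FileAccess.Write))
    {
        output.Write(buffer, 0, bytesRead);
    }
}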

If you are working with extremely large files, you should use MemoryMappedFile, which maps a physical file to a memory space:
using (var mmf = MemoryMappedFile.CreateFromFile(@"c:\path\to\big.file"))
{
    using (var accessor = mmf.CreateViewAccessor())
    {
        byte myValue = accessor.ReadByte(someOffset);
        accessor.Write(someOffset, (byte)someValue);
    }
}
See also: MemoryMappedViewAccessor
You can also read/write chunks of the file with the different methods in MemoryMappedViewAccessor.
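For instance, ReadArray can pull a whole chunk into a byte array in one call (a minimal sketch; the path, offset, and chunk size below are placeholders):
// Requires: using System.IO.MemoryMappedFiles;
using (var mmf = MemoryMappedFile.CreateFromFile(@"c:\path\to\big.file"))
using (var accessor = mmf.CreateViewAccessor())
{
    long someOffset = 0;           // placeholder offset
    byte[] chunk = new byte[4096]; // placeholder chunk size
    int read = accessor.ReadArray(someOffset, chunk, 0, chunk.Length);
    // 'read' is the number of bytes actually copied into 'chunk'.
}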

This was my solution:
byte[] part1;
byte[] part2;
bool odd = false;
int chunkSize = Convert.ToInt32(length / 2);
if (length % 2 == 0)
{
    part1 = new byte[chunkSize];
    part2 = new byte[chunkSize];
}
else
{
    part1 = new byte[chunkSize];
    part2 = new byte[chunkSize + 1];
    odd = true;
}
FileStream fileStream = new FileStream(filepath, FileMode.Open, FileAccess.Read);
using (fileStream)
{
    fileStream.Seek(0, SeekOrigin.Begin);
    int bytesRead = fileStream.Read(part1, 0, chunkSize);
    if (odd)
    {
        bytesRead = fileStream.Read(part2, 0, chunkSize + 1);
    }
    else
    {
        bytesRead = fileStream.Read(part2, 0, chunkSize);
    }
}
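One general caveat (not from the original post): FileStream.Read is not guaranteed to fill the buffer in a single call, so for halves this large it is safer to loop until the requested count has been read. A minimal helper sketch:
// Keep reading until 'count' bytes have arrived or the end of the stream is reached.
static int ReadFully(Stream stream, byte[] buffer, int offset, int count)
{
    int total = 0;
    while (total < count)
    {
        int n = stream.Read(buffer, offset + total, count - total);
        if (n == 0) break; // end of stream
        total += n;
    }
    return total;
}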

Related

transfer byte array by chunk over webservice asmx

I am working on a legacy web service that is used to transfer zip files between servers. It worked fine until recently, when the files grew larger than 700 MB. This causes exceptions because my console application (the client) cannot receive the large byte array sent by the web service all at once, so I tried to send and receive the byte array in chunks. However, the resulting zip file ends up corrupt and cannot be unzipped. Below is what I have tried. I am a bit unfamiliar with web services, so I would appreciate it if you could point out my mistake or perhaps point me to a better solution.
Console app (original):
string saveto = MdlMain.FileName;
object file = (object)serviceSoapClient.GetFile(saveto);
FileStream fileStream = new FileStream(GlobalVariable.FileDirectory + str2, FileMode.Create);
fileStream.Write((byte[])file, 0, Conversions.ToInteger(NewLateBinding.LateGet(file, (Type)null, "Length", new object[0], (string[])null, (Type[])null, (bool[])null)));
fileStream.Flush();
fileStream.Close();
Webservice (original):
public byte[] GetFile(string strFilename)
{
    FileStream fileStream = new FileStream(this.DJDownloadPath + strFilename, FileMode.Open, FileAccess.Read);
    long length = fileStream.Length;
    byte[] array = new byte[checked((int)(length + 1L - 1L) + 1)];
    fileStream.Read(array, 0, checked((int)length));
    fileStream.Close();
    HttpContext.Current.Response.BufferOutput = false;
    HttpContext.Current.Response.Buffer = false;
    return array;
}
Console app (modified):
FileStream fileStream = new FileStream(@"D:\example.zip", FileMode.Create);
int totalchunkNo = targetfilesize / 2000000;
int remainder = targetfilesize % 2000000;
if (remainder > 0 && remainder != totalchunkNo)
    totalchunkNo++;
for (int i = 1; i <= totalchunkNo; i++)
{
    byte[] bytetobeWritten = (byte[])serviceSoapClient.GetFileByChunk(MdlMain.FileName, i);
    fileStream.Write(bytetobeWritten, 0, Conversions.ToInteger(NewLateBinding.LateGet(bytetobeWritten, (Type)null, "Length", new object[0], (string[])null, (Type[])null, (bool[])null)));
}
fileStream.Flush();
fileStream.Close();
Webservice (modified):
[WebMethod]
public byte[] GetFileByChunk(string strFilename, int requestChunkNo)
{
    FileStream fileStream = new FileStream(this.DJDownloadPath + strFilename, FileMode.Open, FileAccess.Read);
    long length = fileStream.Length;
    byte[] array = new byte[length];
    int incomingOffset = 0;
    int chunkSize = 2000000;
    fileStream.Read(array, 0, (int)length);
    fileStream.Close();
    int currentchunkNo = 0;
    byte[] outboundBuffer = new byte[chunkSize];
    while (incomingOffset < array.Length)
    {
        int lengthh = Math.Min(outboundBuffer.Length, array.Length - incomingOffset);
        Buffer.BlockCopy(array, incomingOffset, outboundBuffer, 0, lengthh);
        incomingOffset += lengthh;
        currentchunkNo++;
        if (currentchunkNo == requestChunkNo)
        {
            return outboundBuffer;
        }
    }
    return null;
}
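One likely cause of the corruption: the last chunk is returned as a full 2,000,000-byte outboundBuffer even when fewer bytes remain, so the client writes padding into the zip. A sketch of a per-chunk method that seeks straight to the requested chunk and returns only the bytes that actually exist (names follow the code above; this is an illustration, not the original service):
[WebMethod]
public byte[] GetFileByChunk(string strFilename, int requestChunkNo)
{
    const int chunkSize = 2000000;
    using (var fileStream = new FileStream(this.DJDownloadPath + strFilename, FileMode.Open, FileAccess.Read))
    {
        long offset = (long)(requestChunkNo - 1) * chunkSize;
        if (offset >= fileStream.Length)
            return null; // no such chunk

        fileStream.Seek(offset, SeekOrigin.Begin);
        int toRead = (int)Math.Min(chunkSize, fileStream.Length - offset);
        byte[] buffer = new byte[toRead];
        int read = 0;
        while (read < toRead) // Read may return fewer bytes than requested
        {
            int n = fileStream.Read(buffer, read, toRead - read);
            if (n == 0) break;
            read += n;
        }
        return buffer; // exactly the bytes of this chunk, no padding
    }
}
This also avoids re-reading the entire file into memory on every request.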

Read Chunk of data from file based on offset and length

int n = 0;
string encodeString = string.Empty;
using (FileStream fsSource = new FileStream("test.pdf", FileMode.Open, FileAccess.Read))
{
    byte[] bytes = new byte[count];
    n = fsSource.Read(bytes, offset, count);
    encodeString = System.Convert.ToBase64String(bytes);
}
The above code works fine if I provide offset 0 and length 1024, but the second time, when I provide offset 1024 and length 1024, it returns an error.
My requirement is to get the byte array data from a given offset up to the given length:
1st chunk = 0-1024
2nd chunk = 1024-2048
..
Last chunk = someValue - file size.
For example, in Node.js, readChunk.sync(file_path, Number(offset), Number(size)) is able to get the byte array of data from offset to offset + size.
public static string ReadFileStreamInChunks()
{
    const int readChunkBufferLength = 1024;
    string filePath = "test.pdf";
    string encodeString = string.Empty;
    var readChunk = new char[readChunkBufferLength];
    int readChunkLength;
    using (StringWriter sw = new StringWriter())
    using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(fs))
    {
        do
        {
            readChunkLength = sr.ReadBlock(readChunk, 0, readChunkBufferLength);
            sw.Write(readChunk, 0, readChunkLength);
        } while (readChunkLength > 0);
        return sw.ToString();
    }
}
Actually, I think your problem is understanding what these parameters mean in your code. count is your chunk size, and offset is where to start writing within the buffer, not where to start reading in the file. So (1) if you want to read from some point in the file to the end, advance the file position by the number of bytes you want to skip; and (2) if you want to read a part of the file from the middle, you shouldn't modify count (that is your chunk size), you should modify where you write your byte array. Usually it's a do-while loop like the following (stopAtPosition is a placeholder for the byte count at which you want to stop):
long position = 0;
do
{
    // Read bytes from the input stream (request.FileByteStream comes from the surrounding service code).
    int bytesRead = request.FileByteStream.Read(buffer, 0, chunkSize);
    if (bytesRead == 0)
    {
        break;
    }
    // Write bytes to the output stream.
    writeStream.Write(buffer, 0, bytesRead);
    position += bytesRead;
    if (position >= stopAtPosition) // stop once you have copied as many bytes as you want
        break;
} while (true);
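Put differently: the offset argument of Read positions within the buffer, not within the file. To read the chunk that starts at a given file offset, seek first and keep the buffer offset at 0. A minimal sketch using the names from the question:
// 'offset' is the position in the file, 'count' the chunk size.
string encodeString;
using (FileStream fsSource = new FileStream("test.pdf", FileMode.Open, FileAccess.Read))
{
    byte[] bytes = new byte[count];
    fsSource.Seek(offset, SeekOrigin.Begin);   // move to the file offset
    int n = fsSource.Read(bytes, 0, count);    // buffer offset stays 0
    encodeString = Convert.ToBase64String(bytes, 0, n);
}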

C# file transfer with tcpclient and server

When I send a file with the code below, a small amount of data is missing: the file size does not match on the receiver side. Sending a regular string is fine, so there's no connection issue here. I'm just looking for a minimal improvement to fix the issue; I will add error checking etc. later. Thanks! The code is mostly copied from some tutorial, but I don't remember which one.
Client is the standard .NET TcpClient class.
Client.Client is its socket.
public void SendFile2(string fileName)
{
    using (FileStream fs = File.OpenRead(fileName))
    {
        byte[] lenBytes = BitConverter.GetBytes((int)fs.Length);
        Client.Client.Send(lenBytes);
        byte[] buffer = new byte[1024];
        int bytesRead;
        fs.Position = 0;
        while ((bytesRead = fs.Read(buffer, 0, 1024)) > 0)
            Client.Client.Send(buffer, bytesRead, SocketFlags.None);
    }
}
public bool ReceiveFile2(string fileName)
{
    using (FileStream fs = File.Create(fileName))
    {
        byte[] lenBytes = new byte[4];
        if (Client.Client.Receive(lenBytes) < 4)
            return false;
        long len = BitConverter.ToInt32(lenBytes, 0);
        byte[] buffer = new byte[1024];
        int bytesRead;
        while ((bytesRead = Client.Client.Receive(buffer)) > 0)
            fs.Write(buffer, 0, bytesRead);
        return len == fs.Position;
    }
}
SOLUTION:
public void SendFile(string fileName)
{
    using (FileStream fs = File.OpenRead(fileName))
    {
        byte[] lenBytes = BitConverter.GetBytes((int)fs.Length);
        Client.Client.Send(lenBytes);
        byte[] buffer = new byte[1024];
        int bytesRead;
        fs.Position = 0;
        while ((bytesRead = fs.Read(buffer, 0, 1024)) > 0)
            Client.Client.Send(buffer, bytesRead, SocketFlags.None);
    }
}
public bool ReceiveFile(string fileName)
{
    using (FileStream fs = File.Create(fileName))
    {
        byte[] lenBytes = new byte[4];
        if (Client.Client.Receive(lenBytes) < 4)
            return false;
        long len = BitConverter.ToInt32(lenBytes, 0);
        byte[] buffer = new byte[1024];
        int bytesRead;
        // Changed from here
        while (fs.Position < len)
        {
            bytesRead = Client.Client.Receive(buffer);
            fs.Write(buffer, 0, bytesRead);
        }
        // To here
        return len == fs.Position;
    }
}
I think this line can be a problem.
if (Client.Client.Receive(lenBytes) < 4)
and
while ((bytesRead = Client.Client.Receive(buffer)) > 0)
You have two Receive calls in your code, so you may drop the first bytes.
That could explain the difference you see in file sizes.
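A further refinement worth considering (not part of the answers above): cap each Receive so the loop never reads more than the announced length; otherwise bytes belonging to a following message could end up in the file. A sketch using the names from the code above:
// Receive exactly 'len' bytes into the file, never more.
while (fs.Position < len)
{
    int toRead = (int)Math.Min(buffer.Length, len - fs.Position);
    int bytesRead = Client.Client.Receive(buffer, toRead, SocketFlags.None);
    if (bytesRead == 0)
        break; // connection closed early
    fs.Write(buffer, 0, bytesRead);
}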

How to read file by chunks

I'm a little bit confused about how I should read a large file (> 8 GB) in chunks when each chunk has its own size.
If I know the chunk size, the code looks like this:
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, ProgramOptions.BufferSizeForChunkProcessing))
{
    using (BufferedStream bs = new BufferedStream(fs, ProgramOptions.BufferSizeForChunkProcessing))
    {
        byte[] buffer = new byte[ProgramOptions.BufferSizeForChunkProcessing];
        int byteRead;
        while ((byteRead = bs.Read(buffer, 0, ProgramOptions.BufferSizeForChunkProcessing)) > 0)
        {
            byte[] originalBytes;
            using (MemoryStream mStream = new MemoryStream())
            {
                mStream.Write(buffer, 0, byteRead);
                originalBytes = mStream.ToArray();
            }
        }
    }
}
But imagine I've read a large file in chunks, done some encoding on each chunk (so each chunk's size has changed), and written all processed chunks to a new file. Now I need to do the opposite operation, but I don't know the exact chunk sizes. My idea: after each chunk is processed, I write the new chunk size before the chunk bytes, like this:
Number of block bytes
Block bytes
Number of block bytes
Block bytes
So in that case, the first thing I need to do is read the chunk's header to learn exactly what the chunk size is. I read and write only byte arrays to the file. But I have a question: what should the chunk's header look like? Maybe the header has to contain some boundary?
If the file is rigidly structured so that each block of data is preceded by a 32-bit length value, then it is easy to read. The "header" for each block is just the 32-bit length value.
If you want to read such a file, the easiest way is probably to encapsulate the reading into a method that returns IEnumerable<byte[]> like so:
public static IEnumerable<byte[]> ReadChunks(string path)
{
    var lengthBytes = new byte[sizeof(int)];
    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        while (true)
        {
            int n = fs.Read(lengthBytes, 0, sizeof(int)); // Read block size.
            if (n == 0) // End of file.
                yield break;
            if (n != sizeof(int))
                throw new InvalidOperationException("Invalid header");
            int blockLength = BitConverter.ToInt32(lengthBytes, 0);
            var buffer = new byte[blockLength];
            n = fs.Read(buffer, 0, blockLength);
            if (n != blockLength)
                throw new InvalidOperationException("Missing data");
            yield return buffer;
        }
    }
}
Then you can use it simply:
foreach (var block in ReadChunks("MyFileName"))
{
    // Process block.
}
Note that you don't need to provide your own buffering.
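The writing side is symmetric: prefix each processed block with its 32-bit length. A minimal sketch (assuming the processed blocks arrive as byte arrays):
public static void WriteChunks(string path, IEnumerable<byte[]> blocks)
{
    using (var fs = new FileStream(path, FileMode.Create, FileAccess.Write))
    {
        foreach (var block in blocks)
        {
            byte[] lengthBytes = BitConverter.GetBytes(block.Length); // 32-bit header
            fs.Write(lengthBytes, 0, lengthBytes.Length);
            fs.Write(block, 0, block.Length);
        }
    }
}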
Try this:
public static IEnumerable<byte[]> ReadChunks(string fileName)
{
    const int MAX_BUFFER = 1048576; // 1MB
    int numBytes;
    using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        long remainBytes = fs.Length;
        int bufferBytes = MAX_BUFFER;
        while (true)
        {
            if (remainBytes <= MAX_BUFFER)
            {
                bufferBytes = (int)remainBytes;
            }
            // Allocate a fresh array per chunk so callers can safely keep a reference to it.
            byte[] filechunk = new byte[bufferBytes];
            if ((numBytes = fs.Read(filechunk, 0, bufferBytes)) > 0)
            {
                remainBytes -= numBytes;
                yield return filechunk;
            }
            else
            {
                break;
            }
        }
    }
}
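Usage is the same as with the first answer; for example, streaming the chunks straight into another file:
using (var output = File.Create("copy.bin"))
{
    foreach (var chunk in ReadChunks("MyFileName"))
        output.Write(chunk, 0, chunk.Length);
}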

C# Prepend 40 bytes onto stream

I am trying to send a FileStream of a file, but I now want to add a 40-byte checksum to the start.
How can I do this? I've tried creating my own stream class to concatenate two streams, and I've looked at stream writers.
Surely there must be an easy way, or an alternative way. And I DON'T want to load the entire file into a byte array, append to that, and write that back to a stream.
public Stream getFile(String basePath, String path)
{
    return new FileStream(basePath + path, FileMode.Open, FileAccess.Read);
}
See MergeStream.cs. Here's how you can use it:
var mergeStream = new MergeStream(new MemoryStream(checksum), File.OpenRead(path));
return mergeStream;
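MergeStream.cs is an external helper. If you would rather not take the dependency, a minimal read-only concatenating stream can be sketched along these lines (an illustration, not the linked implementation):
// Read-only stream that plays a list of streams back to back.
public class ConcatStream : Stream
{
    private readonly Stream[] _streams;
    private int _current;

    public ConcatStream(params Stream[] streams) { _streams = streams; }

    public override int Read(byte[] buffer, int offset, int count)
    {
        while (_current < _streams.Length)
        {
            int n = _streams[_current].Read(buffer, offset, count);
            if (n > 0) return n;
            _current++;   // current stream exhausted, move to the next one
        }
        return 0;         // all streams exhausted
    }

    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => throw new NotSupportedException();
    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }
    public override void Flush() { }
    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
    public override void SetLength(long value) => throw new NotSupportedException();
    public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();

    protected override void Dispose(bool disposing)
    {
        if (disposing)
            foreach (var s in _streams) s.Dispose();
        base.Dispose(disposing);
    }
}
It can then be returned the same way: new ConcatStream(new MemoryStream(checksum), File.OpenRead(path)).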
byte[] checksum = new byte[40];
//...
FileStream oldFileStream = new FileStream(oldFile, FileMode.Open, FileAccess.Read);
FileStream newFileStream = new FileStream(newFile, FileMode.Create, FileAccess.Write);
using (oldFileStream)
using (newFileStream)
{
    newFileStream.Write(checksum, 0, checksum.Length);
    oldFileStream.CopyTo(newFileStream);
}
File.Copy(newFile, oldFile, overwrite: true);
If you don't want to use a temporary file, the only solution is to open the file in ReadWrite mode and use two alternating buffers:
private static void Swap<T>(ref T obj1, ref T obj2)
{
    T tmp = obj1;
    obj1 = obj2;
    obj2 = tmp;
}

public static void PrependToFile(string filename, byte[] bytes)
{
    FileStream stream = new FileStream(filename, FileMode.Open, FileAccess.ReadWrite);
    PrependToStream(stream, bytes);
}

public static void PrependToStream(Stream stream, byte[] bytes)
{
    const int MAX_BUFFER_SIZE = 4096;
    using (stream)
    {
        int bufferSize = Math.Max(MAX_BUFFER_SIZE, bytes.Length);
        byte[] buffer1 = new byte[bufferSize];
        byte[] buffer2 = new byte[bufferSize];
        int readCount1;
        int readCount2;
        long totalLength = stream.Length + bytes.Length;

        // Save the first chunk, then overwrite the start of the file with the prefix.
        readCount1 = stream.Read(buffer1, 0, bufferSize);
        long readPosition = readCount1;
        stream.Position = 0;
        stream.Write(bytes, 0, bytes.Length);
        long writePosition = bytes.Length;
        long written = bytes.Length;

        while (written < totalLength)
        {
            // Read the next chunk before its region gets overwritten...
            stream.Position = readPosition;
            readCount2 = stream.Read(buffer2, 0, buffer2.Length);
            readPosition += readCount2;

            // ...then write the previously saved chunk at its shifted position.
            stream.Position = writePosition;
            stream.Write(buffer1, 0, readCount1);
            writePosition += readCount1;
            written += readCount1;

            Swap(ref buffer1, ref buffer2);
            Swap(ref readCount1, ref readCount2);
        }
    }
}
