I have a C# console app that I'm trying to create that processes all the files in a given directory and writes output to another given directory. I want to process the input files X bytes at a time.
namespace FileConverter
{
class Program
{
static void Main(string[] args)
{
string srcFolder = args[0];
string destFolder = args[1];
string[] srcFiles = Directory.GetFiles(srcFolder);
for (int s = 0; s < srcFiles.Length; s++)
{
byte[] fileBuffer;
int numBytesRead = 0;
int readBuffer = 10000;
FileStream srcStream = new FileStream(srcFiles[s], FileMode.Open, FileAccess.Read);
int fileLength = (int)srcStream.Length;
string destFile = destFolder + "\\" + Path.GetFileName(srcFiles[s]) + "-processed";
FileStream destStream = new FileStream(destFile, FileMode.OpenOrCreate, FileAccess.Write);
//Read and process the source file by some chunk of bytes at a time
while (numBytesRead < fileLength)
{
fileBuffer = new byte[readBuffer];
//Read some bytes into the fileBuffer
//TODO: This doesn't work on subsequent blocks
int n = srcStream.Read(fileBuffer, numBytesRead, readBuffer);
//If we didn't read anything, there's no more to process
if (n == 0)
break;
//Process the fileBuffer
for (int i = 0; i < fileBuffer.Length; i++)
{
//Process each byte in the array here
}
//Write data
destStream.Write(fileBuffer, numBytesRead, readBuffer);
numBytesRead += readBuffer;
}
srcStream.Close();
destStream.Close();
}
}
}
}
I'm running into an error at execution time at:
//Read some bytes into the fileBuffer
//TODO: This doesn't work on subsequent blocks
int n = srcStream.Read(fileBuffer, numBytesRead, readBuffer);
I don't want to load the entire file into memory, as it could possibly be many gigabytes in size. I really want to be able to read some number of bytes, process them, write them out to a file, and then read in the next X bytes and repeat.
It gets through one iteration of the loop, and then dies on the second. The error I get is:
"Offset and length were out of bounds for the array or count is greater than the number of elements from index to the end of the source collection."
The sample file I'm working with is about 32k.
Can anyone tell me what I'm doing wrong here?
The second parameter to Read is not the offset into the file - it is the offset into the buffer at which to start writing data. So just pass 0.
Also, don't assume the buffer is filled each time: you should only process "n" bytes from the buffer. And the buffer should be reused between iterations.
If you need to read exactly a number of bytes:
static void ReadOrThrow(Stream source, byte[] buffer, int count) {
int read, offset = 0;
while(count > 0 && (read = source.Read(buffer, offset, count)) > 0) {
offset += read;
count -= read;
}
if(count != 0) throw new EndOfStreamException();
}
Note that Write works similarly, so you need to pass 0 as the offset and n as the count.
It should be
destStream.Write(fileBuffer, numBytesRead, n);
numBytesRead += n;
because n is the actual number of bytes that was read
Related
Hello I am trying to rewrite file by replacing bytes but it takes too much time to rewrite large files. For example on 700MB this code was working about 6 minutes. Pls help me to make it work less than 1 minute.
static private void _12_56(string fileName)
{
byte[] byteArray = File.ReadAllBytes(fileName);
for (int i = 0; i < byteArray.Count() - 6; i += 6)
{
Swap(ref byteArray[i], ref byteArray[i + 4]);
Swap(ref byteArray[i + 1], ref byteArray[i + 5]);
}
File.WriteAllBytes(fileName, byteArray);
}
Read the file in chuncks of bytes which are divisible by 6.
Replace the necessary bytes in each chunk and write each chunk to another file before reading the next chunk.
You can also try to perform the read of the next chunk in parallel with writing the next chunk:
using( var source = new FileStream(#"c:\temp\test.txt", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
using( var target = new FileStream(#"c:\temp\test.txt", FileMode.Open, FileAccess.Write, FileShare.ReadWrite))
{
await RewriteFile(source, target);
}
}
private async Task RewriteFile( FileStream source, FileStream target )
{
// We're reading bufferSize bytes from the source-stream inside one half of the buffer
// while the writeTask is writing the other half of the buffer to the target-stream.
// define how many chunks of 6 bytes you want to read per read operation
int chunksPerBuffer = 1;
int bufferSize = 6 * chunksPerBuffer;
// declare a byte array that contains both the bytes that are read
// and the bytes that are being written in parallel.
byte[] buffer = new byte[bufferSize * 2];
// curoff is the start-position of the bytes we're working with in the
// buffer
int curoff = 0;
Task writeTask = Task.CompletedTask;
int len;
// Read the desired number of bytes from the file into the buffer.
// In the first read operation, the bytes will be placed in the first
// half of the buffer. The next read operation will read them in
// the second half of the buffer.
while ((len = await source.ReadAsync(buffer, curoff, bufferSize).ConfigureAwait(false)) != 0)
{
// Swap the bytes in the current buffer.
// When reading x * 6 bytes in one go, every 1st byte will be replaced by the 4th byte; every 2nd byte will be replaced by the 5th byte.
for (int i = curoff; i < bufferSize + curoff; i += 6)
{
Swap(ref buffer[i], ref buffer[i + 4]);
Swap(ref buffer[i + 1], ref buffer[i + 5]);
}
// wait until the previous write-task completed.
await writeTask.ConfigureAwait(false);
// Start writing the bytes that have just been processed.
// Do not await the task here, so that the next bytes
// can be read in parallel.
writeTask = target.WriteAsync(buffer, curoff, len);
// Position the pointer to the beginnen of the other part
// in the buffer
curoff ^= bufferSize;
}
// Make sure that the last write also finishes before closing
// the target stream.
await writeTask.ConfigureAwait(false);
}
The code above should read a file, swap bytes and rewrite to the same file in parallel.
As the other answer says, you have to read the file in chunks.
Since you are rewriting the same file, it's easiest to use the same stream for reading and writing.
using(var file = File.Open(path, FileMode.Open, FileAccess.ReadWrite)) {
// Read buffer. Size must be divisible by 6
var buffer = new byte[6*1000];
// Keep track of how much we've read in each iteration
var bytesRead = 0;
// Fill the buffer. Put the number of bytes into 'bytesRead'.
// Stop looping if we read less than 6 bytes.
// EOF will be signalled by Read returning -1.
while ((bytesRead = file.Read(buffer, 0, buffer.Length)) >= 6)
{
// Swap the bytes in the current buffer
for (int i = 0; i < bytesRead; i += 6)
{
Swap(ref buffer[i], ref buffer[i + 4]);
Swap(ref buffer[i + 1], ref buffer[i + 5]);
}
// Step back in the file, to where we filled the buffer from
file.Position -= bytesRead;
// Overwrite with the swapped bytes
file.Write(buffer, 0, bytesRead);
}
}
I read binary file to hex by block.
It is diffrent when I use FileStream.Read and File.ReadAllBytes
FileSteram.Read
int limit = 0;
if (openFileDlg.FileName.Length > 0)
{
fileName = openFileDlg.FileName;
FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read);
fsLen = (int)fs.Length;
int count = 0;
limit = 100;
byte[] read_buff = new byte[limit];
StringBuilder sb = new StringBuilder();
while ( (count = fs.Read(read_buff, 0, limit)) > 0)
{
foreach (byte b in read_buff)
{
sb.Append(Convert.ToString(b, 16).PadLeft(2, '0'));
}
}
rtxb_bin.AppendText(sb.ToString() + "\n");
}
File.ReadAllBytes
if (openFileDlg.FileName.Length > 0)
{
fileName = openFileDlg.FileName;
byte[] fileBytes = File.ReadAllBytes(fileName);
StringBuilder sb2 = new StringBuilder();
foreach (byte b2 in fileBytes)
{
sb2.Append(Convert.ToString(b2, 16).PadLeft(2, '0'));
}
rtxb_allbin.AppendText(sb2.ToString());
}
case 1, reasult is ...
........04c0020f00452a00421346108129844f2138448500208020250405250043188510812e0
and case 2 is
.......04c0020f00452a00421346108129844f2138448500208020250405250043188510812e044f212cc48120c24125404f2069c2c0008bff35f8f401efbd17047
FileStream.Read doesn't read after '12e0'
'44f212cc48120c24125404f2069c2c0008bff35f8f401efbd17047' is missing
How can I read all bytes using FileStream.Read?
Why FileStream.Read doesn't read last block?
Most likely it appears to you that it does not read last block. Suppose you have file of length 102. First iteration of you loop reads first 100 bytes, all is fine. But what happens on second (last) one? You read two bytes into read_buff, which is of length 100. Now that buffer contains 2 bytes of last block and 98 bytes of previous (first) block, because Read doesn't clear the buffer. Then you proceed with:
foreach (byte b in read_buff)
{
sb.Append(Convert.ToString(b, 16).PadLeft(2, '0'));
}
In result, sb has 100 bytes of first block, 2 bytes of last block, and then again 98 bytes of first block. If you don't look too closely, it might appear that it just skipped last block, while in reality it duplicated part of the previous one.
To fix, use count (indicating how much bytes were really read into the buffer) to work only with valid part of read_buff:
for (int i = 0; i < count; i++) {
sb.Append(Convert.ToString(read_buff[i], 16).PadLeft(2, '0'));
}
You need update offset and count.
Sintaxis
public override int Read(
byte[] array,
int offset,
int count
)
Example
public static byte[] ReadFile(string filePath)
{
byte[] buffer;
FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read);
try
{
int length = (int)fileStream.Length; // get file length
buffer = new byte[length]; // create buffer
int count; // actual number of bytes read
int sum = 0; // total number of bytes read
// read until Read method returns 0 (end of the stream has been reached)
while ((count = fileStream.Read(buffer, sum, length - sum)) > 0)
sum += count; // sum is a buffer offset for next reading
}
finally
{
fileStream.Close();
}
return buffer;
}
Reference
public static void ReadAndProcessLargeFile(string theFilename, long whereToStartReading = 0)
{
FileInfo info = new FileInfo(theFilename);
long fileLength = info.Length;
long timesToRead = (fileLength / megabyte);
long ctr = 0;
long timesRead = 0;
FileStream fileStram = new FileStream(theFilename, FileMode.Open, FileAccess.Read);
using (fileStram)
{
byte[] buffer = new byte[megabyte];
fileStram.Seek(whereToStartReading, SeekOrigin.Begin);
int bytesRead = 0;
//bytesRead = fileStram.Read(buffer, 0, megabyte);
//ctr = ctr + 1;
while ((bytesRead = fileStram.Read(buffer, 0, megabyte)) > 0)
{
ProcessChunk(buffer, bytesRead);
buffer = new byte[megabyte]; // This solves last read prob
}
}
}
private static void ProcessChunk(byte[] buffer, int bytesRead)
{
// Do the processing here
string utfString = Encoding.UTF8.GetString(buffer, 0, bytesRead);
Console.Write(utfString);
}
I am reading a file and writing stream of that to a file, what I want to do is writing multiple files in a single file and reading them by their offset.
While writing the files, I understand that i need to know the file offset and length of the stream to read back the file.
var file = #"d:\foo.pdf";
var stream = File.ReadAllBytes(file);
// here i have the length of the
Console.WriteLine(stream.LongLength);
using (var br = new BinaryWriter(File.Open(#"d:\foo.bin", FileMode.OpenOrCreate)))
{
br.Write(stream);
}
I need to find the offset while writing multiple files.
Also while reading back the files, how do I start from an offset and read forwards as long as the length?
Finally, Does this method reduces number of disc seeks?
To read back various fragments, you will need to store the individual file lengths. For example:
using(var dest = File.Open(#"d:\foo.bin", FileMode.OpenOrCreate))
{
Append(dest, file);
Append(dest, anotherFile);
}
...
static void AppendFile(Stream dest, string path)
{
using(var source = File.OpenRead(path))
{
var lenHeader = BitConverter.GetBytes(source.Length);
dest.Write(lenHeader, 0, 4);
source.CopyTo(dest);
}
}
Then to read back you can do things like:
using(var source = File.OpenRead(...))
{
int len = ReadLength(source);
stream.Seek(len, SeekOrigin.Current); // skip the first file
len = ReadLength(source);
// TODO: now read len-many bytes from the second file
}
static int ReadLength(Stream stream)
{
byte[] buffer = new byte[4];
int count = 4, offset = 0, read;
while(count != 0 && (read = stream.Read(buffer, offset, count)) > 0)
{
count -= read;
offset += read;
}
if (count != 0) throw new EndOfStreamException();
return BitConverter.ToInt32(buffer, 0);
}
As for reading len-many bytes; you can either just keep track of it and decrement it while reading, or you can create a length-limited Stream wrapper. Either works.
I'm trying to read the response stream from an HttpWebResponse object. I know the length of the stream (_response.ContentLength) however I keep getting the following exception:
Specified argument was out of the range of valid values.
Parameter name: size
While debugging, I noticed that at the time of the error, the values were as such:
length = 15032 //the length of the stream as defined by _response.ContentLength
bytesToRead = 7680 //the number of bytes in the stream that still need to be read
bytesRead = 7680 //the number of bytes that have been read (offset)
body.length = 15032 //the size of the byte[] the stream is being copied to
The peculiar thing is that the bytesToRead and bytesRead variables are ALWAYS 7680, regardless of the size of the stream (contained in the length variable). Any ideas?
Code:
int length = (int)_response.ContentLength;
byte[] body = null;
if (length > 0)
{
int bytesToRead = length;
int bytesRead = 0;
try
{
body = new byte[length];
using (Stream stream = _response.GetResponseStream())
{
while (bytesToRead > 0)
{
// Read may return anything from 0 to length.
int n = stream.Read(body, bytesRead, length);
// The end of the file is reached.
if (n == 0)
break;
bytesRead += n;
bytesToRead -= n;
}
stream.Close();
}
}
catch (Exception exception)
{
throw;
}
}
else
{
body = new byte[0];
}
_responseBody = body;
You want this line:
int n = stream.Read(body, bytesRead, length);
to be this:
int n = stream.Read(body, bytesRead, bytesToRead);
You are saying the maximum number of bytes to read is the stream's length, but it isn't since it is actually only the remaining information in the stream after the offset has been applied.
You also shouldn't need this part:
if (n == 0)
break;
The while should end the reading correctly, and it is possible that you won't read any bytes before you have finished the whole thing (if the stream is filling slower than you are taking the data out of it)
int bufferlength = 12488;
int pointer = 1;
int offset = 0;
int length = 0;
FileStream fstwrite = new FileStream("D:\\Movie.wmv", FileMode.Create);
while (pointer != 0)
{
byte[] buff = new byte[bufferlength];
FileStream fst = new FileStream("E:\\Movie.wmv", FileMode.Open);
pointer = fst.Read(buff, 0, bufferlength);
fst.Close();
fstwrite.Write(buff, offset , pointer);
offset += pointer;
}
I used the above code for splitting a file and place it in other drive.Im not able to set the correct offset and length for this routine can anyone help me to fix this
splitting in the sense ,i split it in "x" kbs and pass it somewhere make the same file in some other location
I find it atlast ,thanks to evry one who gave their valueble responses.
Currently you're always reading from the start of the file... and even if you weren't you'd just be copying the whole file.
Here's some code which will actually split a single file into multiple files:
public static void SplitFile(string inputFile,
string outputPrefix,
int chunkSize)
{
byte[] buffer = new byte[chunkSize];
using (Stream input = File.OpenRead(inputFile))
{
int index = 0;
while (input.Position < input.Length)
{
using (Stream output = File.Create(outputPrefix + index))
{
int chunkBytesRead = 0;
while (chunkBytesRead < chunkSize)
{
int bytesRead = input.Read(buffer,
chunkBytesRead,
chunkSize - chunkBytesRead);
// End of input
if (bytesRead == 0)
{
break;
}
chunkBytesRead += bytesRead;
}
output.Write(buffer, 0, chunkBytesRead);
}
index++;
}
}
}
Your reading bufferlength of bytes. Shouldn't you set the offset like this then?
offset += bufferlength;
Don't open your source file inside the loop, or you'll always read the first chunk.
Open it before the loop, then make sure your offset is applied to the read.