C# Calculate an MD5 of an Input and Output FileStream

I'm using this, slightly modified, to copy large files from a file share with the ability to continue copying if the download was disrupted. It runs in a BackgroundWorker and reports progress. This works fine, but I'd like the ability to write the current MD5 hash to disk (the running total, not one per block) each time a block of file data is written to disk, with minimal additional overhead. If a partial file is discovered, I'd like to read the MD5 hash from file, and if it is identical to that of the partial file, continue copying. When the file has been copied completely, the MD5 hash in the file should be that of the completely copied file. I'd like to use that later to determine that the files in source and destination are identical. Thanks for any help!
This is my current copy method:
public static bool CopyFile(List<CopyObjects> FileList, FSObjToCopy job, BackgroundWorker BW)
{
Stopwatch sw = new Stopwatch();
long RestartPosition = 0;
bool Retry = false;
int BYTES_TO_READ = 0x200000;
foreach (CopyObjects co in FileList)
{
FileInfo fi = co.file;
FileInfo fo = null;
if (fi.Directory.FullName.StartsWith($@"{Test_Updater_Core.ServerName}\{Test_Updater_Core.ServerTemplateRoot}"))
{
if (File.Exists(fi.FullName.Replace($@"{Test_Updater_Core.ServerName}\{Test_Updater_Core.ServerTemplateRoot}", $@"{Test_Updater_Core.USBStore_Drive.driveInfo.Name.Replace("\\", "")}\{Test_Updater_Core.UsbTemplateRoot}")))
{
fi = new FileInfo(fi.FullName.Replace($@"{Test_Updater_Core.ServerName}\{Test_Updater_Core.ServerTemplateRoot}", $@"{Test_Updater_Core.USBStore_Drive.driveInfo.Name.Replace("\\", "")}\{Test_Updater_Core.UsbTemplateRoot}"));
co.destination = co.destination.Replace($@"{Test_Updater_Core.USBStore_Drive.driveInfo.Name.Replace("\\", "")}\{Test_Updater_Core.UsbTemplateRoot}", $@"{Test_Updater_Core.LocalInstallDrive}\{Test_Updater_Core.LocalTemplateRoot}");
fo = new FileInfo($"{fi.FullName.Replace($@"{Test_Updater_Core.USBStore_Drive.driveInfo.Name.Replace("\\", "")}\{Test_Updater_Core.UsbTemplateRoot}", $@"{Test_Updater_Core.LocalInstallDrive}\{Test_Updater_Core.LocalTemplateRoot}")}{Test_Updater_Core.TempFileExtension}");
}
}
}
}
//If a clean cancellation was requested, we do it here, otherwise the BackgroundWorker will be killed
if (BW.CancellationPending)
{
job.Status = FSObjToCopy._Status.Complete;
return false;
}
//If a pause is requested, we loop here until resume or termination has been signaled
while (job.PauseBackgroundWorker)
{
Thread.Sleep(100);
if (BW.CancellationPending)
{
job.Status = FSObjToCopy._Status.Complete;
return false;
}
Application.DoEvents();
}
if (fo == null)
fo = new FileInfo($"{fi.FullName.Replace(job.Source, co.destination)}{Test_Updater_Core.TempFileExtension}");
if (fo.Exists)
{
Retry = true;
RestartPosition = fo.Length - BYTES_TO_READ;
}
else
{
RestartPosition = 0;
Retry = false;
}
if (RestartPosition <= 0)
{
Retry = false;
}
sw.Start();
try
{
// Open the source for reading and the destination for writing
FileStream source = new FileStream(fi.FullName, FileMode.Open, FileAccess.Read);
FileStream dest = new FileStream(fo.FullName, FileMode.OpenOrCreate, FileAccess.Write);
// Number of bytes actually read on each pass
int destLength = 0;
// If the source is larger than one buffer, copy it in chunks
if (BYTES_TO_READ < source.Length)
{
byte[] buffer = new byte[BYTES_TO_READ];
long copied = 0;
if (Retry)
{
source.Seek(RestartPosition, SeekOrigin.Begin);
dest.Seek(RestartPosition, SeekOrigin.Begin);
Retry = false;
}
while (copied <= source.Length - BYTES_TO_READ)
{
destLength = source.Read(buffer, 0, BYTES_TO_READ);
dest.Write(buffer, 0, destLength);
dest.Flush();
copied += destLength;
job.CopiedSoFar += destLength;
if (sw.ElapsedMilliseconds > 250)
{
job.PercComplete = (int)(job.CopiedSoFar * 100.0 / job.TotalFileSize);
sw.Restart();
job.ProgressCell.Value = job.PercComplete;
BW.ReportProgress(job.PercComplete < 100 ? job.PercComplete : 99);
}
if (BW.CancellationPending)
{
job.Status = FSObjToCopy._Status.Complete;
return false;
}
while (job.PauseBackgroundWorker)
{
Thread.Sleep(100);
if (BW.CancellationPending)
{
job.Status = FSObjToCopy._Status.Complete;
return false;
}
Application.DoEvents();
}
}
int left = (int)(source.Length - copied);
destLength = source.Read(buffer, 0, left);
dest.Write(buffer, 0, destLength);
dest.Flush();
job.CopiedSoFar += destLength;
}
else
{
// If the whole file fits in one buffer, copy it in a single read/write
byte[] buffer = new byte[source.Length];
destLength = source.Read(buffer, 0, buffer.Length);
dest.Write(buffer, 0, destLength);
dest.Flush();
job.CopiedSoFar += destLength;
job.PercComplete = (int)(job.CopiedSoFar * 100.0 / job.TotalFileSize);
job.ProgressCell.Value = job.PercComplete;
BW.ReportProgress(job.PercComplete < 100 ? job.PercComplete : 99);
}
source.Close();
dest.Close();
fo.LastWriteTimeUtc = fi.LastWriteTimeUtc;
if (File.Exists(fo.FullName))
{
if (File.Exists(fo.FullName.Replace($"{Test_Updater_Core.TempFileExtension}", "")))
{
File.Delete(fo.FullName.Replace($"{Test_Updater_Core.TempFileExtension}", ""));
}
File.Move(fo.FullName, fo.FullName.Replace($"{Test_Updater_Core.TempFileExtension}", ""));
}
job.ProgressCell.Value = job.PercComplete;
BW.ReportProgress(job.PercComplete);
}
catch (Exception ex)
{
MessageBox.Show($"There was an error copying:{Environment.NewLine}{fi}{Environment.NewLine}to:" +
$"{Environment.NewLine}{fo}{Environment.NewLine}The error is: {Environment.NewLine}{ex.Message}",
"Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
job.Status = FSObjToCopy._Status.Error;
return false;
}
finally
{
sw.Stop();
}
}
return true;
}

I decided to create checksum files on the server, each containing a series of checksums. As I copy the file, I add the checksums to an internal list and compare them to the server list. If at some point they do not match, I go back to the point where they were identical and start back up there. At the end of the copy job, I write the checksums from the internal list to disk, under the same name as on the server. If I'd like to check the integrity of a file, I can compare the server file to the local file and verify the checksums.
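Since .NET's HashAlgorithm does not expose its intermediate state for serialization, a per-block checksum list like this is the practical way to get a resumable check. A minimal sketch of the idea (the helper names, block size, and checksum file layout are mine, not the actual Test_Updater code):
using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
static class BlockChecksums
{
// One MD5 per fixed-size block; blockSize must match the size used when
// the server-side checksum file was built (0x200000 in the copy method above).
public static List<string> ComputeBlockHashes(string path, int blockSize)
{
var hashes = new List<string>();
byte[] buffer = new byte[blockSize];
using (var md5 = MD5.Create())
using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read))
{
int read;
while ((read = fs.Read(buffer, 0, blockSize)) > 0)
hashes.Add(BitConverter.ToString(md5.ComputeHash(buffer, 0, read)));
}
return hashes;
}
// The byte offset at which the partial local file stops matching the
// server's list, i.e. where the copy should resume.
public static long FindRestartOffset(IList<string> local, IList<string> server, int blockSize)
{
int i = 0;
while (i < local.Count && i < server.Count && local[i] == server[i])
i++;
return (long)i * blockSize;
}
}
On resume, FindRestartOffset gives the position to seek both streams to; everything after it is re-copied.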

Related

How to sync progress bar with process

I am currently creating a file copying facility that works in the console. There are 3 basic classes within it. The first one is the program itself, which takes a source and destination and is as follows:
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Source:");
string path = Console.ReadLine();
Console.WriteLine("target:");
string target = Console.ReadLine();
Copy newCopy = new Copy();
newCopy.CopyFunction(path, target);
Console.ReadLine();
}
}
The second class is Copy.cs, which is as follows:
class Copy
{
public void CopyFunction(string source, string destination)
{
string sourceFile = source;
string destinationFile = destination;
File.Copy(sourceFile, destinationFile);
Console.Write("Files are being copied... ");
using (var progress = new ProgressBar())
{
for (int i = 0; i <= 100; i++)
{
progress.Report((double)i / 100);
Thread.Sleep(20);
}
}
Console.WriteLine("File Copied");
}
}
For the final class, I implemented the ProgressBar.cs class provided by @DanielWolf:
https://gist.github.com/DanielSWolf/0ab6a96899cc5377bf54
The problem I'm currently facing is that the file copying function works, and so does the progress bar, but they work separately. For example, the console will spend a while on a blank screen while it processes what's happening, and then after it's completed, a quick animation of the progress bar is displayed.
I was wondering if I could synchronise the progress bar with the copying process so that it moves at a similar rate while it's happening?
To achieve what you want to do, you need to update the progress bar as you copy the file. One way to do this is simply to copy the file by chunks and report progress as each chunk is copied. I modified your CopyFunction to do just that. Enjoy!
class Copy
{
public void CopyFunction(string sourcePath, string destinationPath)
{
byte[] buffer = new byte[1024 * 10]; // 10K buffer, you can change to larger size.
using (var progress = new ProgressBar())
using (FileStream source = new FileStream(sourcePath, FileMode.Open, FileAccess.Read))
{
long fileLength = source.Length;
using (FileStream dest = new FileStream(destinationPath, FileMode.Create, FileAccess.Write))
{
long totalBytes = 0;
int currentBlockSize = 0;
while ((currentBlockSize = source.Read(buffer, 0, buffer.Length)) > 0)
{
totalBytes += currentBlockSize;
dest.Write(buffer, 0, currentBlockSize);
progress.Report((double)totalBytes / fileLength);
}
progress.Report((double)1.0);
}
//File.Copy(sourceFile, destinationFile);
//Console.Write("Files are being copied... ");
//using (var progress = new ProgressBar())
//{
// for (int i = 0; i <= 100; i++)
// {
// progress.Report((double)i / 100);
// Thread.Sleep(20);
// }
//}
Console.WriteLine("File Copied");
}
}
}

Memory leak with an array c#

The user specifies a filename and block size. The original file is split into blocks of the user's block size (except the last block). For each block, a SHA256 hash is calculated and written to the console.
This is a program with two threads: the first thread reads the original file and puts each block's byte array into a queue; the second thread removes the byte arrays from the queue and calculates the hashes.
After the first iteration, memory is not released until the program completes.
On subsequent iterations, memory is allocated and released normally.
So, during the next read of a part array, I get an OutOfMemoryException.
How can I manage memory correctly to avoid the leak?
class Encryption
{
static FileInfo originalFile;
static long partSize = 0;
static long lastPartSize = 0;
static long numParts = 0;
static int lastPartNumber = 0;
static string[] hash;
static Queue<byte[]> partQueue = new Queue<byte[]>();
public Encryption(string _filename, long _partSize)
{
try
{
originalFile = new FileInfo(_filename);
partSize = _partSize;
numParts = originalFile.Length / partSize;
lastPartSize = originalFile.Length % partSize;
if (lastPartSize != 0)
{
numParts++;
}
else if (lastPartSize == 0)
{
lastPartSize = partSize;
}
lastPartNumber = (int)numParts - 1;
hash = new string[numParts];
}
catch (FileNotFoundException fe)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
return;
}
catch (Exception e)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
}
}
private void readFromFile()
{
try
{
using (FileStream fs = new FileStream(originalFile.FullName, FileMode.Open, FileAccess.Read))
{
for (int i = 0; i < numParts; i++)
{
long len = 0;
if (i == lastPartNumber)
{
len = lastPartSize;
}
else
{
len = partSize;
}
byte[] part = new byte[len];
fs.Read(part, 0, (int)len);
partQueue.Enqueue(part);
part = null;
}
}
}
catch(Exception e)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
}
}
private static void hashToArray()
{
try
{
SHA256Managed sha256HashString = new SHA256Managed();
int numPart = 0;
while (numPart < numParts)
{
long len = 0;
if (numPart == lastPartNumber)
{
len = lastPartSize;
}
else
{
len = partSize;
}
hash[numPart] = sha256HashString.ComputeHash(partQueue.Dequeue()).ToString();
numPart++;
}
}
catch (Exception e)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
}
}
private void hashWrite()
{
try
{
Console.WriteLine("\nResult:\n");
for (int i = 0; i < numParts; i++)
{
Console.WriteLine("{0} : {1}", i, hash[i]);
}
}
catch(Exception e)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
}
}
public void threadsControl()
{
try
{
Thread readingThread = new Thread(readFromFile);
Thread calculateThread = new Thread(hashToArray);
readingThread.Start();
calculateThread.Start();
readingThread.Join();
calculateThread.Join();
hashWrite();
}
catch (Exception e)
{
Console.WriteLine("Error: {0}\nStackTrace: {1}", fe.Message, fe.StackTrace);
}
}
}
You should read up on .NET internals before writing code like this. Your picture of the .NET memory model is wrong, and that is why you are getting this error. An OutOfMemoryException occurs very rarely if you take care of your resources, especially when dealing with arrays.
You should know that the .NET runtime has two heaps for reference objects: the basic one, and the Large Object Heap, and the most important difference between them is that the LOH is not compacted even after garbage collection.
You should know that large arrays (anything over about 85,000 bytes) go straight to the LOH, so memory is consumed very quickly. Also you should know that this line:
part = null;
doesn't free memory immediately. Even worse, this line doesn't do anything at all, because you still hold a reference to that part of the file in the queue. This is why your memory runs out. You can try to fix this by forcing a GC after each hash computation, but that is a strongly discouraged solution.
You should rewrite your algorithm (which is a very simple case of the Producer/Consumer pattern) so that the whole file's contents are never in memory at once. This is quite easy: move your part variable out to a static field and read the next file part into it, introduce an EventWaitHandle (or one of its child classes) instead of the unbounded queue, and compute the next hash right after you've read the next part of the file; a sketch follows below.
I recommend starting with the basics of threading in C# by reading Joe Albahari's great series, and only after that trying to implement such solutions. Good luck with your projects.
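A minimal sketch of that idea, using a BlockingCollection<byte[]> with a small bounded capacity in place of the raw EventWaitHandle the answer mentions (the file name and part size are placeholders; it also sidesteps the question's ComputeHash(...).ToString() bug by formatting with BitConverter):
using System;
using System.Collections.Concurrent;
using System.IO;
using System.Security.Cryptography;
using System.Threading.Tasks;
class BoundedHasher
{
// boundedCapacity: 2 => at most two parts are in memory at once,
// regardless of file size. The reader blocks until the hasher catches up.
static readonly BlockingCollection<byte[]> parts = new BlockingCollection<byte[]>(boundedCapacity: 2);
static void Main()
{
string path = "original.bin";    // placeholder file name
int partSize = 16 * 1024 * 1024; // placeholder block size
var producer = Task.Run(() =>
{
using (var fs = File.OpenRead(path))
{
var buffer = new byte[partSize];
int read;
while ((read = fs.Read(buffer, 0, partSize)) > 0)
{
var part = new byte[read];
Array.Copy(buffer, part, read);
parts.Add(part); // blocks while the queue is full
}
}
parts.CompleteAdding(); // lets the consumer's loop finish
});
var consumer = Task.Run(() =>
{
using (var sha256 = SHA256.Create())
{
int i = 0;
foreach (var part in parts.GetConsumingEnumerable())
Console.WriteLine("{0} : {1}", i++, BitConverter.ToString(sha256.ComputeHash(part)));
}
});
Task.WaitAll(producer, consumer);
}
}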

ASP.net C# : How to read 20 to 200 GB file line by line using File.ReadLines(fileName).GetEnumerator()?

We are trying with the code below.
public static int SplitFile(string fileName, string tmpFolder, List<string> queue, int splitSize = 100000)
{
int chunk = 0;
if (!Directory.Exists(tmpFolder))
Directory.CreateDirectory(tmpFolder);
using (var lineIterator = File.ReadLines(fileName).GetEnumerator())
{
bool stillGoing = true;
for (chunk = 0; stillGoing; chunk++)
{
stillGoing = WriteChunk(lineIterator, splitSize, chunk, tmpFolder, queue);
}
}
return chunk;
}
private static bool WriteChunk(IEnumerator<string> lineIterator,
int splitSize, int chunk, string tmpFolder, List<string> queue)
{
try
{
//int tmpChunkSize = 1000;
//int tmpChunkInc = 0;
string splitFile = Path.Combine(tmpFolder, "file" + chunk + ".txt");
using (var writer = File.CreateText(splitFile))
{
queue.Add(splitFile);
for (int i = 0; i < splitSize; i++)
{
if (!lineIterator.MoveNext())
{
return false;
}
writer.WriteLine(lineIterator.Current);
}
}
return true;
}
catch (Exception)
{
throw;
}
}
It creates around 36 text files (around 800 MB), but then starts throwing an "Out of memory exception" during creation of the 37th file, at lineIterator.MoveNext().
Meanwhile, lineIterator.Current shows the value in the debugger.
As it's a huge file, you should read it with the Seek and ReadBytes methods of BinaryReader.
You can see a simple example here. After you use ReadBytes, check for the last newline and process the file a certain number of lines at a time. Don't write every line as you read it, and don't keep all the data in memory.
The rest is in your hands. A sketch of the idea is below.
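A rough sketch of that idea (the names, sizes, and ASCII assumption are mine, not the asker's API): read the file in fixed-size byte blocks via BinaryReader.ReadBytes, split on newlines, and carry the trailing partial line into the next block, so only one block is ever held in memory.
using System;
using System.IO;
using System.Text;
static class ChunkedSplitter
{
// Assumes text where a '\n' byte is unambiguous (ASCII here; a multi-byte
// character split across a block boundary would need extra handling).
public static void Split(string fileName, string tmpFolder, int linesPerFile = 100000)
{
const int blockSize = 4 * 1024 * 1024; // 4 MB per read (placeholder)
Directory.CreateDirectory(tmpFolder);
int chunk = 0, count = 0;
StreamWriter writer = File.CreateText(Path.Combine(tmpFolder, "file0.txt"));
string carry = "";
using (var reader = new BinaryReader(File.OpenRead(fileName)))
{
byte[] block;
while ((block = reader.ReadBytes(blockSize)).Length > 0)
{
string[] parts = (carry + Encoding.ASCII.GetString(block)).Split('\n');
carry = parts[parts.Length - 1]; // incomplete last line, if any
for (int i = 0; i < parts.Length - 1; i++)
{
writer.WriteLine(parts[i].TrimEnd('\r'));
if (++count == linesPerFile)
{
writer.Dispose();
writer = File.CreateText(Path.Combine(tmpFolder, "file" + (++chunk) + ".txt"));
count = 0;
}
}
}
if (carry.Length > 0)
writer.WriteLine(carry);
}
writer.Dispose();
}
}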
Maybe it is related to this one: When does File.ReadLines free resources
IEnumerable doesn't inherit from IDisposable because typically, the class that implements it only gives you the promise of being enumerable, it hasn't actually done anything yet that warrants disposal.
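To illustrate the distinction: the enumerable promises nothing, but the enumerator you pull from it owns the file handle, which is why the question's using block wraps GetEnumerator() rather than ReadLines itself (the file name here is a placeholder):
using System;
using System.Collections.Generic;
using System.IO;
class EnumeratorDisposal
{
static void Main()
{
IEnumerable<string> lines = File.ReadLines("log.txt"); // just a promise, nothing to dispose
using (IEnumerator<string> e = lines.GetEnumerator())  // IEnumerator<T> is IDisposable
{
while (e.MoveNext())
Console.WriteLine(e.Current);
} // Dispose() here releases the underlying reader and file handle
}
}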

What is the meaning of the error "out of memory" when setting pictureBox1.Image to Image.FromFile(fileName)?

I have a problem with displaying an image from an existing file...
try
{
bool EndFlag = false;
string fileLoc = @"../../../../samples/jpeg_" + DateTime.Now.ToString("yyyyMMdd_hhmmss") + ".jpg";
//Create a file Stream to save the body of the JPEG File content.
FileStream fs = null;
fs = new FileStream(fileLoc, FileMode.OpenOrCreate, FileAccess.Write);
do
{
ReadJpegFileCommand();
CamPort.DiscardOutBuffer();
CamPort.DiscardInBuffer();
for (int i = 0; i < 5; i++)
header[i] = (byte)CamPort.ReadByte();
if (((int)header[0] == 0x76) && (header[1] == 0x00) && (header[2] == 0x32) && (header[3] == 0x00) && (header[4] == 0x00))
{
for (int i = 0; i < 32; i++)
ImageBody[i] = (byte)CamPort.ReadByte();
/*
* writing the bytes that have been read till now to a file
*/
fs.Write(ImageBody, 0, ImageBody.Length);
for (int i = 1; i < ImageBody.Length; i++) // check if we've reached the last two bytes (FF D9) of the body, to stop reading
{
if ((ImageBody[i - 1] == 0xFF) && (ImageBody[i] == 0xD9))
{
EndFlag = true;
MessageBox.Show("FFD9 has been received");
OneSnap.Image =(Bitmap)System.Drawing.Image.FromStream(fs);
fs.Close();
}
}
}
else
{
MessageBox.Show("Error,Try again"); // The first 5 bytes does not match the header
}
for (int i = 0; i < footer.Length; i++)
{
footer[i] = (byte)CamPort.ReadByte();
}
// update the starting address
M += (UInt16)ImageBody.Length;
//Progress.PerformStep();
}while(!EndFlag);
}
catch (System.Exception ex) { MessageBox.Show(ex.Message); }
When I used these statements:
OneSnap.Image =(Bitmap)System.Drawing.Image.FromStream(fs);
fs.Close();
I had this error : "Parameter is not valid"
but when I tried an alternative way and replaced the previous statements with:
fs.Close();
OneSnap.Image =(Bitmap)System.Drawing.Image.FromFile(fileLoc);
the image showed in the picture box. But after running the program a few more times I got the error "Out of memory" and couldn't see the image in the picture box (OneSnap). How do I solve this?
Sample: (this image was captured by a LinkSprite JPEG camera)
Looks like the file you created is not a valid picture, so it can't be converted to a Bitmap.
See the official documentation:
Exception: OutOfMemoryException
Condition: The file does not have a valid image format, or GDI+ does not support the pixel format of the file.
Can't see a way to "fix" this, but you can verify by trying to view the file in picture viewer; if you can view it then you might need something more complex than what System.Drawing offers.
Edit: might be easier than any of us imagined. Try changing the order of your lines:
fs.Close();
OneSnap.Image =(Bitmap)System.Drawing.Image.FromStream(fs);
Might be that while the stream is open, the Bitmap internal code can't read from the file.
Another approach is using MemoryStream instead. For this, first add a List to store all the bytes:
List<byte> arrAllBytes = new List<byte>();
Now instead of this line:
fs.Write(ImageBody, 0, ImageBody.Length);
Have this code:
arrAllBytes.AddRange(ImageBody);
And finally:
MemoryStream stream = new MemoryStream(arrAllBytes.ToArray());
OneSnap.Image = System.Drawing.Image.FromStream(stream);
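One more thing worth checking, though this is only a guess from the symptom that later runs fail: GDI+ also surfaces exhausted image handles as "Out of memory", so it may help to dispose the previously displayed image before replacing it:
// Hypothetical guard, not from the question's code: release the old
// image's GDI+ resources before assigning a new one.
if (OneSnap.Image != null)
OneSnap.Image.Dispose();
OneSnap.Image = System.Drawing.Image.FromStream(stream);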

How to read last "n" lines of log file [duplicate]

This question already has answers here:
Get last 10 lines of very large text file > 10GB
(21 answers)
Closed 1 year ago.
I need a snippet of code that reads the last n lines of a log file. I came up with the following code from the net. I am kind of new to C#. Since the log file might be
quite large, I want to avoid the overhead of reading the entire file. Can someone suggest any performance enhancement? I do not really want to read each character and change position.
var reader = new StreamReader(filePath, Encoding.ASCII);
reader.BaseStream.Seek(0, SeekOrigin.End);
var count = 0;
while (count <= tailCount)
{
if (reader.BaseStream.Position <= 0) break;
reader.BaseStream.Position--;
int c = reader.Read();
if (reader.BaseStream.Position <= 0) break;
reader.BaseStream.Position--;
if (c == '\n')
{
++count;
}
}
var str = reader.ReadToEnd();
Your code will perform very poorly, since you aren't allowing any caching to happen.
In addition, it will not work at all for Unicode.
I wrote the following implementation:
///<summary>Returns the end of a text reader.</summary>
///<param name="reader">The reader to read from.</param>
///<param name="lineCount">The number of lines to return.</param>
///<returns>The last lineCount lines from the reader.</returns>
public static string[] Tail(this TextReader reader, int lineCount) {
var buffer = new List<string>(lineCount);
string line;
for (int i = 0; i < lineCount; i++) {
line = reader.ReadLine();
if (line == null) return buffer.ToArray();
buffer.Add(line);
}
int lastLine = lineCount - 1; //The index of the last line read into the buffer. Everything > this index was read earlier than everything <= this index
while (null != (line = reader.ReadLine())) {
lastLine++;
if (lastLine == lineCount) lastLine = 0;
buffer[lastLine] = line;
}
if (lastLine == lineCount - 1) return buffer.ToArray();
var retVal = new string[lineCount];
buffer.CopyTo(lastLine + 1, retVal, 0, lineCount - lastLine - 1);
buffer.CopyTo(0, retVal, lineCount - lastLine - 1, lastLine + 1);
return retVal;
}
I had trouble with your code. This is my version. Since it's a log file, something might be writing to it, so it's best to make sure you're not locking it.
You go to the end, start reading backwards until you reach n lines, then read everything from there on.
int n = 5; //or any arbitrary number
int count = 0;
string content;
byte[] buffer = new byte[1];
using (FileStream fs = new FileStream("text.txt", FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
// read to the end.
fs.Seek(0, SeekOrigin.End);
// read backwards 'n' lines
while (count < n)
{
fs.Seek(-1, SeekOrigin.Current);
fs.Read(buffer, 0, 1);
if (buffer[0] == '\n')
{
count++;
}
fs.Seek(-1, SeekOrigin.Current); // fs.Read(...) advances the position, so we need to go back again
}
fs.Seek(1, SeekOrigin.Current); // go past the last '\n'
// read the last n lines
using (StreamReader sr = new StreamReader(fs))
{
content = sr.ReadToEnd();
}
}
A friend of mine uses this method (BackwardReader can be found here):
public static IList<string> GetLogTail(string logname, string numrows)
{
int lineCnt = 1;
List<string> lines = new List<string>();
int maxLines;
if (!int.TryParse(numrows, out maxLines))
{
maxLines = 100;
}
string logFile = HttpContext.Current.Server.MapPath("~/" + logname);
BackwardReader br = new BackwardReader(logFile);
while (!br.SOF)
{
string line = br.Readline();
lines.Add(line + System.Environment.NewLine);
if (lineCnt == maxLines) break;
lineCnt++;
}
lines.Reverse();
return lines;
}
Does your log have lines of similar length? If yes, you can calculate the average length of a line and then do the following:
1. Seek to end_of_file - lines_needed * avg_line_length (call this previous_point)
2. Read everything up to the end
3. If you grabbed enough lines, you're done. If not, seek to previous_point - lines_needed * avg_line_length
4. Read everything up to previous_point
5. Go to 3
A rough sketch of these steps is below.
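Sketch only (the doubling strategy and single-byte-encoding assumption are mine):
using System;
using System.IO;
using System.Text;
static class TailEstimator
{
// Seek back an estimated distance, read to the end, and double the window
// until enough lines appear. Assumes a single-byte encoding (ASCII here).
public static string[] TailByEstimate(string path, int linesNeeded, int avgLineLength)
{
using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
long guess = (long)linesNeeded * avgLineLength;
while (true)
{
long start = Math.Max(0, fs.Length - guess);
fs.Seek(start, SeekOrigin.Begin);
var buffer = new byte[fs.Length - start];
int read = 0;
while (read < buffer.Length)
{
int n = fs.Read(buffer, read, buffer.Length - read);
if (n == 0) break;
read += n;
}
string[] lines = Encoding.ASCII.GetString(buffer, 0, read).Split('\n');
// Unless we reached the start of the file, the first element is
// probably a partial line and doesn't count.
int complete = start == 0 ? lines.Length : lines.Length - 1;
if (complete >= linesNeeded || start == 0)
{
int skip = Math.Max(0, lines.Length - linesNeeded);
var result = new string[lines.Length - skip];
Array.Copy(lines, skip, result, 0, result.Length);
return result;
}
guess *= 2; // not enough lines: widen the window and retry (step 3)
}
}
}
}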
A memory-mapped file is also a good method: map the tail of the file, count lines, map the previous block, count lines, etc., until you get the number of lines needed.
Here is my answer:-
private string StatisticsFile = @"c:\yourfilename.txt";
// Read last lines of a file....
public IList<string> ReadLastLines(int nFromLine, int nNoLines, out bool bMore)
{
// Initialise more
bMore = false;
try
{
char[] buffer = null;
//lock (strMessages) Lock something if you need to....
{
if (File.Exists(StatisticsFile))
{
// Open file
using (StreamReader sr = new StreamReader(StatisticsFile))
{
long FileLength = sr.BaseStream.Length;
int c, linescount = 0;
long pos = FileLength - 1;
long PreviousReturn = FileLength;
// Process file
while (pos >= 0 && linescount < nFromLine + nNoLines) // Until found correct place
{
// Read a character from the end
c = BufferedGetCharBackwards(sr, pos);
if (c == Convert.ToInt32('\n'))
{
// Found return character
if (++linescount == nFromLine)
// Found last place
PreviousReturn = pos + 1; // Read to here
}
// Previous char
pos--;
}
pos++;
// Create buffer
buffer = new char[PreviousReturn - pos];
sr.DiscardBufferedData();
// Read all our chars
sr.BaseStream.Seek(pos, SeekOrigin.Begin);
sr.Read(buffer, (int)0, (int)(PreviousReturn - pos));
sr.Close();
// Store if more lines available
if (pos > 0)
// Is there more?
bMore = true;
}
if (buffer != null)
{
// Get data
string strResult = new string(buffer);
strResult = strResult.Replace("\r", "");
// Store in List
List<string> strSort = new List<string>(strResult.Split('\n'));
// Reverse order
strSort.Reverse();
return strSort;
}
}
}
}
catch (Exception ex)
{
System.Diagnostics.Debug.WriteLine("ReadLastLines Exception:" + ex.ToString());
}
// Lets return a list with no entries
return new List<string>();
}
const int CACHE_BUFFER_SIZE = 1024;
private long ncachestartbuffer = -1;
private char[] cachebuffer = null;
// Cache the file....
private int BufferedGetCharBackwards(StreamReader sr, long iPosFromBegin)
{
// Check for error
if (iPosFromBegin < 0 || iPosFromBegin >= sr.BaseStream.Length)
return -1;
// See if we have the character already
if (ncachestartbuffer >= 0 && ncachestartbuffer <= iPosFromBegin && ncachestartbuffer + cachebuffer.Length > iPosFromBegin)
{
return cachebuffer[iPosFromBegin - ncachestartbuffer];
}
// Load into cache
ncachestartbuffer = (int)Math.Max(0, iPosFromBegin - CACHE_BUFFER_SIZE + 1);
int nLength = (int)Math.Min(CACHE_BUFFER_SIZE, sr.BaseStream.Length - ncachestartbuffer);
cachebuffer = new char[nLength];
sr.DiscardBufferedData();
sr.BaseStream.Seek(ncachestartbuffer, SeekOrigin.Begin);
sr.Read(cachebuffer, (int)0, (int)nLength);
return BufferedGetCharBackwards(sr, iPosFromBegin);
}
Note:-
Call ReadLastLines with nFromLine starting at 0 for the last line and nNoLines as the number of lines to read back from.
It reverses the list so the 1st one is the last line in the file.
bMore returns true if there are more lines to read.
It caches the data in 1024 char chunks - so it is fast, you may want to increase this size for very large files.
Enjoy!
This is in no way optimal but for quick and dirty checks with small log files I've been using something like this:
List<string> mostRecentLines = File.ReadLines(filePath)
// .Where(....)
// .Distinct()
.Reverse()
.Take(10)
.ToList();
Something that you can now do very easily in C# 4.0 (and with just a tiny bit of effort in earlier versions) is use memory-mapped files for this type of operation. It's ideal for large files because you can map just a portion of the file, then access it as virtual memory.
There is a good example here.
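In case that example goes away, here is a minimal sketch of the approach (the window size and ASCII assumption are mine):
using System;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Text;
static class MappedTail
{
// Maps only the tail window of the file and scans backwards for newlines.
// Assumes a single-byte encoding and a window big enough for lineCount lines.
public static string Tail(string path, int lineCount, int windowSize = 1024 * 1024)
{
long fileLength = new FileInfo(path).Length;
long start = Math.Max(0, fileLength - windowSize);
long size = fileLength - start;
using (var mmf = MemoryMappedFile.CreateFromFile(path, FileMode.Open, null, 0, MemoryMappedFileAccess.Read))
using (var view = mmf.CreateViewStream(start, size, MemoryMappedFileAccess.Read))
{
var bytes = new byte[size];
view.Read(bytes, 0, bytes.Length);
int cut = 0, newlines = 0;
for (int i = bytes.Length - 1; i >= 0; i--)
{
if (bytes[i] == (byte)'\n' && ++newlines > lineCount)
{
cut = i + 1;
break;
}
}
return Encoding.ASCII.GetString(bytes, cut, bytes.Length - cut);
}
}
}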
As @EugeneMayevski stated above, if you just need an approximate number of lines returned, each line has roughly the same length, and you're more concerned with performance (especially for large files), this is a better implementation:
internal static StringBuilder ReadApproxLastNLines(string filePath, int approxLinesToRead, int approxLengthPerLine)
{
//If each line is more or less of the same length and you don't really care if you get back exactly the last n
using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
var totalCharsToRead = approxLengthPerLine * approxLinesToRead;
var buffer = new byte[1];
//read approx chars to read backwards from end
fs.Seek(totalCharsToRead > fs.Length ? -fs.Length : -totalCharsToRead, SeekOrigin.End);
while (buffer[0] != '\n' && fs.Position > 0) //find new line char
{
fs.Read(buffer, 0, 1);
}
var returnStringBuilder = new StringBuilder();
using (StreamReader sr = new StreamReader(fs))
{
returnStringBuilder.Append(sr.ReadToEnd());
}
return returnStringBuilder;
}
}
Most log files have a DateTime stamp. Although it can be improved, the code below works well if you want the log messages from the last N days.
/// <summary>
/// Returns list of entries from the last N days.
/// </summary>
/// <param name="N"></param>
/// <param name="cSEP">field separator, default is TAB</param>
/// <param name="indexOfDateColumn">default is 0; change if it is not the first item in each line</param>
/// <param name="bFileHasHeaderRow"> if true, it will not include the header row</param>
/// <returns></returns>
public List<string> ReadMessagesFromLastNDays(int N, char cSEP ='\t', int indexOfDateColumn = 0, bool bFileHasHeaderRow = true)
{
List<string> listRet = new List<string>();
//--- replace msFileName with the name (incl. path if appropriate)
string[] lines = File.ReadAllLines(msFileName);
if (lines.Length > 0)
{
DateTime dtm = DateTime.Now.AddDays(-N);
string sCheckDate = GetTimeStamp(dtm);
//--- process lines in reverse
int iMin = bFileHasHeaderRow ? 1 : 0;
for (int i = lines.Length - 1; i >= iMin; i--) //skip the header in line 0, if any
{
if (lines[i].Length > 0) //skip empty lines
{
string[] s = lines[i].Split(cSEP);
//--- s[indexOfDateColumn] contains the DateTime stamp in the log file
if (string.Compare(s[indexOfDateColumn], sCheckDate) >= 0)
{
//--- insert at top of list or they'd be in reverse chronological order
listRet.Insert(0, s[1]);
}
else
{
break; //out of loop
}
}
}
}
return listRet;
}
/// <summary>
/// Returns DateTime Stamp as formatted in the log file
/// </summary>
/// <param name="dtm">DateTime value</param>
/// <returns></returns>
private string GetTimeStamp(DateTime dtm)
{
// adjust format string to match what you use
return dtm.ToString("u");
}
