c# hash and php MD5 file/folder hash not the same - c#

I have to MD5 hash files/folders on both a client(C#) and a server(PHP) file structure. (Server land is PHP and client land is c#.) The problem is while they work they do not match. Any ideas would be greatly appreciated
Here are my two algorithms
C#
using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;
namespace nofolder
{
/// <summary>
/// Produces a hash string for a single file or for every file below a directory.
/// Per-file MD5 digests are appended to a shared StringBuilder and the combined
/// text is then reduced with string.GetHashCode().
/// </summary>
public class classHasher
{
/**********
* recursive folder MD5 hash of a dir
*/
// MD5 implementation used for every file digest; created once in the constructor.
MD5 hashAlgo = null;
// Collects the textual form of each file digest; re-created by getHashOverFolder/getHashOverFile.
StringBuilder sb;
public classHasher()
{
hashAlgo = new MD5CryptoServiceProvider();
}
/// <summary>
/// Dispatches to the file or folder variant depending on what exists at <paramref name="path"/>.
/// </summary>
/// <returns>The hash string, or "" when the path is neither an existing file nor an existing directory.</returns>
public string getHash(String path)
{
// files are checked first, then directories; anything else yields an empty string
if (File.Exists(path)) return getHashOverFile(path);
if (Directory.Exists(path)) return getHashOverFolder(path);
return "";
}
/// <summary>
/// Hashes every file below <paramref name="path"/> (recursively) and reduces the collected text to one value.
/// </summary>
public string getHashOverFolder(String path)
{
sb = new StringBuilder();
getFolderContents(path);
// NOTE(review): GetHashCode() returns the 32-bit .NET string hash code, not an MD5 digest,
// so this value cannot be compared with the output of PHP's md5().
return sb.ToString().GetHashCode().ToString();
}
/// <summary>
/// Hashes a single file and reduces the collected text to one value.
/// </summary>
public string getHashOverFile(String filename)
{
sb = new StringBuilder();
getFileHash(filename);
// NOTE(review): same GetHashCode() reduction as getHashOverFolder - not an MD5 hex string.
return sb.ToString().GetHashCode().ToString();
}
// Walks the tree depth-first: all sub-directories first, then the files of the current folder.
// NOTE(review): the enumeration order is whatever GetDirectories/GetFiles return; no sorting is
// applied here - confirm it matches the traversal order on the PHP side.
private void getFolderContents(string fold)
{
foreach (var d in Directory.GetDirectories(fold))
{
getFolderContents(d);
}
foreach (var f in Directory.GetFiles(fold))
{
getFileHash(f);
}
}
// Computes the MD5 digest of one file and appends every digest byte to sb.
private void getFileHash(String f)
{
using (FileStream file = new FileStream(f, FileMode.Open, FileAccess.Read))
{
byte[] retVal = hashAlgo.ComputeHash(file);
file.Close();
foreach (var y in retVal)
{
// NOTE(review): ToString() without a format writes each byte as unpadded decimal,
// whereas md5_file() on the PHP side produces hexadecimal text.
sb.Append(y.ToString());
}
}
}
}
}
PHP
/**
 * Returns an MD5 hex digest for a file, or for a directory tree.
 *
 * For a non-directory path the result is md5_file($dir). For a directory,
 * the digests of all entries (sub-directories are handled recursively) are
 * concatenated in the order the directory handle returns them and the
 * concatenation is hashed with md5().
 *
 * @param string $dir Path of a file or directory.
 * @return string|false Hex digest; md5_file() yields false when a file cannot be read.
 */
function include__md5_dir($dir){
/**********
* recursive folder MD5 hash of a dir
*/
if (!is_dir($dir)){
return md5_file($dir);
}
$filemd5s = array();
$d = dir($dir);
// NOTE(review): read() hands back entries in directory order, with no sorting here -
// confirm that order is the same on every platform that must produce matching hashes.
while (false !== ($entry = $d->read())){
// skip the self and parent links so the recursion terminates
if ($entry != '.' && $entry != '..'){
if (is_dir($dir.'/'.$entry)){
$filemd5s[] = include__md5_dir($dir.'/'.$entry);
}
else{
$filemd5s[] = md5_file($dir.'/'.$entry);
}
}
}
$d->close();
// the directory digest is the MD5 of all child digests joined without a separator
return md5(implode('', $filemd5s));
}
EDIT.
I have decided the c# must change. the PHP is fine as it is. The first code that works 100% gets the bounty

Your PHP code is assembling hexadecimal numbers (as per the md5_file() documentation)
Your C# code is assembling non-0-padded decimal numbers.
You need to y.ToString("x2") to format as hexadecimal.
Also, return sb.ToString().GetHashCode().ToString(); is extremely wrong. Don't call GetHashCode(); it's not what you want.

I eventually fixed this myself, and I include the answer for posterity. The key to this solution was eliminating the difference in the default directory ORDERING that Linux and Windows use. This was only tested on a Linux server (CentOS 6.3) and a Windows 7 client.
C#
/// <summary>
/// Computes MD5 hex digests for files and, recursively, for directory trees.
/// A directory digest is the MD5 of its children's digests concatenated in
/// ordinal (code-unit) order of their paths, so the result does not depend on
/// the enumeration order of the file system or on the current culture.
/// </summary>
public class classHasher
{
    // Single MD5 instance reused for every digest computed by this object.
    MD5 hashAlgo = null;

    public classHasher()
    {
        hashAlgo = MD5.Create();
    }

    /// <summary>
    /// Returns the digest of <paramref name="path"/>: the file digest when it is not
    /// a directory, otherwise the MD5 of the concatenated digests of all entries.
    /// </summary>
    /// <param name="path">File or directory to hash.</param>
    /// <returns>Lower-case hexadecimal MD5 string.</returns>
    public string UltraHasher(String path)
    {
        if (!Directory.Exists(path))
        {
            return getHashOverFile(path);
        }

        // One enumeration covers both sub-directories and files.
        List<string> entries = new List<string>(Directory.GetFileSystemEntries(path));

        // Ordinal comparison: the parameterless Sort() is culture-sensitive and would,
        // for example, place "a.txt" before "B.txt", unlike a byte-wise sort.
        entries.Sort(StringComparer.Ordinal);

        StringBuilder combined = new StringBuilder();
        foreach (string entry in entries)
        {
            if (Directory.Exists(entry))
            {
                combined.Append(UltraHasher(entry));
            }
            else if (File.Exists(entry))
            {
                combined.Append(getHashOverFile(entry));
            }
            // entries that vanished since enumeration are skipped
        }
        return CalculateMD5Hash(combined.ToString());
    }

    /// <summary>Returns the lower-case hexadecimal MD5 digest of a file's content.</summary>
    public string getHashOverFile(String filename)
    {
        using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
            return ToHex(hashAlgo.ComputeHash(file));
        }
    }

    /// <summary>Returns the lower-case hexadecimal MD5 digest of an ASCII string.</summary>
    public string CalculateMD5Hash(string input)
    {
        // The inputs produced by this class are hex digits, so ASCII is lossless here.
        byte[] inputBytes = System.Text.Encoding.ASCII.GetBytes(input);
        return ToHex(hashAlgo.ComputeHash(inputBytes));
    }

    // Formats digest bytes as two lower-case hex digits each.
    private static string ToHex(byte[] hash)
    {
        StringBuilder sz = new StringBuilder(hash.Length * 2);
        for (int i = 0; i < hash.Length; i++)
        {
            sz.Append(hash[i].ToString("x2"));
        }
        return sz.ToString();
    }
}
PHP
/**
 * Recursive MD5 digest of a file or directory tree.
 *
 * A file yields md5_file(). A directory yields the MD5 of its entries'
 * digests concatenated in byte-wise order of the entry names, so the result
 * does not depend on how the file system orders its directory listing.
 *
 * @param string $dir Path of a file or directory.
 * @return string|false Hex digest, or false when the path cannot be read.
 */
function md5_dir($dir){
    if (!is_dir($dir)){
        return md5_file($dir);
    }
    $entries = scandir($dir);
    if ($entries === false){
        // unreadable directory: report failure the same way md5_file() does
        return false;
    }
    $entries = array_values(array_diff($entries, array('.', '..')));
    // SORT_STRING forces a plain string comparison. The default flag compares
    // numeric-looking names numerically ("9" before "10"), which a byte-wise
    // sort on the other platform would not do.
    sort($entries, SORT_STRING);
    $filemd5s = array();
    foreach ($entries as $entry){
        $path = $dir.'/'.$entry;
        $filemd5s[] = is_dir($path) ? md5_dir($path) : md5_file($path);
    }
    return md5(implode('', $filemd5s));
}
these two will traverse either a C# Windows and or a PHP linux folder and return the SAME hashes for all dirs (recursive, so it includes sub dirs) inside Linuxland and all inside Windowsland.

To sort the same way as C# does: asort() is case-sensitive, so you will need to use, for example, natcasesort() instead.

Related

Extract the hash value substring from each line in a TextBox

I am trying to make an app that will verify multiple file hashes at once. The way I've done it is like this: Hash source files from location 1 and output to a textbox filename, hash type and the hash itself.
Hash source files from location 2 and output to another text box the second filename, hash type and hash.
The problem is that that the paths are different, and there are several hashes to verify. I don't know how to split the string to make this work so that just the hashes get checked against the other hashes, without filesnames.
This is how I am getting the hash string:
string[] files = filePicketTextBox.Text.Split('\n');
if (files.Length > 0)
{
foreach (var file in files).. //do hash
Here is how the output of the hash gets done:
output = file + " MD5 Hash = " + sb.ToString() + "\n";
Here's what it looks like in the output textbox:
C:\Users\jj\Downloads\hasher.zip MD5 Hash = 8B0A222D30CA4962AFE30511695C8BB3
C:\Users\jj\Downloads\OfficeSetup.exe MD5 Hash = 2E210D07A9AE9B93FAEB0852A0DAFF83
And here's what I'm using to verify the hashes:
private void VerifyHashes(object sender, RoutedEventArgs e)
{
string output_one = outputTextBox.Text;
string output_two = output2TextBox.Text;
if (output_one == output_two)
{
Verification.Text = "HASHES VERIFIED";
}
else
{
Verification.Text = "NO MATCH";
}
Can anyone help me with how to split this to remove those filenames and take into account that multiple filenames will be getting verified at once?
Full code:
/// <summary>
/// Lets the user pick one or more files and lists the chosen paths, one per line,
/// in filePicketTextBox. A cancelled dialog is reported in outputTextBox.
/// </summary>
private void filePickerButton_Click(object sender, RoutedEventArgs e)
{
    // NOTE(review): CommonOpenFileDialog is expected to be IDisposable (Windows API Code Pack);
    // the using block releases it once the selection has been read - confirm for your package version.
    using (CommonOpenFileDialog dialog = new CommonOpenFileDialog())
    {
        dialog.InitialDirectory = "C:\\Users";
        dialog.IsFolderPicker = false;
        dialog.Multiselect = true;

        // Check to see if we have a result
        if (dialog.ShowDialog() == CommonFileDialogResult.Ok)
        {
            filePicketTextBox.Text = string.Join("\n", dialog.FileNames);
        }
        else
        {
            outputTextBox.Text = "Operation cancelled." + "\n";
        }
    }
}
private async void runButton_Click(object sender, RoutedEventArgs e)
{
File.Delete(Path.Combine(Environment.CurrentDirectory, "log.txt"));
// Detect and handle the event of a void filename
if (filePicketTextBox.Text == "")
{
outputTextBox.Text = "No file selected." + "\n";
return;
}
string[] files = filePicketTextBox.Text.Split('\n');
if (files.Length > 0)
{
foreach (var file in files)
{
// Detect and handle the event of a non-valid filename
try
{
var stream = File.OpenRead(file);
stream.Close();
}
catch
{
outputTextBox.Text = "Invalid filename." + "\n";
return;
}
// Detect event of no options selected
if (!(bool)md5CheckBox.IsChecked && !(bool)sha1CheckBox.IsChecked && !(bool)sha256CheckBox.IsChecked && !(bool)sha512CheckBox.IsChecked)
{
outputTextBox.Text = "No hash algorithm selected.";
}
// MD5 Calculation and Display
if ((bool)md5CheckBox.IsChecked)
{
await Task.Run(() =>
{
var result = checkMD5(file).Result;
WriteIntoTxtBox(result);
});
await Task.Delay(1000);
progressbar.Value = 0;
}
MD5 Calc code:
/// <summary>
/// Computes the MD5 digest of <paramref name="file"/>, advancing the loading bar
/// along the way, and returns one output line of the form
/// "&lt;file&gt; MD5 Hash = &lt;UPPERCASE HEX&gt;\n".
/// </summary>
public async Task<string> checkMD5(string file)
{
    using (var md5 = MD5.Create())
    {
        AddValueLoadingBar(20, true);
        using (var stream = File.OpenRead(file))
        {
            byte[] digest = md5.ComputeHash(stream);
            AddValueLoadingBar(60, true);
            await Task.Delay(1000);

            // one progress step per digest byte, 30 units spread over the whole digest
            int step = 30 / digest.Length;
            StringBuilder hex = new StringBuilder(digest.Length);
            for (int i = 0; i < digest.Length; i++)
            {
                hex.Append(digest[i].ToString("X2"));
                AddValueLoadingBar(step);
            }

            string output = file + " MD5 Hash = " + hex.ToString() + "\n";
            AddValueLoadingBar(100, true);
            return output;
        }
    }
}
this works fine:
// Every hashed file contributes one "<path> MD5 Hash = <hash>" line that ends in "\n",
// so the buffer normally ends with an empty row; drop empty rows instead of indexing into them.
string[] Files = strFileBuf.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
string[] MyDelim = { " MD5 Hash = " };
foreach (string OneRow in Files)
{
    string[] TwoParts = OneRow.Split(MyDelim, StringSplitOptions.None);
    if (TwoParts.Length < 2)
    {
        // not a hash line (no delimiter present) - nothing to show
        continue;
    }
    string FileNameOnly = Path.GetFileName(TwoParts[0]);
    string MyHash = TwoParts[1];
    TextBox1.Text += ("File = " + FileNameOnly + " ->Hash = " + MyHash + "\n");
}
Output:
So, you can split on the each line (\n)
And then you can assume that the delimiter is the string part inbetween.
So, we split out to file name, and hash.
and then i display ONLY file name (without path) and the hash code.
***EDIT **************************************
Ok, the user wants the output sorted. They should have posted SOME mark up.
Here what we will do:
We assume TWO input strings - the from files, the "to" files.
As noted, I REALLY have to assume that some kind of list or something MUCH better then two strings are the SOURCE of these two file sets.
However, the code that follows could WELL be adapted - and be run directly off the original two "lists" of files.
These two strings - from a text box, from the bottom of the ocean - don't care.
So, we assume this markup on the page:
<asp:Button ID="Button1" runat="server" Height="31px" Text="Button" Width="110px" />
<br />
<asp:GridView ID="GridView1" runat="server">
</asp:GridView>
<br />
A simple button, and a simple gridView.
We will shove/send the results out to the Grid view - sorted.
The code would/could behind the above button could/would look like this:
/// <summary>
/// Compares the two file/hash listings and binds a table with one row per "from" file:
/// the file name, the matching "to" file (or "missing"), and whether the hashes match.
/// </summary>
protected void Button1_Click(object sender, EventArgs e)
{
    string strFileBuf1 = ""; // set this from the "from" files string
    string strFileBuf2 = ""; // set this from the "to" files string

    SortedDictionary<string, string> Loc1Files = GetFiles(strFileBuf1);
    SortedDictionary<string, string> Loc2Files = GetFiles(strFileBuf2);

    DataTable MyTable = new DataTable();
    MyTable.Columns.Add("FromFile", typeof(string));
    MyTable.Columns.Add("ToFile", typeof(string));
    MyTable.Columns.Add("Match", typeof(string));

    foreach (KeyValuePair<string, string> OneFile in Loc1Files)
    {
        DataRow OneRow = MyTable.NewRow();
        OneRow["FromFile"] = OneFile.Key;

        string otherHash;
        if (Loc2Files.TryGetValue(OneFile.Key, out otherHash))
        {
            OneRow["ToFile"] = OneFile.Key;
            // compare hash codes
            OneRow["Match"] = (OneFile.Value == otherHash) ? "Yes" : "No";
        }
        else
        {
            OneRow["ToFile"] = "missing";
            OneRow["Match"] = "missing";
        }
        MyTable.Rows.Add(OneRow);
    }

    GridView1.DataSource = MyTable;
    GridView1.DataBind();
}
Output:
The function/sub that splits out the data and returns that sorted dict list is this - we assume those "two" sets of files as those strings.
/// <summary>
/// Parses a listing of "&lt;path&gt; MD5 Hash = &lt;hash&gt;" lines into a dictionary
/// keyed by file name (without directory) and sorted by that name.
/// </summary>
/// <param name="str">Listing text, one entry per line; may be null or empty.</param>
/// <returns>
/// File name to hash. Blank lines and lines without the delimiter are ignored;
/// when a file name occurs more than once, the last entry wins.
/// </returns>
public SortedDictionary<string, string> GetFiles(string str)
{
    SortedDictionary<string, string> MyList = new SortedDictionary<string, string>();
    if (string.IsNullOrEmpty(str))
    {
        return MyList;
    }

    string[] MyDelim = { " MD5 Hash = " };
    // The listing ends with "\n", so a plain Split('\n') would produce an empty last row.
    string[] Files = str.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string OneRow in Files)
    {
        string[] TwoParts = OneRow.Split(MyDelim, StringSplitOptions.None);
        if (TwoParts.Length < 2)
        {
            continue;
        }
        // The indexer overwrites instead of throwing on a repeated file name.
        MyList[Path.GetFileName(TwoParts[0])] = TwoParts[1].Trim();
    }
    return MyList;
}
I've done quite a bit of work to reduce your code down to the bare essentials of computing your hashes without relying on the UI. I've then got the code that requires the UI separated out so that it has nothing to do with computing hashes.
To start with I created a single hashing function that you can use to compute any hash.
/// <summary>
/// Hashes <paramref name="file"/> on a thread-pool thread with the algorithm produced by
/// <paramref name="create"/> and returns the digest as upper-case hexadecimal text.
/// </summary>
public Task<string> ComputeHash(string file, Func<HashAlgorithm> create)
{
    Func<string> work = () =>
    {
        using (HashAlgorithm algorithm = create())
        using (FileStream input = File.OpenRead(file))
        {
            byte[] digest = algorithm.ComputeHash(input);
            // BitConverter renders "AA-BB-..." in upper case; removing the dashes leaves plain hex.
            return BitConverter.ToString(digest).Replace("-", "");
        }
    };
    return Task.Run(work);
}
I've now created two fields to store the list of files and the resulting hashes.
private string[] _fileNames = null;
private string[] _hashes = null;
I'll show the code that sets the _fileNames array later, but for now this is the code that you can call to compute your hashes:
/// <summary>
/// Deletes the old log.txt, selects the first requested algorithm
/// (MD5, then SHA1, SHA256, SHA512), validates the selected files and stores
/// one hash per file in _hashes.
/// </summary>
/// <returns>"Success", or a message describing why nothing was hashed.</returns>
private async Task<String> Run(bool md5, bool sha1, bool sha256, bool sha512)
{
    File.Delete(Path.Combine(Environment.CurrentDirectory, "log.txt"));

    Func<HashAlgorithm> factory = null;
    if (md5)
    {
        factory = () => MD5.Create();
    }
    else if (sha1)
    {
        factory = () => SHA1.Create();
    }
    else if (sha256)
    {
        factory = () => SHA256.Create();
    }
    else if (sha512)
    {
        factory = () => SHA512.Create();
    }

    // Guard clauses, checked in the same order as before.
    if (factory == null)
    {
        return "No hash algorithm selected.";
    }
    if (_fileNames == null || _fileNames.Length == 0)
    {
        return "No file selected." + "\n";
    }
    if (!_fileNames.All(name => File.Exists(name)))
    {
        return "Invalid filename." + "\n";
    }

    // Start one hashing task per file, then wait for all of them.
    Task<string>[] pending = new Task<string>[_fileNames.Length];
    for (int i = 0; i < pending.Length; i++)
    {
        pending[i] = ComputeHash(_fileNames[i], factory);
    }
    _hashes = await Task.WhenAll(pending);
    return "Success";
}
The hardest part of this code, in my opinion, is the use of Func<HashAlgorithm> hashAlgorithm to store the hashing algorithm. If you're not familiar with a Func<> then it's worth looking up, but think of it as a way to store a function in a variable so that you can call the function later.
Now, finally, we have the two bits of code that interact with the UI:
/// <summary>
/// Lets the user pick one or more files, remembers them in _fileNames and lists
/// them, one per line, in filePicketTextBox. A cancelled dialog is reported in outputTextBox.
/// </summary>
private void filePickerButton_Click(object sender, RoutedEventArgs e)
{
    // NOTE(review): CommonOpenFileDialog is expected to be IDisposable (Windows API Code Pack);
    // the using block releases it once the selection has been copied - confirm for your package version.
    using (CommonOpenFileDialog dialog = new CommonOpenFileDialog())
    {
        dialog.InitialDirectory = "C:\\Users";
        dialog.IsFolderPicker = false;
        dialog.Multiselect = true;
        if (dialog.ShowDialog() == CommonFileDialogResult.Ok)
        {
            // Copy the names before the dialog is disposed.
            _fileNames = dialog.FileNames.ToArray();
            filePicketTextBox.Text = string.Join("\n", _fileNames);
        }
        else
        {
            outputTextBox.Text = "Operation cancelled." + "\n";
        }
    }
}
/// <summary>
/// Runs the hashing with the algorithms ticked in the UI and shows the status text.
/// </summary>
private async void runButton_Click(object sender, RoutedEventArgs e)
{
    try
    {
        // IsChecked is a nullable bool; "== true" maps both null and false to false,
        // which is what the bool parameters of Run expect.
        string result = await Run(
            md5CheckBox.IsChecked == true,
            sha1CheckBox.IsChecked == true,
            sha256CheckBox.IsChecked == true,
            sha512CheckBox.IsChecked == true);
        outputTextBox.Text = result;
    }
    catch (System.IO.IOException ex)
    {
        // An exception escaping an async void handler cannot be observed by any caller,
        // so file errors are reported in the UI instead.
        outputTextBox.Text = "Error: " + ex.Message + "\n";
    }
    catch (System.UnauthorizedAccessException ex)
    {
        outputTextBox.Text = "Error: " + ex.Message + "\n";
    }
}
The end result is that you have two arrays, _fileNames & _hashes that you can use to get the hash for each file name. No need for splitting text.
You should format your output with some delimiter other than a space as file names can have space in them, otherwise you will need to account for that.
Your solution is quite awkward and could certainly be reengineered for a better solution.
But to directly answer your question you can join your output into a Dictionary<string,string>
where the key would be the file name and value would the hash
// Each line looks like "<path> MD5 Hash = <hash>": the last four space-separated
// tokens are "MD5", "Hash", "=" and the hash; everything before them is the path.
// Splitting on '\r' and '\n' with RemoveEmptyEntries avoids the empty entries that
// would otherwise all map to the same "" key and make ToDictionary throw.
var d1 = outputTextBox.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
    .Select(x => x.Split(' '))
    // ignore lines that are too short to be "<path> MD5 Hash = <hash>"
    .Where(x => x.Length > 4)
    .ToDictionary(
        // use the file name without the directory as the key.
        x => System.IO.Path.GetFileName(string.Join(" ", x.Take(x.Length - 4))),
        // the hash is the last sequence of characters following the last space
        x => x[x.Length - 1]
    );
var d2 = output2TextBox.Text.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
    .Select(x => x.Split(' '))
    .Where(x => x.Length > 4)
    .ToDictionary(
        // use the file name without the directory as the key.
        x => System.IO.Path.GetFileName(string.Join(" ", x.Take(x.Length - 4))),
        // the hash is the last sequence of characters following the last space
        x => x[x.Length - 1]
    );
After you would need to compare the contents of the two dictionaries using the file names as a key.
This can be done as follows
// First make sure both dictionaries contain exactly the same file names ...
var leftNames = d1.Keys.OrderBy(name => name);
var rightNames = d2.Keys.OrderBy(name => name);
var areEqual = leftNames.SequenceEqual(rightNames);

// ... and only then compare the hash stored for every name.
if (areEqual)
{
    areEqual = d1.All(pair => pair.Value == d2[pair.Key]);
}
Now if you are only concerned with if the files in two directories match, you could just collect the hashes and compare them.
var outputTextBox = new TextBox();
var output2TextBox = new TextBox();

// Take the text after the last space of every line (the hash) and sort the hashes,
// so that the two listings can be compared regardless of file order or path.
var hashes_left =
    from line in outputTextBox.Text.Split(Environment.NewLine.ToCharArray())
    let hash = line.Substring(line.LastIndexOf(' ') + 1)
    orderby hash
    select hash;
var hashes_right =
    from line in output2TextBox.Text.Split(Environment.NewLine.ToCharArray())
    let hash = line.Substring(line.LastIndexOf(' ') + 1)
    orderby hash
    select hash;

var areEqual = hashes_left.SequenceEqual(hashes_right);
However, even with these direct answers the proper thing would be to reengineer your code for a more efficient solution, such using a custom class in which you could use to compare strongly typed items in memory.
To clarify, reengineering the solution properly using custom classes would look something like this:
/// <summary>
/// Compares two collections of file paths: they are equal when they contain the same
/// file names (ignoring directories) and every pair of same-named files has the same hash.
/// </summary>
public class FileCollectionComparer
{
    private HashAlgo algo;
    private string[] x;
    private string[] y;

    /// <param name="hashAlgo">Algorithm used to hash the file contents.</param>
    /// <param name="x">Paths of the first collection.</param>
    /// <param name="y">Paths of the second collection.</param>
    /// <exception cref="ArgumentNullException">Either path array is null.</exception>
    public FileCollectionComparer(HashAlgo hashAlgo, string[] x, string[] y)
    {
        if (x == null) throw new ArgumentNullException(nameof(x));
        if (y == null) throw new ArgumentNullException(nameof(y));
        this.algo = hashAlgo;
        // Sort both sides by file name so that equal collections line up index by index.
        this.x = x.OrderBy(z => Path.GetFileName(z)).ToArray();
        this.y = y.OrderBy(z => Path.GetFileName(z)).ToArray();
    }

    /// <summary>
    /// Returns true when both collections hold the same file names with identical hashes.
    /// </summary>
    public bool AreEqual()
    {
        if (x.Length != y.Length)
        {
            return false;
        }

        // Cheap check first: names must match before any file is read.
        for (int i = 0; i < x.Length; i++)
        {
            if (Path.GetFileName(x[i]) != Path.GetFileName(y[i]))
            {
                return false;
            }
        }

        // Expensive check: hash the paired files and stop at the first difference.
        for (int i = 0; i < x.Length; i++)
        {
            var xHash = Hasher.GetHash(x[i], algo);
            var yHash = Hasher.GetHash(y[i], algo);
            if (xHash != yHash)
            {
                return false;
            }
        }
        return true;
    }
}
And we might define the HashAlgo enum and Hasher as:
/// <summary>Hash algorithms supported by <see cref="Hasher"/>.</summary>
public enum HashAlgo
{
    MD5,
    SHA1,
    SHA256,
    SHA512
}

/// <summary>Computes file hashes as upper-case hexadecimal strings.</summary>
public class Hasher
{
    /// <summary>
    /// Hashes the content of <paramref name="filePath"/> with the selected algorithm.
    /// </summary>
    /// <param name="filePath">File to read.</param>
    /// <param name="algo">Algorithm to use.</param>
    /// <returns>The digest as upper-case hexadecimal text.</returns>
    /// <exception cref="InvalidEnumArgumentException"><paramref name="algo"/> is not a defined value.</exception>
    public static string GetHash(string filePath, HashAlgo algo)
    {
        // HashAlgorithm is IDisposable; release it together with the file stream.
        using (HashAlgorithm hasher = CreateAlgorithm(algo))
        using (var fs = File.OpenRead(filePath))
        {
            return string.Join("", hasher.ComputeHash(fs).Select(x => x.ToString("X2")));
        }
    }

    // Maps the enum value to a new algorithm instance.
    private static HashAlgorithm CreateAlgorithm(HashAlgo algo)
    {
        switch (algo)
        {
            case HashAlgo.MD5:
                return MD5.Create();
            case HashAlgo.SHA1:
                return SHA1.Create();
            case HashAlgo.SHA256:
                return SHA256.Create();
            case HashAlgo.SHA512:
                return SHA512.Create();
            default:
                throw new InvalidEnumArgumentException(nameof(algo), (int)algo, typeof(HashAlgo));
        }
    }
}
Ok, it was as simple as adding this line in the hash method.
file = Path.GetFileName(file);
Here it is inside the method.
public async Task<string> checkMD5(string file)
{
string output;
using (var md5 = MD5.Create())
{
AddValueLoadingBar(20, true);
using (var stream = File.OpenRead(file))
{
byte[] hash = md5.ComputeHash(stream);
AddValueLoadingBar(60, true);
await Task.Delay(1000);
StringBuilder sb = new StringBuilder(hash.Length);
int toAdd = 30 / hash.Length;
foreach (byte b in hash)
{
sb.AppendFormat("{0:X2}", b);
AddValueLoadingBar(toAdd);
}
file = Path.GetFileName(file);
output = file + " MD5 Hash = " + sb.ToString() + "\n";
AddValueLoadingBar(100, true);
}
}
return output;
I added this right before the output, causing the output to not include the full path name which is the only thing that changes between the textboxes assuming the hashes are the same. The file name is the same, and if the hash is too, then it's verified.
This caused a lot of work but the answer was quite simple. My code definitely needs some re-writing though :) thanks everyone who answered this.

XML Serialization leaves file blank after restart

We have a problem where our industrial equipments software's .XML settings files become blank, yet they still have the correct number of bytes.
I have a feeling it might be caused by the way the customers are shutting down the PC, as it tends to happen after they've done a shutdown, isolate, and boot. The way I save the files is:
Serialize to %temp% file
Validate that the newly created file starts with <?xml
If the /backup folders version of the file is older than a day, copy the existing file to the /backup folder
Copy new file to overwrite existing file.
I thought maybe it's related to encoding, disk caching, Windows Update, or Windows Recovery.
Looking for ideas as I've spent two years chasing down why this is happening.
As per request, here is the code.
/// <summary>
/// Serializes <paramref name="Object2Serialize"/> as XML (optionally gzip-compressed) into a
/// temporary file, checks the file signature, and then - in the finally block - copies the
/// temporary file over <paramref name="FilePath"/>.
/// </summary>
/// <param name="Object2Serialize">Object handed to XmlSerializer.</param>
/// <param name="FilePath">Target file; a relative path is combined with ApplicationDir.</param>
/// <param name="type">Type passed to the XmlSerializer constructor.</param>
/// <param name="gzip">When true the XML is written through a GZipStream.</param>
/// <returns>
/// true when the temporary file was written and passed the signature check;
/// false when an XmlException occurred or all three attempts failed with an I/O exception.
/// The copy onto FilePath runs after the return value has been decided and its
/// exceptions are swallowed, so true does not prove that FilePath was updated.
/// </returns>
public static bool SerializeObjXml(object Object2Serialize, string FilePath, Type type, bool gzip = false)
{
if (!Path.IsPathRooted(FilePath))
FilePath = Path.Combine(ApplicationDir, FilePath);
bool isSuccess = false;
var tmpFile = Path.GetTempFileName();
try
{
// up to three attempts; the I/O exception handlers below "continue" into the next one
for (int i = 0; i < 3; i++)
{
try
{
Directory.CreateDirectory(Path.GetDirectoryName(FilePath));
if (gzip)
{
// serialize into memory first, then pump the bytes through gzip into the temp file
using (var ms = new MemoryStream())
{
XmlSerializer bf = new XmlSerializer(type);
bf.Serialize(ms, Object2Serialize);
ms.Position = 0;
using (var fileStream = new BinaryWriter(File.Open(tmpFile, FileMode.Create)))
{
using (GZipStream gzipStream = new GZipStream(fileStream.BaseStream, CompressionMode.Compress))
{
byte[] buffer = new byte[4096];
int numRead;
while ((numRead = ms.Read(buffer, 0, buffer.Length)) != 0)
{
gzipStream.Write(buffer, 0, numRead);
}
}
}
}
if (!FileChecker.isGZip(tmpFile))
throw new XmlException("Failed to write valid XML file " + FilePath);
}
else
{
using (var fs = new StreamWriter(File.Open(tmpFile, FileMode.Create), Encoding.UTF8))
{
XmlSerializer bf = new XmlSerializer(type);
bf.Serialize(fs, Object2Serialize);
}
if (!FileChecker.isXML(tmpFile))
throw new XmlException("Failed to write valid XML file " + FilePath);
}
// only the temp file has been validated at this point; the target is written in finally
isSuccess = true;
return true;
}
catch (XmlException)
{
return false;
}
catch (System.IO.DriveNotFoundException) { continue; }
catch (System.IO.DirectoryNotFoundException) { continue; }
catch (System.IO.FileNotFoundException) { continue; }
catch (System.IO.IOException) { continue; }
}
}
finally
{
if (isSuccess)
{
// NOTE(review): this locks on the FilePath string object; whether two callers end up
// with the same instance depends on how the string was created - confirm this gives
// the mutual exclusion that is intended.
lock (FilePath)
{
try
{
//Delete existing .bak file
if (File.Exists(FilePath + ".bak"))
{
File.SetAttributes(FilePath + ".bak", FileAttributes.Normal);
File.Delete(FilePath + ".bak");
}
}
catch { }
try
{
//Make copy of file as .bak
if (File.Exists(FilePath))
{
File.SetAttributes(FilePath, FileAttributes.Normal);
File.Copy(FilePath, FilePath + ".bak", true);
}
}
catch { }
try
{
//Copy the temp file to the target
File.Copy(tmpFile, FilePath, true);
//Delete .bak file if no error
// NOTE(review): the backup is removed as soon as File.Copy returns; presumably the
// copied data may still sit in the OS write cache at that point - TODO confirm,
// since a power cut here would leave neither a flushed target nor a backup.
if (File.Exists(FilePath + ".bak"))
File.Delete(FilePath + ".bak");
}
catch { }
}
}
try
{
//Delete the %temp% file
if (File.Exists(tmpFile))
File.Delete(tmpFile);
}
catch { }
}
// reached when all three attempts ended in one of the I/O exception handlers
return false;
}
/// <summary>
/// Identifies files by their leading bytes ("signature"). Signatures are written the
/// way BitConverter.ToString formats bytes, e.g. "1F-8B-08".
/// </summary>
public static class FileChecker
{
    // First three bytes of a gzip stream.
    const string gzipSig = "1F-8B-08";
    // UTF-8 byte order mark; this is what isXML actually checks for, not the "<?x" text.
    const string xmlSig = "EF-BB-BF";

    /// <summary>Returns true when the file starts with the gzip signature.</summary>
    public static bool isGZip(string filepath)
    {
        return FileChecker.CheckSignature(filepath, (3, gzipSig)) != null;
    }

    /// <summary>Returns true when the file starts with a UTF-8 byte order mark.</summary>
    public static bool isXML(string filepath)
    {
        return FileChecker.CheckSignature(filepath, (3, xmlSig)) != null;
    }

    /// <summary>
    /// Checks for both signatures with a single read of the file.
    /// </summary>
    /// <returns>true when either signature matched.</returns>
    public static bool isGZipOrXML(string filepath, out bool isGZip, out bool isXML)
    {
        var sig = FileChecker.CheckSignature(filepath, (3, gzipSig), (3, xmlSig));
        isXML = (sig == xmlSig);
        isGZip = (sig == gzipSig);
        return isXML || isGZip;
    }

    /// <summary>
    /// Reads the start of <paramref name="filepath"/> and returns the first expected
    /// signature that matches it.
    /// </summary>
    /// <param name="filepath">File to inspect.</param>
    /// <param name="pairs">Signature length in bytes and the expected dash-separated hex text.</param>
    /// <returns>
    /// The matching signature text, or null when nothing matches or the file is shorter
    /// than the longest requested signature.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The path is empty, no signature was supplied, or a supplied signature is empty.
    /// </exception>
    public static string CheckSignature(string filepath, params (int signatureSize, string expectedSignature)[] pairs)
    {
        if (String.IsNullOrEmpty(filepath))
            throw new ArgumentException("Must specify a filepath");
        if (pairs == null || pairs.Length == 0)
            throw new ArgumentException("Must specify a value for the expected file signature");

        int signatureSize = 0;
        foreach (var pair in pairs)
        {
            // every pair is validated, not just the first one
            if (String.IsNullOrEmpty(pair.expectedSignature))
                throw new ArgumentException("Must specify a value for the expected file signature");
            if (pair.signatureSize > signatureSize)
                signatureSize = pair.signatureSize;
        }

        using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        {
            if (fs.Length < signatureSize)
                return null;

            byte[] signature = new byte[signatureSize];
            int index = 0;
            while (index < signatureSize)
            {
                int bytesRead = fs.Read(signature, index, signatureSize - index);
                if (bytesRead == 0)
                {
                    // The file is opened with FileShare.ReadWrite, so it can shrink after the
                    // length check; without this exit the loop would never terminate.
                    return null;
                }
                index += bytesRead;
            }

            foreach (var pair in pairs)
            {
                string actualSignature = BitConverter.ToString(signature, 0, pair.signatureSize);
                if (actualSignature == pair.expectedSignature)
                    return actualSignature;
            }
        }
        return null;
    }
}
Using the operating system's move or copy file to overwrite an existing file is an atomic operation meaning the it wholly succeeds or doesn't and doesn't overlap other file operations.
Therefore what you have should work if that is how you are achieving step 4.
Copy new file to overwrite existing file.
If instead you are blanking out the existing file and re-writing the data I suspect that could be the the point of failure..
The issue is that, while the file space has already been allocated, the write itself has not completed when the shutdown occurs, which leaves you with a file that has its bytes allocated but whose data was never flushed to disk.
During the OS shutdown, likely a ThreadAbortException is raised which triggers your finally block.
You can attempt to reproduce by calling Process.Start("shutdown", "-a") before your return statement but after you have set success = true.
I would suggest simplifying your code and have everything run inside of your try {} statement. This removes the possibility of having a state where success = true before your attempted your write to disk, which is then triggered in a finally statement trigged by a windows shutdown.
/// <summary>
/// Serializes <paramref name="Object2Serialize"/> to a temporary file and copies it over
/// <paramref name="FilePath"/>, keeping the previous version as "&lt;FilePath&gt;.bak".
/// The whole sequence is attempted up to three times.
/// </summary>
/// <param name="Object2Serialize">Object to serialize.</param>
/// <param name="FilePath">Target file; a relative path is combined with ApplicationDir.</param>
/// <param name="type">Type passed to the serializer.</param>
/// <param name="gzip">When true the output is gzip-compressed.</param>
/// <returns>true when the target was written; false when all three attempts failed.</returns>
public static bool SerializeObjXml(
    object Object2Serialize,
    string FilePath,
    Type type,
    bool gzip = false)
{
    if (!Path.IsPathRooted(FilePath))
        FilePath = Path.Combine(ApplicationDir, FilePath);

    // Create the folder that will contain the file. Passing FilePath itself would create
    // a directory with the file's name and make the copy below fail every time.
    string targetDirectory = Path.GetDirectoryName(FilePath);
    if (!string.IsNullOrEmpty(targetDirectory))
        Directory.CreateDirectory(targetDirectory);

    for (int i = 0; i < 3; i++)
    {
        try
        {
            var tempFi = SerializeToXmlFile(Object2Serialize, type, gzip);
            var fi = new FileInfo(FilePath);
            // keep the previous version until the new one is in place
            if (fi.Exists)
                fi.CopyTo(fi.FullName + ".bak", true);
            tempFi.CopyTo(fi.FullName, true);
            tempFi.Delete();
            return true;
        }
        catch (Exception ex)
        {
            // record the failure next to the target and fall through to the next attempt
            string message = $"[{DateTime.Now}] Error serializing file {FilePath}. {ex}";
            File.WriteAllText(FilePath + ".log", message);
        }
    }
    return false;
}
As a side note, you can simply use [Stream.CopyTo][1] and write directly to your temp file, without the need for intermediary streams or for manual buffer/byte read/write operations:
/// <summary>
/// Serializes <paramref name="Object2Serialize"/> into a new temporary file, either as
/// UTF-8 XML with a byte order mark or as gzip-compressed XML, and verifies the file signature.
/// </summary>
/// <param name="Object2Serialize">Object to serialize.</param>
/// <param name="type">Type passed to the XmlSerializer constructor.</param>
/// <param name="gzip">When true the XML is written through a GZipStream.</param>
/// <returns>The temporary file; the caller is responsible for deleting it.</returns>
/// <exception cref="InvalidDataException">The written file does not carry the expected signature.</exception>
private static FileInfo SerializeToXmlFile(
    object Object2Serialize,
    Type type,
    bool gzip)
{
    // GetTempFileName already creates the (empty) file, so it must be opened with
    // FileMode.Create - FileMode.CreateNew would fail because the file exists.
    var tmpFile = Path.GetTempFileName();
    var tempFi = new FileInfo(tmpFile);
    try
    {
        var serializer = new XmlSerializer(type);
        if (!gzip)
        {
            // Encoding.UTF8 makes the writer emit the byte order mark that
            // FileChecker.isXML looks for (EF-BB-BF).
            using (var fs = File.Open(tmpFile, FileMode.Create))
            using (var writer = new StreamWriter(fs, System.Text.Encoding.UTF8))
                serializer.Serialize(writer, Object2Serialize);
            if (!FileChecker.isXML(tmpFile))
                throw new InvalidDataException($"Failed to write valid XML file: {tmpFile}");
        }
        else
        {
            // Serialize into the gzip stream, not the underlying file stream,
            // otherwise the file is plain XML and the gzip check can never pass.
            using (var fs = File.Open(tmpFile, FileMode.Create))
            using (var gz = new GZipStream(fs, CompressionMode.Compress))
                serializer.Serialize(gz, Object2Serialize);
            if (!FileChecker.isGZip(tmpFile))
                throw new InvalidDataException($"Failed to write valid XML gz file: {tmpFile}");
        }
        return tempFi;
    }
    catch
    {
        // The caller never sees the temp file when this method fails, so remove it here.
        try
        {
            File.Delete(tmpFile);
        }
        catch (IOException)
        {
            // best effort: a leftover temp file must not hide the original error
        }
        throw;
    }
}

C# save MD5 of each file in a directory, as well as files in all nested/child/sub directories, to a text file

I am trying to create a log of all files in a given directory (root directory is MyGlobals.finalPathForWork) as well as every file in a child or sub-directory that includes 1) all the files and paths; and 2) an MD5 of each file.
The first part of the code, MyMethod5, dealing with file paths works correctly. The problem is doing the same with the MD5s of each of those files. I think the code in CalculateMD5 should generate the MD5, but I don't know how to return the value/string it gets, for each file, and how to write that to the same text file like MyMethod5 does.
How do I use that MD5 that gets calculated in CalculateMD5 and write it to log.txt like in MyMethod5()?
/// <summary>
/// Appends the path of every file below MyGlobals.finalPathForWork (all sub-directories
/// included) to Log.txt in that folder and calls CalculateMD5 for each file.
/// </summary>
public static void MyMethod5()
{
    // Path.Combine inserts the separator; the former #"\\Log.txt" was not valid C#.
    string logPath = Path.Combine(MyGlobals.finalPathForWork, "Log.txt");
    foreach (string file in Directory.EnumerateFiles(MyGlobals.finalPathForWork, "*.*", SearchOption.AllDirectories))
    {
        //Console.WriteLine(file);
        File.AppendAllText(logPath, file + Environment.NewLine);
        CalculateMD5(file);
    }
}
/// <summary>
/// Returns the MD5 digest of a file's content as lower-case hexadecimal text.
/// </summary>
/// <param name="filename">File to hash.</param>
public static string CalculateMD5(string filename)
{
    using (var md5 = MD5.Create())
    using (var stream = File.OpenRead(filename))
    {
        // Hash the stream exactly once: a second ComputeHash on the same stream starts
        // at the end of the file and therefore returns the digest of empty input.
        byte[] hash = md5.ComputeHash(stream);
        return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
    }
}
I ended up doing it with this, but if you see room for improvement please let me know:
/// <summary>
/// Writes the path of every file below MyGlobals.finalPathForWork (all sub-directories
/// included) to Log.txt in that folder and lets CalculateMD5 add the hash line for it.
/// The log file itself is not listed.
/// </summary>
public static void MyMethod5()
{
    // Path.Combine inserts the separator; the former #"\\Log.txt" was not valid C#.
    string logPath = Path.GetFullPath(Path.Combine(MyGlobals.finalPathForWork, "Log.txt"));
    foreach (string file in Directory.EnumerateFiles(MyGlobals.finalPathForWork, "*.*", SearchOption.AllDirectories))
    {
        // The log lives inside the folder being enumerated; hashing it while it is
        // being appended to would record a meaningless value or fail on file sharing.
        if (string.Equals(Path.GetFullPath(file), logPath, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }
        //Console.WriteLine(file);
        File.AppendAllText(logPath, file + Environment.NewLine);
        CalculateMD5(file);
    }
}
/// <summary>
/// Computes the MD5 digest of <paramref name="filename"/> and appends a
/// "Hash:&lt;lower-case hex&gt;" line to Log.txt in MyGlobals.finalPathForWork.
/// </summary>
public static void CalculateMD5(string filename)
{
    string output;
    using (var md5 = MD5.Create())
    using (var stream = File.OpenRead(filename))
    {
        byte[] checksum = md5.ComputeHash(stream);
        output = BitConverter.ToString(checksum).Replace("-", String.Empty).ToLowerInvariant();
    }

    // Append only after the input stream is closed, so the write can never collide with
    // the read handle. Path.Combine replaces the invalid #"\\Log.txt" literal.
    File.AppendAllText(Path.Combine(MyGlobals.finalPathForWork, "Log.txt"), "Hash:" + output + Environment.NewLine);
}

SearchOption.AllDirectories filter

I am trying to filter out the path C:\$Recycle.bin in my file enumeration. How can I do this?
var files = Directory.GetFiles(path, "*.*", SearchOption.AllDirectories).OrderBy(p => p).ToList();
When I execute the above, I get the error below.
Additional information: Access to the path 'C:\$Recycle.Bin\S-1-5-21-1600837348-2291285090-976156579-500' is denied.
I also want to calc every file's md5. I have:
var mainDirectory = new DirectoryInfo("\\");
var files = GetDirectories(mainDirectory);
List<string> drives = new List<string>();
foreach (var file in files)
{
    //Console.WriteLine(file.Name);
    drives.Add(mainDirectory + file.Name);
}
using (MD5 md5 = MD5.Create())
{
    foreach (string file in drives)
    {
        // Read the content first: if it cannot be read, nothing has been fed to the
        // hash yet, so a skipped entry cannot leak into the next entry's digest.
        byte[] contentBytes;
        try
        {
            contentBytes = File.ReadAllBytes(file);
        }
        catch (Exception)
        {
            // unreadable entry (access denied, not a file, ...) - skip it
            continue;
        }

        md5.Initialize();

        // hash path
        // NOTE(review): this offset also drops the first character of the name when the
        // root is "\" - confirm the intended relative path.
        string relativePath = file.Substring("\\".Length + 1);
        byte[] pathBytes = Encoding.UTF8.GetBytes(relativePath.ToLower());
        md5.TransformBlock(pathBytes, 0, pathBytes.Length, pathBytes, 0);

        // hash contents exactly once; the final block call also completes the digest
        md5.TransformFinalBlock(contentBytes, 0, contentBytes.Length);

        Console.WriteLine(BitConverter.ToString(md5.Hash).Replace("-", "").ToLower());
    }
}
Console.ReadKey();
The following could do it for you, but it is a quick and dirty way, because it does not handle any exceptions. I did not regard any readability and it is not fully tested.
/// <summary>
/// Prints the name of every file found below C:\ by GetFiles, then waits for a key press.
/// </summary>
static void Main(string[] args)
{
    DirectoryInfo root = new DirectoryInfo("C:\\");
    foreach (FileInfo found in GetFiles(root, "."))
    {
        Console.WriteLine(found.Name);
    }
    Console.ReadKey();
}
/// <summary>
/// Lazily yields <paramref name="parentDirectory"/> followed by all of its descendants in
/// pre-order. A directory whose children cannot be listed is still yielded itself;
/// only its subtree is left out.
/// </summary>
static IEnumerable<DirectoryInfo> GetDirectories(DirectoryInfo parentDirectory)
{
    // Explicit stack instead of recursion; children are pushed in reverse so that
    // they are popped - and therefore visited - in their original order.
    Stack<DirectoryInfo> pending = new Stack<DirectoryInfo>();
    pending.Push(parentDirectory);
    while (pending.Count > 0)
    {
        DirectoryInfo current = pending.Pop();
        DirectoryInfo[] children = null;
        try
        {
            children = current.GetDirectories();
        }
        catch (Exception)
        {
            // listing failed (e.g. access denied): treat the directory as a leaf
        }
        yield return current;
        if (children == null)
        {
            continue;
        }
        for (int i = children.Length - 1; i >= 0; i--)
        {
            pending.Push(children[i]);
        }
    }
}
/// <summary>
/// Lazily yields the files matching <paramref name="searchPattern"/> in
/// <paramref name="parentDirectory"/> and in every directory returned by GetDirectories.
/// Directories whose files cannot be listed are skipped.
/// </summary>
static IEnumerable<FileInfo> GetFiles(DirectoryInfo parentDirectory,
    string searchPattern)
{
    foreach (DirectoryInfo folder in GetDirectories(parentDirectory))
    {
        FileInfo[] matches;
        try
        {
            matches = folder.GetFiles(searchPattern);
        }
        catch (Exception)
        {
            // listing failed (e.g. access denied): move on to the next directory
            continue;
        }
        if (matches == null)
        {
            continue;
        }
        for (int i = 0; i < matches.Length; i++)
        {
            yield return matches[i];
        }
    }
}

Using Key like "[]Server" into sortedDictionary, but why?

I am trying to understand and probably reuse part of DevExpress Demo code to save simple settings into ini file. I know I can use .NET System.Configuration doing what I want. Just for a smallish project, simple save it as a text file seem more flexible and light-weighted, at least that is what it seems.
Anyway, while I am playing with it, I am trying to understand why the code I am reading trying to add key as "[]Server" and "[]DBFormat" as key name. They do that for a reason I can not understand yet, I could probably use some help here.
Here is the Code I think relevant:
public class IniFile {
SortedDictionary<string, string> data = new SortedDictionary<string, string>();
...
/// <summary>
/// Reads an ini file into <c>data</c>. Every key is stored as "[section]name";
/// settings that appear before any section header get the empty section "[]".
/// Blank lines and lines starting with ';' are ignored, and a later occurrence
/// of a key overwrites an earlier one.
/// </summary>
public void Load(string path) {
    using(StreamReader reader = new StreamReader(path)) {
        string section = "[]";
        string raw;
        while((raw = reader.ReadLine()) != null) {
            string s = raw.Trim();
            if(s.Length == 0 || s[0] == ';') continue;
            if(s[0] == '[') {
                // section header: becomes the prefix of all following keys
                section = s;
                continue;
            }
            int delim = s.IndexOf('=');
            // without '=' the whole line is the name and the value is empty
            string name = delim < 0 ? s : s.Remove(delim).TrimEnd();
            string value = delim < 0 ? string.Empty : s.Substring(delim + 1).TrimStart();
            // brackets are stripped from the name so the only ']' in a key ends the section
            string key = section + name.Replace("[", string.Empty).Replace("]", string.Empty);
            data[key] = value;
        }
    }
}
...
/// <summary>
/// Writes <c>data</c> as an ini file. Keys are visited in the dictionary's sorted order,
/// so all settings of one section are adjacent; a section header line is written
/// whenever the section changes. Settings of the empty section "[]" come out without a header.
/// </summary>
public void Save(string path) {
    using(StreamWriter writer = new StreamWriter(path)) {
        string currentFolder = "[]";
        foreach(KeyValuePair<string, string> entry in data) {
            // the first ']' separates "[section]" from the setting name
            int split = entry.Key.IndexOf(']') + 1;
            string keyFolder = entry.Key.Remove(split);
            string keyName = entry.Key.Substring(split);
            if(keyFolder != currentFolder) {
                currentFolder = keyFolder;
                writer.WriteLine(currentFolder);
            }
            writer.WriteLine(keyName + " = " + entry.Value);
        }
    }
}
/// <summary>
/// Adds a setting whose key is already in the internal "[section]name" form.
/// </summary>
/// <exception cref="ArgumentException">The trimmed key does not start with '[' or contains no ']'.</exception>
void AddRawValue(string key, string value) {
    key = key.Trim();
    value = value.Trim();
    bool startsWithSection = key.IndexOf('[') == 0;
    bool sectionIsClosed = key.IndexOf(']') >= 0;
    if(!(startsWithSection && sectionIsClosed)) throw new ArgumentException("key");
    data.Add(key, value);
}
And here is part of the ini file itself:
DBFormat = "Mdb"
Login = "admin"
Password = ""
Server = "(localhost)"
Obviously, they go through the trouble of adding [] to the key name for some reason, but end up not using it in the demo data. I think they are using the string inside [] to group settings?
I only skimmed it, but I'd imagine the intent might have been "[section]setting" so "[]setting" would represent a setting not in a section. Look how easy it is to get values, just a single string will do! (There seems to be confusion between a "section" and a "folder" -- which might be an above-par variable name for that code...)
Then again, I could be way off as I only invested about 10 seconds of time on that tripe >:)
Happy coding.

Categories