I need to store double values in binary files and read them back. My data is stored in an array. I have tried the following code, but apparently I am storing more values than the array size and I am reading totally wrong data. For example, if I store 0.26 from array[0], the first few bytes in the binary file are A4 70 3D... I don't understand how 0.26 is converted to these values, or on what basis.
This code is for writing to binary file:
double[] DataCollection_array = new double[10000];

public void store_data()
{
    Binary_filename = folder_path + "\\" + "Binary1.bin";
    stream = new FileStream(Binary_filename, FileMode.Create);
    binary_writer = new BinaryWriter(stream);
    writetoBinary(DataCollection_array.Length);
}

public void writetoBinary(int size)
{
    for (int i = 0; i < size; i++)
    {
        binary_writer.Write(DataCollection_array[i]);
    }
}
This is the code for reading the double values from a folder that contains binary files:
int bytes_counter1 = 0;
List<double> Channels = new List<double>();

public void read_data()
{
    path2 = Directory2.folder_path + "\\" + "Binary" + file_number + ".bin";
    file_stream = new FileStream(path2, FileMode.Open, FileAccess.Read);
    using (reader = new BinaryReader(file_stream))
    {
        if (bytes_counter1 < reader.BaseStream.Length)
        {
            reader.BaseStream.Seek((count + offset1), SeekOrigin.Begin);
            Channels.Add((double)reader.ReadByte());
            bytes_counter1++;
        }
    }
}
You are writing doubles:
binary_writer.Write(DataCollection_array[i]);
But you are only reading bytes:
Channels.Add((double)reader.ReadByte()); // Read one byte
Change it to:
Channels.Add(reader.ReadDouble()); // Read one double
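As for the bytes you saw: BinaryWriter.Write(double) stores the 8-byte IEEE 754 representation of the value, least significant byte first. 0.26 encodes as 0x3FD0A3D70A3D70A4, which lands in the file as A4 70 3D 0A D7 A3 D0 3F, exactly the A4 70 3D... you observed, so the file contents are actually correct. A minimal sketch that verifies this and reads every stored double back (the file path and the read loop are assumptions about how you want to consume the file; each value occupies 8 bytes):

using System;
using System.Collections.Generic;
using System.IO;

string path = "Binary1.bin"; // hypothetical path to the file written above

// BitConverter exposes the same little-endian bytes that BinaryWriter writes.
Console.WriteLine(BitConverter.ToString(BitConverter.GetBytes(0.26)));
// prints: A4-70-3D-0A-D7-A3-D0-3F

// Read the whole file back, one ReadDouble per stored value.
var values = new List<double>();
using (var reader = new BinaryReader(File.OpenRead(path)))
{
    while (reader.BaseStream.Position < reader.BaseStream.Length)
    {
        values.Add(reader.ReadDouble());
    }
}
Console.WriteLine(values[0]); // 0.26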
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;

namespace Homework
{
    class Program
    {
        static void Main(string[] args)
        {
            string mode = args[0];
            string name = args[1];
            if (mode == "split")
            {
                FileInfo fileinfo = new FileInfo(name);
                int fileSize = (int)fileinfo.Length;
                int partSize = 1024;
                int numberOfPart = (int)Math.Ceiling((double)fileSize / partSize);
                string chunkName = "";
                int byteRemaining = fileSize;
                FileStream list = File.OpenWrite(name + "_list");
                StreamWriter list_write = new StreamWriter(list, System.Text.Encoding.UTF8);
                list_write.WriteLine(name); // store the original file name on the first line of the list file
                Console.WriteLine(name + "_list");
                for (int count = 1; count <= numberOfPart; count++)
                {
                    byte[] buffer = new byte[partSize];
                    chunkName = string.Format("{0}_{1}", name, count);
                    int chunkSize;
                    FileStream origin = File.OpenRead(name);
                    BinaryReader origin_read = new BinaryReader(origin);
                    if (byteRemaining >= partSize)
                    {
                        chunkSize = partSize;
                    }
                    else
                    {
                        chunkSize = byteRemaining;
                    }
                    buffer = origin_read.ReadBytes(chunkSize); // read from the original
                    FileStream chunk = File.OpenWrite(chunkName);
                    BinaryWriter chunk_write = new BinaryWriter(chunk);
                    chunk_write.Write(buffer); // write into the chunk
                    byteRemaining -= chunkSize;
                    list_write.WriteLine(chunkName); // store the chunkName in the list file as well
                    Console.WriteLine(chunkName);
                    origin_read.Close();
                    chunk_write.Close();
                }
                list_write.Close();
            }
            else if (mode == "merge")
            {
                FileStream list = File.OpenRead(name); // stream for reading the list file
                StreamReader list_read = new StreamReader(list, Encoding.Default, true);
                string originName = list_read.ReadLine();
                FileStream origin = File.OpenWrite(originName); // stream for recreating the original file
                BinaryWriter origin_write = new BinaryWriter(origin);
                int partSize = 1024;
                while (list_read.EndOfStream == false) // read until the end of the list file
                {
                    string chunkName = list_read.ReadLine();
                    byte[] buffer = new byte[partSize];
                    FileStream chunk = File.OpenRead(chunkName); // stream for reading each chunk
                    BinaryReader chunk_read = new BinaryReader(chunk);
                    FileInfo fileinfo = new FileInfo(chunkName);
                    buffer = chunk_read.ReadBytes((int)fileinfo.Length); // read the whole chunk into the buffer
                    origin_write.Write(buffer); // write the buffered content to the original file
                }
            }
            Console.ReadKey();
        }
    }
}
*Expected
split : split the file into chunks of 1 KB and create a [filename]_list file that contains the name of the original file and the names of the chunks ([filename]_1, [filename]_2, ...)
merge : read the [filename]_list file created during the split process and merge the chunks into one file again. After merging, the merged file has to be exactly the same as the original file that was split above.
*Problem
In the case of a .txt file it works as expected, but with a .jpg file it splits fine while the merged file is not the same as the original file.
Issue:
The issue with your code is that when you split the file into parts, instead of writing each chunk with the next part of the bytes, you reopen the source file on every iteration of the loop and start reading from the beginning again, so every chunk contains the first bytes of the file and the merged file comes out wrong.
Solution:
All you have to do is move 3 specific lines of code out of the for loop, so that the reader keeps its position between chunks and your code looks like this:

FileStream origin = File.OpenRead(name); // 1st line
BinaryReader origin_read = new BinaryReader(origin); // 2nd line
for (int count = 1; count <= numberOfPart; count++)
{
    ...
}
origin_read.Close(); // 3rd line
list_write.Close();
Recommended solution:
Although, here is a recommended solution too.
using System;
using System.IO;
using System.Linq;

namespace Homework
{
    class Program
    {
        static void Main(string[] args)
        {
            string mode = args[0];
            string file = args[1]; // sourcefile or sourcefile_list
            try
            {
                switch (mode)
                {
                    case "split":
                        byte[] buffer = File.ReadAllBytes(file);
                        int partSize = 1024;
                        int numberOfPart = (int)Math.Ceiling((double)buffer.Length / partSize);
                        string chunkPathName; // the path-and-name of the chunk file
                        // go through all parts
                        for (int count = 0; count < numberOfPart; count++)
                        {
                            chunkPathName = file + "_" + (count + 1);
                            // write this chunk's bytes to the destination chunk file, starting
                            // from a specific point, then append the chunk's path-name to the _list file
                            File.WriteAllBytes(chunkPathName, buffer.Skip(count * partSize).Take(partSize).ToArray());
                            File.AppendAllText(file + "_list", chunkPathName + "\n");
                            Console.WriteLine("Splitting: " + chunkPathName);
                        }
                        break; // exit switch
                    case "merge":
                        // create a stream pointing to your desired destination/origin file
                        var originstream = new FileStream(file.Remove(file.Length - "_list".Length), FileMode.Create);
                        Console.WriteLine("destination path = " + file.Remove(file.Length - "_list".Length) + "\n");
                        // go through each line of the _list file
                        foreach (string _chunkPathName in File.ReadAllLines(file))
                        {
                            Console.WriteLine("Merging: " + _chunkPathName);
                            // read all bytes from the chunk and append them to the origin file
                            byte[] chunk = File.ReadAllBytes(_chunkPathName);
                            originstream.Write(chunk, 0, chunk.Length);
                        }
                        // close the stream
                        originstream.Close();
                        break; // exit switch
                }
                Console.WriteLine("\n" + mode + " is done!");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message); // report instead of silently swallowing the exception
            }
            Console.ReadKey();
        }
    }
}
I've tried to make it clear enough with comments, too.
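For reference, a hypothetical session (the executable name and the file are made up for illustration; the program's namespace is Homework, so assume it compiles to Homework.exe and photo.jpg is in the working directory):

Homework.exe split photo.jpg
Homework.exe merge photo.jpg_list

The first command produces photo.jpg_1 ... photo.jpg_N plus photo.jpg_list; the second reads photo.jpg_list and recreates photo.jpg from the chunks.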
I would like to compare Excel sheets by converting the sheets into byte arrays and comparing those.
Currently my code looks like this:
public static Document FileToByteArray(string fileName)
{
    System.IO.FileStream fs = new System.IO.FileStream(fileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
    System.IO.BinaryReader binaryReader = new System.IO.BinaryReader(fs);
    long byteLength = new System.IO.FileInfo(fileName).Length;
    byte[] fileContent = binaryReader.ReadBytes((int)byteLength);
    fs.Close();
    fs.Dispose();
    binaryReader.Close();
    Document Document = new Document
    {
        DocContent = fileContent
    };
    return Document;
}

public class Document
{
    public byte[] DocContent { get; set; }
}
And finally the main code:
private static void CompareImportedExportedExcels(string ingredientName, string ingredientsExportFile, AuthorizedLayoutPage authorizedBackofficePage, IngredientsPage ingredientsPage)
{
    authorizedBackofficePage.LeftMenuComponent.ChooseLeftSectionOption<IngredientsPage>();
    ingredientsPage.FiltersComponent.UseStringFilter(FiltersOptions.IngredientName, ingredientName);
    ingredientsPage.ExportIngredientsElement.Click();
    var downloadResult = DownloadHelper.WaitUntilDownloadedCompare(ingredientsExportFile);
    string ingredientExportExcelFile = DownloadHelper.SeleniumDownloadPath + ingredientsExportFile;
    var exelToByteArray1 = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..") + @"\TestData\" + "ImportFiles" + @"\IngredientsImport.xlsx");
    var excelArray1 = ExcelsExtensions.FileToByteArray(exelToByteArray1);
    var excelArray2 = ExcelsExtensions.FileToByteArray(ingredientExportExcelFile);
    if (excelArray1.DocContent.Length == excelArray2.DocContent.Length)
    {
        Console.WriteLine("Excels are equal");
        DownloadHelper.CheckFileDownloaded(ingredientsExportFile);
    }
    else
    {
        Console.WriteLine("Excels are not equal");
        DownloadHelper.CheckFileDownloaded(ingredientsExportFile);
        Assert.Fail("Seems that imported and exported excels were not the same! Check it!");
    }
}
What's the current status:
The code above works correctly as far as getting the .Length and comparing it between two Excel files goes. The problem appears in a different comparison, where the exported Excel file is first placed inside a .zip file; I need to unpack it and then compare. Although the Excel sheets are the same, the .Length value is different and the comparison fails.
var downloadResult = DownloadHelper.WaitUntilDownloadedCompare(productsExportFile);
string stockProductZIPFile = DownloadHelper.SeleniumDownloadPath + productsExportFile;
string stockProductUnzippedFilePath = DownloadHelper.SeleniumDownloadPath + productsExportFile;
var pathToUnzip = DownloadHelper.SeleniumDownloadPath + productsExportFolderFile;
ZipFile zip = ZipFile.Read(stockProductZIPFile);
zip.ExtractAll(pathToUnzip);
string stockProductExportedExcel = DownloadHelper.SeleniumDownloadPath + "\\ProductsExport" + @"\Stock Products.xlsx";
var exelToByteArray1 = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..") + @"\TestData\" + "ImportFiles" + @"\StockProduct.xlsx");
var excelArray1 = ExcelsExtensions.FileToByteArray(exelToByteArray1);
var excelArray2 = ExcelsExtensions.FileToByteArray(stockProductExportedExcel);
if (excelArray1.DocContent.Length == excelArray2.DocContent.Length)
{
    Console.WriteLine("Excels are equal");
    DownloadHelper.CheckFileDownloaded(stockProductUnzippedFilePath);
    DownloadHelper.CheckFileDownloaded(pathToUnzip);
}
else
{
    Console.WriteLine("Excels are not equal");
    DownloadHelper.CheckFileDownloaded(stockProductUnzippedFilePath);
    DownloadHelper.CheckFileDownloaded(pathToUnzip);
    Assert.Fail("Seems that imported and exported excels were not the same! Check it!");
}
Ideas to solve
First of all, I'm not sure whether comparing those two by .Length is a good idea; it works for one case but not for the other. I'm not sure if it is connected with packing the sheet into the .zip format and then unpacking it? In the second (broken) scenario the sizes actually differ: the oracle file is 4 KB and the exported one is 10 KB (even though the data inside is the same).
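A note on why the sizes differ: .xlsx is itself a ZIP container (Office Open XML), so an exported or re-zipped copy can be recompressed with different settings and metadata than the original; two workbooks with identical cell data can legitimately differ in size and in raw bytes. That means neither .Length nor a raw byte comparison is a reliable equality test for spreadsheets; to compare content you would have to read the cell values back with a spreadsheet library. If you still want a check that is at least stricter than .Length for files that should be bit-identical copies, here is a minimal sketch reusing the DocContent arrays from above (SequenceEqual needs a using System.Linq; directive):

// SequenceEqual compares every byte, not just the count.
// Only meaningful when the two files are expected to be exact copies.
bool sameBytes = excelArray1.DocContent.SequenceEqual(excelArray2.DocContent);
Console.WriteLine(sameBytes ? "Excels are equal" : "Excels are not equal");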
I'm going to use a List as a data structure to temporarily hold real-time data, and I want to write it into a file.
The program receives millions of data points in real time, so I want to reduce latency and overhead as much as possible. At first I just combined the data (strings) and saved the combined string into a list, but I've found that using a fixed-size list of structs is better, because the process of combining strings is expensive (before writing the file, while temporarily holding the real-time data).
Now I'm wondering how to efficiently write the structs in a list into a file.
List<struct_string> struct_list = new List<struct_string>(1000000);

FileStream fileStream = new FileStream(fileName, FileMode.Append, FileAccess.Write);
StreamWriter streamWriter = new StreamWriter(fileStream);

for (int num = 0; num < struct_list.Count; num++)
{
    streamWriter.Write(struct_list[num].string1 + ", " +
                       struct_list[num].string2 + ", " +
                       struct_list[num].string3 + ", " +
                       struct_list[num].string4 + ", " +
                       struct_list[num].string5 + "\r\n");
}
streamWriter.Flush(); // StreamWriter buffers internally; flush (or dispose) before the data is needed on disk

internal struct struct_string
{
    public string string1;
    public string string2;
    public string string3;
    public string string4;
    public string string5;

    public struct_string(string _string1, string _string2, string _string3, string _string4, string _string5)
    {
        string1 = _string1;
        string2 = _string2;
        string3 = _string3;
        string4 = _string4;
        string5 = _string5;
    }
}
This is what I could initially think of, but I think there should be built-in functions or better ways to do this.
To read/write them to a binary file do this:
Define the struct:
[Serializable]
public struct X
{
    public int N { get; set; }
    public string S { get; set; }
}
Read and write it using a BinaryFormatter:
// requires: using System.Runtime.Serialization.Formatters.Binary;
string filename = @"c:\temp\list.bin";

var list = new List<X>();
list.Add(new X { N = 1, S = "No. 1" });
list.Add(new X { N = 2, S = "No. 2" });
list.Add(new X { N = 3, S = "No. 3" });

BinaryFormatter formatter = new BinaryFormatter();
using (System.IO.Stream ms = File.OpenWrite(filename))
{
    formatter.Serialize(ms, list);
}
using (FileStream fs = File.Open(filename, FileMode.Open))
{
    object obj = formatter.Deserialize(fs);
    var newlist = (List<X>)obj;
    foreach (X x in newlist)
    {
        Console.Out.WriteLine($"N={x.N}, S={x.S}");
    }
}
The solution relies on the fact that the List<T> class, as well as the X struct, is serializable.
Try using serialization instead; there are libraries for that. With the new System.Text.Json in .NET Core you get really neat performance; another popular choice is Newtonsoft.Json for standard .NET.
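For completeness: BinaryFormatter has since been marked obsolete in .NET 5 and later for security reasons, so a text serializer like this is the safer default today. A minimal System.Text.Json sketch for the same list (the file path is made up; JsonSerializer handles the public properties of the X struct above as-is):

using System.Text.Json;

var list = new List<X>
{
    new X { N = 1, S = "No. 1" },
    new X { N = 2, S = "No. 2" },
};

// Write the whole list as one JSON array.
File.WriteAllText(@"c:\temp\list.json", JsonSerializer.Serialize(list));

// Read it back into a new List<X>.
var newlist = JsonSerializer.Deserialize<List<X>>(File.ReadAllText(@"c:\temp\list.json"));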
I know this isn't a direct answer to your question, but I hope it helps.
I have a page where the user can either upload their own CSV or enter values into a listbox, which then creates a CSV in the background. Regardless of which way the CSV gets created, I need to upload it to our server via a byte stream.
My problem is that when I'm creating the CSV I shouldn't have to create a temporary file; I should be able to write to the stream and then read it back for uploading. How can I remove the need for the temporary file?
Current code, which works (but uses a temp file):
try {
    string filename = DateTime.Now.ToString("MMddyyHmssf");
    filename = filename + ".csv";
    string directory = ConfigurationManager.AppSettings["TempDirectory"].ToString();
    path = Path.Combine(directory, filename);
    using (StreamWriter sw = File.CreateText(path)) {
        foreach (ListItem item in this.lstAddEmailAddress.Items) {
            sw.WriteLine(" , ," + item.ToString());
        }
    }
} catch (Exception ex) {
    string error = "Cannot create temp csv file used for importing users by email address. Filepath: " + path + ". FileException: " + ex.ToString();
    this.writeToLogs(error, 1338);
}
// put here for testing the byte array being sent vs. read:
// byte[] byteArray = System.IO.File.ReadAllBytes(path);
myCsvFileStream = File.OpenRead(path);
nFileLen = (int)myCsvFileStream.Length;
I have tried:
Stream myCsvFileStream;
using (StreamWriter sw = new StreamWriter(myCsvFileStream)) {
    foreach (ListItem item in this.lstAddEmailAddress.Items) {
        sw.WriteLine(" , ," + item.ToString());
    }
}
However, since myCsvFileStream is never initialized (Stream is an abstract class, so it cannot be instantiated directly), it is always null.
Here is what I do with the data (byte stream) after creating the CSV:
byte[] file = new byte[nFileLen];
myCsvFileStream.Read(file, 0, nFileLen);
bool response = this.repositoryService.SaveUsers(this.SelectedAccount.Id, file, this.authenticatedUser.SessionToken.SessionId);
myCsvFileStream.Close();
In the end I used a StringBuilder to create my CSV file contents, then got a byte array of the contents and used that to populate my shared stream. (I say "shared" because when the user uploads their own CSV file it arrives as an HttpPostedFile, but when sending it to our server via the REST call (repositoryService.SaveUsers) it uses the same byte stream it would via this method.)
StringBuilder csvFileString = new StringBuilder();
sharedStreamForBatchImport = new MemoryStream();
foreach (ListItem item in this.lstAddEmailAddress.Items) {
    csvFileString.Append(",," + item.ToString() + "\r\n");
}
// get a byte array of the string
byteArrayToBeSent = Encoding.ASCII.GetBytes(csvFileString.ToString());
// set the length for the write
byteArraySize = byteArrayToBeSent.Length;
// write the bytes into sharedStreamForBatchImport
sharedStreamForBatchImport.Write(byteArrayToBeSent, 0, byteArraySize);
You want to create a new MemoryStream()
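A sketch of how the original loop could write into one (assuming the same lstAddEmailAddress control and repositoryService call as above; leaveOpen: true keeps the MemoryStream usable after the writer is disposed, and disposing the writer flushes it):

using (var memoryStream = new MemoryStream())
{
    using (var sw = new StreamWriter(memoryStream, Encoding.ASCII, 1024, leaveOpen: true))
    {
        foreach (ListItem item in this.lstAddEmailAddress.Items)
        {
            sw.WriteLine(" , ," + item.ToString());
        }
    }
    byte[] file = memoryStream.ToArray(); // the CSV as bytes, no temp file involved
    bool response = this.repositoryService.SaveUsers(this.SelectedAccount.Id, file, this.authenticatedUser.SessionToken.SessionId);
}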
Here is a function I use to write CSV files
public static bool WriteCsvFile(string path, StringBuilder stringToWrite)
{
    try
    {
        using (StreamWriter sw = new StreamWriter(path, false)) // false in order to overwrite the file if it already exists
        {
            sw.Write(stringToWrite);
            return true;
        }
    }
    catch (Exception)
    {
        return false;
    }
}
stringToWrite is just a StringBuilder that has been created this way:
public static bool WriteCsvFile(string path, DataTable myData)
{
    if (myData == null)
        return false;

    // Information about the table we read
    int nbRows = myData.Rows.Count;
    int nbCol = myData.Columns.Count;
    StringBuilder stringToWrite = new StringBuilder();

    // We get the headers of the table
    stringToWrite.Append(myData.Columns[0].ToString());
    for (int i = 1; i < nbCol; ++i)
    {
        stringToWrite.Append(",");
        stringToWrite.Append(myData.Columns[i].ToString());
    }
    stringToWrite.AppendLine();

    // We read the rest of the table
    for (int i = 0; i < nbRows; ++i)
    {
        stringToWrite.Append(myData.Rows[i][0].ToString());
        for (int j = 1; j < nbCol; ++j)
        {
            stringToWrite.Append(",");
            stringToWrite.Append(myData.Rows[i][j].ToString());
        }
        stringToWrite.AppendLine();
    }
    return WriteCsvFile(path, stringToWrite);
}
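As a hypothetical usage example (the table and column names are made up), the DataTable overload builds the string and the StringBuilder overload writes it:

DataTable myData = new DataTable();
myData.Columns.Add("Name");
myData.Columns.Add("Age");
myData.Rows.Add("Alice", "30");
myData.Rows.Add("Bob", "25");

// writes "Name,Age" followed by one line per row
bool ok = WriteCsvFile(@"c:\temp\people.csv", myData);

Note that this simple writer does not escape values containing commas or quotes; a field like "Smith, John" would need proper CSV quoting.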
Morning,
I'm trying to split a large text file (15,000,000 rows) using StreamReader/StreamWriter. Is there a quicker way?
I tested it with 130,000 rows and it took 2 min 40 sec, which implies 15,000,000 rows will take approximately 5 hours; that seems a bit excessive.
//Perform split.
public void SplitFiles(int[] newFiles, string filePath, int processorCount)
{
    using (StreamReader Reader = new StreamReader(filePath))
    {
        for (int i = 0; i < newFiles.Length; i++)
        {
            string extension = System.IO.Path.GetExtension(filePath);
            string temp = filePath.Substring(0, filePath.Length - extension.Length)
                + i.ToString();
            string FilePath = temp + extension;
            if (!File.Exists(FilePath))
            {
                for (int x = 0; x < newFiles[i]; x++)
                {
                    DataWriter(Reader.ReadLine(), FilePath);
                }
            }
            else
            {
                return;
            }
        }
    }
}

public void DataWriter(string rowData, string filePath)
{
    bool appendData = true;
    using (StreamWriter sr = new StreamWriter(filePath, appendData))
    {
        sr.WriteLine(rowData);
    }
}
Thanks for your help.
You haven't made it very clear, but I'm assuming that the value of each element of the newFiles array is the number of lines to copy from the original into that file. Note that currently you don't detect the situation where there's either extra data at the end of the input file, or it's shorter than expected. I suspect you want something like this:
public void SplitFiles(int[] newFiles, string inputFile)
{
    string baseName = Path.GetFileNameWithoutExtension(inputFile);
    string extension = Path.GetExtension(inputFile);
    using (TextReader reader = File.OpenText(inputFile))
    {
        for (int i = 0; i < newFiles.Length; i++)
        {
            string outputFile = baseName + i + extension;
            if (File.Exists(outputFile))
            {
                // Better than silently returning, I'd suggest...
                throw new IOException("File already exists: " + outputFile);
            }
            int linesToCopy = newFiles[i];
            using (TextWriter writer = File.CreateText(outputFile))
            {
                for (int j = 0; j < linesToCopy; j++)
                {
                    string line = reader.ReadLine();
                    if (line == null)
                    {
                        return; // Premature end of input
                    }
                    writer.WriteLine(line);
                }
            }
        }
    }
}
Note that this still won't detect if there's any unconsumed input... it's not clear what you want to do in that situation.
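If you did want to detect it, one sketch would be a single extra read after the loop over newFiles (this is an assumption about the desired behaviour; you might prefer logging over throwing):

// After the for loop, anything still readable means the input had
// more lines than the newFiles counts accounted for.
if (reader.ReadLine() != null)
{
    throw new IOException("Input file contains more lines than expected.");
}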
One option for code clarity is to extract the middle of this into a separate method:
public void SplitFiles(int[] newFiles, string inputFile)
{
    string baseName = Path.GetFileNameWithoutExtension(inputFile);
    string extension = Path.GetExtension(inputFile);
    using (TextReader reader = File.OpenText(inputFile))
    {
        for (int i = 0; i < newFiles.Length; i++)
        {
            string outputFile = baseName + i + extension;
            // Could put this into the CopyLines method if you wanted
            if (File.Exists(outputFile))
            {
                // Better than silently returning, I'd suggest...
                throw new IOException("File already exists: " + outputFile);
            }
            CopyLines(reader, outputFile, newFiles[i]);
        }
    }
}

private static void CopyLines(TextReader reader, string outputFile, int count)
{
    using (TextWriter writer = File.CreateText(outputFile))
    {
        for (int i = 0; i < count; i++)
        {
            string line = reader.ReadLine();
            if (line == null)
            {
                return; // Premature end of input
            }
            writer.WriteLine(line);
        }
    }
}
There are utilities for splitting files that may outperform your solution - e.g. search for "split file by line".
If they don't suit, there are solutions that load the whole source file into memory and then write out the files, but that probably isn't appropriate given the size of the source file.
In terms of improving your code, a minor improvement would be the generation of the destination file path (and also clarifying the confusion between the source filePath you use and the destination files). You don't need to re-establish the source file extension each time in your loop.
The second (and, as highlighted by commenters, probably more significant) improvement is in how you write out the destination files. Each destination file takes a differing number of lines from the source (the value in each newFiles entry), so for each entry, read all the source lines relevant to the next destination file and then write that destination out once, rather than repeatedly opening the destination file. You could "gather" the lines in a StringBuilder/List etc., or alternatively write them directly to the destination file (but only opening it once).
public void SplitFiles(int[] newFiles, string sourceFilePath, int processorCount)
{
    string sourceDirectory = System.IO.Path.GetDirectoryName(sourceFilePath);
    string sourceFileName = System.IO.Path.GetFileNameWithoutExtension(sourceFilePath);
    string extension = System.IO.Path.GetExtension(sourceFilePath);
    using (StreamReader Reader = new StreamReader(sourceFilePath))
    {
        for (int i = 0; i < newFiles.Length; i++)
        {
            string destinationFileNameWithExtension = string.Format("{0}{1}{2}", sourceFileName, i, extension);
            string destinationFilePath = System.IO.Path.Combine(sourceDirectory, destinationFileNameWithExtension);
            if (!File.Exists(destinationFilePath))
            {
                // Read all the lines relevant to this destination file
                // and temporarily store them in memory
                StringBuilder destinationText = new StringBuilder();
                for (int x = 0; x < newFiles[i]; x++)
                {
                    destinationText.AppendLine(Reader.ReadLine());
                }
                DataWriter(destinationFilePath, destinationText.ToString());
            }
            else
            {
                return;
            }
        }
    }
}

private static void DataWriter(string destinationFilePath, string content)
{
    using (StreamWriter sr = new StreamWriter(destinationFilePath))
    {
        sr.Write(content);
    }
}
I've recently had to do this for several hundred files under 2 GB each (up to 1.92 GB), and the fastest method I found (if you have the memory available) is StringBuilder. All the other methods I tried were painfully slow.
Please note that this is memory dependent. Adjust "CurrentPosition = 130000" accordingly.
string CurrentLine = String.Empty;
int CurrentPosition = 0;
int CurrentSplit = 0;

foreach (string file in Directory.GetFiles(@"C:\FilesToSplit"))
{
    StringBuilder sb = new StringBuilder();
    using (StreamReader sr = new StreamReader(file))
    {
        while ((CurrentLine = sr.ReadLine()) != null)
        {
            if (CurrentPosition == 130000) // Or whatever you want to split by.
            {
                using (StreamWriter sw = new StreamWriter(@"C:\FilesToSplit\SplitFiles\" + Path.GetFileNameWithoutExtension(file) + "-" + CurrentSplit + Path.GetExtension(file)))
                {
                    // Append this line too, so we don't lose it.
                    sb.AppendLine(CurrentLine);
                    // Write the StringBuilder contents.
                    sw.Write(sb.ToString());
                    // Clear the StringBuilder buffer, so it doesn't get too big. You can adjust this based on your computer's available memory.
                    sb.Clear();
                    // Increment the CurrentSplit number.
                    CurrentSplit++;
                    // Reset the current line position. We've found 130,001 lines of text.
                    CurrentPosition = 0;
                }
            }
            else
            {
                sb.AppendLine(CurrentLine);
                CurrentPosition++;
            }
        }
    }
    // Write out whatever is left over, so the tail of the file isn't lost.
    if (sb.Length > 0)
    {
        using (StreamWriter sw = new StreamWriter(@"C:\FilesToSplit\SplitFiles\" + Path.GetFileNameWithoutExtension(file) + "-" + CurrentSplit + Path.GetExtension(file)))
        {
            sw.Write(sb.ToString());
        }
    }
    // Reset the integers at the end of each file check, otherwise it can quickly go out of order.
    CurrentPosition = 0;
    CurrentSplit = 0;
}