I am reading around 300 txt files using the task concept. My requirement is to read each file , pick the date from every line , check in which week it comes and then compare it with the folder name which are actually the week names (like 41 , 42) whether they both are same or not. if not then write that file name and the line number. As the number of files and the size of files is huge , I am trying to use task concept to that I can speed up the process. Below is my code.
Any help would be appreciated. Thanks in advance.
private void browse_Click(object sender, EventArgs e)
{
try
{
string newFileName = "";
DialogResult result = folderBrowserDialog1.ShowDialog();
if (result == DialogResult.OK)
{
DateTime starttime = DateTime.Now;
string folderPath = Path.GetDirectoryName(folderBrowserDialog1.SelectedPath);
DirectoryInfo dInfo = new DirectoryInfo(folderPath);
string[] Allfiles = Directory.EnumerateFiles(folderPath, "*.txt", SearchOption.AllDirectories).ToArray();
foreach (DirectoryInfo folder in dInfo.GetDirectories())
{
newFileName = "Files_with_duplicate_TGMKT_Names_in_child_Folder_" + folder.Name + ".txt";
if (File.Exists(folderPath + "/" + newFileName))
{
File.Delete(folderPath + "/" + newFileName);
}
FileInfo[] folderFiles = folder.GetFiles();
if (folderFiles.Length != 0)
{
List<Task> tasks = new List<Task>();
foreach (var file in folderFiles)
{
var task = Task.Factory.StartNew(() =>
{
bool taskResult = ReadFile(file.FullName,folderPath,newFileName);
return taskResult;
});
tasks.Add(task);
}
Task.WaitAll(tasks.ToArray());
DateTime stoptime = DateTime.Now;
TimeSpan totaltime = stoptime.Subtract(starttime);
label6.Text = Convert.ToString(totaltime);
textBox1.Text = folderPath;
DialogResult result2 = MessageBox.Show("Read the files successfully.", "Important message", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
}
}
}
catch (Exception)
{
throw;
}
}
public bool ReadFile(string file , string folderPath , string newFileName)
{
int LineCount = 0;
string fileName = Path.GetFileNameWithoutExtension(file);
using (FileStream fs = File.Open(file, FileMode.Open))
using (BufferedStream bs = new BufferedStream(fs))
using (StreamReader sr = new StreamReader(bs))
{
for (int i = 0; i < 2; i++)
{
sr.ReadLine();
}
string oline;
while ((oline = sr.ReadLine()) != null)
{
LineCount = ++LineCount;
string[] eachLine = oline.Split(';');
string date = eachLine[30].Substring(1).Substring(0, 10);
DateTime Date = DateTime.ParseExact(date, "d", CultureInfo.InvariantCulture);
int week = new GregorianCalendar(GregorianCalendarTypes.Localized).GetWeekOfYear(Date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Saturday);
if (Convert.ToString(week) == folderName)
{
}
else
{
using (StreamWriter sw = new StreamWriter(folderPath + "/" + newFileName, true))
{
Filecount = ++Filecount;
sw.WriteLine(fileName + " " + "--" + " " + "Line number :" + " " + LineCount);
}
}
}
}
return true;
}
Your call MessageBox.Show inside ReadFile, which means every file has to write a message. This way you will get 300 messages.
Try to put your message after the WaitAll call outside the ReadFile method.
if (files.Length != 0)
{
List<Task> tasks = new List<Task>();
foreach (var file in files)
{
var task = Task.Factory.StartNew(() =>
{
bool taskResult = ReadFile(file);
return taskResult;
});
tasks.Add(task);
}
Task.WaitAll(tasks.ToArray());
// Message here
DialogResult result2 = MessageBox.Show("Read the files successfully.", "Important message", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
Related
my controller looks like
[HttpPost]
public FileResult methodname(string files)
{
String CurrentDate = DateTime.Now.Date.ToString("dd-MM-yyyy");
string dir = filepath + "/" + CurrentDate + "/";
string[] fileList = files.Split(",");
var count = fileList.Length;
try
{
_log.LogInformation("Enter Action with user name " + HttpContext.Session.GetString("UserName"));
using (var memoryStream = new MemoryStream())
{
using (var ziparchive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true))
{
for (int i = 0; i < count; i++)
{
ziparchive.CreateEntryFromFile(dir + fileList[i], fileList[i]);
}
}
return File(memoryStream.ToArray(), "application/zip", "Attachments.zip");
}
}
catch (Exception ex)
{
_log.LogError(ex.StackTrace);
return null;
}
}
I fixed my issue by adding the following line just below the click event:
event.preventDefault();
Good afternoon, please tell me how can I add files with the same name to the archive? To be like copying, the file becomes file(1).* and there are two files file and file(1) . I am using Ionic.Zip
string BackupDir = #"C:\Users\Desktop\dir\backup.zip";
string PathToFolder = #"C:\Users\Desktop\dir";
string[] AllFiles = Directory.GetFiles(PathToFolder, "*.*", SearchOption.AllDirectories);
using (ZipFile zip = new ZipFile(BackupDir, Encoding.UTF8))
{
foreach (string file in AllFiles)
{
try
{
DateTime FileCreationTime = File.GetCreationTime(file);
if (FileCreationTime >= DateTime.Now - new TimeSpan(60, 0, 0, 0))
{
Console.WriteLine(file);
zip.CompressionLevel = Ionic.Zlib.CompressionLevel.BestSpeed;
zip.AddFile(file, "");
}
}
catch (Exception ex)
{
Console.WriteLine(ex);
}
zip.Save(BackupDir);
}
}
Try this:
//keep track of fileNames
var fileNames = new HashSet<string>(StringCompaer.OridialIgnoreCase);
foreach (string file in AllFiles)
{
try
{
DateTime FileCreationTime = File.GetCreationTime(file);
if (FileCreationTime >= DateTime.Now - new TimeSpan(60, 0, 0, 0))
{
Console.WriteLine(file);
zip.CompressionLevel = Ionic.Zlib.CompressionLevel.BestSpeed;
var fileName = Path.GetFileName(file);
if (fileNames.Add(fileName))
zip.AddFile(file, ""); //fileName could be added to fileNames = it is unique
else
{
//start with 1
int counter = 1;
//loop till you found a fileName thats not occupied
while (true)
{
//build the new file name
var newFileName = $"{Path.GetFileNameWithoutExtension(fileName)} - {counter}{Path.GetExtension(fileName}";
if (fileNames.Add(newFileName))
{
fileName = newFileName; //use the new fileName
break; //break the loop
}
//increase counter if newFileName is already in the list fileNames
counter++;
}
var zipFileEntry = zip.AddFile(file, "");
zipFileEntry.FileName = fileName;
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex);
}
zip.Save(BackupDir);
}
Check if file exists and add suffix to it's name.
Something like (Pseudocode):
int nameCount = 1;
fileName = file.Name;
while(fileName exists in archive)
{
nameCount++;
fileName = file.Name + "_" + nameCount;
}
I am writing on my pdf-word converter and I just received a really strange exception witch makes no sens to me.
Error:PdfiumViewer.PdfException:{"Unsupported security scheme"}
Its the first time that such a exception appears. but I have to be honest that I never tried to convert more then 3-4 files from pdf to word and right now I am doing more then 100 files.
Here is my code I am sry if its too long but I simply do not know on which line the error occurs
public static void PdfToImage()
{
try
{
Application application = null;
application = new Application();
string path = #"C:\Users\chnikos\Desktop\Test\Batch1\";
foreach (string file in Directory.EnumerateFiles(path, "*.pdf"))
{
var doc = application.Documents.Add();
using (var document = PdfiumViewer.PdfDocument.Load(file))
{
int pagecount = document.PageCount;
for (int index = 0; index < pagecount; index++)
{
var image = document.Render(index, 200, 200, true);
image.Save(#"C:\Users\chnikos\Desktop\Test\Batch1\output" + index.ToString("000") + ".png", ImageFormat.Png);
application.Selection.InlineShapes.AddPicture(#"C:\Users\chnikos\Desktop\Test\Batch1\output" + index.ToString("000") + ".png");
}
string getFileName = file.Substring(file.LastIndexOf("\\"));
string getFileWithoutExtras = Regex.Replace(getFileName, #"\\", "");
string getFileWihtoutExtension = Regex.Replace(getFileWithoutExtras, #".pdf", "");
string fileName = #"C:\Users\chnikos\Desktop\Test\Batch1\" + getFileWihtoutExtension;
doc.PageSetup.PaperSize = WdPaperSize.wdPaperA4;
foreach (Microsoft.Office.Interop.Word.InlineShape inline in doc.InlineShapes)
{
.....
}
doc.PageSetup.TopMargin = 28.29f;
doc.PageSetup.LeftMargin = 28.29f;
doc.PageSetup.RightMargin = 30.29f;
doc.PageSetup.BottomMargin = 28.29f;
application.ActiveDocument.SaveAs(fileName, WdSaveFormat.wdFormatDocument);
doc.Close();
string imagePath = #"C:\Users\chnikos\Desktop\Test\Batch1\";
Array.ForEach(Directory.GetFiles(imagePath, "*.png"), delegate(string deletePath) { File.Delete(deletePath); });
}
}
}
catch (Exception e)
{
Console.WriteLine("Error: " + e);
}
}
}
}
I'm developing an application that reads a xml file folder , and each file it to do some checks and copied to a new folder based on some criteria.
But memory usage continues to grow when it arrives in the foreach loop , and I believe that should not happen , because the variables do not increase at each iteration , are only overwritten.
Here is my code:
using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Windows.Forms;
using System.Xml;
namespace XMLOrganizer
{
public partial class Form1 : Form
{
string selectedFolder;
public Form1()
{
InitializeComponent();
comboBox1.DropDownStyle = ComboBoxStyle.DropDownList;
comboBox1.SelectedIndex = 0;
}
private void button1_Click(object sender, EventArgs e)
{
folderBrowserDialog1.ShowDialog();
selectedFolder = folderBrowserDialog1.SelectedPath;
organizeBtn.Enabled = true;
}
private void organizeBtn_Click(object sender, EventArgs e)
{
if (comboBox1.SelectedIndex == -1)
{
MessageBox.Show("Selecione o tipo de nota", "Erro!", MessageBoxButtons.OK, MessageBoxIcon.Exclamation,
MessageBoxDefaultButton.Button1);
return;
}
if (comboBox1.SelectedIndex != 2)
{
OrganizeXml(label2, selectedFolder, comboBox1);
}
//ORGANIZAR LOTES
else
{
string folder = selectedFolder;
label2.Text = "Arquivos sendo processados, aguarde...";
label2.Refresh();
string[] files = Directory.GetFiles(folder, "*.xml", SearchOption.AllDirectories);
int atualFile = 1, totalXML = files.Length;
foreach (string file in files)
{
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(file);
XmlNodeList enviNFe = xmlDocument.GetElementsByTagName("enviNFe");
string versao = ((XmlElement)enviNFe[0]).Attributes["versao"].Value;
XmlNodeList NFe = ((XmlElement)enviNFe[0]).GetElementsByTagName("NFe");
Directory.CreateDirectory(selectedFolder + #"\NOTAS");
label2.Text = "Processando arquivo " + atualFile + " de " + totalXML;
string notaXML;
foreach (XmlElement nota in NFe)
{
notaXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><nfeProc versao=\"" + versao + "\" xmlns=\"http://www.portalfiscal.inf.br/nfe\">" + nota.OuterXml + "</nfeProc>";
XmlNodeList infNFe = nota.GetElementsByTagName("infNFe");
string chave = infNFe[0].Attributes["Id"].Value.Replace("NFe", "");
File.WriteAllText(selectedFolder + "\\NOTAS\\" + chave + ".xml", notaXML);
}
}
OrganizeXml(label2, selectedFolder + "\\NOTAS", comboBox1);
}
}
private static void OrganizeXml(Label label2, string selectedFolder, ComboBox comboBox1)
{
string folderMove = String.Empty;
string folder = selectedFolder;
label2.Text = "Arquivos sendo processados, aguarde...";
label2.Refresh();
string[] files = Directory.GetFiles(folder, "*.xml", SearchOption.AllDirectories);
int i = 1, arquivos = files.Length;
Directory.CreateDirectory(folder + #"\ORGANIZADO");
if (comboBox1.SelectedIndex != 2)
{
Directory.CreateDirectory(folder + #"\ORGANIZADO\OUTROS");
Directory.CreateDirectory(folder + #"\ORGANIZADO\LOTES");
}
foreach (string file in files)
{
XmlDocument xmlDocument = new XmlDocument();
try
{
xmlDocument.Load(file);
if (xmlDocument.DocumentElement.Name != "nfeProc")
{
XmlNodeList NFe = xmlDocument.GetElementsByTagName("NFe");
var nota = ((XmlElement) NFe[0]);
if (nota != null)
{
XmlNodeList infNFe = ((XmlElement) NFe[0]).GetElementsByTagName("infNFe");
string chave = infNFe[0].Attributes["Id"].Value.Replace("NFe", "");
string versao = infNFe[0].Attributes["versao"].Value;
string notaXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><nfeProc versao=\"" + versao +
"\" xmlns=\"http://www.portalfiscal.inf.br/nfe\">" + nota.OuterXml +
"</nfeProc>";
string dirNote = Path.GetDirectoryName(file);
File.WriteAllText(dirNote + "\\fix_" + chave + ".xml", notaXML);
}
}
//
//
//
}
catch (XmlException)
{
XmlDocument doc = new XmlDocument();
string arquivo = ReadFileToString(file);
arquivo = RemoveSpecialCharacters(arquivo);
if (arquivo == "")
{
File.Move(file, folder + #"\ORGANIZADO\OUTROS\corrupt_" + Path.GetFileName(file));
continue;
}
try
{
doc.LoadXml(arquivo);
doc.PreserveWhitespace = true;
doc.Save(file);
}
catch (XmlException)
{
File.Move(file, folder + #"\ORGANIZADO\OUTROS\corrupt_" + Path.GetFileName(file));
files = files.Where(f => f != file).ToArray();
}
}
}
foreach (string file in files)
{
string arquivoLoad = file;
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(arquivoLoad);
XmlNodeList NFe = xmlDocument.GetElementsByTagName("NFe");
XmlNodeList enviNFe = xmlDocument.GetElementsByTagName("enviNFe");
if (NFe.Count == 0)
{
if (File.Exists(folder + #"\ORGANIZADO\OUTROS\no_NFe_" + Path.GetFileName(arquivoLoad)))
{
Random rnd = new Random();
File.Copy(arquivoLoad,
folder + #"\ORGANIZADO\OUTROS\no_NFe_" + rnd.Next(1, 5000) + Path.GetFileName(arquivoLoad));
}
else
{
File.Copy(arquivoLoad, folder + #"\ORGANIZADO\OUTROS\no_NFe_" + Path.GetFileName(arquivoLoad));
}
continue;
}
XmlNodeList infNFe = ((XmlElement)NFe[0]).GetElementsByTagName("infNFe");
string chave = infNFe[0].Attributes["Id"].Value.Replace("NFe", "");
if (xmlDocument.DocumentElement.Name != "nfeProc")
{
File.Move(arquivoLoad, folder + #"\ORGANIZADO\OUTROS\no_nfeProc_" + Path.GetFileName(arquivoLoad));
arquivoLoad = Path.GetDirectoryName(file) + "\\fix_" + chave + ".xml";
}
if (enviNFe.Count > 0)
{
if (File.Exists(folder + #"\ORGANIZADO\LOTES\" + Path.GetFileName(arquivoLoad)))
{
Random rnd = new Random();
File.Copy(arquivoLoad, folder + #"\ORGANIZADO\LOTES\" + rnd.Next(1, 5000) + Path.GetFileName(arquivoLoad));
}
else
{
File.Copy(arquivoLoad, folder + #"\ORGANIZADO\LOTES\" + Path.GetFileName(arquivoLoad));
}
continue;
}
//XmlNodeList infNFe = ((XmlElement)NFe[0]).GetElementsByTagName("infNFe");
XmlNodeList ide = ((XmlElement)infNFe[0]).GetElementsByTagName("ide");
string tpNF = ((XmlElement)ide[0]).GetElementsByTagName("tpNF")[0].InnerText;
//if (tpNF == "0") continue;
XmlNodeList emit = ((XmlElement)infNFe[0]).GetElementsByTagName("emit");
string emitInfoCod;
if (((XmlElement)emit[0]).GetElementsByTagName("CNPJ").Count > 0)
{
emitInfoCod = ((XmlElement)emit[0]).GetElementsByTagName("CNPJ")[0].InnerText;
}
else if (((XmlElement)emit[0]).GetElementsByTagName("CPF").Count > 0)
{
emitInfoCod = ((XmlElement)emit[0]).GetElementsByTagName("CPF")[0].InnerText;
}
else
{
emitInfoCod = "0";
}
string ide_dEmi = (((XmlElement)ide[0]).GetElementsByTagName("dEmi").Count > 0)
? ((XmlElement)ide[0]).GetElementsByTagName("dEmi")[0].InnerText
: ((XmlElement)ide[0]).GetElementsByTagName("dhEmi")[0].InnerText;
string[] data = ide_dEmi.Split('-');
string folderName = data[0] + "\\" + data[1];
string organizeStyle = String.Empty;
if (comboBox1.SelectedIndex == 0 || comboBox1.SelectedIndex == 2)
{
organizeStyle = folder + #"\ORGANIZADO\" + emitInfoCod + #"\" + folderName;
}
else
{
organizeStyle = folder + #"\ORGANIZADO\" + folderName + #"\" + emitInfoCod;
}
if (!Directory.Exists(organizeStyle))
{
Directory.CreateDirectory(organizeStyle);
}
folderMove = organizeStyle + "\\";
if (!File.Exists(folderMove + chave + ".xml"))
{
File.Copy(arquivoLoad, folderMove + chave + ".xml");
}
label2.Text = "Arquivos sendo processados, aguarde... (" + i + " / " + arquivos + ")";
label2.Refresh();
i++;
}
label2.Text = "Notas organizadas com sucesso!";
label2.Refresh();
}
public static string ReadFileToString(string filePath)
{
using (StreamReader streamReader = new StreamReader(filePath))
{
string text = streamReader.ReadToEnd();
streamReader.Close();
return text;
}
}
public static string RemoveSpecialCharacters(string str)
{
return Regex.Replace(str, #"[^\u0000-\u007F]", string.Empty);
}
private void exitBtn_Click(object sender, EventArgs e)
{
Application.Exit();
}
}
}
How can I determine what 's going on?
The value of a variable pointing to a reference type is not the object at all, its just the memory address where the object lives.
So when you do the following:
while (true)
{
var myVariable = new MyReferenceType();
}
The only memory you are really reusing is the variable itself (think of a 32 or 64 bit pointer). But on every iteration your are allocating somewhere in memory space enough to fit the new object you've just created and that memory is most definitely not the memory reserved to the previous object.
This is essentially why your memory usage is growing. The "old" objects of previous iterations with no live reference will eventually get collected by the GC, but that could be never if the GC decides that it has enough memory available to avoid it.
my requirement was reading some 300 text files each of around 40-70 MB line by line to perform some kind of checks . As the files are huge so I thought of using TPL concept i.e. tasks. if I am not using task concept, it is taking around 7 minutes but if I am doing it by using task , it is taking long time don't know why. I will be so thankful if someone please review my code and tell me where I am doing wrong in using task. Any help would be appreciated. Below is my code :
private void browse_Click(object sender, EventArgs e)
{
try
{
string newFileName1 = "";
string newFileName2 = "";
week = textBox2.Text;
if (week == null || week == "")
{
MessageBox.Show("Week cannot be null.");
return;
}
DialogResult result = folderBrowserDialog1.ShowDialog();
if (result == DialogResult.OK)
{
DateTime starttime = DateTime.Now;
string folderPath = Path.GetDirectoryName(folderBrowserDialog1.SelectedPath);
string folderName = Path.GetFileName(folderPath);
DirectoryInfo dInfo = new DirectoryInfo(folderPath);
foreach (DirectoryInfo folder in dInfo.GetDirectories())
{
newFileName1 = "Files_with_dates_mismatching_the_respective_week_" + folder.Name + ".txt";
newFileName2 = "Files_with_wrong_date_format_" + folder.Name + ".txt";
if (File.Exists(folderPath + "/" + newFileName1))
{
File.Delete(folderPath + "/" + newFileName1);
}
if (File.Exists(folderPath + "/" + newFileName2))
{
File.Delete(folderPath + "/" + newFileName2);
}
FileInfo[] folderFiles = folder.GetFiles();
if (folderFiles.Length != 0)
{
List<Task> tasks = new List<Task>();
foreach (var file in folderFiles)
{
var task = Task.Factory.StartNew(() =>
{
bool taskResult = ReadFile(file.FullName, folderPath, folderName, week);
return taskResult;
});
tasks.Add(task);
}
Task.WaitAll(tasks.ToArray());
DateTime stoptime = DateTime.Now;
TimeSpan totaltime = stoptime.Subtract(starttime);
label6.Text = Convert.ToString(totaltime);
textBox1.Text = folderPath;
DialogResult result2 = MessageBox.Show("Read the files successfully.", "Important message", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
}
}
}
catch (Exception)
{
throw;
}
}
public bool ReadFile(string file, string folderPath, string folderName, string week)
{
int LineCount = 0;
string fileName = Path.GetFileNameWithoutExtension(file);
using (FileStream fs = File.Open(file, FileMode.Open))
using (BufferedStream bs = new BufferedStream(fs))
using (StreamReader sr = new StreamReader(bs))
{
for (int i = 0; i < 2; i++)
{
sr.ReadLine();
}
string oline;
while ((oline = sr.ReadLine()) != null)
{
LineCount = ++LineCount;
string[] eachLine = oline.Split(';');
string date = eachLine[30].Substring(1).Substring(0, 10);
DateTime dt;
bool valid = DateTime.TryParseExact(date, "dd/MM/yyyy", CultureInfo.InvariantCulture, DateTimeStyles.None, out dt);
if (!valid)
{
Filecount = ++Filecount;
StreamWriter sw = new StreamWriter(folderPath + "/" + "Files_with_wrong_date_format_" + folderName + ".txt", true);
sw.WriteLine(fileName + " " + "--" + " " + "Line number :" + " " + LineCount);
sw.Close();
}
else
{
DateTime Date = DateTime.ParseExact(date, "d/M/yyyy", CultureInfo.InvariantCulture);
int calculatedWeek = new GregorianCalendar(GregorianCalendarTypes.Localized).GetWeekOfYear(Date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Saturday);
if (calculatedWeek == Convert.ToInt32(week))
{
}
else
{
Filecount = ++Filecount;
StreamWriter sw = new StreamWriter(folderPath + "/" + "Files_with_dates_mismatching_the_respective_week_" + folderName + ".txt", true);
sw.WriteLine(fileName + " " + "--" + " " + "Line number :" + " " + LineCount);
sw.Close();
}
}
}
}
return true;
}
Since you are using Task.WaitAll(tasks.ToArray()). Wait will synchronously block until the task completes. But can you try with await (Not in TPL), await will asynchronously wait until the task completes