I've got a List of Document
public class Document
{
public string[] fullFilePath;
public bool isPatch;
public string destPath;
public Document() { }
public Document(string[] fullFilePath, bool isPatch, string destPath)
{
this.fullFilePath = fullFilePath;
this.isPatch = isPatch;
this.destPath = destPath;
}
The fullFilepath should a List or an Array of Paths.
For example:
Document 1
---> C:\1.pdf
---> C:\2.pdf
Document 2
---> C:\1.pdf
---> C:\2.pdf
---> C:\3.pdf
etc.
My problem if I am using an array string all Documents got "null" in its fullFilePath.
If I'm using a List for the fullFilePath all Documents got the same entries from the last Document.
Here is how the List is filled:
int docCount = -1;
int i = 0;
List<Document> Documents = new List<Document>();
string[] sourceFiles = new string[1];
foreach (string file in filesCollected)
{
string bc;
string bcValue;
if (Settings.Default.barcodeEngine == "Leadtools")
{
bc = BarcodeReader.ReadBarcodeSymbology(file);
bcValue = "PatchCode";
}
else
{
bc = BarcodeReader.ReadBacrodes(file);
bcValue = "009";
}
if (bc == bcValue)
{
if(Documents.Count > 0)
{
Array.Clear(sourceFiles, 0, sourceFiles.Length);
Array.Resize<string>(ref sourceFiles, 1);
i = 0;
}
sourceFiles[i] = file ;
i++;
Array.Resize<string>(ref sourceFiles, i + 1);
Documents.Add(new Document(sourceFiles, true,""));
docCount++;
}
else
{
if (Documents.Count > 0)
{
sourceFiles[i] = file;
i++;
Array.Resize<string>(ref sourceFiles, i + 1);
Documents[docCount].fullFilePath = sourceFiles;
}
}
}
You are using the same instance of the array for every document. The instance is updated with a new list of files at every inner loop, but an array is a reference to an area of memory (oversimplification, I know but for the purpose of this answer is enough) and if you change the content of that area of memory you are changing it for every document.
You need to create a new instance of the source files for every new document you add to your documents list. Moreover, when you are not certain of the number of elements that you want to be included in the array, it is a lot better to use a generic List and remove all that code that handles the resizing of the array.
First change the class definition
public class Document
{
public List<string> fullFilePath;
public bool isPatch;
public string destPath;
public Document() { }
public Document(List<string> fullFilePath, bool isPatch, string destPath)
{
this.fullFilePath = fullFilePath;
this.isPatch = isPatch;
this.destPath = destPath;
}
}
And now change your inner loop to
foreach (string file in filesCollected)
{
string bc;
string bcValue;
....
if (bc == bcValue)
{
List<string> files = new List<string>();
files.Add(file);
Documents.Add(new Document(files, true, ""));
docCount++;
}
else
Documents[docCount].fullFilePath.Add(file);
}
Notice that when you need to add a new Document I build a new List<string>, add the current file and pass everything at the constructor (In reality this should be moved directly inside the constructor of the Document class). When you want to add just a new file you could add it directly to the public fullFilePath property
Moving the handling of the files inside the Documents class could be rewritten as
public class Document
{
public List<string> fullFilePath;
public bool isPatch;
public string destPath;
public Document()
{
// Every constructory initializes internally the List
fullFilePath = new List<string>();
}
public Document(string aFile, bool isPatch, string destPath)
{
// Every constructory initializes internally the List
fullFilePath = new List<string>();
this.fullFilePath.Add(aFile);
this.isPatch = isPatch;
this.destPath = destPath;
}
public void AddFile(string aFile)
{
this.fullFilePath.Add(aFile);
}
}
Of course, now in you calling code you pass only the new file or call AddFile without the need to check for the list initialization.
The issue should be here:
string[] sourceFiles = new string[1];
If you move this line of code in your foreach you should solve this problem because in your foreach you always use the same variable, so the same reference.
int docCount = -1;
int i = 0;
List<Document> Documents = new List<Document>();
foreach (string file in filesCollected)
{
string[] sourceFiles = new string[1];
string bc;
string bcValue;
if (Settings.Default.barcodeEngine == "Leadtools")
{
bc = BarcodeReader.ReadBarcodeSymbology(file);
bcValue = "PatchCode";
}
else
{
bc = BarcodeReader.ReadBacrodes(file);
bcValue = "009";
}
if (bc == bcValue)
{
if(Documents.Count > 0)
{
Array.Clear(sourceFiles, 0, sourceFiles.Length);
Array.Resize<string>(ref sourceFiles, 1);
i = 0;
}
sourceFiles[i] = file ;
i++;
Array.Resize<string>(ref sourceFiles, i + 1);
Documents.Add(new Document(sourceFiles, true,""));
docCount++;
}
else
{
if (Documents.Count > 0)
{
sourceFiles[i] = file;
i++;
Array.Resize<string>(ref sourceFiles, i + 1);
Documents[docCount].fullFilePath = sourceFiles;
}
}
}
Related
I have a set of instances of the Data class that I want to compare.
Each instance has an unknown number of items in it's Files property.
I want to compare each instance of Data to the others and set FoundDifference to true if a version difference is found between two files with the same Name value.
Is there a simple algorithm to accomplish this?
Here is a sample setup of how the objects might look.
In this example you'd want everything except for f1, f21, and f31 to set the FoundDifference to true
class Data
{
public string DC { get; set; }
public List<File> Files { get; set; }
}
class File
{
public string Name { get; set; }
public string Version { get; set; }
public bool FoundDifference { get; set; }
}
class Program
{
static void Main(string[] args)
{
Data d1 = new Data();
d1.DC = "DC1";
File f1 = new File();
f1.Name = "File1";
f1.Version = "1";
d1.Files.Add(f1);
File f2 = new File();
f2.Name = "File2";
f2.Version = "1";
d1.Files.Add(f2);
File f3 = new File();
f3.Name = "File3";
f3.Version = "1";
d1.Files.Add(f3);
//Another
Data d2 = new Data();
d2.DC = "DC2";
File f21 = new File();
f21.Name = "File1";
f21.Version = "1";
d2.Files.Add(f21);
File f22 = new File();
f22.Name = "File2";
f22.Version = "2";
d2.Files.Add(f22);
File f23 = new File();
f23.Name = "File3";
f23.Version = "1";
d2.Files.Add(f23);
//Another
Data d3 = new Data();
d3.DC = "DC3";
File f31 = new File();
f31.Name = "File1";
f31.Version = "1";
d3.Files.Add(f31);
File f32 = new File();
f32.Name = "File2";
f32.Version = "2";
d3.Files.Add(f32);
File f33 = new File();
f33.Name = "File3";
f33.Version = "5";
d3.Files.Add(f33);
//How Can I change All Files FoundDifference prop to true if FileName is the same and a difference is in Version is found??
Console.ReadLine();
}
I'd handle that by using a Dictionary<string, List<File>> to keep track of the files from each Data like this. First iterate all the files in all the datas then lookup the file name in the dictionary and if not found create a new list and add it. Then check if that list has any files with a different version. If one is found set all the flags and finally add the file to the list.
public void SetDifferences(IEnumerable<Data> datas)
{
var fileLookup = new Dictionary<string, List<File>>();
foreach(var file in datas.SelectMany(d => d.Files))
{
if(!fileLookup.TryGetValue(file.Name, out var fileList))
{
fileList = new List<File>();
fileLookup.Add(file.Name, fileList);
}
if(fileList.Any(f => f.Version != file.Version))
{
foreach(var other in fileList)
{
other.FoundDifference = true;
}
file.FoundDifference = true;
}
fileList.Add(file);
}
}
I have a log file that I am reading into different objects. One object starts at a Line that contains the words "Announce message" and the following lines contain the data that belongs to that message. This entry stops at a line that contains the word "Disposed".
I want to read all the data from between these 2 lines that, contains certain words.
Im currently using a Dictionary because the line with "Announce message" also contains a UID but the following lines contain the data for that UID.
How would you do that?
This is what i have come up with so far.
public static void P2PLogParser(List<FileInfo> fileList)
{
foreach (FileInfo fi in fileList)
{
//Læser alle linier i csv fil
foreach (var line in File.ReadAllLines(fi.FullName))
{
string MeterUID = GetMeterUID(line);
string MimHashcode = GetMimHashcode(line);
string FirmwareUploadStatus = GetFirmwareUploadStatus(line);
string IsKnown = GetIsKnown(line);
DateTime P2PTimeStamp = GetTimestamp(line);
if (IsMeterEntry(line) && !meters.ContainsKey(MeterUID))
{
string MeterNr = GetMeterUID(line).Replace("4B414D", "");
int meternr = int.Parse(MeterNr, System.Globalization.NumberStyles.HexNumber);
meters.Add(MeterUID, new Meter()
{
MeterUID = MeterUID,
MeterNR = meternr,
P2Pmeterentry = new List<P2PMeterEntry>()
});
}
if (IsMeterEntry(line))
{
P2PMeterEntry p2pmeter = new P2PMeterEntry
{
P2PTimeStamp = P2PTimeStamp,
MimHashcode = MimHashcode,
FirmwareUploadStatus = FirmwareUploadStatus,
IsKnown = IsKnown,
P2PMetersession = new List<P2PMeterSession>()
};
if (IsNoLongerMeterEntry(line))
{
string SessionLevel = GetLevel(line);
string SessionMessage = GetSessionMessage(line);
string Context = GetSessionContext(line);
P2PMeterSession MeterSession = new P2PMeterSession
{
SessionTimeStamp = P2PTimeStamp,
SessionLevel = SessionLevel,
SessionMessage = SessionMessage,
Context = Context
};
meterSession.Add(MeterSession);
}
meters[MeterUID].P2Pmeterentry.Add(p2pmeter);
}
}
}
}
and the IsMeterEntry and IsNoLongerMeterEntry
//IsMeterSession
public static bool IsMeterEntry(string text)
{
return text.ToLower().Contains("announce message received:");
}
public static bool IsNoLongerMeterEntry(string text)
{
return text.ToLower().Contains("context - disposed");
}
Implement a simple state machine with two states: IgnoreLine (initial state) and Announce.
for each line in log
if line contains "Announce message"
read UID
create a StringBuilder
set state=Announce
else if line contains "Disposed"
store the StringBuilder's content in the dictionary[uid]
set state=IgnoreLine
else if state==Announce and line contains "certain words"
append line to StringBuilder
So I need to save several links from a site, but when it reaches near the 64k links it gives me the error OutOfMemoryException.
Here is my code, please if someone could help me it would be wonderful.
Note : if you want to test (of course you have to edit to test, but it is not that much to edit), the url it receives is :
http://santacatarina.entrei.net/busca/listar_empresas.php?filter={0}&pagina={1}
The code :
namespace WebCrawler.SantaCatarina
{
class SCLinkFinder : ILinkFinder
{
private readonly Queue<char> _alfabeto;
private int _paginaAtual;
private char _letraAtual;
public SCLinkFinder()
{
_alfabeto = new Queue<char>();
_alfabeto.Enqueue('1');
_alfabeto.Enqueue('A');
_alfabeto.Enqueue('B');
_alfabeto.Enqueue('C');
_alfabeto.Enqueue('D');
_alfabeto.Enqueue('E');
_alfabeto.Enqueue('F');
_alfabeto.Enqueue('G');
_alfabeto.Enqueue('H');
_alfabeto.Enqueue('I');
_alfabeto.Enqueue('J');
_alfabeto.Enqueue('K');
_alfabeto.Enqueue('L');
_alfabeto.Enqueue('M');
_alfabeto.Enqueue('N');
_alfabeto.Enqueue('O');
_alfabeto.Enqueue('P');
_alfabeto.Enqueue('Q');
_alfabeto.Enqueue('R');
_alfabeto.Enqueue('S');
_alfabeto.Enqueue('T');
_alfabeto.Enqueue('U');
_alfabeto.Enqueue('V');
_alfabeto.Enqueue('W');
_alfabeto.Enqueue('X');
_alfabeto.Enqueue('Y');
_alfabeto.Enqueue('Z');
_paginaAtual = 1;
_letraAtual = _alfabeto.Dequeue();
}
public string[] Find(string url)
{
List<string> _empresas = new List<string>();
if (!_alfabeto.Any() && _letraAtual == ' ')
{
return _empresas.ToArray();
}
var webGet = new HtmlWeb();
var formattedUrl = String.Format(url, _letraAtual, _paginaAtual++);
var document = webGet.Load(formattedUrl);
var nodes = document.DocumentNode.SelectNodes("//div[#id='conteudo']/div[#class='gratuito']/p/a");
foreach (var node in nodes)
{
var href = node.GetAttributeValue("href", "");
_empresas.Add(href);
}
var elUrlProximaPagina = document.DocumentNode.SelectSingleNode("//div[#id='principal']/div[#id='conteudo']/div[#class='paginacao']/a[contains(#class,'nextPage')]");
if (elUrlProximaPagina == null)
{
_letraAtual = _alfabeto.Any() ? _alfabeto.Dequeue() : ' ';
_paginaAtual = 1;
}
Console.WriteLine(_letraAtual);
Console.WriteLine(_paginaAtual);
DadoPo.SalvarUrl();
return Find(url);
}
}
Ok, now the error is at another place, it is giving outofmemoryexception at
var document = webGet.Load(formattedUrl);
Persist the content of _empresas in the harddisk (database, physical file) after N times(1,000 for example) of scraped information from the website. And then clean the _empresas for a new set of information
What you are doing is pretty much using all the memory allowed by CLR for your PE
namespace WebCrawler.SantaCatarina
{
class SCLinkFinder : ILinkFinder
{
private readonly Queue<char> _alfabeto;
private int _paginaAtual;
private char _letraAtual;
public SCLinkFinder()
{
_alfabeto = new Queue<char>();
_alfabeto.Enqueue('1');
_alfabeto.Enqueue('A');
_alfabeto.Enqueue('B');
_alfabeto.Enqueue('C');
_alfabeto.Enqueue('D');
_alfabeto.Enqueue('E');
_alfabeto.Enqueue('F');
_alfabeto.Enqueue('G');
_alfabeto.Enqueue('H');
_alfabeto.Enqueue('I');
_alfabeto.Enqueue('J');
_alfabeto.Enqueue('K');
_alfabeto.Enqueue('L');
_alfabeto.Enqueue('M');
_alfabeto.Enqueue('N');
_alfabeto.Enqueue('O');
_alfabeto.Enqueue('P');
_alfabeto.Enqueue('Q');
_alfabeto.Enqueue('R');
_alfabeto.Enqueue('S');
_alfabeto.Enqueue('T');
_alfabeto.Enqueue('U');
_alfabeto.Enqueue('V');
_alfabeto.Enqueue('W');
_alfabeto.Enqueue('X');
_alfabeto.Enqueue('Y');
_alfabeto.Enqueue('Z');
_paginaAtual = 1;
_letraAtual = _alfabeto.Dequeue();
}
public string[] Find(string url)
{
List<string> _empresas = new List<string>();
if (!_alfabeto.Any() && _letraAtual == ' ')
{
return _empresas.ToArray();
}
var webGet = new HtmlWeb();
var formattedUrl = String.Format(url, _letraAtual, _paginaAtual++);
var document = webGet.Load(formattedUrl);
var nodes = document.DocumentNode.SelectNodes("//div[#id='conteudo']/div[#class='gratuito']/p/a");
foreach (var node in nodes)
{
var href = node.GetAttributeValue("href", "");
_empresas.Add(href);
}
var elUrlProximaPagina = document.DocumentNode.SelectSingleNode("//div[#id='principal']/div[#id='conteudo']/div[#class='paginacao']/a[contains(#class,'nextPage')]");
if (elUrlProximaPagina == null)
{
_letraAtual = _alfabeto.Any() ? _alfabeto.Dequeue() : ' ';
_paginaAtual = 1;
}
Console.WriteLine(_letraAtual);
Console.WriteLine(_paginaAtual);
//Your code to read _empresas and Persist in database(or file)
return Find(url);
}
}
}
I want that when I click the button "List all Customers", the code should read the Customer.csv file and display the information on the form called "List All Customers".
How can I do that?
public static void ReadFile()
{
StreamReader sr = File.OpenText("Customer.csv");
}
public static void LoadCustomers()
{
try
{
if (File.Exists("Customer.csv"))
{
string temp = null;
int count = 0;
using (StreamReader sr = File.OpenText(#"Customer.csv"))
{
while ((temp = sr.ReadLine()) != null)
{
temp = temp.Trim();
string[] lineHolder = temp.Split(',');
Customer tempCust = new Customer();
tempCust.customerName = lineHolder[0];
tempCust.customerAddress = lineHolder[1];
tempCust.customerZip = Convert.ToInt32(lineHolder[2]);
myCustArray[count] = tempCust;
count++;
}//end for loop
}
}
else
{
File.Create("Customer.csv");
}
}
catch (Exception e)
{
System.Windows.Forms.MessageBox.Show("File Loading Error: " + e.Message);
}
}
I'm not sure what kind of control you want to display this data in but your method could just return a list of Customer, then you can add to a ListBox, ListView or DataGrid
public static IEnumerable<Customer> LoadCustomers(string filename)
{
if (File.Exists(filename))
{
foreach (var line in File.ReadAllLines(filename).Where(l => l.Contains(',')))
{
var splitLine = line.Split(',');
if (splitLine.Count() >= 3)
{
yield return new Customer
{
customerName = splitLine[0].Trim(),
customerAddress = splitLine[1].Trim(),
customerZip = Convert.ToInt32(splitLine[2].Trim())
};
}
}
}
}
ListBox
listBox1.DisplayMember = "customerName";
listBox1.Items.AddRange(LoadCustomers(#"G:\Customers.csv").ToArray());
First, take advantage of the list object:
public static void ReadFile()
{
StreamReader sr = File.OpenText("Customer.csv");
}
public static void LoadCustomers()
{
try
{
if (File.Exists("Customer.csv"))
{
string temp = null;
var retList = new List<Customer>();
using (StreamReader sr = File.OpenText(#"Customer.csv"))
{
while ((temp = sr.ReadLine()) != null)
{
temp = temp.Trim();
string[] lineHolder = temp.Split(',');
retlist.add(new Customer(){
customerName = linerHolder[0],
customerAddress = lineHolder[1],
customerZip = Convert.ToInt32(lineHolder[2])
});
}//end for loop
}
}
else
{
File.Create("Customer.csv");
}
}
catch (Exception e)
{
System.Windows.Forms.MessageBox.Show("File Loading Error: " + e.Message);
}
}
just wrap it up in a class, call if from the controller and populate up the results. Depending on how often you will be updating this data, you might look into caching it, so you don't have to run this process every X seconds for each user.
I have multiple file in a folder with a naming convention
Name_MoreName_DDMMYYYY_SomeNumber_HHMMSS.txt
How can I get the file which has oldest Date and Time (i.e. oldest DDMMYYYY and HHMMSS).
Ex:
Name_MoreName_22012012_SomeNumber_072334.txt
Name_MoreName_22012012_SomeNumber_072134.txt
Name_MoreName_24012012_SomeNumber_072339.txt
Name_MoreName_22012012_SomeNumber_072135.txt
So the oldest file will be
Name_MoreName_22012012_SomeNumber_072134.txt
how can i take the oldest file only ?
Edit
This is what I have done so far.. in a forach loop I am reading file name one by one
private void FileInformation(string fileName, ref string concatFile)
{
try
{
string completeFileName = fileName.Trim();
string[] fileComponenets = completeFileName.Split('_');
string fileDate = string.Empty;
string fileTime = string.Empty;
if (fileComponenets.Length > 0)
{
fileDate = fileComponenets[4].Replace(".txt", "").Trim();
fileTime = fileComponenets[2].ToString();
concatFile = fileDate + "-" + fileTime;
}
}
catch (Exception ex)
{
}
}
-- Main function
string fileStats = string.Empty;
foreach (string filePath in arrFileCollection)
{
if (filePath.ToLower().Contains("Name_MoreName_")&&
filePath.ToLower().Contains(".txt"))
{
string concatFile = string.Empty;
FileInformation(filePath.Replace(dirPath, ""), ref concatFile);
fileStats = fileStats + "," + concatFile;
}
}
Now I am getting all date time in a string with comma seperated value. Now I am stuck up. How can I take the smallest among them and get the related file
EDIT2
Note: Framework is .NET 2.0
string oldestFile = Directory.EnumerateFiles(path)
.OrderBy(file => ExtractDateTimeFrom(file))
.First(); // FirstOrDefault
And write method which will parse your file name and extract date from it:
public static DateTime ExtractDateTimeFrom(string fileName)
{
Regex regex = new Regex(#".+_(\d\d\d\d\d\d\d\d)_.+_(\d\d\d\d\d\d).txt");
var match = regex.Match(fileName);
string dateString = match.Groups[1].Value + match.Groups[2].Value;
return DateTime.ParseExact(dateString, "ddMMyyyyHHmmsss", null);
}
.NET 2.0 Simplest solution:
string oldestFile = "";
DateTime oldestDate = DateTime.Max;
foreach(string fileName in Directory.GetFiles(path))
{
DateTime date = ExtractDateTimeFrom(fileName);
if (date < oldestDate)
{
oldestFile = fileName;
oldestDate = date;
}
}
something like this maybe?
string[] filePaths = Directory.GetFiles(#"c:\MyDir\");
Regex rex = new Regex(#"^.*_(\d+)\.txt");
int date = int.MaxValue;
int oldestdate = int.MaxValue;
String oldestfile;
foreach(String filePath in filePaths)
{
Match match = rex.Match(filePath);
if(match.Success)
date = int.Parse(match.Groups[0].Value);
if (date < oldestdate)
{
oldestdate = date;
oldestfile = filePath;
}
}
Use DirectoryInfo and FileInfo classes. For example, just to give idea:
IOrderedEnumerable<FileInfo> filesInfo = new DirectoryInfo("D:\\")
.EnumerateFiles()
.OrderBy(f=>f.FullName);
UPDATE: For .NET 2.0, I would suggest you to separate the comparison logic from your main code... so why not create a custom type implementing IComparable interface.
public class CustomFileInfo :IComparable<CustomFileInfo>
{
public string Name { get; set; }
public string MoreName { get; set; }
public DateTime FileDate { get; set; }
public int Number { get; set; }
public DateTime FileTime { get; set; }
public CustomFileInfo(string fileNameString)
{
string[] fileNameStringSplited = fileNameString.Split('_');
this.Name = fileNameStringSplited[0];
this.MoreName = fileNameStringSplited[1];
this.FileDate = DateTime.ParseExact(fileNameStringSplited[2], "ddMMyyyy", null);
this.Number = int.Parse(fileNameStringSplited[3]);
this.FileTime = DateTime.ParseExact(fileNameStringSplited[4], "HHmmss", null);
}
public int CompareTo(CustomFileInfo other)
{
// add more comparison criteria here
if (this.FileDate == other.FileDate)
return 0;
if (this.FileDate > other.FileDate)
return 1;
return -1;
}
}
And then in your code, you may simple get files using DirectoryInfo and compare each files...
FileInfo[] filesInfo = new DirectoryInfo("D:\\").GetFiles();
//set first file initially as minimum
CustomFileInfo oldestFileInfo = new CustomFileInfo(filesInfo[0].FullName);
for (int i = 1; i < filesInfo.Length; i++)
{
CustomFileInfo currentFileInfo = new CustomFileInfo(filesInfo[i].FullName);
//compare each file and keep the oldest file reference in oldestFileInfo
if (oldestFileInfo.CompareTo(currentFileInfo) < 0)
oldestFileInfo = currentFileInfo;
}
You may optimize code for use and customize the comparison code as per your criteria.
Use This:
Updated
List<string> address = new List<string>() { "Name_MoreName_22012011_SomeNumber_072334.txt",
"Name_MoreName_22012012_SomeNumber_072134.txt",
"Name_MoreName_24012012_SomeNumber_072339.txt",
"Name_MoreName_22012012_SomeNumber_072135.txt",};
DateTimeFormatInfo dtfi = new DateTimeFormatInfo();
dtfi.ShortDatePattern = "dd-MM-yyyy";
dtfi.DateSeparator = "-";
address = address.OrderBy(s => Convert.ToDateTime((s.Split('_')[2]).Insert(2, "-").Insert(5, "-"), dtfi)).ToList();
string oldest = address[0];