Reading respective lines of multiple text files to be combined

Reading respective lines of multiple text files to be combined - c#

Firstly, I'm new to C# and I'm having a hard time figuring out how I'd go about doing this.
Basically, I have multiple text files all with their own types of data. My aim is to read the first line of each of these files and combine them into one string so that I can sort them later by their respective days.
For example, in the first line of each file there could be the values...
File 1: 16/02/15
File 2: Monday
File 3: 75.730
File 4: 0.470
File 5: 75.260
File 6: 68182943
So I'd like to combine them in a string like so "16/02/15 Monday 75.730 0.470 75.260 68182943"
I'd also want to do this for the second, third, fourth line etc. There are a total of 144 entries or lines.
Here is the code I have so far. I'm unsure if I'm on the right track.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace BankAlgorithms
{
class Algorithms
{
static void Main(string[] args)
{
//Saves each individual text file into their own string arrays.
string[] Day = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\Day.txt");
string[] Date = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\Date.txt");
string[] Close = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\SH1_Close.txt");
string[] Diff = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\SH1_Diff.txt");
string[] Open = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\SH1_Open.txt");
string[] Volume = File.ReadAllLines(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files\SH1_Volume.txt");
//Lists all files currently stored within the directory
string[] bankFiles = Directory.GetFiles(#"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files");
Console.WriteLine("Bank Files currently saved within directory:\n");
foreach (string name in bankFiles)
{
Console.WriteLine(name);
}
Console.WriteLine("\nSelect the day you wish to view the data of (Monday-Friday). To view a grouped \nlist of all days, enter \"Day\"\n");
string selectedArray = Console.ReadLine();
if (selectedArray == "Day")
{
Console.WriteLine("Opening Day File...");
Console.WriteLine("\nDays grouped up in alphabetical order\n");
var sort = from s in Day
orderby s
select s;
foreach (string c in sort)
{
Console.WriteLine(c);
}
}
Console.ReadLine();
}
}
}

So this might be a little more than you strictly need, but I think it'll be robust, quite flexible and be able to handle huge files if need be.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace ConsoleApplication2
{
class Program
{
private static void Main(string[] args)
{
// const string folder = #"C:\Users\computing\Desktop\algorithms\CMP1124M_Assigment_Files";
const string folder = #"C:\Temp\SO";
var filenames = new[] { #"Date.txt", #"Day.txt", #"SH1_Close.txt", #"SH1_Diff.txt", #"SH1_Open.txt", #"SH1_Volume.txt" };
var dataCombiner = new DataCombiner(folder, filenames);
var stockParser = new StockParser();
foreach (var stock in dataCombiner.GetCombinedData(stockParser.Parse)) //can also use where clause here
{
if (ShowRow(stock))
{
var outputText = stock.ToString();
Console.WriteLine(outputText);
}
}
Console.ReadLine();
}
private static bool ShowRow(Stock stock)
{
//use input from user etc...
return (stock.DayOfWeek == "Tuesday" || stock.DayOfWeek == "Monday")
&& stock.Volume > 1000
&& stock.Diff < 10; // etc
}
}
internal class DataCombiner
{
private readonly string _folder;
private readonly string[] _filenames;
public DataCombiner(string folder, string[] filenames)
{
_folder = folder;
_filenames = filenames;
}
private static IEnumerable<string> GetFilePaths(string folder, params string[] filenames)
{
return filenames.Select(filename => Path.Combine(folder, filename));
}
public IEnumerable<T> GetCombinedData<T>(Func<string[], T> parserMethod) where T : class
{
var filePaths = GetFilePaths(_folder, _filenames).ToArray();
var files = filePaths.Select(filePath => new StreamReader(filePath)).ToList();
var lineCounterFile = new StreamReader(filePaths.First());
while (lineCounterFile.ReadLine() != null)// This can be replaced with a simple for loop if the files will always have a fixed number of rows
{
var rawData = files.Select(file => file.ReadLine()).ToArray();
yield return parserMethod(rawData);
}
}
}
internal class Stock
{
public DateTime Date { get; set; }
public string DayOfWeek { get; set; }
public double Open { get; set; }
public double Close { get; set; }
public double Diff { get; set; }
public int Volume { get; set; }
public override string ToString()
{
//Whatever format you want
return string.Format("{0:d} {1} {2} {3} {4} {5}", Date, DayOfWeek, Close, Diff, Open, Volume);
}
}
internal class StockParser
{
public Stock Parse(string[] rawData)
{
//TODO: Error handling required here
var stock = new Stock();
stock.Date = DateTime.Parse(rawData[0]);
stock.DayOfWeek = rawData[1];
stock.Close = double.Parse(rawData[2]);
stock.Diff = double.Parse(rawData[3]);
stock.Open = double.Parse(rawData[4]);
stock.Volume = int.Parse(rawData[5]);
return stock;
}
public string ParseToRawText(string[] rawData)
{
return string.Join(" ", rawData);
}
}
}
PS:
Instead of reading it from the file, I'd rather also calculate the DayOfWeek from the Date.
Also be careful when parsing dates from a different locale (eg. USA vs UK).
If you have an option I'd only use the ISO 8601 datetime format.

Access your file strings from a collection, use this code to read from each file and use a StringBuilder to build your string.
Read only the first few lines of text from a file
var builder = new StringBuilder();
foreach(var file in fileList)
{
using (StreamReader reader = new StreamReader(file))
{
builder.Append(reader.ReadLine());
}
}
return builder.ToString();

You could use following approach: put all in an string[][] first, then it's easier:
string[][] all = { Day, Date, Close, Diff, Open, Volume };
To get the minimum length of all:
int commonRange = all.Min(arr => arr.Length);
Now this is all you need:
string[] merged = Enumerable.Range(0, commonRange)
.Select(i => string.Join(" ", all.Select(arr => arr[i])))
.ToArray();
This is similar to a for-loop from 0 to commonRange where you access all arrays with the same index and use String.Join to get a single string from all files' lines.
Since you have commented that you want to merge only the lines of a specific day:
var lineIndexes = Day.Take(commonRange)
.Select((line, index) => new { line, index })
.Where(x => x.line.TrimStart().StartsWith("Monday", StringComparison.InvariantCultureIgnoreCase))
.Select(x => x.index);
string[] merged = lineIndexes
.Select(i => string.Join(" ", all.Select(arr => arr[i])))
.ToArray();

Related

How to collect array of matched value from string?

I am filling data from memory mapped file to string like :
AAPL,2013-1-2
Open:79.117
Close:78.433
High:79.286
Low:77.376
Volume:139948984
AAPL,2013-1-3
Open:78.268
Close:77.442
High:78.524
Low:77.286
Volume:88114464
and so on...
So now I want to make an array of close value of all days. And there are collection of thousands of days data in memory mapped file and string. So how can I fetch close value and can make array of its?
I am trying to make it's array but it's make whole data into single array. So it's not what i want.
string[] lines = System.IO.File.ReadAllLines(#"D:\mine.txt");
foreach (string line in lines)
{
// Use a tab to indent each line of the file.
Console.WriteLine("\t" + line);
}
byte[] bytes = new byte[10000000];
stream.ReadArray(0, bytes, 0, bytes.Length);
string txt = Encoding.UTF8.GetString(bytes).Trim('\0');`
So I need an array of all close value to fetch from that string. Like that:
{78.433, 77.442, etc..}

Try this:
decimal[] arrayOfCloses =
File
.ReadAllLines(#"D:\mine.txt")
.Select(x => x.Split(':'))
.Where(x => x.Length == 2)
.Where(x => x[0] == "Close")
.Select(x => decimal.Parse(x[1]))
.ToArray();

Try this:
File.ReadLines(#"D:\mine.txt")
// Pick only those lines starting with "Close"
.Where(line => line.StartsWith("Close:"))
// Get value, which follows colon, and parse it do double
.Select(line => double.Parse(line.Split(':')[1]))
// Convert result to an array
.ToArray();

I supposed your file Like this :
AAPL,2013-1-2
Open:79.117
Close:78.433
High:79.286
Low:77.376
Volume:139948984
AAPL,2013-1-3
Open:78.268
Close:77.442
High:78.524
Low:77.286
Volume:88114464
Try this
var lines = System.IO.File.ReadAllLines(#"C:\Users\bouyami\Documents\AB_ATELIER\1.txt").ToList();
var linesFiltred = lines.Where(x => x.StartsWith("Close")).ToList();
var result = linesFiltred.Select(x => x.Split(':')[1]).ToList();

Try following which accepts blank lines :
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ConsoleApplication98
{
class Program
{
const string FILENAME = #"c:\temp\test.txt";
static void Main(string[] args)
{
AAPL aapl = new AAPL(FILENAME);
}
}
public class AAPL
{
static List<AAPL> aapls = new List<AAPL>();
private DateTime date { get; set; }
public decimal open { get; set; }
public decimal close { get; set; }
public decimal low { get; set; }
public decimal high { get; set; }
public int volume { get; set; }
public AAPL() { }
public AAPL(string filename)
{
StreamReader reader = new StreamReader(filename);
string line = "";
AAPL newAAPL = null;
while ((line = reader.ReadLine()) != null)
{
line = line.Trim();
if (line.Length > 0)
{
if (line.StartsWith("AAPL"))
{
string dateStr = line.Substring(line.IndexOf(",") + 1);
date = DateTime.Parse(dateStr);
newAAPL = new AAPL();
aapls.Add(newAAPL);
newAAPL.date = date;
}
else
{
string[] splitArray = line.Split(new char[] { ':' });
switch (splitArray[0])
{
case "Open":
newAAPL.open = decimal.Parse(splitArray[1]);
break;
case "Close":
newAAPL.close = decimal.Parse(splitArray[1]);
break;
case "Low":
newAAPL.low = decimal.Parse(splitArray[1]);
break;
case "High":
newAAPL.high = decimal.Parse(splitArray[1]);
break;
case "Volume":
newAAPL.volume = int.Parse(splitArray[1]);
break;
}
}
}
}
}
}
}

How to Get Groups of Files from GetFiles()

I have to process files everyday. The files are named like so:
fg1a.mmddyyyy
fg1b.mmddyyyy
fg1c.mmddyyyy
fg2a.mmddyyyy
fg2b.mmddyyyy
fg2c.mmddyyyy
fg2d.mmddyyyy
If the entire file group is there for a particular date, I can process it. If it isn't there, I should not process it. I may have several partial file groups that run over several days. So when I have fg1a.12062017, fg1b.12062017 and fg1c.12062017, I can process that group (fg1) only.
Here is my code so far. It doesn't work because I can't figure out how to get only the full groups to add to the the processing file list.
fileList = Directory.GetFiles(#"c:\temp\");
string[] fileGroup1 = { "FG1A", "FG1B", "FG1C" }; // THIS IS A FULL GROUP
string[] fileGroup2 = { "FG2A", "FG2B", "FG2C", "FG2D" };
List<string> fileDates = new List<string>();
List<string> procFileList;
// get a list of file dates
foreach (string fn in fileList)
{
string dateString = fn.Substring(fn.IndexOf('.'), 9);
if (!fileDates.Contains(dateString))
{
fileDates.Add(dateString);
}
}
bool allFiles = true;
foreach (string fg in fileGroup1)
{
foreach (string fd in fileDates)
{
string finder = fg + fd;
bool foundIt = false;
foreach (string fn in fileList)
{
if (fn.ToUpper().Contains(finder))
{
foundIt = true;
}
}
if (!foundIt)
{
allFiles = false;
}
else
{
foreach (string fn in fileList)
{
procFileList.Add(fn);
}
}
}
}
foreach (string fg in fileGroup2)
{
foreach (string fd in fileDates)
{
string finder = fg + fd;
bool foundIt = false;
foreach (string fn in fileList)
{
if (fn.ToUpper().Contains(finder))
{
foundIt = true;
}
}
if (!foundIt)
{
allFiles = false;
}
else
{
foreach (string fn in fileList)
{
procFileList.Add(fn);
}
}
}
}
Any help or advice would be greatly appreciated.

Because it can sometimes get messy dealing with multiple lists, groupings, and parsing file names, I would start by creating a class that represents a FileGroupItem. This class would have a Parse method that takes in a file path, and then has properties that represent the group part and date part of the file name, as well as the full path to the file:
public class FileGroupItem
{
public string DatePart { get; set; }
public string GroupName { get; set; }
public string FilePath { get; set; }
public static FileGroupItem Parse(string filePath)
{
if (string.IsNullOrWhiteSpace(filePath)) return null;
// Split the file name on the '.' character to get the group and date parts
var fileParts = Path.GetFileName(filePath).Split('.');
if (fileParts.Length != 2) return null;
return new FileGroupItem
{
GroupName = fileParts[0],
DatePart = fileParts[1],
FilePath = filePath
};
}
}
Then, in my main code, I would create a list of the file group definitions, and then populate a list of FileGroupItems from the directory we're scanning. After that, we can determine if any file group definition is complete by comparing it's items (in a case-insensitive way) to the actual FileGroupItems we found in the directory (after first grouping the FileGroupItems by it's DatePart). If the intersection of these two lists has the same number of items as the file group definition, then it's complete and we can process that group.
Maybe it will make more sense in code:
private static void Main()
{
var scanDirectory = #"f:\public\temp\";
var processedDirectory = #"f:\public\temp2\";
// The lists that define a complete group
var fileGroupDefinitions = new List<List<string>>
{
new List<string> {"FG1A", "FG1B", "FG1C"},
new List<string> {"FG2A", "FG2B", "FG2C", "FG2D"}
};
// Populate a list of FileGroupItems from the files
// in our directory, and group them on the DatePart
var fileGroups = Directory.EnumerateFiles(scanDirectory)
.Select(FileGroupItem.Parse)
.GroupBy(f => f.DatePart);
// Now go through each group and compare the items
// for that date with our file group definitions
foreach (var fileGroup in fileGroups)
{
foreach (var fileGroupDefinition in fileGroupDefinitions)
{
// Get the intersection of the group definition and this file group
var intersection = fileGroup
.Where(f => fileGroupDefinition.Contains(
f.GroupName, StringComparer.OrdinalIgnoreCase))
.ToList();
// If all the items in the definition are there, then process the files
if (intersection.Count == fileGroupDefinition.Count)
{
foreach (var fileGroupItem in intersection)
{
Console.WriteLine($"Processing file: {fileGroupItem.FilePath}");
// Move the file to the processed directory
File.Move(fileGroupItem.FilePath,
Path.Combine(processedDirectory,
Path.GetFileName(fileGroupItem.FilePath)));
}
}
}
}
Console.WriteLine("\nDone!\nPress any key to exit...");
Console.ReadKey();
}

I think you could simplify your algorithm so you just have file groups as a prefix and a number of files to expect, fg1 is 3 files for a given date
I think your code to find the distinct dates present is a good idea, though you should use a hash set rather than a list, if you occasionally expect a large number of dates.. ("Valentine's Day?" - Ed)
Then you just need to work on the other loop that does the checking. An algorithm like this
//make a new Dictionary<string,int> for the filegroup prefixes and their counts3
//eg myDict["fg1"] = 3; myDict["fg2"] = 4;
//list the files in the directory, into an array of fileinfo objects
//see the DirectoryInfo.GetFiles method
//foreach string d in the list of dates
//foreach string fgKey in myDict.Keys - the list of group prefixes
//use a bit of Linq to get all the fileinfos with a
//name starting with the group and ending with the date
var grplist = myfileinfos.Where(fi => fi.Name.StartsWith(fg) && fi.Name.EndsWith(d));
//if the grplist.Count == the filegroup count ( myDict[fgKey] )
//then send every file in grplist for processing
//remember that grplist is a collection of fileinfo objects,
//if your processing method takes a string filename, use fileinfo.Fullname
Putting your file groupings into one dictionary will make things a lot easier than having them as x separate arrays
I haven't written all the code for you, but I've comment sketched the algorithm, and I've put in some of the more awkward bits like the link, dictionary declaration and how to fill it.. have a go at fleshing it out with code, ask any questions in a comment on this post

First, create an array of the groups to make processing easier:
var fileGroups = new[] {
new[] { "FG1A", "FG1B", "FG1C" },
new[] { "FG2A", "FG2B", "FG2C", "FG2D" }
};
Then you can convert the array into a Dictionary to map each name back to its group:
var fileGroupMap = fileGroups.SelectMany(g => g.Select(f => new { key = f, group = g })).ToDictionary(g => g.key, g => g.group);
Then, preprocess the files you get from the directory:
var fileList = from fname in Directory.GetFiles(...)
select new {
fname,
fdate = Path.GetExtension(fname),
ffilename = Path.GetFileNameWithoutExtension(fname).ToUpper()
};
Now you can take your fileList and group by date and group, and then filter to just completed groups:
var profFileList = (from file in fileList
group file by new { file.fdate, fgroup = fileGroupMap[file.ffilename] } into fng
where fng.Key.fgroup.All(f => fng.Select(fn => fn.ffilename).Contains(f))
from fn in fng
select fn.fname).ToList();
Since you didn't preserve the groups, I flattened the groups at the end of the query into just a list of files to be processed. If you needed, you could keep them in groups and process the groups instead.
Note: If a file exists that belongs to no group, you will get an error from the lookup in fileGroupMap. If that is a possiblity you can filter the fileList to just known names as follows:
var fileList = from fname in GetFiles
let ffilename = Path.GetFileNameWithoutExtension(fname).ToUpper()
where fileGroupMap.Keys.Contains(ffilename)
select new {
fname,
fdate = Path.GetExtension(fname),
ffilename
};
Also note that having a name in multiple groups will cause an error in the creation of fileGroupMap. If that is a possibility, the queries would become more complex and have to be written differently.

Here is a simple class
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
string[] filenames = { "fg1a.12012017", "fg1b.12012017", "fg1c.12012017", "fg2a.12012017", "fg2b.12012017", "fg2c.12012017", "fg2d.12012017" };
new SplitFileName(filenames);
List<List<SplitFileName>> results = SplitFileName.GetGroups();
}
}
public class SplitFileName
{
public static List<SplitFileName> names = new List<SplitFileName>();
string filename { get; set; }
string prefix { get; set; }
string letter { get; set; }
DateTime date { get; set; }
public SplitFileName() { }
public SplitFileName(string[] splitNames)
{
foreach(string name in splitNames)
{
SplitFileName splitName = new SplitFileName();
names.Add(splitName);
splitName.filename = name;
string[] splitArray = name.Split(new char[] { '.' });
splitName.date = DateTime.ParseExact(splitArray[1],"MMddyyyy", System.Globalization.CultureInfo.InvariantCulture);
splitName.prefix = splitArray[0].Substring(0, splitArray[0].Length - 1);
splitName.letter = splitArray[0].Substring(splitArray[0].Length - 1,1);
}
}
public static List<List<SplitFileName>> GetGroups()
{
return names.OrderBy(x => x.letter).GroupBy(x => new { date = x.date, prefix = x.prefix })
.Where(x => string.Join(",",x.Select(y => y.letter)) == "a,b,c,d")
.Select(x => x.ToList())
.ToList();
}
}
}

With everyone's help, I solved it too. This is what I'm going with because it's the most maintainable for me but the solutions were so smart!!! Thanks everyone for your help.
private void CheckFiles()
{
var fileGroups = new[] {
new [] { "FG1A", "FG1B", "FG1C", "FG1D" },
new[] { "FG2A", "FG2B", "FG2C", "FG2D", "FG2E" } };
List<string> fileDates = new List<string>();
List<string> pfiles = new List<string>();
// get a list of file dates
foreach (string fn in fileList)
{
string dateString = fn.Substring(fn.IndexOf('.'), 9);
if (!fileDates.Contains(dateString))
{
fileDates.Add(dateString);
}
}
// check if a date has all the files
foreach (string fd in fileDates)
{
int fgCount = 0;
// for each file group
foreach (Array masterfg in fileGroups)
{
foreach (string fg in masterfg)
{
// see if all the files are there
bool foundIt = false;
string finder = fg + fd;
foreach (string fn in fileList)
{
if (fn.ToUpper().Contains(finder))
{
pfiles.Add(fn);
}
}
fgCount++;
}
if (fgCount == pfiles.Count())
{
foreach (string fn in pfiles)
{
procFileList.Add(fn);
}
pfiles.Clear();
}
else
{
pfiles.Clear();
}
}
}
return;
}

Filter out low values from imported CSV file, remove currency formatting from CSV file within the program

I've search around a bit prior to making a profile here in an attempt to alleviate the two mentioned problems.
This is for appraisal purposes, I get a CSV file from a website that contains a list of neighborhood home prices and the year those properties were built. The price column in formatted to currency and the other is general. The purpose of the code is to return, in a text file, the high, low, and average values of both the year built and the price. By manually formatting the price column to general and searching for anomalous year built values (ie. a year built of 0 or 971) I can get the result I want. I have to change the currency format to general for the code to work. I would like to streamline the process by performing both the filtering of anomalous values and the formatting within the code if possible.
Here is what I have so far:
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Prodominate_Values
{
class Program
{
static void Main(string[] args)
{
args = new[] { "C:/Users/Yeti/Desktop/Firm_Inventory.csv" };
var transactions = ReadTransactions(args[0]);
var averagePrice = CalculateAveragePrice(transactions);
var averageYear = CalculateAverageYear(transactions);
var lowPrice = transactions.Min(Transactions => Transactions.Price);
var highPrice = transactions.Max(Transactions => Transactions.Price);
var lowYear = transactions.Min(Transactions => Transactions.YearBuilt);
var highYear = transactions.Max(Transactions => Transactions.YearBuilt);
string[] lines = { lowPrice + " " + lowYear, highPrice + " " + highYear, averagePrice + " " + averageYear };
File.WriteAllLines(#"C:/Users/Yeti/Desktop/Firm_Inventory.txt", lines);
Console.WriteLine("Operation Completed");
Console.ReadLine();
}
static IList<Transaction> ReadTransactions (string path)
{
var list = new List<Transaction>();
foreach (var line in File.ReadLines(path).Skip(1))
{
Console.WriteLine(line);
list.Add(Transaction.FromLine(line));
}
return list;
}
static int CalculateAveragePrice(IEnumerable<Transaction> transactions)
{
var count = 0;
var total = 0;
foreach (var transaction in transactions)
{
count += 1;
total += transaction.Price;
}
return total / count;
}
static int CalculateAverageYear(IEnumerable<Transaction> transactions)
{
var count = 0;
var total = 0;
foreach (var transaction in transactions)
{
count += 1;
total += transaction.YearBuilt;
}
return total / count;
}
class Transaction
{
public int Price { get; set; }
public int YearBuilt { get; set; }
public static Transaction FromLine(string line)
{
var data = line.Split(',');
Console.WriteLine(Convert.ToString(int.Parse(data[1])));
return new Transaction()
{
Price = int.Parse(data[0]),
YearBuilt = int.Parse(data[1]),
};
}
}
}
}

Compare text files in C# and remove duplicate lines

1.txt:
Origination,destination,datetime,price
YYZ,YTC,2016-04-01 12:30,$550
YYZ,YTC,2016-04-01 12:30,$550
LKC,LKP,2016-04-01 12:30,$550
2.txt:
Origination|destination|datetime|price
YYZ|YTC|2016-04-01 12:30|$550
AMV|YRk|2016-06-01 12:30|$630
LKC|LKP|2016-12-01 12:30|$990
I have two text files with ',' and '|' as separators, and I want to create a console app in C# which reads these two files when I pass an origination and destination location from command prompt.
While searching, I want to ignore duplicate lines, and I want to display the results in order by price.
The output should be { origination } -> { destination } -> datetime -> price
Need help how to perform.

Here's a simple solution that works for your example files. It doesn't have any error checking for if the file is in a bad format.
using System;
using System.Collections.Generic;
class Program
{
class entry
{
public string origin;
public string destination;
public DateTime time;
public double price;
}
static void Main(string[] args)
{
List<entry> data = new List<entry>();
//parse the input files and add the data to a list
ParseFile(data, args[0], ',');
ParseFile(data, args[1], '|');
//sort the list (by price first)
data.Sort((a, b) =>
{
if (a.price != b.price)
return a.price > b.price ? 1 : -1;
else if (a.origin != b.origin)
return string.Compare(a.origin, b.origin);
else if (a.destination != b.destination)
return string.Compare(a.destination, b.destination);
else
return DateTime.Compare(a.time, b.time);
});
//remove duplicates (list must be sorted for this to work)
int i = 1;
while (i < data.Count)
{
if (data[i].origin == data[i - 1].origin
&& data[i].destination == data[i - 1].destination
&& data[i].time == data[i - 1].time
&& data[i].price == data[i - 1].price)
data.RemoveAt(i);
else
i++;
}
//print the results
for (i = 0; i < data.Count; i++)
Console.WriteLine("{0}->{1}->{2:yyyy-MM-dd HH:mm}->${3}",
data[i].origin, data[i].destination, data[i].time, data[i].price);
Console.ReadLine();
}
private static void ParseFile(List<entry> data, string filename, char separator)
{
using (System.IO.FileStream fs = System.IO.File.Open(filename, System.IO.FileMode.Open))
using (System.IO.StreamReader reader = new System.IO.StreamReader(fs))
while (!reader.EndOfStream)
{
string[] line = reader.ReadLine().Split(separator);
if (line.Length == 4)
{
entry newitem = new entry();
newitem.origin = line[0];
newitem.destination = line[1];
newitem.time = DateTime.Parse(line[2]);
newitem.price = double.Parse(line[3].Substring(line[3].IndexOf('$') + 1));
data.Add(newitem);
}
}
}
}

I'm not 100% clear on what the output of your program is supposed to be, so I'll leave that part of the implementation up to you. My strategy was to use a constructor method that takes a string (that you will read from a file) and a delimiter (since it varies) and use that to create objects which you can manipulate (e.g. add to hash sets, etc).
PriceObject.cs
using System;
using System.Globalization;
namespace ConsoleApplication1
{
class PriceObject
{
public string origination { get; set; }
public string destination { get; set; }
public DateTime time { get; set; }
public decimal price { get; set; }
public PriceObject(string inputLine, char delimiter)
{
string[] parsed = inputLine.Split(new char[] { delimiter }, 4);
origination = parsed[0];
destination = parsed[1];
time = DateTime.ParseExact(parsed[2], "yyyy-MM-dd HH:mm", CultureInfo.InvariantCulture);
price = Decimal.Parse(parsed[3], NumberStyles.Currency, new CultureInfo("en-US"));
}
public override bool Equals(object obj)
{
var item = obj as PriceObject;
return origination.Equals(item.origination) &&
destination.Equals(item.destination) &&
time.Equals(item.time) &&
price.Equals(item.price);
}
public override int GetHashCode()
{
unchecked
{
var result = 17;
result = (result * 23) + origination.GetHashCode();
result = (result * 23) + destination.GetHashCode();
result = (result * 23) + time.GetHashCode();
result = (result * 23) + price.GetHashCode();
return result;
}
}
}
}
Program.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace ConsoleApplication1
{
class Program
{
static void Main(string[] args)
{
HashSet<PriceObject> list1 = new HashSet<PriceObject>();
HashSet<PriceObject> list2 = new HashSet<PriceObject>();
using (StreamReader reader = File.OpenText(args[0]))
{
string line = reader.ReadLine(); // this will remove the header row
while (!reader.EndOfStream)
{
line = reader.ReadLine();
if (String.IsNullOrEmpty(line))
continue;
// add each line to our list
list1.Add(new PriceObject(line, ','));
}
}
using (StreamReader reader = File.OpenText(args[1]))
{
string line = reader.ReadLine(); // this will remove the header row
while (!reader.EndOfStream)
{
line = reader.ReadLine();
if (String.IsNullOrEmpty(line))
continue;
// add each line to our list
list2.Add(new PriceObject(line, '|'));
}
}
// merge the two hash sets, order by price
list1.UnionWith(list2);
List<PriceObject> output = list1.ToList();
output.OrderByDescending(x => x.price).ToList();
// display output here, e.g. define your own ToString method, etc
foreach (var item in output)
{
Console.WriteLine(item.ToString());
}
Console.ReadLine();
}
}
}

search text file using c# and display the line number and the complete line that contains the search keyword

I require help to search a text file (log file) using c# and display the line number and the complete line that contains the search keyword.

This is a slight modification from: http://msdn.microsoft.com/en-us/library/aa287535%28VS.71%29.aspx
int counter = 0;
string line;
// Read the file and display it line by line.
System.IO.StreamReader file = new System.IO.StreamReader("c:\\test.txt");
while((line = file.ReadLine()) != null)
{
if ( line.Contains("word") )
{
Console.WriteLine (counter.ToString() + ": " + line);
}
counter++;
}
file.Close();

Bit late to the game on this one, but happened across this post and thought I'd add an alternative answer.
foreach (var match in File.ReadLines(#"c:\LogFile.txt")
.Select((text, index) => new { text, lineNumber = index+ 1 })
.Where(x => x.text.Contains("SEARCHWORD")))
{
Console.WriteLine("{0}: {1}", match.lineNumber, match.text);
}
This uses:
File.ReadLines, which eliminates the need for a StreamReader, and it also plays nicely with LINQ's Where clause to return a filtered set of lines from a file.
The overload of Enumerable.Select that returns each element's index, which you can then add 1 to, to get the line number for the matching line.
Sample Input:
just a sample line
another sample line
first matching SEARCHWORD line
not a match
...here's aSEARCHWORDmatch
SEARCHWORD123
asdfasdfasdf
Output:
3: first matching SEARCHWORD line
5: ...here's aSEARCHWORDmatch
6: SEARCHWORD123

To export do Excel you can use the CSV file format, like the Pessimist wrote. If you are uncertain about what to write, try entering some data in MS Excel and click on "Save As" option in the Menu and choose CSV as file type.
Take care when writing a CSV file format as in some languages the default for separating values is not the comma. In brazilian portuguese, for example, the default is comma as decimal separator, dot as thousands separator and semicolon for separating values. Mind the culture when writing that.
The other alternative is using horizontal tabs as separators. Experiment to write a string, press the TAB key and then another string and paste it into Microsoft Excel. It is the default separator in that program.
If you're using an ad-hoc solution to your specific problem, either alternatives can be used without much thinking. If you are programming something to be used by other persons (or in other environments), mind the culture specific differences.
Oh, I've just remembered now: you can write a Spreadsheet using XML, you can do that with only the .NET package. I've done that years ago with C# .NET 2.0

I had a requirement where I needed to search through a list of directories looking for particular file types, containing a specific search terms but excluding other terms.
For example let's say you wanted to look through C:\DEV and only find .cs files that have terms "WriteLine" and "Readline" but not the term "hello".
I decided to write a small c# utility to do just this:
This is how you call it:
class Program
{
//Syntax:
//FileSearch <Directory> EXT <ext1> <ext2> LIKE <TERM1> <TERM2> NOT <TERM3> <TERM4>
//Example:
//Search for all files recursively in C:\Dev with an extension of cs that contain either "WriteLine" or "Readline" but not "hello"
//FileSearch C:\DEV EXT .cs LIKE "WriteLine" "ReadLine" NOT "hello"
static void Main(string[] args)
{
if (args.Length == 0)
{
Console.WriteLine("FileSearch <Directory> EXT <EXT1> LIKE <TERM1> <TERM2> NOT <TERM3> <TERM4>");
return;
}
Search s = new Search(args);
s.DoSearch();
}
}
This is the implementation:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
class Hit
{
public string File { get; set; }
public int LineNo { get; set; }
public int Pos { get; set; }
public string Line { get; set; }
public string SearchTerm { get; set; }
public void Print()
{
Console.WriteLine(File);
Console.Write("(" + LineNo + "," + Pos + ") ");
Console.WriteLine(Line);
}
}
class Search
{
string rootDir;
List<string> likeTerms;
List<string> notTerms;
List<string> extensions;
List<Hit> hitList = new List<Hit>();
//FileSearch <Directory> EXT .CS LIKE "TERM1" "TERM2" NOT "TERM3" "TERM4"
public Search(string[] args)
{
this.rootDir = args[0];
this.extensions = ParseTerms("EXT", "LIKE", args);
this.likeTerms = ParseTerms("LIKE", "NOT", args);
this.notTerms = ParseTerms("NOT", "", args);
Print();
}
public void Print()
{
Console.WriteLine("Search Dir:" + rootDir);
Console.WriteLine("Extensions:");
foreach (string s in extensions)
Console.WriteLine(s);
Console.WriteLine("Like Terms:");
foreach (string s in likeTerms)
Console.WriteLine(s);
Console.WriteLine("Not Terms:");
foreach (string s in notTerms)
Console.WriteLine(s);
}
private List<string> ParseTerms(string keyword, string stopword, string[] args)
{
List<string> list = new List<string>();
bool collect = false;
foreach (string arg in args)
{
string argu = arg.ToUpper();
if (argu == stopword)
break;
if (argu == keyword)
{
collect = true;
continue;
}
if(collect)
list.Add(arg);
}
return list;
}
private void SearchDir(string dir)
{
foreach (string file in Directory.GetFiles(dir, "*.*"))
{
string extension = Path.GetExtension(file);
if (extension != null && extensions.Contains(extension))
SearchFile(file);
}
foreach (string subdir in Directory.GetDirectories(dir))
SearchDir(subdir);
}
private void SearchFile(string file)
{
using (StreamReader sr = new StreamReader(file))
{
int lineNo = 0;
while (!sr.EndOfStream)
{
int pos = 0;
string term = "";
string line = sr.ReadLine();
lineNo++;
//Look through each likeTerm
foreach(string likeTerm in likeTerms)
{
pos = line.IndexOf(likeTerm, StringComparison.OrdinalIgnoreCase);
if (pos >= 0)
{
term = likeTerm;
break;
}
}
//If found make sure not in the not term
if (pos >= 0)
{
bool notTermFound = false;
//Look through each not Term
foreach (string notTerm in notTerms)
{
if (line.IndexOf(notTerm, StringComparison.OrdinalIgnoreCase) >= 0)
{
notTermFound = true;
break;
}
}
//If not term not found finally add to hitList
if (!notTermFound)
{
Hit hit = new Hit();
hit.File = file;
hit.LineNo = lineNo;
hit.Pos = pos;
hit.Line = line;
hit.SearchTerm = term;
hitList.Add(hit);
}
}
}
}
}
public void DoSearch()
{
SearchDir(rootDir);
foreach (Hit hit in hitList)
{
hit.Print();
}
}
}

We Keep Coding

C# (C-Sharp) is a programming language developed by Microsoft that runs on the .NET Framework.

Reading respective lines of multiple text files to be combined - c#

Related

How to collect array of matched value from string?

How to Get Groups of Files from GetFiles()

Filter out low values from imported CSV file, remove currency formatting from CSV file within the program

Compare text files in C# and remove duplicate lines

search text file using c# and display the line number and the complete line that contains the search keyword

Categories

Resources