Getting the sum per hour from 2 datatable - c#

I'm writing a text file from two DataTables.
The two DataTables are as follows.
dt1
Transaction No. Time Amount Date
1 10:00:00 200.00 03/05/2020
2 10:30:11 250.00 03/05/2020
3 11:05:22 140.00 03/05/2020
4 11:45:33 230.00 03/05/2020
5 12:15:10 220.00 03/05/2020
dt2
Transaction No. Added Amount Date
1 40.00 03/05/2020
2 25.00 03/05/2020
3 40.00 03/05/2020
4 30.00 03/05/2020
5 30.00 03/05/2020
following is my code
// Appends one "date,HH:00,amount," record per hour to fileName:
// hours 06..23 first, then hours 00..05.
using (StreamWriter sw = File.AppendText(fileName))
{
// Hours 06 through 23.
for (int a = 6; a <= 23; a++)
{
// Two-digit, zero-padded hour label.
string aa = a.ToString().PadLeft(2, '0');
// NOTE(review): always reads row 0 of dt1; the loop variable `a` is never used to
// select rows, so every hour is written with the same amount.
double salex = double.Parse(dt1.Rows[0]["Amount"].ToString());
// NOTE(review): a double never equals the string "", so this branch is never taken;
// an empty cell would already have made double.Parse throw on the line above.
if (salex.Equals(""))
{
salex = 0;
}
else
{
salex = double.Parse(dt1.Rows[0]["Amount"].ToString());
}
// Same pattern for the added amount: row 0 of dt2, regardless of the hour.
double vatx = double.Parse(dt2.Rows[0]["Added Amount"].ToString());
if (vatx.Equals(""))
{
vatx = 0;
}
else
{
vatx = double.Parse(dt2.Rows[0]["Added Amount"].ToString());
}
// Negated sum of the two amounts.
double dailysaleHRLY = -salex + -vatx;
// Write() is used throughout, so no line terminator is emitted between records.
sw.Write(dtpDate.Value.ToString("MM/dd/yyyy") + ",");
sw.Write(aa + ":00" + ",");
sw.Write(dailysaleHRLY.ToString("0.00") + ",");
}
// Hours 00 through 05; the body is the same as the loop above.
for (int a = 0; a <= 5; a++)
{
string aa = a.ToString().PadLeft(2, '0');
double salex = double.Parse(dt1.Rows[0]["Amount"].ToString());
if (salex.Equals(""))
{
salex = 0;
}
else
{
salex = double.Parse(dt1.Rows[0]["Amount"].ToString());
}
double vatx = double.Parse(dt2.Rows[0]["Added Amount"].ToString());
if (vatx.Equals(""))
{
vatx = 0;
}
else
{
vatx = double.Parse(dt2.Rows[0]["Added Amount"].ToString());
}
double dailysaleHRLY = -salex + -vatx;
sw.Write(dtpDate.Value.ToString("MM/dd/yyyy") + ",");
sw.Write(aa + ":00" + ",");
sw.Write(dailysaleHRLY.ToString("0.00") + ",");
}
// Shown while the writer is still open (the using block closes on the next brace).
MessageBox.Show("Txt File succesfully created!", "SYSTEM", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
This is the output of my code.
Date, Time, Sum
03/05/2020,06:00,515.00
03/05/2020,07:00,515.00
03/05/2020,08:00,515.00
03/05/2020,09:00,515.00
03/05/2020,10:00,515.00
03/05/2020,11:00,515.00
03/05/2020,12:00,515.00
03/05/2020,13:00,515.00
03/05/2020,14:00,515.00
03/05/2020,15:00,515.00
03/05/2020,16:00,515.00
03/05/2020,17:00,515.00
03/05/2020,18:00,515.00
03/05/2020,19:00,515.00
03/05/2020,20:00,515.00
03/05/2020,21:00,515.00
03/05/2020,22:00,515.00
03/05/2020,23:00,515.00
03/05/2020,00:00,515.00
03/05/2020,01:00,515.00
03/05/2020,02:00,515.00
03/05/2020,03:00,515.00
03/05/2020,04:00,515.00
03/05/2020,05:00,515.00
I just want to get the sum of Amount and Added Amount grouped by hour, like this:
Date, Time, Sum
03/05/2020,06:00,0.00
03/05/2020,07:00,0.00
03/05/2020,08:00,0.00
03/05/2020,09:00,0.00
03/05/2020,10:00,515.00
03/05/2020,11:00,440.00
03/05/2020,12:00,250.00
03/05/2020,13:00,0.00
03/05/2020,14:00,0.00
03/05/2020,15:00,0.00
03/05/2020,16:00,0.00
03/05/2020,17:00,0.00
03/05/2020,18:00,0.00
03/05/2020,19:00,0.00
03/05/2020,20:00,0.00
03/05/2020,21:00,0.00
03/05/2020,22:00,0.00
03/05/2020,23:00,0.00
03/05/2020,00:00,0.00
03/05/2020,01:00,0.00
03/05/2020,02:00,0.00
03/05/2020,03:00,0.00
03/05/2020,04:00,0.00
03/05/2020,05:00,0.00

Assuming that you have two DataTable-s and you have them filled with the mentioned data.
// dt1: one row per transaction (number, time of day, amount, date).
var dt1 = new DataTable();
dt1.Columns.Add("Transaction No.", typeof(int));
dt1.Columns.Add("Time", typeof(DateTime));
dt1.Columns.Add("Amount", typeof(decimal));
dt1.Columns.Add("Date", typeof(DateTime));

// dt2: the amount added to each transaction, keyed by the same transaction number.
var dt2 = new DataTable();
dt2.Columns.Add("Transaction No.", typeof(int));
dt2.Columns.Add("Added Amount", typeof(decimal));
dt2.Columns.Add("Date", typeof(DateTime));
Note: The double types have been replaced with decimal, since decimal is the right type to use when dealing with money.
As I understand the problem, you want to group the rows of dt1 by hour part of the Time field, sum the Amount, and add to the sum the Added Amount from dt2 rows where their Transaction No. equals to any Transaction No. of the grouped rows of dt1.
This will do:
// Builds a "Date, Time, Sum" report with one line per hour that has transactions.
// Column indexes: dt1 = (0) Transaction No., (1) Time, (2) Amount, (3) Date;
//                 dt2 = (0) Transaction No., (1) Added Amount.

// The output is machine-readable, so format with the invariant culture: with the current
// culture the "/" in "MM/dd/yyyy" and the decimal point in "0.00" change with regional settings.
var invariant = System.Globalization.CultureInfo.InvariantCulture;

// Total the added amounts per transaction once, instead of rescanning dt2 for every dt1 row.
var addedByTransaction = dt2.AsEnumerable()
    .GroupBy(x => x.Field<int>(0))
    .ToDictionary(t => t.Key, t => t.Sum(x => x.Field<decimal>(1)));

var group = dt1.AsEnumerable().GroupBy(x => x.Field<DateTime>(1).Hour);
var sb = new StringBuilder();
sb.Append("Date,");
sb.Append("Time,".PadLeft(12, ' '));
sb.AppendLine("Sum".PadLeft(5, ' '));
//if PadLeft is not required in the output, then just:
//sb.AppendLine($"Date, Time, Sum");
foreach (var g in group)
{
    var sum = 0M;
    foreach (var r in g)
    {
        // A transaction with no dt2 rows contributes 0 (added keeps its default value).
        decimal added;
        addedByTransaction.TryGetValue(r.Field<int>(0), out added);
        sum += r.Field<decimal>(2) + added;
    }

    // The date printed for an hour is the date of the first transaction in that hour.
    sb.AppendFormat(
        invariant,
        "{0:MM/dd/yyyy}, {1:00}:00, {2:0.00}",
        g.First().Field<DateTime>(3),
        g.Key,
        sum);
    sb.AppendLine();
}
Note: You can use the fields names instead of their indexes.
The output is:
Date, Time, Sum
03/05/2020, 10:00, 515.00
03/05/2020, 11:00, 440.00
03/05/2020, 12:00, 250.00
I don't know whether the DataTable-s already contain the required data to generate the output mentioned in the last quote block or you want to append the rest before writing to the text file. In case of the second scenario, you can do something like:
// Builds a "Date, Time, Sum" report with one line for each of the 24 hours (00..23);
// hours without transactions are written with a sum of 0.00.
// Column indexes: dt1 = (0) Transaction No., (1) Time, (2) Amount, (3) Date;
//                 dt2 = (0) Transaction No., (1) Added Amount.

// Invariant culture keeps the "/" and "." separators fixed regardless of regional settings.
var invariant = System.Globalization.CultureInfo.InvariantCulture;

// Total the added amounts per transaction once, instead of rescanning dt2 for every dt1 row.
var addedByTransaction = dt2.AsEnumerable()
    .GroupBy(x => x.Field<int>(0))
    .ToDictionary(t => t.Key, t => t.Sum(x => x.Field<decimal>(1)));

// ToList() runs the grouping once; a bare GroupBy is deferred and would be re-run on
// every lookup inside the 24-iteration loop.
var group = dt1.AsEnumerable().GroupBy(x => x.Field<DateTime>(1).Hour).ToList();
var sb = new StringBuilder();
sb.AppendLine("Date, Time, Sum");

// With no transactions there is no date to print, so only the header is written.
if (group.Count > 0)
{
    var byHour = group.ToDictionary(g => g.Key);
    // Hours without transactions borrow the date of the first transaction in the table.
    var fallbackDate = group[0].First().Field<DateTime>(3);

    for (var i = 0; i < 24; i++)
    {
        var date = fallbackDate;
        var sum = 0M;

        IGrouping<int, DataRow> g;
        if (byHour.TryGetValue(i, out g))
        {
            date = g.First().Field<DateTime>(3);
            foreach (var r in g)
            {
                // A transaction with no dt2 rows contributes 0.
                decimal added;
                addedByTransaction.TryGetValue(r.Field<int>(0), out added);
                sum += r.Field<decimal>(2) + added;
            }
        }

        sb.AppendFormat(invariant, "{0:MM/dd/yyyy}, {1:00}:00, {2:0.00}", date, i, sum);
        sb.AppendLine();
    }
}
If you need to preserve the same order of the hours:
// Visit the hours in the order 06..23 and then 00..05.
for (var ii = 6; ii < 30; ii++)
{
    // ii runs 6..29; the modulo wraps 24..29 back to 0..5 and leaves 6..23 unchanged.
    var i = ii % 24;
    var g = group.FirstOrDefault(x => x.Key == i);
    if (g != null)
    {
        //The same...
    }
    // (the else branch is also the same as in the previous snippet)
}
Finally, to create or overwrite the text file (fileName):
File.WriteAllText(fileName, sb.ToString());
Or to append the output:
File.AppendAllText(fileName, sb.ToString());

Related

Get col[0] values while col[1] is equal to previous col[1] iteration

I'm using a rate sheet from the US fed reserve site, which contains two cols
[0] == ShortDate (ex "1/1/1962")
[1] == Rate for that date in [0]
So the file looks like: 1/1/1962,3.8
Each time this utility runs, it gets/opens the file, removes all data for dates On and Before 5/31/1997. It then writes to a stream (in my case, another csv file) with the following headers: rowid,beginningdate,expirationdate,rate
The problem I have is, part of the requirement is that if the rate (col [1]) has the same value for some consecutive days, then don't list those days specifically. Instead, write the date of the first consecutive rate, then write the last date before the rate changes again.
My problem/question is:
How do I check against each iterated col[1] value before and after during a loop so I can write correct the beginning and expiring dates? I was hoping to bump the index of a value by one so I could get the values in the next row, but having a hard time doing that even.
I've tried my research at using as many Linq-based queries I could but I can't find anything that works and just need another pair of eyes / another idea.
Here is what I'm doing so far:
// Running index; reused by both loops below.
int count=0;
// Each input line is split on ',' below: column 0 is the date, column 1 the rate.
var rateContents = File.ReadAllLines(resFile);
List<DateTime> lstDT = new List<DateTime>();
List<string> lstRate = new List<string>();
// Receives the combined "date,rate" strings built in the while loop.
List<string> d = new List<string>();
foreach (var x in rateContents)
{
lstDT.Add(Convert.ToDateTime(x.Split(',')[0]));
lstRate.Add(x.Split(',')[1]);
}
// Counts of entries dated on/after 6/1/1997 and before 6/1/1997.
int recordsOn_and_after6_1_97 = lstDT.Where(z => Convert.ToDateTime(z) >= Convert.ToDateTime("6/1/1997")).Count();
int recordsbefore6_1_97 = lstDT.Where(z => Convert.ToDateTime(z) < Convert.ToDateTime("6/1/1997")).Count();
// Drops that many entries from the FRONT of both lists.
// NOTE(review): this only removes the pre-6/1/1997 rows if the file is sorted by
// ascending date - confirm against the input.
lstRate.RemoveRange(0, recordsbefore6_1_97);
lstDT.RemoveRange(0, recordsbefore6_1_97);
// NOTE(review): nothing is written to sw in this block (the only WriteLine is
// commented out); the loop just fills the list d.
using (StreamWriter sw = new StreamWriter(desktoppath + "somefile.csv"))
{
while (count <= (recordsOn_and_after6_1_97 - 1))
{
//sw.WriteLine("Index Number,Effective Date,Expiration Date,Rate");
// The DateTime is concatenated with a string, so it is rendered with DateTime's default
// ToString() rather than as the short date string it was parsed from.
d.Add(Convert.ToDateTime(lstDT[count].ToShortDateString()) + "," + lstRate[count]);
count++;
}
}
count = 0;
/*Below is where I am stuck. Realistically I think I'd want to do something like: var variable = argh.val[count+1]; or something, but I can't get that to work either. I'm missing something simple at this point.*/
foreach (var argh in d.Select((val, ind) => new { Value = val, Index = ind}))
{
//var curdt = Convert.ToDateTime(argh.Split(',')[0]).ToShortDateString();
//var currate = argh.Split(',')[1].ToString();
var curdt = argh.Value.Split(',')[0];
var currate = argh.Value.Split(',')[1];
// NOTE(review): currate is a string, so currate[count + 1] is a single CHARACTER of the
// current rate, not the next row's rate; the index can also run past the string's end.
var x = currate.CompareTo(currate[count + 1].ToString()).ToString() ;
Console.WriteLine(x.ToString());
//Console.WriteLine("val +1: " + curdt + " --- rate: " + currate.TakeWhile(z => z <= 10).ToString());
count++;
//Console.WriteLine(prevcurdt +" --- "+ currate);
}
Please be gentle, I'm still learning new things dang near daily :-)
Thank you!
Here is my idea to filter the rates as you want them. Basically just a simple for loop which looks at the rates either side of the current one - if the next rate isn't the same, we know the current date is the expiration date of whatever entry we're currently on. If the last rate isn't the same, we know it's a new entry.
/// <summary>
/// One rate together with the date range over which it applies.
/// </summary>
public class RateEntry
{
/// <summary>Date of the first row of the range (the row's own date when freshly parsed).</summary>
public DateTime StartDate { get; set; }
/// <summary>Date of the last consecutive row carrying the same rate; set while the ranges are built.</summary>
public DateTime ExpirationDate { get; set; }
/// <summary>The rate value parsed from the second CSV column.</summary>
public decimal Rate { get; set; }
}
/// <summary>
/// Reads a daily "date,rate" CSV, keeps the rows dated after 1997-05-31 and collapses
/// consecutive days that share the same rate into one start/expiration range per line.
/// </summary>
class Program
{
    const string DATE_FORMAT_IN = "yyyy-MM-dd";
    const string DATE_FORMAT_OUT = "yyyy-MM-dd";

    /// <summary>
    /// Converts c:\temp\RATES_IN.csv into c:\temp\RATES_OUT.csv
    /// (semicolon-separated: ROWID;STARTDATE;EXPIRATIONDATE;RATE), then waits for a key press.
    /// </summary>
    static void Main()
    {
        // The files are machine-readable, so parse and format with the invariant culture;
        // with the current culture "3.8" can parse as 38 and dates can use another calendar.
        var culture = System.Globalization.CultureInfo.InvariantCulture;

        var inputRateDataRaw = File.ReadAllLines(@"c:\temp\RATES_IN.csv");
        DateTime startDate = new DateTime(1997, 05, 31);

        // parse the input dates and rates
        var rateDataFiltered = inputRateDataRaw
            // Blank lines (e.g. a trailing newline) carry no data and would break Split/Parse.
            .Where(rateData => !string.IsNullOrWhiteSpace(rateData))
            .Select(rateData =>
            {
                var dataComponents = rateData.Split(',');
                DateTime rateDate = DateTime.ParseExact(dataComponents[0], DATE_FORMAT_IN, culture);
                decimal rate = decimal.Parse(dataComponents[1], culture);
                return new RateEntry() { StartDate = rateDate, Rate = rate };
            })
            .Where(a => a.StartDate > startDate)
            .OrderBy(a => a.StartDate)
            .ToList();

        // Walk the rows in date order. A row whose rate differs from the open range starts a
        // new range; every row pushes the open range's expiration forward to its own date,
        // so a range ends on the last day before the rate changes.
        List<RateEntry> rateRanges = new List<RateEntry>();
        RateEntry current = null;
        foreach (RateEntry entry in rateDataFiltered)
        {
            if (current == null || current.Rate != entry.Rate)
            {
                current = entry;
                rateRanges.Add(current);
            }

            current.ExpirationDate = entry.StartDate;
        }

        // write the data out
        using (StreamWriter writer = new StreamWriter(@"c:\temp\RATES_OUT.csv"))
        {
            writer.WriteLine("ROWID;STARTDATE;EXPIRATIONDATE;RATE");
            for (int i = 0; i < rateRanges.Count; i++)
            {
                RateEntry rateEntry = rateRanges[i];
                writer.WriteLine(string.Join(
                    ";",
                    i.ToString(culture),
                    rateEntry.StartDate.ToString(DATE_FORMAT_OUT, culture),
                    rateEntry.ExpirationDate.ToString(DATE_FORMAT_OUT, culture),
                    rateEntry.Rate.ToString(culture)));
            }
        }

        Console.ReadKey();
    }
}

How to speed up this parsing loop from a text file

I'm a C# beginner, so I might have missed some easy tips.
Here's my question:
I have a loop that reads some data from a text file.
I have 160 files, each with 14000 lines of 7 values.
I'm only interested in the lines whose time corresponds to a value in an existing column of a DataTable.
If I find a corresponding line, I update that row in the DataTable.
The current code takes about 6 minutes per file, which is really too long.
Any idea how to speed up this loop?
I have already tried to shorten the loop by using a while loop.
File sample :
Sample of the file :
Col1 Col2 Col3 Col4 Col5 Col6
15943100 1188 1 2,49 2,28 75,3
15943200 1188 1 2,49 2,28 75,3
15943300 1188 1 2,50 2,28 75,3
15943400 1188 1 2,50 2,28 75,3
Here's the code : (Edited from Aluan Haddad answer but not faster)
// For every file: find the acquisition start time, then copy three measured values of each
// matching data line into the Data row whose Time equals start + line offset.
foreach (string FileName in fileFastPath)
{
// Defaults to "now" (UTC); overwritten below when an "Acquisition depuis" line is found.
var start = DateTimeOffset.UtcNow;
var allLines = File.ReadAllLines(FileName);
// The second tab-separated field of the marker line is parsed as the start time.
// The TryParse result is ignored; if several marker lines exist, the last one wins.
foreach (var line in allLines.Where(line => line.Contains("Acquisition depuis")))
{
DateTimeOffset.TryParse(line.Split('\t')[1], out start);
}
// Shared scratch variables written by the TryParse calls inside the query below.
double x = 0, y = 0, z = 0;
foreach (var (sp1, sp2, vear, milliseconds) in from line in allLines
where !line.Contains("Acquisition depuis")
select line.Split('\t') into values
// NOTE(review): values[5] is read below, which needs at least 6 fields, not 5.
where values.Length >= 5
where double.TryParse(values[3], out x)
where double.TryParse(values[4], out y)
where double.TryParse(values[5], out z)
// First field is the offset from the start time, in milliseconds.
let milliseconds = double.Parse(values[0])
// Keep only lines that fall on a whole second.
where milliseconds % 1000 == 0
select (x, y, z, milliseconds))
{
var updatedStart = start + TimeSpan.FromMilliseconds(milliseconds);
// One filter-expression query against Data per data line.
var existingValue = Data
.Select($"Time = #{updatedStart: yyyy-MM-dd HH:mm:ss.fff}#")
.FirstOrDefault(existing => existing != null);
if (existingValue != null)
{
existingValue["SP1 Bar"] = sp1;
existingValue["SP2 Bar"] = sp2;
existingValue["VEAR_POS %"] = vear;
}
}
}
Try cleaning up your code so you can see the forest for the trees. As you do so, there are many optimizations that reveal themselves thanks to clearer program structure.
// Index the table by its Time value once, up front. Looking a row up is then a hash
// lookup, instead of a filter expression that is parsed and run over the whole table
// for every line of every file.
// NOTE(review): assumes Data is a DataTable whose "Time" column is typed DateTime - confirm.
var rowsByTime = new System.Collections.Generic.Dictionary<DateTime, System.Data.DataRow>();
foreach (System.Data.DataRow row in Data.Rows)
{
    // DataTable.Select skips deleted rows, and a null Time can never match.
    if (row.RowState == System.Data.DataRowState.Deleted || row.IsNull("Time"))
    {
        continue;
    }

    var time = (DateTime)row["Time"];
    if (!rowsByTime.ContainsKey(time))
    {
        // First row wins, mirroring FirstOrDefault on the Select result.
        rowsByTime.Add(time, row);
    }
}

foreach (var fileName in fileFastPath)
{
    // Defaults to "now" (UTC) when the file has no usable "Acquisition depuis" line.
    var start = DateTimeOffset.UtcNow;
    var allLines = File.ReadAllLines(fileName);

    // The second tab-separated field of the marker line is the acquisition start time;
    // if several marker lines exist, the last parsable one wins.
    foreach (var line in allLines)
    {
        if (!line.Contains("Acquisition depuis"))
        {
            continue;
        }

        var parts = line.Split('\t');
        if (parts.Length > 1 && DateTimeOffset.TryParse(parts[1], out var parsedStart))
        {
            start = parsedStart;
        }
    }

    foreach (var line in allLines)
    {
        if (line.Contains("Acquisition depuis"))
        {
            continue;
        }

        var values = line.Split('\t');
        // values[5] is read below, so at least six fields are required.
        if (values.Length < 6)
        {
            continue;
        }

        // Non-numeric lines (such as the column header) are skipped here.
        if (!double.TryParse(values[3], out var sp1)
            || !double.TryParse(values[4], out var sp2)
            || !double.TryParse(values[5], out var vear))
        {
            continue;
        }

        // First field is the offset from the start time, in milliseconds.
        var milliseconds = double.Parse(values[0]);
        var updatedStart = start + TimeSpan.FromMilliseconds(milliseconds);

        // The previous filter compared against a literal formatted with "fff", i.e. the
        // clock time truncated to whole milliseconds; build the same key here.
        var clockTime = updatedStart.DateTime;
        var key = clockTime.AddTicks(-(clockTime.Ticks % TimeSpan.TicksPerMillisecond));

        if (rowsByTime.TryGetValue(key, out var existingValue))
        {
            existingValue["SP1 Bar"] = sp1;
            existingValue["SP2 Bar"] = sp2;
            existingValue["VEAR_POS %"] = vear;
        }
    }
}

select related value in the C# list

ExposureDate ProfolioPrices LastPeriods HybridWeights CummulativeWeights
10/03/2010 1254.380054 24 0.01836824 1.83682%
10/26/2010 1258.260394 1 0.07623018 2.45984%
10/16/2010 1262.710843 11 0.04105873 3.91836%
10/07/2010 1264.986911 20 0.02352644 4.56572%
10/09/2010 1265.461347 18 0.02662566 5.52092%
10/19/2010 1265.76087 8 0.04943356 5.58428%
From the data above I have to select the row at the 5% cumulative weight. There is no row at exactly 5%, so I need to select the nearest values on either side of 5% (4.56572% and 5.52092%), together with their respective prices.
/// <summary>
/// Builds one age-weighted entry per price, sorted by ascending price, with each entry's
/// hybrid weight and the cumulative weight (in percent) up to and including that entry.
/// </summary>
/// <param name="values">Prices; entry i is paired with <paramref name="datetime"/>[i].</param>
/// <param name="datetime">Exposure dates; must contain at least as many items as <paramref name="values"/>.</param>
/// <returns>The entries ordered by ascending price.</returns>
/// <exception cref="ArgumentNullException">An argument is null.</exception>
/// <exception cref="ArgumentException"><paramref name="datetime"/> is shorter than <paramref name="values"/>.</exception>
private static List<AgeWeightedHistorical> Service(double[] values, DateTime[] datetime)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    if (datetime == null)
    {
        throw new ArgumentNullException(nameof(datetime));
    }

    if (datetime.Length < values.Length)
    {
        throw new ArgumentException("There must be a date for every value.", nameof(datetime));
    }

    const double decay = 0.94;
    int count = values.Length;

    List<AgeWeightedHistorical> Data = new List<AgeWeightedHistorical>(count);
    for (int i = 0; i < count; i++)
    {
        var ag = new AgeWeightedHistorical();
        ag.ExposureDate = datetime[i];
        ag.ProfolioPrices = values[i];
        // The first element gets LastPeriods == count and the last element gets 1.
        ag.LastPeriods = count - i;
        Data.Add(ag);
    }

    // The normalising factor does not depend on the element, so compute it once.
    double normaliser = (1 - decay) / (1 - Math.Pow(decay, count));
    foreach (var item in Data)
    {
        item.HybridWeights = Math.Round(Math.Pow(decay, item.LastPeriods - 1) * normaliser, 8);
    }

    Data = Data.OrderBy(sd => sd.ProfolioPrices).ToList();

    // Running total in price order; this replaces re-summing the whole list for each element.
    // NOTE(review): equivalent to the former Data.Sum(sd => sd.weights) only if weights and
    // CummulativeWeights start at 0 on a new AgeWeightedHistorical - confirm.
    double runningTotal = 0;
    foreach (var item in Data)
    {
        item.weights = item.CummulativeWeights + item.HybridWeights;
        runningTotal += item.weights;
        item.CummulativeWeights = Math.Round(runningTotal * 100, 5);
    }

    return Data;
}
This is my code. How do I select those values? Please advise.

Why I am getting null reference exceptions if the list has a length and no null values in it?

I have this piece of code, nothing fancy:
Modulo1.ForEach(t => fechasMaterias1.Add(t.Fecha.ToString("dddd d", culture)));
Modulo2.ForEach(t => fechasMaterias2.Add(t.Fecha.ToString("dddd d", culture)));
Modulo3.ForEach(t => fechasMaterias3.Add(t.Fecha.ToString("dddd d", culture)));
Modulo1.ForEach(t => horariosMaterias1.Add(t.HorarioInicio.ToString("hh\\:mm") + " a" + t.HorarioFin.ToString("hh\\:mm")));
Modulo2.ForEach(t => horariosMaterias2.Add(t.HorarioInicio.ToString("hh\\:mm") + " a" + t.HorarioFin.ToString("hh\\:mm")));
Modulo3.ForEach(t => horariosMaterias3.Add(t.HorarioInicio.ToString("hh\\:mm") + " a" + t.HorarioFin.ToString("hh\\:mm")));
//Modulo1
if (Modulo1.Count > 0)
{
string dia = String.Empty;
foreach (List<DateTime> item in diasMaterias1)
{
for (int i = 0; i < item.Count; i++)
{
if (i == 0)
dia += "Horario del " + item.ElementAt(i).ToString("d \\de MMMM", culture) + " al ";
else
dia += item.ElementAt(i).ToString("d \\de MMMM", culture);
}
dias1.Add(dia);
}
Modulo1.ForEach(t => view_result1.Add(new HorariosModulosViewModel
{
NombreMateria = t.Nombre,
ModuloNum = "Módulo " + NumerosModulos.Where(x => x.Equals(1)).FirstOrDefault().ToString(),
NombreModulo = ModulosNombre[0],
Grupo = t.Grupo.ToString(),
IdMateriasCursos = t.IdMateriasCursos
}));
for (int i = 0; i < dias1.Count; i++)
{
view_result1.ElementAt(i).dias = dias1.ElementAt(i);
}
for (int i = 0; i < fechasMaterias1.Count; i++)
{
view_result1.ElementAt(i).diasMateria.Add(fechasMaterias1.ElementAt(i));
}
for (int i = 0; i < horariosMaterias1.Count; i++)
{
view_result1.ElementAt(i).horarios.Add(horariosMaterias1.ElementAt(i));
}
And previously I have initialized the lists like so: List<string> fechasMaterias1 = new List<string>(); and the same for horariosMaterias1, horariosMaterias2 and so on. However, I keep getting a null reference exception in the loops over fechasMaterias1 and horariosMaterias1. The strange thing is that when I debug, they both have a length of 20 (which I expect, and none of them contain null values). I'm completely lost as to where this error may come from. view_result1 is a list of a standard POCO. By the way, I make sure Modulo1 has a length, and it is only supposed to have 20 elements for now, so basically I'm just building lists from that main list (Modulo1) and they have the same length. So why is the loop crashing?
I just wanted to post the code I made to solve the problem, I got rid of most of the classical foreach or for loops and made use of linq like follows:
//Modulo1
// For each module, keep only every fourth date label (indexes 0, 4, 8, ...) ...
List<string> aux1 = fechasMaterias1.Where((fecha, indice) => indice % 4 == 0).ToList();
List<string> aux2 = fechasMaterias2.Where((fecha, indice) => indice % 4 == 0).ToList();
List<string> aux3 = fechasMaterias3.Where((fecha, indice) => indice % 4 == 0).ToList();

// ... and replace the original lists with the thinned-out copies.
fechasMaterias1 = aux1;
fechasMaterias2 = aux2;
fechasMaterias3 = aux3;
// Builds the view models for module 1 and attaches dates, timetables and teachers to them.
if (Modulo1.Count > 0)
{
// Accumulates one description across ALL date lists in diasMaterias1.
string dia = String.Empty;
foreach (List<DateTime> item in diasMaterias1)
{
for (int i = 0; i < item.Count; i++)
{
// First date of a list opens the phrase; every later date is appended after it.
if (i == 0)
dia += "Horario del " + item.ElementAt(i).ToString("d \\de MMMM", culture) + " al ";
else
dia += item.ElementAt(i).ToString("d \\de MMMM", culture);
}
}
// Added once, after the loop, so this block contributes a single entry to dias1.
dias1.Add(dia);
// One view model per element of Modulo1.
Modulo1.ForEach(t => view_result1.Add(new HorariosModulosViewModel
{
NombreMateria = t.Nombre,
ModuloNum = "Módulo " + NumerosModulos.Where(x => x.Equals(1)).FirstOrDefault().ToString(),
NombreModulo = ModulosNombre[0],
Grupo = t.Grupo.ToString(),
IdMateriasCursos = t.IdMateriasCursos
}));
// Assigns the FIRST entry of dias1 to the first dias1.Count view models.
// NOTE(review): view models beyond that index keep whatever dias value they already had - confirm this is intended.
for (int i = 0; i < dias1.Count; i++)
{
view_result1.ElementAt(i).dias = dias1.FirstOrDefault();
}
// Every view model receives the same fechasMaterias1 list reference.
view_result1.ForEach(t => t.diasMateria = fechasMaterias1);
// One single-entry dictionary per subject id: id -> "start a end" of the group's first element.
var materiasListado1 = Modulo1.GroupBy(t=> t.IdMateriasCursos).ToList();
foreach(var item in materiasListado1) {
horariosMaterias1.Add(new Dictionary<int, string>() {{item.Key, (from U in item where U.IdMateriasCursos.Equals(item.Key) select U.HorarioInicio.ToString("hh\\:mm")+" a "+U.HorarioFin.ToString("hh\\:mm")).First()}});
}
// Each view model gets the dictionaries whose key is its own subject id.
view_result1.ForEach(t => t.horarios = horariosMaterias1.Where(p=> p.Keys.Contains(t.IdMateriasCursos)).ToList());
// Teachers grouped by subject id: id -> list of "title names surnames" strings.
var maestrosListado1 = maestros1.GroupBy(t=> t.IdMateriasCursos).ToList();
foreach(var item in maestrosListado1)
{
losMaestros1.Add(item.Key, (from U in item where U.IdMateriasCursos.Equals(item.Key) select String.Join(" ", U.Titulo, U.Nombres, U.ApellidoPaterno, U.ApellidoMaterno)).ToList());
}
// Each view model gets the teacher entries whose key is its own subject id.
view_result1.ForEach(t => t.Maestros = losMaestros1.Where(x=> x.Key.Equals(t.IdMateriasCursos)).ToDictionary(r=> r.Key, v=> v.Value));
}
that way I iterate faster, I get rid of repeated values and now there are no exceptions in the code. Thank you all!

C# Processing Fixed Width Files - Solution Not Working

I have implemented Cuong's solution here:
C# Processing Fixed Width Files
Here is my code:
// Reads a fixed-width file whose first line marks each column with a repeated character,
// and writes the same content as comma-separated lines.
var lines = File.ReadAllLines(@fileFull);

// Column widths taken from the header line.
// NOTE: GroupBy groups by character VALUE across the whole header, not by consecutive
// runs, so a character that marks two separate columns is counted as one wider column.
var widthList = lines.First().GroupBy(c => c)
    .Select(g => g.Count())
    .ToList();

// (start index, width) of every column, accumulated from left to right.
var list = new List<KeyValuePair<int, int>>();
int startIndex = 0;
for (int i = 0; i < widthList.Count(); i++)
{
    var pair = new KeyValuePair<int, int>(startIndex, widthList[i]);
    list.Add(pair);
    startIndex += widthList[i];
}

// Cut every line (header included) at the column boundaries and join the pieces with commas.
var csvLines = lines.Select(line => string.Join(",",
    list.Select(pair => line.Substring(pair.Key, pair.Value))));
File.WriteAllLines(filePath + "\\" + fileName + ".csv", csvLines);
Here, fileFull is the full path and name of the input file.
The issue I have is the first line of the input file also contains digits. So it could be AAAAAABBC111111111DD2EEEEEE etc. For some reason the output from Cuong's code gives me CSV headings like 1111RRRR and 222223333.
Does anyone know why this is and how I would fix it?
Header row example:
AAAAAAAAAAAAAAAABBBBBBBBBBCCCCCCCCDEFCCCCCCCCCGGGGGGGGHHHHHHHHIJJJJJJJJKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQQQ1111RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR222222222333333333444444444555555555666666666777777777888888888999999999S00001111TTTTTTTTTTTTUVWXYZ!"£$$$$$$%&
Converted header row:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCDEFCCCCCC C C C GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111RRRR RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR2222 222223333 333334444 444445555 555556666 666667777 777778888 888889999 99999S000 0 1111 TTTTTTTTTTTT U V W X Y Z ! ",�,$$$$$$,%,&,"
Jodrell - I implemented your suggestion but the header output is like:
BBBBBBBBBBCCCCCC CCCCCCCCD DEFCCCC GGGGGGGG HHHHHHH IJJJJJJ KKKKLLL LLL MMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPPQQQQ1111RRRRRRRRRRRRRRRRR QQQ 111 RRR 33333333 44444444 55555555 66666666 77777777 88888888 99999999 S0000111 111 TTT UVWXYZ!"�$$ %&
As Jodrell already mentioned, your code doesn't work because it assumes that the character representing each column header is distinct. Changing the code that parses the header widths will fix it.
Replace:
var widthList = lines.First().GroupBy(c => c)
.Select(g => g.Count())
.ToList();
With:
// Column widths = lengths of the consecutive runs of identical characters in the header line.
var widthList = new List<int>();
var header = lines.First().ToArray();
var runLength = 0;
for (int i = 0; i < header.Length; i++)
{
    runLength++;

    // A run ends at the last character, or where the next character differs.
    bool runEnds = i == header.Length - 1 || header[i + 1] != header[i];
    if (runEnds)
    {
        widthList.Add(runLength);
        runLength = 0;
    }
}
Parsed header columns:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCC D E F CCCCCCCCC GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111 RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR 222222222 333333333 444444444 555555555 666666666 777777777 888888888 999999999 S 0000 1111 TTTTTTTTTTTT U V W X Y Z ! " £ $$$$$$ % &
EDIT
Because the problem annoyed me I wrote some code that handles " and ,. This code replaces the header row with comma delimited alternating zeros and ones. Any commas or double quotes in the body are appropriately escaped.
/// <summary>
/// Converts a fixed-width text file into a CSV file with the same name and a ".csv"
/// extension, written next to the source file as UTF-8 without a byte-order mark.
/// </summary>
/// <remarks>
/// The first line defines the columns: every run of identical consecutive characters is
/// one column. It is written out as comma-delimited runs of alternating '0' and '1'.
/// Body fields containing a comma or a double quote are quoted, with quotes doubled.
/// Body lines shorter than the header produce shortened or empty trailing fields.
/// </remarks>
/// <param name="sourceFile">Path of the fixed-width input file.</param>
/// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is null.</exception>
/// <exception cref="IOException">The destination ".csv" file already exists.</exception>
static void FixedToCsv(string sourceFile)
{
    if (sourceFile == null)
    {
        throw new ArgumentNullException(nameof(sourceFile));
    }

    var dir = Path.GetDirectoryName(sourceFile);
    var destFile = Path.GetFileNameWithoutExtension(sourceFile) + ".csv";
    if (!string.IsNullOrEmpty(dir))
    {
        destFile = Path.Combine(dir, destFile);
    }

    if (File.Exists(destFile))
    {
        throw new IOException("Destination file already exists: " + destFile);
    }

    // (start index, width) of every column, derived from the header line.
    var blocks = new List<KeyValuePair<int, int>>();
    var csvSpecial = new[] { ',', '"' };

    // Open the input first so that a missing source does not leave an empty CSV behind.
    // FileMode.CreateNew refuses to overwrite a file created since the check above.
    using (var input = File.OpenText(sourceFile))
    using (var output = new StreamWriter(
        new FileStream(destFile, FileMode.CreateNew, FileAccess.Write),
        new UTF8Encoding(false)))
    {
        // Make header
        var header = input.ReadLine();
        if (header == null)
        {
            return;
        }

        var outputLine = new StringBuilder();
        var even = false;
        var blockStart = 0;
        for (var i = 0; i < header.Length; i++)
        {
            // A change of character closes the current column and opens the next one.
            if (i > 0 && header[i] != header[i - 1])
            {
                blocks.Add(new KeyValuePair<int, int>(blockStart, i - blockStart));
                blockStart = i;
                outputLine.Append(',');
                even = !even;
            }

            outputLine.Append(even ? '1' : '0');
        }

        // Close the last column; an empty header line defines no columns at all.
        if (header.Length > 0)
        {
            blocks.Add(new KeyValuePair<int, int>(blockStart, header.Length - blockStart));
        }

        output.WriteLine(outputLine.ToString());

        // Process Body
        var fields = new List<string>(blocks.Count);
        string inputLine;
        while ((inputLine = input.ReadLine()) != null)
        {
            fields.Clear();
            foreach (var block in blocks)
            {
                // Clamp to the line length so short lines do not throw in Substring.
                var field = block.Key >= inputLine.Length
                    ? string.Empty
                    : inputLine.Substring(block.Key, Math.Min(block.Value, inputLine.Length - block.Key));

                if (field.IndexOfAny(csvSpecial) >= 0)
                {
                    field = "\"" + field.Replace("\"", "\"\"") + "\"";
                }

                fields.Add(field);
            }

            output.WriteLine(string.Join(",", fields));
        }
    }
}
1 is repeated in your header row, so your two fours get counted as one eight and everything goes wrong from there.
(There is a block of four 1s after the Qs and another block of four 1s after the 0s)
Essentially, your header row is invalid or, at least, doesn't work with the proposed solution.
Okay, you could do something like this.
/// <summary>
/// Converts a fixed-width text file into a comma-separated file with the same name and a
/// ".csv" extension in the same directory.
/// </summary>
/// <remarks>
/// The first line defines the columns: every run of identical consecutive characters is
/// one column. Every line, the header included, is cut at those boundaries and joined
/// with commas. Fields are not quoted or escaped. Lines shorter than the header produce
/// shortened or empty trailing fields.
/// </remarks>
/// <param name="fullFile">Full path and name of the fixed-width input file.</param>
/// <exception cref="InvalidOperationException">The output path would be the input file itself.</exception>
public void FixedToCsv(string fullFile)
{
    var lines = File.ReadAllLines(fullFile);

    // (start index, width) of every column, derived from the first line.
    var widths = new List<KeyValuePair<int, int>>();
    if (lines.Length > 0)
    {
        var firstLine = lines[0];
        var runStart = 0;
        for (var i = 1; i <= firstLine.Length; i++)
        {
            // A run ends at the end of the line, or where the character changes.
            if (i == firstLine.Length || firstLine[i] != firstLine[i - 1])
            {
                widths.Add(new KeyValuePair<int, int>(runStart, i - runStart));
                runStart = i;
            }
        }
    }

    // Clamp each slice to the line length so short lines do not throw in Substring.
    var csvLines = lines.Select(line => string.Join(",",
        widths.Select(pair => pair.Key >= line.Length
            ? string.Empty
            : line.Substring(pair.Key, Math.Min(pair.Value, line.Length - pair.Key)))));

    var filePath = Path.GetDirectoryName(fullFile) ?? string.Empty;
    var fileName = Path.GetFileNameWithoutExtension(fullFile);
    var csvFile = Path.Combine(filePath, fileName + ".csv");

    // Never overwrite the source when it already has a ".csv" extension.
    if (string.Equals(Path.GetFullPath(csvFile), Path.GetFullPath(fullFile), StringComparison.OrdinalIgnoreCase))
    {
        throw new InvalidOperationException("The output file would overwrite the input file: " + fullFile);
    }

    File.WriteAllLines(csvFile, csvLines);
}

Categories