PDF Multi-level break and print - c#

I am trying to print documents grouped first by simplex/duplex and then by envelope type (pressure seal or regular).
I have Boolean fields for Simplex and for PressureSeal in my Record class.
All pressure seal documents are simplex; the remaining documents are either regular simplex or regular duplex.
I can currently print the pressure seal documents separate from the regular simplex. I need to be able to create the regular duplex documents.
I have some lines commented out that caused all documents to be duplicated.
So, I am looking for something that works like so:
if (Simplex)
if (pressureseal)
create output file
else
create regular simplex output file
else
create duplex output file
Here is my existing code
#region Mark Records By Splits
// Labels every record with a sheet-count bucket ("Split") derived from the
// comma-separated Splits setting.
//splits - 3,7,13
var splits = Properties.Settings.Default.Splits.Split(',');
// Maps the inclusive lower bound of each bucket (key) to its inclusive upper bound (value).
Dictionary<int, int> splitRanges = new Dictionary<int, int>();
// Lower bound of the next bucket; after the loop it is the start of the open-ended bucket.
int lastSplit = 0;
foreach (var split in splits)
{
// Attempt to convert split into a integer and skip it if we can't.
int splitNum;
if (!int.TryParse(split, out splitNum))
continue;
// NOTE(review): if a split value is lower than an earlier one, lastSplit does not
// advance, so the next entry would reuse the same key and Dictionary.Add would throw.
splitRanges.Add(lastSplit, splitNum);
// The next bucket starts one past this upper bound (never moving backwards).
lastSplit = Math.Max(lastSplit, splitNum + 1);
}
// Assign record splits.
foreach (var range in splitRanges)
{
// Records whose Sheets value lies inside [range.Key, range.Value] get the label "Key-Value".
var recordsInRange = NoticeParser.records
.Where(x => x.Sheets >= range.Key && x.Sheets <= range.Value)
.ToList();
recordsInRange.ForEach(x => x.Split = string.Format("{0}-{1}", range.Key, range.Value));
}
// Everything at or above the last upper bound + 1 falls into the open-ended "<n>up" bucket.
var unassignedRecords = NoticeParser.records.Where(x => x.Sheets >= lastSplit).ToList();
unassignedRecords.ForEach(x => x.Split = string.Format("{0}up", lastSplit));
#endregion
#region Sort out Pressure Seal records
var recordsGroupedByPressureSeal = NoticeParser.records
.GroupBy(x=>x.PressureSeal);
//var recordsGroupedBySimplex = NoticeParser.records.GroupBy(x => x.Simplex);
#endregion
int fCount = 0;
int nsCount = 0;
//foreach (var simdupGroup in recordsGroupedBySimplex)
//{
// var recordsGroupedBySimDup = simdupGroup.GroupBy(x => x.Split).OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.ToList());
foreach (var pressureGroup in recordsGroupedByPressureSeal)
{
var recordsGroupedBySplit = pressureGroup.GroupBy(x => x.Split).OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.ToList());
foreach (var recordsInSplit in recordsGroupedBySplit.Values)
{
string processingExecutable = Path.Combine(Properties.Settings.Default.RootFolder, Properties.Settings.Default.ProcessingExecutable);
string toProcessingFile = string.Format(Properties.Settings.Default.OutputFolder + "{0}_" + "toBCC.txt", fCount);
string fromProcessingFile = string.Format(Properties.Settings.Default.OutputFolder + "IBC_LN_Sort_FromBCC.txt");
// If a sortation executable is specified, run it.
if (recordsInSplit.Count >= Properties.Settings.Default.MinimumSortationCount &&
File.Exists(processingExecutable))
{
// log.Info("Sorting records...");
var processedRecords = recordsInSplit.ProcessAddresses<Record, RecordMap>(
processingExecutable,
toProcessingFile,
fromProcessingFile);
// Update records with the sortation fields.
recordsInSplit.UpdateAddresses(processedRecords);
}
else
{
toProcessingFile = string.Format(Properties.Settings.Default.OutputFolder + "{0}_no_sort_toBCC.txt", nsCount);
fromProcessingFile = string.Format(Properties.Settings.Default.OutputFolder + "IBC_LN_NoSort_FromBCC.txt");
//var processedRecords = recordsInSplit.ProcessAddresses<Record, RecordMap>(
// processingExecutable,
// toProcessingFile,
// fromProcessingFile);
// Update records with the sortation fields.
// recordsInSplit.UpdateAddresses(processedRecords);
// If not sorted, provide our own sequence number.
int sequence = 1;
recordsInSplit.ForEach(x => x.SequenceNumber = sequence++);
recordsInSplit.ForEach(x => x.TrayNumber = 1);
nsCount++;
}
fCount++;
}
}
//}
NoticeWriter noticeWriter = new NoticeWriter(noticeParser.reader);
#region Print by PressureSeal or Regular
//foreach (var simdupGroup in recordsGroupedBySimplex)
//{
// string printType = null;
// if (simdupGroup.Key)
// printType = "Simplex";
// else
// printType = "Duplex";
foreach (var splitGroup in recordsGroupedByPressureSeal)
{
string envType = ""; // envelope type
if (splitGroup.Key)
envType = "PressureSeal";
else
envType = "Regular";
var recordsGroupedBySplit = splitGroup.GroupBy(x => x.Split).OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.ToList());
foreach (var recordsInSplit in recordsGroupedBySplit)
{
string outputName = string.Format("IBC_Daily_Notices_{0}_{1}",envType, /*printType,*/ recordsInSplit.Key);
noticeWriter.WriteOutputFiles(Properties.Settings.Default.OutputFolder, outputName, recordsInSplit.Value, Properties.Settings.Default.RecordsPerBatch);
}
}
//}
#endregion

Related

How to simplify if condition and foreach condition in c#.net?

I have a repeating foreach condition in my controller. How can I simplify it?
I almost reach 500 lines because of this. I've been using this for x8 each condition.
List<jewelry_dashboard_view_per_month> transactionmonthlynewloan = dashboardmanager.Get_MonthlyTransaction(search_branch, (monthlyonly + "01"), "N-", (monthlyonly + no_of_items), no_of_items, monthlyonly);
myNewLoanMontlyList.Add(transactionmonthlynewloan);
List<jewelry_dashboard_view_per_month> transactionmonthlyrenewal = dashboardmanager.Get_MonthlyTransaction(search_branch, (monthlyonly + "01"), "R-", (monthlyonly + no_of_items), no_of_items, monthlyonly);
myRenewalMontlyList.Add(transactionmonthlyrenewal);
This is the if condition
if (myNewLoanMontlyList[0].Count != 0)
{
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.id_data))
{monthly_newloan_data_ID.Add(internal_monthly_newloan_data);}
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.debit_data))
{monthly_newloan_data_debit.Add(internal_monthly_newloan_data);}
}
else
{
monthly_newloan_data_ID.Add(0);
monthly_newloan_data_debit.Add(0);
};
and this is the foreach condition
//newloan
int newloan_data_id = 0;
DateTime newloan_data_transdate = DateTime.Parse((DateTime.Today).ToString());
decimal newloan_data_debit = 0;
string newloan_data_txnname = "";
string newloan_data_branchID = "";
foreach (var newloan_data in newloan)
{
newloan_data_id = newloan_data.ID;
newloan_data_transdate = DateTime.Parse((newloan_data.Transdate).ToString());
newloan_data_debit = Decimal.Parse((newloan_data.Debit).ToString());
newloan_data_txnname = newloan_data.TransactionName;
newloan_data_branchID = newloan_data.BranchID;
};
datanewloan = new transaction_details()
{
ID = newloan_data_id,
Transdate = DateTime.Parse(newloan_data_transdate.ToString("yyyy-MM-dd")),
Debit = Decimal.Parse(newloan_data_debit.ToString()),
TransactionName = newloan_data_txnname,
BranchID = newloan_data_branchID
};
In Your scenario of if-else conditions:
if (myNewLoanMontlyList[0].Count != 0)
{
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.id_data))
{monthly_newloan_data_ID.Add(internal_monthly_newloan_data);}
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.debit_data))
{monthly_newloan_data_debit.Add(internal_monthly_newloan_data);}
}
else
{
monthly_newloan_data_ID.Add(0);
monthly_newloan_data_debit.Add(0);
};
Because the foreach loops simply do nothing when the list is empty, you can drop the else branch and keep only an if that handles the empty case:
if (myNewLoanMontlyList[0].Count == 0)
{
monthly_newloan_data_ID.Add(0);
monthly_newloan_data_debit.Add(0);
}
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.id_data))
{monthly_newloan_data_ID.Add(internal_monthly_newloan_data);}
foreach (var internal_monthly_newloan_data in myNewLoanMontlyList[0].SelectMany(c => c.debit_data))
{monthly_newloan_data_debit.Add(internal_monthly_newloan_data);}
You can use Linq,
SelectMany: Projects each element of a sequence to an IEnumerable<T> and flattens the results into a single sequence, so you do not need to loop again and add each item to a separate list.
Your if condition would then look like this:
// Fill the ID and debit lists from the first monthly result set, or add a single
// zero placeholder to each when that result set is empty.
// The targets are lists (the else branch calls Add on them), so the lazy sequence
// returned by SelectMany cannot be assigned to them directly. AddRange appends the
// flattened values, which is what the original foreach loops did.
// assumes both targets are List<T> - TODO confirm against their declarations
if (myNewLoanMontlyList[0].Any())
{
    monthly_newloan_data_ID.AddRange(myNewLoanMontlyList[0].SelectMany(c => c.id_data));
    monthly_newloan_data_debit.AddRange(myNewLoanMontlyList[0].SelectMany(c => c.debit_data));
}
else
{
    monthly_newloan_data_ID.Add(0);
    monthly_newloan_data_debit.Add(0);
}
Select: Projects each element of a sequence into a new form. In your
case new form is instance of transaction_details
Instead of for loop use Linq .Select(),
var result = newloan.Select(x => new transaction_details(){
ID = x.ID,
Transdate = DateTime.Parse(x.Transdate.ToString("yyyy-MM-dd")),
Debit = Decimal.Parse((x.Debit).ToString()),
TransactionName = x.TransactionName,
BranchID = x.BranchID
}).LastOrDefault();
To get last element, I used LastOrDefault(). You can get individual element by index or by condition.

Difficulty using Orderby in Foreach loop C#

I have been trying to order my files by their substring at the end of their names which happens to end with a number that indicates their position relative to the rest of the files. (example: fs-1632_1.txt --> fs-1632_2.txt).
I am currently able to get the numbers and turn them into ints I just have problems getting the OrderBy Method to work correctly. I am mostly working off of this example of Orderby.
internal class Data
{
public string Name { get; set; }
public double Number { get; set; }
}
private void OrderByEx1(List<FileInfo> files)
{
int num = 0;
int index_num = 0;
string file_num = "";
string file_name = "";
foreach (FileInfo in files)
{
file_name = file.FullName;
file_name = Path.GetFileNameWithoutExtension(file_name);
index_num = file_name.LastIndexOf("_") + 1;
file_num = file_name.Substring(index_num);
num = Int32.Parse(file_num);
Data[] set = {new Data {Name = file_name, Number = num }};
}
IEnumerable<Data> query = set.OrderBy(data => data.Number);
foreach (Data file_s in query)
MessageBox.Show($"{file_s.Name} {file_s.Number}");
}
No need for the foreach-loop. You could use this safe LINQ approach:
files = files
.Select(f => new { File = f, Name = Path.GetFileNameWithoutExtension(f.Name) })
.Select(x => new
{
x.File,
x.Name,
Token = x.Name.Substring(x.Name.LastIndexOf("_", StringComparison.Ordinal) + 1)
})
.Select(x => new
{
x.File,
x.Name,
x.Token,
IsInt = int.TryParse(x.Token, out int number),
ParsedNumber = number
})
.OrderByDescending(x => x.IsInt)
.ThenBy(x => x.ParsedNumber)
.Select(x => x.File)
.ToList();
If there is no number or it can't be parsed to int the file will be listed at the bottom.
You are declaring a Data array named set, add a single element to it and then restart the loop forgetting what you have loaded in the previous loop. The order is executed only when you exit the loop, but at that point the set array contains a single element, the last one.
You need to add your Data structure to a list and then order that list
List<Data> dataFiles = new List<Data>();
foreach (FileInfo file in files)
{
file_name = file.FullName;
file_name = Path.GetFileNameWithoutExtension(file_name);
index_num = file_name.LastIndexOf("_") + 1;
file_num = file_name.Substring(index_num);
num = Int32.Parse(file_num);
dataFiles.Add(new Data {Name = file_name, Number = num });
}
// If you don't need the query var you can just order directly in the for loop
// IEnumerable<Data> query = dataFiles.OrderBy(data => data.Number);
foreach (Data file_s in dataFiles.OrderBy(data => data.Number))
{
MessageBox.Show(file_s.Name + " " + file_s.Number);
}

Replacing loops with linq code [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 5 years ago.
Improve this question
My current code is like this:
var results = new List<Results>();
var items = new List<string>
{
"B,0",
"A,1",
"B,2",
"A,3",
"A,4",
"B,5",
"A,6",
"A,7",
"B,8"
};
int size = 2;
int temp;
var tempResults = new List<int>();
var keys = items.Select(t => t.Split(',')[0]).Distinct().ToList();
//var values = items.Select(t => t.Split(',')[1]).ToList();
//var result = items.SelectMany(k => values, (k, v) => new {k, v});
foreach (var key in keys)
{
temp = 0;
tempResults = new List<int>();
foreach (var item in items)
{
if (item.Split(',')[0] == key)
{
tempResults.Add(Int32.Parse(item.Split(',')[1]));
temp++;
}
if (temp == size)
{
results.Add(new Results
{
Key = key,
Values = new List<int>(tempResults)
});
temp = 0;
tempResults.Clear();
}
}
}
foreach (Results r in results)
{
Console.WriteLine("Key: " + r.Key);
Console.WriteLine("Values: ");
foreach (int i in r.Values)
{
Console.WriteLine(i);
}
}
Everything works fine with it, but I am using two loops to get the results needed. I want to replace them with a LINQ expression and been trying, but can't seem to figure it out. Any help is appreciated.
You could use a combination of LINQ methods: .GroupBy, .Select, SelectMany and some data structures like Tuple<T1, T2>.
Provided that we have class:
class Results
{
public string Key { get; set; }
public List<int> Values { get; set; }
}
The solution could be:
// Parse every "key,value" string, group the values by key, then cut each key's
// values into chunks of `size`.
// The chunk number is derived from the element's position inside its own key group.
// A counter shared across all groups would let one key's leftover elements shift
// the chunk boundaries of the next key whenever a group's length is not a multiple
// of `size`.
// Note: a trailing chunk with fewer than `size` values is kept. Append
// .Where(chunk => chunk.Count() == size) after the inner GroupBy to drop it.
var result =
    items.Select(x =>                                   // parse initial string
        {
            var parts = x.Split(',');
            return Tuple.Create(parts[0], Convert.ToInt32(parts[1]));
        })
        .GroupBy(x => x.Item1, x => x.Item2)            // group values by key
        .SelectMany(keyGroup => keyGroup
            .Select((value, index) => new { value, index })   // position within this key
            .GroupBy(p => p.index / size, p => p.value)       // fixed-size chunks
            .Select(chunk => new Results                      // cast to resulting model type
            {
                Key = keyGroup.Key,
                Values = chunk.ToList()
            }))
        .ToList();                                      // materialize the result
How about writing a couple extension methods?
const int partitionSize = 2;
var itemLookup = items.ToLookup(x => x.Split(',')[0], x => Int32.Parse(x.Split(',')[1]));
var partitionedItems = itemLookup.Partition(partitionSize);
foreach (var partition in partitionedItems)
foreach (var lookup in partition)
{
Console.WriteLine("Key: " + lookup.Key);
Console.WriteLine("Values: ");
foreach (var i in lookup.ToList())
{
Console.WriteLine(i);
}
}
/// <summary>
/// Helpers that cut lists and lookups into fixed-size pieces.
/// </summary>
public static class PartitionExtensions
{
    /// <summary>
    /// Splits every grouping of <paramref name="lookup"/> into chunks of
    /// <paramref name="size"/> values and wraps each chunk in its own
    /// single-key lookup, in the order the groupings appear.
    /// </summary>
    public static IList<ILookup<K, V>> Partition<K, V>(this ILookup<K, V> lookup, int size)
    {
        var chunkLookups =
            from grouping in lookup
            from chunk in grouping.ToList().Partition(size)
            select chunk.ToLookup(ignored => grouping.Key, value => value);

        return chunkLookups.ToList();
    }

    /// <summary>
    /// Splits <paramref name="list"/> into consecutive chunks of exactly
    /// <paramref name="size"/> items.
    /// </summary>
    public static IList<IList<T>> Partition<T>(this IList<T> list, int size)
    {
        var chunks = new List<IList<T>>();

        // Integer division: items that do not fill a whole chunk are dropped, which
        // matches the partitioning in the question. To keep the remainder in a final
        // partial chunk, compute the count like this instead:
        //var wholeChunks = ((list.Count() % size == 0) ? list.Count() : list.Count() + size) / size;
        var wholeChunks = list.Count() / size;

        var offset = 0;
        while (chunks.Count < wholeChunks)
        {
            chunks.Add(list.Skip(offset).Take(size).ToList());
            offset += size;
        }

        return chunks;
    }
}
Not really a way to remove the inner loop, but you could shorten a bit your code with:
....
var keys = items.Select(t => t.Split(',')[0]).Distinct().ToList();
foreach (var key in keys)
{
    // Materialize the values for this key once. Left as a deferred query, every
    // Count(), Skip() and Take() below would re-scan and re-parse all of `items`.
    var forKey = items.Where(x => x.Split(',')[0] == key)
                      .Select(k => int.Parse(k.Split(',')[1]))
                      .ToList();

    // Step through the values `size` at a time; the final chunk may be shorter.
    for (int x = 0; x < forKey.Count; x += size)
    {
        results.Add(new Results
        {
            Key = key,
            Values = forKey.Skip(x).Take(size).ToList()
        });
    }
}
....
At least this approach will remove the need of the temporary variables and all the if checks inside the loop and will also include in your results the last value for the A key that has only one integer in its list.

How to make the custom parser for text file

I have set up four columns in a DataTable and I want those columns to be filled with values from a text file. I used a regex to remove one particular line from the text file.
My objective is to show the text file in a grid via the DataTable, so first I am trying to create the DataTable and remove the line (shown in the program) using a regex.
Here I post my full code.
namespace class
{
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Reads the fare search text file line by line, blanks out the recommendation
    /// separator line, and adds each resulting line to a four-column DataTable.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        DataTable dt = new DataTable();
        dt.Columns.Add("PTC");
        dt.Columns.Add("CUR");
        dt.Columns.Add("TAX");
        dt.Columns.Add("FARE BASIS");

        // The separator contains '(', ')' and '.', which are regex metacharacters.
        // Without Regex.Escape the parentheses form a group and the literal line
        // is never matched.
        var pattern = Regex.Escape("---------- RECOMMENDATION 1 OF 3 IN GROUP 1 (USD 168.90)----------");

        // using guarantees the file handle is released even if a line fails to load.
        using (StreamReader sreader = File.OpenText(@"C:\FareSearchRegex.txt"))
        {
            string line;
            while ((line = sreader.ReadLine()) != null)
            {
                var result = Regex.Replace(line, pattern, " ");
                // Add the processed text; adding `line` would discard the replacement.
                dt.Rows.Add(result);
            }
        }
    }
}
// Simple holder for the passenger type code, currency and tax of one fare line.
class Class1
{
    // Passenger type code.
    public string gsPTC { get; set; }

    // Currency code.
    public string gsCUR { get; set; }

    // Tax amount.
    public float gsTAX { get; set; }
}
}
If your format is strict (e.g. always 4 columns) and you only want to remove this one complete line, I don't see any reason to use a regex:
// Load every line except the recommendation separator as one comma-separated row.
const string separator = "---------- RECOMMENDATION 1 OF 3 IN GROUP 1 (USD 168.90)----------";
var rows = File.ReadLines(@"C:\FareSearchRegex.txt")
    .Where(l => l != separator)
    .Select(l => l.Split(','));
foreach (var fields in rows)
{
    // The row is created here rather than inside the LINQ projection, so the query
    // has no side effects and enumerating it a second time cannot add duplicate rows.
    dt.Rows.Add(fields);
}
(assumed that the fields are separated by comma)
Edit: This works with your pastebin:
// Adds to dt the single data line that follows each column-header line.
string header = " PTC CUR TAX FARE BASIS";
bool takeNextLine = false;
foreach (string line in File.ReadLines(@"C:\FareSearchRegex.txt"))
{
    // Ordinal comparison: the header is fixed machine text, not culture-dependent prose.
    if (line.StartsWith(header, StringComparison.Ordinal))
    {
        takeNextLine = true;
    }
    else if (takeNextLine)
    {
        // Columns are separated by runs of spaces; RemoveEmptyEntries collapses the runs.
        var tokens = line.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
        // The token at index 2 belongs to the column that is not shown, so drop it.
        dt.Rows.Add().ItemArray = tokens.Where((t, i) => i != 2).ToArray();
        takeNextLine = false;
    }
}
(since you have an empty column which you want to exclude from the result i've used the clumsy and possibly error-prone(?) query Where((t, i) => i != 2))
To parse the file you'll need to:
Split the text of the file into data chunks. In your case a chunk is delimited by the header line (PTC CUR TAX FARE BASIS) and the TOTAL line. To split the text, tokenize the input as follows: (i) define a regular expression that matches the headers; (ii) define a regular expression that matches the TOTAL lines (footers). Then join the matches from (i) and (ii) by their order of appearance to get the start and end offset of each chunk (see the line with (x, y) => new{ StartIndex = x.Match.Index, EndIndex = y.Match.Index + y.Match.Length } below), and use the String.Substring method to extract the chunks.
Extract the data from each individual chunk. Knowing that data is split by lines you just have to iterate through all lines in a chunk (ignoring header and footer) and process each line.
This code should help:
string file = @"C:\FareSearchRegex.txt";
string text = File.ReadAllText(file);

// Header line, TOTAL (footer) line and data line of one fare block.
var headerRegex = new Regex(@"^(\)>)?\s+PTC\s+CUR\s+TAX\s+FARE BASIS$", RegexOptions.IgnoreCase | RegexOptions.Multiline);
var totalRegex = new Regex(@"^\s+TOTAL[\w\s.]+?$", RegexOptions.IgnoreCase | RegexOptions.Multiline);
var lineRegex = new Regex(@"^(?<Num>\d+)?\s+(?<PTC>[A-Z]+)\s+\d+\s(?<Cur>[A-Z]{3})\s+[\d.]+\s+(?<Tax>[\d.]+)", RegexOptions.IgnoreCase | RegexOptions.Multiline);

// Pair the n-th header with the n-th TOTAL line to get the character range of each chunk.
var dataIndices =
    headerRegex.Matches(text).Cast<Match>()
        .Select((m, index) => new { Index = index, Match = m })
        .Join(totalRegex.Matches(text).Cast<Match>().Select((m, index) => new { Index = index, Match = m }),
            x => x.Index,
            x => x.Index,
            (x, y) => new { StartIndex = x.Match.Index, EndIndex = y.Match.Index + y.Match.Length });

// Cut each chunk (header through TOTAL line) out of the text.
var items = dataIndices
    .Select(x => text.Substring(x.StartIndex, x.EndIndex - x.StartIndex))
    .ToList();

var result = items.SelectMany(x =>
{
    var lines = x.Split(new string[] { Environment.NewLine, "\r", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    return lines.Skip(1)               // Skip header
        .Take(lines.Length - 2)        // Ignore footer
        .Select(line =>
        {
            var match = lineRegex.Match(line);
            return new
            {
                Ptc = match.Groups["PTC"].Value,
                Cur = match.Groups["Cur"].Value,
                // The file uses '.' as the decimal separator, so parse with the
                // invariant culture instead of the machine's current culture.
                // A line that does not match lineRegex yields an empty Tax group
                // and makes this conversion throw FormatException.
                Tax = Convert.ToDouble(match.Groups["Tax"].Value, System.Globalization.CultureInfo.InvariantCulture)
            };
        });
});

C# Processing Fixed Width Files - Solution Not Working

I have implemented Cuong's solution here:
C# Processing Fixed Width Files
Here is my code:
var lines = File.ReadAllLines(#fileFull);
var widthList = lines.First().GroupBy(c => c)
.Select(g => g.Count())
.ToList();
var list = new List<KeyValuePair<int, int>>();
int startIndex = 0;
for (int i = 0; i < widthList.Count(); i++)
{
var pair = new KeyValuePair<int, int>(startIndex, widthList[i]);
list.Add(pair);
startIndex += widthList[i];
}
var csvLines = lines.Select(line => string.Join(",",
list.Select(pair => line.Substring(pair.Key, pair.Value))));
File.WriteAllLines(filePath + "\\" + fileName + ".csv", csvLines);
fileFull is the full path and name of the input file.
The issue I have is the first line of the input file also contains digits. So it could be AAAAAABBC111111111DD2EEEEEE etc. For some reason the output from Cuong's code gives me CSV headings like 1111RRRR and 222223333.
Does anyone know why this is and how I would fix it?
Header row example:
AAAAAAAAAAAAAAAABBBBBBBBBBCCCCCCCCDEFCCCCCCCCCGGGGGGGGHHHHHHHHIJJJJJJJJKKKKLLLLMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPQQQQ1111RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR222222222333333333444444444555555555666666666777777777888888888999999999S00001111TTTTTTTTTTTTUVWXYZ!"£$$$$$$%&
Converted header row:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCDEFCCCCCC C C C GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111RRRR RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR2222 222223333 333334444 444445555 555556666 666667777 777778888 888889999 99999S000 0 1111 TTTTTTTTTTTT U V W X Y Z ! ",�,$$$$$$,%,&,"
Jodrell - I implemented your suggestion but the header output is like:
BBBBBBBBBBCCCCCC CCCCCCCCD DEFCCCC GGGGGGGG HHHHHHH IJJJJJJ KKKKLLL LLL MMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPPQQQQ1111RRRRRRRRRRRRRRRRR QQQ 111 RRR 33333333 44444444 55555555 66666666 77777777 88888888 99999999 S0000111 111 TTT UVWXYZ!"�$$ %&
As Jodrell already mentioned, your code doesn't work because it assumes that the character representing each column header is distinct. Changing the code that parses the header widths fixes it.
Replace:
var widthList = lines.First().GroupBy(c => c)
.Select(g => g.Count())
.ToList();
With:
var widthList = new List<int>();
var header = lines.First().ToArray();
for (int i = 0; i < header.Length; i++)
{
if (i == 0 || header[i] != header[i-1])
widthList.Add(0);
widthList[widthList.Count-1]++;
}
Parsed header columns:
AAAAAAAAAAAAAAAA BBBBBBBBBB CCCCCCCC D E F CCCCCCCCC GGGGGGGG HHHHHHHH I JJJJJJJJ KKKK LLLL MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN OOOOOOOOOOOOOOOOOOOOOOOOOOOOOO PPPP QQQQ 1111 RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR 222222222 333333333 444444444 555555555 666666666 777777777 888888888 999999999 S 0000 1111 TTTTTTTTTTTT U V W X Y Z ! " £ $$$$$$ % &
EDIT
Because the problem annoyed me I wrote some code that handles " and ,. This code replaces the header row with comma delimited alternating zeros and ones. Any commas or double quotes in the body are appropriately escaped.
/// <summary>
/// Converts a fixed-width text file to CSV. The first line of the source defines
/// the columns: each run of identical characters is one column. The header is
/// written as comma-delimited runs of alternating '0' and '1'; every following
/// line is cut at the same offsets, with fields containing a comma or a double
/// quote quoted and escaped.
/// </summary>
/// <param name="sourceFile">Path of the fixed-width file. The output is written
/// next to it with the extension ".csv".</param>
/// <exception cref="ArgumentNullException"><paramref name="sourceFile"/> is null.</exception>
/// <exception cref="IOException">The destination file already exists.</exception>
static void FixedToCsv(string sourceFile)
{
    if (sourceFile == null)
    {
        throw new ArgumentNullException(nameof(sourceFile));
    }

    var dir = Path.GetDirectoryName(sourceFile);
    var destFile = Path.GetFileNameWithoutExtension(sourceFile) + ".csv";
    if (dir != null)
    {
        destFile = Path.Combine(dir, destFile);
    }

    if (File.Exists(destFile))
    {
        throw new IOException("Destination file already exists: " + destFile);
    }

    // Key = start offset of a column, Value = its width.
    var blocks = new List<KeyValuePair<int, int>>();
    var csvSpecialChars = new[] { ',', '"' };

    // Open the input first so a missing source does not leave an empty output behind.
    // FileMode.CreateNew fails if the destination appeared after the check above.
    using (var input = File.OpenText(sourceFile))
    using (var output = new StreamWriter(new FileStream(destFile, FileMode.CreateNew, FileAccess.Write)))
    {
        var header = input.ReadLine();
        if (string.IsNullOrEmpty(header))
        {
            // No header means there are no columns to derive.
            return;
        }

        // Make header: close a block whenever the character changes or the line ends.
        var blockStart = 0;
        for (var i = 1; i <= header.Length; i++)
        {
            if (i < header.Length && header[i] == header[i - 1])
            {
                continue;
            }

            blocks.Add(new KeyValuePair<int, int>(blockStart, i - blockStart));
            blockStart = i;
        }

        var outputLine = new StringBuilder();
        for (var b = 0; b < blocks.Count; b++)
        {
            if (b > 0)
            {
                outputLine.Append(',');
            }

            // Alternate '0' and '1' per column, repeated to the column width.
            outputLine.Append(b % 2 == 0 ? '0' : '1', blocks[b].Value);
        }

        output.WriteLine(outputLine.ToString());

        // Process body
        string inputLine;
        while ((inputLine = input.ReadLine()) != null)
        {
            outputLine.Clear();
            for (var b = 0; b < blocks.Count; b++)
            {
                if (b > 0)
                {
                    outputLine.Append(',');
                }

                // Clamp to the line length so a line shorter than the header
                // yields short or empty fields instead of throwing.
                var start = Math.Min(blocks[b].Key, inputLine.Length);
                var length = Math.Min(blocks[b].Value, inputLine.Length - start);
                var field = inputLine.Substring(start, length);

                if (field.IndexOfAny(csvSpecialChars) >= 0)
                {
                    // CSV escaping: wrap in quotes and double any embedded quote.
                    field = "\"" + field.Replace("\"", "\"\"") + "\"";
                }

                outputLine.Append(field);
            }

            output.WriteLine(outputLine.ToString());
        }
    }
}
1 is repeated in your header row, so your two fours get counted as one eight and everything goes wrong from there.
(There is a block of four 1s after the Qs and another block of four 1s after the 0s)
Essentially, your header row is invalid or, at least, doesn't work with the proposed solution.
Okay, you could do something like this.
/// <summary>
/// Converts a fixed-width text file to CSV. Column boundaries are taken from the
/// first line: each run of identical adjacent characters is one column, so a
/// character may be reused for a later, non-adjacent column. Every line,
/// including the header, is cut at those boundaries and joined with commas.
/// </summary>
/// <param name="fullFile">Full path of the input file. The output is written next
/// to it with the extension changed to ".csv"; if the input already ends in
/// ".csv" it is overwritten.</param>
public void FixedToCsv(string fullFile)
{
    var lines = File.ReadAllLines(fullFile);
    var firstLine = lines.FirstOrDefault() ?? string.Empty;

    // Key = start offset of a column, Value = its width.
    var widths = new List<KeyValuePair<int, int>>();
    var blockStart = 0;
    for (var i = 1; i <= firstLine.Length; i++)
    {
        // Still inside the current run of identical characters.
        if (i < firstLine.Length && firstLine[i] == firstLine[i - 1])
        {
            continue;
        }

        // The run ends here. This also fires at the end of the line, so the last
        // column is recorded too.
        widths.Add(new KeyValuePair<int, int>(blockStart, i - blockStart));
        blockStart = i;
    }

    var csvLines = lines.Select(line => string.Join(",",
        widths.Select(pair =>
        {
            // Clamp so a line shorter than the header does not make Substring throw.
            var start = Math.Min(pair.Key, line.Length);
            var length = Math.Min(pair.Value, line.Length - start);
            return line.Substring(start, length);
        })));

    // Same directory and base name as the input, with a .csv extension.
    File.WriteAllLines(Path.ChangeExtension(fullFile, ".csv"), csvLines);
}

Categories