Replacing loops with linq code [closed] - c#

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 5 years ago.
Improve this question
My current code is like this:
// Input rows in "key,value" form.
var items = new List<string>
{
    "B,0",
    "A,1",
    "B,2",
    "A,3",
    "A,4",
    "B,5",
    "A,6",
    "A,7",
    "B,8"
};
int size = 2;
var results = new List<Results>();
var keys = items.Select(t => t.Split(',')[0]).Distinct().ToList();

// For every distinct key, walk all rows and emit one Results entry each time
// `size` values have been collected. A trailing, incomplete chunk is not emitted.
foreach (var key in keys)
{
    var pending = new List<int>();
    foreach (var item in items)
    {
        var fields = item.Split(',');
        if (fields[0] == key)
        {
            pending.Add(Int32.Parse(fields[1]));
        }

        if (pending.Count != size)
        {
            continue;
        }

        // Copy the chunk so clearing the buffer does not empty the stored values.
        results.Add(new Results { Key = key, Values = new List<int>(pending) });
        pending.Clear();
    }
}

// Print every chunk: its key followed by one value per line.
foreach (Results r in results)
{
    Console.WriteLine("Key: " + r.Key);
    Console.WriteLine("Values: ");
    r.Values.ForEach(v => Console.WriteLine(v));
}
Everything works fine with it, but I am using two loops to get the results needed. I want to replace them with a LINQ expression and have been trying, but can't seem to figure it out. Any help is appreciated.

You could use a combination of LINQ methods: .GroupBy, .Select, SelectMany and some data structures like Tuple<T1, T2>.
Provided that we have class:
// One output chunk: a key together with the integer values collected for it.
class Results
{
// The key the values belong to.
public string Key { get; set; }
// The values of this chunk.
public List<int> Values { get; set; }
}
The solution could be:
// Parse every "key,value" row, group the values by key, cut each group into
// chunks of `size` values and project each chunk to a Results instance.
// The chunk number comes from the element's position inside its OWN group;
// a counter shared across groups would shift the chunk boundaries of later
// keys whenever an earlier key's item count is not a multiple of `size`.
// A trailing chunk with fewer than `size` values is kept.
var result = items
    .Select(x => // parse initial string
    {
        var parts = x.Split(',');
        return Tuple.Create(parts[0], Convert.ToInt32(parts[1]));
    })
    .GroupBy(x => x.Item1, x => x.Item2) // group by key
    .SelectMany(group => group // select fixed size data chunks
        .Select((value, index) => new { value, index })
        .GroupBy(p => p.index / size, p => p.value)
        .Select(chunk => new Results // cast to resulting model type
        {
            Key = group.Key,
            Values = chunk.ToList()
        }))
    .ToList(); // Return enumeration as list

How about writing a couple extension methods?
const int partitionSize = 2;

// key -> parsed integer values, in input order
var itemLookup = items
    .Select(x => x.Split(','))
    .ToLookup(fields => fields[0], fields => Int32.Parse(fields[1]));
var partitionedItems = itemLookup.Partition(partitionSize);

// Every partition is a lookup; print each of its groupings.
foreach (var grouping in partitionedItems.SelectMany(partition => partition))
{
    Console.WriteLine("Key: " + grouping.Key);
    Console.WriteLine("Values: ");
    foreach (var value in grouping)
    {
        Console.WriteLine(value);
    }
}
/// <summary>
/// Helpers that cut lists and lookups into fixed-size partitions.
/// </summary>
public static class PartitionExtensions
{
    /// <summary>
    /// Splits the values of every key of <paramref name="lookup"/> into partitions of
    /// <paramref name="size"/> values; each partition is returned as a lookup with that single key.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not positive.</exception>
    public static IList<ILookup<K, V>> Partition<K, V>(this ILookup<K, V> lookup, int size)
    {
        return lookup
            .SelectMany(group => group.ToList()
                .Partition(size)
                .Select(chunk => chunk.ToLookup(x => group.Key, x => x)))
            .ToList();
    }

    /// <summary>
    /// Splits <paramref name="list"/> into consecutive partitions of exactly <paramref name="size"/> items.
    /// Items that do not fill a complete partition are left out.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not positive.</exception>
    public static IList<IList<T>> Partition<T>(this IList<T> list, int size)
    {
        if (list == null)
        {
            throw new ArgumentNullException("list");
        }

        // A size of zero used to surface as a bare DivideByZeroException and a
        // negative size silently produced no partitions; report both explicitly.
        if (size <= 0)
        {
            throw new ArgumentOutOfRangeException("size", "Partition size must be positive.");
        }

        IList<IList<T>> results = new List<IList<T>>();
        var itemCount = list.Count;
        var partitionCount = itemCount / size;
        //your partitioning method is truncating items that don't make up a full partition
        //if you want the remaining items in a partial partition, use this code instead
        //var partitionCount = ((itemCount % size == 0) ? itemCount : itemCount + size) / size;
        for (var i = 0; i < partitionCount; i++)
        {
            // Index directly instead of Skip/Take so each item is visited once.
            var chunk = new List<T>(size);
            for (var j = 0; j < size; j++)
            {
                chunk.Add(list[i * size + j]);
            }

            results.Add(chunk);
        }

        return results;
    }
}

This does not really remove the inner loop, but you could shorten your code a bit with:
....
var keys = items.Select(t => t.Split(',')[0]).Distinct().ToList();
foreach (var key in keys)
{
    // Materialize the values once. Left as a deferred query, every Count()
    // and every Skip() would re-split and re-parse all items again.
    var forKey = items.Where(x => x.Split(',')[0] == key)
        .Select(k => int.Parse(k.Split(',')[1]))
        .ToList();

    // Cut the values into chunks of `size`; the last chunk may be shorter.
    for (int x = 0; x < forKey.Count; x += size)
    {
        results.Add(new Results
        {
            Key = key,
            Values = forKey.GetRange(x, Math.Min(size, forKey.Count - x))
        });
    }
}
....
At least this approach will remove the need of the temporary variables and all the if checks inside the loop and will also include in your results the last value for the A key that has only one integer in its list.

Related

Performant running SUM OVER Partition in LINQ

I am trying to figure out the best way to calculate a running sum partition with a self joined collection using LINQ.
The query below is a somewhat simple example of what I am after. The output is the RowNumber, the RowType and the sum of all preceding RowValues within the current row's RowType.
-- Sample data: ten rows, five of each RowType, every RowValue = 1.
-- (Table variables are written with '@'.)
DECLARE @T TABLE (RowNumber INT, RowType INT, RowValue INT)
INSERT @T VALUES (1,1,1),(2,1,1),(3,1,1),(4,1,1),(5,1,1),(6,2,1),(7,2,1),(8,2,1),(9,2,1),(10,2,1)
;WITH Data AS(SELECT RowNumber, RowType,RowValue FROM @T)
-- Running total per RowType: each row's value plus the sum of all earlier rows
-- of the same RowType (COALESCE covers the first row, which has no prior rows).
SELECT
This.RowNumber,
This.RowType,
RunningValue = COALESCE(This.RowValue + SUM(Prior.RowValue),This.RowValue)
FROM
Data This
LEFT OUTER JOIN Data Prior ON Prior.RowNumber < This.RowNumber AND Prior.RowType = This.RowType
GROUP BY
This.RowNumber,
This.RowType,
This.RowValue
/* OR
SELECT
This.RowNumber,
This.RowType,
RunningValue = SUM(RowValue) OVER (PARTITION BY RowType ORDER BY RowNUmber)
FROM
Data This
*/
Now, my not working attempt.
// Non-working sketch: self-joins allRows on (RowNumber, RowType, RowValue) and then
// tries to filter and project. The final Where/Select lines are pseudo-code that only
// illustrates the intended projection; they are not valid C#.
var joinedWithPreviousSums = allRows.Join(
allRows,
previousRows => new {previousRows.RowNumber, previousRows.RowType, previousRows.RowValue},
row=> new { row.RowNumber, row.RowType, row.RowValue},
(previousRows, row) => new { row.RowNumber, row.RowType, row.RowValue })
.Where(previousRows.RowType == row.RowType && previousRows.RowNumber < row.RowNumber)
.Select(row.RowNumber, row.RowType,RunningValue = Sum(previousRows.Value) + row.RowValue)).ToList()
Of course, the last two lines above are garbage and attempt to exemplify my desired projection while hinting at my lack of knowledge on performant complex LINQ projections.
I have read where some variation of the statement below could work and may be workable, however, is there a way to achieve similar results without yielding?
// Running sum updated as a side effect of the predicate: take people in
// ascending Amount order while the accumulated amount stays below 1000.
int runningAmount = 0;
var byAmount = people.OrderBy(x => x.Amount);
var subgroup = byAmount
    .TakeWhile(x => (runningAmount += x.Amount) < 1000)
    .ToList();
EDIT: I have been able to get the snippet below to work; however, I can't seem to partition or project over RowType.
namespace ConsoleApplication1
{
// Demo: prints a running total of RowValue over all rows (not split by RowType).
class Program
{
    delegate string CreateGroupingDelegate(int i);

    static void Main(string[] args)
    {
        var rows = new List<TestClass>
        {
            new TestClass(1, 1, 1),
            new TestClass(2, 2, 5),
            new TestClass(3, 1, 1),
            new TestClass(4, 2, 5),
            new TestClass(5, 1, 1),
            new TestClass(6, 2, 5)
        };

        // The lambda mutates `sum`, so ToList() is needed to run it exactly once per row.
        int sum = 0;
        var totals = rows
            .Select(row =>
            {
                sum += row.RowValue;
                return new { row.RowNumber, row.RowType, running_total = sum };
            })
            .ToList();

        foreach (var entry in totals)
        {
            Console.WriteLine("list element: {0}, total so far: {1}", entry.RowNumber, entry.running_total);
        }

        Console.ReadLine();
    }
}
// A single input row: its number, its type and its value.
public class TestClass
{
    public int RowNumber { get; set; }

    public int RowType { get; set; }

    public int RowValue { get; set; }

    // Creates a row from its three components.
    public TestClass(int rowNumber, int rowType, int rowValue)
    {
        this.RowNumber = rowNumber;
        this.RowType = rowType;
        this.RowValue = rowValue;
    }
}
}
Your answer can be simplified greatly, but does scale poorly even then, as it must go through the Where for each row to compute each row, so O(list.Count^2).
Here is the simpler version, which preserves the original order:
// For every row: sum the values of all rows of the same type up to and including it.
// The inner query scans the whole list once per row.
var result = from item in list
             select new
             {
                 RowType = item.RowType,
                 RowValue = (from prior in list
                             where prior.RowNumber <= item.RowNumber && prior.RowType == item.RowType
                             select prior.RowValue).Sum()
             };
You can go through the list only once if you are willing to sort. (If you know the order is already correct, or can use a simpler sort, you can remove or replace the OrderBy/ThenBy.)
// Sort by type, then row number, and carry a running sum that restarts
// whenever the row type changes.
var ans = list
    .OrderBy(x => x.RowType)
    .ThenBy(x => x.RowNumber)
    .Scan(
        first => new { first.RowType, first.RowValue },
        (acc, row) =>
        {
            if (acc.RowType != row.RowType)
            {
                return new { row.RowType, row.RowValue };
            }

            return new { acc.RowType, RowValue = acc.RowValue + row.RowValue };
        });
This answer uses an extension method that is like Aggregate, but returns the intermediate results, based on the APL scan operator:
/// <summary>
/// Like Aggregate, but yields every intermediate result: the seed computed from the
/// first element, then one combined value per further element. Empty input yields nothing.
/// </summary>
/// <param name="src">The sequence to scan.</param>
/// <param name="seedFn">TRes seedFn(T FirstValue) - builds the first result from the first element.</param>
/// <param name="combineFn">TRes combineFn(TRes PrevResult, T CurValue) - folds the next element into the previous result.</param>
/// <returns>A lazily evaluated sequence with one result per source element.</returns>
public static IEnumerable<TRes> Scan<T, TRes>(this IEnumerable<T> src, Func<T, TRes> seedFn, Func<TRes, T, TRes> combineFn) {
    using (var srce = src.GetEnumerator()) {
        if (!srce.MoveNext()) {
            yield break;
        }
        // Yield each result as soon as it is known. Advancing the source first would
        // delay every result by one element and let a failure on the next element
        // hide a result that was already computed.
        var acc = seedFn(srce.Current);
        yield return acc;
        while (srce.MoveNext()) {
            acc = combineFn(acc, srce.Current);
            yield return acc;
        }
    }
}
My eyes glazed over after seeing this. The answer to my long-winded question, after 6 hours of skulduggery, seems to be as simple as this. Thanks to @NetMage for pointing out the SelectMany that I was missing.
// For every row, collect the rows of the same type up to and including it,
// group them by type and project the group's type and value sum.
var result = list.SelectMany(item =>
    list.Where(x => x.RowType == item.RowType && x.RowNumber <= item.RowNumber)
        .GroupBy(x => x.RowType)
        .Select(grp => new
        {
            RowType = grp.Max(r => r.RowType),
            RowValue = grp.Sum(r => r.RowValue)
        }));

PDF Multi-level break and print

I am trying to print documents by simplex/duplex and then by envelope type (pressure seal or regular).
I have Boolean fields for Simplex and for PressureSeal in my Record class.
All pressure seal are simplex, then there are regular simplex and duplex documents.
I can currently print the pressure seal documents separate from the regular simplex. I need to be able to create the regular duplex documents.
I have some lines commented out that caused all documents to be duplicated.
So, I am looking for something that works like so:
if (Simplex)
if (pressureseal)
create output file
else
create regular simplex output file
else
create duplex output file
Here is my existing code
#region Mark Records By Splits
// The setting holds the upper bounds of the sheet-count ranges, e.g. "3,7,13".
var splits = Properties.Settings.Default.Splits.Split(',');
// lower bound -> upper bound (both inclusive)
Dictionary<int, int> splitRanges = new Dictionary<int, int>();
int lastSplit = 0;
foreach (var split in splits)
{
    // Attempt to convert split into a integer and skip it if we can't.
    int splitNum;
    if (!int.TryParse(split, out splitNum))
        continue;
    // A bound below the current lower bound describes an empty range and leaves
    // lastSplit unchanged, so a following bound would try to add the same key
    // again and throw. Such a range can never match a record, so skip it.
    if (splitNum < lastSplit)
        continue;
    splitRanges.Add(lastSplit, splitNum);
    lastSplit = Math.Max(lastSplit, splitNum + 1);
}
// Assign record splits.
foreach (var range in splitRanges)
{
    string label = string.Format("{0}-{1}", range.Key, range.Value);
    foreach (var record in NoticeParser.records
        .Where(x => x.Sheets >= range.Key && x.Sheets <= range.Value)
        .ToList())
    {
        record.Split = label;
    }
}
// Everything above the last bound goes into the open-ended "<n>up" split.
var unassignedRecords = NoticeParser.records.Where(x => x.Sheets >= lastSplit).ToList();
unassignedRecords.ForEach(x => x.Split = string.Format("{0}up", lastSplit));
#endregion
#region Sort out Pressure Seal records
var recordsGroupedByPressureSeal = NoticeParser.records
    .GroupBy(x=>x.PressureSeal);
//var recordsGroupedBySimplex = NoticeParser.records.GroupBy(x => x.Simplex);
#endregion
int fCount = 0;   // number of split groups handled so far (used in the sorted file name)
int nsCount = 0;  // number of split groups that were NOT sent through sortation
//foreach (var simdupGroup in recordsGroupedBySimplex)
//{
// var recordsGroupedBySimDup = simdupGroup.GroupBy(x => x.Split).OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.ToList());
foreach (var pressureGroup in recordsGroupedByPressureSeal)
{
    var recordsGroupedBySplit = pressureGroup.GroupBy(x => x.Split).OrderBy(x => x.Key).ToDictionary(x => x.Key, x => x.ToList());
    foreach (var recordsInSplit in recordsGroupedBySplit.Values)
    {
        string processingExecutable = Path.Combine(Properties.Settings.Default.RootFolder, Properties.Settings.Default.ProcessingExecutable);
        // Only the file-name part is a format string. The folder is appended outside
        // string.Format so that a '{' or '}' in the configured path cannot cause a FormatException.
        string toProcessingFile = Properties.Settings.Default.OutputFolder + string.Format("{0}_toBCC.txt", fCount);
        string fromProcessingFile = Properties.Settings.Default.OutputFolder + "IBC_LN_Sort_FromBCC.txt";
        // If a sortation executable is specified, run it.
        if (recordsInSplit.Count >= Properties.Settings.Default.MinimumSortationCount &&
            File.Exists(processingExecutable))
        {
            // log.Info("Sorting records...");
            var processedRecords = recordsInSplit.ProcessAddresses<Record, RecordMap>(
                processingExecutable,
                toProcessingFile,
                fromProcessingFile);
            // Update records with the sortation fields.
            recordsInSplit.UpdateAddresses(processedRecords);
        }
        else
        {
            toProcessingFile = Properties.Settings.Default.OutputFolder + string.Format("{0}_no_sort_toBCC.txt", nsCount);
            fromProcessingFile = Properties.Settings.Default.OutputFolder + "IBC_LN_NoSort_FromBCC.txt";
            //var processedRecords = recordsInSplit.ProcessAddresses<Record, RecordMap>(
            // processingExecutable,
            // toProcessingFile,
            // fromProcessingFile);
            // Update records with the sortation fields.
            // recordsInSplit.UpdateAddresses(processedRecords);
            // If not sorted, provide our own sequence number.
            int sequence = 1;
            recordsInSplit.ForEach(x => x.SequenceNumber = sequence++);
            recordsInSplit.ForEach(x => x.TrayNumber = 1);
            nsCount++;
        }
        fCount++;
    }
}
//}
NoticeWriter noticeWriter = new NoticeWriter(noticeParser.reader);
#region Print by PressureSeal or Regular
//foreach (var simdupGroup in recordsGroupedBySimplex)
//{
// string printType = null;
// if (simdupGroup.Key)
// printType = "Simplex";
// else
// printType = "Duplex";
// One set of output files per envelope type and, within it, per split.
foreach (var splitGroup in recordsGroupedByPressureSeal)
{
    // envelope type
    string envType = splitGroup.Key ? "PressureSeal" : "Regular";
    var recordsBySplit = splitGroup
        .GroupBy(x => x.Split)
        .OrderBy(g => g.Key)
        .ToDictionary(g => g.Key, g => g.ToList());
    foreach (var entry in recordsBySplit)
    {
        string outputName = string.Format("IBC_Daily_Notices_{0}_{1}", envType, /*printType,*/ entry.Key);
        noticeWriter.WriteOutputFiles(
            Properties.Settings.Default.OutputFolder,
            outputName,
            entry.Value,
            Properties.Settings.Default.RecordsPerBatch);
    }
}
//}
#endregion

Sort a List in which each element contains 2 Values

I have a text file that contains Values in this Format: Time|ID:
180|1
60 |2
120|3
Now I want to sort them by Time. The Output also should be:
60 |2
120|3
180|1
How can I solve this problem? With this:
// Verbatim string literal: '@' keeps the backslashes of the path as-is.
var path = @"C:\Users\admin\Desktop\test.txt";
List<string> list = File.ReadAllLines(path).ToList();
// Sorts the lines as text, not by their numeric Time value.
list.Sort();
for (var i = 0; i < list.Count; i++)
{
    Console.WriteLine(list[i]);
}
I got no success ...
3 steps are necessary to do the job:
1) split by the separator
2) convert to int because in a string comparison a 6 comes after a 1 or 10
3) use OrderBy to sort your collection
Here is a linq solution in one line doing all 3 steps:
// Order the lines by the integer value of the part before '|' and store the result back in list.
list = list.OrderBy(x => Convert.ToInt32(x.Split('|')[0])).ToList();
Explanation
x => lambda expression, x denotes a single element in your list
x.Split('|')[0] splits each string and takes only the first part of it (time)
Convert.ToInt32(.. converts the time into a number so that the ordering will be done in the way you desire
list.OrderBy( sorts your collection
EDIT:
Just to understand why you got the result in the first place here is an example of comparison of numbers in string representation using the CompareTo method:
// String comparison of two numbers written as text: "6" versus "10".
int res = "6".CompareTo("10");
res will have the value of 1 (meaning that 6 is larger than 10 or 6 follows 10)
According to the documentation->remarks:
The CompareTo method was designed primarily for use in sorting or alphabetizing operations.
You should parse each line of the file content and get values as numbers.
string[] lines = File.ReadAllLines("path");

// Key = ID, Value = Time
var dict = new Dictionary<int, int>();

// Each line has the form "Time|ID".
for (int n = 0; n < lines.Length; n++)
{
    string[] fields = lines[n].Split('|');
    int time = Convert.ToInt32(fields[0]);
    int id = Convert.ToInt32(fields[1]);
    dict.Add(id, time);
}

var orderedListByID = (from pair in dict orderby pair.Key select pair).ToList();
var orderedListByTime = (from pair in dict orderby pair.Value select pair).ToList();
Note that I use your ID reference as Key of dictionary assuming that ID should be unique.
Short code version
// Key = ID Value = Time
// Key = ID Value = Time
// Build the dictionary once and reuse it for both orderings instead of
// splitting and converting every line twice.
var timeById = lines.Select(x => x.Split('|')).ToDictionary(x => Convert.ToInt32(x[1]), x => Convert.ToInt32(x[0]));
var orderedListByID = timeById.OrderBy(x => x.Key).ToList();
var orderedListByTime = timeById.OrderBy(x => x.Value).ToList();
You need to convert them to numbers first. Sorting by string won't give you meaningful results.
// Parse the two columns of every "Time|ID" line, pair them up again and sort by time.
// The variables are declared here; a bare assignment would not compile on its own.
var times = list.Select(l => l.Split('|')[0]).Select(Int32.Parse);
var ids = list.Select(l => l.Split('|')[1]).Select(Int32.Parse);
var pairs = times.Zip(ids, (t, id) => new{Time = t, Id = id})
    .OrderBy(x => x.Time)
    .ToList();
Thank you all, this is my Solution:
// Verbatim string literal: '@' keeps the backslashes of the path as-is.
var path = @"C:\Users\admin\Desktop\test.txt";
List<string> list = File.ReadAllLines(path).ToList();
// Sort by the numeric Time value in front of the '|'.
list = list.OrderBy(x => Convert.ToInt32(x.Split('|')[0])).ToList();
foreach (var line in list)
{
    Console.WriteLine(line);
}
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/** Demo: builds a few line items, prints them, sorts them by time and prints them again. */
public class TestClass {
    public static void main(String[] args) {
        // {time, id} pairs in insertion order
        int[][] rows = { {500, 30}, {300, 20}, {900, 100} };
        List<LineItem> myList = new ArrayList<LineItem>();
        for (int[] row : rows) {
            myList.add(LineItem.getLineItem(row[0], row[1]));
        }
        System.out.println(myList);
        Collections.sort(myList);
        System.out.println("list after sort");
        System.out.println(myList);
    }
}
/** A time/id pair that sorts by ascending time. */
class LineItem implements Comparable<LineItem>{
    int time;
    int id ;

    /** Renders the item as "time|id " (with a trailing space). */
    @Override
    public String toString() {
        return ""+ time + "|"+ id + " ";
    }

    /**
     * Orders items by time. Compares explicitly instead of subtracting,
     * because a subtraction can overflow for values far apart and flip the sign.
     */
    @Override
    public int compareTo(LineItem o) {
        if (this.time < o.time) {
            return -1;
        }
        return this.time == o.time ? 0 : 1;
    }

    /** Factory: creates an item with the given time and id. */
    public static LineItem getLineItem( int time, int id ){
        LineItem l = new LineItem();
        l.time=time;
        l.id=id;
        return l;
    }
}

How to do the sequential ordering?

I have looked into this Q/A; it works to some extent, but not as expected. I want the assignment to happen sequentially. How can I do that?
Thanks in advance.
You can use Enumerable.Zip to combine the agents and accounts together (after repeating the list of agents to match or exceed the number of accounts). Then GroupBy agent.
// Repeat the agent list often enough to cover every account.
var repeatCount = lstAccounts.Count / lstAgents.Count + 1;
var agents = Enumerable.Range(0, repeatCount).SelectMany(round => lstAgents);
// agents = { "Agent1", "Agent2", "Agent3", "Agent1", "Agent2", "Agent3" }
// lstAccounts = { "1001" , "1002" , "1003" , "1004" , "1005" }
// Pair agents with accounts positionally, then collect the accounts per agent.
var result =
    (from pair in agents.Zip(lstAccounts, (agent, account) => new { Agent = agent, Account = account })
     group pair.Account by pair.Agent into byAgent
     select new { Agent = byAgent.Key, Accounts = byAgent.ToList() })
    .ToList();
It might not be the fastest way to do it, but it's short and readable.
Edit
Another way (probably nicer) to achieve the same result is to start by mapping each account to an index of agent using index % lstAgents.Count.
// Account number i goes to agent number (i % agent count).
var result = lstAccounts
    .Select((acc, index) => new { Account = acc, AgentIndex = index % lstAgents.Count })
    .GroupBy(x => x.AgentIndex, x => x.Account)
    .Select(byAgent => new { Agent = lstAgents[byAgent.Key], Accounts = byAgent.ToList() })
    .ToList();
The algorithm is very similar to the one proposed by varocarbas, but expressed in a functional (not imperative) way.
I think that conventional loops are the best approach here: easy-to-build, clear and very scalable-/modifiable-friendly. For example:
// Hands out the accounts to the agents in round-robin order: account i goes to agent i % agent count.
Dictionary<string, List<string>> results = new Dictionary<string, List<string>>();
// Without agents nothing can be assigned; fail instead of spinning forever.
if (lstAccounts.Count > 0 && lstAgents.Count == 0)
{
    throw new InvalidOperationException("Cannot assign accounts: the agent list is empty.");
}
for (int i = 0; i < lstAccounts.Count; i++)
{
    string curAgent = lstAgents[i % lstAgents.Count];
    // Single lookup: fetch the agent's list, creating it on first use.
    List<string> assigned;
    if (!results.TryGetValue(curAgent, out assigned))
    {
        assigned = new List<string>();
        results.Add(curAgent, assigned);
    }
    assigned.Add(lstAccounts[i]);
}
Additionally, note that this approach is quite fast. As a reference: around 4-5 times faster (results after a simplistic test with one of the provided inputs and a Stopwatch) than the alternative proposed by Jakub in his answer.
You can try this approach with a LINQ extension. The Split extension method splits the accounts list into "n" parts (the number of agents) so that you can assign each part to an agent.
// Demo: splits the accounts into one part per agent and prints each agent's accounts.
class Program
{
    static void Main(string[] args)
    {
        List<string> lstAgents = new List<string>() { "Agent1", "Agent2","Agent3" };
        List<string> lstAccounts = new List<string>() { "1001", "1002" ,"1003", "1004", "1005" };
        var op = lstAccounts.Split(lstAgents.Count);
        int i = 0;
        foreach (var accounts in op)
        {
            //Get agent
            // The {0} placeholder is required; without it the agent name is never printed.
            Console.WriteLine("Account(s) for Agent: {0}", lstAgents[i]);
            foreach (var acc in accounts)
            {
                Console.WriteLine(acc);
            }
            Console.WriteLine(Environment.NewLine);
            i++;
        }
        Console.ReadKey();
    }
}
static class LinqExtensions
{
    // Distributes the items round-robin over `parts` groups: the first item goes to the
    // first group, the second to the second, and so on, wrapping around. Evaluation is deferred.
    public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> list, int parts)
    {
        int counter = 0;
        return list
            .GroupBy(item => counter++ % parts)
            .Select(part => part.AsEnumerable());
    }
}

Grouping by an unknown initial prefix

Say I have the following array of strings as an input:
foo-139875913
foo-aeuefhaiu
foo-95hw9ghes
barbazabejgoiagjaegioea
barbaz8gs98ghsgh9es8h
9a8efa098fea0
barbaza98fyae9fghaefag
bazfa90eufa0e9u
bazgeajga8ugae89u
bazguea9guae
aifeaufhiuafhe
There are 3 different prefixes used here, "foo-", "barbaz" and "baz" - however these prefixes are not known ahead of time (they could be something completely different).
How could you establish what the different common prefixes are so that they could then be grouped by? This is made a bit tricky since in the data I've provided there's two that start with "bazg" and one that starts "bazf" where of course "baz" is the prefix.
What I've tried so far is sorting them into alphabetical order, and then looping through them in order and counting how many characters in a row are identical to the previous. If the number is different or when 0 characters are identical, it starts a new group. The problem with this is it falls over at the "bazg" and "bazf" problem I mentioned earlier and separates those into two different groups (one with just one element in it)
Edit: Alright, let's throw a few more rules in:
Longer potential groups should generally be preferred over shorter ones, unless there is a closely matching group of less than X characters difference in length. (So where X is 2, baz would be preferred over bazg)
A group must have at least Y elements in it or not be a group at all
It's okay to simply throw away elements that don't match any of the 'groups' to within the rules above.
To clarify the first rule in relation to the second, if X was 0 and Y was 2, then the two 'bazg' entries would be in a group, and the 'bazf' would be thrown away because its on its own.
Well, here's a quick hack, probably O(something_bad):
// Entry point: sorts the input descending and pushes it onto a stack (so the smallest
// string ends up on top), then lets InnerGuessGroups peel off one prefix group at a time.
IEnumerable<Tuple<String, IEnumerable<string>>> GuessGroups(IEnumerable<string> source, int minNameLength=0, int minGroupSize=1)
{
    // TODO: error checking
    var descending = source.OrderByDescending(x => x);
    var pending = new Stack<string>(descending);
    return InnerGuessGroups(pending, minNameLength, minGroupSize);
}
// Iterator that yields (prefix, members) tuples for the strings on the stack.
// When the stack is not empty it asks GetBestGroup for a prefix, pops the matching
// strings from the top of the stack via ExtractTuple, yields the tuple if it has at
// least minGroupSize members, and then yields whatever GuessGroups produces for the
// strings that are still on the stack. Groups below minGroupSize are dropped.
IEnumerable<Tuple<String, IEnumerable<string>>> InnerGuessGroups(Stack<string> source, int minNameLength, int minGroupSize)
{
if(source.Any())
{
// ExtractTuple removes the matched strings from source before the recursion below runs.
var tuple = ExtractTuple(GetBestGroup(source, minNameLength), source);
if (tuple.Item2.Count() >= minGroupSize)
yield return tuple;
// Recurse through the public entry point, which re-sorts the remaining strings.
foreach (var element in GuessGroups(source, minNameLength, minGroupSize))
yield return element;
}
}
// Pops every leading string that starts with the prefix and returns it together with the prefix.
// The popped strings are materialized immediately, so the stack is already modified on return.
Tuple<String, IEnumerable<string>> ExtractTuple(string prefix, Stack<string> source)
{
    var members = PopWithPrefix(prefix, source).ToList();
    return Tuple.Create(prefix, members.AsEnumerable());
}
// Lazily pops strings from the top of the stack for as long as the top starts with the prefix.
IEnumerable<string> PopWithPrefix(string prefix, Stack<string> source)
{
    while (true)
    {
        if (!source.Any() || !source.Peek().StartsWith(prefix))
        {
            yield break;
        }

        yield return source.Pop();
    }
}
// Picks the prefix that occurs most often as a common prefix.
// Works on a copy of source: repeatedly takes the common prefix of all strings still in
// the copy, counts it when it is non-empty and at least minNameLength long, then pops one
// string and repeats until the copy is empty. Returns the prefix with the highest count.
string GetBestGroup(IEnumerable<string> source, int minNameLength)
{
var s = new Stack<string>(source);
// Missing keys read as 0, so counter[g]++ starts counting at 1.
var counter = new DictionaryWithDefault<string, int>(0);
while(s.Any())
{
var g = GetCommonPrefix(s);
if(!string.IsNullOrEmpty(g) && g.Length >= minNameLength)
counter[g]++;
s.Pop();
}
// NOTE(review): if no prefix qualified, counter is empty and Last() will throw - confirm callers never hit this.
return counter.OrderBy(c => c.Value).Last().Key;
}
// Returns the longest candidate prefix that every string in coll starts with.
// Candidates are taken from the first string, from the longest tried length down to the empty string.
// NOTE(review): the tried lengths stop one short of the shortest string's length, so a string
// can never be returned in full as the prefix - confirm whether that is intended.
string GetCommonPrefix(IEnumerable<string> coll)
{
return (from len in Enumerable.Range(0, coll.Min(s => s.Length)).Reverse()
let possibleMatch = coll.First().Substring(0, len)
where coll.All(f => f.StartsWith(possibleMatch))
select possibleMatch).FirstOrDefault();
}
// A dictionary whose indexer returns a configurable default value for missing keys
// instead of throwing. Reading a missing key does not add it.
public class DictionaryWithDefault<TKey, TValue> : Dictionary<TKey, TValue>
{
    // Value returned by the indexer when the key is not present.
    public TValue DefaultValue { get; set; }

    public DictionaryWithDefault() : base() { }

    public DictionaryWithDefault(TValue defaultValue) : base() {
        DefaultValue = defaultValue;
    }

    // Hides the base indexer; only calls made through this type get the default-value behavior.
    public new TValue this[TKey key]
    {
        get
        {
            TValue found;
            if (base.TryGetValue(key, out found))
            {
                return found;
            }

            return DefaultValue;
        }
        set { base[key] = value; }
    }
}
Example usage:
// Sample input taken from the question.
string[] input = {
"foo-139875913",
"foo-aeuefhaiu",
"foo-95hw9ghes",
"barbazabejgoiagjaegioea",
"barbaz8gs98ghsgh9es8h",
"barbaza98fyae9fghaefag",
"bazfa90eufa0e9u",
"bazgeajga8ugae89u",
"bazguea9guae",
"9a8efa098fea0",
"aifeaufhiuafhe"
};
// Prefixes must be at least 3 characters long and a group needs at least 2 members.
// NOTE(review): Dump() is not defined in this snippet - presumably LINQPad's Dump extension; confirm.
GuessGroups(input, 3, 2).Dump();
Ok, well as discussed, the problem wasn't initially well defined, but here is how I'd go about it.
Create a tree T
Parse the list, for each element:
for each letter in that element
if a branch labeled with that letter exists then
Increment the counter on that branch
Descend that branch
else
Create a branch labelled with that letter
Set its counter to 1
Descend that branch
This gives you a tree where each of the leaves represents a word in your input. Each of the non-leaf nodes has a counter representing how many leaves are (eventually) attached to that node. Now you need a formula to weight the length of the prefix (the depth of the node) against the size of the prefix group. For now:
S = (a * d) + (b * q) // d = depth, q = quantity, a, b coefficients you'll tweak to get desired behaviour
So now you can iterate over each of the non-leaf node and assign them a score S. Then, to work out your groups you would
For each non-leaf node
Assign score S
Insertion sort the node in to a list, so the head is the highest scoring node
Starting at the root of the tree, traverse the nodes
If the node is the highest scoring node in the list
Mark it as a prefix
Remove all nodes from the list that are a descendant of it
Pop itself off the front of the list
Return up the tree
This should give you a list of prefixes. The last part feels like some clever data structures or algorithms could speed it up (removing all the children feels particularly weak, but if your input size is small, I guess speed isn't too important).
I'm wondering if your requirements aren't off. It seems as if you are looking for a specific grouping size as opposed to specific key size requirements. Below is a program that will, based on a specified group size, break up the strings into the largest possible groups up to and including the group size specified. So if you specify a group size of 5, it will group items on the smallest key possible to make a group of size 5. In your example it would group foo- as f, since there is no need to make a more complex key as an identifier.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication2
{
// Groups strings by the shortest prefix that keeps every group at or below a maximum size.
class Program
{
    /// <summary>
    /// Buckets <paramref name="items"/> by a prefix key and classifies every bucket by its size.
    /// Items at least <paramref name="keySize"/> characters long are keyed by their first
    /// <paramref name="keySize"/> characters; shorter items are keyed by a prefix as long as the
    /// shortest item.
    /// </summary>
    /// <remarks><c>true</c> in returned dictionary key are groups over <paramref name="maxGroupSize"/></remarks>
    public static Dictionary<bool,Dictionary<string, List<string>>> Split(int maxGroupSize, int keySize, IEnumerable<string> items)
    {
        // Materialize once; the input is inspected several times.
        var itemList = items.ToList();
        var smallItems = itemList.Where(item => item.Length < keySize).ToList();
        // ">=": an item exactly keySize long is keyed by its whole text.
        // A strict comparison here would silently drop such items from the result.
        var largeItems = itemList.Where(item => item.Length >= keySize);
        var largeItemsq = Classify(largeItems, keySize, maxGroupSize);
        if (smallItems.Count == 0)
        {
            return largeItemsq;
        }

        var smallestLength = itemList.Min(item => item.Length);
        var smallItemsq = Classify(smallItems, smallestLength, maxGroupSize);
        return Combine(smallItemsq, largeItemsq);
    }

    // Groups the items by their first keyLength characters, then splits the groups into
    // those over maxGroupSize (true) and the rest (false).
    static Dictionary<bool, Dictionary<string, List<string>>> Classify(IEnumerable<string> items, int keyLength, int maxGroupSize)
    {
        return items
            .GroupBy(item => item.Substring(0, keyLength))
            .GroupBy(group => group.Count() > maxGroupSize)
            .ToDictionary(
                byCondition => byCondition.Key,
                byCondition => byCondition.ToDictionary(group => group.Key, group => group.ToList()));
    }

    // Merges two classification results; the returned dictionary always has both a true and a false entry.
    static Dictionary<bool, Dictionary<string,List<string>>> Combine(Dictionary<bool, Dictionary<string,List<string>>> a, Dictionary<bool, Dictionary<string,List<string>>> b) {
        var x = new Dictionary<bool,Dictionary<string,List<string>>> {
            { true, null },
            { false, null }
        };
        foreach(var condition in new bool[] { true, false }) {
            var hasA = a.ContainsKey(condition);
            var hasB = b.ContainsKey(condition);
            x[condition] = hasA && hasB ? a[condition].Concat(b[condition]).ToDictionary(c => c.Key, c => c.Value)
                : hasA ? a[condition]
                : hasB ? b[condition]
                : new Dictionary<string, List<string>>();
        }
        return x;
    }

    /// <summary>
    /// Groups <paramref name="items"/> by prefix, starting with keys of <paramref name="keySize"/>
    /// characters and lengthening the key for every group that is larger than
    /// <paramref name="maxGroupSize"/>.
    /// </summary>
    /// <remarks>
    /// A group that cannot be split any further (for example several identical strings) is
    /// returned as it is, even if it is larger than <paramref name="maxGroupSize"/>.
    /// </remarks>
    public static Dictionary<string, List<string>> Group(int maxGroupSize, IEnumerable<string> items, int keySize)
    {
        var itemList = items.ToList();
        var toReturn = new Dictionary<string, List<string>>();
        var both = Split(maxGroupSize, keySize, itemList);

        Dictionary<string, List<string>> withinLimit;
        if (both.TryGetValue(false, out withinLimit))
        {
            foreach (var group in withinLimit)
            {
                toReturn.Add(group.Key, group.Value);
            }
        }

        Dictionary<string, List<string>> overLimit;
        if (both.TryGetValue(true, out overLimit))
        {
            // When every item is shorter than the key size, the grouping no longer depends on
            // the key size. If such a group also contains all items, recursing would repeat
            // exactly the same split forever, so the group is accepted as it is.
            var keyCannotGrow = itemList.All(item => item.Length < keySize);
            foreach (var group in overLimit)
            {
                if (keyCannotGrow && group.Value.Count == itemList.Count)
                {
                    toReturn.Add(group.Key, group.Value);
                    continue;
                }

                foreach (var fixedGroup in Group(maxGroupSize, group.Value, keySize + 1))
                {
                    toReturn.Add(fixedGroup.Key, fixedGroup.Value);
                }
            }
        }

        return toReturn;
    }

    static Random rand = new Random(unchecked((int)DateTime.Now.Ticks));
    const string allowedChars = "aaabbbbccccc"; // "aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";

    // Produces 100 random strings of 3 to 24 characters taken from allowedChars.
    static IEnumerable<string> GenerateText()
    {
        var list = new List<string>();
        for (int i = 0; i < 100; i++)
        {
            var stringLength = rand.Next(3,25);
            var builder = new StringBuilder(stringLength);
            for (int j = stringLength; j > 0; j--)
            {
                // The upper bound of Next is exclusive, so pass the full length;
                // Length - 1 would never pick the last character.
                builder.Append(allowedChars[rand.Next(allowedChars.Length)]);
            }

            list.Add(builder.ToString());
        }
        return list;
    }

    static void Main(string[] args)
    {
        // runs 1000 times over autogenerated groups of sample text.
        for (int i = 0; i < 1000; i++)
        {
            var s = GenerateText();
            Go(s);
        }
        Console.WriteLine();
        Console.WriteLine("DONE");
        Console.ReadLine();
    }

    // Groups the items with a maximum group size of 3 and prints every key followed by its members.
    static void Go(IEnumerable<string> items)
    {
        var dict = Group(3, items, 1);
        foreach (var entry in dict)
        {
            Console.WriteLine(entry.Key);
            foreach (var item in entry.Value)
            {
                Console.WriteLine("\t{0}", item);
            }
        }
    }
}
}

Categories