I have a list of models of this type:
public class TourDude {
public int Id { get; set; }
public string Name { get; set; }
}
And here is my list:
public IEnumerable<TourDude> GetAllGuides {
get {
List<TourDude> guides = new List<TourDude>();
guides.Add(new TourDude() { Name = "Dave Et", Id = 1 });
guides.Add(new TourDude() { Name = "Dave Eton", Id = 1 });
guides.Add(new TourDude() { Name = "Dave EtZ5", Id = 1 });
guides.Add(new TourDude() { Name = "Danial Maze A", Id = 2 });
guides.Add(new TourDude() { Name = "Danial Maze B", Id = 2 });
guides.Add(new TourDude() { Name = "Danial", Id = 3 });
return guides;
}
}
I want to retrieve these records:
{ Name = "Dave Et", Id = 1 }
{ Name = "Danial Maze", Id = 2 }
{ Name = "Danial", Id = 3 }
The goal mainly to collapse duplicates and near duplicates (confirmable by the ID), taking the shortest possible value (when compared) as name.
Where do I start? Is there a complete LINQ that will do this for me? Do I need to code up an equality comparer?
Edit 1:
var result = from x in GetAllGuides
group x.Name by x.Id into g
select new TourDude {
Test = Exts.LongestCommonPrefix(g),
Id = g.Key,
};
IEnumerable<IEnumerable<char>> test = result.First().Test;
string str = test.First().ToString();
If you want to group the items by Id and then find the longest common prefix of the Names within each group, then you can do so as follows:
var result = from x in guides
group x.Name by x.Id into g
select new TourDude
{
Name = LongestCommonPrefix(g),
Id = g.Key,
};
using the algorithm for finding the longest common prefix from here.
Result:
{ Name = "Dave Et", Id = 1 }
{ Name = "Danial Maze ", Id = 2 }
{ Name = "Danial", Id = 3 }
static string LongestCommonPrefix(IEnumerable<string> xs)
{
return new string(xs
.Transpose()
.TakeWhile(s => s.All(d => d == s.First()))
.Select(s => s.First())
.ToArray());
}
I was able to achieve this by grouping the records on the ID then selecting the first record from each group ordered by the Name length:
var result = GetAllGuides.GroupBy(td => td.Id)
.Select(g => g.OrderBy(td => td.Name.Length).First());
foreach (var dude in result)
{
Console.WriteLine("{{Name = {0}, Id = {1}}}", dude.Name, dude.Id);
}
Related
I have problem with Linq, the code used to be processed in sql via stored procedure, but right now it supposed to be on code via linq, here's my database schema
SQL Fiddle
what I want is , from this data
orderNo
Type
serial
1
BN
BN 1
1
BE
BE 1
2
BN
BN 2
2
BE
BE 2
3
BN
BN 3
3
BE
BE 3
to be like this :
orderNo
be
bn
1
BE 1
BN 1
2
BE 2
BN 3
3
BE 2
BN 3
found one question and solution Source 1 - Stackoverflow , when I tried to my code, I got an issue with SelectMany
here's what I've tried
var results = data_tech.GroupBy(l => l.serial).SelectMany( g =>
new
{
Metadata = g.Key,
data = g
});
var pivoted = new List<PivotedEntity>();
foreach(var item in results)
{
pivoted.Add(
new PivotedEntity
{
Order= item.orderNo,
BE= item.data.Where(x => x.Name == "BE")
.FirstOrDefault().value,
BN= item.data.Where(x => x.Name == "BN")
.FirstOrDefault().value,
});
}
You can simply achieve this by changing the group by element serial to OrderNo. Let me give you an example,
var list = new List<Order>() {
new Order { orderNo = 1, Type = "BN", Serial = "BN 1" },
new Order { orderNo = 1, Type = "BE", Serial = "BE 1" },
new Order { orderNo = 2, Type = "BN", Serial = "BN 2" },
new Order { orderNo = 2, Type = "BE", Serial = "BE 2" },
new Order { orderNo = 3, Type = "BN", Serial = "BE 3" } ,
new Order { orderNo = 3, Type = "BE", Serial = "BN 3" } };
var results = list.GroupBy(l => l.orderNo).Select(g =>
new
{
Metadata = g.Key,
data = g
});
var pivoted = new List<PivotedEntity>();
foreach (var item in results)
{
pivoted.Add(
new PivotedEntity
{
Order = item.Metadata,
BE = item.data.Where(x => x.Type == "BE")
.FirstOrDefault().Serial,
BN = item.data.Where(x => x.Type == "BN")
.FirstOrDefault().Serial,
});
}
This will give you some output like this image.
Edit: Output PivotedEntity class =>
internal class PivotedEntity
{
public int Order { get; set; }
public string BE { get; set; }
public string BN { get; set; }
}
I have the below set of data
Where each City belongs to a specific Department, which belongs to a specific Region, which belongs to a specific Country (in this case there is only one country: France).
This data is contained in a CSV file which I can read from on a row-by-row basis, however my goal is to convert this data into a tree structure (with France being at the root).
Each of these nodes will be given a specific Id value, which is something I've already gone and done, but the tricky part is that each node here must also contain a ParentId (for instance Belley and Gex need the ParentId of Ain, but Moulins and Vichy need the ParentId of Aller).
Below is a snippet of code I've written that has assigned an Id value to each name in this data set, along with some other values:
int id = 0;
List<CoverageAreaLevel> coverageAreas = GetCoverageAreaDataFromCsv(path, true);
List<LevelList> levelLists = new List<LevelList>
{
new LevelList { Names = coverageAreas.Select(a => a.Level1).Distinct().ToList(), Level = "1" },
new LevelList { Names = coverageAreas.Select(a => a.Level2).Distinct().ToList(), Level = "2" },
new LevelList { Names = coverageAreas.Select(a => a.Level3).Distinct().ToList(), Level = "3" },
new LevelList { Names = coverageAreas.Select(a => a.Level4).Distinct().ToList(), Level = "4" }
};
List<CoverageArea> newCoverageAreas = new List<CoverageArea>();
foreach (LevelList levelList in levelLists)
{
foreach (string name in levelList.Names)
{
CoverageArea coverageArea = new CoverageArea
{
Id = id++.ToString(),
Description = name,
FullDescription = name,
Level = levelList.Level
};
newCoverageAreas.Add(coverageArea);
}
}
The levelLists variable contains a sort-of heirarchical structure of the data that I'm looking for, but none of the items in that list are linked together by anything.
Any idea of how this could be implemented? I can manually figure out each ParentId, but I'd like to automate this process, especially if this needs to be done in the future.
The solution from #Camilo is really good and pragmatic. I would also suggest the use of a tree.
A sample implementation:
var countries = models.GroupBy(xco => xco.Country)
.Select((xco, index) =>
{
var country = new Tree<String>();
country.Value = xco.Key;
country.Children = xco.GroupBy(xr => xr.Region)
.Select((xr, xrIndex) =>
{
var region = new Tree<String>();
region.Value = xr.Key;
region.Parent = country;
region.Children =
xr.GroupBy(xd => xd.Department)
.Select((xd, index) =>
{
var department = new Tree<String>();
department.Value = xd.Key;
department.Parent = region;
department.Children = xd
.Select(xc => new Tree<String> { Value = xc.City, Parent = department });
return department;
});
return region;
});
return country;
});
public class Tree<T>
{
public IEnumerable<Tree<T>> Children;
public T Value;
public Tree<T> Parent;
}
One way you could solve this is by building dictionaries with the names and IDs of each level.
Assuming you have data like this:
var models = new List<Model>
{
new Model { Country = "France", Region = "FranceRegionA", Department = "FranceDept1", City = "FranceA" },
new Model { Country = "France", Region = "FranceRegionA", Department = "FranceDept1", City = "FranceB" },
new Model { Country = "France", Region = "FranceRegionA", Department = "FranceDept2", City = "FranceC" },
new Model { Country = "France", Region = "FranceRegionB", Department = "FranceDept3", City = "FranceD" },
new Model { Country = "Italy", Region = "ItalyRegionA", Department = "ItalyDept1", City = "ItalyA" },
new Model { Country = "Italy", Region = "ItalyRegionA", Department = "ItalyDept2", City = "ItalyB" },
};
You could do something like this, which can probably be improved further if needed:
var countries = models.GroupBy(x => x.Country)
.Select((x, index) => Tuple.Create(x.Key, new { Id = index + 1 }))
.ToDictionary(x => x.Item1, x => x.Item2);
var regions = models.GroupBy(x => x.Region)
.Select((x, index) => Tuple.Create(x.Key, new { ParentId = countries[x.First().Country].Id, Id = index + 1 }))
.ToDictionary(x => x.Item1, x => x.Item2);
var departments = models.GroupBy(x => x.Department)
.Select((x, index) => Tuple.Create(x.Key, new { ParentId = regions[x.First().Region].Id, Id = index + 1 }))
.ToDictionary(x => x.Item1, x => x.Item2);
var cities = models
.Select((x, index) => Tuple.Create(x.City, new { ParentId = departments[x.Department].Id, Id = index + 1 }))
.ToDictionary(x => x.Item1, x => x.Item2);
The main idea is to leverage the index parameter of the Select method and the speed of dictionaries to find the parent ID.
Sample output from a fiddle:
countries:
[France, { Id = 1 }],
[Italy, { Id = 2 }]
regions:
[FranceRegionA, { ParentId = 1, Id = 1 }],
[FranceRegionB, { ParentId = 1, Id = 2 }],
[ItalyRegionA, { ParentId = 2, Id = 3 }]
departments:
[FranceDept1, { ParentId = 1, Id = 1 }],
[FranceDept2, { ParentId = 1, Id = 2 }],
[FranceDept3, { ParentId = 2, Id = 3 }],
[ItalyDept1, { ParentId = 3, Id = 4 }],
[ItalyDept2, { ParentId = 3, Id = 5 }]
cities:
[FranceA, { ParentId = 1, Id = 1 }],
[FranceB, { ParentId = 1, Id = 2 }],
[FranceC, { ParentId = 2, Id = 3 }],
[FranceD, { ParentId = 3, Id = 4 }],
[ItalyA, { ParentId = 4, Id = 5 }],
[ItalyB, { ParentId = 5, Id = 6 }]
I'm wanting to make an API call that gets all the unique survey IDs and put them into an array with total answer counts based on the unique answer value and list of user ids. For example: ICollection<Survey>
ID Survey_Id Answer User
1 Apple_Survey 1 Jones
2 Apple_Survey 1 Smith
3 Banana_Survey 2 Smith
4 Apple_Survey 3 Jane
5 Banana_Survey 2 John
The API result I currently have:
{Data: [
{
survey_id: "Apple_Survey",
answer: "1",
user: "Jones"
},
...
]}
Where I get stuck is in the code to process the data:
foreach (var info in data
.GroupBy(x => x.Survey_Id)
.Select(group => new { SurveyId = group.Key,
Count = group.Count() }) )
{
Console.WriteLine("{0} {1}", info.SurveyId, info.Count);
//Result: Apple_Survey 3 Banana_Survey 2
}
Ideal results:
{Data: [
{
survey_id: "Apple_Survey",
answers: [//Example: rating answer would be 1-10, not an ID
{answer: "1", count: 2, users: ["Jones", "Smith"]},
{answer: "3", count: 1, users: ["Jane"]}
]
},
...
]}
How can I get the distinct answers based on survey_id and the list of users based on the answer? Any help would be greatly appreciated!
See if following helps :
class Program
{
static void Main(string[] args)
{
List<Survey> surveys = new List<Survey>() {
new Survey() { ID = 1, Survey_Id = "Apple_Survey", Answer = 1, User = "Jones"},
new Survey() { ID = 2, Survey_Id = "Apple_Survey", Answer = 1, User = "Smith"},
new Survey() { ID = 3, Survey_Id = "Banana_Survey", Answer = 2, User = "Smith"},
new Survey() { ID = 4, Survey_Id = "Apple_Survey", Answer = 3, User = "Jane"},
new Survey() { ID = 5, Survey_Id = "Banana_Survey", Answer = 2, User = "John"}
};
var results = surveys.GroupBy(x => x.Survey_Id).Select(x => x.GroupBy(y => y.Answer)
.Select(y => new { answer = y.Key, count = y.Count(), users = y.Select(z => z.User).ToList()}).ToList())
.ToList();
}
}
public class Survey
{
public int ID { get; set; }
public string Survey_Id { get; set; }
public int Answer { get; set; }
public string User { get; set; }
}
A simple way is based on sql only.. you could use a query as :
select Survey_Id, Answer, COUNT(*) answer_count, group_concat(user) answer_user
from my_table
group Survey_Id, Answer
I'd go for
table.GroupBy( x => x.Survey_Id ).Select( x => new { Survey_Id=x.Key, Answers=x.GroupBy( y => y.Answer ).Select( y => new { Answer=y.Key, Count=y.Count(), Users=y.Select( z => z.User)})} )
That creates an ienumerable of pairs of a survey and an ienumerable of answers, each with its count and an ienumerable of the users that voted for that answer.
Try it out on dotnetfiddle.net!
I have the following list of TestParam... This is just a parameter list that is doing to determine how a query is going to be run. In the following case, the expected result would be to be executed against all the combinations of different parameters. Hence, a list of lists, with CustomerId 33 together with each product Id available in the list...
List<TestParam> testList = new List<TestParam>();
testList.Add(new TestParam() { Name = "CustomerId", Value = "33" });
testList.Add(new TestParam() { Name = "ProductId", Value = "1" });
testList.Add(new TestParam() { Name = "ProductId", Value = "2" });
testList.Add(new TestParam() { Name = "ProductId", Value = "3" });
testList.Add(new TestParam() { Name = "ProductId", Value = "4" });
testList.Add(new TestParam() { Name = "ProductId", Value = "5" });
testList.Add(new TestParam() { Name = "ProductId", Value = "6" });
testList.Add(new TestParam() { Name = "ProductId", Value = "7" });
testList.Add(new TestParam() { Name = "ProductId", Value = "8" });
TestParam is a normal encapsulated parameter class having a name and a value...
public class TestParam
{
public string Name { get; set; }
public string Value { get; set; }
}
The end result would be a list of lists, having CustomerId 33, with all the rest of the products. The same result would be acquired if I had different names and values in the list of TestParam (the above is just an example).
The following code, ends up with several lists depending on the combinations of the list above...
// First get a list of distinct unique param collections...
List<string> distinctParameterNames = new List<string>();
testList.GroupBy(x => x.Name).ForEach(paramName => {
distinctParameterNames.Add(paramName.Key);
});
// Get counts
List<int> combinationList = new List<int>();
foreach (var x in distinctParameterNames) {
combinationList.Add(testList.Where(y=>y.Name == x).Count());
}
// Will contain 2 lists, one having all combinations of parameters named CustomerId, and another with ProductId combinations...
List<List<TestParam>> parameterList = new List<List<TestParam>>();
foreach (var x in distinctParameterNames) {
// Loop
List<TestParam> parameter = new List<TestParam>();
testList.Where(paramName => paramName.Name == x).ForEach(y =>
{
parameter.Add(new TestParam() { Name = y.Name, Value = y.Value });
});
parameterList.Add(parameter);
}
It would be an intersect between the list, and the end result will be a list of lists, and each list will have the combinations below... So a run would return (in this case) :
Customer 33, Product Id 1
Customer 33, Product Id 2
Customer 33, Product Id 3
Customer 33, Product Id 4
Customer 33, Product Id 5
Customer 33, Product Id 6
Customer 33, Product Id 7
Customer 33, Product Id 8
What would be the most efficient and generic way to do this?
The following is the solution that I was looking for...
public static List<List<T>> AllCombinationsOf<T>(params List<T>[] sets)
{
// need array bounds checking etc for production
var combinations = new List<List<T>>();
// prime the data
foreach (var value in sets[0])
combinations.Add(new List<T> { value });
foreach (var set in sets.Skip(1))
combinations = AddExtraSet(combinations, set);
return combinations;
}
private static List<List<T>> AddExtraSet<T>
(List<List<T>> combinations, List<T> set)
{
var newCombinations = from value in set
from combination in combinations
select new List<T>(combination) { value };
return newCombinations.ToList();
}
Usage (continues with my code snippet of the question itself) :
var intersection = AllCombinationsOf(parameterList.ToArray());
get all the list of customer first like this
var customers = from a in testlist where a.name='customerid'
select a;
var products = from a in testlist where a.name='productid'
select a;
then loop customers
for(var c in customers)
{
loop products
for(var p in products)
{
var customerproducts = new CustomerProducts{
Customer = c.Name +' ' + c.Value
Product = p.Name + ' ' + p.value
};
then add it into a list
}
}
The list needs to be grouped by Name, then it can be joined several times depending on count of groups:
var groups = testList.GroupBy(_ => _.Name);
IEnumerable<IEnumerable<TestParam>> result = null;
foreach (var g in groups)
{
var current = g.Select(_ => new[] { _ });
if (result == null)
{
result = current;
continue;
}
result = result.Join(current, _ => true, _ => true, (actual, c) => actual.Concat(c));
}
// check result
foreach (var i in result)
{
Console.WriteLine(string.Join(", ", i.Select(_ => string.Format("{0}-{1}", _.Name, _.Value))));
}
I have 50,000 documents in my raven database, but when I I run this query the Id of the latestProfile object is returned as 9999 (the first id in the db is 0, so this is the ten thousandth item).
//find the profile with the highest ID now existing in the collection
var latestProfile = session.Query<SiteProfile>()
.Customize(c => c.WaitForNonStaleResults())
.OrderByDescending(p => p.Id)
.FirstOrDefault();
//lastProfile.Id is 9999 here
//See how many items there are in the collection. This returns 50,000
var count = session.Query<SiteProfile>()
.Customize(c => c.WaitForNonStaleResults()).Count();
My guess is that Raven is paging before my OrderByDescending statement, but
The default page size is 10, and even the max is 1024
All the Parts of this are either IRavenQueryable or IQueryable
It is also not a stale index as I have tested this with WaitForNonStaleResults()
My expected result here is the most recent id I added (50,000) to be the item returned here, but yet it is not.
Why not? This looks like a bug in Raven to me.
EDIT:
Ok, so I now know exactly why, but it still looks like a bug. Here is a list of the items from that same list actualised by a ToArray()
{ Id = 9999 },
{ Id = 9998 },
{ Id = 9997 },
{ Id = 9996 },
{ Id = 9995 },
{ Id = 9994 },
{ Id = 9993 },
{ Id = 9992 },
{ Id = 9991 },
{ Id = 9990 },
{ Id = 999 }, //<-- Whoops! This is text order not int order
{ Id = 9989 },
So even though my Id column is an integer because Raven stores it internally as a string it is ordering by that representation. Clearly Ravens Queryable implementation is resolving the ordering before checking types
I have read that you can define sort order to use integer sorting on defined indexes but really, this should not matter. In a strongly typed language integers should be sorted as integers.
Is there a way to make this Id ordering correct? Do I have actually have to resort to creating a special index on the id column just to get integers ordered correctly?
UPDATE 2:
I am now using an index as follows:
public SiteProfiles_ByProfileId()
{
Map = profiles => from profile in profiles
select new
{
profile.Id
};
Sort(x => x.Id, SortOptions.Int);
}
To try and force it to understand integers. I can see that my index is called via the Raven server console as follows:
Request # 249: GET - 3 ms - Bede.Profiles - 200 - /indexes/SiteProfiles/ByProfileId?&pageSize=1&sort=-__document_id&operationHeadersHash=-1789353429
Query:
Time: 3 ms
Index: SiteProfiles/ByProfileId
Results: 1 returned out of 20,000 total.
but still it comes back with string ordered results. I have seen advice not to use integers as the id, but that would cause massive issues on this project as there are 3rd parties referencing the current ids (in the old service this is designed to replace).
UPDATE 3: I have specific unit test that shows the issue. it appears to work fine for any integer property except for the Id.
[TestMethod]
public void Test_IndexAllowsCorrectIntSortingWhenNotId()
{
using (var store = new EmbeddableDocumentStore() {RunInMemory = true})
{
store.Initialize();
IndexCreation.CreateIndexes(typeof(MyFakeProfiles_ByProfileId).Assembly, store);
using (var session = store.OpenSession())
{
var profiles = new List<MyFakeProfile>()
{
new MyFakeProfile() { Id=80, Age = 80, FirstName = "Grandpa", LastName = "Joe"},
new MyFakeProfile() { Id=9, Age = 9,FirstName = "Jonny", LastName = "Boy"},
new MyFakeProfile() { Id=22, Age = 22, FirstName = "John", LastName = "Smith"}
};
foreach (var myFakeProfile in profiles)
{
session.Store(myFakeProfile, "MyFakeProfiles/" + myFakeProfile.Id);
}
session.SaveChanges();
var oldestPerson = session.Query<MyFakeProfile>().Customize(c => c.WaitForNonStaleResults())
.OrderByDescending(p => p.Age).FirstOrDefault();
var youngestPerson = session.Query<MyFakeProfile>().Customize(c => c.WaitForNonStaleResults())
.OrderBy(p => p.Age).FirstOrDefault();
var highestId = session.Query<MyFakeProfile>("MyFakeProfiles/ByProfileId").Customize(c => c.WaitForNonStaleResults())
.OrderByDescending(p => p.Id).FirstOrDefault();
var lowestId = session.Query<MyFakeProfile>("MyFakeProfiles/ByProfileId").Customize(c => c.WaitForNonStaleResults())
.OrderBy(p => p.Id).FirstOrDefault();
//sanity checks for ordering in Raven
Assert.AreEqual(80,oldestPerson.Age); //succeeds
Assert.AreEqual(9, youngestPerson.Age);//succeeds
Assert.AreEqual(80, highestId.Id);//fails
Assert.AreEqual(9, lowestId.Id);//fails
}
}
}
private void PopulateTestValues(IDocumentSession session)
{
var profiles = new List<MyFakeProfile>()
{
new MyFakeProfile() { Id=80, Age = 80, FirstName = "Grandpa", LastName = "Joe"},
new MyFakeProfile() { Id=9, Age = 9,FirstName = "Jonny", LastName = "Boy"},
new MyFakeProfile() { Id=22, Age = 22, FirstName = "John", LastName = "Smith"}
};
foreach (var myFakeProfile in profiles)
{
session.Store(myFakeProfile, "MyFakeProfiles/" + myFakeProfile.Id);
}
}
}
public class MyFakeProfile
{
public int Id { get; set; }
public int Age { get; set; }
public string FirstName { get; set; }
public string LastName { get; set; }
}
public class MyFakeProfiles_ByProfileId : AbstractIndexCreationTask<MyFakeProfile>
{
// The index name generated by this is going to be SiteProfiles/ByProfileId
public MyFakeProfiles_ByProfileId()
{
Map = profiles => from profile in profiles
select new
{
profile.Id
};
Sort(x => (int)x.Id, SortOptions.Int);
}
}
You need to specify the type of the field on the index, see http://ravendb.net/docs/2.5/client-api/querying/static-indexes/customizing-results-order
Side note, IDs in RavenDB are always strings. You seem to be trying to use integer IDs - don't do that.
You can provide multiple Sort field, as you have only defined it for Id:
public SiteProfiles_ByProfileId()
{
Map = profiles => from profile in profiles
select new
{
profile.Id
};
Sort(x => x.Id, SortOptions.Int);
Sort(x => x.Age, SortOptions.Int);
}
BUT ... I am unsure of the effects of applying a sort on a field that isn't mapped.
You may have to extend the mapping to select both fields, like this:
public SiteProfiles_ByProfileId()
{
Map = profiles => from profile in profiles
select new
{
profile.Id,
profile.Age
};
Sort(x => x.Id, SortOptions.Int);
Sort(x => x.Age, SortOptions.Int);
}