Filter multiple parameters with MongoDB provider - c#

I have the below document schema.
{
"name":"Name",
"region":"New Jersey",
"address":"92 Something Rd",
"city":"Jersey City",
"state":"NJ",
"zipCode":"07302",
"country":"USA",
amenities":[
"Sauna",
"Locker",
"Shop"
],
"services":[
"Car Rental",
"Transportation"
]
}
I want with one call to the server to get all the documents matching any of the filter arguments where map 1-1 meaning "state" = "NJ" OR "city" = "Jersey City" but also when any value of the list contained to any document array child, example [ "Sauna", "Locker" ] ANY IN "amenities". And it should be an OR concatenation of all the possible filters.
Using the C# MongoDB Driver I came up with the below methods so far in a MongoRepository class but doesn't return the desired results.
public async Task<IEnumerable<T>> DocumentsMatchEqFieldValueAsync<T>(string collectionName,
IDictionary<string, string> fieldsValues = null,
IDictionary<string, IEnumerable<string>> fieldsWithEnumerableValues = null,
IEnumerable<ObjectId> ids = null)
{
var cursor = await GetEqAsyncCursor<T>(collectionName, fieldsValues, fieldsWithEnumerableValues, ids).ConfigureAwait(false);
return await cursor.ToListAsync().ConfigureAwait(false);
}
protected Task<IAsyncCursor<T>> GetEqAsyncCursor<T>(string collectionName,
IDictionary<string, string> fieldsValues = null,
IDictionary<string, IEnumerable<string>> fieldsWithEnumerableValues = null,
IEnumerable<ObjectId> ids = null)
{
var collection = GetCollection<T>(collectionName);
var builder = Builders<T>.Filter;
// Not sure if this is the correct way to initialize it because it seems adding an empty filter condition returning ALL document;
FilterDefinition<T> filter = new BsonDocument();
if (fieldsValues != null &&
fieldsValues.Any())
{
filter = filter | fieldsValues
.Select(p => builder.Eq(p.Key, p.Value))
.Aggregate((p1, p2) => p1 | p2);
}
if (fieldsWithEnumerableValues != null &&
fieldsWithEnumerableValues.Any())
{
filter = filter | fieldsWithEnumerableValues
.Select(p => builder.AnyEq(p.Key, p.Value))
.Aggregate((p1, p2) => p1 | p2);
}
if (ids != null &&
ids.Any())
{
filter = filter | ids
.Select(p => builder.Eq("_id", p))
.Aggregate((p1, p2) => p1 | p2);
}
return collection.FindAsync(filter);
}
I want it to be generic so the client can call the method like this.
public async Task should_return_any_records_matching_all_possible_criteria()
{
// Arrange
IDocumentRepository documentRepository = new MongoRepository(_mongoConnectionString, _mongoDatabase);
// Act
var documents = await documentRepository.DocumentsMatchEqFieldValueAsync<BsonDocument>(Courses,
fieldsValues: new Dictionary<string, string>
{
{ "state", "NJ" },
{ "city", "Jersey City" }
},
fieldsWithEnumerableValues: new Dictionary<string, IEnumerable<string>>
{
{ "services", new List<string> { "Car Rental", "Locker" } },
{ "amenities", new List<string> { "Sauna", "Shop" } }
});
// Assert
documents.ShouldNotBeEmpty();
}
I would expect documents that have "state" = "NJ" OR "city" = "Jersey City" OR "services" CONTAINS ANY OF "Car Rental", "Locker" OR "amenities" CONTAINS ANY OF "Sauna", "Shop".

I am posting below the method I ended up using after some research for the future help of anyone looking to do the same. I found how to query using regex here, write plain MongoDB queries and add them to the filter collection here, and how to debug the generated query here.
After having all this information and a little bit of experimentation using the Studio 3T client find below the method.
protected Task<IAsyncCursor<T>> GetEqAsyncCursor<T>(string collectionName,
IDictionary<string, string> fieldEqValue = null,
IDictionary<string, string> fieldContainsValue = null,
IDictionary<string, IEnumerable<string>> fieldEqValues = null,
IDictionary<string, IEnumerable<string>> fieldElemMatchInValues = null,
IEnumerable<ObjectId> ids = null)
{
var collection = GetCollection<T>(collectionName);
var builder = Builders<T>.Filter;
IList<FilterDefinition<T>> filters = new List<FilterDefinition<T>>();
if (fieldEqValue != null &&
fieldEqValue.Any())
{
filters.Add(fieldEqValue
.Select(p => builder.Eq(p.Key, p.Value))
.Aggregate((p1, p2) => p1 | p2));
}
if (fieldContainsValue != null &&
fieldContainsValue.Any())
{
filters.Add(fieldContainsValue
.Select(p => builder.Regex(p.Key, new BsonRegularExpression($".*{p.Value}.*", "i")))
.Aggregate((p1, p2) => p1 | p2));
}
if (fieldEqValues != null &&
fieldEqValues.Any())
{
foreach (var pair in fieldEqValues)
{
foreach (var value in pair.Value)
{
filters.Add(builder.Eq(pair.Key, value));
}
}
}
if (fieldElemMatchInValues != null &&
fieldElemMatchInValues.Any())
{
var baseQuery = "{ \"%key%\": { $elemMatch: { $in: [%values%] } } }";
foreach (var item in fieldElemMatchInValues)
{
var replaceKeyQuery = baseQuery.Replace("%key%", item.Key);
var bsonQuery = replaceKeyQuery.Replace("%values%",
item.Value
.Select(p => $"\"{p}\"")
.Aggregate((value1, value2) => $"{value1},
{value2}"));
var filter = BsonSerializer.Deserialize<BsonDocument>(bsonQuery);
filters.Add(filter);
}
}
if (ids != null &&
ids.Any())
{
filters.Add(ids
.Select(p => builder.Eq("_id", p))
.Aggregate((p1, p2) => p1 | p2));
}
var filterConcat = builder.Or(filters);
// Here's how you can debug the generated query
//var documentSerializer = BsonSerializer.SerializerRegistry.GetSerializer<T>();
//var renderedFilter = filterConcat.Render(documentSerializer, BsonSerializer.SerializerRegistry).ToString();
return collection.FindAsync(filterConcat);
}

Related

Dynamic LINQ Performance

I have a routine (written with the generous help of others here) that allows me to take a List objects, and using any number of properties in any order it dynamically builds a TreeView structure with a Count at each node. This dynamic ability is a firm user requirement.
So a source List of:
{Prop1 = "A", Prop2 = "I", Prop3 = "X"},
{Prop1 = "A", Prop2 = "J", Prop3 = "X"},
{Prop1 = "B", Prop2 = "I", Prop3 = "X"},
{Prop1 = "B", Prop2 = "I", Prop3 = "Y"},
{Prop1 = "C", Prop2 = "K", Prop3 = "Z"}
Gives the following when the Selection is by Prop1 by Prop3:
Total (5)
- A(2)
- - X(2)
- B(2)
- - X(1)
- - Y(1)
- C(1)
- - Z(1)
Functionally this works fine. However, the performance leaves a lot to be desired when the number of distinct values increases. For example - one particular run in a dataset with 5K objects and 1K distinct values in Prop1 will take several seconds.
Here is the routine:
public static class TreeBuilder
{
public static Dictionary<string, TreeItem> BuildTree<TSource>(List<TSource> source, List<string> columns)
{
return new Dictionary<string, TreeItem>()
{
{ "Total",
new TreeItem()
{
Key = "Total",
RawKey = "Total",
Count = source.Count,
Items = GroupBySelector<TSource>(source, columns, 0, "Total")
}
}
};
}
public static MethodInfo GetGenericMethod(this Type type, string name, Type[] genericTypeArgs, Type[] paramTypes)
{
foreach (MethodInfo method in type.GetMethods())
if (method.Name == name)
{
var pa = method.GetParameters();
if (pa.Length == paramTypes.Length)
{
var genericMethod = method.MakeGenericMethod(genericTypeArgs);
if (genericMethod.GetParameters().Select(p => p.ParameterType).SequenceEqual(paramTypes))
return genericMethod;
}
}
return null;
}
private static MethodInfo GetGroupByMethodStatically<TElement, TKey>()
=> typeof(Enumerable).GetGenericMethod("GroupBy", new[] { typeof(TElement), typeof(TKey) }, new[] { typeof(IEnumerable<TElement>), typeof(Func<TElement, TKey>) });
private static MethodInfo GetEnumerableMethod(string methodName, Type tElement, Type tTKey)
{
var tIElement = typeof(IEnumerable<>).MakeGenericType(tElement);
var tFunction = typeof(Func<,>).MakeGenericType(tElement, tTKey);
return typeof(Enumerable).GetGenericMethod(methodName, new[] { tElement, tTKey }, new[] { tIElement, tFunction });
}
private static MethodInfo GetEnumerableMethod(string methodName, Type tElement)
{
var tIELEMENT = typeof(IEnumerable<>).MakeGenericType(tElement);
return typeof(Enumerable).GetGenericMethod(methodName, new[] { tElement }, new[] { tIELEMENT });
}
public static Dictionary<string, TreeItem> GroupBySelector<TElement>(IEnumerable<TElement> source, IList<string> columnNames, int entry = 0, string key = "")
{
if (source == null) throw new ArgumentNullException(nameof(source));
List<string> columnParameters = columnNames[entry].Split('|').ToList();
string columnName = columnParameters[0];
if (columnName == null) throw new ArgumentNullException(nameof(columnName));
if (columnName.Length == 0) throw new ArgumentException(nameof(columnName));
int nextEntry = entry + 1;
var tElement = typeof(TElement);
var tIElement = typeof(IEnumerable<TElement>);
var keyParm = Expression.Parameter(tElement);
var prop = Expression.Property(keyParm, columnName);
var param = Expression.Parameter(tIElement, "p");
var groupByMethod = GetEnumerableMethod("GroupBy", tElement, prop.Type);
var groupByExpr = Expression.Lambda(prop, keyParm);
var bodyExprCall = Expression.Call(groupByMethod, param, groupByExpr);
var tSelectInput = typeof(IGrouping<,>).MakeGenericType(prop.Type, tElement);
var selectParam = Expression.Parameter(tSelectInput, "p");
var tKey = typeof(TreeItem).GetMember("Key").Single();
var tRawKey = typeof(TreeItem).GetMember("RawKey").Single();
var tCount = typeof(TreeItem).GetMember("Count").Single();
var tParentKey = typeof(TreeItem).GetMember("ParentKey").Single();
var tItems = typeof(TreeItem).GetMember("Items").Single();
Expression selectParamKey = Expression.Property(selectParam, "Key");
Expression selectParamRawKey = selectParamKey;
if (selectParamKey.Type != typeof(string))
{
var toStringMethod = selectParamKey.Type.GetMethod("ToString", Type.EmptyTypes);
selectParamKey = Expression.Call(selectParamKey, toStringMethod);
}
if (selectParamRawKey.Type != typeof(string))
{
var toStringMethod = selectParamRawKey.Type.GetMethod("ToString", Type.EmptyTypes);
selectParamRawKey = Expression.Call(selectParamRawKey, toStringMethod);
}
var countMethod = GetEnumerableMethod("Count", tElement);
var countMethodExpr = Expression.Call(countMethod, selectParam);
var concatFullKeyExpr = Expression.Call(typeof(string).GetMethod("Concat", new[] { typeof(string), typeof(string), typeof(string) }),
Expression.Constant(key),
Expression.Constant("|"),
selectParamRawKey);
var groupBySelectorMethod = GetGenericMethod(MethodBase.GetCurrentMethod().DeclaringType, "GroupBySelector", new[] { tElement }, new[] { tIElement, typeof(IList<string>), typeof(int), typeof(string) });
var groupBySelectorMethodExpr = Expression.Call(groupBySelectorMethod, selectParam, Expression.Constant(columnNames), Expression.Constant(nextEntry), concatFullKeyExpr);
var newMenuItemExpr = Expression.New(typeof(TreeItem));
var selectBodyExpr = Expression.MemberInit(newMenuItemExpr, new[] {
Expression.Bind(tKey, selectParamKey),
Expression.Bind(tRawKey, selectParamRawKey),
Expression.Bind(tParentKey, Expression.Constant(key) ),
Expression.Bind(tCount, countMethodExpr),
Expression.Bind(tItems, groupBySelectorMethodExpr)
});
var selectBodyExprLamba = Expression.Lambda(selectBodyExpr, selectParam);
var selectBodyLastExpr = Expression.MemberInit(newMenuItemExpr, new[] {
Expression.Bind(tKey, selectParamKey),
Expression.Bind(tRawKey, selectParamRawKey),
Expression.Bind(tParentKey, Expression.Constant(key) ),
Expression.Bind(tCount, countMethodExpr)
});
var selectBodyLastExprLambda = Expression.Lambda(selectBodyLastExpr, selectParam);
var selectMethod = GetEnumerableMethod("Select", tSelectInput, typeof(TreeItem));
bodyExprCall = Expression.Call(selectMethod, bodyExprCall, (nextEntry < columnNames.Count) ? selectBodyExprLamba : selectBodyLastExprLambda);
var selectParamout = Expression.Parameter(typeof(TreeItem), "o");
Expression selectParamKeyout = Expression.Property(selectParamout, "FullKey");
var selectParamKeyLambda = Expression.Lambda(selectParamKeyout, selectParamout);
var lmi = GetEnumerableMethod("ToDictionary", typeof(TreeItem), typeof(string));
bodyExprCall = Expression.Call(lmi, bodyExprCall, selectParamKeyLambda);
var returnFunc = Expression.Lambda<Func<IEnumerable<TElement>, Dictionary<string, TreeItem>>>(bodyExprCall, param).Compile();
return returnFunc(source);
}
}
The routine is used to take data from a DB table and convert it into a hierarchical structure for use in a WPF TreeView.
Dictionary<string, TreeItem> treeItems = new Dictionary<string, TreeItem>();
treeItems = TreeBuilder.BuildTree<IDBRecord>(DBService.GetDBRecordList(), PropertySortList);
Can anyone offer any advice on how to improve the performance of this routine? Or suggest any alternative way of achieving the same result in a more performant way?
Thanks
A couple of optimizations are possible. A lot of time is spent in the call to Compile and you are calling Compile for each key at each level in the tree, which is adding up to a lot of overhead, about 7 seconds on my tests of 5k items. I first changed the code to pull out all static Reflection that had fixed types, so it is only done once per program run. That only made a small difference, since building the Expression tree is not the main issue.
I then changed the method to separate building the Expression from compiling the Expression and calling the resultant lambda. This allowed me to modify the recursive call to the Expression builder to instead be an inline Invoke of a new lambda for the new level. Then I called compile once on the resulting Expression and executed it. The new version no longer takes the entry parameter, but it could be put back in.
This reduced the overall time from about 7.6 seconds to 0.14 seconds for around a 50x speedup. A test with all three properties resulted in a 280x speedup.
If it is still possible for repeated calls to the method, adding a cache would be of even more benefit, though a quick test only shows about 14% time savings, and in the hundredths of a second of real time.
static MemberInfo tKey = typeof(TreeItem).GetMember("Key").Single();
static MemberInfo tRawKey = typeof(TreeItem).GetMember("RawKey").Single();
static MemberInfo tCount = typeof(TreeItem).GetMember("Count").Single();
static MemberInfo tParentKey = typeof(TreeItem).GetMember("ParentKey").Single();
static MemberInfo tItems = typeof(TreeItem).GetMember("Items").Single();
// Concat(string, string, string)
static MethodInfo Concat3MI = ((Func<string, string, string, string>)String.Concat).Method;
// new TreeItem() { ... }
static NewExpression newMenuItemExpr = Expression.New(typeof(TreeItem));
// Enumerable.ToDictionary<TreeItem>(IEnumerable<TreeItem>, Func<TreeItem,string>)
static MethodInfo ToDictionaryMI = GetEnumerableMethod("ToDictionary", typeof(TreeItem), typeof(string));
static Expression<Func<IEnumerable<TElement>, Dictionary<string, TreeItem>>> BuildGroupBySelector<TElement>(IList<string> columnNames, int entry, Expression key) {
List<string> columnParameters = columnNames[entry].Split('|').ToList();
string columnName = columnParameters[0];
if (columnName == null) throw new ArgumentNullException(nameof(columnName));
if (columnName.Length == 0) throw new ArgumentException(nameof(columnName));
int nextEntry = entry + 1;
var tElement = typeof(TElement);
var tIElement = typeof(IEnumerable<TElement>);
// (TElement kp)
var keyParm = Expression.Parameter(tElement, "kp");
// kp.columnName
var prop = Expression.Property(keyParm, columnName);
// (IEnumerable<TElement> p)
var IEParam = Expression.Parameter(tIElement, "p");
// GroupBy<TElement>(IEnumerable<TElement>, Func<TElement, typeof(kp.columnName)>)
var groupByMethod = GetEnumerableMethod("GroupBy", tElement, prop.Type);
// kp => kp.columnName
var groupByExpr = Expression.Lambda(prop, keyParm);
// GroupBy(p, kp => kp.columnName)
var bodyExprCall = Expression.Call(groupByMethod, IEParam, groupByExpr);
// typeof(IGrouping<typeof(kp.columnName), TElement>)
var tSelectInput = typeof(IGrouping<,>).MakeGenericType(prop.Type, tElement);
// (IGrouping<typeof(kp.columnName), TElement> sp)
var selectParam = Expression.Parameter(tSelectInput, "sp");
// sp.Key
Expression selectParamKey = Expression.Property(selectParam, "Key");
Expression selectParamRawKey = selectParamKey;
if (selectParamKey.Type != typeof(string)) {
var toStringMethod = selectParamKey.Type.GetMethod("ToString", Type.EmptyTypes);
// sp.Key.ToString()
selectParamKey = Expression.Call(selectParamKey, toStringMethod);
selectParamRawKey = selectParamKey;
}
// Count<TElement>()
var countMethod = GetEnumerableMethod("Count", tElement);
// sp.Count()
var countMethodExpr = Expression.Call(countMethod, selectParam);
LambdaExpression selectBodyExprLamba;
if (nextEntry < columnNames.Count) {
// Concat(key, "|", sp.Key.ToString())
var concatFullKeyExpr = Expression.Call(Concat3MI, key, Expression.Constant("|"), selectParamRawKey);
// p# => p#.GroupBy().Select().ToDictionary()
var groupBySelectorLambdaExpr = BuildGroupBySelector<TElement>(columnNames, nextEntry, (Expression)concatFullKeyExpr);
// Invoke(p# => p#..., sp#)
var groupBySelectorInvokeExpr = Expression.Invoke(groupBySelectorLambdaExpr, selectParam);
var selectBodyExpr = Expression.MemberInit(newMenuItemExpr, new[] {
Expression.Bind(tKey, selectParamKey),
Expression.Bind(tRawKey, selectParamRawKey),
Expression.Bind(tParentKey, key ),
Expression.Bind(tCount, countMethodExpr),
Expression.Bind(tItems, groupBySelectorInvokeExpr)
});
// sp => new TreeItem { Key = sp.Key.ToString(), RawKey = sp.Key.ToString(), ParentKey = key, Count = sp.Count(), Items = Invoke(p# => p#..., sp)) }
selectBodyExprLamba = Expression.Lambda(selectBodyExpr, selectParam);
}
else { // Last Level
var selectBodyExpr = Expression.MemberInit(newMenuItemExpr, new[] {
Expression.Bind(tKey, selectParamKey),
Expression.Bind(tRawKey, selectParamRawKey),
Expression.Bind(tParentKey, key ),
Expression.Bind(tCount, countMethodExpr)
});
// sp => new TreeItem { Key = sp.Key.ToString(), RawKey = sp.Key.ToString(), ParentKey = key, Count = sp.Count() }
selectBodyExprLamba = Expression.Lambda(selectBodyExpr, selectParam);
}
// Enumerable.Select<IGrouping<typeof<kp.columnName>, TElement>>(IEnumerable<IGrouping<>>, Func<IGrouping<>, TreeItem>)
var selectMethod = GetEnumerableMethod("Select", tSelectInput, typeof(TreeItem));
// p.GroupBy(kp => kp => kp.columnName).Select(sp => ...)
bodyExprCall = Expression.Call(selectMethod, bodyExprCall, selectBodyExprLamba);
// (TreeItem o)
var selectParamout = Expression.Parameter(typeof(TreeItem), "o");
// o.FullKey
Expression selectParamKeyout = Expression.Property(selectParamout, "FullKey");
// o => o.FullKey
var selectParamKeyLambda = Expression.Lambda(selectParamKeyout, selectParamout);
// p.GroupBy(...).Select(...).ToDictionary(o => o.FullKey)
bodyExprCall = Expression.Call(ToDictionaryMI, bodyExprCall, selectParamKeyLambda);
// p => p.GroupBy(kp => kp => kp.columnName).Select(sp => ...).ToDictionary(o => o.FullKey)
return Expression.Lambda<Func<IEnumerable<TElement>, Dictionary<string, TreeItem>>>(bodyExprCall, IEParam);
}
public static Dictionary<string, TreeItem> GroupBySelector<TElement>(IEnumerable<TElement> source, IList<string> columnNames, string key = "") {
if (source == null) throw new ArgumentNullException(nameof(source));
// p => p.GroupBy(kp => kp => kp.columnName).Select(sp => ...).ToDictionary(o => o.FullKey)
var returnFunc = BuildGroupBySelector<TElement>(columnNames, 0, Expression.Constant(key)).Compile();
return returnFunc(source);
}

How to make an List of Objects a parameter in a Lamba using C# Expression API

I'm trying to dynamically create an predicate to pass into a linq where clause. This is for a generic method that takes two list of the same Type and and list of property names to compare.
void SomeMethod<T>(List<T> oldRecords, List<T> newRecords, List<string> propertiesOfT)
{
// dynamically build predicate for this
var notMatch = oldRecords.Where(o => !newRecords.Any(n => n.Prop1 == o.Prop1 && n.Prop2 == o.Prop2)).ToList();
// do somethind with notMatch
}
I would like to convert this:
var notMatch = oldRecords.Where(o => !newRecords.Any(n => n.Prop1 == o.Prop1 && n.Prop2 == o.Prop2)).ToList();
To achieve this:
var predicate = "n => n.Prop1 == o.Prop1 && n.Prop2 == o.Prop2"; // sudo code
var notMatch = oldRecords.Where(o => !newRecords.Any(predicate));
or this
var predicate = "o => !newRecords.Any(n => n.Prop1 == o.Prop1 && n.Prop2 == o.Prop2)" // sudo code
var notMatch = oldRecords.Where(predicate);
How do I populate newRecords when dynamically creating the Expression?
And how would I reference parameter o and parameter n in the Expresssion.
I've gotten this far:
//construct the two parameters
var o = Expression.Parameter(typeof(T), "o");
var n = Expression.Parameter(typeof(T), "n");
// How to I go about populating o with values and n with values
// from oldRecords and newRecords? or is that no neccessary
var property = Expression.Property(o, typeof(T).GetProperty("Id").Name);
var value = Expression.Constant(Convert.ChangeType("12345", typeof(T).GetProperty("Id").PropertyType), typeof(T).GetProperty("Id").PropertyType);
BinaryExpression binaryExpression = Expression.MakeBinary(ExpressionType.Equal, property, value);
Any sudo code or clue where to look to achieve this?
With reflection it's quite easy. You just to have to think about it. Here's the working version.
void SomeMethod<T>(List<T> oldRecords, List<T> newRecords, List<string> propertiesOfT)
{
// get the list of property to match
var properties = propertiesOfT.Select(prop => typeof(T).GetProperty(prop)).ToList();
// Get all old record where we don't find any matching new record where all the property equal that old record
var notMatch = oldRecords.Where(o => !newRecords.Any(n => properties.All(prop => prop.GetValue(o).Equals(prop.GetValue(n))))).ToList();
}
And here's a sample set i tried and it works
public class test
{
public int id { get; set; } = 0;
public string desc { get; set; } = "";
public test(string s, int i)
{
desc = s;id = i;
}
}
private void Main()
{
var oldRecords = new List<test>()
{
new test("test",1),
new test("test",2)
};
var newRecords = new List<test>()
{
new test("test1",1),
new test("test",2)
};
SomeMethod(oldRecords, newRecords, new List<string>() { "id", "desc" });
}

Returning a LINQ database query from a Method

Hello everyone I have this query I am performing in multiple places. Instead of retyping the query over and over, I would like to be able to call a method that returns the query. I am not sure what to put as the return type for the method or if this is even possible to do. I use the query to write a csv file of the information, and I use the query to add items to my observable collection that is bound to a list view.
using (ProjectTrackingDBEntities context = new ProjectTrackingDBEntities())
{
var result = context.TimeEntries.Where(Entry => Entry.Date >= FilterProjectAfterDate
&& Entry.Date <= FilterProjectBeforerDate
&& (FilterProjectName != null ? Entry.ProjectName.Contains(FilterProjectName) : true))
.GroupBy(m => new { m.ProjectName, m.Phase })
.Join(context.Projects, m => new { m.Key.ProjectName, m.Key.Phase }, w => new { w.ProjectName, w.Phase }, (m, w) => new { te = m, proj = w })
.Select(m => new
{
Name = m.te.Key.ProjectName,
Phase = m.te.Key.Phase,
TimeWorked = m.te.Sum(w => w.TimeWorked),
ProposedCompletionDate = m.proj.ProposedCompletionDate,
ActualCompletionDate = m.proj.ActualCompletionDate,
Active = m.proj.Active,
StartDate = m.proj.StartDate,
Description = m.proj.Description,
EstimatedHours = m.proj.EstimatedHours
});
}
I am able to do both right now by retyping the query and performing the subsequent foreach() loops on the data. I would rather be able to do something like:
var ReturnedQuery = GetProjectsQuery();
foreach(var item in ReturnedQuery)
{
//do stuff
}
Any help would be appreciated.
You want to return IQueryable<T> with a known model that represents what it is you are returning. You should not return an anonymous type. Also you want to pass in the DbContext so it can be disposed of by the caller and not in the method otherwise you will receive an exception that the DbContext has been disposed of.
For example:
public IQueryable<ProjectModel> GetProjectQuery(ProjectTrackingDBEntities context) {
return context.TimeEntries.Where(Entry => Entry.Date >= FilterProjectAfterDate
&& Entry.Date <= FilterProjectBeforerDate
&& (FilterProjectName != null ? Entry.ProjectName.Contains(FilterProjectName) : true))
.GroupBy(m => new { m.ProjectName, m.Phase })
.Join(context.Projects, m => new { m.Key.ProjectName, m.Key.Phase }, w => new { w.ProjectName, w.Phase }, (m, w) => new { te = m, proj = w })
.Select(m => new ProjectModel
{
Name = m.te.Key.ProjectName,
Phase = m.te.Key.Phase,
TimeWorked = m.te.Sum(w => w.TimeWorked),
ProposedCompletionDate = m.proj.ProposedCompletionDate,
ActualCompletionDate = m.proj.ActualCompletionDate,
Active = m.proj.Active,
StartDate = m.proj.StartDate,
Description = m.proj.Description,
EstimatedHours = m.proj.EstimatedHours
});
}
ProjectModel.cs
public class ProjectModel {
public string Name {get;set;}
public string Phase {get;set;}
// rest of properties
}
Calling code
using (ProjectTrackingDBEntities context = new ProjectTrackingDBEntities())
{
var ReturnedQuery = GetProjectsQuery(context);
foreach(var item in ReturnedQuery)
{
//do stuff
}
}
It is easy to return the enumerator, but you can't return an enumerator for an anonymous type, unfortunately. Probably the easiest path forward for you would be to return enumerator over the full row object, like this:
public IEnumerable<TimeEntries> GetTimeEntries()
{
using (ProjectTrackingDBEntities context = new ProjectTrackingDBEntities())
{
return context.TimeEntries
.Where
(
Entry =>
Entry.Date >= FilterProjectAfterDate &&
Entry.Date <= FilterProjectBeforerDate &&
(FilterProjectName != null ? Entry.ProjectName.Contains(FilterProjectName) : true)
)
.GroupBy(m => new { m.ProjectName, m.Phase })
.Join
(
context.Projects,
m => new { m.Key.ProjectName, m.Key.Phase },
w => new { w.ProjectName, w.Phase },
(m, w) => new { te = m, proj = w }
);
}
)
}
And use it like this:
var query = GetTimeEntries();
foreach (var row in query.Select( m => new { Name = row.te.Key.ProjectName })
{
Console.WriteLine(row.Name);
}

Best algorithm to determine added and removed items when comparing to collections

I am looking for the best algorithm to compare 2 collections and determine which element got added and which element got removed.
private string GetInvolvementLogging(ICollection<UserInvolvement> newInvolvement, ICollection<UserInvolvement> oldInvolvement)
{
//I defined the new and old dictionary's for you to know what useful data is inside UserInvolvement.
//Both are Dictionary<int, int>, because The Involvement is just a enum flag. Integer. UserId is also Integer.
var newDict = newInvolvement.ToDictionary(x => x.UserID, x => x.Involvement);
var oldDict = oldInvolvement.ToDictionary(x => x.UserID, x => x.Involvement);
//I Want to compare new to old -> and get 2 dictionaries: added and removed.
var usersAdded = new Dictionary<int, Involvement>();
var usersRemoved = new Dictionary<int, Involvement>();
//What is the best algoritm to accomplish this?
return GetInvolvementLogging(usersAdded, usersRemoved);
}
private string GetInvolvementLogging(Dictionary<int, Involvement> usersAdded, Dictionary<int, Involvement> usersRemoved)
{
//TODO: generate a string based on those dictionaries.
return "Change in userinvolvement: ";
}
Added elements are only in newDict removed only in oldDict
var intersection = newDict.Keys.Intersect(oldDict.Keys);
var added = newDict.Keys.Except(intersection);
var removed = oldDict.Keys.Except(intersection);
EDIT
I modify your base function, dictionaries is no neded.
Example UserInvolvement implementation
class UserInvolvement
{
public int UserId;
public string Name;
public string OtherInfo;
public override bool Equals(object obj)
{
if (obj == null)
{
return false;
}
UserInvolvement p = obj as UserInvolvement;
if ((System.Object)p == null)
{
return false;
}
return (UserId == p.UserId) && (Name == p.Name) && (OtherInfo == p.OtherInfo);
}
public override string ToString()
{
return $"{UserId} - {Name} - {OtherInfo}";
}
}
And example function:
private static string GetInvolvementLogging(ICollection<UserInvolvement> newInvolvement,
ICollection<UserInvolvement> oldInvolvement)
{
var intersection = newInvolvement.Select(x => x.UserId).Intersect(oldInvolvement.Select(x => x.UserId));
var addedIds = newInvolvement.Select(x => x.UserId).Except(intersection);
var removedIds = oldInvolvement.Select(x => x.UserId).Except(intersection);
List<UserInvolvement> modifiedUI = new List<UserInvolvement>();
foreach (var i in intersection)
{
var ni = newInvolvement.First(a => a.UserId == i);
var oi = oldInvolvement.First(a => a.UserId == i);
if (!ni.Equals(oi))
{
modifiedUI.Add(ni);
}
}
List<UserInvolvement> addedUI = newInvolvement.Where(x => addedIds.Contains(x.UserId)).Select(w => w).ToList();
List<UserInvolvement> removedUI = oldInvolvement.Where(x => removedIds.Contains(x.UserId)).Select(w => w).ToList();
StringBuilder sb = new StringBuilder();
sb.AppendLine("Added");
foreach (var added in addedUI)
{
sb.AppendLine(added.ToString());
}
sb.AppendLine("Removed");
foreach (var removed in removedUI)
{
sb.AppendLine(removed.ToString());
}
sb.AppendLine("Modified");
foreach (var modified in modifiedUI)
{
sb.AppendLine(modified.ToString());
}
return sb.ToString();
}
And my test function:
static void Main(string[] args)
{
List<UserInvolvement> newUI = new List<UserInvolvement>()
{
new UserInvolvement()
{
UserId = 1,
Name = "AAA",
OtherInfo = "QQQ"
},
new UserInvolvement()
{
UserId = 2,
Name = "BBB",
OtherInfo = "123"
},
new UserInvolvement()
{
UserId = 4,
Name = "DDD",
OtherInfo = "123ert"
}
};
List<UserInvolvement> oldUI = new List<UserInvolvement>()
{
new UserInvolvement()
{
UserId = 2,
Name = "BBBC",
OtherInfo = "123"
},
new UserInvolvement()
{
UserId = 3,
Name = "CCC",
OtherInfo = "QQ44"
},
new UserInvolvement()
{
UserId = 4,
Name = "DDD",
OtherInfo = "123ert"
}
};
string resp = GetInvolvementLogging(newUI, oldUI);
WriteLine(resp);
ReadKey();
WriteLine("CU");
}
Result is:
Added
1 - AAA - QQQ
Removed
3 - CCC - QQ44
Modified
2 - BBB - 123
You could try with Linq:
var usersAdded = newDict.Except(oldDict);
var usersRemoved = oldDict.Except(newDict);
If you need dictionaries as a result you can cast:
var usersAdded = newDict.Except(oldDict).ToDictionary(x => x.Key, x => x.Value);
var usersRemoved = oldDict.Except(newDict).ToDictionary(x => x.Key, x => x.Value);
Think best algorithm will be
foreach (var newItem in newDict)
if (!oldDict.ContainsKey(newItem.Key) || oldDict[newItem.Key]!=newItem.Value)
usersAdded.Add(newItem.Key, newItem.Value);
foreach (var oldItem in oldDict)
if (!newDict.ContainsKey(oldItem.Key) || newDict[oldItem.Key]!=oldItem.Value)
usersRemoved.Add(oldItem.Key, oldItem.Value);
Finally this is my implementation of GetInvolvementLogging:
(the implementation of the string builder method is irrelevant for my question here)
private string GetInvolvementLogging(ICollection<UserInvolvement> newInvolvement, ICollection<UserInvolvement> oldInvolvement)
{
//I defined the new and old dictionary's to focus on the relevant data inside UserInvolvement.
var newDict = newInvolvement.ToDictionary(x => x.UserID, x => (Involvement)x.Involvement);
var oldDict = oldInvolvement.ToDictionary(x => x.UserID, x => (Involvement)x.Involvement);
var intersection = newDict.Keys.Intersect(oldDict.Keys); //These are the id's of the users that were and remain involved.
var usersAdded = newDict.Keys.Except(intersection);
var usersRemoved = oldDict.Keys.Except(intersection);
var addedInvolvement = newDict.Where(x => usersAdded.Contains(x.Key)).ToDictionary(x => x.Key, x => x.Value);
var removedInvolvement = oldDict.Where(x => usersRemoved.Contains(x.Key)).ToDictionary(x => x.Key, x => x.Value);
//Check if the already involved users have a changed involvement.
foreach(var userId in intersection)
{
var newInvolvementFlags = newDict[userId];
var oldInvolvementFlags = oldDict[userId];
if ((int)newInvolvementFlags != (int)oldInvolvementFlags)
{
var xor = newInvolvementFlags ^ oldInvolvementFlags;
var added = newInvolvementFlags & xor;
var removed = oldInvolvementFlags & xor;
if (added != 0)
{
addedInvolvement.Add(userId, added);
}
if (removed != 0)
{
removedInvolvement.Add(userId, removed);
}
}
}
return GetInvolvementLogging(addedInvolvement, removedInvolvement);
}

Convert nested JSON to CSV

I am converting a nested JSON object with more than 10 levels to CSV file in C# .NET.
I have been using JavaScriptSerializer().Deserialize<ObjectA>(json) or XmlNode xml = (XmlDocument)JsonConvert.DeserializeXmlNode(json) to break down the object. With the objects I can further write into CSV file. However now the JSON object further expand. Most data that is not really in use so I would prefer a raw data dump.
Is that easier way I can just dump the data into csv format without declaring the structure?
Sample JSON
{
"F1":1,
"F2":2,
"F3":[
{
"E1":3,
"E2":4
},
{
"E1":5,
"E2":6
},
{
"E1":7,
"E2":8,
"E3":[
{
"D1":9,
"D2":10
}
]
},
]
}
And my expected CSV output is
F1,F2,E1,E2,D1,D2
1,2
1,2,3,4
1,2,5,6
1,2,7,8,9,10
There's an inconsistency in your request: you want a row to be generated for the root object, which has children, but you don't want a row to be generated for the "F3[2]" object, which also has children. So it sounds like your rule is, "print a row for an object with at least one primitive-value property, as long as that object is either the root object or does not have descendant objects with at a least one primitive-value property". That's a little tricky, but can be done with LINQ to JSON
var obj = JObject.Parse(json);
// Collect column titles: all property names whose values are of type JValue, distinct, in order of encountering them.
var values = obj.DescendantsAndSelf()
.OfType<JProperty>()
.Where(p => p.Value is JValue)
.GroupBy(p => p.Name)
.ToList();
var columns = values.Select(g => g.Key).ToArray();
// Filter JObjects that have child objects that have values.
var parentsWithChildren = values.SelectMany(g => g).SelectMany(v => v.AncestorsAndSelf().OfType<JObject>().Skip(1)).ToHashSet();
// Collect all data rows: for every object, go through the column titles and get the value of that property in the closest ancestor or self that has a value of that name.
var rows = obj
.DescendantsAndSelf()
.OfType<JObject>()
.Where(o => o.PropertyValues().OfType<JValue>().Any())
.Where(o => o == obj || !parentsWithChildren.Contains(o)) // Show a row for the root object + objects that have no children.
.Select(o => columns.Select(c => o.AncestorsAndSelf()
.OfType<JObject>()
.Select(parent => parent[c])
.Where(v => v is JValue)
.Select(v => (string)v)
.FirstOrDefault())
.Reverse() // Trim trailing nulls
.SkipWhile(s => s == null)
.Reverse());
// Convert to CSV
var csvRows = new[] { columns }.Concat(rows).Select(r => string.Join(",", r));
var csv = string.Join("\n", csvRows);
Console.WriteLine(csv);
Using
public static class EnumerableExtensions
{
// http://stackoverflow.com/questions/3471899/how-to-convert-linq-results-to-hashset-or-hashedset
public static HashSet<T> ToHashSet<T>(this IEnumerable<T> source)
{
return new HashSet<T>(source);
}
}
Which outputs:
F1,F2,E1,E2,D1,D2
1,2
1,2,3,4
1,2,5,6
1,2,7,8,9,10
I wrote this and it is working for me
Here we save all breadcrumps of object tree in headers with format prop_prop
And save jarray property objects in headers in format prop1
public Dictionary<string, string> ComplexJsonToDictionary(JObject jObject, Dictionary<string, string> result, string field)
{
foreach (var property in jObject.Properties())
{
var endField = field + (string.IsNullOrEmpty(field) ? "" : "_") + property.Name;
var innerDictionary = new Dictionary<string, string>();
try
{
var innerValue = JObject.Parse(Convert.ToString(property.Value));
result.AddOrOverride(ComplexJsonToDictionary(innerValue, innerDictionary, endField));
}
catch (Exception)
{
try
{
var innerValues = JArray.Parse(Convert.ToString(property.Value));
try
{
var i = 0;
foreach (var token in innerValues)
{
var innerValue = JObject.Parse(Convert.ToString(token));
result.AddOrOverride(ComplexJsonToDictionary(innerValue, innerDictionary, endField+i++));
}
}
catch (Exception)
{
result.Add(endField, string.Join(",", innerValues.Values<string>()));
}
}
catch (Exception)
{
result.Add(endField, property.Value.ToString());
}
}
}
return result;
}
Thanks for atantion and please write review if appropriate.
pass the json parsed jobject to static extension, then it will return the array of jobjects with flatten, then convert to csv. some of the source code is taken from the other stack overflow and other resources, I don't have reference.
public static IEnumerable<string> JsonToCsvRowsWithHierarchyHeaders(string jsonData)
{
if (jsonData.Trim().StartsWith("[") && jsonData.Trim().EndsWith("]"))
{
var startString = "{\"appendRoot\":";
var endString = "}";
jsonData = $"{startString}{jsonData}{endString}";
}
var jObject = JsonConvert.DeserializeObject<JObject>(jsonData);
var flattenJObjects = JsonParserExtensions.FlattenJsonGetJObjects(jObject).ToList();
var csvRows = new List<string>();
if (flattenJObjects.Any())
{
var firstRow = flattenJObjects.First().Children<JProperty>().Select(x => x.Name).ToList();
var header = string.Join(delimeter, firstRow).Replace("appendRoot_", "").ToLower();
csvRows.Add(header);
foreach (var flattenJObject in flattenJObjects)
{
var tokens = flattenJObject.Children<JProperty>();
if (firstRow.Count() != tokens.Count())
{
var missMatchPropertyValues = firstRow.Select(cell => tokens.FirstOrDefault(x => x.Name == cell))
.Select(value => value == null
? string.Empty
: CheckAndUpdateRowCellValueTextQualifier(JsonConvert.DeserializeObject<string>(value.Value.ToString(Newtonsoft.Json.Formatting.None))))
.ToList();
var rowString = string.Join(delimeter, missMatchPropertyValues);
csvRows.Add(rowString);
}
else
{
var rowValue = tokens.Select(token =>
CheckAndUpdateRowCellValueTextQualifier(
JsonConvert.DeserializeObject<string>(
token.Value.ToString(Newtonsoft.Json.Formatting.None))));
var rowString = string.Join(delimeter, rowValue);
csvRows.Add(rowString);
}
}
}
return csvRows;
}
private static string CheckAndUpdateRowCellValueTextQualifier(string value)
{
const string q = #"""";
if (!string.IsNullOrEmpty(value) && value.Contains('\"'))
{
return value;
}
if (!string.IsNullOrEmpty(value) && (value.Contains(',') ||
value.Contains('"') ||
value.Contains('\n') || value.Contains('\r')))
{
return $"{q}{value}{q}";
}
return value;
}
}
public static class JsonParserExtensions
{
public static IEnumerable<JObject> FlattenJsonGetJObjects(JObject jObject, string parentName = null)
{
if (!(parentName is null))
jObject = RenamePropertiesByHierarchyName(jObject, parentName);
var fields = jObject.Properties().Where(p => p.Value.GetType().Name == "JValue").ToList();
var objects = jObject.Properties().Where(p => p.Value.GetType().Name == "JObject").ToList();
var arrays = jObject.Properties().Where(p => p.Value.GetType().Name == "JArray").ToList();
var objectsArray = arrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JObject")).ToList();
var valuesArray = arrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JValue")).ToList();
var nestedObjects = ProcessNestedObjects(objects);
var joinedInnerObjects = nestedObjects.Any()
? nestedObjects.Select(innerObject => JoinJObject(new JObject(fields), innerObject))
: new List<JObject> { new JObject(fields) };
var arraysObjectList = GetJObjectsFromArrayOfJProperties(objectsArray);
var arraysValueList = GetJObjectsFromArrayOfValues(valuesArray);
var joinedAll = joinedInnerObjects.SelectMany(inner => JoinMultipleJObjects(arraysObjectList, arraysValueList, inner));
return joinedAll;
}
public static List<JObject> ProcessNestedObjects(List<JProperty> jObjects)
{
var processNestedObjects = new List<JObject>();
var renamedJObjects = jObjects?.Select(obj => RenamePropertiesByHierarchyName(obj.Value.ToObject<JObject>(), obj.Name)).ToList();
if (!(renamedJObjects?.Count > 0)) return processNestedObjects;
var renamed = renamedJObjects.Aggregate((acc, next) => JoinJObject(acc, next));
var nestedObjects = renamed.Properties().Where(p => p.Value.GetType().Name == "JObject").ToList();
var nestedArrays = renamed.Properties().Where(p => p.Value.GetType().Name == "JArray").ToList();
var nestedObjectsArray = nestedArrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JObject")).ToList();
var nestedValuesArray = nestedArrays.Where(array => array.Value.All(elements => elements.GetType().Name == "JValue")).ToList();
nestedArrays.ForEach(p => renamed.Remove(p.Name));
nestedObjects.ForEach(p => renamed.Remove(p.Name));
var nestedObjectList = new List<JObject>();
var nestedMultipleObjectList = new List<JObject>();
foreach (var listJObjects in nestedObjects.Select(innerObject => FlattenJsonGetJObjects(innerObject.Value.ToObject<JObject>(), innerObject.Name)).ToList())
{
if (listJObjects.Count() > 1)
nestedMultipleObjectList.AddRange(listJObjects);
else
nestedObjectList.Add(listJObjects.First());
}
var jObjectsFromArrayOfJProperties = GetJObjectsFromArrayOfJProperties(nestedObjectsArray);
var jObjectsFromArrayOfValues = GetJObjectsFromArrayOfValues(nestedValuesArray);
var aggregate = nestedObjectList.Aggregate(renamed, (acc, next) => JoinJObject(acc, next));
var groupedNestedObjects = (nestedMultipleObjectList.Any()) ? nestedMultipleObjectList.Select(nested => JoinJObject(aggregate, nested))
: new List<JObject> { aggregate };
var groupedNestedObjectsList = groupedNestedObjects.Select(groupedNested => JoinMultipleJObjects(jObjectsFromArrayOfJProperties, jObjectsFromArrayOfValues, groupedNested));
processNestedObjects.AddRange(groupedNestedObjectsList.SelectMany(e => e));
return processNestedObjects;
}
public static List<JObject> JoinMultipleJObjects(List<JObject> nestedArraysObjectList, List<JObject> nestedArraysValueList, JObject groupedNestedObjects)
{
var result = new List<JObject>();
var joined = new List<JObject>();
if (!nestedArraysObjectList.Any())
joined.Add(groupedNestedObjects);
else
nestedArraysObjectList.ForEach(e => joined.Add(JoinJObject(groupedNestedObjects, e)));
result.AddRange(nestedArraysValueList.Any()
? nestedArraysValueList
.SelectMany(value => joined, (value, joinedItem) => JoinJObject(joinedItem, value)).ToList()
: joined);
return result;
}
public static List<JObject> GetJObjectsFromArrayOfJProperties(List<JProperty> nestedJProperties)
{
var fromArrayOfJProperties = new List<JObject>();
foreach (var jProperty in nestedJProperties)
{
var nestedArraysObjectList = new List<JObject>();
var name = jProperty.Name;
var jPropertyValue = jProperty.Value;
var renamedObjects = jPropertyValue?.Select(obj => RenamePropertiesByHierarchyName(obj.ToObject<JObject>(), name)).ToList();
foreach (var jObjects in renamedObjects.Select(innerObject => FlattenJsonGetJObjects(innerObject.ToObject<JObject>())))
{
nestedArraysObjectList.AddRange(jObjects);
}
if (fromArrayOfJProperties.Any() && nestedArraysObjectList.Any())
fromArrayOfJProperties = nestedArraysObjectList
.SelectMany(nested => fromArrayOfJProperties, (current, joined) => JoinJObject(joined, current)).ToList();
if (!fromArrayOfJProperties.Any())
fromArrayOfJProperties.AddRange(nestedArraysObjectList);
}
return fromArrayOfJProperties;
}
public static List<JObject> GetJObjectsFromArrayOfValues(List<JProperty> nestedValuesArray)
{
return (from innerArray in nestedValuesArray let name = innerArray.Name let values = innerArray.Value from innerValue in values select new JObject(new JProperty(name, innerValue.ToObject<JValue>()))).ToList();
}
public static JObject RenamePropertiesByHierarchyName(JObject jObject, string hierarchyName)
{
var properties = jObject.Properties().ToList().Select(p => new JProperty($"{hierarchyName}_{p.Name}", p.Value));
return new JObject(properties);
}
public static JObject JoinJObject(JObject parentJObject, JObject innerObject)
{
var joinJObject = new JObject
{
parentJObject.Properties(),
innerObject.Properties()
};
return joinJObject;
}
}

Categories