LINQ query optimization for slow grouping - c#

I have a LINQ query that gets data via Entity Framework Code First from a SQL database. It works, but it is very slow.
This is the original query:
var tmpResult = from mdv in allMetaDataValues
where mdv.Metadata.InputType == MetadataInputType.String && mdv.Metadata.ShowInFilter && !mdv.Metadata.IsHidden && !string.IsNullOrEmpty(mdv.ValueString)
group mdv by new
{
mdv.ValueString,
mdv.Metadata
} into g
let first = g.FirstOrDefault()
select new
{
MetadataTitle = g.Key.Metadata.Title,
MetadataID = g.Key.Metadata.ID,
CollectionColor = g.Key.Metadata.Collection.Color,
CollectionID = g.Key.Metadata.Collection.ID,
MetadataValueCount = 0,
MetadataValueTitle = g.Key.ValueString,
MetadataValueID = first.ID
};
This is the generated SQL from the original query:
{SELECT
0 AS [C1],
[Project4].[Title] AS [Title],
[Project4].[ID] AS [ID],
[Extent9].[Color] AS [Color],
[Project4].[Collection_ID] AS [Collection_ID],
[Project4].[ValueString] AS [ValueString],
[Project4].[C1] AS [C2]
FROM (SELECT
[Project2].[ValueString] AS [ValueString],
[Project2].[ID] AS [ID],
[Project2].[Title] AS [Title],
[Project2].[Collection_ID] AS [Collection_ID],
(SELECT TOP (1)
[Filter4].[ID1] AS [ID]
FROM ( SELECT [Extent6].[ID] AS [ID1], [Extent6].[ValueString] AS [ValueString], [Extent7].[Collection_ID] AS [Collection_ID1], [Extent8].[ID] AS [ID2], [Extent8].[InputType] AS [InputType], [Extent8].[ShowInFilter] AS [ShowInFilter], [Extent8].[IsHidden] AS [IsHidden1]
FROM [dbo].[MetadataValue] AS [Extent6]
LEFT OUTER JOIN [dbo].[Media] AS [Extent7] ON [Extent6].[Media_ID] = [Extent7].[ID]
INNER JOIN [dbo].[Metadata] AS [Extent8] ON [Extent6].[Metadata_ID] = [Extent8].[ID]
WHERE ( NOT (([Extent6].[ValueString] IS NULL) OR (( CAST(LEN([Extent6].[ValueString]) AS int)) = 0))) AND ([Extent7].[IsHidden] <> cast(1 as bit))
) AS [Filter4]
WHERE (2 = CAST( [Filter4].[InputType] AS int)) AND ([Filter4].[ShowInFilter] = 1) AND ([Filter4].[IsHidden1] <> cast(1 as bit)) AND ([Filter4].[Collection_ID1] = @p__linq__0) AND (([Project2].[ValueString] = [Filter4].[ValueString]) OR (([Project2].[ValueString] IS NULL) AND ([Filter4].[ValueString] IS NULL))) AND (([Project2].[ID] = [Filter4].[ID2]) OR (1 = 0))) AS [C1]
FROM ( SELECT
[Distinct1].[ValueString] AS [ValueString],
[Distinct1].[ID] AS [ID],
[Distinct1].[Title] AS [Title],
[Distinct1].[Collection_ID] AS [Collection_ID]
FROM ( SELECT DISTINCT
[Filter2].[ValueString] AS [ValueString],
[Filter2].[ID3] AS [ID],
[Filter2].[InputType1] AS [InputType],
[Filter2].[Title1] AS [Title],
[Filter2].[ShowInFilter1] AS [ShowInFilter],
[Filter2].[IsHidden2] AS [IsHidden],
[Filter2].[Collection_ID2] AS [Collection_ID]
FROM ( SELECT [Filter1].[ValueString], [Filter1].[Collection_ID3], [Filter1].[IsHidden3], [Filter1].[ID3], [Filter1].[InputType1], [Filter1].[Title1], [Filter1].[ShowInFilter1], [Filter1].[IsHidden2], [Filter1].[Collection_ID2]
FROM ( SELECT [Extent1].[ValueString] AS [ValueString], [Extent2].[Collection_ID] AS [Collection_ID3], [Extent4].[IsHidden] AS [IsHidden3], [Extent5].[ID] AS [ID3], [Extent5].[InputType] AS [InputType1], [Extent5].[Title] AS [Title1], [Extent5].[ShowInFilter] AS [ShowInFilter1], [Extent5].[IsHidden] AS [IsHidden2], [Extent5].[Collection_ID] AS [Collection_ID2]
FROM [dbo].[MetadataValue] AS [Extent1]
LEFT OUTER JOIN [dbo].[Media] AS [Extent2] ON [Extent1].[Media_ID] = [Extent2].[ID]
INNER JOIN [dbo].[Metadata] AS [Extent3] ON [Extent1].[Metadata_ID] = [Extent3].[ID]
LEFT OUTER JOIN [dbo].[Metadata] AS [Extent4] ON [Extent1].[Metadata_ID] = [Extent4].[ID]
LEFT OUTER JOIN [dbo].[Metadata] AS [Extent5] ON [Extent1].[Metadata_ID] = [Extent5].[ID]
WHERE ( NOT (([Extent1].[ValueString] IS NULL) OR (( CAST(LEN([Extent1].[ValueString]) AS int)) = 0))) AND ([Extent2].[IsHidden] <> cast(1 as bit)) AND (2 = CAST( [Extent3].[InputType] AS int)) AND ([Extent3].[ShowInFilter] = 1)
) AS [Filter1]
WHERE [Filter1].[IsHidden3] <> cast(1 as bit)
) AS [Filter2]
WHERE [Filter2].[Collection_ID3] = @p__linq__0
) AS [Distinct1]
) AS [Project2] ) AS [Project4]
LEFT OUTER JOIN [dbo].[Collection] AS [Extent9] ON [Project4].[Collection_ID] = [Extent9].[ID]}
If we remove the "let first = g.FirstOrDefault()" and change "MetadataValueID = first.ID" to "MetadataValueID = 0", so that we just have a fixed ID = 0 for testing purposes, then the data loads very fast and the generated query itself is half the size of the original.
So it seems that this part is making the query very slow:
let first = g.FirstOrDefault()
...
MetadataValueID = first.ID
};
How can this be rewritten?
If I try to rewrite the code, it is still slow:
MetadataValueID = g.Select(x => x.ID).FirstOrDefault()
or
let first = g.Select(x => x.ID).FirstOrDefault()
...
MetadataValueID = first
};
Any suggestions?

Using EF, I have always felt that it has problems efficiently translating expressions like g.Key.Metadata.Collection, so I try to join more explicitly and to include only the fields that are necessary for the result. You can use Include instead of join when using the repository pattern.
Then your query would look like this:
from mdv in allMetaDataValues.Include("Metadata").Include("Metadata.Collection")
where mdv.Metadata.InputType == MetadataInputType.String &&
mdv.Metadata.ShowInFilter &&
!mdv.Metadata.IsHidden &&
!string.IsNullOrEmpty(mdv.ValueString)
group mdv by new
{
MetadataID = mdv.Metadata.ID,
CollectionID = mdv.Metadata.Collection.ID,
mdv.Metadata.Title,
mdv.Metadata.Collection.Color,
mdv.ValueString
} into g
let first = g.FirstOrDefault().ID
select new
{
MetadataTitle = g.Key.Title,
MetadataID = g.Key.MetadataID,
CollectionColor = g.Key.Color,
CollectionID = g.Key.CollectionID,
MetadataValueCount = 0,
MetadataValueTitle = g.Key.ValueString,
MetadataValueID = first
}
Good tool for playing with linq is LinqPad.
The problem is also that:
let first = g.FirstOrDefault().ID
cannot be easily translated to SQL (see this answer). But this rewrite at least simplifies the underlying query for it. It remains unclear to me why you need the first ID from a set without using orderby.
It could be rewritten like this:
let first = (from f in allMetaDataValues
where f.Metadata.ID == g.Key.MetadataID &&
f.ValuesString == g.Key.ValuesString select f.ID)
.FirstOrDefault()
This way you do not let EF write the query for you and you can specify exactly how to do the select.
To speed up the query you can also consider adding indexes to the database according to the generated query - namely an index on both columns used in the where clause of this "let first" query.

Try the following solution.
Replace FirstOrDefault() with .Take(1). FirstOrDefault() is not lazy loaded.
var tmpResult = from mdv in allMetaDataValues
where mdv.Metadata.InputType == MetadataInputType.String && mdv.Metadata.ShowInFilter && !mdv.Metadata.IsHidden && !string.IsNullOrEmpty(mdv.ValueString)
group mdv by new
{
mdv.ValueString,
mdv.Metadata
} into g
let first = g.Take(1)
select new
{
MetadataTitle = g.Key.Metadata.Title,
MetadataID = g.Key.Metadata.ID,
CollectionColor = g.Key.Metadata.Collection.Color,
CollectionID = g.Key.Metadata.Collection.ID,
MetadataValueCount = 0,
MetadataValueTitle = g.Key.ValueString,
MetadataValueID = first.ID
};

Related

Why are multiple where in LINQ so slow?

Using C# and LINQ to SQL, I found that my query with multiple where clauses is orders of magnitude slower than the same query with a single where combining the conditions with &.
Here is the query
using (TeradiodeDataContext dc = new TeradiodeDataContext())
{
var filterPartNumberID = 71;
var diodeIDsInBlades = (from bd in dc.BladeDiodes
select bd.DiodeID.Value).Distinct();
var diodesWithTestData = (from t in dc.Tests
join tt in dc.TestTypes on t.TestTypeID equals tt.ID
where tt.DevicePartNumberID == filterPartNumberID
select t.DeviceID.Value).Distinct();
var result = (from d in dc.Diodes
where d.DevicePartNumberID == filterPartNumberID
where diodesWithTestData.Contains(d.ID)
where !diodeIDsInBlades.Contains(d.ID)
orderby d.Name
select d);
var list = result.ToList();
// ~15 seconds
}
However, when the condition in the final query is this
where d.DevicePartNumberID == filterPartNumberID
& diodesWithTestData.Contains(d.ID)
& !diodeIDsInBlades.Contains(d.ID)
// milliseconds
it is very fast.
Comparing the SQL in result before calling ToList(), here are the queries (value 71 manually added in place of @params)
-- MULTIPLE WHERE
SELECT [t0].[ID], [t0].[Name], [t0].[M2MID], [t0].[DevicePartNumberID], [t0].[Comments], [t0].[Hold]
FROM [dbo].[Diode] AS [t0]
WHERE (NOT (EXISTS(
SELECT NULL AS [EMPTY]
FROM (
SELECT DISTINCT [t2].[value]
FROM (
SELECT [t1].[DiodeID] AS [value]
FROM [dbo].[BladeDiode] AS [t1]
) AS [t2]
) AS [t3]
WHERE [t3].[value] = [t0].[ID]
))) AND (EXISTS(
SELECT NULL AS [EMPTY]
FROM (
SELECT DISTINCT [t6].[value]
FROM (
SELECT [t4].[DeviceID] AS [value], [t5].[DevicePartNumberID]
FROM [dbo].[Test] AS [t4]
INNER JOIN [dbo].[TestType] AS [t5] ON [t4].[TestTypeID] = ([t5].[ID])
) AS [t6]
WHERE [t6].[DevicePartNumberID] = (71)
) AS [t7]
WHERE [t7].[value] = [t0].[ID]
)) AND ([t0].[DevicePartNumberID] = 71)
ORDER BY [t0].[Name]
and
-- SINGLE WHERE
SELECT [t0].[ID], [t0].[Name], [t0].[M2MID], [t0].[DevicePartNumberID], [t0].[Comments], [t0].[Hold]
FROM [dbo].[Diode] AS [t0]
WHERE ([t0].[DevicePartNumberID] = 71) AND (EXISTS(
SELECT NULL AS [EMPTY]
FROM (
SELECT DISTINCT [t3].[value]
FROM (
SELECT [t1].[DeviceID] AS [value], [t2].[DevicePartNumberID]
FROM [dbo].[Test] AS [t1]
INNER JOIN [dbo].[TestType] AS [t2] ON [t1].[TestTypeID] = ([t2].[ID])
) AS [t3]
WHERE [t3].[DevicePartNumberID] = (71)
) AS [t4]
WHERE [t4].[value] = [t0].[ID]
)) AND (NOT (EXISTS(
SELECT NULL AS [EMPTY]
FROM (
SELECT DISTINCT [t6].[value]
FROM (
SELECT [t5].[DiodeID] AS [value]
FROM [dbo].[BladeDiode] AS [t5]
) AS [t6]
) AS [t7]
WHERE [t7].[value] = [t0].[ID]
)))
ORDER BY [t0].[Name]
The two SQL queries execute in < 1 second in SSMS and produce the same results.
So I'm wondering why the first is slower on the LINQ side. It's worrying to me because I know I've used multiple where clauses elsewhere, without being aware of such a severe performance impact.
This question even has answers using both & and multiple where clauses. And this answer even suggests using multiple where clauses.
Can anyone explain why this happens in my case?
Because writing like this
if (someParam1 != 0)
{
myQuery = myQuery.Where(q => q.SomeField1 == someParam1)
}
if (someParam2 != 0)
{
myQuery = myQuery.Where(q => q.SomeField2 == someParam2)
}
is NOT(upd) the same as (in case when someParam1 and someParam2 != 0)
myQuery = from t in Table
where t.SomeField1 == someParam1
&& t.SomeField2 == someParam2
select t;
is (NOT deleted) the same as
myQuery = from t in Table
where t.SomeField1 == someParam1
where t.SomeField2 == someParam2
select t;
UPD
Yes, I made a mistake. The second query is the same; the first is not.
The first and second queries are not EXACTLY the same. Let me show you what I mean.
The 1st query, with a lambda expression, is written as
t.Where(r => t.SomeField1 == someParam1 && t.SomeField2 == someParam2)
2nd query as
t.Where(r => r.SomeField1 == someParam1).Where(r => r.SomeField2 == someParam2)
In this case, the predicate on SomeField2 comes first in the generated SQL (this is important, see below).
In the 1st case we get this SQL:
SELECT <all field from Table>
FROM table t
WHERE t.SomeField1 = :someParam1
AND t.SomeField2 = :someParam2
In the 2nd case the SQL is:
SELECT <all field from Table>
FROM table t
WHERE t.SomeField2 = :someParam2
AND t.SomeField1 = :someParam1
As we can see, these are two 'same' SQL statements. The OP's SQL statements are 'the same' in the same sense: they differ only in the order of the predicates in the WHERE clause (as in my example). My guess is that the SQL optimizer generates two different execution plans, and that maybe(!!!) evaluating NOT EXISTS, then EXISTS, and only then filtering takes more time than filtering first and evaluating EXISTS and NOT EXISTS afterwards.
UPD2
It is a 'problem' of the LINQ provider (ORM). I'm using another ORM (linq2db), and it generates EXACTLY the same SQL for me in both cases.

Linq Left Outer Join with Count

I want to create this SQL query:
SELECT
a.[Seat],
b.[PlayerId],
b.[UserName],
b.[NickName],
COUNT(c.PlayerId) AS Trophy
FROM [dbo].[tbl_PlayerTableSeat] AS a
INNER JOIN [dbo].[tbl_Player] AS b ON a.[PlayerId] = b.[PlayerId]
INNER JOIN [dbo].[tbl_GameVirtualTable] AS d ON d.GameVirtualTableId = a.GameVirtualTableId
LEFT OUTER JOIN [dbo].[tbl_PlayerTableWinning] AS c ON a.[PlayerId] = c.[PlayerId] AND c.GameTableId = d.GameTableId
WHERE a.GameVirtualTableId = 36
GROUP BY a.[Seat], b.[PlayerId], b.[UserName], b.[NickName]
I have this Linq
var virtualTableSeatList = (from s in db.PlayerTableSeat
join p in db.Player on s.PlayerId equals p.PlayerId
join v in db.GameVirtualTable on s.GameVirtualTableId equals v.GameVirtualTableId
join w in db.PlayerTableWinning on new { X1 = s.PlayerId, X2 = v.GameTableId } equals new { X1 = w.PlayerId, X2 = w.GameTableId } into gj
from g in gj.DefaultIfEmpty()
where s.GameVirtualTableId == virtualGameTableId
group new { p, s } by new { p.PlayerId, s.Seat, p.NickName, p.UserName } into grp
select new VirtualTableSeatDto
{
PlayerId = grp.Key.PlayerId,
Seat = grp.Key.Seat,
NickName = grp.Key.NickName,
UserName = grp.Key.UserName,
Trophy = grp.Count()
}
).ToList();
From SQL Profiler, the Linq generates this SQL query:
exec sp_executesql N'SELECT
[GroupBy1].[K2] AS [PlayerId],
CAST( [GroupBy1].[K1] AS int) AS [C1],
[GroupBy1].[K4] AS [NickName],
[GroupBy1].[K3] AS [UserName],
[GroupBy1].[A1] AS [C2]
FROM ( SELECT
[Extent1].[Seat] AS [K1],
[Extent2].[PlayerId] AS [K2],
[Extent2].[UserName] AS [K3],
[Extent2].[NickName] AS [K4],
COUNT(1) AS [A1]
FROM [dbo].[tbl_PlayerTableSeat] AS [Extent1]
INNER JOIN [dbo].[tbl_Player] AS [Extent2] ON [Extent1].[PlayerId] = [Extent2].[PlayerId]
INNER JOIN [dbo].[tbl_GameVirtualTable] AS [Extent3] ON [Extent1].[GameVirtualTableId] = [Extent3].[GameVirtualTableId]
LEFT OUTER JOIN [dbo].[tbl_PlayerTableWinning] AS [Extent4] ON ([Extent1].[PlayerId] = [Extent4].[PlayerId]) AND ([Extent3].[GameTableId] = [Extent4].[GameTableId])
WHERE [Extent1].[GameVirtualTableId] = @p__linq__0
GROUP BY [Extent1].[Seat], [Extent2].[PlayerId], [Extent2].[UserName], [Extent2].[NickName]
) AS [GroupBy1]',N'@p__linq__0 int',@p__linq__0=36
I want to change COUNT(1) AS [A1] to COUNT([Extent4].[PlayerId]) AS [A1]
so it can return correct data.
I have no idea how to change the LinQ
Trophy = grp.Count()
so that it can count PlayerId of PlayerTableWinning instead of COUNT(1)
Updated: @Ivan Stoev
By adding the g into the group.
group new { p, s, g }
And sum the group
Trophy = grp.Sum(item => item.w != null ? 1 : 0)
It returns the correct answer. However, it uses SUM instead of COUNT. The generated SQL query is below:
exec sp_executesql N'SELECT
[GroupBy1].[K2] AS [PlayerId],
CAST( [GroupBy1].[K1] AS int) AS [C1],
[GroupBy1].[K4] AS [NickName],
[GroupBy1].[K3] AS [UserName],
[GroupBy1].[A1] AS [C2]
FROM ( SELECT
[Filter1].[K1] AS [K1],
[Filter1].[K2] AS [K2],
[Filter1].[K3] AS [K3],
[Filter1].[K4] AS [K4],
SUM([Filter1].[A1]) AS [A1]
FROM ( SELECT
[Extent1].[Seat] AS [K1],
[Extent2].[PlayerId] AS [K2],
[Extent2].[UserName] AS [K3],
[Extent2].[NickName] AS [K4],
CASE WHEN ( NOT (([Extent4].[GameTableId] IS NULL) AND ([Extent4].[PlayerId] IS NULL) AND ([Extent4].[GameRoundId] IS NULL))) THEN 1 ELSE 0 END AS [A1]
FROM [dbo].[tbl_PlayerTableSeat] AS [Extent1]
INNER JOIN [dbo].[tbl_Player] AS [Extent2] ON [Extent1].[PlayerId] = [Extent2].[PlayerId]
INNER JOIN [dbo].[tbl_GameVirtualTable] AS [Extent3] ON [Extent1].[GameVirtualTableId] = [Extent3].[GameVirtualTableId]
LEFT OUTER JOIN [dbo].[tbl_PlayerTableWinning] AS [Extent4] ON ([Extent1].[PlayerId] = [Extent4].[PlayerId]) AND ([Extent3].[GameTableId] = [Extent4].[GameTableId])
WHERE [Extent1].[GameVirtualTableId] = @p__linq__0
) AS [Filter1]
GROUP BY [K1], [K2], [K3], [K4]
) AS [GroupBy1]',N'@p__linq__0 int',@p__linq__0=36
The only (but significant) difference between SQL COUNT(field) and COUNT(1) is that the former is excluding the NULL values, which when applied to the normally required field from the right side of a left outer join like in your case produces a different result when there are no matching records - the former returns 0 while the latter returns 1.
The "natural" LINQ equivalent would be Count(field != null), but that unfortunately is translated to a quite different SQL by the current EF query provider. So in such cases I personally use the closer equivalent expression Sum(field != null ? 1 : 0) which produces a much better SQL.
In order to apply the above to your query, you'll need access to w inside the grouping, so change
group new { p, s }
to
group new { p, s, w }
and then use
Trophy = grp.Sum(item => item.w != null ? 1 : 0)

Linq to SQL assistance with joins

Could you please help me translate the following SQL query into LINQ?
It's mainly the joins that I am having issues with.
The result will be LINQ to SQL code which gets executed.
Many thanks.
SELECT * FROM unit INNER JOIN
unit_measurement_total ON unit.prime_measurement_uri = unit_measurement_total.uri RIGHT OUTER JOIN
property_expense_schedule
INNER JOIN
unit_apportionment ON property_expense_schedule.uri = unit_apportionment.property_expense_schedule_uri
INNER JOIN
unit_apportionment_date ON unit_apportionment.uri = unit_apportionment_date.unit_apportionment_uri ON
unit_measurement_total.property_ref = unit_apportionment.property_ref
WHERE (property_expense_schedule.property_ref = ...)
This is the LINQ I have at the moment, but it's not generating the same results as the SQL query, so I am going wrong with the joins and the right outer join.
var query = (from units in context.units
join unitmestot in context.unit_measurement_total on units.prime_measurement_uri equals unitmestot.uri
from pes in context.property_expense_schedule
join unitapp in context.unit_apportionment on new { A = pes.uri, B = unitmestot.property_ref, C = unitmestot.unit_ref } equals new { A = unitapp.property_expense_schedule_uri, B = unitapp.property_ref, C = unitapp.unit_ref}
join unitappdate in context.unit_apportionment_date on unitapp.uri equals unitappdate.unit_apportionment_uri
select new Apportionment()
{
PropertyRef = units.property_ref.ToString(),
ScheduleName = pes.name,
ScheduleRef = pes.@ref.ToString(),
PropertyExpenseScheduleUri = pes.uri,
UnitRef = units.@ref.ToString(),
UnitName = "(" + units.@ref.ToString() + ")" + units.name,
ObseleteUnit = units.obsolete_unit,
ApportionmentPercentage = unitappdate.apportionment_percentage,
ToDate = unitappdate.to_date,
MeasurementBasis = unitmestot.measurement_basis,
MeasuredIn = unitmestot.measured_in,
MeasurementImperialTotal = unitmestot.measurement_imperial_total,
MeasurementMetricTotal = unitmestot.measurement_metric_total
}).Where(filter);
The latest code you supplied generates the following SQL and returns 0 rows.
SELECT [Extent1].[uri] AS [uri], CASE WHEN ([Join3].[property_ref1] IS NULL) THEN N'' ELSE CAST( [Join3].[property_ref1] AS nvarchar(max)) END AS [C1], [Extent3].[name] AS [name], CAST( [Extent3].[ref] AS nvarchar(max)) AS [C2], [Extent3].[uri] AS [uri1], CASE WHEN ([Join3].[ref] IS NULL) THEN N'' ELSE CAST( [Join3].[ref] AS nvarchar(max)) END AS [C3], N'(' + CASE WHEN (CASE WHEN ([Join3].[ref] IS NULL) THEN N'' ELSE CAST( [Join3].[ref] AS nvarchar(max)) END IS NULL) THEN N'' WHEN ([Join3].[ref] IS NULL) THEN N'' ELSE CAST( [Join3].[ref] AS nvarchar(max)) END + N')' + CASE WHEN ([Join3].[name] IS NULL) THEN N'' ELSE [Join3].[name] END AS [C4], [Join3].[obsolete_unit] AS [obsolete_unit], [Extent2].[apportionment_percentage] AS [apportionment_percentage], [Extent2].[to_date] AS [to_date], CASE WHEN ([Join3].[measurement_basis] IS NULL) THEN N'' ELSE [Join3].[measurement_basis] END AS [C5], CASE WHEN ([Join3].[measured_in] IS NULL) THEN N'' ELSE [Join3].[measured_in] END AS [C6], [Join3].[measurement_imperial_total] AS [measurement_imperial_total], [Join3].[measurement_metric_total] AS [measurement_metric_total] FROM [tramps].[unit_apportionment] AS [Extent1] INNER JOIN [tramps].[unit_apportionment_date] AS [Extent2] ON [Extent1].[uri] = [Extent2].[unit_apportionment_uri] INNER JOIN [tramps].[property_expense_schedule] AS [Extent3] ON [Extent1].[property_expense_schedule_uri] = [Extent3].[uri] LEFT OUTER JOIN (SELECT [Extent4].[property_ref] AS [property_ref2], [Extent4].[measurement_basis] AS [measurement_basis], [Extent4].[measured_in] AS [measured_in], [Extent4].[measurement_imperial_total] AS [measurement_imperial_total], [Extent4].[measurement_metric_total] AS [measurement_metric_total], [Extent5].[property_ref] AS [property_ref1], [Extent5].[ref] AS [ref], [Extent5].[name] AS [name], [Extent5].[obsolete_unit] AS [obsolete_unit] FROM [tramps].[unit_measurement_total] AS [Extent4] INNER JOIN [tramps].[unit] AS [Extent5] ON [Extent4].[uri] = 
[Extent5].[prime_measurement_uri] ) AS [Join3] ON [Extent1].[property_ref] = [Join3].[property_ref2] WHERE (N'101329' = (CASE WHEN ([Join3].[property_ref1] IS NULL) THEN N'' ELSE CAST( [Join3].[property_ref1] AS nvarchar(max)) END)) AND ( NOT (('Y' = [Join3].[obsolete_unit]) AND ([Join3].[obsolete_unit] IS NOT NULL)))
LINQ does not support Right Outer Join, so it should be simulated by swapping the left and right parts and performing a Left Outer Join (which is not so natural, but at least the pattern is known - see join clause (C# Reference)).
With that being said, I think the equivalent LINQ query should be something like this:
from unitapp in context.unit_apportionment
join unitappdate in context.unit_apportionment_date on unitapp.uri equals unitappdate.unit_apportionment_uri
join pes in context.property_expense_schedule on unitapp.property_expense_schedule_uri equals pes.uri
join unitmestot in context.unit_measurement_total on unitapp.property_ref equals unitmestot.property_ref
into unitapp_unitmesstot from unitmestot in unitapp_unitmesstot.DefaultIfEmpty() // Left Outer Join
join units in context.units on unitmestot.uri equals units.prime_measurement_uri
select new Apportionment()
{
// ...
}
EDIT: It looks like EF is generating additional criteria for the inner join right after the left outer join, which causes the differences from the original SQL query. In such a case you can try grouping the right part into a subquery in an attempt to change the join order:
from unitapp in context.unit_apportionment
join unitappdate in context.unit_apportionment_date on unitapp.uri equals unitappdate.unit_apportionment_uri
join pes in context.property_expense_schedule on unitapp.property_expense_schedule_uri equals pes.uri
join right in (
from unitmestot in context.unit_measurement_total
join units in context.units on unitmestot.uri equals units.prime_measurement_uri
select new { unitmestot, units }
) on unitapp.property_ref equals right.unitmestot.property_ref
into outerJoin from right in outerJoin.DefaultIfEmpty() // Left Outer Join
let unitmestot = right.unitmestot
let units = right.units
select new Apportionment()
{
// ...
}

Convert T-SQL to Fluent Linq C# with the same SQL generated output

I have the following schema in my database:
The point is to have, for each User, their last read datetime per Thread.
I can also check these dates by Forum and Universe with an aggregation if needed.
A Forum is considered Read if all of its Threads are Read.
A Thread is considered Read if the User's LastView is greater than or equal to the last created post date (from Post.CreatedAt).
I've written the following T-SQL query to achieve this goal per Forum:
SELECT
F.Id,
CASE WHEN SUM(V.IsRead) = COUNT(V.IsRead) THEN 1 ELSE 0 END AS IsRead
FROM Forum F
LEFT JOIN Thread T ON T.Id_Forum = F.Id
LEFT JOIN
(
SELECT
P.Id_Thread,
CASE WHEN MAX(P.CreatedAt) < MAX(V.LastView) THEN 1 ELSE 0 END AS IsRead
FROM Post P
INNER JOIN Thread T ON P.Id_Thread = T.Id
INNER JOIN Forum F ON T.Id_Forum = F.Id
LEFT JOIN Thread_View V ON P.Id_Thread = V.Id_Thread AND V.Id_User = @Id_User
WHERE F.Id_Universe = @Id_Universe
GROUP BY P.Id_Thread
) V ON T.Id = V.Id_Thread
WHERE F.Id_Universe = @Id_Universe
GROUP BY F.Id
ORDER BY F.Id
It works perfectly, BUT I now want to create this query from LINQ using Entity Framework... and I'm stuck...
Here is what I've written, but it generates a very complicated query, and I'm worried about performance...
var viewsQuery = context.Posts
.Where(p => p.Thread.Forum.Id_Universe == idUniverse)
.GroupJoin
(
context.Thread_Views.Where(v => v.Id_User == idUser),
p => p.Id_Thread,
v => v.Id_Thread,
(p, v) => new { Id_Thread = p.Id_Thread, Id_Forum = p.Thread.Id_Forum, CreatedAt = p.CreatedAt, LastView = v.Max(_v => _v.LastView) }
)
.Select(r => new { Id_Thread = r.Id_Thread, Id_Forum = r.Id_Forum, IsRead = r.CreatedAt < r.LastView });
var forumQuery = context.Forums.Where(f => f.Id_Universe == idUniverse).GroupJoin
(
viewsQuery.DefaultIfEmpty(),
forum => forum.Id,
view => view.Id_Forum,
(forum, views) => new
{
Forum = forum.Id,
IsRead = views.Any() && views.All(v => v.IsRead),
}
);
The SQL output generated by linq is this (slightly edited to be more readable), and it's so ugly...
SELECT
[Project1].[Id] AS [Id],
CASE WHEN
(
(
EXISTS
(
SELECT
1 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable1]
INNER JOIN
(
SELECT [Extent3].[Id_Forum] AS [Id_Forum], [Extent4].[Id_Universe] AS [Id_Universe]
FROM [dbo].[Post] AS [Extent2]
INNER JOIN [dbo].[Thread] AS [Extent3] ON [Extent2].[Id_Thread] = [Extent3].[Id]
INNER JOIN [dbo].[Forum] AS [Extent4] ON [Extent3].[Id_Forum] = [Extent4].[Id] ) AS [Join2] ON 1 = 1
WHERE ([Join2].[Id_Universe] = 3) AND ([Project1].[Id] = [Join2].[Id_Forum])
)
)
AND
(
NOT EXISTS
(
SELECT
1 AS [C1]
FROM ( SELECT 1 AS X ) AS [SingleRowTable2]
INNER JOIN
(
SELECT
[Project3].[CreatedAt] AS [CreatedAt],
[Project3].[Id_Forum] AS [Id_Forum],
(
SELECT MAX([Extent8].[LastView]) AS [A1]
FROM [dbo].[Thread_View] AS [Extent8]
WHERE ([Extent8].[Id_User] = 79775) AND ([Project3].[Id_Thread] = [Extent8].[Id_Thread])
) AS [C1]
FROM
(
SELECT
[Extent5].[Id_Thread] AS [Id_Thread],
[Extent5].[CreatedAt] AS [CreatedAt],
[Extent6].[Id] AS [Id],
[Extent6].[Id_Forum] AS [Id_Forum]
FROM [dbo].[Post] AS [Extent5]
INNER JOIN [dbo].[Thread] AS [Extent6] ON [Extent5].[Id_Thread] = [Extent6].[Id]
INNER JOIN [dbo].[Forum] AS [Extent7] ON [Extent6].[Id_Forum] = [Extent7].[Id]
WHERE [Extent7].[Id_Universe] = 3
) AS [Project3]
) AS [Project4] ON 1 = 1
WHERE
(
(
(
CASE
WHEN ([Project4].[CreatedAt] < [Project4].[C1]) THEN cast(1 as bit)
WHEN ( NOT ([Project4].[CreatedAt] < [Project4].[C1])) THEN cast(0 as bit)
END
) <> 1
)
OR
(
CASE
WHEN ([Project4].[CreatedAt] < [Project4].[C1]) THEN cast(1 as bit)
WHEN ( NOT ([Project4].[CreatedAt] < [Project4].[C1])) THEN cast(0 as bit)
END IS NULL
)
) AND ([Project1].[Id] = [Project4].[Id_Forum])
)
)
) THEN cast(1 as bit)
ELSE cast(0 as bit) END AS [C1]
FROM
(
SELECT
[Extent1].[Id] AS [Id]
FROM [dbo].[Forum] AS [Extent1]
WHERE [Extent1].[Id_Universe] = 3
) AS [Project1]
EDIT: Found a working LINQ query with the same result, but it is so ugly that I'm very worried about performance...
var forumsWhichAreUnread = from forum on context.Forums
let isUnRead = forum.Threads.All(thread =>
thread.ThreadViews
.Where(view => view.UserId = userId)
.Max(view => view.Lastview)
<
thread.Posts.Max(post => post.CreatedAt))
select new {forum, isUnRead};
Translation
Get each forum in context.Forums
Only give me forums where every single thread...
the largest threadview.LastView time is smaller than the Latest Post Date.
Select out those forums.

Optimising LINQ-to-SQL queries

I have a very heavy LINQ-to-SQL query, which does a number of joins onto different tables to return an anonymous type. The problem is, if the number of rows returned is fairly large (> 200), then the query becomes awfully slow and ends up timing out. I know I can increase the data context timeout setting, but that's a last resort.
I'm just wondering if my query would work better if I split it up and did my comparisons as LINQ-to-Objects queries, so I could possibly even use PLINQ to maximise the processing power. But that's a foreign concept to me, and I can't get my head around how I would split it up. Can anyone offer any advice? I'm not asking for the code to be written for me; just some general guidance on how I could improve this would be great.
Note I've ensured the database has all the correct keys that I'm joining on, and I've ensured these keys are up to date.
The query is below:
var cons = (from c in dc.Consignments
join p in dc.PODs on c.IntConNo equals p.Consignment into pg
join d in dc.Depots on c.DeliveryDepot equals d.Letter
join sl in dc.Accounts on c.Customer equals sl.LegacyID
join ss in dc.Accounts on sl.InvoiceAccount equals ss.LegacyID
join su in dc.Accounts on c.Subcontractor equals su.Name into sug
join sub in dc.Accountsubbies on ss.ID equals sub.AccountID into subg
where (sug.FirstOrDefault() == null
|| sug.FirstOrDefault().Customer == false)
select new
{
ID = c.ID,
IntConNo = c.IntConNo,
LegacyID = c.LegacyID,
PODs = pg.DefaultIfEmpty(),
TripNumber = c.TripNumber,
DropSequence = c.DropSequence,
TripDate = c.TripDate,
Depot = d.Name,
CustomerName = c.Customer,
CustomerReference = c.CustomerReference,
DeliveryName = c.DeliveryName,
DeliveryTown = c.DeliveryTown,
DeliveryPostcode = c.DeliveryPostcode,
VehicleText = c.VehicleReg + c.Subcontractor,
SubbieID = sug.DefaultIfEmpty().FirstOrDefault().ID.ToString(),
SubbieList = subg.DefaultIfEmpty(),
ScanType = ss.PODScanning == null ? 0 : ss.PODScanning
});
Here's the generated SQL as requested:
{SELECT [t0].[ID], [t0].[IntConNo], [t0].[LegacyID], [t6].[test], [t6].[ID] AS [ID2], [t6].[Consignment], [t6].[Status], [t6].[NTConsignment], [t6].[CustomerRef], [t6].[Timestamp], [t6].[SignedBy], [t6].[Clause], [t6].[BarcodeNumber], [t6].[MainRef], [t6].[Notes], [t6].[ConsignmentRef], [t6].[PODedBy], (
SELECT COUNT(*)
FROM (
SELECT NULL AS [EMPTY]
) AS [t10]
LEFT OUTER JOIN (
SELECT NULL AS [EMPTY]
FROM [dbo].[PODs] AS [t11]
WHERE [t0].[IntConNo] = [t11].[Consignment]
) AS [t12] ON 1=1
) AS [value], [t0].[TripNumber], [t0].[DropSequence], [t0].[TripDate], [t1].[Name] AS [Depot], [t0].[Customer] AS [CustomerName], [t0].[CustomerReference], [t0].[DeliveryName], [t0].[DeliveryTown], [t0].[DeliveryPostcode], [t0].[VehicleReg] + [t0].[Subcontractor] AS [VehicleText], CONVERT(NVarChar,(
SELECT [t16].[ID]
FROM (
SELECT TOP (1) [t15].[ID]
FROM (
SELECT NULL AS [EMPTY]
) AS [t13]
LEFT OUTER JOIN (
SELECT [t14].[ID]
FROM [dbo].[Account] AS [t14]
WHERE [t0].[Subcontractor] = [t14].[Name]
) AS [t15] ON 1=1
ORDER BY [t15].[ID]
) AS [t16]
)) AS [SubbieID],
(CASE
WHEN [t3].[PODScanning] IS NULL THEN @p0
ELSE [t3].[PODScanning]
END) AS [ScanType], [t3].[ID] AS [ID3]
FROM [dbo].[Consignments] AS [t0]
INNER JOIN [dbo].[Depots] AS [t1] ON [t0].[DeliveryDepot] = [t1].[Letter]
INNER JOIN [dbo].[Account] AS [t2] ON [t0].[Customer] = [t2].[LegacyID]
INNER JOIN [dbo].[Account] AS [t3] ON [t2].[InvoiceAccount] = [t3].[LegacyID]
LEFT OUTER JOIN ((
SELECT NULL AS [EMPTY]
) AS [t4]
LEFT OUTER JOIN (
SELECT 1 AS [test], [t5].[ID], [t5].[Consignment], [t5].[Status], [t5].[NTConsignment], [t5].[CustomerRef], [t5].[Timestamp], [t5].[SignedBy], [t5].[Clause], [t5].[BarcodeNumber], [t5].[MainRef], [t5].[Notes], [t5].[ConsignmentRef], [t5].[PODedBy]
FROM [dbo].[PODs] AS [t5]
) AS [t6] ON 1=1 ) ON [t0].[IntConNo] = [t6].[Consignment]
WHERE ((NOT (EXISTS(
SELECT TOP (1) NULL AS [EMPTY]
FROM [dbo].[Account] AS [t7]
WHERE [t0].[Subcontractor] = [t7].[Name]
ORDER BY [t7].[ID]
))) OR (NOT (((
SELECT [t9].[Customer]
FROM (
SELECT TOP (1) [t8].[Customer]
FROM [dbo].[Account] AS [t8]
WHERE [t0].[Subcontractor] = [t8].[Name]
ORDER BY [t8].[ID]
) AS [t9]
)) = 1))) AND ([t2].[Customer] = 1) AND ([t3].[Customer] = 1)
ORDER BY [t0].[ID], [t1].[ID], [t2].[ID], [t3].[ID], [t6].[ID]
}
Try moving the subcontractor join up higher and push the where clause along with it. That way you're not unnecessarily making joins which would fail at the end.
I would also modify the select for the subcontractor id, so you don't get the Id of a potentially null value.
var cons = (from c in dc.Consignments
join su in dc.Accounts on c.Subcontractor equals su.Name into sug
where (sug.FirstOrDefault() == null || sug.FirstOrDefault().Customer == false)
join p in dc.PODs on c.IntConNo equals p.Consignment into pg
join d in dc.Depots on c.DeliveryDepot equals d.Letter
join sl in dc.Accounts on c.Customer equals sl.LegacyID
join ss in dc.Accounts on sl.InvoiceAccount equals ss.LegacyID
join sub in dc.Accountsubbies on ss.ID equals sub.AccountID into subg
let firstSubContractor = sug.DefaultIfEmpty().FirstOrDefault()
select new
{
ID = c.ID,
IntConNo = c.IntConNo,
LegacyID = c.LegacyID,
PODs = pg.DefaultIfEmpty(),
TripNumber = c.TripNumber,
DropSequence = c.DropSequence,
TripDate = c.TripDate,
Depot = d.Name,
CustomerName = c.Customer,
CustomerReference = c.CustomerReference,
DeliveryName = c.DeliveryName,
DeliveryTown = c.DeliveryTown,
DeliveryPostcode = c.DeliveryPostcode,
VehicleText = c.VehicleReg + c.Subcontractor,
SubbieID = firstSubContractor == null ? "" : firstSubContractor.ID.ToString(),
SubbieList = subg.DefaultIfEmpty(),
ScanType = ss.PODScanning == null ? 0 : ss.PODScanning
});

Categories