I have implemented a TVP + stored-procedure insert strategy because I need to insert large numbers of rows (probably concurrently) while still getting some information back, such as the generated Ids. I use the EF code-first approach to generate the DB structure. My entities:
FacilityGroup
/// <summary>
/// A named group that owns a list of <see cref="FacilityInstance"/> entries.
/// </summary>
public class FacilityGroup
{
    /// <summary>Identifier of the group.</summary>
    public int Id { get; set; }

    /// <summary>Group name; marked as required.</summary>
    [Required]
    public string Name { get; set; }

    /// <summary>Optional free-form notes.</summary>
    public string InternalNotes { get; set; }

    /// <summary>Facilities belonging to this group; starts out as an empty list.</summary>
    public virtual List<FacilityInstance> Facilities { get; set; } = new List<FacilityInstance>();
}
FacilityInstance
/// <summary>
/// A single facility. It belongs to one <see cref="FacilityGroup"/> and is linked to
/// data batches and batch records.
/// </summary>
public class FacilityInstance
{
    /// <summary>Identifier of the facility.</summary>
    public int Id { get; set; }

    /// <summary>Facility name; required, at most 450 characters, indexed as IX_FacilityName.</summary>
    [Required, StringLength(450), Index("IX_FacilityName")]
    public string Name { get; set; }

    /// <summary>Required facility code.</summary>
    [Required]
    public string FacilityCode { get; set; }

    /// <summary>Navigation to the owning group.</summary>
    //[Required]
    public virtual FacilityGroup FacilityGroup { get; set; }

    /// <summary>Foreign key for <see cref="FacilityGroup"/>; indexed as IX_FacilityGroupId.</summary>
    [ForeignKey(nameof(FacilityGroup)), Index("IX_FacilityGroupId")]
    public int FacilityGroupId { get; set; }

    /// <summary>Batches related to this facility; starts out empty.</summary>
    public virtual List<DataBatch> RelatedBatches { get; set; } = new List<DataBatch>();

    /// <summary>Records stored for this facility; starts out empty.</summary>
    public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}
BatchRecord
/// <summary>
/// One imported record. It carries item, classification and purchase-value data and
/// references its <see cref="DataBatch"/>, <see cref="FacilityInstance"/> and
/// <see cref="Currency"/>.
/// </summary>
public class BatchRecord
{
    /// <summary>Identifier of the record (64-bit).</summary>
    public long Id { get; set; }

    //todo index?
    public string ItemName { get; set; }

    /// <summary>Supplier name; at most 450 characters, indexed as IX_Supplier.</summary>
    [StringLength(450), Index("IX_Supplier")]
    public string Supplier { get; set; }

    public decimal Quantity { get; set; }

    public string ItemUnit { get; set; }

    public string EntityUnit { get; set; }

    public decimal ItemSize { get; set; }

    public decimal PackageSize { get; set; }

    /// <summary>Required family code; at most 4 characters, indexed as IX_FamilyCode.</summary>
    [Required, StringLength(4), Index("IX_FamilyCode")]
    public string FamilyCode { get; set; }

    [Required]
    public string Family { get; set; }

    /// <summary>Required category code; at most 16 characters, indexed as IX_CategoryCode.</summary>
    [Required, StringLength(16), Index("IX_CategoryCode")]
    public string CategoryCode { get; set; }

    [Required]
    public string Category { get; set; }

    /// <summary>Required sub-category code; at most 16 characters, indexed as IX_SubCategoryCode.</summary>
    [Required, StringLength(16), Index("IX_SubCategoryCode")]
    public string SubCategoryCode { get; set; }

    [Required]
    public string SubCategory { get; set; }

    public string ItemGroupCode { get; set; }

    public string ItemGroup { get; set; }

    public decimal PurchaseValue { get; set; }

    public decimal UnitPurchaseValue { get; set; }

    public decimal PackagePurchaseValue { get; set; }

    /// <summary>Required navigation to the batch this record was imported with.</summary>
    [Required]
    public virtual DataBatch DataBatch { get; set; }

    /// <summary>Foreign key for <see cref="DataBatch"/>.</summary>
    [ForeignKey(nameof(DataBatch))]
    public int DataBatchId { get; set; }

    /// <summary>Required navigation to the facility this record belongs to.</summary>
    [Required]
    public virtual FacilityInstance FacilityInstance { get; set; }

    /// <summary>Foreign key for <see cref="FacilityInstance"/>; indexed as IX_FacilityInstance.</summary>
    [ForeignKey(nameof(FacilityInstance)), Index("IX_FacilityInstance")]
    public int FacilityInstanceId { get; set; }

    /// <summary>Required navigation to the currency of the purchase values.</summary>
    [Required]
    public virtual Currency Currency { get; set; }

    /// <summary>Foreign key for <see cref="Currency"/>.</summary>
    [ForeignKey(nameof(Currency))]
    public int CurrencyId { get; set; }
}
DataBatch
/// <summary>
/// A named import batch with a creation date, a status, and links to the facilities
/// and records it covers.
/// </summary>
public class DataBatch
{
    /// <summary>Identifier of the batch.</summary>
    public int Id { get; set; }

    /// <summary>Batch name; marked as required.</summary>
    [Required]
    public string Name { get; set; }

    public DateTime DateCreated { get; set; }

    public BatchStatus BatchStatus { get; set; }

    /// <summary>Facilities related to this batch; starts out empty.</summary>
    public virtual List<FacilityInstance> RelatedFacilities { get; set; } = new List<FacilityInstance>();

    /// <summary>Records imported with this batch; starts out empty.</summary>
    public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}
And then my SQL Server related code, TVP Structure:
-- Table type used as the table-valued parameter of dbo.ImportBatchRecords.
-- Facility and currency arrive as codes (FacilityCode / CurrencyCode), not as ids.
CREATE TYPE dbo.RecordImportStructure AS TABLE
(
    ItemName             VARCHAR(MAX),
    Supplier             VARCHAR(MAX),
    Quantity             DECIMAL(18, 2),
    ItemUnit             VARCHAR(MAX),
    EntityUnit           VARCHAR(MAX),
    ItemSize             DECIMAL(18, 2),
    PackageSize          DECIMAL(18, 2),
    FamilyCode           VARCHAR(4),
    Family               VARCHAR(MAX),
    CategoryCode         VARCHAR(MAX),
    Category             VARCHAR(MAX),
    SubCategoryCode      VARCHAR(MAX),
    SubCategory          VARCHAR(MAX),
    ItemGroupCode        VARCHAR(MAX),
    ItemGroup            VARCHAR(MAX),
    PurchaseValue        DECIMAL(18, 2),
    UnitPurchaseValue    DECIMAL(18, 2),
    PackagePurchaseValue DECIMAL(18, 2),
    FacilityCode         VARCHAR(MAX),
    CurrencyCode         VARCHAR(MAX)
);
Insert stored procedure:
-- Inserts every row of @ImportTable into dbo.BatchRecords for batch @BatchId and
-- returns the generated Ids (one row per inserted record) through the OUTPUT clause.
--   @BatchId     : value written to DataBatchId for every inserted row.
--   @ImportTable : rows to insert; facility and currency are supplied as codes.
CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;

INSERT INTO dbo.BatchRecords (
    ItemName,
    Supplier,
    Quantity,
    ItemUnit,
    EntityUnit,
    ItemSize,
    PackageSize,
    FamilyCode,
    Family,
    CategoryCode,
    Category,
    SubCategoryCode,
    SubCategory,
    ItemGroupCode,
    ItemGroup,
    PurchaseValue,
    UnitPurchaseValue,
    PackagePurchaseValue,
    DataBatchId,
    FacilityInstanceId,
    CurrencyId
)
OUTPUT INSERTED.Id
SELECT
    it.ItemName,
    it.Supplier,
    it.Quantity,
    it.ItemUnit,
    it.EntityUnit,
    it.ItemSize,
    it.PackageSize,
    it.FamilyCode,
    it.Family,
    it.CategoryCode,
    it.Category,
    it.SubCategoryCode,
    it.SubCategory,
    it.ItemGroupCode,
    it.ItemGroup,
    it.PurchaseValue,
    it.UnitPurchaseValue,
    it.PackagePurchaseValue,
    @BatchId,
    -- The outer column must be qualified with "it": an unqualified FacilityCode /
    -- CurrencyCode inside the subquery binds to the subquery's own table, which makes
    -- the predicate compare a column with itself and return an arbitrary row.
    (SELECT TOP 1 f.Id FROM dbo.FacilityInstances f WHERE f.FacilityCode = it.FacilityCode),
    (SELECT TOP 1 c.Id FROM dbo.Currencies c WHERE c.CurrencyCode = it.CurrencyCode)
FROM @ImportTable it;
And finally my quick, test only solution to execute this stuff on .NET side.
/// <summary>
/// Imports <see cref="BatchRecordImportItem"/> rows by streaming them to the
/// <c>ImportBatchRecords</c> stored procedure as a table-valued parameter.
/// </summary>
public class BatchRecordDataHandler : IBulkDataHandler<BatchRecordImportItem>
{
    /// <summary>
    /// Executes the import procedure and counts the rows it returns (one per inserted record).
    /// </summary>
    /// <param name="conn">Connection the command runs on.</param>
    /// <param name="transaction">Transaction the command is enlisted in.</param>
    /// <param name="src">Items to import; enumerated lazily while the command executes.</param>
    /// <returns>Number of rows returned by the procedure's OUTPUT clause.</returns>
    public async Task<int> ImportAsync(SqlConnection conn, SqlTransaction transaction, IEnumerable<BatchRecordImportItem> src)
    {
        using (var cmd = new SqlCommand())
        {
            cmd.CommandText = "ImportBatchRecords";
            cmd.Connection = conn;
            cmd.Transaction = transaction;
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.CommandTimeout = 600;

            // NOTE(review): the batch id is hard-coded to 1, as in the original test code.
            cmd.Parameters.Add(new SqlParameter
            {
                ParameterName = "@BatchId",
                SqlDbType = SqlDbType.Int,
                Value = 1
            });
            cmd.Parameters.Add(new SqlParameter
            {
                ParameterName = "@ImportTable",
                TypeName = "dbo.RecordImportStructure",
                SqlDbType = SqlDbType.Structured,
                Value = DataToSqlRecords(src)
            });

            using (var reader = await cmd.ExecuteReaderAsync())
            {
                // Count the returned rows directly instead of buffering them in a DataTable.
                var insertedCount = 0;
                while (await reader.ReadAsync())
                {
                    insertedCount++;
                }

                return insertedCount;
            }
        }
    }

    /// <summary>
    /// Projects the import items onto records laid out like <c>dbo.RecordImportStructure</c>.
    /// A single <see cref="SqlDataRecord"/> instance is refilled and yielded for every item.
    /// </summary>
    private IEnumerable<SqlDataRecord> DataToSqlRecords(IEnumerable<BatchRecordImportItem> src)
    {
        // Decimal columns state precision 18 / scale 2 explicitly to match DECIMAL(18, 2)
        // in the table type; the two-argument SqlMetaData constructor would use scale 0.
        var tvpSchema = new[] {
            new SqlMetaData("ItemName", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Supplier", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Quantity", SqlDbType.Decimal, 18, 2),
            new SqlMetaData("ItemUnit", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("EntityUnit", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemSize", SqlDbType.Decimal, 18, 2),
            new SqlMetaData("PackageSize", SqlDbType.Decimal, 18, 2),
            new SqlMetaData("FamilyCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Family", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("CategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Category", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("SubCategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("SubCategory", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemGroupCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemGroup", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("PurchaseValue", SqlDbType.Decimal, 18, 2),
            new SqlMetaData("UnitPurchaseValue", SqlDbType.Decimal, 18, 2),
            new SqlMetaData("PackagePurchaseValue", SqlDbType.Decimal, 18, 2),
            // Named after the table-type columns they fill (codes, not ids).
            new SqlMetaData("FacilityCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("CurrencyCode", SqlDbType.VarChar, SqlMetaData.Max),
        };

        var dataRecord = new SqlDataRecord(tvpSchema);
        foreach (var importItem in src)
        {
            // Value order must follow tvpSchema above.
            dataRecord.SetValues(
                importItem.ItemName,
                importItem.Supplier,
                importItem.Quantity,
                importItem.ItemUnit,
                importItem.EntityUnit,
                importItem.ItemSize,
                importItem.PackageSize,
                importItem.FamilyCode,
                importItem.Family,
                importItem.CategoryCode,
                importItem.Category,
                importItem.SubCategoryCode,
                importItem.SubCategory,
                importItem.ItemGroupCode,
                importItem.ItemGroup,
                importItem.PurchaseValue,
                importItem.UnitPurchaseValue,
                importItem.PackagePurchaseValue,
                importItem.FacilityCode,
                importItem.CurrencyCode);
            yield return dataRecord;
        }
    }
}
Import entity structure:
/// <summary>
/// Flat import row: item, classification and purchase-value fields plus the batch id
/// and the facility / currency codes.
/// </summary>
public class BatchRecordImportItem
{
    // Item description
    public string ItemName { get; set; }
    public string Supplier { get; set; }
    public decimal Quantity { get; set; }
    public string ItemUnit { get; set; }
    public string EntityUnit { get; set; }
    public decimal ItemSize { get; set; }
    public decimal PackageSize { get; set; }

    // Classification
    public string FamilyCode { get; set; }
    public string Family { get; set; }
    public string CategoryCode { get; set; }
    public string Category { get; set; }
    public string SubCategoryCode { get; set; }
    public string SubCategory { get; set; }
    public string ItemGroupCode { get; set; }
    public string ItemGroup { get; set; }

    // Purchase values
    public decimal PurchaseValue { get; set; }
    public decimal UnitPurchaseValue { get; set; }
    public decimal PackagePurchaseValue { get; set; }

    // References: batch by id, facility and currency by code
    public int DataBatchId { get; set; }
    public string FacilityCode { get; set; }
    public string CurrencyCode { get; set; }
}
Please don't mind the useless reader at the end; it doesn't really do much. Without the reader, inserting 2.5 million (2.5kk) rows took around 26 minutes, while SqlBulkCopy took around 6 minutes. Is there something I'm doing fundamentally wrong? I’m using IsolationLevel.Snapshot, if this matters. I'm using SQL Server 2014 and am free to change the DB structure and indices.
UPD 1
Made a couple of adjustments/improvement attempts suggested by @Xedni, specifically:
Limited all string fields that didn't have a max length to some fixed length
Changed all TVP members from VARCHAR(MAX) to VARCHAR(*SomeValue*)
Added a unique index to FacilityInstance->FacilityCode
Added a unique index to Currency->CurrencyCode
Tried adding WITH RECOMPILE to my SP
Tried using DataTable instead of IEnumerable<SqlDataRecord>
Tried batching data into smaller buckets, 50k and 100k per SP execution instead of 2.5kk
My structure is now like this:
-- Revised table type: every string column now has a bounded length instead of VARCHAR(MAX).
CREATE TYPE dbo.RecordImportStructure AS TABLE
(
    ItemName             VARCHAR(4096),
    Supplier             VARCHAR(450),
    Quantity             DECIMAL(18, 2),
    ItemUnit             VARCHAR(2048),
    EntityUnit           VARCHAR(2048),
    ItemSize             DECIMAL(18, 2),
    PackageSize          DECIMAL(18, 2),
    FamilyCode           VARCHAR(16),
    Family               VARCHAR(512),
    CategoryCode         VARCHAR(16),
    Category             VARCHAR(512),
    SubCategoryCode      VARCHAR(16),
    SubCategory          VARCHAR(512),
    ItemGroupCode        VARCHAR(16),
    ItemGroup            VARCHAR(512),
    PurchaseValue        DECIMAL(18, 2),
    UnitPurchaseValue    DECIMAL(18, 2),
    PackagePurchaseValue DECIMAL(18, 2),
    FacilityCode         VARCHAR(450),
    CurrencyCode         VARCHAR(4)
);
So far no noticeable performance gains unfortunately, 26-28 min as before
UPD 2
Checked the execution plan - indices are my bane?
UPD 3
Added OPTION (RECOMPILE); at the end of my SP, gained a minor boost, now sitting at ~25m for 2.5kk
You could set traceflag 2453:
FIX: Poor performance when you use table variables in SQL Server 2012 or SQL Server 2014
When you use a table variable in a batch or procedure, the query is compiled and optimized for the initial empty state of table variable. If this table variable is populated with many rows at runtime, the pre-compiled query plan may no longer be optimal. For example, the query may be joining a table variable with nested loop since it is usually more efficient for small number of rows. This query plan can be inefficient if the table variable has millions of rows. A hash join may be a better choice under such condition. To get a new query plan, it needs to be recompiled. Unlike other user or temporary tables, however, row count change in a table variable does not trigger a query recompile. Typically, you can work around this with OPTION (RECOMPILE), which has its own overhead cost.
The trace flag 2453 allows the benefit of query recompile without OPTION (RECOMPILE). This trace flag differs from OPTION (RECOMPILE) in two main aspects.
(1) It uses the same row count threshold as other tables. The query does not need to be compiled for every execution unlike OPTION (RECOMPILE). It would trigger recompile only when the row count change exceeds the predefined threshold.
(2) OPTION (RECOMPILE) forces the query to peek parameters and optimize the query for them. This trace flag does not force parameter peeking.
You can turn on trace flag 2453 to allow a table variable to trigger recompile when enough number of rows are changed. This may allow the query optimizer to choose a more efficient plan
Try with the following stored procedure:
-- Variant of the import procedure that first copies the code -> Id lookups into temp
-- tables with unique codes, then resolves FacilityInstanceId / CurrencyId with forced
-- hash joins. Returns the generated Ids through the OUTPUT clause.
--   @BatchId     : value written to DataBatchId for every inserted row.
--   @ImportTable : rows to insert; facility and currency are supplied as codes.
CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;

-- IGNORE_DUP_KEY = ON discards rows with a repeated code instead of failing the insert,
-- so each code maps to a single Id.
CREATE TABLE #FacilityInstances
(
    Id int NOT NULL,
    FacilityCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY = ON)
);

CREATE TABLE #Currencies
(
    Id int NOT NULL,
    CurrencyCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY = ON)
);

INSERT INTO #FacilityInstances (Id, FacilityCode)
SELECT Id, FacilityCode
FROM dbo.FacilityInstances
WHERE FacilityCode IS NOT NULL AND Id IS NOT NULL;

INSERT INTO #Currencies (Id, CurrencyCode)
SELECT Id, CurrencyCode
FROM dbo.Currencies
WHERE CurrencyCode IS NOT NULL AND Id IS NOT NULL;

INSERT INTO dbo.BatchRecords (
    ItemName,
    Supplier,
    Quantity,
    ItemUnit,
    EntityUnit,
    ItemSize,
    PackageSize,
    FamilyCode,
    Family,
    CategoryCode,
    Category,
    SubCategoryCode,
    SubCategory,
    ItemGroupCode,
    ItemGroup,
    PurchaseValue,
    UnitPurchaseValue,
    PackagePurchaseValue,
    DataBatchId,
    FacilityInstanceId,
    CurrencyId
)
OUTPUT INSERTED.Id
SELECT
    IT.ItemName,
    IT.Supplier,
    IT.Quantity,
    IT.ItemUnit,
    IT.EntityUnit,
    IT.ItemSize,
    IT.PackageSize,
    IT.FamilyCode,
    IT.Family,
    IT.CategoryCode,
    IT.Category,
    IT.SubCategoryCode,
    IT.SubCategory,
    IT.ItemGroupCode,
    IT.ItemGroup,
    IT.PurchaseValue,
    IT.UnitPurchaseValue,
    IT.PackagePurchaseValue,
    @BatchId,
    F.Id,
    C.Id
FROM
    -- RIGHT OUTER keeps every import row even when a code has no match (Id becomes NULL).
    #FacilityInstances F RIGHT OUTER HASH JOIN
    (
        #Currencies C
        RIGHT OUTER HASH JOIN @ImportTable IT
            ON C.CurrencyCode = IT.CurrencyCode
    )
        ON F.FacilityCode = IT.FacilityCode;
This enforces the execution plan to use hash match joins instead of nested loops. I think the culprit of bad performance is the first nested loop that performs an index scan for each row in #ImportTable
I don't know if CurrencyCode is unique in Currencies table, so I create the temporal table #Currencies with unique currency codes.
I don't know if FacilityCode is unique in Facilities table, so I create the temporal table #FacilityInstances with unique facility codes.
If they are unique you don't need the temporal tables, you can use the permanent tables directly.
Assuming CurrencyCode and FacilityCode are unique the following stored procedure would be better because it doesn't create unnecessary temporary tables:
-- Variant of the import procedure that joins the permanent lookup tables directly with
-- forced hash joins. As the surrounding text states, this assumes FacilityCode and
-- CurrencyCode are unique; a duplicated code would duplicate the inserted rows.
--   @BatchId     : value written to DataBatchId for every inserted row.
--   @ImportTable : rows to insert; facility and currency are supplied as codes.
CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;

INSERT INTO dbo.BatchRecords (
    ItemName,
    Supplier,
    Quantity,
    ItemUnit,
    EntityUnit,
    ItemSize,
    PackageSize,
    FamilyCode,
    Family,
    CategoryCode,
    Category,
    SubCategoryCode,
    SubCategory,
    ItemGroupCode,
    ItemGroup,
    PurchaseValue,
    UnitPurchaseValue,
    PackagePurchaseValue,
    DataBatchId,
    FacilityInstanceId,
    CurrencyId
)
OUTPUT INSERTED.Id
SELECT
    IT.ItemName,
    IT.Supplier,
    IT.Quantity,
    IT.ItemUnit,
    IT.EntityUnit,
    IT.ItemSize,
    IT.PackageSize,
    IT.FamilyCode,
    IT.Family,
    IT.CategoryCode,
    IT.Category,
    IT.SubCategoryCode,
    IT.SubCategory,
    IT.ItemGroupCode,
    IT.ItemGroup,
    IT.PurchaseValue,
    IT.UnitPurchaseValue,
    IT.PackagePurchaseValue,
    @BatchId,
    F.Id,
    C.Id
FROM
    -- RIGHT OUTER keeps every import row even when a code has no match (Id becomes NULL).
    dbo.FacilityInstances F RIGHT OUTER HASH JOIN
    (
        dbo.Currencies C
        RIGHT OUTER HASH JOIN @ImportTable IT
            ON C.CurrencyCode = IT.CurrencyCode
    )
        ON F.FacilityCode = IT.FacilityCode;
I would guess your proc could use some love. Without seeing an execution plan its hard to say for sure, but here are some thoughts.
A table variable (which a table-valued-parameter essentially is) is always assumed by SQL Server to contain exactly 1 row (even if it doesn't). This is irrelevant for many cases, but you have two correlated subqueries in your insert list which is where I'd focus my attention. It's more than likely hammering that poor table variable with a bunch of nested loop joins because of the cardinality estimate. I would consider putting the rows from your TVP into a temp table, updating the temp table with the IDs from FacilityInstances and Currencies then do your final insert from that.
Well... why not just use SQL Bulk Copy?
There's plenty of solutions out there that help you convert a collection of entities into a IDataReader object that can be handed directly to SqlBulkCopy.
This is a good start...
https://github.com/matthewschrager/Repository/blob/master/Repository.EntityFramework/EntityDataReader.cs
Then it becomes as simple as...
// SqlBulkCopy needs a destination table before WriteToServer is called, and both the
// bulk copy and the reader are disposable, so they are wrapped in using blocks.
using (SqlBulkCopy bulkCopy = new SqlBulkCopy(connection))
using (IDataReader dataReader = storeEntities.AsDataReader())
{
    // Placeholder name: replace with the actual table the entities are copied into.
    bulkCopy.DestinationTableName = "dbo.StoreEntities";
    bulkCopy.WriteToServer(dataReader);
}
I've used this code, the one caveat is that you need to be quite careful about the definition of your entity. The order of the properties in the entity determines the order of the columns exposed by the IDataReader and this needs to correlate with the order of the columns in the table that you are bulk copying to.
Alternatively there's other code here..
https://www.codeproject.com/Tips/1114089/Entity-Framework-Performance-Tuning-Using-SqlBulkC
I know there is an accepted answer, but I can't resist. I believe you can improve the performance 20-50% over the accepted answer.
The key is to SqlBulkCopy to the final table dbo.BatchRecords directly.
To make this happen you need FacilityInstanceId and CurrencyId before calling SqlBulkCopy. To get them, load SELECT Id, FacilityCode FROM FacilityInstances and SELECT Id, CurrencyCode FROM Currencies into collections, then build a dictionary for each:
// Code -> Id lookups built once on the client; each later lookup is a dictionary access.
var facilityIdByFacilityCode = facilitiesCollection.ToDictionary(x => x.FacilityCode, x => x.Id);
var currencyIdByCurrencyCode = currenciesCollection.ToDictionary(x => x.CurrencyCode, x => x.Id);
Once you have the dictionaries, getting the id's from the codes is constant time cost. This is equivalent and very similar to HASH MATCH JOIN in SQL Server, but at the client side.
The other barrier you need to tear down is getting the Id column of newly inserted rows in the dbo.BatchRecords table. Actually, you can get the Ids before inserting the rows.
Make the Id column "sequence driven":
-- Sequence that feeds BatchRecords.Id through a column default, so Ids can also be
-- reserved ahead of the insert.
CREATE SEQUENCE BatchRecords_Id_Seq
    START WITH 1;

CREATE TABLE BatchRecords
(
    Id int NOT NULL
        CONSTRAINT DF_BatchRecords_Id DEFAULT (NEXT VALUE FOR BatchRecords_Id_Seq),
    .....
    CONSTRAINT PK_BatchRecords PRIMARY KEY (Id)
)
Once you have the BatchRecords collection, you know how many records are in it. You can then reserve a contiguous range of sequence values. Execute the following T-SQL:
-- Reserves a contiguous block of @BatchCollectionCount values from BatchRecords_Id_Seq
-- and returns the first and last value of the reserved range.
DECLARE @BatchCollectionCount int = 2500 -- Replace with the actual value
DECLARE @range_first_value sql_variant
DECLARE @range_last_value sql_variant

EXEC sp_sequence_get_range
    @sequence_name = N'BatchRecords_Id_Seq',
    @range_size = @BatchCollectionCount,
    @range_first_value = @range_first_value OUTPUT,
    @range_last_value = @range_last_value OUTPUT

SELECT
    CAST(@range_first_value AS INT) AS range_first_value,
    CAST(@range_last_value AS int) as range_last_value
This returns range_first_value and range_last_value. You can now assign BatchRecord.Id to each record:
// Hand out the reserved ids in order, starting at the first value of the range.
int nextId = range_first_value;
foreach (var record in batchRecords)
{
    record.Id = nextId;
    nextId++;
}
Next, you can SqlBulkCopy the batch record collection directly into the final table dbo.BatchRecords.
To get a DataReader from an IEnumerable<T> to feed SqlBulkCopy.WriteToServer you can use code like this which is part of EntityLite, a micro ORM I developed.
You can make it even faster if you cache facilityIdByFacilityCode and currencyIdByCurrencyCode. To be sure these dictionaries are up to date, you can use SqlDependency or techniques like this one.
Related
I'm playing around EntityFrameworkCore with WebAPI while building voting app exercise.
I want to make the code in async way where is possible.
So should I have to use the nested query in async way somehow(// Problem 1, // Problem 2)?
/* The target of the question - the query*/
// The whole projection is composed into one query that runs when ToListAsync() is awaited.
var pollResults =
    await _context.Polls
        .Select(poll => new PollDto
        {
            Id = poll.Id,
            Question = poll.Question,
            CreatedAt = poll.CreatedAt,
            Options = poll.Options
                .Select(option => new OptionDto
                {
                    Id = option.Id,
                    Value = option.Value,
                    VotesCount = option.Votes.Count() // Problem 1
                })
                .ToList(), // Problem 2
            // A poll can have many votes, so take the newest date explicitly instead of
            // SingleOrDefault(), which is only meaningful for at most one matching row.
            LastVotedAt = _context.PollVotes
                .Where(vote => vote.PollId == poll.Id)
                .OrderByDescending(vote => vote.VoteDate)
                .Select(vote => vote.VoteDate)
                .FirstOrDefault()
        })
        .ToListAsync();
/* Domain classes */
/// <summary>A poll with its answer options and the votes cast on it.</summary>
public class Poll
{
    public int Id { get; set; }

    /// <summary>Question text; read by the poll-results projection.</summary>
    public string Question { get; set; }

    /// <summary>Creation timestamp; read by the poll-results projection.</summary>
    public DateTime CreatedAt { get; set; }

    public ICollection<PollOption> Options { get; set; } = new List<PollOption>();
    public ICollection<PollVote> Votes { get; set; } = new List<PollVote>();
}

/// <summary>One selectable option of a <see cref="Poll"/>.</summary>
public class PollOption
{
    public int Id { get; set; }
    public string Value { get; set; }

    public int PollId { get; set; }
    public Poll Poll { get; set; }

    public ICollection<PollVote> Votes { get; set; } = new List<PollVote>();
}

/// <summary>A vote for one <see cref="PollOption"/> of a <see cref="Poll"/>.</summary>
public class PollVote
{
    public int Id { get; set; }

    public int PollId { get; set; }
    public Poll Poll { get; set; }

    public int OptionId { get; set; }
    public PollOption Option { get; set; }

    public DateTime VoteDate { get; set; }
}
/* Dto classes */
/// <summary>Poll data returned to the caller, including per-option vote counts.</summary>
public class PollDto
{
    public int Id { get; set; }
    public string Question { get; set; }

    /// <summary>Creation timestamp; assigned by the poll-results projection.</summary>
    public DateTime CreatedAt { get; set; }

    public ICollection<OptionDto> Options { get; set; } = new List<OptionDto>();
    public DateTime LastVotedAt { get; set; }
}

/// <summary>One poll option together with the number of votes it received.</summary>
public class OptionDto
{
    public int Id { get; set; }
    public string Value { get; set; }
    public int VotesCount { get; set; }
}
So in not nested queries Count and SingleOrDefault would make request to the database and it should be executed in async way. But in my case the whole query is a single request.
Do I have to modify something so that Count and SingleOrDefault run asynchronously? Or is calling ToListAsync at the end enough?
I believe the answer is that 1 request to the database goes in 1 async call. But I didn't find any solution in the internet.
ToListAsync() at the end is enough. Expressions inside the query are used by EF to compose the query. They are not "executed" as SQL like they would have been as stand-alone statements against the DbSets.
For instance when I run something similar:
// Same projection written in query syntax; it still executes once, at ToListAsync().
var parents = await (
    from x in context.Parents
    select new
    {
        x.ParentId,
        x.Name,
        Children = x.Children.Select(c => new { c.ChildId, c.Name }).ToList(),
        ChildCount = x.Children.Count()
    }).ToListAsync();
in a test and set a breakpoint with a profiler running. The statement produces a single SQL statement:
-- Single statement captured from the profiler for the projection above.
-- The child count is a correlated COUNT subquery (Project1.C1); the child rows come
-- from the LEFT OUTER JOIN on dbo.Children, so both are fetched in one round trip.
SELECT
[Project2].[ParentId] AS [ParentId],
[Project2].[Name] AS [Name],
[Project2].[C2] AS [C1],
[Project2].[C1] AS [C2],
[Project2].[ChildId] AS [ChildId],
[Project2].[Name1] AS [Name1]
FROM ( SELECT
[Project1].[ParentId] AS [ParentId],
[Project1].[Name] AS [Name],
[Extent3].[ChildId] AS [ChildId],
[Extent3].[Name] AS [Name1],
CASE WHEN ([Extent3].[ChildId] IS NULL) THEN CAST(NULL AS int) ELSE 1 END AS [C1],
[Project1].[C1] AS [C2]
FROM (SELECT
[Extent1].[ParentId] AS [ParentId],
[Extent1].[Name] AS [Name],
(SELECT
COUNT(1) AS [A1]
FROM [dbo].[Children] AS [Extent2]
WHERE [Extent1].[ParentId] = [Extent2].[ParentId]) AS [C1]
FROM [dbo].[Parents] AS [Extent1] ) AS [Project1]
LEFT OUTER JOIN [dbo].[Children] AS [Extent3] ON [Project1].[ParentId] = [Extent3].[ParentId]
) AS [Project2]
ORDER BY [Project2].[ParentId] ASC, [Project2].[C1] ASC
go
Not 3 queries that you might be concerned would block. This was when looking at the navigation properties for related records.
The bigger question I saw when looking at your example to double-check was this line:
LastVotedAt = _context.PollVotes.Where(vote=>vote.PollId == poll.Id).Select(vote => vote.VoteDate).SingleOrDefault()
As this would go back directly to the Context rather than access votes through a collection on the Poll. But I tried that as well and it too still resulted in a single query.
Children = x.Children.Select(c => new { c.ChildId, c.Name }).ToList(),
ChildCount = x.Children.Count(),
// The youngest child has the latest BirthDate, so filter first and sort descending.
YoungestChild = context.Children.Where(c => c.ParentId == x.ParentId).OrderByDescending(c => c.BirthDate).FirstOrDefault()
In my test example I go back to the context to retrieve the Youngest child for the parent record rather than the Children navigation property. In this case it still executes as 1 query.
For questions like this I definitely recommend creating an EF experimentation sandbox project with a local database, then using an SQL profiler tool to watch the SQL statements being produced and when they are executed. Async is useful for queries that are expected to take a while to run, but it should be used sparingly, as it can lower overall performance when applied to every trivial query.
When I try to update the following entity, if any of the items in the collection already exist in the db (ie already has a ID value assigned) the sql statement generates an insert for that record and includes the ID field in the insert, but the ID is an Auto Increment field and hence yields a "Cannot insert explicit value for identity column in table".
Entities:
/// <summary>
/// Tenant-owned event with a name, an optional description, a start and end time,
/// and a collection of <see cref="EventOutlet"/> links.
/// </summary>
public class Event : FullAuditedEntity<int>, IMustHaveTenant
{
    /// <summary>Maximum length of <see cref="Name"/>.</summary>
    public const int MaxNameLength = 128;

    /// <summary>Maximum length of <see cref="Description"/>.</summary>
    public const int MaxDescriptionLength = 2048;

    public virtual int TenantId { get; set; }

    [Required, StringLength(MaxNameLength)]
    public virtual string Name { get; set; }

    [StringLength(MaxDescriptionLength)]
    public virtual string Description { get; set; }

    public virtual DateTime DateTimeStart { get; set; }

    public virtual DateTime DateTimeEnd { get; set; }

    /// <summary>Outlet links of this event, keyed by "EventId" on the dependent side.</summary>
    [ForeignKey("EventId")]
    public virtual ICollection<EventOutlet> Outlets { get; set; }
}
/// <summary>
/// Link row that assigns an <see cref="Outlet"/> to an <see cref="Event"/> for a tenant.
/// </summary>
public class EventOutlet : CreationAuditedEntity<int>, IMustHaveTenant
{
    public int TenantId { get; set; }

    /// <summary>Event side of the link; its key is stored in <see cref="EventId"/>.</summary>
    [ForeignKey("EventId")]
    public virtual Event AssignedEvent { get; set; }

    public virtual int EventId { get; set; }

    /// <summary>Outlet side of the link; its key is stored in <see cref="OutletId"/>.</summary>
    [ForeignKey("OutletId")]
    public virtual Outlet AssignedOutlet { get; set; }

    public virtual int OutletId { get; set; }
}
CRUD Update call:
/// <summary>
/// Loads the event identified by <paramref name="input"/>, maps the DTO onto it,
/// saves it through the event manager, and returns the result of <c>Get</c>.
/// </summary>
/// <param name="input">DTO carrying the id and the new values.</param>
public override async Task<EventDto> Update(EventDto input)
{
    CheckUpdatePermission();

    var existing = await _eventManager.GetA(input.Id);
    MapToEntity(input, existing);

    var outcome = await _eventManager.UpdateA(existing);
    CheckErrors(outcome);

    return await Get(input);
}
In the following case there are 2 records in the EventOutlet collection, one that already exists in the db, the other is a new entry. The following sql statement is generated to update the entity. It tries the insert the existing record in the collection supplying the ID field which causes the error, ie this should be an update statement, then it inserts the new record in the collection without the ID field, which is correct. Then updates the entity with its fields which is correct.
What am I missing in the update routine for the existing items in the collection to be updated rather than attempting to be inserted again.
Cheers
-- Captured batch: the first INSERT supplies [Id] explicitly (the existing row),
-- the second INSERT omits it (the new row), then the event itself is updated.
exec sp_executesql N'SET NOCOUNT ON;
INSERT INTO [AppEventOutlets] ([Id], [CreationTime], [CreatorUserId], [EventId], [OutletId], [TenantId])
VALUES (@p0, @p1, @p2, @p3, @p4, @p5);
INSERT INTO [AppEventOutlets] ([CreationTime], [CreatorUserId], [EventId], [OutletId], [TenantId])
VALUES (@p6, @p7, @p8, @p9, @p10);
SELECT [Id]
FROM [AppEventOutlets]
WHERE @@ROWCOUNT = 1 AND [Id] = scope_identity();
UPDATE [AppEvents] SET [CreationTime] = @p11, [CreatorUserId] = @p12, [DateTimeEnd] = @p13, [DateTimeStart] = @p14, [DeleterUserId] = @p15, [DeletionTime] = @p16, [Description] = @p17, [IsDeleted] = @p18, [LastModificationTime] = @p19, [LastModifierUserId] = @p20, [Name] = @p21, [TenantId] = @p22
WHERE [Id] = @p23;
SELECT @@ROWCOUNT;
',N'@p0 int,@p1 datetime2(7),@p2 bigint,@p3 int,@p4 int,@p5 int,@p6 datetime2(7),@p7 bigint,@p8 int,@p9 int,@p10 int,@p23 int,@p11 datetime2(7),@p12 bigint,@p13 datetime2(7),@p14 datetime2(7),@p15 bigint,@p16 datetime2(7),@p17 nvarchar(2048),@p18 bit,@p19 datetime2(7),@p20 bigint,@p21 nvarchar(128),@p22 int',@p0=2009,@p1='2018-05-18 14:46:11.4780000',@p2=4,@p3=1,@p4=1003,@p5=2,@p6='2018-05-24 22:10:36.7558598',@p7=4,@p8=1,@p9=1008,@p10=2,@p23=1,@p11='2018-05-09 17:03:49.6870000',@p12=4,@p13='2018-05-11 07:24:38',@p14='2018-05-10 07:24:35',@p15=NULL,@p16=NULL,@p17=N'Friday',@p18=0,@p19='2018-05-24 22:10:46.7874563',@p20=4,@p21=N'Event 1',@p22=2
I have a CashFlowView:
-- Per rate date: the summed BYR and USD transaction amounts plus a running balance
-- for each currency. Dates without transactions are kept by the RIGHT OUTER JOIN
-- and contribute 0 to both sums.
CREATE VIEW [dbo].[CashFlowView]
AS
WITH CTE AS
(
    SELECT
        -- ROW_NUMBER() returns bigint; cast it so the ID column matches an int key
        -- on the mapped entity.
        CAST(ROW_NUMBER() OVER (ORDER BY CR.RateDate) AS int) AS ID
        , SUM(CASE WHEN C.CurrencyName = 'Br' THEN T.AmountMoney ELSE 0 END) AS AmountBYR
        , SUM(CASE WHEN C.CurrencyName = 'Usd' THEN T.AmountMoney ELSE 0 END) AS AmountUSD
        , CR.RateDate AS [DATE]
    FROM Transactions AS T
    INNER JOIN Accounts AS A ON A.AccountID = T.CurrentAccountID
    INNER JOIN Currencies AS C ON C.CurrencyID = A.CurrencyID
    RIGHT OUTER JOIN CurrencyRates AS CR ON CR.RateDate = T.ExecutionDate
    GROUP BY CR.RateDate
)
SELECT
    ID
    , A.AmountBYR
    -- Running balance: sum of all rows up to and including the current one.
    , (SELECT SUM(B.AmountBYR) FROM CTE B WHERE B.ID <= A.ID) AS BalanceBYR
    , A.AmountUSD
    , (SELECT SUM(B.AmountUSD) FROM CTE B WHERE B.ID <= A.ID) AS BalanceUSD
    , [Date]
FROM CTE AS A
Then I've added the Entity:
/// <summary>
/// Row of the CashFlowView: per-date amounts and running balances in BYR and USD.
/// </summary>
public class CashFlowView
{
    /// <summary>Row identifier, used as the entity key.</summary>
    [Key]
    public int ID { get; set; }

    // BYR amount and running balance
    public decimal AmountBYR { get; set; }
    public decimal BalanceBYR { get; set; }

    // USD amount and running balance
    public decimal AmountUSD { get; set; }
    public decimal BalanceUSD { get; set; }

    public DateTime Date { get; set; }
}
And, as I understand, I need to add this code to my context:
public DbSet<CashFlowView> CashFlowView { get; set; }
And now I wanna to use my View:
// Read every row of the view into a list; the context is disposed when the block exits.
IList<CashFlowView> listView;
using (var db = new EconomicAppContext())
{
    var rows = db.CashFlowView.ToList();
    listView = rows;
}
But listView is empty. How I may create correct mapping to View (maybe using migration) and use it?
I did it. Try to combine this article
http://www.paragon-inc.com/resources/blogs-posts/a-certain-point-of-view-part-1-ef-code-first
Then use Entity Framework Power Tools to find the needed result, and check the connection. I had problems with performance, so use the Dispose method carefully.
I have a fairly simple (code first) model:
Employee
[Table("vEmployee")] //note v - it's a view
/// <summary>Employee row read from the vEmployee view.</summary>
public class Employee
{
    [Key]
    public int EmployeeNumber { get; set; }

    public string FirstName { get; set; }

    public string LastName { get; set; }

    /// <summary>
    /// Optional holiday entitlement of this employee. The model configuration
    /// (<c>HasOptional(x => x.HolidayEntitlement)</c>) and eager loading via
    /// <c>Include(e => e.HolidayEntitlement)</c> both need this navigation property.
    /// </summary>
    public virtual EmployeeHolidayEntitlement HolidayEntitlement { get; set; }
}
EmployeeHolidayEntitlement
[Table("tblEmployeeHolidayEntitlement")]
/// <summary>
/// Holiday entitlement stored in tblEmployeeHolidayEntitlement and keyed by employee number.
/// </summary>
public class EmployeeHolidayEntitlement
{
    /// <summary>Key of this row.</summary>
    [Key]
    public int EmployeeNumber { get; set; }

    /// <summary>Navigation to the employee this entitlement belongs to.</summary>
    public virtual Employee Employee { get; set; }

    public decimal StandardEntitlement { get; set; }

    //.....omitted for brevity
}
Note that EmployeeHolidayEntitlement is mapped to a table, and Employee is mapped to a view
When building my context, I do:
(not sure if this is correct!)
// Employee optionally has one entitlement; every entitlement requires its employee.
var employeeConfig = modelBuilder.Entity<Employee>();
employeeConfig
    .HasOptional(employee => employee.HolidayEntitlement)
    .WithRequired(entitlement => entitlement.Employee);
Now, when I query, like this:
// Materialize all employees in one call.
ApiContext db = new ApiContext();
var result = db.Employees.ToList();
It's very slow.
If I look in SQL profiler, I can see that instead of one statement (joining vEmployee and tblEmployeeHolidayEntitlement) I get many statements executed (one per Employee record) - for example:
First, it selects from vEmployee
-- First statement: all employees from the view (no trailing comma before FROM).
SELECT
[Extent1].[id] AS [EmployeeNumber],
[Extent1].[FirstName] AS [FirstName],
[Extent1].[LastName] AS [LastName]
FROM [dbo].[vEmployee] AS [Extent1]
then one of these for each record returned
-- Follow-up statement issued once per employee: loads that employee's entitlement.
exec sp_executesql N'SELECT
[Extent1].[EmployeeNumber] AS [EmployeeNumber],
[Extent1].[StandardEntitlement] AS [StandardEntitlement]
FROM [dbo].[tblEmployeeHolidayEntitlement] AS [Extent1]
WHERE [Extent1].[EmployeeNumber] = @EntityKeyValue1',N'@EntityKeyValue1 int',@EntityKeyValue1=175219
This doesn't seem right to me -
I would have thought it should be doing something more along the lines of a LEFT JOIN, like
-- Expected shape: one statement that joins the view to the entitlement table.
SELECT *
FROM [dbo].[vEmployee] employee
LEFT JOIN
[dbo].[tblEmployeeHolidayEntitlement] employeeEntitlement
ON
employee.id = employeeEntitlement.employeenumber
You have to use the Include method, like db.Employees.Include(e => e.HolidayEntitlement).ToList(). If you don't and you access the property you'll trigger lazy loading. That's what's happening to you.
For more information check the documentation on loading. The short of it is that if it always joined your entire object graph it'd be unacceptably slow.
I've only just started looking at Dapper.net and have just been experimenting with some different queries, one of which is producing weird results that i wouldn't expect.
I have 2 tables - Photos & PhotoCategories, of which are related on CategoryID
Photos Table
PhotoId (PK - int)
CategoryId (FK - smallint)
UserId (int)
PhotoCategories Table
CategoryId (PK - smallint)
CategoryName (nvarchar(50))
My 2 classes:
/// <summary>
/// A photo together with the category it belongs to.
/// </summary>
public class Photo
{
    /// <summary>Identifier of the photo.</summary>
    public int PhotoId { get; set; }

    /// <summary>Identifier of the category this photo belongs to.</summary>
    public short CategoryId { get; set; }

    /// <summary>Identifier of the user associated with the photo.</summary>
    public int UserId { get; set; }

    /// <summary>The related category object.</summary>
    public PhotoCategory PhotoCategory { get; set; }
}
/// <summary>
/// A category that photos can be assigned to.
/// </summary>
public class PhotoCategory
{
    /// <summary>Identifier of the category.</summary>
    public short CategoryId { get; set; }

    /// <summary>Display name of the category.</summary>
    public string CategoryName { get; set; }
}
I want to use multi-mapping to return an instance of Photo, with a populated instance of the related PhotoCategory.
// Select the photo and its category in one row; @pid is bound from the anonymous object below.
var sql = @"select p.*, c.* from Photos p inner
join PhotoCategories c
on p.CategoryID = c.CategoryID where p.PhotoID = @pid";
cn.Open();
// Multi-mapping: the callback attaches the mapped PhotoCategory to the mapped Photo.
// splitOn names the column at which the PhotoCategory part of the row starts.
var myPhoto = cn.Query<Photo, PhotoCategory, Photo>(sql,
    (photo, photoCategory) =>
    {
        photo.PhotoCategory = photoCategory;
        return photo;
    },
    new { pid = photoID }, null, true, splitOn: "CategoryID").Single();
When this is executed, not all of the properties are getting populated (despite the same names between the DB table and my objects).
I noticed that if I don't 'select p.* etc.' in my SQL, and instead explicitly state the fields I want to return, EXCLUDING p.CategoryId from the query, then everything gets populated (except obviously the CategoryId against the Photo object, which I've excluded from the select statement).
But i would expect to be able to include that field in the query, and have it, as well as all the other fields queried within the SQL, to get populated.
I could just exclude the CategoryId property from my Photo class, and always use Photo.PhotoCategory.CategoryId when i need the ID.
But in some cases I might not want to populate the PhotoCategory object when I get an instance of
the Photo object.
Does anyone know why the above behavior is happening? Is this normal for Dapper?
I just committed a fix for this:
/// <summary>
/// First mapped type of the multi-mapping test; exposes a field and a property.
/// </summary>
class Foo1
{
    // Deliberately a public field rather than a property.
    public int Id;

    /// <summary>Shares its name with <c>Bar1.BarId</c>.</summary>
    public int BarId { get; set; }
}
/// <summary>
/// Second mapped type of the multi-mapping test; exposes a field and a property.
/// </summary>
class Bar1
{
    // Deliberately a public field rather than a property.
    public int BarId;

    /// <summary>Name value of the second mapped object.</summary>
    public string Name { get; set; }
}
/// <summary>
/// Checks that, when splitting on "BarId", the first BarId column is mapped onto
/// <c>Foo1</c> and the second BarId column (plus Name) is mapped onto <c>Bar1</c>.
/// </summary>
public void TestMultiMapperIsNotConfusedWithUnorderedCols()
{
    // The split column name appears twice in the row.
    const string sql = "select 1 as Id, 2 as BarId, 3 as BarId, 'a' as Name";

    var pair = connection
        .Query<Foo1, Bar1, Tuple<Foo1, Bar1>>(
            sql,
            (foo, bar) => Tuple.Create(foo, bar),
            splitOn: "BarId")
        .First();

    var mappedFoo = pair.Item1;
    mappedFoo.Id.IsEqualTo(1);
    mappedFoo.BarId.IsEqualTo(2);

    var mappedBar = pair.Item2;
    mappedBar.BarId.IsEqualTo(3);
    mappedBar.Name.IsEqualTo("a");
}
The multi-mapper was getting confused if there was a field in the first type, that also happened to be in the second type ... AND ... was used as a split point.
To overcome this, Dapper now allows the Id field to show up anywhere in the first type. To illustrate:
Say we have:
classes: A{Id,FooId} B{FooId,Name}
splitOn: "FooId"
data: Id, FooId, FooId, Name
The old method of splitting was taking no account of the actual underlying type it was mapping. So ... it mapped Id => A and FooId, FooId, Name => B
The new method is aware of the props and fields in A. When it first encounters FooId in the stream it does not start a split, since it knows that A has a property called FooId which needs to be mapped, next time it sees FooId it will split, resulting in the expected results.
I'm having a similar problem. It's to do with the fact that both the child and the parent have the same name for the field that is being split on. The following for example works:
/// <summary>
/// Demo of Dapper multi-mapping where the post's foreign key (OwnerId) has a
/// different name from the user's key (UserId), which is the split column.
/// </summary>
class Program
{
    /// <summary>
    /// Creates and fills the #Users and #Posts temp tables, runs the multi-mapped
    /// query, reads the first post, and drops the temp tables.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Script that creates and populates the two temp tables.
        var createSql = @"
create table #Users (UserId int, Name varchar(20))
create table #Posts (Id int, OwnerId int, Content varchar(20))
insert #Users values(99, 'Sam')
insert #Users values(2, 'I am')
insert #Posts values(1, 99, 'Sams Post1')
insert #Posts values(2, 99, 'Sams Post2')
insert #Posts values(3, null, 'no ones post')
";
        // Left join, so the post without an owner is still returned.
        var sql =
@"select * from #Posts p
left join #Users u on u.UserId = p.OwnerId
Order by p.Id";
        using (var connection = new SqlConnection(@"CONNECTION STRING HERE"))
        {
            connection.Open();
            connection.Execute(createSql);
            // The callback attaches the mapped User to the mapped Post; the row is split at "UserId".
            var data = connection.Query<Post, User, Post>(sql, (post, user) => { post.Owner = user; return post; }, splitOn: "UserId");
            var apost = data.First();
            // NOTE(review): self-assignment has no effect; presumably a spot to inspect apost in a debugger.
            apost.Content = apost.Content;
            connection.Execute("drop table #Users drop table #Posts");
        }
    }
}
/// <summary>
/// A user, as stored in the #Users temp table.
/// </summary>
class User
{
    /// <summary>Identifier of the user.</summary>
    public int UserId { get; set; }

    /// <summary>Name of the user.</summary>
    public string Name { get; set; }
}
/// <summary>
/// A post, as stored in the #Posts temp table, plus its owning user.
/// </summary>
class Post
{
    /// <summary>Identifier of the post.</summary>
    public int Id { get; set; }

    /// <summary>Identifier of the owning user.</summary>
    public int OwnerId { get; set; }

    /// <summary>The owning user object.</summary>
    public User Owner { get; set; }

    /// <summary>Text of the post.</summary>
    public string Content { get; set; }
}
But the following does not because "UserId" is used in both tables and both objects.
/// <summary>
/// Demo of Dapper multi-mapping where the post's foreign key and the user's key
/// are both named UserId, which is also the split column.
/// </summary>
class Program
{
    /// <summary>
    /// Creates and fills the #Users and #Posts temp tables, runs the multi-mapped
    /// query, reads the first post, and drops the temp tables.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // Script that creates and populates the two temp tables; both have a UserId column.
        var createSql = @"
create table #Users (UserId int, Name varchar(20))
create table #Posts (Id int, UserId int, Content varchar(20))
insert #Users values(99, 'Sam')
insert #Users values(2, 'I am')
insert #Posts values(1, 99, 'Sams Post1')
insert #Posts values(2, 99, 'Sams Post2')
insert #Posts values(3, null, 'no ones post')
";
        // Left join, so the post without a user is still returned.
        var sql =
@"select * from #Posts p
left join #Users u on u.UserId = p.UserId
Order by p.Id";
        using (var connection = new SqlConnection(@"CONNECTION STRING HERE"))
        {
            connection.Open();
            connection.Execute(createSql);
            // The callback attaches the mapped User to the mapped Post; "UserId" appears twice in each row.
            var data = connection.Query<Post, User, Post>(sql, (post, user) => { post.Owner = user; return post; }, splitOn: "UserId");
            var apost = data.First();
            // NOTE(review): self-assignment has no effect; presumably a spot to inspect apost in a debugger.
            apost.Content = apost.Content;
            connection.Execute("drop table #Users drop table #Posts");
        }
    }
}
/// <summary>
/// A user, as stored in the #Users temp table.
/// </summary>
class User
{
    /// <summary>Identifier of the user.</summary>
    public int UserId { get; set; }

    /// <summary>Name of the user.</summary>
    public string Name { get; set; }
}
/// <summary>
/// A post, as stored in the #Posts temp table, plus its owning user.
/// </summary>
class Post
{
    /// <summary>Identifier of the post.</summary>
    public int Id { get; set; }

    /// <summary>Identifier of the owning user; same name as <c>User.UserId</c>.</summary>
    public int UserId { get; set; }

    /// <summary>The owning user object.</summary>
    public User Owner { get; set; }

    /// <summary>Text of the post.</summary>
    public string Content { get; set; }
}
Dapper's mapping seems to get very confused in this scenario. Think this describes the issue but is there a solution / workaround we can employ (OO design decisions aside)?
I know this question is old but thought I would save someone 2 minutes with the obvious answer to this: Just alias one id from one table:
ie:
-- user.AddressId is aliased to "id", so the user columns do not include one named AddressId.
SELECT
    user.Name,
    user.Email,
    user.AddressId AS id,
    address.*
FROM User user
JOIN Address address
    ON user.AddressId = address.AddressId