CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN DECLARE @startDate DATE = DATEADD(DAY,-6,@endDate) SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END
如果Sale表很大,则SELECT可能需要很长时间才能执行,这显然是因为优化器由于局部变量而无法进行优化.我们测试了使用变量然后硬编码日期运行SELECT部分,执行时间从约9分钟到约1秒.
我们有许多基于“固定”日期范围(周,月,8周等)查询的存储过程,因此输入参数只是@endDate,而@startDate是在过程中计算的.
问题是,在WHERE子句中避免变量的最佳做法是什么,以免损害优化器?
我们提出的可能性如下所示.这些是最佳实践,还是有其他方法?
使用包装程序将变量转换为参数.
参数不会像局部变量那样影响优化器.
CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN DECLARE @startDate DATE = DATEADD(DAY,@endDate) EXECUTE DateRangeProc @startDate,@endDate END CREATE PROCEDURE DateRangeProc(@startDate DATE,@endDate DATE) AS BEGIN SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END
使用参数化动态sql.
CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN DECLARE @startDate DATE = DATEADD(DAY,@endDate) DECLARE @sql NVARCHAR(4000) = N' SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate ' DECLARE @param NVARCHAR(4000) = N'@startDate DATE,@endDate DATE' EXECUTE sp_executesql @sql,@param,@startDate = @startDate,@endDate = @endDate END
使用“硬编码”动态sql.
CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN DECLARE @startDate DATE = DATEADD(DAY,@endDate) DECLARE @sql NVARCHAR(4000) = N' SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate ' SET @sql = REPLACE(@sql,'@startDate',CONVERT(NCHAR(10),@startDate,126)) SET @sql = REPLACE(@sql,'@endDate',@endDate,126)) EXECUTE sp_executesql @sql END
直接使用DATEADD()函数.
CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN DATEADD(DAY,@endDate) AND @endDate END
使用可选参数.
我不确定分配参数是否会产生与分配变量相同的问题,因此这可能不是一个选项.我不太喜欢这个解决方案,但包括它的完整性.
CREATE PROCEDURE WeeklyProc(@endDate DATE,@startDate DATE = NULL) AS BEGIN SET @startDate = DATEADD(DAY,@endDate) SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END
– 更新 –
感谢您的建议和意见.阅读完之后,我用各种方法进行了一些时序测试.我在这里添加结果作为参考.
运行1没有计划.运行2在运行1之后立即具有完全相同的参数,因此它将使用运行1中的计划.
NoProc时间用于在存储过程外的SSMS中手动运行SELECT查询.
TestProc1-7是原始问题的查询.
TestProcA-B基于Mikael Eriksson的建议.数据库中的列是DATE,因此我尝试将参数作为DATETIME传递并使用隐式转换(testProcA)和显式转换(testProcB)运行.
TestProcC-D基于Kenneth Fisher的建议.我们已经将日期查找表用于其他事项,但我们没有针对每个期间范围的特定列.我尝试过的变化仍然使用BETWEEN但是在较小的查找表上进行并加入到较大的表中.我将进一步研究是否可以使用特定的查找表,虽然我们的周期是固定的,但有很多不同的查找表.
Total rows in Sale table: 136,424,366 Run 1 (ms) Run 2 (ms) Procedure cpu Elapsed cpu Elapsed Comment NoProc constants 6567 62199 2870 719 Manual query with constants NoProc variables 9314 62424 3993 998 Manual query with variables testProc1 6801 62919 2871 736 Hard coded range testProc2 8955 63190 3915 979 Parameter and variable range testProc3 8985 63152 3932 987 Wrapper procedure with parameter range testProc4 9142 63939 3931 977 Parameterized dynamic sql testProc5 7269 62933 2933 728 Hard coded dynamic sql testProc6 9266 63421 3915 984 Use DATEADD on DATE testProc7 2044 13950 1092 1087 Dummy parameter testProcA 12120 61493 5491 1875 Use DATEADD on DATETIME without CAST testProcB 8612 61949 3932 978 Use DATEADD on DATETIME with CAST testProcC 8861 61651 3917 993 Use lookup table,Sale first testProcD 8625 61740 3994 1031 Use lookup table,Sale last
这是测试代码.
------ SETUP ------ IF OBJECT_ID(N'testDimDate',N'U') IS NOT NULL DROP TABLE testDimDate IF OBJECT_ID(N'testProc1',N'P') IS NOT NULL DROP PROCEDURE testProc1 IF OBJECT_ID(N'testProc2',N'P') IS NOT NULL DROP PROCEDURE testProc2 IF OBJECT_ID(N'testProc3',N'P') IS NOT NULL DROP PROCEDURE testProc3 IF OBJECT_ID(N'testProc3a',N'P') IS NOT NULL DROP PROCEDURE testProc3a IF OBJECT_ID(N'testProc4',N'P') IS NOT NULL DROP PROCEDURE testProc4 IF OBJECT_ID(N'testProc5',N'P') IS NOT NULL DROP PROCEDURE testProc5 IF OBJECT_ID(N'testProc6',N'P') IS NOT NULL DROP PROCEDURE testProc6 IF OBJECT_ID(N'testProc7',N'P') IS NOT NULL DROP PROCEDURE testProc7 IF OBJECT_ID(N'testProcA',N'P') IS NOT NULL DROP PROCEDURE testProcA IF OBJECT_ID(N'testProcB',N'P') IS NOT NULL DROP PROCEDURE testProcB IF OBJECT_ID(N'testProcC',N'P') IS NOT NULL DROP PROCEDURE testProcC IF OBJECT_ID(N'testProcD',N'P') IS NOT NULL DROP PROCEDURE testProcD GO CREATE TABLE testDimDate ( DateKey DATE NOT NULL,CONSTRAINT PK_DimDate_DateKey UNIQUE NONCLUSTERED (DateKey ASC) ) GO DECLARE @dateTimeStart DATETIME = '2000-01-01' DECLARE @dateTimeEnd DATETIME = '2100-01-01' ;WITH CTE AS ( --Anchor member defined SELECT @dateTimeStart FullDate UNION ALL --Recursive member defined referencing CTE SELECT FullDate + 1 FROM CTE WHERE FullDate + 1 <= @dateTimeEnd ) SELECT CAST(FullDate AS DATE) AS DateKey INTO #DimDate FROM CTE OPTION (MAXRECURSION 0) INSERT INTO testDimDate (DateKey) SELECT DateKey FROM #DimDate ORDER BY DateKey ASC DROP TABLE #DimDate GO -- Hard coded date range. CREATE PROCEDURE testProc1 AS BEGIN SET NOCOUNT ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN '2012-12-09' AND '2012-12-10' END GO -- Parameter and variable date range. CREATE PROCEDURE testProc2(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,-1,@endDate) SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END GO -- Parameter date range. CREATE PROCEDURE testProc3a(@startDate DATE,@endDate DATE) AS BEGIN SET NOCOUNT ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END GO -- Wrapper procedure. CREATE PROCEDURE testProc3(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,@endDate) EXEC testProc3a @startDate,@endDate END GO -- Parameterized dynamic sql. CREATE PROCEDURE testProc4(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,@endDate) DECLARE @sql NVARCHAR(4000) = N'SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate' DECLARE @param NVARCHAR(4000) = N'@startDate DATE,@endDate DATE' EXEC sp_executesql @sql,@endDate = @endDate END GO -- Hard coded dynamic sql. CREATE PROCEDURE testProc5(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,@endDate) DECLARE @sql NVARCHAR(4000) = N'SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN ''@startDate'' AND ''@endDate''' SET @sql = REPLACE(@sql,126)) SET @sql = REPLACE(@sql,126)) EXEC sp_executesql @sql END GO -- Explicitly use DATEADD on a DATE. CREATE PROCEDURE testProc6(@endDate DATE) AS BEGIN SET NOCOUNT ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN DATEADD(DAY,@endDate) AND @endDate END GO -- Dummy parameter. CREATE PROCEDURE testProc7(@endDate DATE,@startDate DATE = NULL) AS BEGIN SET NOCOUNT ON SET @startDate = DATEADD(DAY,@endDate) SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END GO -- Explicitly use DATEADD on a DATETIME with implicit CAST for comparison with SaleDate. -- Based on the answer from Mikael Eriksson. CREATE PROCEDURE testProcA(@endDateTime DATETIME) AS BEGIN SET NOCOUNT ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN DATEADD(DAY,@endDateTime) AND @endDateTime END GO -- Explicitly use DATEADD on a DATETIME but CAST to DATE for comparison with SaleDate. -- Based on the answer from Mikael Eriksson. CREATE PROCEDURE testProcB(@endDateTime DATETIME) AS BEGIN SET NOCOUNT ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN CAST(DATEADD(DAY,@endDateTime) AS DATE) AND CAST(@endDateTime AS DATE) END GO -- Use a date lookup table,Sale first. -- Based on the answer from Kenneth Fisher. CREATE PROCEDURE testProcC(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,@endDate) SELECT SUM(Value) FROM Sale J INNER JOIN testDimDate D ON D.DateKey = J.SaleDate WHERE D.DateKey BETWEEN @startDate AND @endDate END GO -- Use a date lookup table,Sale last. -- Based on the answer from Kenneth Fisher. CREATE PROCEDURE testProcD(@endDate DATE) AS BEGIN SET NOCOUNT ON DECLARE @startDate DATE = DATEADD(DAY,@endDate) SELECT SUM(Value) FROM testDimDate D INNER JOIN Sale J ON J.SaleDate = D.DateKey WHERE D.DateKey BETWEEN @startDate AND @endDate END GO ------ TEST ------ SET STATISTICS TIME OFF DECLARE @endDate DATE = '2012-12-10' DECLARE @startDate DATE = DATEADD(DAY,@endDate) DBCC FREEPROCCACHE WITH NO_INFOMSGS DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS RAISERROR('Run 1: NoProc with constants',0) WITH NOWAIT SET STATISTICS TIME ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN '2012-12-09' AND '2012-12-10' SET STATISTICS TIME OFF RAISERROR('Run 2: NoProc with constants',0) WITH NOWAIT SET STATISTICS TIME ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN '2012-12-09' AND '2012-12-10' SET STATISTICS TIME OFF DBCC FREEPROCCACHE WITH NO_INFOMSGS DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS RAISERROR('Run 1: NoProc with variables',0) WITH NOWAIT SET STATISTICS TIME ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate SET STATISTICS TIME OFF RAISERROR('Run 2: NoProc with variables',0) WITH NOWAIT SET STATISTICS TIME ON SELECT SUM(Value) FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate SET STATISTICS TIME OFF DECLARE @sql NVARCHAR(4000) DECLARE _cursor CURSOR LOCAL FAST_FORWARD FOR SELECT procedures.name,procedures.object_id FROM sys.procedures WHERE procedures.name LIKE 'testProc_' ORDER BY procedures.name ASC OPEN _cursor DECLARE @name SYSNAME DECLARE @object_id INT FETCH NEXT FROM _cursor INTO @name,@object_id WHILE @@FETCH_STATUS = 0 BEGIN SET @sql = CASE (SELECT COUNT(*) FROM sys.parameters WHERE object_id = @object_id) WHEN 0 THEN @name WHEN 1 THEN @name + ' ''@endDate''' WHEN 2 THEN @name + ' ''@startDate'',''@endDate''' END SET @sql = REPLACE(@sql,'@name',@name) SET @sql = REPLACE(@sql,CONVERT(NVARCHAR(10),126)) DBCC FREEPROCCACHE WITH NO_INFOMSGS DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS RAISERROR('Run 1: %s',@sql) WITH NOWAIT SET STATISTICS TIME ON EXEC sp_executesql @sql SET STATISTICS TIME OFF RAISERROR('Run 2: %s',@sql) WITH NOWAIT SET STATISTICS TIME ON EXEC sp_executesql @sql SET STATISTICS TIME OFF FETCH NEXT FROM _cursor INTO @name,@object_id END CLOSE _cursor DEALLOCATE _cursor
解决方法
存储过程中的查询在执行存储过程时编译,而不是在执行查询时编译,因此sql Server必须在此处理这些值…
CREATE PROCEDURE WeeklyProc(@endDate DATE) AS BEGIN DECLARE @startDate DATE = DATEADD(DAY,@endDate) SELECT -- Stuff FROM Sale WHERE SaleDate BETWEEN @startDate AND @endDate END
是@endDate的已知值和@startDate的未知值.这将使sql Server猜测@startDate上为过滤器返回的行的30%,以及@endDate的统计信息.如果您有一个包含大量行的大表,可以为您提供扫描操作,您可以从搜索中获益最多.
您的包装程序解决方案确保sql Server在编译DateRangeProc时看到值,以便它可以为@endDate和@startDate使用已知值.
您的动态查询都会导致相同的事情,这些值在编译时是已知的.
具有默认空值的那个有点特殊. sql Server在编译时已知的值是@endDate的已知值和@startDate的null.在中间使用null将为您提供0行,但在这些情况下sql Server始终猜测为1.在这种情况下,这可能是一件好事,但是如果您使用较大的日期间隔调用存储过程,其中扫描将是最佳选择,它可能最终会进行一系列搜索.
我离开了“直接使用DATEADD()函数”到这个答案的结尾,因为它是我将使用的那个,并且它也有一些奇怪的东西.
首先,当在where子句中使用sql Server时,sql Server不会多次调用该函数. DATEADD is considered runtime constant.
我认为在编译查询时会对DATEADD进行评估,这样您就可以很好地估计返回的行数.但在这种情况下并非如此.
sql Server根据参数中的值进行估计,无论您使用DATEADD做什么(在sql Server 2012上测试),因此在您的情况下,估计值将是在@endDate上注册的行数.为什么这样做我不知道,但它与使用数据类型DATE有关.转移到存储过程和表中的DATETIME并且估计将是准确的,这意味着在DATETIME的编译时考虑DATEADD而不是DATE.
因此,为了总结这个相当冗长的答案,我建议使用包装程序解决方案.它始终允许sql Server在编译查询时使用提供的值,而无需使用动态sql.
PS:
在评论中你有两个建议.
选项(OPTIMIZE FOR UNKNOWN)将为您提供返回行的9%的估计值,OPTION(RECOMPILE)将使sql Server查看参数值,因为每次都会重新编译查询.