I have a performance question about Common table expressions in SQL Server. In our developer team we use a lot of chaining CTEs when building our queries. I am currently working on a query which had terrible performance. But I found out that if I in the middle of the chain inserted all the records up to that CTE in a temporary table instead and then continued but selecting from that temp table I improved performance significantly. Now I would like to get some help to understand if this type of change only applies to this specific query and why the two cases you will see below differ so much in performance. Or could we possibly overuse CTEs in our team and can we gain performance generally by learning from this case?
请解释一下这里到底发生了什么...
代码是完整的, 您可以在 SQL SQL 服务器上运行, 2008 年, 也可能是 2005 年 。 有一部分被评出, 我的想法是您可以通过评出一个或另一个来切换两个案例。 您可以看到可以将您的区块批注放在哪里, 我用 < code> - 区块批注在这里标记了这些地方 code> 和 < code> - end 区块批注在这里 code>
慢速的运行情况是未提醒的默认 。 您在这里 :
--Declare tables to use in example.
CREATE TABLE #Preparation
(
Date DATETIME NOT NULL
,Hour INT NOT NULL
,Sales NUMERIC(9,2)
,Items INT
);
CREATE TABLE #Calendar
(
Date DATETIME NOT NULL
)
CREATE TABLE #OpenHours
(
Day INT NOT NULL,
OpenFrom TIME NOT NULL,
OpenTo TIME NOT NULL
);
--Fill tables with sample data.
INSERT INTO #OpenHours (Day, OpenFrom, OpenTo)
VALUES
(1, 10:00 , 20:00 ),
(2, 10:00 , 20:00 ),
(3, 10:00 , 20:00 ),
(4, 10:00 , 20:00 ),
(5, 10:00 , 20:00 ),
(6, 10:00 , 20:00 ),
(7, 10:00 , 20:00 )
DECLARE @CounterDay INT = 0, @CounterHour INT = 0, @Sales NUMERIC(9, 2), @Items INT;
WHILE @CounterDay < 365
BEGIN
SET @CounterHour = 0;
WHILE @CounterHour < 5
BEGIN
SET @Items = CAST(RAND() * 100 AS INT);
SET @Sales = CAST(RAND() * 1000 AS NUMERIC(9, 2));
IF @Items % 2 = 0
BEGIN
SET @Items = NULL;
SET @Sales = NULL;
END
INSERT INTO #Preparation (Date, Hour, Items, Sales)
VALUES (DATEADD(DAY, @CounterDay, 2011-01-01 ), @CounterHour + 13, @Items, @Sales);
SET @CounterHour += 1;
END
INSERT INTO #Calendar (Date) VALUES (DATEADD(DAY, @CounterDay, 2011-01-01 ));
SET @CounterDay += 1;
END
--Here the query starts.
;WITH P AS (
SELECT DATEADD(HOUR, Hour, Date) AS Hour
,Sales
,Items
FROM #Preparation
),
O AS (
SELECT DISTINCT DATEADD(HOUR, SV.number, C.Date) AS Hour
FROM #OpenHours AS O
JOIN #Calendar AS C ON O.Day = DATEPART(WEEKDAY, C.Date)
JOIN master.dbo.spt_values AS SV ON SV.number BETWEEN DATEPART(HOUR, O.OpenFrom) AND DATEPART(HOUR, O.OpenTo)
),
S AS (
SELECT O.Hour, P.Sales, P.Items
FROM O
LEFT JOIN P ON P.Hour = O.Hour
)
--block comment here case 1 (slow performing)
--With this technique it takes about 34 seconds.
,N AS (
SELECT
A.Hour
,A.Sales AS SalesOrg
,CASE WHEN COALESCE(B.Sales, C.Sales, 1) < 0
THEN 0 ELSE COALESCE(B.Sales, C.Sales, 1) END AS Sales
,A.Items AS ItemsOrg
,COALESCE(B.Items, C.Items, 1) AS Items
FROM S AS A
OUTER APPLY (SELECT TOP 1 *
FROM S
WHERE Hour <= A.Hour
AND Sales IS NOT NULL
AND DATEDIFF(DAY, Hour, A.Hour) = 0
ORDER BY Hour DESC) B
OUTER APPLY (SELECT TOP 1 *
FROM S
WHERE Sales IS NOT NULL
AND DATEDIFF(DAY, Hour, A.Hour) = 0
ORDER BY Hour) C
)
--end block comment here case 1 (slow performing)
/*--block comment here case 2 (fast performing)
--With this technique it takes about 2 seconds.
SELECT * INTO #tmpS FROM S;
WITH
N AS (
SELECT
A.Hour
,A.Sales AS SalesOrg
,CASE WHEN COALESCE(B.Sales, C.Sales, 1) < 0
THEN 0 ELSE COALESCE(B.Sales, C.Sales, 1) END AS Sales
,A.Items AS ItemsOrg
,COALESCE(B.Items, C.Items, 1) AS Items
FROM #tmpS AS A
OUTER APPLY (SELECT TOP 1 *
FROM #tmpS
WHERE Hour <= A.Hour
AND Sales IS NOT NULL
AND DATEDIFF(DAY, Hour, A.Hour) = 0
ORDER BY Hour DESC) B
OUTER APPLY (SELECT TOP 1 *
FROM #tmpS
WHERE Sales IS NOT NULL
AND DATEDIFF(DAY, Hour, A.Hour) = 0
ORDER BY Hour) C
)
--end block comment here case 2 (fast performing)*/
SELECT * FROM N ORDER BY Hour
IF OBJECT_ID( tempdb..#tmpS ) IS NOT NULL DROP TABLE #tmpS;
DROP TABLE #Preparation;
DROP TABLE #Calendar;
DROP TABLE #OpenHours;
如果您想试着理解我在最后一步中在做什么, 我有一个问题要问:
不同的是,我把S的结果放在一个临时桌子里, 放在第2个临时桌子里, 以防万一我在下一个CTE中直接使用S。