February 26, 2010 at 8:38 pm
And, finally, here is the (fully configurable and generally awesome) test rig code, for those that like to play along at home:
-- Switch to tempdb
USE tempdb;
-- Suppress 'x row(s) affected' messages
SET NOCOUNT ON;
GO
-- Conditional object drops
IF OBJECT_ID(N'tempdb.dbo.Test', N'U') IS NOT NULL DROP TABLE dbo.Test;
IF OBJECT_ID(N'dbo.GetNums', N'IF') IS NOT NULL DROP FUNCTION dbo.GetNums;
GO
-- An in-line TVF to generate a sequence of integers
CREATE FUNCTION dbo.GetNums(@n AS BIGINT)
RETURNS TABLE
WITH SCHEMABINDING
AS RETURN
WITH
L0 AS (SELECT 1 AS C UNION ALL SELECT 1),
L1 AS (SELECT 1 AS C FROM L0 AS A CROSS JOIN L0 AS B),
L2 AS (SELECT 1 AS C FROM L1 AS A CROSS JOIN L1 AS B),
L3 AS (SELECT 1 AS C FROM L2 AS A CROSS JOIN L2 AS B),
L4 AS (SELECT 1 AS C FROM L3 AS A CROSS JOIN L3 AS B),
L5 AS (SELECT 1 AS C FROM L4 AS A CROSS JOIN L4 AS B),
NUM AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS n FROM L5)
SELECT TOP(@n) n FROM NUM
ORDER BY n;
GO
-- Test table
CREATE TABLE dbo.Test
(
row_id BIGINT IDENTITY PRIMARY KEY,
section INTEGER NOT NULL,
date_time DATETIME NULL
)
GO
-- Parameters for populating the test table
DECLARE @RowCount BIGINT, -- Total number of rows to generate
@GroupCount INTEGER, -- Groups to split them into
@NullCount BIGINT; -- Total number of random NULLs in the date_time column
SELECT @RowCount = 1000000,
@GroupCount = 50,
@NullCount = 10000;
-- Generate the test data
RAISERROR('Creating %I64d rows of test data in %i groups...', 0, 1, @RowCount, @GroupCount) WITH NOWAIT;
WITH Numbers
AS (
SELECT section = n % @GroupCount,
date_time = DATEADD(DAY, CHECKSUM(NEWID()) % 36525, '20010101')
FROM dbo.GetNums(@RowCount)
)
INSERT dbo.Test WITH (TABLOCK)
(
section,
date_time
)
SELECT N.section,
N.date_time
FROM Numbers N;
-- Set date_time to NULL for (at most) @NullCount random rows
RAISERROR('Setting date_time to NULL in %I64d random rows...', 0, 1, @NullCount) WITH NOWAIT;
WITH Random
AS (
-- @NullCount random values [0:1)
SELECT chance = RAND(CHECKSUM(NEWID()))
FROM dbo.GetNums(@NullCount)
),
IdRange
AS (
-- Highest and lowest identity values from the test table
SELECT min_id = MIN(row_id),
max_id = MAX(row_id)
FROM dbo.Test
),
ToUpdate
AS (
-- Covert random values to be in the range of row_ids
SELECT row_id = CONVERT(INTEGER, Random.chance * IdRange.max_id) + IdRange.min_id
FROM IdRange
CROSS
APPLY Random
)
-- Set date_time in the rows chosen to NULL
UPDATE T
SET date_time = NULL
FROM ToUpdate
JOIN dbo.Test T
ON T.row_id = ToUpdate.row_id;
-- An index to help all the methods
RAISERROR('Creating index...', 0, 1) WITH NOWAIT;
CREATE NONCLUSTERED INDEX nc1 ON dbo.Test (section, date_time)
WITH (FILLFACTOR = 100, SORT_IN_TEMPDB = ON, MAXDOP = 1);
-- Run the tests
RAISERROR('Running tests...', 0, 1) WITH NOWAIT;
DECLARE @BitBucket_Section INTEGER,
@BitBucket_DateTime DATETIME;
SET STATISTICS IO, TIME ON;
RAISERROR('Test 1: NULLIF', 0, 1) WITH NOWAIT;
SELECT @BitBucket_Section = section,
@BitBucket_DateTime = NULLIF(MIN(ISNULL(date_time, '19000101')), '19000101')
FROM dbo.Test AS T1
GROUP BY T1.section;
RAISERROR('Test 2: EXISTS with MIN', 0, 1) WITH NOWAIT;
SELECT @BitBucket_Section = section,
@BitBucket_DateTime =
CASE
WHEN NOT EXISTS (SELECT * FROM dbo.Test AS T2 WHERE date_time IS NULL AND T2.Section = T1.Section) THEN MIN(T1.date_time)
ELSE NULL
END
FROM dbo.Test T1
GROUP BY T1.section;
RAISERROR('Test 3: EXISTS with TOP', 0, 1) WITH NOWAIT;
SELECT @BitBucket_Section = S.section,
@BitBucket_DateTime =
CASE
WHEN EXISTS (SELECT * FROM dbo.Test T WHERE T.section = S.section AND date_time IS NULL)
THEN NULL
ELSE (SELECT TOP (1) date_time FROM dbo.Test T WHERE T.section = S.section ORDER BY date_time ASC)
END
FROM (
SELECT DISTINCT section
FROM dbo.Test
) S
RAISERROR('Test 4: DISTINCT with TOP', 0, 1) WITH NOWAIT;
SELECT @BitBucket_Section = S.section,
@BitBucket_DateTime = CA.date_time
FROM (
SELECT DISTINCT section
FROM dbo.Test
) S
CROSS
APPLY (
SELECT TOP (1)
date_time
FROM dbo.Test T2
WHERE T2.section = S.section
ORDER BY
date_time ASC
) CA
RAISERROR('Test 5: COUNTs with MIN', 0, 1) WITH NOWAIT;
SELECT @BitBucket_Section = section,
@BitBucket_DateTime =
CASE
WHEN (COUNT(*) = COUNT(date_time)) THEN MIN(date_time)
ELSE NULL
END
FROM dbo.Test AS T1
GROUP BY T1.section;
RAISERROR('Test 6: ROW_NUMBER', 0, 1) WITH NOWAIT;
WITH CTE
AS (
SELECT section,
date_time,
rn = ROW_NUMBER() OVER (PARTITION BY section ORDER BY date_time ASC)
FROM dbo.Test T1
)
SELECT @BitBucket_Section = section,
@BitBucket_DateTime = date_time
FROM CTE
WHERE rn = 1;
RAISERROR('Test 7: Recursive CTE with APPLY', 0, 1) WITH NOWAIT;
WITH Sections
AS (
-- Recursive CTE to find the section numbers with seeks rather than a scan + aggregate
SELECT section = MIN(T1.section)
FROM dbo.Test T1
UNION ALL
SELECT S.section + 1
FROM Sections S
WHERE EXISTS
(
SELECT *
FROM dbo.Test T2
WHERE T2.section = S.section + 1
)
)
SELECT @BitBucket_Section = Sections.section,
@BitBucket_DateTime = CA.min_date_time
FROM Sections
CROSS
APPLY (
SELECT min_date_time =
CASE
-- Seek for any NULL
WHEN EXISTS (SELECT * FROM dbo.Test T3 WHERE T3.section = Sections.section AND T3.date_time IS NULL)
THEN NULL
-- Possible seek for lowest value (not performed if NULL was found)
ELSE (SELECT TOP (1) T4.date_time FROM dbo.Test T4 WHERE T4.section = Sections.section AND T4.date_time IS NOT NULL ORDER BY T4.date_time ASC)
END
) CA
OPTION (MAXRECURSION 0);
SET STATISTICS IO, TIME OFF;
Paul
Viewing post 16 (of 15 total)
You must be logged in to reply to this topic. Login to reply