Any way to have MIN take NULLS into account?

  • And, finally, here is the (fully configurable and generally awesome) test rig code, for those that like to play along at home:

    -- Switch to tempdb

    USE tempdb;

    -- Suppress 'x row(s) affected' messages

    SET NOCOUNT ON;

    GO

    -- Conditional object drops

    IF OBJECT_ID(N'tempdb.dbo.Test', N'U') IS NOT NULL DROP TABLE dbo.Test;

    IF OBJECT_ID(N'dbo.GetNums', N'IF') IS NOT NULL DROP FUNCTION dbo.GetNums;

    GO

    -- An in-line TVF to generate a sequence of integers

    CREATE FUNCTION dbo.GetNums(@n AS BIGINT)

    RETURNS TABLE

    WITH SCHEMABINDING

    AS RETURN

    WITH

    L0 AS (SELECT 1 AS C UNION ALL SELECT 1),

    L1 AS (SELECT 1 AS C FROM L0 AS A CROSS JOIN L0 AS B),

    L2 AS (SELECT 1 AS C FROM L1 AS A CROSS JOIN L1 AS B),

    L3 AS (SELECT 1 AS C FROM L2 AS A CROSS JOIN L2 AS B),

    L4 AS (SELECT 1 AS C FROM L3 AS A CROSS JOIN L3 AS B),

    L5 AS (SELECT 1 AS C FROM L4 AS A CROSS JOIN L4 AS B),

    NUM AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS n FROM L5)

    SELECT TOP(@n) n FROM NUM

    ORDER BY n;

    GO

    -- Test table

    CREATE TABLE dbo.Test

    (

    row_id BIGINT IDENTITY PRIMARY KEY,

    section INTEGER NOT NULL,

    date_time DATETIME NULL

    )

    GO

    -- Parameters for populating the test table

    DECLARE @RowCount BIGINT, -- Total number of rows to generate

    @GroupCount INTEGER, -- Groups to split them into

    @NullCount BIGINT; -- Total number of random NULLs in the date_time column

    SELECT @RowCount = 1000000,

    @GroupCount = 50,

    @NullCount = 10000;

    -- Generate the test data

    RAISERROR('Creating %I64d rows of test data in %i groups...', 0, 1, @RowCount, @GroupCount) WITH NOWAIT;

    WITH Numbers

    AS (

    SELECT section = n % @GroupCount,

    date_time = DATEADD(DAY, CHECKSUM(NEWID()) % 36525, '20010101')

    FROM dbo.GetNums(@RowCount)

    )

    INSERT dbo.Test WITH (TABLOCK)

    (

    section,

    date_time

    )

    SELECT N.section,

    N.date_time

    FROM Numbers N;

    -- Set date_time to NULL for (at most) @NullCount random rows

    RAISERROR('Setting date_time to NULL in %I64d random rows...', 0, 1, @NullCount) WITH NOWAIT;

    WITH Random

    AS (

    -- @NullCount random values [0:1)

    SELECT chance = RAND(CHECKSUM(NEWID()))

    FROM dbo.GetNums(@NullCount)

    ),

    IdRange

    AS (

    -- Highest and lowest identity values from the test table

    SELECT min_id = MIN(row_id),

    max_id = MAX(row_id)

    FROM dbo.Test

    ),

    ToUpdate

    AS (

    -- Covert random values to be in the range of row_ids

    SELECT row_id = CONVERT(INTEGER, Random.chance * IdRange.max_id) + IdRange.min_id

    FROM IdRange

    CROSS

    APPLY Random

    )

    -- Set date_time in the rows chosen to NULL

    UPDATE T

    SET date_time = NULL

    FROM ToUpdate

    JOIN dbo.Test T

    ON T.row_id = ToUpdate.row_id;

    -- An index to help all the methods

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    CREATE NONCLUSTERED INDEX nc1 ON dbo.Test (section, date_time)

    WITH (FILLFACTOR = 100, SORT_IN_TEMPDB = ON, MAXDOP = 1);

    -- Run the tests

    RAISERROR('Running tests...', 0, 1) WITH NOWAIT;

    DECLARE @BitBucket_Section INTEGER,

    @BitBucket_DateTime DATETIME;

    SET STATISTICS IO, TIME ON;

    RAISERROR('Test 1: NULLIF', 0, 1) WITH NOWAIT;

    SELECT @BitBucket_Section = section,

    @BitBucket_DateTime = NULLIF(MIN(ISNULL(date_time, '19000101')), '19000101')

    FROM dbo.Test AS T1

    GROUP BY T1.section;

    RAISERROR('Test 2: EXISTS with MIN', 0, 1) WITH NOWAIT;

    SELECT @BitBucket_Section = section,

    @BitBucket_DateTime =

    CASE

    WHEN NOT EXISTS (SELECT * FROM dbo.Test AS T2 WHERE date_time IS NULL AND T2.Section = T1.Section) THEN MIN(T1.date_time)

    ELSE NULL

    END

    FROM dbo.Test T1

    GROUP BY T1.section;

    RAISERROR('Test 3: EXISTS with TOP', 0, 1) WITH NOWAIT;

    SELECT @BitBucket_Section = S.section,

    @BitBucket_DateTime =

    CASE

    WHEN EXISTS (SELECT * FROM dbo.Test T WHERE T.section = S.section AND date_time IS NULL)

    THEN NULL

    ELSE (SELECT TOP (1) date_time FROM dbo.Test T WHERE T.section = S.section ORDER BY date_time ASC)

    END

    FROM (

    SELECT DISTINCT section

    FROM dbo.Test

    ) S

    RAISERROR('Test 4: DISTINCT with TOP', 0, 1) WITH NOWAIT;

    SELECT @BitBucket_Section = S.section,

    @BitBucket_DateTime = CA.date_time

    FROM (

    SELECT DISTINCT section

    FROM dbo.Test

    ) S

    CROSS

    APPLY (

    SELECT TOP (1)

    date_time

    FROM dbo.Test T2

    WHERE T2.section = S.section

    ORDER BY

    date_time ASC

    ) CA

    RAISERROR('Test 5: COUNTs with MIN', 0, 1) WITH NOWAIT;

    SELECT @BitBucket_Section = section,

    @BitBucket_DateTime =

    CASE

    WHEN (COUNT(*) = COUNT(date_time)) THEN MIN(date_time)

    ELSE NULL

    END

    FROM dbo.Test AS T1

    GROUP BY T1.section;

    RAISERROR('Test 6: ROW_NUMBER', 0, 1) WITH NOWAIT;

    WITH CTE

    AS (

    SELECT section,

    date_time,

    rn = ROW_NUMBER() OVER (PARTITION BY section ORDER BY date_time ASC)

    FROM dbo.Test T1

    )

    SELECT @BitBucket_Section = section,

    @BitBucket_DateTime = date_time

    FROM CTE

    WHERE rn = 1;

    RAISERROR('Test 7: Recursive CTE with APPLY', 0, 1) WITH NOWAIT;

    WITH Sections

    AS (

    -- Recursive CTE to find the section numbers with seeks rather than a scan + aggregate

    SELECT section = MIN(T1.section)

    FROM dbo.Test T1

    UNION ALL

    SELECT S.section + 1

    FROM Sections S

    WHERE EXISTS

    (

    SELECT *

    FROM dbo.Test T2

    WHERE T2.section = S.section + 1

    )

    )

    SELECT @BitBucket_Section = Sections.section,

    @BitBucket_DateTime = CA.min_date_time

    FROM Sections

    CROSS

    APPLY (

    SELECT min_date_time =

    CASE

    -- Seek for any NULL

    WHEN EXISTS (SELECT * FROM dbo.Test T3 WHERE T3.section = Sections.section AND T3.date_time IS NULL)

    THEN NULL

    -- Possible seek for lowest value (not performed if NULL was found)

    ELSE (SELECT TOP (1) T4.date_time FROM dbo.Test T4 WHERE T4.section = Sections.section AND T4.date_time IS NOT NULL ORDER BY T4.date_time ASC)

    END

    ) CA

    OPTION (MAXRECURSION 0);

    SET STATISTICS IO, TIME OFF;

    Paul

Viewing post 16 (of 15 total)

You must be logged in to reply to this topic. Login to reply