Slow union query

  • Hi,

    Ive been trying to find a way around a heavy query that we use. We have a table "users" with primary key and three other tables with foreign keys to "users". The query selects all distict users that exist in foreign tables.

    BEGIN TRAN

    SET STATISTICS IO ON

    SET STATISTICS TIME ON

    CREATE TABLE #a(id INT PRIMARY KEY)

    INSERT #a VALUES (1)

    INSERT #a VALUES (2)

    INSERT #a VALUES (3)

    INSERT #a VALUES (4)

    INSERT #a VALUES (5)

    INSERT #a VALUES (6)

    INSERT #a VALUES (7)

    INSERT #a VALUES (8)

    CREATE TABLE #b(id INT PRIMARY KEY, userid INT)

    INSERT #b VALUES (1,1)

    INSERT #b VALUES (2,1)

    INSERT #b VALUES (3,2)

    INSERT #b VALUES (4,3)

    INSERT #b VALUES (5,4)

    INSERT #b VALUES (6,5)

    INSERT #b VALUES (7,6)

    INSERT #b VALUES (8,7)

    INSERT #b VALUES (9,8)

    CREATE TABLE #c(id INT PRIMARY KEY, userid INT)

    INSERT #c VALUES (1, 1)

    INSERT #c VALUES (2, 1)

    INSERT #c VALUES (3, 2)

    INSERT #c VALUES (4, 3)

    CREATE TABLE #d(id INT PRIMARY KEY, userid INT)

    INSERT #d VALUES (1, 1)

    INSERT #d VALUES (2, 1)

    INSERT #d VALUES (3, 2)

    INSERT #d VALUES (4, 3)

    SELECT COUNT(*) as users FROM

    (

    SELECT DISTINCT #a.id FROM #a INNER JOIN #b ON #a.id=#b.userid

    UNION

    SELECT DISTINCT #a.id FROM #a INNER JOIN #c ON #a.id=#c.userid

    UNION

    SELECT DISTINCT #a.id FROM #a INNER JOIN #d ON #a.id=#d.userid

    ) AS allvalues

    DROP TABLE #a

    DROP TABLE #b

    DROP TABLE #c

    DROP TABLE #d

    SET STATISTICS TIME OFF

    SET STATISTICS IO OFF

    PRINT REPLICATE('=',80)

    ROLLBACK

    Is there any faster way to do the same query?

    ty for your time

  • Have no idea why you did use transaction in your query...

    Also, UNION makes distinct output already, so you don't need to SELECT DISTINCT...

    The following should execute faster, as it will join to #a once:

    --BEGIN TRAN

    SET STATISTICS IO ON

    SET STATISTICS TIME ON

    CREATE TABLE #a(id INT PRIMARY KEY)

    INSERT #a VALUES (1)

    INSERT #a VALUES (2)

    INSERT #a VALUES (3)

    INSERT #a VALUES (4)

    INSERT #a VALUES (5)

    INSERT #a VALUES (6)

    INSERT #a VALUES (7)

    INSERT #a VALUES (8)

    CREATE TABLE #b(id INT PRIMARY KEY, userid INT)

    INSERT #b VALUES (1,1)

    INSERT #b VALUES (2,1)

    INSERT #b VALUES (3,2)

    INSERT #b VALUES (4,3)

    INSERT #b VALUES (5,4)

    INSERT #b VALUES (6,5)

    INSERT #b VALUES (7,6)

    INSERT #b VALUES (8,7)

    INSERT #b VALUES (9,8)

    CREATE TABLE #c(id INT PRIMARY KEY, userid INT)

    INSERT #c VALUES (1, 1)

    INSERT #c VALUES (2, 1)

    INSERT #c VALUES (3, 2)

    INSERT #c VALUES (4, 3)

    CREATE TABLE #d(id INT PRIMARY KEY, userid INT)

    INSERT #d VALUES (1, 1)

    INSERT #d VALUES (2, 1)

    INSERT #d VALUES (3, 2)

    INSERT #d VALUES (4, 3)

    ;with allusers

    as

    (

    SELECT userid FROM #b

    UNION

    SELECT userid FROM #c

    UNION

    SELECT userid FROM #d

    )

    SELECT COUNT(*) as users

    FROM #a u

    JOIN allusers a ON u.id = a.userid

    DROP TABLE #a

    DROP TABLE #b

    DROP TABLE #c

    DROP TABLE #d

    SET STATISTICS TIME OFF

    SET STATISTICS IO OFF

    PRINT REPLICATE('=',80)

    --ROLLBACK

    You can play around with flushing the results of UNION query into #table and index it instead of using CTE.

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Oh sorry there was some scope creep... users have a registered field, that needs to be included in every where clause, so the current CTE solution wont work.

    SET STATISTICS IO ON

    SET STATISTICS TIME ON

    CREATE TABLE #a(id INT PRIMARY KEY, registered DATE)

    INSERT #a VALUES (1, '2011-01-01')

    INSERT #a VALUES (2, '2011-01-01')

    INSERT #a VALUES (3, '2011-01-01')

    INSERT #a VALUES (4, '2011-01-01')

    INSERT #a VALUES (5, '2011-01-01')

    INSERT #a VALUES (6, '2011-01-01')

    INSERT #a VALUES (7, '2011-01-01')

    INSERT #a VALUES (8, '2011-01-01')

    CREATE TABLE #b(id INT PRIMARY KEY, userid INT)

    INSERT #b VALUES (1,1)

    INSERT #b VALUES (2,1)

    INSERT #b VALUES (3,2)

    INSERT #b VALUES (4,3)

    INSERT #b VALUES (5,4)

    INSERT #b VALUES (6,5)

    INSERT #b VALUES (7,6)

    INSERT #b VALUES (8,7)

    INSERT #b VALUES (9,8)

    CREATE TABLE #c(id INT PRIMARY KEY, userid INT)

    INSERT #c VALUES (1, 1)

    INSERT #c VALUES (2, 1)

    INSERT #c VALUES (3, 2)

    INSERT #c VALUES (4, 3)

    CREATE TABLE #d(id INT PRIMARY KEY, userid INT)

    INSERT #d VALUES (1, 1)

    INSERT #d VALUES (2, 1)

    INSERT #d VALUES (3, 2)

    INSERT #d VALUES (4, 3)

    SELECT COUNT(*) as users FROM

    (

    SELECT #a.id FROM #a INNER JOIN #b ON #a.id=#b.userid WHERE DATEDIFF(day, registered, getdate())<1000

    UNION

    SELECT #a.id FROM #a INNER JOIN #c ON #a.id=#c.userid WHERE DATEDIFF(day, registered, getdate())<1000

    UNION

    SELECT #a.id FROM #a INNER JOIN #d ON #a.id=#d.userid WHERE DATEDIFF(day, registered, getdate())<1000

    ) AS allvalues

    DROP TABLE #a

    DROP TABLE #b

    DROP TABLE #c

    DROP TABLE #d

    SET STATISTICS TIME OFF

    SET STATISTICS IO OFF

    PRINT REPLICATE('=',80)

  • 1. You don't need to SELECT DISTINCT when using UNION, as UNION will dedupe results itself

    2. Your "WHERE DAY(registered)<1000" clause is useless, as there is no such date for which DAY function will return anything more than 31.

    ... untill you are using some unknown to public calendar system which allows to have more than 31 days in a month and SQLServer still works with it:-)

    Please clarify.

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Sorry, edited.

  • OOO! I can see you've edited it.

    Just add your WHERE clause where CTE is joined to the #a:

    ;with allusers

    as

    (

    SELECT userid FROM #b

    UNION

    SELECT userid FROM #c

    UNION

    SELECT userid FROM #d

    )

    SELECT COUNT(*) as users

    FROM #a u

    JOIN allusers a ON u.id = a.userid

    WHERE DATEDIFF...

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Nice! It seems indeed to be slighty faster, the executaion plan shows query cost of my old query being 58% and CTE query 42%...

    And statistics info old query vs cte

    (1 row(s) affected)

    Table 'users'. Scan count 15, logical reads 24447, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'userworkexperience'. Scan count 9, logical reads 923, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'usereducation'. Scan count 5, logical reads 2734, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 63 ms, elapsed time = 112 ms.

    ================================================================================

    (1 row(s) affected)

    Table 'userworkexperience'. Scan count 10, logical reads 923, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'users'. Scan count 10, logical reads 16298, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'usereducation'. Scan count 5, logical reads 2734, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 63 ms, elapsed time = 76 ms.

    ================================================================================

    If there's anything else i can do to optimise it, ill be glad to hear it 😀

  • try this:

    --step 1. calc datediff first!

    select id, DATEDIFF(day, registered, getdate()) dd

    into #aWithDD

    -- you may want to try indexing it

    --create index ix_#aWithDD on #aWithDD(dd)

    -- also try without CTE:

    SELECT userid INTO #allusers FROM #b

    UNION SELECT userid FROM #c

    UNION SELECT userid FROM #d

    -- index it:

    create unique clustered index ix_#allusers on allusers(userid)

    -- fincal query

    SELECT COUNT(*) as users

    FROM #aWithDD u

    JOIN #allusers a ON u.id = a.userid

    WHERE u.dd < 1000

    _____________________________________________
    "The only true wisdom is in knowing you know nothing"
    "O skol'ko nam otkrytiy chudnyh prevnosit microsofta duh!":-D
    (So many miracle inventions provided by MS to us...)

    How to post your question to get the best and quick help[/url]

  • Better would be to calculate the date back and use the date for the comparison.

    Using non-deterministic functions on date columns in where clauses is generally a no-no... if there are indexes on the table on the date column, they won't be used.

    i.e.

    SET STATISTICS IO ON

    SET STATISTICS TIME ON

    DECLARE @olddate DATE = CAST(DATEADD(dd,-1000,GETDATE()) AS DATE)

    --Your sample data doesn't have anything this old... so I'll add one

    CREATE TABLE #a(id INT PRIMARY KEY, registered DATE)

    INSERT #a VALUES (1, '2011-01-01')

    INSERT #a VALUES (2, '2011-01-01')

    INSERT #a VALUES (3, '2011-01-01')

    INSERT #a VALUES (4, '2011-01-01')

    INSERT #a VALUES (5, '2011-01-01')

    INSERT #a VALUES (6, '2011-01-01')

    INSERT #a VALUES (7, '2011-01-01')

    INSERT #a VALUES (8, '2011-01-01')

    INSERT #a VALUES (9, '1996-01-01') --added

    CREATE TABLE #b(id INT PRIMARY KEY, userid INT)

    INSERT #b VALUES (1,1)

    INSERT #b VALUES (2,1)

    INSERT #b VALUES (3,2)

    INSERT #b VALUES (4,3)

    INSERT #b VALUES (5,4)

    INSERT #b VALUES (6,5)

    INSERT #b VALUES (7,6)

    INSERT #b VALUES (8,7)

    INSERT #b VALUES (9,8)

    INSERT #b VALUES (10,9) --added

    CREATE TABLE #c(id INT PRIMARY KEY, userid INT)

    INSERT #c VALUES (1, 1)

    INSERT #c VALUES (2, 1)

    INSERT #c VALUES (3, 2)

    INSERT #c VALUES (4, 3)

    CREATE TABLE #d(id INT PRIMARY KEY, userid INT)

    INSERT #d VALUES (1, 1)

    INSERT #d VALUES (2, 1)

    INSERT #d VALUES (3, 2)

    INSERT #d VALUES (4, 3)

    ;WITH allusers AS (

    SELECT userid FROM #b

    UNION

    SELECT userid FROM #c

    UNION

    SELECT userid FROM #d

    )

    SELECT

    usercount = COUNT(*)

    FROM

    #a a

    WHERE EXISTS (SELECT 1 FROM allusers b WHERE a.id = b.userid)

    AND a.registered < @olddate

    DROP TABLE #a

    DROP TABLE #b

    DROP TABLE #c

    DROP TABLE #d

    SET STATISTICS TIME OFF

    SET STATISTICS IO OFF

    PRINT REPLICATE('=',80)



    --Mark Tassin
    MCITP - SQL Server DBA
    Proud member of the Anti-RBAR alliance.
    For help with Performance click this link[/url]
    For tips on how to post your problems[/url]

Viewing 9 posts - 1 through 8 (of 8 total)

You must be logged in to reply to this topic. Login to reply