February 16, 2012 at 6:18 am
Hi,
Ive been trying to find a way around a heavy query that we use. We have a table "users" with primary key and three other tables with foreign keys to "users". The query selects all distict users that exist in foreign tables.
BEGIN TRAN
SET STATISTICS IO ON
SET STATISTICS TIME ON
CREATE TABLE #a(id INT PRIMARY KEY)
INSERT #a VALUES (1)
INSERT #a VALUES (2)
INSERT #a VALUES (3)
INSERT #a VALUES (4)
INSERT #a VALUES (5)
INSERT #a VALUES (6)
INSERT #a VALUES (7)
INSERT #a VALUES (8)
CREATE TABLE #b(id INT PRIMARY KEY, userid INT)
INSERT #b VALUES (1,1)
INSERT #b VALUES (2,1)
INSERT #b VALUES (3,2)
INSERT #b VALUES (4,3)
INSERT #b VALUES (5,4)
INSERT #b VALUES (6,5)
INSERT #b VALUES (7,6)
INSERT #b VALUES (8,7)
INSERT #b VALUES (9,8)
CREATE TABLE #c(id INT PRIMARY KEY, userid INT)
INSERT #c VALUES (1, 1)
INSERT #c VALUES (2, 1)
INSERT #c VALUES (3, 2)
INSERT #c VALUES (4, 3)
CREATE TABLE #d(id INT PRIMARY KEY, userid INT)
INSERT #d VALUES (1, 1)
INSERT #d VALUES (2, 1)
INSERT #d VALUES (3, 2)
INSERT #d VALUES (4, 3)
SELECT COUNT(*) as users FROM
(
SELECT DISTINCT #a.id FROM #a INNER JOIN #b ON #a.id=#b.userid
UNION
SELECT DISTINCT #a.id FROM #a INNER JOIN #c ON #a.id=#c.userid
UNION
SELECT DISTINCT #a.id FROM #a INNER JOIN #d ON #a.id=#d.userid
) AS allvalues
DROP TABLE #a
DROP TABLE #b
DROP TABLE #c
DROP TABLE #d
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
ROLLBACK
Is there any faster way to do the same query?
ty for your time
February 16, 2012 at 6:36 am
Have no idea why you did use transaction in your query...
Also, UNION makes distinct output already, so you don't need to SELECT DISTINCT...
The following should execute faster, as it will join to #a once:
--BEGIN TRAN
SET STATISTICS IO ON
SET STATISTICS TIME ON
CREATE TABLE #a(id INT PRIMARY KEY)
INSERT #a VALUES (1)
INSERT #a VALUES (2)
INSERT #a VALUES (3)
INSERT #a VALUES (4)
INSERT #a VALUES (5)
INSERT #a VALUES (6)
INSERT #a VALUES (7)
INSERT #a VALUES (8)
CREATE TABLE #b(id INT PRIMARY KEY, userid INT)
INSERT #b VALUES (1,1)
INSERT #b VALUES (2,1)
INSERT #b VALUES (3,2)
INSERT #b VALUES (4,3)
INSERT #b VALUES (5,4)
INSERT #b VALUES (6,5)
INSERT #b VALUES (7,6)
INSERT #b VALUES (8,7)
INSERT #b VALUES (9,8)
CREATE TABLE #c(id INT PRIMARY KEY, userid INT)
INSERT #c VALUES (1, 1)
INSERT #c VALUES (2, 1)
INSERT #c VALUES (3, 2)
INSERT #c VALUES (4, 3)
CREATE TABLE #d(id INT PRIMARY KEY, userid INT)
INSERT #d VALUES (1, 1)
INSERT #d VALUES (2, 1)
INSERT #d VALUES (3, 2)
INSERT #d VALUES (4, 3)
;with allusers
as
(
SELECT userid FROM #b
UNION
SELECT userid FROM #c
UNION
SELECT userid FROM #d
)
SELECT COUNT(*) as users
FROM #a u
JOIN allusers a ON u.id = a.userid
DROP TABLE #a
DROP TABLE #b
DROP TABLE #c
DROP TABLE #d
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
--ROLLBACK
You can play around with flushing the results of UNION query into #table and index it instead of using CTE.
February 16, 2012 at 7:39 am
Oh sorry there was some scope creep... users have a registered field, that needs to be included in every where clause, so the current CTE solution wont work.
SET STATISTICS IO ON
SET STATISTICS TIME ON
CREATE TABLE #a(id INT PRIMARY KEY, registered DATE)
INSERT #a VALUES (1, '2011-01-01')
INSERT #a VALUES (2, '2011-01-01')
INSERT #a VALUES (3, '2011-01-01')
INSERT #a VALUES (4, '2011-01-01')
INSERT #a VALUES (5, '2011-01-01')
INSERT #a VALUES (6, '2011-01-01')
INSERT #a VALUES (7, '2011-01-01')
INSERT #a VALUES (8, '2011-01-01')
CREATE TABLE #b(id INT PRIMARY KEY, userid INT)
INSERT #b VALUES (1,1)
INSERT #b VALUES (2,1)
INSERT #b VALUES (3,2)
INSERT #b VALUES (4,3)
INSERT #b VALUES (5,4)
INSERT #b VALUES (6,5)
INSERT #b VALUES (7,6)
INSERT #b VALUES (8,7)
INSERT #b VALUES (9,8)
CREATE TABLE #c(id INT PRIMARY KEY, userid INT)
INSERT #c VALUES (1, 1)
INSERT #c VALUES (2, 1)
INSERT #c VALUES (3, 2)
INSERT #c VALUES (4, 3)
CREATE TABLE #d(id INT PRIMARY KEY, userid INT)
INSERT #d VALUES (1, 1)
INSERT #d VALUES (2, 1)
INSERT #d VALUES (3, 2)
INSERT #d VALUES (4, 3)
SELECT COUNT(*) as users FROM
(
SELECT #a.id FROM #a INNER JOIN #b ON #a.id=#b.userid WHERE DATEDIFF(day, registered, getdate())<1000
UNION
SELECT #a.id FROM #a INNER JOIN #c ON #a.id=#c.userid WHERE DATEDIFF(day, registered, getdate())<1000
UNION
SELECT #a.id FROM #a INNER JOIN #d ON #a.id=#d.userid WHERE DATEDIFF(day, registered, getdate())<1000
) AS allvalues
DROP TABLE #a
DROP TABLE #b
DROP TABLE #c
DROP TABLE #d
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
February 16, 2012 at 7:48 am
1. You don't need to SELECT DISTINCT when using UNION, as UNION will dedupe results itself
2. Your "WHERE DAY(registered)<1000" clause is useless, as there is no such date for which DAY function will return anything more than 31.
... untill you are using some unknown to public calendar system which allows to have more than 31 days in a month and SQLServer still works with it:-)
Please clarify.
February 16, 2012 at 7:49 am
Sorry, edited.
February 16, 2012 at 7:51 am
OOO! I can see you've edited it.
Just add your WHERE clause where CTE is joined to the #a:
;with allusers
as
(
SELECT userid FROM #b
UNION
SELECT userid FROM #c
UNION
SELECT userid FROM #d
)
SELECT COUNT(*) as users
FROM #a u
JOIN allusers a ON u.id = a.userid
WHERE DATEDIFF...
February 16, 2012 at 8:20 am
Nice! It seems indeed to be slighty faster, the executaion plan shows query cost of my old query being 58% and CTE query 42%...
And statistics info old query vs cte
(1 row(s) affected)
Table 'users'. Scan count 15, logical reads 24447, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'userworkexperience'. Scan count 9, logical reads 923, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'usereducation'. Scan count 5, logical reads 2734, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 63 ms, elapsed time = 112 ms.
================================================================================
(1 row(s) affected)
Table 'userworkexperience'. Scan count 10, logical reads 923, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'users'. Scan count 10, logical reads 16298, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'usereducation'. Scan count 5, logical reads 2734, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 63 ms, elapsed time = 76 ms.
================================================================================
If there's anything else i can do to optimise it, ill be glad to hear it 😀
February 16, 2012 at 8:40 am
try this:
--step 1. calc datediff first!
select id, DATEDIFF(day, registered, getdate()) dd
into #aWithDD
-- you may want to try indexing it
--create index ix_#aWithDD on #aWithDD(dd)
-- also try without CTE:
SELECT userid INTO #allusers FROM #b
UNION SELECT userid FROM #c
UNION SELECT userid FROM #d
-- index it:
create unique clustered index ix_#allusers on allusers(userid)
-- fincal query
SELECT COUNT(*) as users
FROM #aWithDD u
JOIN #allusers a ON u.id = a.userid
WHERE u.dd < 1000
February 16, 2012 at 9:54 am
Better would be to calculate the date back and use the date for the comparison.
Using non-deterministic functions on date columns in where clauses is generally a no-no... if there are indexes on the table on the date column, they won't be used.
i.e.
SET STATISTICS IO ON
SET STATISTICS TIME ON
DECLARE @olddate DATE = CAST(DATEADD(dd,-1000,GETDATE()) AS DATE)
--Your sample data doesn't have anything this old... so I'll add one
CREATE TABLE #a(id INT PRIMARY KEY, registered DATE)
INSERT #a VALUES (1, '2011-01-01')
INSERT #a VALUES (2, '2011-01-01')
INSERT #a VALUES (3, '2011-01-01')
INSERT #a VALUES (4, '2011-01-01')
INSERT #a VALUES (5, '2011-01-01')
INSERT #a VALUES (6, '2011-01-01')
INSERT #a VALUES (7, '2011-01-01')
INSERT #a VALUES (8, '2011-01-01')
INSERT #a VALUES (9, '1996-01-01') --added
CREATE TABLE #b(id INT PRIMARY KEY, userid INT)
INSERT #b VALUES (1,1)
INSERT #b VALUES (2,1)
INSERT #b VALUES (3,2)
INSERT #b VALUES (4,3)
INSERT #b VALUES (5,4)
INSERT #b VALUES (6,5)
INSERT #b VALUES (7,6)
INSERT #b VALUES (8,7)
INSERT #b VALUES (9,8)
INSERT #b VALUES (10,9) --added
CREATE TABLE #c(id INT PRIMARY KEY, userid INT)
INSERT #c VALUES (1, 1)
INSERT #c VALUES (2, 1)
INSERT #c VALUES (3, 2)
INSERT #c VALUES (4, 3)
CREATE TABLE #d(id INT PRIMARY KEY, userid INT)
INSERT #d VALUES (1, 1)
INSERT #d VALUES (2, 1)
INSERT #d VALUES (3, 2)
INSERT #d VALUES (4, 3)
;WITH allusers AS (
SELECT userid FROM #b
UNION
SELECT userid FROM #c
UNION
SELECT userid FROM #d
)
SELECT
usercount = COUNT(*)
FROM
#a a
WHERE EXISTS (SELECT 1 FROM allusers b WHERE a.id = b.userid)
AND a.registered < @olddate
DROP TABLE #a
DROP TABLE #b
DROP TABLE #c
DROP TABLE #d
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
Viewing 9 posts - 1 through 8 (of 8 total)
You must be logged in to reply to this topic. Login to reply