November 4, 2011 at 2:45 am
What about another unspectacular solustion:
SELECT ISNULL((SELECT TOP 1 RowNumber as FirstUnusedID
FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID )
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1),(ISNULL((SELECT MAX(ID) + 1 FROM #testEnvironment2), 1)))
BTW, I'm great fan aon RBAR !:-)
November 4, 2011 at 4:22 am
To offer a complete environment inc. TimeStamps:
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
CREATE TABLE #testEnvironment2
(ID INTEGER NOT NULL PRIMARY KEY
CHECK (ID > 0));
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID
FROM t5 x, t5 y)
INSERT INTO #testEnvironment2
SELECT ID
FROM tally
WHERE ID <= 1000000 AND
(ID%3 = 0 OR ID%5 = 0 OR ID = 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT COUNT(*)
FROM #testEnvironment2
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment2
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment2);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tallli AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment2 a
RIGHT OUTER JOIN tallli b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== MY Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment2))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
--CleanUp
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
I would be pleased for any comments on this performance rating !
November 4, 2011 at 4:58 am
rahu21 (11/4/2011)
What about another unspectacular solustion:SELECT ISNULL((SELECT TOP 1 RowNumber as FirstUnusedID
FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID )
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1),(ISNULL((SELECT MAX(ID) + 1 FROM #testEnvironment2), 1)))
BTW, I'm great fan aon RBAR !:-)
It's good when there's an index. Less so when there isn't, but far superior to the CELKO solution.
SELECT TOP 1000000 ABS(CHECKSUM(NewId())) AS ID
INTO #testEnvironment2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
--DELETE non-unique rows
DELETE FROM #testEnvironment2
WHERE ID IN (SELECT ID
FROM #testEnvironment2
GROUP BY ID
HAVING COUNT(ID) > 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT COUNT(*)
FROM #testEnvironment2
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment2
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment2);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tallli AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment2 a
RIGHT OUTER JOIN tallli b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== MY Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment2))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
--CleanUp
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
========== BASELINE ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1667'. Scan count 1, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 157 ms, elapsed time = 161 ms.
========== CELKO Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1667'. Scan count 18, logical reads 2008798, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 299108 ms, elapsed time = 82807 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1667'. Scan count 5, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 408 ms, elapsed time = 136 ms.
========== MY Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1667'. Scan count 6, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 1217 ms, elapsed time = 652 ms.
Of course, if we're looking at performance we could improve the tally table solution.
SELECT TOP 1000000 ABS(CHECKSUM(NewId())) AS ID
INTO #testEnvironment2
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
--DELETE non-unique rows
DELETE FROM #testEnvironment2
WHERE ID IN (SELECT ID
FROM #testEnvironment2
GROUP BY ID
HAVING COUNT(ID) > 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT COUNT(*)
FROM #testEnvironment2
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment2
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment2);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tallli AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment2 a
RIGHT OUTER JOIN tallli b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== MY Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment2))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CROSS APPLY =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT c.num AS nextID
FROM (SELECT TOP 1 ID
FROM #testEnvironment2
ORDER BY ID ASC) a
CROSS APPLY (SELECT TOP 1 b.num
FROM tally b
WHERE b.num <> a.ID
ORDER BY b.num ASC) c
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
--CleanUp
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
========== BASELINE ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1901'. Scan count 1, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 125 ms, elapsed time = 125 ms.
========== CELKO Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1901'. Scan count 18, logical reads 2008706, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 341610 ms, elapsed time = 92349 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1901'. Scan count 5, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 719 ms, elapsed time = 197 ms.
========== MY Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1901'. Scan count 6, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 1829 ms, elapsed time = 942 ms.
========== CROSS APPLY ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D1B58'. Scan count 5, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 453 ms, elapsed time = 141 ms.
================================================================================
Results skewed in favour of the CROSS APPLY/TALLY method, so let's go back to the previous test environment where the TALLY table was defeated.
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
CREATE TABLE #testEnvironment2
(ID INTEGER NOT NULL PRIMARY KEY
CHECK (ID > 0));
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID
FROM t5 x, t5 y)
INSERT INTO #testEnvironment2
SELECT ID
FROM tally
WHERE ID <= 1000000 AND
(ID%3 = 0 OR ID%5 = 0 OR ID = 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT COUNT(*)
FROM #testEnvironment2
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment2
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment2);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tallli AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment2 a
RIGHT OUTER JOIN tallli b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== MY Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment2)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment2))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CROSS APPLY =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT c.num AS nextID
FROM (SELECT TOP 1 ID
FROM #testEnvironment2
ORDER BY ID ASC) a
CROSS APPLY (SELECT TOP 1 b.num
FROM tally b
WHERE b.num <> a.ID
ORDER BY b.num ASC) c
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT REPLICATE('=',80)
--CleanUp
IF object_id('tempdb..#testEnvironment2') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment2
END
========== BASELINE ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D19F4'. Scan count 1, logical reads 754, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 31 ms, elapsed time = 37 ms.
========== CELKO Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D19F4'. Scan count 2, logical reads 1510, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 234 ms, elapsed time = 237 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D19F4'. Scan count 1, logical reads 54, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 31 ms, elapsed time = 35 ms.
========== MY Solution ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D19F4'. Scan count 2, logical reads 6, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 1 ms.
========== CROSS APPLY ==========
(1 row(s) affected)
Table '#testEnvironment2___________________________________________________________________________________________________0000000D19F4'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
================================================================================
We have a winner
November 4, 2011 at 8:46 am
Hmm, I wonder how these set of tests will fare if the minimum open value is something like 500M, or 1.8B. Can you set up a test for where the next value is VERY high up the integer list? Wonder how that will affect perf...
Best,
Kevin G. Boles
SQL Server Consultant
SQL MVP 2007-2012
TheSQLGuru on googles mail service
November 4, 2011 at 9:10 am
TheSQLGuru (11/4/2011)
Hmm, I wonder how these set of tests will fare if the minimum open value is something like 500M, or 1.8B. Can you set up a test for where the next value is VERY high up the integer list? Wonder how that will affect perf...
Sure. I'm unwilling to test the TALLY solutions due to the requirement to create a TALLY table with 1,000,000,000 rows, but here is the test environment.
--Standard TestEnvironment of 1,000,000 rows of random-ish data
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
--1,000,000,000 Random rows of data
SELECT TOP 1000000000 IDENTITY(INT,1,1) AS ID
INTO #testEnvironment
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
--Delete random ID - should be higher than 1,000,0000
DELETE FROM #testEnvironment
WHERE ID = ABS(CHECKSUM(NEWID()))
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID
FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID) AS RowNumber, ID
FROM #testEnvironment ) T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1
FROM #testEnvironment))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
I'm running it now on a test server, will update when I get a result (will probably take quite some time to just build the #testEnvironment temp table).
Hmmm, may have to give this up. The test server I'm running this on has 8GB of RAM, of which 7.81GB is now in use at the 15 minute mark.
I've killed it off. The server is needed by QA. Assuming I remember, will set it off again tonight when no-one is around.
November 4, 2011 at 3:28 pm
you are gonna blow up your tlog with that single-statement insert aren't you? I wonder if it might not be faster to batch it up.
Best,
Kevin G. Boles
SQL Server Consultant
SQL MVP 2007-2012
TheSQLGuru on googles mail service
November 4, 2011 at 9:37 pm
Heh... now do some testing where no numbers are missing and see the other side of this story. 😉
--Jeff Moden
Change is inevitable... Change for the better is not.
November 6, 2011 at 6:49 am
TheSQLGuru (11/3/2011)
It is QUITE difficult (without the OUTPUT clause anyway, which is thankfully available now) to do sequencing like this.
We've always been able to do it safely and efficiently without the OUTPUT clause, using this sort of pattern:
UPDATE SequenceMaster
SET @NewID = next_value = next_value + 1
WHERE sequence_name = @sequence_name;
http://sqlblog.com/blogs/paul_white/archive/2010/10/19/sequence-tables.aspx
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
November 13, 2011 at 1:45 pm
SQL Kiwi (11/6/2011)
TheSQLGuru (11/3/2011)
It is QUITE difficult (without the OUTPUT clause anyway, which is thankfully available now) to do sequencing like this.We've always been able to do it safely and efficiently without the OUTPUT clause, using this sort of pattern:
UPDATE SequenceMaster
SET @NewID = next_value = next_value + 1
WHERE sequence_name = @sequence_name;
http://sqlblog.com/blogs/paul_white/archive/2010/10/19/sequence-tables.aspx
I've been using this type of sequence table handling for years and mostly no problems with it. This is for cash registers getting the next ticket number. Even with multiple stores, each having their own set of ticket numbers, there's no concurrency problem since the checkout rate at the stores doesn't hit the sequence table more than 30 times a minute.
There is a problem, however, when the sequence table is used to hold other data than the next number. I found that some people tried to use the sequence table as a join in a longish report query and this created instant deadlocks with the cash registers trying to get the next sequence number.
Sequence tables should never hold data that could be required in a report type query.
Todd Fifield
November 13, 2011 at 2:27 pm
tfifield (11/13/2011)
I found that some people tried to use the sequence table as a join in a longish report query and this created instant deadlocks with the cash registers trying to get the next sequence number. Sequence tables should never hold data that could be required in a report type query.
Hi Todd,
I agree that sequence tables should be dedicated and not co-opted for anything else, but another way to reduce deadlocking would be to run a row-versioning isolation level (usually RCSI) to avoid taking shared locks. For similar reasons, the sequence table should only have one index on it.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
November 14, 2011 at 7:15 am
SQL Kiwi (11/6/2011)
TheSQLGuru (11/3/2011)
It is QUITE difficult (without the OUTPUT clause anyway, which is thankfully available now) to do sequencing like this.We've always been able to do it safely and efficiently without the OUTPUT clause, using this sort of pattern:
UPDATE SequenceMaster
SET @NewID = next_value = next_value + 1
WHERE sequence_name = @sequence_name;
http://sqlblog.com/blogs/paul_white/archive/2010/10/19/sequence-tables.aspx
Sorry Paul - I don't think your solution is applicable to the OPs situation. Your scenario is for ONE number that gets updated. OP has LOTS of numbers and wants a number picked out of that (min available).
Best,
Kevin G. Boles
SQL Server Consultant
SQL MVP 2007-2012
TheSQLGuru on googles mail service
November 14, 2011 at 7:37 am
TheSQLGuru (11/14/2011)
Sorry Paul - I don't think your solution is applicable to the OPs situation. Your scenario is for ONE number that gets updated. OP has LOTS of numbers and wants a number picked out of that (min available).
No need to apologise; I was responding to a concurrency point I thought you were making regarding OUTPUT being better than a SELECT...INSERT pattern.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
November 14, 2011 at 8:56 am
SQL Kiwi (11/14/2011)
TheSQLGuru (11/14/2011)
Sorry Paul - I don't think your solution is applicable to the OPs situation. Your scenario is for ONE number that gets updated. OP has LOTS of numbers and wants a number picked out of that (min available).No need to apologise; I was responding to a concurrency point I thought you were making regarding OUTPUT being better than a SELECT...INSERT pattern.
Ahh - and for the single-value scenario your solution is clearly much better than OUTPUT and light-years ahead of anything else (until Denali anyway). 🙂
Best,
Kevin G. Boles
SQL Server Consultant
SQL MVP 2007-2012
TheSQLGuru on googles mail service
November 14, 2011 at 1:48 pm
SQL Kiwi (11/13/2011)
I agree that sequence tables should be dedicated and not co-opted for anything else, but another way to reduce deadlocking would be to run a row-versioning isolation level (usually RCSI) to avoid taking shared locks. For similar reasons, the sequence table should only have one index on it.
Good point Paul.
I'll see if I can get the vendor for the product to implement this.
Todd Fifield
November 15, 2011 at 1:47 am
CELKO (11/15/2011)
Please post DDL, so that people do not have to guess what the keys, constraints, Declarative Referential Integrity, data types, etc. in your schema are. Please learn to follow ISO-11179 data element naming conventions and formatting rules. Temporal data should use ISO-8601 formats. Code should be in Standard SQL as much as possible and not local dialect.Sample data is also a good idea, along with clear specifications. It is very hard to debug code when you do not let us see it. If you want to learn how to ask a question on a Newsgroup, look at: http://www.catb.org/~esr/faqs/smart-questions.html
>> I need to create SQL script which will go through all records [sic: rows are not records] and find smallest unused number in list of unsorted numbers [tables are not sorted!]. <<
>> I have table with user_id (PRIMARY KEY), user_nbr ( INTEGER ), full_name (VARCHAR(n)) and there are 2000 records [sic] in this table. <,
No check digits? No verifications?
WITH Available User_Ids(user_id)
AS
(SELECT user_id -1
FROM Users
WHERE (user_id -1)
NOT IN (SELECT user_id FROM Users)
UNION ALL
SELECT user_id +1
FROM Users
WHERE (user_id +1)
NOT IN (SELECT user_id FROM Users))
SELECT MIN (user_id
FROM Available User_Ids;
Wow, déjà vu. At least you changed your solution I guess.
Let's include what you wrote last time in the mix, shall we?
CELKO
>> I need to create SQL script which will go through all records [sic: rows are not records] and find smallest unused number in list [sic: tables are not lists] of unsorted [sic: tables have no ordering, they are sets] numbers. <,That was a lot of ignorance in one paragraph
>> I have table with id (primary key), user_nbr number (int), fullname (varchar) and there are 2000 records [sic] in this table. <<
Please post DDL, so that people do not have to guess what the keys, constraints, Declarative Referential Integrity, data types, etc. in your schema are. Please learn to follow ISO-11179 data element naming conventions and formatting rules. Temporal data should use ISO-8601 formats. Code should be in Standard SQL as much as possible and not local dialect.
Sample data is also a good idea, along with clear specifications. It is very hard to debug code when you do not let us see it. If you want to learn how to ask a question on a Newsgroup, look at: http://www.catb.org/~esr/faqs/smart-questions.html
What is the magical, generic “id”? I hope you did not use an IDENTITY or other non-relational construct when you have a proper key in user_nbr. Here is my guess at your table:
CREATE TABLE Users
(user_nbr INTEGER NOT NULL PRIMARY KEY
CHECK (user_id > 0),
user_name VARCHAR (25) NOT NULL);
SELECT MIN (user_id +1) AS unused_user_id_min
FROM Users
WHERE (user_id + 1) NOT IN (SELECT user_id FROM Users);
OK, well last time it turned out that your solution was not only slow, but incorrect. So let's see how your new one holds up.
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
--1,000,000 random rows of data
SELECT TOP 1000000 ABS(CHECKSUM(NewId())) AS ID
INTO #testEnvironment
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
--DELETE non-unique rows
DELETE FROM #testEnvironment
WHERE ID IN (SELECT ID
FROM #testEnvironment
GROUP BY ID
HAVING COUNT(ID) > 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN(ID)
FROM #testEnvironment
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s first Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
--Use a real tally table for performance, here's one on the fly for testing.
--If you're unsure about what a tally table is, or what it's for, please read
--this article --> http://www.sqlservercentral.com/articles/T-SQL/62867/
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment a
RIGHT OUTER JOIN tally b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== ROW_NUMBER Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s second Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH Available (ID) AS (
SELECT ID -1
FROM #testEnvironment
WHERE (ID -1) NOT IN (SELECT ID FROM #testEnvironment)
UNION ALL
SELECT ID +1
FROM #testEnvironment
WHERE (ID +1) NOT IN (SELECT ID FROM #testEnvironment))
SELECT MIN (ID)
FROM Available
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
--CleanUp
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
I'm executing this over the interwebs since I'm working from home, so the timings will be higher than before.
The MIN ID reported by "BASELINE", was 3774. Which means the lowest positive unused "ID" is 1.
CELKO solution 1 reports - 3775
Tally solution reports - 1
Row_Number solution reports - 1
CELKO solution 2 reports - 3773
OK, so again the CELKO solution reports the wrong lowest unused ID. Timings? IO?
========== BASELINE ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001D'. Scan count 1, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 219 ms, elapsed time = 213 ms.
========== CELKO's first Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001D'. Scan count 18, logical reads 2008758, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 167078 ms, elapsed time = 44340 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001D'. Scan count 5, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 268 ms, elapsed time = 84 ms.
========== ROW_NUMBER Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001D'. Scan count 6, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 936 ms, elapsed time = 478 ms.
========== CELKO's second Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001D'. Scan count 36, logical reads 4017516, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 333825 ms, elapsed time = 89445 ms.
Hang on, the new CELKO solution is even slower than the old one! OK, let's try and give the CELKO solutions a better chance by using an index and where the lowest ID is not 1.
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
CREATE TABLE #testEnvironment
(ID INTEGER NOT NULL PRIMARY KEY
CHECK (ID > 0));
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS ID
FROM t5 x, t5 y)
INSERT INTO #testEnvironment
SELECT ID
FROM tally
WHERE ID <= 1000000 AND
(ID%3 = 0 OR ID%5 = 0 OR ID = 1)
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN(ID)
FROM #testEnvironment
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s first Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
--Use a real tally table for performance, here's one on the fly for testing.
--If you're unsure about what a tally table is, or what it's for, please read
--this article --> http://www.sqlservercentral.com/articles/T-SQL/62867/
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment a
RIGHT OUTER JOIN tally b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== ROW_NUMBER Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s second Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH Available (ID) AS (
SELECT ID -1
FROM #testEnvironment
WHERE (ID -1) NOT IN (SELECT ID FROM #testEnvironment)
UNION ALL
SELECT ID +1
FROM #testEnvironment
WHERE (ID +1) NOT IN (SELECT ID FROM #testEnvironment))
SELECT MIN (ID)
FROM Available
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
--CleanUp
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
Baseline MIN is - 1
I know from my code that the next lowest number missing is a 2.
CELKO solution 1 reports - 2
Tally solution reports - 2
Row_Number solution reports - 2
CELKO solution 2 reports - 0
CELKO's second solution assumes the lowest ID is 0, which could be argued as correct so I won't totally disagree. That is down to the OP, but personally I felt that we were after positive integers. How about the timings?
========== BASELINE ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001E'. Scan count 1, logical reads 3, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
========== CELKO's first Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001E'. Scan count 2, logical reads 1510, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 218 ms, elapsed time = 218 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001E'. Scan count 1, logical reads 54, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 15 ms, elapsed time = 29 ms.
========== ROW_NUMBER Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001E'. Scan count 2, logical reads 6, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
========== CELKO's second Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________00000000001E'. Scan count 4, logical reads 3020, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 453 ms, elapsed time = 449 ms.
Well, as before the ROW_NUMBER solution beats the TALLY, but both CELKO solutions lag way behind.
OK, let's try a table with one missing ID.
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
DECLARE @numberToBeDeleted INT
SELECT @numberToBeDeleted = (ABS(CHECKSUM(NEWID())) % 60000) + 1
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID
INTO #testEnvironment
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
DELETE FROM #testEnvironment
WHERE ID = @numberToBeDeleted
PRINT '========== BASELINE =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT @numberToBeDeleted
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s first Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT MIN (ID +1) AS unused_user_id_min
FROM #testEnvironment
WHERE (ID + 1) NOT IN (SELECT ID FROM #testEnvironment);
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== TALLY TABLE Solution =========='
--Use a real tally table for performance, here's one on the fly for testing.
--If you're unsure about what a tally table is, or what it's for, please read
--this article --> http://www.sqlservercentral.com/articles/T-SQL/62867/
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
tally AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS num
FROM t4 x, t4 y)
SELECT MIN(num) AS nextID
FROM #testEnvironment a
RIGHT OUTER JOIN tally b ON a.ID = b.num
WHERE ID IS NULL
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== ROW_NUMBER Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
SELECT TOP 1 RowNumber as FirstUnusedID FROM (SELECT ROW_NUMBER() OVER(ORDER BY ID)
AS RowNumber,ID FROM #testEnvironment)T
WHERE RowNumber <> ID
ORDER BY 1,((SELECT MAX(ID) + 1 FROM #testEnvironment))
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
PRINT '========== CELKO''s second Solution =========='
SET STATISTICS IO ON
SET STATISTICS TIME ON
;WITH Available (ID) AS (
SELECT ID -1
FROM #testEnvironment
WHERE (ID -1) NOT IN (SELECT ID FROM #testEnvironment)
UNION ALL
SELECT ID +1
FROM #testEnvironment
WHERE (ID +1) NOT IN (SELECT ID FROM #testEnvironment))
SELECT MIN (ID)
FROM Available
SET STATISTICS TIME OFF
SET STATISTICS IO OFF
--CleanUp
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
Baseline deleted ID is 25915
CELKO solution 1 reports - 25915
Tally solution reports - 25915
Row_Number solution reports - 25915
CELKO solution 2 reports - 0
Again, 0 could be argued as correct, we'd have to check with the OP. Timings?
========== BASELINE ==========
(1 row(s) affected)
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
========== CELKO's first Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________000000000020'. Scan count 10, logical reads 3218, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 1359 ms, elapsed time = 368 ms.
========== TALLY TABLE Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________000000000020'. Scan count 5, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 204 ms, elapsed time = 78 ms.
========== ROW_NUMBER Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________000000000020'. Scan count 6, logical reads 1609, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 610 ms, elapsed time = 233 ms.
========== CELKO's second Solution ==========
(1 row(s) affected)
Table '#testEnvironment____________________________________________________________________________________________________000000000020'. Scan count 20, logical reads 6436, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 2797 ms, elapsed time = 713 ms.
Tally table wins easily, followed by ROW_NUMBER, then the CELKO's first solution and finally CELKO's second solution brings up the rear.
Viewing 15 posts - 16 through 30 (of 38 total)
You must be logged in to reply to this topic. Login to reply