Faster way to populate number table variation with inline tally?

Question

Post reply

Faster way to populate number table variation with inline tally?

sqldriver

SSChampion

Points: 10440
More actions
December 27, 2014 at 8:41 am

#296252

Hi,
I have to create a table like this across a bunch of servers. Does anyone see a faster way to do it? I'm thinking that I'm overlooking something with needing two additional CTEs, but maybe not. I have it at 17 seconds, which isn't much faster than a while loop solution that's currently in place.
Thanks
DECLARE @START DATETIME,
@msg NVARCHAR(MAX) = N''
USE tempdb
SELECT @START = GETDATE()
CREATE TABLE dbo.EulerSource ( [SID] INT, Euler BIGINT )
;WITH E1(n) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),-- 10
E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10
E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100
E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000
E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000
E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000
E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000
E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000
EulerOrder AS (
SELECT TOP 12963600
[EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600
FROM e8
),
EulerRuler AS (
SELECT eo.EulerOrder,
[EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1
FROM EulerOrder eo
)
INSERT dbo.EulerSource
( [SID] ,
Euler
)
SELECT el.EulerOrder ,
el.EulerRuler
FROM EulerRuler el
WHERE el.EulerRuler >= el.EulerOrder
ORDER BY el.EulerOrder
SELECT @msg = 'CTE table created in '
+ CAST(DATEDIFF(MILLISECOND, @START, GETDATE()) AS VARCHAR)
+ ' milliseconds.'
PRINT (@msg)
SELECT @START = GETDATE()
CREATE CLUSTERED INDEX [CQoS_SID] ON [dbo].[EulerSource]
(
[SID] ASC
)WITH (ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
SELECT @msg = 'CTE clustered index created at '
+ CAST(DATEDIFF(MILLISECOND, @START, GETDATE()) AS VARCHAR)
+ ' milliseconds.'
PRINT @msg

Viewing 15 posts - 1 through 15 (of 30 total)

You must be logged in to reply to this topic. Login to reply

Eirikur Eiriksson SSC Guru Points: 182951 More actions · Answer 1

Quick(er) suggestion, simplifying the query and loosing the sort required by the parallel execution plan

😎

USE tempdb;

GO

SET NOCOUNT ON;

DECLARE @INT_BUCKET INT = 0;

DECLARE @BIGINT_BUCKET BIGINT = 0;

DECLARE @TIMING_RESULTS TABLE

(

TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

,TR_TX VARCHAR(100) NOT NULL

);

DECLARE @SAMPLE_SIZE INT = 12963600;

-- Test one starts

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

;WITH E1(n) AS

(

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

),-- 10

E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

EulerOrder AS (

SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

[EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

FROM e8

),

EulerRuler AS (

SELECT eo.EulerOrder,

[EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

FROM EulerOrder eo

)

SELECT

@INT_BUCKET = el.EulerOrder

,@BIGINT_BUCKET = el.EulerRuler

FROM EulerRuler el

WHERE el.EulerRuler >= el.EulerOrder

ORDER BY el.EulerOrder

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

,EulerOrder AS

(

SELECT TOP (@SAMPLE_SIZE)

[EulerOrder] = NM.N / 3600

,[EulerRuler] = NM.N % 3600

FROM NUMS NM

)

SELECT

@INT_BUCKET = EU.EulerOrder

,@BIGINT_BUCKET = EU.EulerRuler

FROM EulerOrder EU

WHERE EU.EulerRuler >= EU.EulerOrder;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

SELECT

TR.TR_TX AS OPERATION

,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

FROM @TIMING_RESULTS TR

GROUP BY TR.TR_TX

ORDER BY 2;

/* uncomment this section to get the execution plan */

;WITH XMLNAMESPACES ( DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')

,QUERY_PLAN AS

(

SELECT

XP.query_plan

FROM sys.dm_exec_requests ER

CROSS APPLY sys.dm_exec_query_plan(ER.plan_handle) XP

WHERE ER.session_id = @@SPID

)

SELECT

QP.query_plan

FROM QUERY_PLAN QP;

Results

OPERATIONDURATION

Eirikur's version3778410

sqldriver's version10323425

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 2

As always, the documentation is in the code. First, the run results.

OPERATION DURATION

---------------------------------------------------------------------------------------------------- -----------

Jeff's version, which also creates and populates the table... 3014173

Eirikur's version 4624264

sqldriver's version 16524945

USE tempdb;

GO

SET NOCOUNT ON;

DECLARE @INT_BUCKET INT = 0;

DECLARE @BIGINT_BUCKET BIGINT = 0;

DECLARE @TIMING_RESULTS TABLE

(

TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

,TR_TX VARCHAR(100) NOT NULL

);

DECLARE @SAMPLE_SIZE INT = 12963600;

-- Test one starts

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

;WITH E1(n) AS

(

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

),-- 10

E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

EulerOrder AS (

SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

[EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

FROM e8

),

EulerRuler AS (

SELECT eo.EulerOrder,

[EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

FROM EulerOrder eo

)

SELECT

@INT_BUCKET = el.EulerOrder

,@BIGINT_BUCKET = el.EulerRuler

FROM EulerRuler el

WHERE el.EulerRuler >= el.EulerOrder

ORDER BY el.EulerOrder

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

-----------------------------------------------------------------------------------------------------------------------

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

,EulerOrder AS

(

SELECT TOP (@SAMPLE_SIZE)

[EulerOrder] = NM.N / 3600

,[EulerRuler] = NM.N % 3600

FROM NUMS NM

)

SELECT

@INT_BUCKET = EU.EulerOrder

,@BIGINT_BUCKET = EU.EulerRuler

FROM EulerOrder EU

WHERE EU.EulerRuler >= EU.EulerOrder;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

-----------------------------------------------------------------------------------------------------------------------

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...')

;

--=====================================================================================================================

-- Create and populate the test table with the product of a simple "TRIANGULAR JOIN" such that for every value of

-- [SID] (0 thru 3599), the Euler column will have a value greater than or equal to the [SID] thru 3599. For

-- reference purposes, a "Triangular Join" is half of a "Square Join", which is also known as a "Cartesian

-- Product" or "Cross Join".

--=====================================================================================================================

--===== If the test table alreeady exists, drop it to make reruns in SSMS easier

IF OBJECT_ID('dbo.EulerSource','U') IS NOT NULL

DROP TABLE dbo.EulerSource

;

--===== Create AND populate the table on-the-fly.

WITH

E1(N) AS (

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

) -- 10^ 1 = 10

,E4(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^ 4 = 10,000

,E16(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^16 = 1 followed by more zero's than you can shake a stick at.

,Tally(N) AS (

SELECT 0 UNION ALL --This is faster than subtracting a 1 for every ROW_NUMBER().

SELECT TOP 3599 N = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E16

)

SELECT [SID] = ISNULL(CAST(t1.N AS INT),0) -- ISNULL makes the column NOT NULL

,Euler = ISNULL(CAST(t2.N AS INT),0) -- ISNULL makes the column NOT NULL

INTO dbo.EulerSource -- This creates the table

FROM Tally t1, Tally t2 -- Yep... 'nuther happy little Cross Join here...

WHERE t2.N >= t1.N -- ... and this limits it to a Triangular Join.

;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...');

SELECT

TR.TR_TX AS OPERATION

,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

FROM @TIMING_RESULTS TR

GROUP BY TR.TR_TX

ORDER BY DURATION;

--ORDER BY 2; --ORDER BY column ordinal has been deprecated and should not be used.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 3

@sqldriver,

Apologies. Forgot to post the code to create the clustered index. SQL Server REALLY likes unique indexes and REALLY likes NOT NULL columns. With that in mind, that's why I used the ISNULL() thing in the code I posted. It forces the result table in the SELECT/INTO (which, as an added bonus, is also minimally logged if the DB is in the BULK LOGGED or SIMPLE recovery mode as is the creation of the clustered index) to produce NOT NULL columns. Since the composite of both columns form a unique set of rows, is seems natural to make unique clustered index that doesn't allow NULLs by making the composite of the two columns the PK.

You might also want to reconsider the method you're currently using for your inline Tally Table. That's a whole lot of code to just get to 10^8 and the extra cross joins do tend to slow the larger number generations down a bit.

Finally, since this table probably won't be written to or updated (although it might be dropped and recreated), I force the FILLFACTOR to 100 for space conservation (if someone has changed the default to something lower) and for a bit of extra speed during SELECTs.

--===== Create the unique index as the PRIMARY KEY.

ALTER TABLE dbo.EulerSource

ADD CONSTRAINT PK_EulerSource PRIMARY KEY CLUSTERED ([SID] ASC, Euler ASC)

WITH (ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 100) ON [PRIMARY]

;

As a final note, this generates fast enough. Do you really need for it to be a table?

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Eirikur Eiriksson SSC Guru Points: 182951 More actions · Answer 4

Just brilliant as always Jeff, Chapeau!

Quick modification, mainly dropping the ISNULL as the set will not contain any nulls

😎

Run results

OPERATION DURATION

--------------------------------------------------------------- -----------

Eirikur's version with Jeff's tricks 2416806

Jeff's version, which also creates and populates the table... 2628008

Eirikur's version 3656208

sqldriver's version 10546826

The code (test harness)

USE tempdb;

GO

SET NOCOUNT ON;

DECLARE @INT_BUCKET INT = 0;

DECLARE @BIGINT_BUCKET BIGINT = 0;

DECLARE @TIMING_RESULTS TABLE

(

TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

,TR_TX VARCHAR(100) NOT NULL

);

DECLARE @SAMPLE_SIZE INT = 12963600;

-- Test one starts

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

;WITH E1(n) AS

(

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

),-- 10

E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

EulerOrder AS (

SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

[EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

FROM e8

),

EulerRuler AS (

SELECT eo.EulerOrder,

[EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

FROM EulerOrder eo

)

SELECT

@INT_BUCKET = el.EulerOrder

,@BIGINT_BUCKET = el.EulerRuler

FROM EulerRuler el

WHERE el.EulerRuler >= el.EulerOrder

ORDER BY el.EulerOrder

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

-----------------------------------------------------------------------------------------------------------------------

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

,EulerOrder AS

(

SELECT --TOP (@SAMPLE_SIZE)

[EulerOrder] = NM.N / 3600

,[EulerRuler] = NM.N % 3600

FROM NUMS NM

)

SELECT

@INT_BUCKET = EU.EulerOrder

,@BIGINT_BUCKET = EU.EulerRuler

FROM EulerOrder EU

WHERE EU.EulerRuler >= EU.EulerOrder;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

-----------------------------------------------------------------------------------------------------------------------

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...')

;

--=====================================================================================================================

-- Create and populate the test table with the product of a simple "TRIANGULAR JOIN" such that for every value of

-- [SID] (0 thru 3599), the Euler column will have a value greater than or equal to the [SID] thru 3599. For

-- reference purposes, a "Triangular Join" is half of a "Square Join", which is also known as a "Cartesian

-- Product" or "Cross Join".

--=====================================================================================================================

--===== If the test table alreeady exists, drop it to make reruns in SSMS easier

IF OBJECT_ID('dbo.EulerSource','U') IS NOT NULL

DROP TABLE dbo.EulerSource

;

--===== Create AND populate the table on-the-fly.

WITH

E1(N) AS (

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

) -- 10^ 1 = 10

,E4(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^ 4 = 10,000

,E16(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^16 = 1 followed by more zero's than you can shake a stick at.

,Tally(N) AS (

SELECT 0 UNION ALL --This is faster than subtracting a 1 for every ROW_NUMBER().

SELECT TOP 3599 N = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E16

)

SELECT [SID] = ISNULL(CAST(t1.N AS INT),0) -- ISNULL makes the column NOT NULL

,Euler = ISNULL(CAST(t2.N AS INT),0) -- ISNULL makes the column NOT NULL

INTO dbo.EulerSource -- This creates the table

FROM Tally t1, Tally t2 -- Yep... 'nuther happy little Cross Join here...

WHERE t2.N >= t1.N -- ... and this limits it to a Triangular Join.

;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...');

-----------------------------------------------------------------------------------------------------------------------

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version with Jeff''s tricks');

--===== If the test table alreeady exists, drop it to make reruns in SSMS easier

IF OBJECT_ID('dbo.EEulerSource','U') IS NOT NULL

DROP TABLE dbo.EEulerSource

;

;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

,NUMS(N) AS (SELECT 0 AS N UNION ALL SELECT TOP(3599) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)

SELECT

[SID] = EUO.N

,Euler = EUR.N

INTO dbo.EEulerSource

FROM NUMS EUO, NUMS EUR

WHERE EUR.N >= EUO.N;

INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version with Jeff''s tricks');

-----------------------------------------------------------------------------------------------------------------------

SELECT

TR.TR_TX AS OPERATION

,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

FROM @TIMING_RESULTS TR

GROUP BY TR.TR_TX

ORDER BY DURATION;

--ORDER BY 2; --ORDER BY column ordinal has been deprecated and should not be used.

/* uncomment this section to get the execution plan */

;WITH XMLNAMESPACES ( DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')

,QUERY_PLAN AS

(

SELECT

XP.query_plan

FROM sys.dm_exec_requests ER

CROSS APPLY sys.dm_exec_query_plan(ER.plan_handle) XP

WHERE ER.session_id = @@SPID

)

SELECT

QP.query_plan

FROM QUERY_PLAN QP;

sqldriver SSChampion Points: 10440 More actions · Answer 5

Holy cow. Those are both way prettier solutions. Thanks to both of you.

I ran them on my test box and got close enough results.

Jeff, first, you need to apologize about as much as an Italian mackerel needs to set an alarm for Saturday morning 😀

Second, this table is part of a process that runs hourly. So while it is much faster, I don't think we want to waste the compute time generating it that often. I know it looks a little strange.

Eirikur - mid-post edit - taking out the ISNULL is a bit faster on generation, but I get an error when I try to create the index Jeff defined, which will be a nice boost to the calculations done using this table, I think (also owe you a PM).

Msg 8111, Level 16, State 1, Line 137

Cannot define PRIMARY KEY constraint on nullable column in table 'EulerSource'.

Msg 1750, Level 16, State 0, Line 137

Could not create constraint. See previous errors.

Thanks

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 6

Eirikur Eiriksson (12/27/2014)
Just brilliant as always Jeff, Chapeau!
Quick modification, mainly dropping the ISNULL as the set will not contain any nulls
😎

Thanks, Eirikur. You're the consummate Gentleman and I'm frequently humbled by your gracious comments. :blush:

I absolutely agree. The ISNULL does slow things down but it's essential in this case. Like I said in the embedded documentation, it's there to force the columns that are created during the SELECT/INTO (which builds the table on-the-fly) to be NOT NULL so that we can actually deploy a composite PK over the two columns. That, notwithstanding, I agree... tables don't actually need PKs. They just need a UNIQUE CLUSTERED INDEX and that doesn't require NOT NULL columns. I wanted to make sure that the columns showed up as NOT NULL because the optimizer loves NOT NULL columns, not to mention avoiding the page level NULLable column bits. By itself, such an optimization won't amount to much on this single table. Over several hundred tables, it does add up.

Like Granny used to say, "Mind the pennies and the dollars will take care of themselves." 😀

Still, it's a great thing to point out as a performance point for the creation of such tables and I sure do appreciate you taking the time to make the change and run the test.

As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 7

sqldriver (12/27/2014)
Holy cow. Those are both way prettier solutions. Thanks to both of you.
I ran them on my test box and got close enough results.
Jeff, first, you need to apologize about as much as an Italian mackerel needs to set an alarm for Saturday morning 😀
Second, this table is part of a process that runs hourly. So while it is much faster, I don't think we want to waste the compute time generating it that often. I know it looks a little strange.
Eirikur - mid-post edit - taking out the ISNULL is a bit faster on generation, but I get an error when I try to create the index Jeff defined, which will be a nice boost to the calculations done using this table, I think (also owe you a PM).
Msg 8111, Level 16, State 1, Line 137
Cannot define PRIMARY KEY constraint on nullable column in table 'EulerSource'.
Msg 1750, Level 16, State 0, Line 137
Could not create constraint. See previous errors.
Thanks

You bet. Thank you very much for the feedback.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 8

Eirikur Eiriksson (12/27/2014)
Run results
OPERATION DURATION
--------------------------------------------------------------- -----------
Eirikur's version with Jeff's tricks 2416806
Jeff's version, which also creates and populates the table... 2628008
Eirikur's version 3656208
sqldriver's version 10546826

p.s. I continue to be amazed at the performance of the hardware you're running on, Eirikur. My little 4 processor i5 laptop doesn't stand a chance.

Shifting gears a bit and just to share a conviction of mine, I've recently been involved in a discussion about throwing heavy iron at problem code rather than fixing the code itself. As impressed as I am with the hardware, this is an outstanding example of how much more ROI one can get by knowing the ropes of T-SQL. The original problem on my laptop took 16524945 _uS and took 10546826 _uS on that wonderful box of yours. That's an improvement of 36%, which is awesome for "just" a hardware change. Using the time from my lesser powered laptop even for the version that uses ISNULL(), the time dropped to 3014173, which is nearly an 82% improvement. In simpler terms, the hardware improvement allowed for a 1/2X improvement. The change in code that we both made exceeds a 4.4X improvement in performance.

I wonder where those folks on that other discussion I've been having will find a computer that actually runs 4.4X faster and how much it will cost? 😀 And it doesn't appear that MPP will actually help for a query like this one because 1) parallelism is being used even in the original query and 2) you actually might have to rewrite code anyway to take advantage of MPP because it's a different paradigm.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Eirikur Eiriksson SSC Guru Points: 182951 More actions · Answer 9

Jeff Moden (12/28/2014)
Eirikur Eiriksson (12/27/2014)
Just brilliant as always Jeff, Chapeau!
Quick modification, mainly dropping the ISNULL as the set will not contain any nulls
😎
Thanks, Eirikur. You're the consummate Gentleman and I'm frequently humbled by your gracious comments. :blush:
I absolutely agree. The ISNULL does slow things down but it's essential in this case. Like I said in the embedded documentation, it's there to force the columns that are created during the SELECT/INTO (which builds the table on-the-fly) to be NOT NULL so that we can actually deploy a composite PK over the two columns. That, notwithstanding, I agree... tables don't actually need PKs. They just need a UNIQUE CLUSTERED INDEX and that doesn't require NOT NULL columns. I wanted to make sure that the columns showed up as NOT NULL because the optimizer loves NOT NULL columns, not to mention avoiding the page level NULLable column bits. By itself, such an optimization won't amount to much on this single table. Over several hundred tables, it does add up.
Like Granny used to say, "Mind the pennies and the dollars will take care of themselves." 😀
Still, it's a great thing to point out as a performance point for the creation of such tables and I sure do appreciate you taking the time to make the change and run the test.
As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

Thanks Jeff and point taken;-), which by the way I missed in the first pass, looking too much at the sequence generation but not the bigger picture of building a "usable" table containing the sequence. The ISNULL cost is close to 10% which is hardly anything when it comes to the benefits of having non-nullable columns as you mentioned. Comparing the method to other bulk-insert type methods it outperforms all I've tested (Merge, Traceflag 610 etc.) by at least 20-25%.

😎

Michael Meierruth SSChampion Points: 10051 More actions · Answer 10

Jeff Moden (12/28/2014)
As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

Jeff,

What you're saying there really freaked me out.

And you confirm this has never changed in later versions of SQL Server?

So many times I used a CTE where the first excraction has the purpose of drastically lowering the row count of the table I will be dealing with later on. Instead every reference to the first extraction re-executes it.

I guess it's time to get back to real, live tables.

sqldriver SSChampion Points: 10440 More actions · Answer 11

I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.

If there's something in particular you'd like me to try to confirm this behavior, let me know.

SET NOCOUNT ON

SET STATISTICS IO, TIME ON

/*

SELECT @@VERSION

Microsoft SQL Server 2014 - 12.0.2456.0 (X64)

Dec 11 2014 17:32:03

Copyright (c) Microsoft Corporation

Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)

*/

USE AdventureWorks2014

;

WITH pp

AS ( SELECT p.BusinessEntityID ,

p.Title ,

p.FirstName ,

p.LastName

FROM Person.Person AS p

WHERE p.BusinessEntityID % 2 = 0

)

SELECT *

FROM pp p1

JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID

/*

SQL Server Execution Times:

CPU time = 0 ms, elapsed time = 0 ms.

Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

*/

;

WITH pp

AS ( SELECT p.BusinessEntityID ,

p.Title ,

p.FirstName ,

p.LastName

FROM Person.Person AS p

WHERE p.BusinessEntityID % 2 = 0

),

pp2 AS( SELECT p1.BusinessEntityID ,

p1.Title ,

p1.FirstName ,

p1.LastName

FROM pp p1

)

SELECT *

FROM pp2 p1

JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID

/*

SQL Server Execution Times:

CPU time = 32 ms, elapsed time = 137 ms.

Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

*/

EDIT: had top 10 in one query but not the other.

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 12

Michael Meierruth (12/29/2014)
Jeff Moden (12/28/2014)
As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.
Jeff,
What you're saying there really freaked me out.
And you confirm this has never changed in later versions of SQL Server?
So many times I used a CTE where the first excraction has the purpose of drastically lowering the row count of the table I will be dealing with later on. Instead every reference to the first extraction re-executes it.
I guess it's time to get back to real, live tables.

I haven't checked that on 2014 but 2012 created a separate execution plan for each call to the "Tally" cte in the code I posted.

For me, the query I posted isn't a real problem to me because, even though executed twice, runs so fast that it doesn't matter. On other queries where I might need a self join to a CTE, I've been known to do a SELECT/INTO a Temp Table, maybe add an index, and the join to the table twice. "It Depends" on how much work the CTE has to do and these types of self joins dot come up very often.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Jeff Moden SSC Guru Points: 1004704 More actions · Answer 13

sqldriver (12/29/2014)
I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.
If there's something in particular you'd like me to try to confirm this behavior, let me know.
SET NOCOUNT ON
SET STATISTICS IO, TIME ON
/*
SELECT @@VERSION
Microsoft SQL Server 2014 - 12.0.2456.0 (X64)
Dec 11 2014 17:32:03
Copyright (c) Microsoft Corporation
Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)
*/
USE AdventureWorks2014
;
WITH pp
AS ( SELECT p.BusinessEntityID ,
p.Title ,
p.FirstName ,
p.LastName
FROM Person.Person AS p
WHERE p.BusinessEntityID % 2 = 0
)
SELECT *
FROM pp p1
JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID
/*
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
*/
;
WITH pp
AS ( SELECT p.BusinessEntityID ,
p.Title ,
p.FirstName ,
p.LastName
FROM Person.Person AS p
WHERE p.BusinessEntityID % 2 = 0
),
pp2 AS( SELECT p1.BusinessEntityID ,
p1.Title ,
p1.FirstName ,
p1.LastName
FROM pp p1
)
SELECT *
FROM pp2 p1
JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID
/*
SQL Server Execution Times:
CPU time = 32 ms, elapsed time = 137 ms.
Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
*/
EDIT: had top 10 in one query but not the other.

Just look at the actual execution plan for two sets of blocks that look similar.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

sqldriver SSChampion Points: 10440 More actions · Answer 14

Jeff Moden (12/29/2014)
sqldriver (12/29/2014)
I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.
If there's something in particular you'd like me to try to confirm this behavior, let me know.
SET NOCOUNT ON
SET STATISTICS IO, TIME ON
/*
SELECT @@VERSION
Microsoft SQL Server 2014 - 12.0.2456.0 (X64)
Dec 11 2014 17:32:03
Copyright (c) Microsoft Corporation
Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)
*/
USE AdventureWorks2014
;
WITH pp
AS ( SELECT p.BusinessEntityID ,
p.Title ,
p.FirstName ,
p.LastName
FROM Person.Person AS p
WHERE p.BusinessEntityID % 2 = 0
)
SELECT *
FROM pp p1
JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID
/*
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
*/
;
WITH pp
AS ( SELECT p.BusinessEntityID ,
p.Title ,
p.FirstName ,
p.LastName
FROM Person.Person AS p
WHERE p.BusinessEntityID % 2 = 0
),
pp2 AS( SELECT p1.BusinessEntityID ,
p1.Title ,
p1.FirstName ,
p1.LastName
FROM pp p1
)
SELECT *
FROM pp2 p1
JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID
/*
SQL Server Execution Times:
CPU time = 32 ms, elapsed time = 137 ms.
Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
*/
EDIT: had top 10 in one query but not the other.
Just look at the actual execution plan for two sets of blocks that look similar.

See attached; doesn't seem to.

I can test on 2012 later tonight. The laptop I had it installed on is sleeping with the BlackBerrys, so need a few to dig up the CD and install it somewhere else.