Blog Post

Need to create a time dimension down to the millisecond?

,

Ok, I know that creating a time dimension down to the millisecond is extremely rare.  After all, we are talking about  86,400,000 records.  In my case, it is needed for a customer with a 50TB database that contains stock trades that are recorded down to the millisecond (we are using a Parallel Data Warehouse).  You can imagine how long it would take to build this table using a loop with inserts.  But my friend Martin Lee came up with a very fast solution using a cross join (cartesian product).  Check out the code:

/* CREATE TIME PART TABLES TO DO A CROSS JOIN CARTESIAN PRODUCT TO CTAS INTO DIMTIME */--CREATE A SINGLE VALUE TABLE TO USE AS DUMMY FROM TABLE.
IF NOT EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'Singleton' AND type = 'U')
BEGIN
CREATE TABLE DirectEdgeDW.dbo.Singleton
WITH (DISTRIBUTION = REPLICATE)
AS
SELECT DISTINCT 1 AS VALUE FROM SYS.DATABASES
END
--GENERATE MILLISECOND TABLE
IF EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'MCL_TimePart_MS' AND type = 'U')
BEGIN
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_MS
END
CREATE TABLE DirectEdgeDW.dbo.MCL_TimePart_MS
WITH (DISTRIBUTION = REPLICATE)
AS
SELECT TOP 999 ROW_NUMBER() OVER (ORDER BY REQUEST_ID) AS Millisecond
FROM sys.dm_pdw_exec_requests
INSERT INTO MCL_TimePart_MS
VALUES (0)
--SELECT * FROM MCL_TimePart_MS
IF EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'MCL_TimePart_SS' AND type = 'U')
BEGIN
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_SS
END
CREATE TABLE DirectEdgeDW.dbo.MCL_TimePart_SS
WITH (DISTRIBUTION = REPLICATE)
AS
SELECT Millisecond AS Second FROM DirectEdgeDW.dbo.MCL_TimePart_MS WHERE Millisecond < 60
--select * from MCL_TimePart_SS
IF EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'MCL_TimePart_MM' AND type = 'U')
BEGIN
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_MM
END
CREATE TABLE DirectEdgeDW.dbo.MCL_TimePart_MM
WITH (DISTRIBUTION = REPLICATE)
AS
SELECT second AS Minute FROM DirectEdgeDW.dbo.MCL_TimePart_SS
--select * from MCL_TimePart_MM
IF EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'MCL_TimePart_HH' AND type = 'U')
BEGIN
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_HH
END
CREATE TABLE DirectEdgeDW.dbo.MCL_TimePart_HH
WITH (DISTRIBUTION = REPLICATE)
AS
SELECT second AS Hour FROM DirectEdgeDW.dbo.MCL_TimePart_SS WHERE Second < 24
--select * from MCL_TimePart_HH
--CROSS JOIN
IF EXISTS (SELECT 1 FROM DirectEdgeDW.sys.tables WHERE name = 'DimTime' AND type = 'U')
BEGIN
DROP TABLE DirectEdgeDW.dbo.DimTime
END
CREATE TABLE DirectEdgeDW.dbo.DimTime
WITH (CLUSTERED INDEX(TimeKey),DISTRIBUTION = REPLICATE)
AS
SELECT CAST(CAST(hh.Hour AS VARCHAR(2)) + RIGHT('0' + CAST(mm.Minute AS VARCHAR(2)),2) + RIGHT('0' + CAST(ss.Second AS VARCHAR(2)),2) + RIGHT('00' + CAST(ms.Millisecond AS VARCHAR(3)),3) AS INT) AS TimeKey
, hh.Hour, mm.Minute, ss.Second, ms.Millisecond
FROM
DirectEdgeDW.dbo.MCL_TimePart_MS ms
CROSS JOIN DirectEdgeDW.dbo.MCL_TimePart_SS ss
CROSS JOIN DirectEdgeDW.dbo.MCL_TimePart_MM mm
CROSS JOIN DirectEdgeDW.dbo.MCL_TimePart_HH hh
INSERT INTO dbo.DimDate (DateKey,FullDateAltKey,DayNumberOfWeek,DayNameOfWeek,DayNumberOfMonth,DayNumberOfYear,WeekNumberOfYear,WeekNameOfYear,[MonthName],MonthNumberOfYear,MonthFlag,QuarterNumber,QuarterName,QuarterFlag,SemesterNumber,SemesterName,SemesterFlag,[Year])
SELECT -1, CAST('1/1/2100' AS DATE), 0, 'Unknown', 0, 0, 0, 'Unknown', 'Unknown', 0, 0, 0, 'Unknown', 0, 0, 'Unknown', 0, 0
FROM [DirectEdgeDW].[dbo].[UnknownMembers]
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_MS
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_SS
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_MM
DROP TABLE DirectEdgeDW.dbo.MCL_TimePart_HH

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating