November 9, 2011 at 12:25 am
Hi,
I have a query which sums the total number of commercial seconds aired in a clock hour
-- Total booked commercial seconds per whole clock hour.
-- ClockHour is included in the SELECT list so each total can be tied back
-- to its hour (e.g. 700 seconds for clock hour 10, i.e. 1000->1100);
-- without it the grouped sums are indistinguishable from one another.
SELECT ClockHour,
       SUM(BookedLength) AS TotalPerHour
FROM ComercialsBreaks
GROUP BY ClockHour
So a result might be 700 seconds booked for clock hour 10 - i.e. 1000->1100
I have no issue doing this but now I need to check within 60 minutes on the 1/2 hour - so for example between 1030->1130.
Stumped :crazy:
Any ideas?
Thanks in advance,
JayK
November 9, 2011 at 1:26 am
I have no issue doing this but now I need to check within 60 minutes on the 1/2 hour - so for example between 1030->1130.
Create a computed (or actual) column in table to identify the group. Then use it in GROUP By clause. Alternatively, you may do it in subquery (or inline view) to mark a group identifier & then group by it in outer query.
November 9, 2011 at 1:56 am
JayK (11/9/2011)
Hi,I have a query which sums the total number of commercial seconds aired in a clock hour
-- (Quoted from the original post) Per-clock-hour total of booked seconds.
SELECT SUM(BookedLength) As TotalPerHour
FROM ComercialsBreaks
GROUP BY ClockHour
So a result might be 700 seconds booked for clock hour 10 - i.e. 1000->1100
I have no issue doing this but now I need to check within 60 minutes on the 1/2 hour - so for example between 1030->1130.
Stumped :crazy:
Any ideas?
Thanks in advance,
JayK
It's early, so this isn't particularly well optimised. Also, I've created some sample data for testing.
Here's the actual query. Use a real tally table if possible, the one I've used here is for testing purposes only.
-- Inline row generator: each CTE cross-joins the previous one with itself,
-- growing 2 -> 4 -> 16 -> 256 -> 65536 rows; t5 x, t5 y yields ~2^32 candidates.
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
-- 100 half-open time windows [n, n2). NOTE: n2 - n is 30 minutes here
-- (both bounds step by 30*ROW_NUMBER), so this buckets on 30-minute
-- intervals rather than 60-minute windows.
tally AS (SELECT TOP 100
DATEADD(mi,30*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))),'2011-11-09 00:00:00') AS n,
DATEADD(mi,30*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))+1),'2011-11-09 00:00:00') AS n2
FROM t5 x, t5 y)
-- One result row per window; ISNULL turns the NULL SUM of an empty window into 0.
SELECT n, ISNULL(b.sum_secondsbooked,0) AS total_seconds_per_period
FROM tally
-- OUTER APPLY runs the correlated SUM once per window and keeps windows
-- with no matching rows (inner APPLY would drop them).
OUTER APPLY (SELECT SUM(secondsbooked) AS sum_secondsbooked
FROM (SELECT randomDate, secondsbooked
FROM #testEnvironment) a
WHERE randomDate >= n AND randomDate < n2) b
Here it is with the sample data.
-- Whole benchmark runs inside a transaction so the ROLLBACK at the end
-- discards the 1M-row temp table and leaves tempdb clean.
BEGIN TRAN
--Standard TestEnvironment of 1,000,000 rows of random-ish data
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
--1,000,000 Random rows of data
-- randomDate: GETDATE() plus a random fraction of a day;
-- secondsbooked: random int in 1..700.
-- Cross-joined syscolumns supplies enough rows for TOP 1000000.
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
RAND(CHECKSUM(NEWID())) + GETDATE() AS randomDate,
(ABS(CHECKSUM(NEWID())) % 700) + 1 AS secondsbooked
INTO #testEnvironment
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
-- Baseline: cost of a single full-table SUM, for comparison with the
-- windowed query below.
PRINT '========== BASELINE =========='
SET STATISTICS TIME ON
SELECT SUM(secondsbooked) FROM #testEnvironment
SET STATISTICS TIME OFF
PRINT REPLICATE('=',80)
PRINT '========== Query =========='
SET STATISTICS TIME ON
-- Row generator CTEs: 2 -> 4 -> 16 -> 256 -> 65536 rows; t5 x t5 ~ 2^32.
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
-- 100 half-open windows [n, n2); width is 30 minutes in this version.
tally AS (SELECT TOP 100
DATEADD(mi,30*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))),'2011-11-09 00:00:00') AS n,
DATEADD(mi,30*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))+1),'2011-11-09 00:00:00') AS n2
FROM t5 x, t5 y)
SELECT n, ISNULL(b.sum_secondsbooked,0) AS total_seconds_per_period
FROM tally
-- OUTER APPLY keeps windows that contain no bookings (reported as 0).
OUTER APPLY (SELECT SUM(secondsbooked) AS sum_secondsbooked
FROM (SELECT randomDate, secondsbooked
FROM #testEnvironment) a
WHERE randomDate >= n AND randomDate < n2) b
SET STATISTICS TIME OFF
PRINT REPLICATE('=',80)
-- Undo everything, including the temp table population.
ROLLBACK
And here are the performance results:
========== BASELINE ==========
(1 row(s) affected)
SQL Server Execution Times:
CPU time = 358 ms, elapsed time = 386 ms.
================================================================================
========== Query ==========
(100 row(s) affected)
SQL Server Execution Times:
CPU time = 7831 ms, elapsed time = 8039 ms.
================================================================================
If we add an index to "randomDate", such as this: -
--Add index: a nonclustered index on randomDate supports the half-open
--range predicate (randomDate >= n AND randomDate < n2) in the APPLY.
CREATE NONCLUSTERED INDEX [test_index]
ON #testEnvironment (randomDate)
Then we see some improvement
========== Query ==========
(100 row(s) affected)
SQL Server Execution Times:
CPU time = 6411 ms, elapsed time = 6446 ms.
================================================================================
Hope that helps.
What did I say about it being early?
I've grouped the data on 30 minute intervals instead of 60 minute intervals.
To fix, do this: -
-- Inline row generator (testing only; use a real tally table in production):
-- each CTE cross-joins the previous with itself, 2 -> 4 -> 16 -> 256 -> 65536.
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
-- 101 half-open 60-minute windows [n, n2) starting on the half hour.
-- The seed row covers 00:30->01:30; the first generated row starts at
-- 01:30 (ROW_NUMBER starts at 1), so the two sides are disjoint and
-- UNION ALL is correct - it avoids the sort/de-duplication UNION forces.
-- CAST makes the seed row's datetime type explicit instead of relying on
-- implicit varchar->datetime precedence conversion.
tally AS (SELECT CAST('2011-11-09 00:30:00' AS DATETIME) AS n,
                 CAST('2011-11-09 01:30:00' AS DATETIME) AS n2
UNION ALL
SELECT TOP 100
DATEADD(mi,60*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))),'2011-11-09 00:30:00') AS n,
DATEADD(mi,60*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))+1),'2011-11-09 00:30:00') AS n2
FROM t5 x, t5 y)
-- One row per window; ISNULL reports empty windows as 0 rather than NULL.
SELECT n, ISNULL(b.sum_secondsbooked,0) AS total_seconds_per_period
FROM tally
-- OUTER APPLY keeps windows with no matching bookings.
OUTER APPLY (SELECT SUM(secondsbooked) AS sum_secondsbooked
FROM (SELECT randomDate, secondsbooked
FROM #testEnvironment) a
WHERE randomDate >= n AND randomDate < n2) b
So all together: -
--Standard TestEnvironment of 1,000,000 rows of random-ish data
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment
END
--1,000,000 Random rows of data
-- randomDate: GETDATE() plus a random fraction of a day;
-- secondsbooked: random int in 1..700.
SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,
RAND(CHECKSUM(NEWID())) + GETDATE() AS randomDate,
(ABS(CHECKSUM(NEWID())) % 700) + 1 AS secondsbooked
INTO #testEnvironment
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
--Add index on randomDate to support the half-open range predicate below
CREATE NONCLUSTERED INDEX [test_index]
ON #testEnvironment (randomDate)
-- Baseline: cost of a single full-table SUM for comparison.
PRINT '========== BASELINE =========='
SET STATISTICS TIME ON
SELECT SUM(secondsbooked) FROM #testEnvironment
SET STATISTICS TIME OFF
PRINT REPLICATE('=',80)
PRINT '========== Query =========='
SET STATISTICS TIME ON
-- Row generator CTEs: 2 -> 4 -> 16 -> 256 -> 65536 rows.
;WITH t1 AS (SELECT 1 N UNION ALL SELECT 1 N),
t2 AS (SELECT 1 N FROM t1 x, t1 y),
t3 AS (SELECT 1 N FROM t2 x, t2 y),
t4 AS (SELECT 1 N FROM t3 x, t3 y),
t5 AS (SELECT 1 N FROM t4 x, t4 y),
-- 101 half-open 60-minute windows [n, n2) starting on the half hour.
-- Seed row is 00:30->01:30; generated rows start at 01:30, so the sides
-- are disjoint and UNION ALL is correct (no sort/de-dup needed).
tally AS (SELECT CAST('2011-11-09 00:30:00' AS DATETIME) AS n,
                 CAST('2011-11-09 01:30:00' AS DATETIME) AS n2
UNION ALL
SELECT TOP 100
DATEADD(mi,60*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))),'2011-11-09 00:30:00') AS n,
DATEADD(mi,60*(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))+1),'2011-11-09 00:30:00') AS n2
FROM t5 x, t5 y)
-- One row per window; empty windows report 0 rather than NULL.
SELECT n, ISNULL(b.sum_secondsbooked,0) AS total_seconds_per_period
FROM tally
OUTER APPLY (SELECT SUM(secondsbooked) AS sum_secondsbooked
FROM (SELECT randomDate, secondsbooked
FROM #testEnvironment) a
WHERE randomDate >= n AND randomDate < n2) b
SET STATISTICS TIME OFF
PRINT REPLICATE('=',80)
========== BASELINE ==========
(1 row(s) affected)
SQL Server Execution Times:
CPU time = 296 ms, elapsed time = 292 ms.
================================================================================
========== Query ==========
(101 row(s) affected)
SQL Server Execution Times:
CPU time = 5975 ms, elapsed time = 5968 ms.
================================================================================
I'm fairly convinced that with a properly indexed "dates" tally table you'd get better performance.
Is everything clear, or do you need any more help?
Viewing 3 posts - 1 through 3 (of 3 total)
You must be logged in to reply to this topic. Login to reply