February 19, 2010 at 9:32 am
Heres a quick example over 20 thousand rows , yes i know i said at least a million , but im rushing to go home
Drop table #dateList
go
Create table #dateList
(
UserId integer,
DateCol smalldatetime
)
go
create unique clustered index idxdateList on #dateList(userid,DateCol)
go
with cteRown
as
(
select top(10000) row_number() over (order by (select null)) as Rown
from syscolumns a cross join syscolumns b
)
insert into #dateList
Select 1,dateadd(dd,rown,getdate())
from cteRown
where rown%7 <> 0
go
with cteRown
as
(
select top(10000) row_number() over (order by (select null)) as Rown
from syscolumns a cross join syscolumns b
)
insert into #dateList
Select 2,dateadd(dd,rown,getdate())
from cteRown
where rown%6 <> 0
go
with cteList
as
(
Select userid,
datecol,
datecol + row_number() over (partition by userid order by datecol desc) as Grouping
from #dateList
)
select userid,min(datecol),max(datecol)
from cteList
group by userid,grouping
order by 1,2
February 20, 2010 at 3:13 am
I suppose expanding the ranges could be expensive if the ranges are large.
The original posted question looks like a simple 'data island' problem to me, so we should be able to handle it without expansion.
A 'data island' is just a range where there is a gap in the data before and after the island.
One approach is just to look for the start and stop dates which occur at the beginning and end of each island.
Assuming that there are no duplicated start or stop dates in the data, and assuming there are no overlapping ranges, the following code is pretty efficient:
CREATE TABLE #Example
(
company VARCHAR(10) NOT NULL,
employee_id INTEGER NOT NULL,
id INTEGER NOT NULL,
course_id INTEGER NOT NULL,
start_date DATETIME NOT NULL,
stop_date DATETIME NOT NULL,
);
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '5 September 2006','27 August 2007');
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '28 August 2007', '4 March 2008');
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '5 March 2008', '20 April 2008');
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 01, 11, '21 April 2008', '23 April 2008');
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 02, 11, '24 April 2008', '20 September 2009');
INSERT #Example (company, employee_id, id, course_id, start_date, stop_date) VALUES ('TTT', 1, 02, 11, '1 January 2010', '31 December 9999');
WITH Grouped
AS (
-- Identify the groups in the input rows with a sequential number
-- In this example, groups are identified by the combination of
-- company, employee_id, id, and course_id
SELECT E.*,
grp = DENSE_RANK() OVER (ORDER BY company, employee_id, id, course_id)
FROM #Example E
),
Starts
AS (
-- Find start dates for each data island
SELECT E1.*,
rn = ROW_NUMBER() OVER (PARTITION BY E1.grp ORDER BY E1.start_date ASC)
FROM Grouped E1
WHERE NOT EXISTS
(
SELECT *
FROM Grouped E2
WHERE E2.stop_date = E1.start_date - 1
AND E2.grp = E1.grp
)
),
Stops
AS (
-- Find stop dates for each data island
SELECT E1.*,
rn = ROW_NUMBER() OVER (PARTITION BY E1.grp ORDER BY E1.start_date ASC)
FROM Grouped E1
WHERE NOT EXISTS
(
SELECT *
FROM Grouped E2
WHERE E2.start_date - 1 = E1.stop_date
AND E2.grp = E1.grp
)
)
SELECT R.grp, -- Group ID
R.rn, -- Island ID within each group
R.start_date, -- The start of the island
P.stop_date, -- The end of the island
R.company, --
R.employee_id, -- Remaining rows are just group details
R.id, --
R.course_id --
FROM Starts AS R
JOIN Stops AS P
-- Join the islands together, for each group
ON P.rn = R.rn
AND P.grp = R.grp;
DROP TABLE #Example;
Paul
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
February 23, 2010 at 9:25 am
Thank you paul. It worked perfect. That is exactly what I need.
February 23, 2010 at 4:28 pm
Sridhar-137443 (2/23/2010)
Thank you paul. It worked perfect. That is exactly what I need.
Awesome, thanks.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
February 24, 2010 at 9:49 am
Paul,
What if you assumptions do not hold? Overlapping dates and duplicative start dates for the same person, group , etc?
February 24, 2010 at 10:50 am
I would say that Paul went out his way given the requirements that were provided to him.
Perhaps you could write some logic that would account for these circumstances?
For better, quicker answers on T-SQL questions, click on the following...
http://www.sqlservercentral.com/articles/Best+Practices/61537/
For better answers on performance questions, click on the following...
http://www.sqlservercentral.com/articles/SQLServerCentral/66909/
Viewing 6 posts - 16 through 20 (of 20 total)
You must be logged in to reply to this topic. Login to reply