November 15, 2007 at 7:44 am
I'm stumped.:doze: I have a list of dates and I need to return contiguous date intervals. Thus from the data below I would like to see:
2007-11-02 2007-11-06
2007-11-08 2007-11-08
2007-11-12 2007-11-16
The ideal solution would be to take into consideration weekends, i.e. monday is adjacent to friday. But even without this complication I can't seem to get at a solution that doesn't involve a cursor.
create table t(d datetime)
insert into t values('2007-11-02')
insert into t values('2007-11-05')
insert into t values('2007-11-06')
insert into t values('2007-11-08')
insert into t values('2007-11-12')
insert into t values('2007-11-13')
insert into t values('2007-11-14')
insert into t values('2007-11-15')
insert into t values('2007-11-16')
November 15, 2007 at 12:42 pm
November 15, 2007 at 12:46 pm
Just the min and max of each interval.
November 15, 2007 at 12:53 pm
November 15, 2007 at 1:01 pm
The interval can be anything. It can be a single day or 150 days. They never overlap. It's easy to get the start of an interval (the previous non weekend day is absent) and it's easy to get the end of an interval (the next non weekend day is absent). But I don't see a way to match the beginning of an interval with the end of an interval.
November 15, 2007 at 1:05 pm
November 15, 2007 at 2:35 pm
That's correct.
The keyword is contiguous.
Give me the start and end date of all contigous date sequences.
November 15, 2007 at 3:46 pm
use testing
go
IF OBJECT_ID('dbo.weekdays') IS NOT NULL
DROP FUNCTION dbo.weekdays
go
--steal code directly from Jeff Moden's article and turn into a function
create function dbo.weekdays(@startdate datetime,@enddate datetime)
returns int
as
begin
return
(DATEDIFF(dd, @StartDate, @EndDate) + 1)
-(DATEDIFF(wk, @StartDate, @EndDate) * 2)
-(CASE WHEN DATENAME(dw, @StartDate) = 'Sunday' THEN 1 ELSE 0 END)
-(CASE WHEN DATENAME(dw, @EndDate) = 'Saturday' THEN 1 ELSE 0 END)-1
end
go
--create test info
drop table dbo.testdates
create TABLE testdates (rid int identity(1,1),dateval datetime, sequence int)
INSERT TESTDATES(DATEVAL)
SELECT distinct DATE_START from dbo.matric
alter table testdates
add primary key clustered (rid)
--start the process
--need to represent the physical order for the sequence
--if it's a grouped sequence, then all of grouping elements need to be present
create index ix_testdates on testdates(dateval)
--set up the sequence ID's
declare @seq int
declare @prevdate datetime
declare @dummy int
set @seq=0
set @prevdate=0
update testdates
set sequence=case when dbo.weekdays(@prevdate,dateval)=1 then @seq else @seq+1 end,
@seq=case when dbo.weekdays(@prevdate,dateval)=1 then @seq else @seq+1 end,
@prevdate=dateval
from testdates
with(index(ix_testdates),tablock) --VERY important - this needs to set up the order
--once you have the sequneces - the rest is easy
select min(dateval), max(dateval) from testdates group by sequence
----------------------------------------------------------------------------------
Your lack of planning does not constitute an emergency on my part...unless you're my manager...or a director and above...or a really loud-spoken end-user..All right - what was my emergency again?
November 15, 2007 at 9:04 pm
The keyword is contiguous.
Give me the start and end date of all contigous date sequences.
Actually, Michael... according to that bit of information, I don't think you give a rat's patooti about whether a date is a weekday or not... all you want is start and end dates of contiguous ranges of dates...
...and, I assume that you might also want to use the result in other places without too much bother.
Ok, let's say your table does look like this (look kinda familiar?)...
--===== Create a test table to hold the test data in
-- (I mixed the order up to prove it still works and added two other months)
SET NOCOUNT ON
CREATE TABLE yourtable (SomeDate DATETIME PRIMARY KEY CLUSTERED)
INSERT INTO yourtable (SomeDate)
SELECT '2007-10-12' UNION ALL
SELECT '2007-10-05' UNION ALL
SELECT '2007-10-06' UNION ALL
SELECT '2007-10-02' UNION ALL
SELECT '2007-10-08' UNION ALL
SELECT '2007-10-13' UNION ALL
SELECT '2007-10-15' UNION ALL
SELECT '2007-10-14' UNION ALL
SELECT '2007-10-16' UNION ALL
SELECT '2007-11-12' UNION ALL
SELECT '2007-11-05' UNION ALL
SELECT '2007-11-06' UNION ALL
SELECT '2007-11-02' UNION ALL
SELECT '2007-11-08' UNION ALL
SELECT '2007-11-13' UNION ALL
SELECT '2007-11-15' UNION ALL
SELECT '2007-11-14' UNION ALL
SELECT '2007-11-16' UNION ALL
SELECT '2007-12-12' UNION ALL
SELECT '2007-12-05' UNION ALL
SELECT '2007-12-06' UNION ALL
SELECT '2007-12-02' UNION ALL
SELECT '2007-12-08' UNION ALL
SELECT '2007-12-13' UNION ALL
SELECT '2007-12-15' UNION ALL
SELECT '2007-12-14' UNION ALL
SELECT '2007-12-16'
Let's create a fairly simple function VERY similar to what Matt's good code looks like... but without any regard to weekdays or not...
CREATE FUNCTION dbo.fnFindDateRangesYourTable
(@MinStartDate DATETIME, @MaxEndDate DATETIME)
RETURNS @Return TABLE
(
StartDate DATETIME NOT NULL,
EndDate DATETIME NOT NULL,
PRIMARY KEY CLUSTERED (StartDate,EndDate)
)
AS
BEGIN
--===== Declare Local variables
DECLARE @PrevDate DATETIME --Keeps track of previous record's dates
DECLARE @PrevSeq INT --Keeps track of the previous sequence we used
DECLARE @ScratchPad TABLE --Our working table where we'll create the sequences
(
SomeDate DATETIME PRIMARY KEY CLUSTERED WITH FILLFACTOR = 100,
Sequence INT
)
--===== Grab all the dates from the table for the desired range.
INSERT INTO @ScratchPad (SomeDate)
SELECT SomeDate
FROM yourtable
WHERE SomeDate >= @MinStartDate
AND SomeDate < @MaxEndDate+1
ORDER BY SomeDate
--===== Create a sequence starting at 1 and incrementing only when we skip a date
-- Clustered Primary Key forces the correct order
UPDATE @ScratchPad
SET @PrevSeq = Sequence = CASE
WHEN SomeDate = @PrevDate+1
THEN @PrevSeq
ELSE ISNULL(@PrevSeq+1,1)
END,
@PrevDate = SomeDate
--===== Using the sequence to group by, find the min and max dates for each sequence
INSERT INTO @Return (StartDate,EndDate)
SELECT StartDate = MIN(SomeDate),
EndDate = MAX(SomeDate)
FROM @ScratchPad
GROUP BY Sequence
RETURN
END
Then, guess what happens when you run the following?
SELECT *
FROM dbo.fnFindDateRangesYourTable('2007-11-01','2007-11-30')
...and it's NASTY fast...
StartDate EndDate
------------------------------------------------------ ------------------------------------------------------
2007-11-02 00:00:00.000 2007-11-02 00:00:00.000
2007-11-05 00:00:00.000 2007-11-06 00:00:00.000
2007-11-08 00:00:00.000 2007-11-08 00:00:00.000
2007-11-12 00:00:00.000 2007-11-16 00:00:00.000
--Jeff Moden
Change is inevitable... Change for the better is not.
November 16, 2007 at 1:47 am
I am a purist at heart and was looking for a plain vanilla view. But you are convincing me that you need this sequence number stuff both of you are using. Thus I got as far as the following output:
dtype date
----- ----------
1 2007-11-02
2 2007-11-06
1 2007-11-08
2 2007-11-08
1 2007-11-12
2 2007-11-16
i.e. 1 indicates the date is the start of an interval and 2 indicates that it's the end of an interval and all dates that are neither the beginning nor the start of an interval don't appear. But this still doesn't seem to be enough to 'easily' get the two dates into a horizontal position - unless you prove me wrong.
November 16, 2007 at 3:53 pm
Here is a solution with no cursors, no variable, no temporary tables, no functions, no identity/sequence columms, and no update statements. Just add a create view. Weekends are the next problem but always give me a headache.
Sergiy - you always seems to have elegent solution for these types of problems, can you assist?
set nocount on
create table Event ( EventDt datetime)
insert into Event values('2007-11-02')
insert into Event values('2007-11-05')
insert into Event values('2007-11-06')
insert into Event values('2007-11-08')
insert into Event values('2007-11-12')
insert into Event values('2007-11-13')
insert into Event values('2007-11-14')
insert into Event values('2007-11-15')
insert into Event values('2007-11-16')
go
selectMin(PeriodStartDt) as PeriodStartDt
,PeriodEndDt
from(
selectEventStartDt as PeriodStartDt
,MAX(EventEndDt)as PeriodEndDt
FROM(
select EventStart.EventDtas EventStartDt
, EventEnd.EventDt as EventEndDt
from Event as EventStart -- timestart
joinEvent as EventEnd
on EventStart.EventDt <= EventEnd.EventDt
) as EventSpan
where datediff(dd,EventStartDt,EventEndDt) + 1
-- less than number of weekend days between the dates
= (select count(*)
from Event
whereEvent.EventDt between EventStartDt and EventEndDt
)
group by EventStartDt
) as Period
group by PeriodEndDt
order by PeriodStartDt
SQL = Scarcely Qualifies as a Language
November 16, 2007 at 10:53 pm
Ummm... you might want to be just a little careful with that code, Carl... actual execution plan show something worse than a cross-join at over 165 rows in one spot and a full cross-join of 81 rows in another spot... imagine if you had 10,000 or 100,000 rows...
--Jeff Moden
Change is inevitable... Change for the better is not.
November 17, 2007 at 2:23 am
Indeed, for a 1000 rows that cross join generates half a million rows and total execution time is 35 seconds dropping by only 5 seconds with a unique clustered index.
On 10000 rows I interrupted it after 20 minutes.
On those same 10000 rows, Jeff's function takes about 10 seconds on a freshly started SQL Server instance and settles down to less than a second on subsequent calls.
On the other hand, in the real world where this will get used, rarely are there more than 100 records. At that level, Carl's function drops to less than a second and tickles 😀 my purist sense of a plain vanilla view - cross join or not.
But I'm beginning to appreciate very much Jeff's approach of procedural code within a function that returns a table. For some reason, I always equated procedural code like this with cursors. But they are far from being the same.
November 17, 2007 at 7:45 am
Heh... thanks for the timing analysis, Michael. Guess I'm a purist, as well. Different kind, but a purist none the less.
In this case (except for the difference in our definitions of "purity"), the function code is nothing more than a "view"... they both produce a result set and they both are used in the FROM clause... biggest difference is raw performance (my "purity" hangup is "always plan for scalability" 😛 ).
By the way...
am a purist at heart and was looking for a plain vanilla view. But you are convincing me that you need this sequence number stuff both of you are using. Thus I got as far as the following output:
dtype date
----- ----------
1 2007-11-02
2 2007-11-06
1 2007-11-08
2 2007-11-08
1 2007-11-12
2 2007-11-16
i.e. 1 indicates the date is the start of an interval and 2 indicates that it's the end of an interval and all dates that are neither the beginning nor the start of an interval don't appear. But this still doesn't seem to be enough to 'easily' get the two dates into a horizontal position - unless you prove me wrong.
... are you all set now or are you still having this problem? Either way, would you post the code that got you there? I'm always interested in learning different things...
--Jeff Moden
Change is inevitable... Change for the better is not.
November 17, 2007 at 11:03 am
I'm still curious. So here is how I got to the 1/2 output I descriped earlier - see below. And it handles those awful weekends.
What I have found though is that in Oracle, having a native sequence function, this problem is fairly straight forward. I wonder if SQL Server 2005 has something like that.
if exists (select name from sysobjects where name = 'mydates' and type = 'U') drop table mydates
go
create table mydates(
date datetime)
insert into mydates values('20071102')
insert into mydates values('20071105')
insert into mydates values('20071106')
insert into mydates values('20071108')
insert into mydates values('20071112')
insert into mydates values('20071113')
insert into mydates values('20071114')
insert into mydates values('20071115')
insert into mydates values('20071116')
if exists (select name from sysobjects where name = 'PrevWorkDay' and type = 'FN') drop function PrevWorkDay
go
create function PrevWorkDay(@d as datetime) returns datetime begin
declare @dowMon int
declare @dm1 as datetime
set @dowMon=datepart(dw,'20071105')
if datepart(dw,@d)=@dowMon
set @dm1=dateadd(d,-3,@d)
else
set @dm1=dateadd(d,-1,@d)
return @dm1
end
go
if exists (select name from sysobjects where name = 'NextWorkDay' and type = 'FN') drop function NextWorkDay
go
create function NextWorkDay(@d as datetime) returns datetime begin
declare @dowFri int
declare @dp1 as datetime
set @dowFri=datepart(dw,'20071109')
if datepart(dw,@d)=@dowFri
set @dp1=dateadd(d,3,@d)
else
set @dp1=dateadd(d,1,@d)
return @dp1
end
go
-- here we go
select *
from
(
select case when (dm1.date is null and dp1.date is not null) or (dm1.date is null and dp1.date is null) then 1 else 2 end dtype,d.date
from mydates d
left join mydates dm1 on dbo.PrevWorkDay(d.date)=dm1.date
left join mydates dp1 on dbo.NextWorkDay(d.date)=dp1.date
where dm1.date is null or dp1.date is null
union all
-- catches 'solo' dates which are both the start and end of an interval - this duplicates them
select 2,d.date
from mydates d
left join mydates dm1 on dbo.PrevWorkDay(d.date)=dm1.date
left join mydates dp1 on dbo.NextWorkDay(d.date)=dp1.date
where dm1.date is null and dp1.date is null
) t
order by date
Viewing 15 posts - 1 through 15 (of 17 total)
You must be logged in to reply to this topic. Login to reply