January 27, 2011 at 5:02 pm
Here is the situation:
I have multiple events with start and end dates.
I've been asked to find the difference between the start and end dates for each event for each clientID.
Simple enough using datediff.
BUT.....
The user wants to treat 2 (or more) records as ONE if the end date of the first record is the same as the start date of the next.
use master
--drop table #temp
create table #temp(
pk_id int,
clientID int,
lastName char(20),
firstName char(20),
startDate datetime,
endDate datetime
)
insert into #temp(pk_id, clientID, lastName, firstName, startDate, endDate)
select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all
select 2, 1, 'Jones', 'Mary', '2011-01-05', '2011-01-10' union all
select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all
select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all
select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all
select 6, 1, 'Jones', 'Mary', '2011-01-16', '2011-01-31' union all
select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all
select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all
select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all
select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21'
Using the code below we get 10 rows returned from the temp table.
select clientID, lastName, firstName, startDate, endDate, datediff(dd, startDate, enddate) as dtDiff
from #temp
order by clientID, startDate, endDate
What I need to have is 7 rows:
clientID lastName firstName startDate endDate dtDiff
1 Jones Mary 2011-01-01 00:00:00.000 2011-01-15 00:00:00.000 14
1 Jones Mary 2011-01-16 00:00:00.000 2011-01-31 00:00:00.000 15
2 Smith John 2011-01-02 00:00:00.000 2011-01-31 00:00:00.000 29
3 Brown Fred 2011-02-02 00:00:00.000 2011-02-15 00:00:00.000 13
4 Green Sue 2011-01-04 00:00:00.000 2011-01-31 00:00:00.000 27
4 Green Sue 2011-02-15 00:00:00.000 2011-02-21 00:00:00.000 6
5 Barnes Victor 2011-01-05 00:00:00.000 2011-02-10 00:00:00.000 36
As you can see Mary Jones and Sue Green each have 2 events even though there are 4 and 3 records respectively in the source table. There will never be overlapping date ranges for a clientID.
I know there are many cursor based options for this problem, but I'm looking for a simple SQL solution as we may need to transfer the SQL code into BusinessObjects.
Thanks in advance for any assistance.
Tim Hansen
January 27, 2011 at 6:56 pm
Based on what you have provided, here is what I came up with to solve your problem.
--drop table #temp
create table #TimeTemp(
pk_id int,
clientID int,
lastName char(20),
firstName char(20),
startDate datetime,
endDate datetime
)
insert into #TimeTemp(pk_id, clientID, lastName, firstName, startDate, endDate)
select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all
select 2, 1, 'Jones', 'Mary', '2011-01-05', '2011-01-10' union all
select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all
select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all
select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all
select 6, 1, 'Jones', 'Mary', '2011-01-16', '2011-01-31' union all
select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all
select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all
select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all
select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21';
with TimeTemp (
pk_id,
clientID,
lastName,
firstName,
startDate,
endDate,
rn
) as (
select
pk_id,
clientID,
lastName,
firstName,
startDate,
endDate,
ROW_NUMBER() over (partition by clientID order by startDate)
from
#TimeTemp
)
, TestDate (
pk_id,
clientID,
lastName,
firstName,
startDate,
endDate,
groupid
) as (
select
tt1.pk_id,
tt1.clientID,
tt1.lastName,
tt1.firstName,
tt1.startDate,
tt1.endDate,
tt1.rn - ISNULL(tt2.rn,0)
from
TimeTemp tt1
left outer join TimeTemp tt2
on (tt1.clientID = tt2.clientID
and tt1.startDate = tt2.endDate)
)
select
clientID,
lastName,
firstName,
min(startDate) as startDate,
max(endDate) as endDate,
datediff(dd,min(startDate),max(endDate)) as DtDiff
from
TestDate td
group by
clientID,
lastName,
firstName,
groupid
order by
td.clientID;
drop table #TimeTemp;
Hope this helps, and thank you for providing us with the DDL, sample data, and expected results. It helped a lot.
January 28, 2011 at 2:38 am
Another way
WITH Starts AS (
SELECT a.clientID, a.lastName, a.firstName, a.startDate
FROM #temp a
WHERE NOT EXISTS (SELECT * FROM #temp b
WHERE b.clientID=a.clientID
AND b.endDate=a.startDate)),
Ends AS (
SELECT a.clientID, a.endDate
FROM #temp a
WHERE NOT EXISTS (SELECT * FROM #temp b
WHERE b.clientID=a.clientID
AND b.startDate=a.endDate))
SELECT s.clientID, s.lastName, s.firstName, s.startDate,
MIN(e.endDate) AS endDate,
DATEDIFF(day,s.startDate,MIN(e.endDate)) AS dtDiff
FROM Starts s
INNER JOIN Ends e ON e.clientID=s.clientID
AND s.startDate<e.endDate
GROUP BY s.clientID, s.lastName, s.firstName, s.startDate
ORDER BY s.clientID, s.startDate;
____________________________________________________
Deja View - The strange feeling that somewhere, sometime you've optimised this query before
How to get the best help on a forum
http://www.sqlservercentral.com/articles/Best+Practices/61537January 28, 2011 at 8:45 am
Thanks Lynn.
I will give this solution to the SQL developer working on this project and let him see if it will work for him.
Tim Hansen
January 28, 2011 at 9:39 am
NOTE: The first code is much more efficient. Just based on the sample data the first had a scan count of 2 and logical reads of 2, while the second had 4 scan counts and 30 logical reads.
For better, quicker answers, click on the following...
http://www.sqlservercentral.com/articles/Best+Practices/61537/
For better answers on performance questions, click on the following...
http://www.sqlservercentral.com/articles/SQLServerCentral/66909/
January 29, 2011 at 12:16 am
Hate to burst my own bubble, but there is a bug in my code. The code as posted will only work if there is no gap in the first set of records. If there is a gap between the first and second record, it fails.
Here is some new code. To fully understand what is happening, you need to read the article I reference below regarding Running Totals. You want the article written my Jeff Moden, not mine, as he explains how it works. Then you need to read the discussion as well. If I recall there has been verification code written, and that is discussed in the discuss thread as well.
create table dbo.TimeData( -- Represents source data
pk_id int,
clientID int,
lastName char(20),
firstName char(20),
startDate datetime,
endDate datetime
);
insert into dbo.TimeData(pk_id, clientID, lastName, firstName, startDate, endDate)
select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all
select 2, 1, 'Jones', 'Mary', '2011-01-06', '2011-01-10' union all
select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all
select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all
select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all
select 6, 1, 'Jones', 'Mary', '2011-01-15', '2011-01-31' union all
select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all
select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all
select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all
select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21';
create table #TimeTemp( -- Work table
pk_id int,
clientID int,
lastName char(20),
firstName char(20),
startDate datetime,
endDate datetime,
groupid int null,
primary key clustered (clientID, startDate)
);
insert into #TimeTemp (
pk_id,
clientID,
lastName,
firstName,
startDate,
endDate
)
select
pk_id,
clientID,
lastName,
firstName,
startDate,
endDate
from
dbo.TimeData;
declare @currentDate datetime,
@groupid int,
@clientID int;
set @currentDate = '19000101'; -- zero date, hopefully not equal to any date in your system
set @groupid = 0;
set @clientID = 0;
update #TimeTemp set
@groupid = groupid = @groupid + case when clientID <> @clientID then 1
when startDate <> @currentDate then 1
else 0 end,
@currentDate = endDate,
@clientID = clientID
from
#TimeTemp with (tablockx)
option
(maxdop 1);
select * from #TimeTemp; -- Show what is in the work table after the update
select
clientID,
lastName,
firstName,
min(startDate) as startDate,
max(endDate) as endDate,
datediff(dd,min(startDate),max(endDate)) as DtDiff
from
#TimeTemp td
group by
clientID,
lastName,
firstName,
groupid
order by
td.clientID;
drop table #TimeTemp;
drop table dbo.TimeData;
Viewing 6 posts - 1 through 5 (of 5 total)
You must be logged in to reply to this topic. Login to reply