July 13, 2012 at 3:35 pm
I have an audit table that has Inserts and Updates from a parent table. Unfortunately, we're not tracking field-level changes, so a record will appear in the table for any change to any field in the original table. I've got to find a way to determine the old value and new value along with the dates on which it changed. I've been beating my head against the wall for hours trying to use row_number(), min(auditdatetime), etc... and I can't get it to work. Legitimately, business rules dictate that an inmate can go into and out of the loc_unit many times. I can't take the min by Loc_Unit, because it won't capture the next time they're put into that same Loc_unit. Please help! Below is just the subset of data I'm interested in. (It's a very wide table, but I'm only concerned with Loc_Unit)
--example of audit table
Create Table tab1
(
InmateNumber varchar(10),
AuditDateTime datetime,
Loc_unit varchar(10)
)
Insert into tab1 values ('10371','2012-05-26 11:56:36.100','TANK1')
Insert into tab1 values ('10371','2012-05-26 13:56:33.957','TANK1')
Insert into tab1 values ('10371','2012-05-26 14:11:35.977','QMSTR')
Insert into tab1 values ('10371','2012-05-27 14:26:40.440','QMSTR')
Insert into tab1 VALUES ('10371','2012-06-06 09:56:13.817','A3R')
Insert into tab1 VALUES ('10371','2012-07-10 15:11:47.403','A3R')
Insert into tab1 VALUES ('10371','2012-07-11 03:56:41.540','TANK1')
Insert into tab1 VALUES ('10371','2012-07-11 10:11:51.723','A3R')
Insert into tab1 VALUES ('10371','2012-07-13 13:41:45.343','J')
INSERT INTO tab1 VALUES ('100593','2012-04-11 01:41:31.340','TANK1')
INSERT INTO tab1 VALUES ('100593','2012-04-11 15:41:37.637','TANK1')
INSERT INTO tab1 VALUES ('100593','2012-06-06 09:56:05.080','J')
--results i would like to receive
CREATE TABLE results
(InmateNumber VARCHAR(10),
Loc_Unit VARCHAR(10),
Loc_StartDate DATETIME,
Loc_EndDate DATETIME)
INSERT INTO results VALUES('10371','TANK1','2012-05-26 11:56:00.000','2012-05-26 14:11:00.000')
INSERT INTO results VALUES ('10371','QMSTR','2012-05-26 14:11:00.000','2012-06-06 09:56:00.000')
INSERT INTO results VALUES ('10371','A3R','2012-06-06 09:56:00.000', '2012-07-11 03:56:00.000')
INSERT INTO results VALUES ('10371','TANK1','2012-07-11 03:56:00.000','2012-07-11 10:11:00.000')
INSERT INTO results VALUES ('10371','A3R','2012-07-11 10:11:00.000','2012-07-13 13:41:00.000')
INSERT INTO results VALUES ('10371','J','2012-07-13 13:41:00.000',NULL)
INSERT INTO results VALUES ('100593','TANK1','2012-04-11 01:41:00.000','2012-06-06 09:56:00.000')
INSERT INTO results VALUES ('100593','J','2012-06-06 09:56:00.000',NULL)
July 13, 2012 at 4:01 pm
Fantastic job in posting the table, sample data and required results. Makes life easier 🙂
Here is what you are looking after:
; WITH cte AS
(
SELECT *
,RN = ROW_NUMBER() OVER (PARTITION BY TB.InmateNumber ORDER BY TB.AuditDateTime )
,Diff = ROW_NUMBER() OVER (PARTITION BY TB.InmateNumber ORDER BY TB.AuditDateTime )
- ROW_NUMBER() OVER (PARTITION BY TB.InmateNumber,TB.Loc_unit ORDER BY TB.AuditDateTime )
FROM tab1 TB
--ORDER BY TB.InmateNumber , Diff
)
SELECT OTR.InmateNumber
, Loc_Unit = MAX(Otr.Loc_unit)
, Loc_StartDate = MIN(Otr.AuditDateTime)
, Loc_EndDate = MAX(OtrApp.AuditDateTime)
FROM CTE Otr
OUTER APPLY
(
SELECT Inr.AuditDateTime
FROM CTE Inr
WHERE Otr.InmateNumber = Inr.InmateNumber
AND Otr.RN + 1 = Inr.RN
) OtrApp
GROUP BY Otr.InmateNumber ,Otr.Loc_unit, Otr.Diff
ORDER BY Otr.InmateNumber DESC , Loc_StartDate
{Edit : Fixed a bug}
July 13, 2012 at 5:53 pm
Thank you so very much! I will give this a shot when I get to work Monday morning.
July 24, 2012 at 1:08 pm
ColdCoffee, I missed a scenario. I really hope you can help me. The awesome code you wrote won't give me the correct results if the inmate moved back into the same room again at a later date. (He was in Unit HEM, moved to J, moved to HEM again.) I'm posting better sample data below.
--example of audit table
Create Table tab1
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
)
INSERT INTO tab1 VALUES('694768','','2011-08-23 15:00:59.233')
INSERT INTO tab1 VALUES('694768','','2011-08-23 15:01:50.810')
INSERT INTO tab1 VALUES('694768','','2012-02-09 09:26:22.850')
INSERT INTO tab1 VALUES('694768','HEM','2012-02-27 15:11:55.977')
INSERT INTO tab1 VALUES('694768','J','2012-02-29 13:28:01.400')
INSERT INTO tab1 VALUES('694768','HEM','2012-03-19 10:56:29.340')
INSERT INTO tab1 VALUES('694768','MCV H','2012-03-20 16:56:29.573')
INSERT INTO tab1 VALUES('694768','J','2012-03-20 19:41:24.040')
INSERT INTO tab1 VALUES('694768','A3L','2012-04-09 13:11:33.613')
INSERT INTO tab1 VALUES('694768','G3','2012-04-19 15:56:32.890')
INSERT INTO tab1 VALUES('694768','G3','2012-06-06 09:55:51.760')
INSERT INTO tab1 VALUES('853140','HEI','2012-02-09 09:25:36.277')
INSERT INTO tab1 VALUES('853140','HEI','2012-06-06 09:55:51.760')
INSERT INTO tab1 VALUES('853140','HEI','2012-06-20 13:12:03.020')
INSERT INTO tab1 VALUES('853140','TANK1','2012-06-20 17:26:34.510')
INSERT INTO tab1 VALUES('853140','J','2012-06-22 11:57:18.813')
--results i would like to receive
CREATE TABLE results
(InmateNumber VARCHAR(10),
Loc_Unit VARCHAR(10),
Loc_StartDate DATETIME,
Loc_EndDate DATETIME)
INSERT INTO results VALUES('694768','','2011-08-23 15:00:59.233','2012-02-27 15:11:55.977')
INSERT INTO results VALUES ('694768', 'HEM','2012-02-27 15:11:55.977','2012-02-29 13:28:01.400')
INSERT INTO results VALUES ('694768', 'J', '2012-02-29 13:28:01.400', '2012-03-19 10:56:29.340')
INSERT INTO results VALUES ('694768', 'HEM', '2012-03-19 10:56:29.340','2012-03-20 16:56:29.573')
INSERT INTO results VALUES ('694768', 'MCV H','2012-03-20 16:56:29.573','2012-03-20 19:41:24.040')
INSERT INTO results VALUES ('694768', 'J','2012-03-20 19:41:24.040','2012-04-09 13:11:33.613')
INSERT INTO results VALUES ('694768', 'A3L','2012-04-09 13:11:33.613','2012-04-19 15:56:32.890')
INSERT INTO results VALUES ('694768', 'G3','2012-04-19 15:56:32.890',NULL)
INSERT INTO results VALUES ('853140', 'HEI','2012-02-09 09:25:36.277','2012-06-20 17:26:34.510')
INSERT INTO results VALUES ('853140', 'TANK1','2012-06-20 17:26:34.510','2012-06-22 11:57:18.813')
INSERT INTO results VALUES ('853140', 'J','2012-06-22 11:57:18.813',NULL)
July 24, 2012 at 1:22 pm
Hello Iduvall,
I see that my earlier code works for your new data set too. Did you take the latest version?
I initially posted a version and then edited it for bug the same day and i sense the bug is the same scenario you are talking about right now. Can you take the latest code and try?
July 24, 2012 at 1:38 pm
Ahhh!!! You're right. I was using the pre-bug-fix version. You are the best. Thank you again very much!
July 24, 2012 at 2:48 pm
Haven't looked at the code ColdCoffee provided. Here is my solution:
Create Table dbo.tab1
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
)
INSERT INTO dbo.tab1 VALUES('694768','','2011-08-23 15:00:59.233');
INSERT INTO dbo.tab1 VALUES('694768','','2011-08-23 15:01:50.810');
INSERT INTO dbo.tab1 VALUES('694768','','2012-02-09 09:26:22.850');
INSERT INTO dbo.tab1 VALUES('694768','HEM','2012-02-27 15:11:55.977');
INSERT INTO dbo.tab1 VALUES('694768','J','2012-02-29 13:28:01.400');
INSERT INTO dbo.tab1 VALUES('694768','HEM','2012-03-19 10:56:29.340');
INSERT INTO dbo.tab1 VALUES('694768','MCV H','2012-03-20 16:56:29.573');
INSERT INTO dbo.tab1 VALUES('694768','J','2012-03-20 19:41:24.040');
INSERT INTO dbo.tab1 VALUES('694768','A3L','2012-04-09 13:11:33.613');
INSERT INTO dbo.tab1 VALUES('694768','G3','2012-04-19 15:56:32.890');
INSERT INTO dbo.tab1 VALUES('694768','G3','2012-06-06 09:55:51.760');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-02-09 09:25:36.277');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-06-06 09:55:51.760');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-06-20 13:12:03.020');
INSERT INTO dbo.tab1 VALUES('853140','TANK1','2012-06-20 17:26:34.510');
INSERT INTO dbo.tab1 VALUES('853140','J','2012-06-22 11:57:18.813');
go
with BaseData as (
select
InmateNumber,
Loc_unit,
AuditDateTime,
row_number() over (partition by InmateNumber order by AuditDatetime) as rn1,
row_number() over (partition by InmateNumber, Loc_unit order by AuditDatetime) as rn2,
row_number() over (partition by InmateNumber order by AuditDatetime) -
row_number() over (partition by InmateNumber, Loc_unit order by AuditDatetime) as rn3
from
dbo.tab1
)
select
InmateNumber,
Loc_unit,
min(AuditDateTime) LocStartDate,
max(AuditDateTime) LocEndDate
from
BaseData
group by
InmateNumber,
Loc_unit,
rn3
order by
InmateNumber,
min(AuditDateTime);
go
drop table dbo.tab1;
go
July 24, 2012 at 2:50 pm
Okay, I just looked at the code from ColdCoffee. Looks like we used the same method to accomplish the task.
July 24, 2012 at 3:50 pm
I want to thank you both for your help. Unfortunately, I'm beginning to see that my company doesn't put data in audit tables the way I've been accustomed to seeing at previous employers. Here, the most recent record does not go into the audit table. The most recent record sits in the main patients table until it is updated again. (typically once per night) However, the audit table records an entire row for any type of change, so I've got to compare the last loc_unit in the aud_patients table to the loc_unit in the patients table. I tried starting with a CTE of the aud_table records and doing a UNION to the patients table, but the main patients table only gets updated once per night for some sites, even though the audit data may accumulate throughout the day. Also, it looks like we've had interface issues where we've got records in the audit table with a more recent datetime than the patient's actual release. (meaning it cannot be good data).
If you are willing and able, would you help me figure out how to make this work if you assume the record in the patients table (tab2) ALWAYS trumps the record in the aud_patients table (tab1) if the two are different?
--example of main patients table
Create Table tab2
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
)
INSERT INTO tab1 VALUES('694768','G3','2012-04-19 15:56:00.000')
INSERT INTO tab1 VALUES('853140','HEI','2012-06-22 11:57:00.000')
--
--example of audit table
Create Table tab1
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
)
INSERT INTO tab1 VALUES('694768','','2011-08-23 15:00:59.233')
INSERT INTO tab1 VALUES('694768','','2011-08-23 15:01:50.810')
INSERT INTO tab1 VALUES('694768','','2012-02-09 09:26:22.850')
INSERT INTO tab1 VALUES('694768','HEM','2012-02-27 15:11:55.977')
INSERT INTO tab1 VALUES('694768','J','2012-02-29 13:28:01.400')
INSERT INTO tab1 VALUES('694768','HEM','2012-03-19 10:56:29.340')
INSERT INTO tab1 VALUES('694768','MCV H','2012-03-20 16:56:29.573')
INSERT INTO tab1 VALUES('694768','J','2012-03-20 19:41:24.040')
INSERT INTO tab1 VALUES('694768','A3L','2012-04-09 13:11:33.613')
INSERT INTO tab1 VALUES('694768','G3','2012-04-19 15:56:32.890')
INSERT INTO tab1 VALUES('694768','G3','2012-06-06 09:55:51.760')
INSERT INTO tab1 VALUES('853140','HEI','2012-02-09 09:25:36.277')
INSERT INTO tab1 VALUES('853140','HEI','2012-06-06 09:55:51.760')
INSERT INTO tab1 VALUES('853140','HEI','2012-06-20 13:12:03.020')
INSERT INTO tab1 VALUES('853140','TANK1','2012-06-20 17:26:34.510')
INSERT INTO tab1 VALUES('853140','J','2012-06-22 11:57:18.813')
--example of needed results
CREATE TABLE results
(InmateNumber VARCHAR(10),
Loc_Unit VARCHAR(10),
Loc_StartDate DATETIME,
Loc_EndDate DATETIME)
--This one works inherently because the patient has been released & that was his last location. NOTE the timestamp difference
--on the Aud_Patients table versus the Patients table. I tried casting the AudDateTime to DATE and comparing, but that breaks
--if they move more than once in a day (which can happen in the audit tables).
INSERT INTO results VALUES('694768','','2011-08-23 15:00:59.233','2012-02-27 15:11:55.977')
INSERT INTO results VALUES ('694768', 'HEM','2012-02-27 15:11:55.977','2012-02-29 13:28:01.400')
INSERT INTO results VALUES ('694768', 'J', '2012-02-29 13:28:01.400', '2012-03-19 10:56:29.340')
INSERT INTO results VALUES ('694768', 'HEM', '2012-03-19 10:56:29.340','2012-03-20 16:56:29.573')
INSERT INTO results VALUES ('694768', 'MCV H','2012-03-20 16:56:29.573','2012-03-20 19:41:24.040')
INSERT INTO results VALUES ('694768', 'J','2012-03-20 19:41:24.040','2012-04-09 13:11:33.613')
INSERT INTO results VALUES ('694768', 'A3L','2012-04-09 13:11:33.613','2012-04-19 15:56:32.890')
INSERT INTO results VALUES ('694768', 'G3','2012-04-19 15:56:32.890',NULL)
--I need this one to show the HEI. I know it doesn't make sense, but the audit table gets a datetime stamp but the patients table doesn't.
--This guy was only in the J location for a portion of the day.
--I didn't make the data. I just inherited it.
INSERT INTO results VALUES ('853140', 'HEI','2012-02-09 09:25:36.277','2012-06-20 17:26:34.510')
INSERT INTO results VALUES ('853140', 'TANK1','2012-06-20 17:26:34.510','2012-06-22 11:57:18.813')
INSERT INTO results VALUES ('853140', 'J','2012-06-22 11:57:18.813','2012-06-22 11:57:18.813')
INSERT INTO results VALUES ('853140', 'HEI','2012-06-22 11:57:18.813',NULL)
The time portion is really irrelevant to us. I just have it in the aud_patients table but don't in the main patients table. I've tried combinations of UNION and CAST(auditdatetime as date) and I just can't get it. Thanks again for any help.
July 24, 2012 at 4:51 pm
Just create a "feeder" CTE at the top of CC's code. It would consist of reading rows from the audit table and the main table with a UNION ALL between them. Then change CC's code to select from that first CTE instead of the main table.
--Jeff Moden
Change is inevitable... Change for the better is not.
July 24, 2012 at 5:14 pm
Here is my solution, no guaranttees on performance:
Create Table dbo.tab2
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
);
INSERT INTO dbo.tab2 VALUES('694768','G3','2012-04-19 15:56:00.000');
INSERT INTO dbo.tab2 VALUES('853140','HEI','2012-06-22 11:57:00.000');
--
Create Table dbo.tab1
(
InmateNumber varchar(10),
Loc_unit varchar(10),
AuditDateTime datetime
)
INSERT INTO dbo.tab1 VALUES('694768','','2011-08-23 15:00:59.233');
INSERT INTO dbo.tab1 VALUES('694768','','2011-08-23 15:01:50.810');
INSERT INTO dbo.tab1 VALUES('694768','','2012-02-09 09:26:22.850');
INSERT INTO dbo.tab1 VALUES('694768','HEM','2012-02-27 15:11:55.977');
INSERT INTO dbo.tab1 VALUES('694768','J','2012-02-29 13:28:01.400');
INSERT INTO dbo.tab1 VALUES('694768','HEM','2012-03-19 10:56:29.340');
INSERT INTO dbo.tab1 VALUES('694768','MCV H','2012-03-20 16:56:29.573');
INSERT INTO dbo.tab1 VALUES('694768','J','2012-03-20 19:41:24.040');
INSERT INTO dbo.tab1 VALUES('694768','A3L','2012-04-09 13:11:33.613');
INSERT INTO dbo.tab1 VALUES('694768','G3','2012-04-19 15:56:32.890');
INSERT INTO dbo.tab1 VALUES('694768','G3','2012-06-06 09:55:51.760');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-02-09 09:25:36.277');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-06-06 09:55:51.760');
INSERT INTO dbo.tab1 VALUES('853140','HEI','2012-06-20 13:12:03.020');
INSERT INTO dbo.tab1 VALUES('853140','TANK1','2012-06-20 17:26:34.510');
INSERT INTO dbo.tab1 VALUES('853140','J','2012-06-22 11:57:18.813');
go
with
CombinedData as (
select
InmateNumber,
Loc_unit,
AuditDateTime,
1 as SourceTable
from
dbo.tab1
union all
select
InmateNumber,
Loc_unit,
AuditDateTime,
2 as SourceTable
from
dbo.tab2
),
BaseData as (
select
InmateNumber,
Loc_unit,
SourceTable,
AuditDateTime,
row_number() over (partition by InmateNumber order by SourceTable,AuditDatetime) as rn1,
row_number() over (partition by InmateNumber, Loc_unit order by SourceTable,AuditDatetime) as rn2,
row_number() over (partition by InmateNumber order by SourceTable,AuditDatetime) -
row_number() over (partition by InmateNumber, Loc_unit order by SourceTable,AuditDatetime) as rn3
from
CombinedData
), FinalData as (
select
bd1.rn3,
bd1.SourceTable,
bd1.InmateNumber,
bd1.Loc_unit,
case bd1.SourceTable when 1 then bd1.AuditDateTime
when 2 then bd2.AuditDateTime
end LocStartDate,
case bd1.SourceTable when 1 then bd1.AuditDateTime
when 2 then '99991231'
end LocEndDate
from
BaseData bd1
left outer join BaseData bd2
on (bd1.InmateNumber = bd2.InmateNumber and
bd1.rn1 = bd2.rn1 + 1)
)
select
InmateNumber,
Loc_unit,
min(LocStartDate) as LocStartDate,
nullif(max(LocEndDate),'99991231') as LocEndDate
from
FinalData
group by
InmateNumber,
Loc_unit,
rn3
order by
InmateNumber,
min(LocStartDate),
max(LocEndDate);
go
drop table dbo.tab1;
drop table dbo.tab2;
go
Viewing 11 posts - 1 through 10 (of 10 total)
You must be logged in to reply to this topic. Login to reply