March 15, 2019 at 9:21 am
Hi there
I am trying to select one record per partition (customer) and “round trip” from an event-tracking table. In my real-world scenario, there are plenty of situations, but it all seems to break down to these two rules:
· 0 or 1 ACTION (eventName “failure” or “success”) then select the first/oldest “assignment” or “extension”
· > 1 ACTION: select the first/oldest “assignment” or “extension” of the last ACTION (“failure” or “success”)
Please find a demo table below…
My idea was to gather all “meta-data” such as number of actions first (using count..over) to have all information ready to make decisions at row-level. But then, I somehow lost that path due to confusion 😉
Thanks a lot in advance for any help.
drop table if exists demoEvents.eventStream;
drop schema if exists demoEvents;
GO
create schema demoEvents;
GO
create table demoEvents.eventStream
(
[id] int identity(1, 1) not null,
customer int not null,
eventDT date not null,
eventName varchar(10) not null,
wanted char(3) not null,
constraint [PK_kunde] primary key
(
[id] asc
)
)
;
-- CASE 1:
-- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180101', 'extension', 'yes');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180201', 'extension', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180301', 'extension', 'no');
-- CASE 1:
-- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180101', 'assignment', 'yes');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180201', 'assignment', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180301', 'assignment', 'no');
-- CASE 1:
-- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180101', 'assigned', 'yes');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180201', 'assigned', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180301', 'failure', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180401', 'assigned', 'no');
-- CASE 2:
-- if more than one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION after last action
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180101', 'assigned', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180201', 'assigned', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180301', 'failure', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180401', 'assigned', 'yes');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180501', 'success', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180601', 'success', 'no');
insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180701', 'success', 'no');
March 15, 2019 at 1:32 pm
This gives the results you want. I had to add a little extra code to treat 1 the same as zero.
WITH eventgroups AS
(
SELECT *, COUNT(CASE WHEN es.eventName IN ('failure', 'success')THEN es.id END) OVER(PARTITION BY es.customer ORDER BY es.eventDT DESC ROWS UNBOUNDED PRECEDING) AS grp
FROM #eventStream es
)
, eventorder AS
(
SELECT eg.id, eg.customer, eg.eventDT, eg.eventName, eg.wanted, grp, ROW_NUMBER() OVER(PARTITION BY eg.customer ORDER BY CASE WHEN grp < 2 THEN 1 ELSE grp END, eventDT) AS rn
FROM eventgroups eg
WHERE eg.eventName NOT IN ('failure', 'success')
)
SELECT eo.id, eo.customer, eo.eventDT, eo.eventName, eo.wanted, rn, grp
FROM eventorder eo
WHERE eo.rn = 1
ORDER BY eo.customer
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
March 15, 2019 at 2:41 pm
You were quicker, Drew.
---------------------------------------------------
-- helpful index
-- create unique nonclustered index IX1_EventStream on demoevents.EventStream(customer,id) include (eventname);
WITH cte1 as ( -- flag prior actions
select id, customer, eventDT, eventName, wanted
,case when lag(eventname) over (partition by customer order by id)
in ('Failure','Success')
then 1 end as PriorAction
from demoevents.EventStream e
)
,cte2 as (-- analysis
select customer
,sum(Case when EventName in ('Failure','Success') then 1 else 0 end) as ActionCount
,min(Case when EventName in ('Failure','Success') then ID end) as ActionID
,sum(Case when EventName in ('Assignment','Extension') then 1 else 0 end) as AssgnExtCount
,min(Case when EventName in ('Assignment','Extension') then ID end) as AssgnExtID
,min(Case when EventName in ('Assignment','Extension')
and PriorAction = 1
then ID end) as AssgnExtIDPrior
from cte1
group by customer
)
select e.*
from cte2
join demoevents.Eventstream e on e.ID = Case when ActionCount <= 1 then AssgnExtID else AssgnExtIDPrior end
--------------------------------------------------------------------------------------------------------------------------------------------------------------
After note: The LAG expression above assumes that the sequence of ID would be the same as the sequence of EventDT.
This can be corrected in the LAG clause without affecting performance by substituting EventDT for ID in the OVER section.
__________________________________________________
Against stupidity the gods themselves contend in vain. -- Friedrich Schiller
Stop, children, what's that sound? Everybody look what's going down. -- Stephen Stills
Viewing 3 posts - 1 through 2 (of 2 total)
You must be logged in to reply to this topic. Login to reply