select by "pattern"

  • Hi there
    I am trying to select one record per partition (customer) and “round trip” from an event-tracking table. In my real-world scenario, there are plenty of situations, but it all seems to break down to these two rules:

    ·        0 or 1 ACTION (eventName “failure” or “success”) then select the first/oldest “assignment” or “extension”
    ·        > 1 ACTION: select the first/oldest “assignment” or “extension” of the last ACTION (“failure” or “success”)

    Please find a demo table below…

    My idea was to gather all “meta-data” such as number of actions first (using count..over) to have all information ready to make decisions at row-level. But then, I somehow lost that path due to confusion 😉
    Thanks a lot in advance for any help.

    drop table if exists demoEvents.eventStream;
    drop schema if exists demoEvents;
    GO

    create schema demoEvents;
    GO

    create table demoEvents.eventStream
    (
        [id] int identity(1, 1) not null,
        customer int not null,
        eventDT date not null,
        eventName varchar(10) not null,
        wanted char(3) not null,
        constraint [PK_kunde] primary key
        (
            [id] asc
        )    
    )
    ;

    -- CASE 1:
    -- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180101', 'extension', 'yes');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180201', 'extension', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (100, '20180301', 'extension', 'no');

    -- CASE 1:
    -- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180101', 'assignment', 'yes');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180201', 'assignment', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (110, '20180301', 'assignment', 'no');

    -- CASE 1:
    -- if no or one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180101', 'assigned', 'yes');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180201', 'assigned', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180301', 'failure', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (200, '20180401', 'assigned', 'no');

    -- CASE 2:
    -- if more than one ACTION (success, failure) present, select first ASSIGNMENT or EXTENSION after last action
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180101', 'assigned', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180201', 'assigned', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180301', 'failure', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180401', 'assigned', 'yes');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180501', 'success', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180601', 'success', 'no');
    insert into demoEvents.eventStream (customer, eventDT, eventName, wanted) values (300, '20180701', 'success', 'no');

  • This gives the results you want.  I had to add a little extra code to treat 1 the same as zero.


    WITH eventgroups AS
    (
        SELECT *, COUNT(CASE WHEN es.eventName IN ('failure', 'success')THEN es.id END) OVER(PARTITION BY es.customer ORDER BY es.eventDT DESC ROWS UNBOUNDED PRECEDING) AS grp
        FROM #eventStream es
    )
    , eventorder AS
    (
        SELECT eg.id, eg.customer, eg.eventDT, eg.eventName, eg.wanted, grp, ROW_NUMBER() OVER(PARTITION BY eg.customer ORDER BY CASE WHEN grp < 2 THEN 1 ELSE grp END, eventDT) AS rn
        FROM eventgroups eg
        WHERE eg.eventName NOT IN ('failure', 'success')
    )
    SELECT eo.id, eo.customer, eo.eventDT, eo.eventName, eo.wanted, rn, grp
    FROM eventorder eo
    WHERE eo.rn = 1
    ORDER BY eo.customer

    Drew

    J. Drew Allen
    Business Intelligence Analyst
    Philadelphia, PA

  • You were quicker, Drew.
    ---------------------------------------------------
    -- helpful index
    -- create unique nonclustered index IX1_EventStream on demoevents.EventStream(customer,id) include (eventname);

    WITH cte1 as ( -- flag prior actions
                    select id, customer, eventDT, eventName, wanted
                            ,case when lag(eventname) over (partition by customer order by id)
                                        in ('Failure','Success')
                                 then 1 end as PriorAction
                    from demoevents.EventStream e
                 )

        ,cte2 as (-- analysis
                    select customer
                        ,sum(Case when EventName in ('Failure','Success') then 1 else 0 end) as ActionCount
                        ,min(Case when EventName in ('Failure','Success') then ID end) as ActionID
                        ,sum(Case when EventName in ('Assignment','Extension') then 1 else 0 end) as AssgnExtCount
                        ,min(Case when EventName in ('Assignment','Extension') then ID end) as AssgnExtID                        
                        ,min(Case when EventName in ('Assignment','Extension')
                            and PriorAction = 1                              
                                 then ID end) as AssgnExtIDPrior
                    from cte1
                    group by customer
                    )

    select e.* 
    from cte2
    join demoevents.Eventstream e on e.ID = Case when ActionCount <= 1 then AssgnExtID else AssgnExtIDPrior end

    --------------------------------------------------------------------------------------------------------------------------------------------------------------
    After note:    The LAG expression above assumes that the sequence of ID would be the same as the sequence of EventDT.   
    This can be corrected in the LAG clause without affecting performance by substituting EventDT for ID in the OVER section.

    __________________________________________________

    Against stupidity the gods themselves contend in vain. -- Friedrich Schiller
    Stop, children, what's that sound? Everybody look what's going down. -- Stephen Stills

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply