Hi!
I'm having struggle with selecting rows, that match "dynamic" rules as described:
I think it has to do with "gaps and islands" and probably something with lead/lag to build group ids. But I don't know how to build the "counter" so I know if it's 10 or more records that match the pattern.
Please see this simplified demo table (id is an INT, in real-world it's a timestamp of an event)
drop table if exists patternDemo;
create table patternDemo
(
id int,
value int,
include char(3) -- select record in final query?
)
-- ignore everything before START
insert into patternDemo values (1, 100, 'no');
insert into patternDemo values (2, 100, 'no');
insert into patternDemo values (3, 100, 'no');
insert into patternDemo values (4, 200, 'no');
insert into patternDemo values (5, 100, 'no');
insert into patternDemo values (6, 300, 'no');
insert into patternDemo values (7, 500, 'no');
-- START: block having >= 10 consecutive valus above 1000
insert into patternDemo values (8, 1500, 'yes');
insert into patternDemo values (9, 1500, 'yes');
insert into patternDemo values (10, 1300, 'yes');
insert into patternDemo values (11, 1600, 'yes');
insert into patternDemo values (12, 1500, 'yes');
insert into patternDemo values (13, 1700, 'yes');
insert into patternDemo values (14, 1800, 'yes');
insert into patternDemo values (15, 1800, 'yes');
insert into patternDemo values (16, 1800, 'yes');
insert into patternDemo values (17, 1800, 'yes');
insert into patternDemo values (18, 1600, 'yes');
insert into patternDemo values (19, 1500, 'yes');
-- after a START block having at least 10 rows with a value over 1000
-- there maybe lower values, which i want to include, since we're still
-- before a STOP pattern
insert into patternDemo values (20, 600, 'yes');
insert into patternDemo values (21, 400, 'yes');
insert into patternDemo values (22, 1500, 'yes');
insert into patternDemo values (23, 1800, 'yes');
insert into patternDemo values (24, 1800, 'yes');
insert into patternDemo values (25, 1800, 'yes');
-- STOP pattern: another block of at least 10 rows with a value lower than 1000
insert into patternDemo values (26, 300, 'no');
insert into patternDemo values (27, 300, 'no');
insert into patternDemo values (28, 100, 'no');
insert into patternDemo values (29, 100, 'no');
insert into patternDemo values (30, 100, 'no');
insert into patternDemo values (31, 100, 'no');
insert into patternDemo values (32, 100, 'no');
insert into patternDemo values (33, 100, 'no');
insert into patternDemo values (34, 100, 'no');
insert into patternDemo values (35, 100, 'no');
insert into patternDemo values (36, 100, 'no');
insert into patternDemo values (37, 100, 'no');
insert into patternDemo values (38, 100, 'no');
insert into patternDemo values (39, 100, 'no');
insert into patternDemo values (40, 100, 'no');
January 17, 2022 at 3:23 pm
Well this is definitely a head scratcher. The following code is not pretty, but it seems to get the expected results
WITH cteBase AS (
SELECT *
, rn = ROW_NUMBER() OVER ( ORDER BY pd.id )
, IsStartBlock = SUM( CASE WHEN pd.VALUE > 1000 THEN 1 ELSE 0 END )
OVER ( ORDER BY pd.id ROWS BETWEEN 9 PRECEDING AND CURRENT ROW )
, IsEndBlock = SUM( CASE WHEN pd.VALUE < 1000 THEN 1 ELSE 0 END )
OVER ( ORDER BY pd.id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING )
FROM #patternDemo AS pd
)
, cteStart AS (
SELECT sb.*
, grp = sb.rn - ROW_NUMBER() OVER ( ORDER BY sb.rn )
FROM cteBase AS sb
WHERE sb.IsStartBlock IN ( 1, 10 )
)
, cteStartBlock AS (
SELECT sb.StartBlockStart
, StartBlockEnd = MAX( st.id )
, rn = ROW_NUMBER() OVER (ORDER BY sb.StartBlockStart)
FROM cteStart AS st
OUTER APPLY ( SELECT StartBlockStart = MAX( sbs.id )
FROM cteStart AS sbs
WHERE sbs.IsStartBlock = 1
AND sbs.id < st.id
) AS sb
WHERE st.IsStartBlock = 10
GROUP BY st.grp, sb.StartBlockStart
)
, cteEnd as (
SELECT eb.*
, grp = eb.rn - ROW_NUMBER() OVER ( ORDER BY eb.rn )
FROM cteBase AS eb
WHERE eb.IsEndBlock IN ( 1, 10 )
)
, cteEndBlock AS (
SELECT EndBlockStart = MIN( nd.id )
, eb.EndBlockEnd
, rn = ROW_NUMBER() OVER (ORDER BY eb.EndBlockEnd)
FROM cteEnd AS nd
OUTER APPLY ( SELECT EndBlockEnd = MIN( ebs.id )
FROM cteEnd AS ebs
WHERE ebs.IsEndBlock = 1
AND ebs.id > nd.id
) AS eb
WHERE nd.IsEndBlock = 10
GROUP BY nd.grp, eb.EndBlockEnd
)
, cteBlocks AS (
SELECT BlockNo = scte.rn
, scte.StartBlockStart
, scte.StartBlockEnd
, ecte.EndBlockStart
, ecte.EndBlockEnd
FROM cteStartBlock AS scte
LEFT JOIN cteEndBlock AS ecte
ON ecte.rn = scte.rn
)
SELECT bk.BlockNo
, src.*
FROM #patternDemo AS src
INNER JOIN cteBlocks AS bk
ON src.id > bk.StartBlockEnd
AND src.id < bk.EndBlockStart
ORDER BY src.id;
January 17, 2022 at 3:33 pm
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,StartStop
AS
(
SELECT id, [value]
,CASE
WHEN 1 = LAG(StartSignal) OVER (ORDER BY id)
THEN 0
ELSE StartSignal
END AS StartSignal
,CASE
WHEN 1 = LAG(StopSignal) OVER (ORDER BY id)
THEN 0
ELSE StopSignal
END AS StopSignal
FROM Rules
)
,StartWithStart
AS
(
SELECT id, [value], StartSignal
,CASE
WHEN LAG(StopSignal) OVER (ORDER BY id) IS NULL
THEN 0
ELSE StopSignal
END AS StopSignal
FROM StartStop
)
,Includes
AS
(
SELECT id, [value]
,SUM(StartSignal) OVER (ORDER BY id)
- SUM(StopSignal) OVER (ORDER BY id) AS IncludeSignal
FROM StartWithStart
)
SELECT id, [value]
FROM Includes
WHERE IncludeSignal = 1;
January 17, 2022 at 4:09 pm
It seems safer to avoid LEAD and LAG because start/stop conditions might repeat within ranges of groups
with
gap_cte as (
select *, iif(v.sgn<>lag(v.sgn) over (order by id), 1, 0) gap
from #patternDemo pd
cross apply (values (sign([value]-1000))) v(sgn)),
grp_cte as (
select *, sum(gap) over (order by id) grp
from gap_cte),
rn_cte as (
select *,
row_number() over (partition by grp order by id desc) rn,
row_number() over (partition by grp order by id) rn_max
from grp_cte),
start_stop_cte as (
select top(1) with ties *
from rn_cte
where rn_max>=10
order by rn)
select g.*
from start_stop_cte s
outer apply (select top(1) grp
from start_stop_cte ss
where ss.id<s.id
and ss.sgn=1
order by id desc) starts(start_grp)
left join grp_cte g on g.grp<s.grp
and g.grp>=starts.start_grp
where s.sgn=-1
order by rn;
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
This should cope with more edge conditions than my original query:
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,StartStop
AS
(
SELECT id, [value]
,CASE
WHEN 1 = LAG(StartSignal) OVER (ORDER BY id)
THEN 0
ELSE StartSignal
END AS StartSignal
,CASE
WHEN 1 = LAG(StopSignal) OVER (ORDER BY id)
THEN 0
ELSE StopSignal
END AS StopSignal
,MAX(StartSignal) OVER (ORDER BY id)
-MAX(StopSignal) OVER (ORDER BY id) AS StartStopDiff
FROM Rules
)
,StartWithStart
AS
(
SELECT id, [value], StartSignal
,CASE
WHEN StartStopDiff = -1
THEN 0
ELSE StopSignal
END AS StopSignal
FROM StartStop
)
,Includes
AS
(
SELECT id, [value]
,SUM(StartSignal) OVER (ORDER BY id)
- SUM(StopSignal) OVER (ORDER BY id) AS IncludeSignal
FROM StartWithStart
)
SELECT id, [value], IncludeSignal
FROM Includes
WHERE IncludeSignal >= 1;
January 18, 2022 at 10:42 am
On looking at this again this may be a safer approach:
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,PKOnOffs
AS
(
SELECT id, [value]
,CASE
WHEN StartSignal = 1
THEN RIGHT('000000000' + CAST(id AS varchar(20)), 10) + 'On'
WHEN StopSignal = 1
THEN RIGHT('000000000' + CAST(id AS varchar(20)), 10) + 'Off'
END AS PKOnOff
FROM Rules
)
,OnOffs
AS
(
SELECT id, [value]
-- MAX ignores NULLs
,ISNULL(SUBSTRING(MAX(PKOnOff) OVER (ORDER BY id), 11, 3), 'Off') AS OnOff
FROM PKOnOffs
)
SELECT id, [value]
FROM OnOffs
WHERE OnOff = 'On';
Viewing 6 posts - 1 through 5 (of 5 total)
You must be logged in to reply to this topic. Login to reply