October 4, 2020 at 11:00 am
My solution, no claim to originality.
declare @AtLeastCount int = 3
;with T as
(
select *, RowNum = row_number() over(order by ID)
from @T
),
T1 as
(
select *, RowGroup = RowNum - row_number() over (order by RowNum)
from T
where Val = 0
),
T2 as
(
select *, RowGroup = RowNum - row_number() over (order by RowNum)
from T
where Val = 1
),
T3 as
(
select
Val = min(Val),
FirstID = min(ID),
LastID = max(ID),
CountID = count(ID)
from T1
group by
RowGroup
union all
select
Val = min(Val),
FirstID = min(ID),
LastID = max(ID),
CountID = count(ID)
from T2
group by
RowGroup
),
T4 as
(
select *,
BeforeID = lag(LastID) over(order by LastID),
BeforeVal = lag(Val) over(order by LastID),
AfterID = lead(FirstID) over(order by FirstID),
AfterVal = lead(Val) over(order by FirstID)
from T3
)
--unpivot
select
X.PosDec,
X.ID,
X.Val
from
T4
cross apply
(
values
('First', FirstID, Val),
('Last', LastID, Val),
('Before', BeforeID, BeforeVal),
('After', AfterID, AfterVal)
) X(PosDec, ID, Val)
where
T4.Val = 1
and T4.CountID >= @AtLeastCount
and X.ID is not null
order by
X.ID
October 4, 2020 at 3:23 pm
And without the UNION ALL.
declare @AtLeastCount int = 3
;with T as
(
select *,
RowNum0 = case when Val = 0 then row_number() over(order by ID) else null end,
RowNum1 = case when Val = 1 then row_number() over(order by ID) else null end
from @T
),
T1 as
(
select *,
RowGroup0 = case when Val = 0 then RowNum0 - row_number() over (order by RowNum0) else null end,
RowGroup1 = case when Val = 1 then RowNum1 - row_number() over (order by RowNum1) else null end
from T
),
T2 as
(
select
Val = min(Val),
FirstID = min(ID),
LastID = max(ID),
CountID = count(ID)
from T1
group by
coalesce(RowGroup0, RowGroup1)
),
T3 as
(
select *,
BeforeID = lag(LastID) over(order by LastID),
BeforeVal = lag(Val) over(order by LastID),
AfterID = lead(FirstID) over(order by FirstID),
AfterVal = lead(Val) over(order by FirstID)
from T2
)
--unpivot
select
X.PosDec,
X.ID,
X.Val
from
T3
cross apply
(
values
('First', FirstID, Val),
('Last', LastID, Val),
('Before', BeforeID, BeforeVal),
('After', AfterID, AfterVal)
) X(PosDec, ID, Val)
where
T3.Val = 1
and T3.CountID >= @AtLeastCount
and X.ID is not null
order by
X.ID
October 5, 2020 at 10:03 am
In my previous attempts, I don't think I can guarantee that RowGroup is unique across the two 'paths', and so could get incorrect result when combined. Would depend on the test data? So my next version...
declare @AtLeastCount int = 3
;with T as
(
select *,
RowNum = row_number() over(order by ID),
BeforeID = lag(ID) over(order by ID), --to use later
BeforeVal = lag(Val) over(order by ID),
AfterID = lead(ID) over(order by ID),
AfterVal = lead(Val) over(order by ID)
from @T
),
T1 as
(
select *,
RowGroup = RowNum - row_number() over (order by RowNum) --create groups for islands
from T
where
Val = 1 --important is here
),
T2 as
(
select
Val = min(Val),
FirstID = min(ID),
LastID = max(ID),
CountID = count(ID)
from T1
group by
RowGroup
),
T3 as
(
select
T2.Val,
T2.FirstID,
T2.LastID,
[Before].BeforeID,
[Before].BeforeVal,
[After].AfterID,
[After].AfterVal
from
T2
inner join T as [Before] on --look back
T2.FirstID = [Before].ID
inner join T as [After] on
T2.LastID = [After].ID
where
CountID >= @AtLeastCount
)
--unpivot
select
X.PosDec,
X.ID,
X.Val
from
T3
cross apply
(
values
('First', FirstID, Val),
('Last', LastID, Val),
('Before', BeforeID, BeforeVal),
('After', AfterID, AfterVal)
) X(PosDec, ID, Val)
where
X.ID is not null
order by
X.ID
October 7, 2020 at 9:26 pm
Hello kuopaz, the following elegant solution uses only windowed sums to obtain the desired result:
/* INPUT TEST DATA (from original post) */
declare @T table(ID int not null primary key, Val int not null)
insert @T(ID, Val)
select 1, 0 union all
select 10, 1 union all
select 17, 0 union all
select 18, 0 union all
select 27, 1 union all
select 34, 1 union all
select 42, 1 union all
select 52, 0 union all
select 62, 1 union all
select 63, 1 union all
select 65, 0 union all
select 73, 0 union all
select 79, 1 union all
select 85, 1 union all
select 89, 1 union all
select 90, 1
/* Original test data with a proper ORDER BY clause */
select * from
@T
order by
ID;
/*
-- An answer to the original question finding runs of at least 3 subsequent 1s
-- Each original row appears at most once in the result set
--
-- Remark: it is possible that a row with [Val] = 0 is both 'Before'
-- and 'After' a run, hence the addition of 'Between'
-- Required: sums of the preceding and following 1, N (3) and N - 1 (2) values
-- Assumption: [Val] is always either 0 or 1
-- Alternative: if [Val] may have other values as well, use
-- sum(case when Val = 1 then 1 else 0 end) instead of
-- sum(Val) in each windowed aggregate
*/
select * from
( select
case
when Val = 0 and Preceding3 < 3 and Following3 = 3 then 'Before'
when Val = 0 and Preceding3 = 3 and Following3 < 3 then 'After'
when Val = 0 and Preceding3 = 3 and Following3 = 3 then 'Between'
when Val = 1 and Preceding1 = 1 and Following2 = 3 then 'First'
when Val = 1 and Preceding2 = 3 and Following1 = 1 then 'Last'
end as PosDesc
, ID
, Val
from
( select
ID
, Val
, sum(Val) over (order by ID rows between current row and 1 following) as Following1
, sum(Val) over (order by ID rows between current row and 2 following) as Following2
, sum(Val) over (order by ID rows between current row and 3 following) as Following3
, sum(Val) over (order by ID rows between 1 preceding and current row) as Preceding1
, sum(Val) over (order by ID rows between 2 preceding and current row) as Preceding2
, sum(Val) over (order by ID rows between 3 preceding and current row) as Preceding3
from
@T
) as W
) as R
where
PosDesc is not null
order by
ID;
/*
-- An answer to an alternative question finding runs of at least 4 subsequent 1s
-- Each original row appears at most once in the result set
--
-- Remark: it is possible that a row with [Val] = 0 is both 'Before'
-- and 'After' a run, hence the addition of 'Between'
-- Required: sums of the preceding and following 1, N (4) and N - 1 (3) values
-- Assumption: [Val] is always either 0 or 1
-- Alternative: if [Val] may have other values as well, use
-- sum(case when Val = 1 then 1 else 0 end) instead of
-- sum(Val) in each windowed aggregate
*/
select * from
( select
case
when Val = 0 and Preceding4 < 4 and Following4 = 4 then 'Before'
when Val = 0 and Preceding4 = 4 and Following4 < 4 then 'After'
when Val = 0 and Preceding4 = 4 and Following4 = 4 then 'Between'
when Val = 1 and Preceding1 = 1 and Following3 = 4 then 'First'
when Val = 1 and Preceding3 = 4 and Following1 = 1 then 'Last'
end as PosDesc
, ID
, Val
from
( select
ID
, Val
, sum(Val) over (order by ID rows between current row and 1 following) as Following1
, sum(Val) over (order by ID rows between current row and 3 following) as Following3
, sum(Val) over (order by ID rows between current row and 4 following) as Following4
, sum(Val) over (order by ID rows between 1 preceding and current row) as Preceding1
, sum(Val) over (order by ID rows between 3 preceding and current row) as Preceding3
, sum(Val) over (order by ID rows between 4 preceding and current row) as Preceding4
from
@T
) as W
) as R
where
PosDesc is not null
order by
ID;
It can not use the run length as a parameter, because the window specification requires a literal value.
But for every given run length a solution exists using only six windowed aggregates, as shown above.
October 8, 2020 at 2:44 am
It can not use the run length as a parameter, because the window specification requires a literal value.
But for every given run length a solution exists using only six windowed aggregates, as shown above.
So, how do you identity sequences of, say, 8 1s rather than 3/4, with "only six windowed aggregates"?
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
October 8, 2020 at 9:10 am
To identify runs of eight 1s you can use the following select statement:
select * from
( select
case
when Val = 0 and Preceding8 < 8 and Following8 = 8 then 'Before'
when Val = 0 and Preceding8 = 8 and Following8 < 8 then 'After'
when Val = 0 and Preceding8 = 8 and Following8 = 8 then 'Between'
when Val = 1 and Preceding1 = 1 and Following7 = 8 then 'First'
when Val = 1 and Preceding7 = 8 and Following1 = 1 then 'Last'
end as PosDesc
, ID
, Val
from
( select
ID
, Val
, sum(Val) over (order by ID rows between current row and 1 following) as Following1
, sum(Val) over (order by ID rows between current row and 7 following) as Following7
, sum(Val) over (order by ID rows between current row and 8 following) as Following8
, sum(Val) over (order by ID rows between 1 preceding and current row) as Preceding1
, sum(Val) over (order by ID rows between 7 preceding and current row) as Preceding7
, sum(Val) over (order by ID rows between 8 preceding and current row) as Preceding8
from
@T
) as W
) as R
where
PosDesc is not null
order by
ID;
October 8, 2020 at 10:38 am
Thanks vliet. It is elegant but having to use a literal value in the windowing frame is a big limitation.
The islands method above also uses six windowing functions - two lag, two lead, two row_number.
October 8, 2020 at 2:48 pm
As long as we're fixated on the number of windowed functions (I'd personally prefer choosing based on overall performance profile over a wide variety of data sets, but c'est la vie), I guess I should point out that my first solution uses exactly 2 windowed functions, and can easily take the required number of consecutive 1s as a parameter. 🙂
Cheers!
January 15, 2021 at 10:31 am
No, it's well enough defined.
Viewing 9 posts - 16 through 23 (of 23 total)
You must be logged in to reply to this topic. Login to reply