October 6, 2021 at 11:24 am
Hi there
I have a table of data readings which I am able to use NTILE to sucessfully extract the MinValue, MaxValue and Average Reading Value using
the following query:
;WITH cte_ntile AS (
SELECT *, NTILE(2) OVER(ORDER BY ReadingDateTime asc) AS [Group]
FROM SIDataGroup
)
SELECT MIN(ReadingValue) AS MinValue, MAX(ReadingValue) AS MaxValue, AVG(ReadingValue) AS AvgValue, [Group]
FROM cte_ntile
GROUP BY [Group]
Now in addition to extracting these values, I now need to extract the Median date from each group
is there are a straight forward way of doing this?
I tried using RowNumber to partion the day as follows but it didnt work properly
;with cte
as
(
SELECT
ReadingDateTime,
ReadingValue,
NTILE(2) OVER(ORDER BY ReadingDateTime asc) AS [Group]
FROM SIDataGroup
)
select
ReadingDateTime,
ReadingValue,
[Group],
ROW_NUMBER() OVER (
--PARTITION BY ReadingDateTime,ReadingValue,[Group]
ORDER BY ReadingDateTime,ReadingValue
) RowNo
from cte
where [Group] in (1,2)
So in this case, i have a set of 40 records split into 2 groups of 20
Therefore i can work out that the 10th record in each group gives me the Median Date
Table Definition and Population
DROP TABLE IF EXISTS [dbo].[SIDataGroup]
CREATE TABLE [dbo].[SIDataGroup](
[ReadingDateTime] [datetime2](7) NULL,
[ReadingValue] [float] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:06:39.0933333' AS DateTime2), 59.64)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:06:39.0937500' AS DateTime2), 59.64)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:08:39.0933333' AS DateTime2), 59.71)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:08:39.0937500' AS DateTime2), 59.71)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:10:39.0933333' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:10:39.0937500' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:12:39.0933333' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:12:39.0937500' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:14:39.0933333' AS DateTime2), 59.66)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:14:39.0937500' AS DateTime2), 59.66)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:16:39.0933333' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:16:39.0937500' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:18:39.0933333' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:18:39.0937500' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:20:39.0933333' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:20:39.0937500' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:22:39.0933333' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:22:39.0937500' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:24:39.2800000' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:24:39.2812500' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:26:39.0933333' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:26:39.0937500' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:28:39.0933333' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:28:39.0937500' AS DateTime2), 59.69)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:30:39.0933333' AS DateTime2), 59.67)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:30:39.0937500' AS DateTime2), 59.67)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:32:39.0933333' AS DateTime2), 59.67)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:32:39.0937500' AS DateTime2), 59.67)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:34:39.0933333' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:34:39.0937500' AS DateTime2), 59.68)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:36:39.0933333' AS DateTime2), 59.72)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:36:39.0937500' AS DateTime2), 59.72)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:38:39.0933333' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:38:39.0937500' AS DateTime2), 59.7)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:40:39.0933333' AS DateTime2), 59.65)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:40:39.0937500' AS DateTime2), 59.65)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:42:39.0933333' AS DateTime2), 59.66)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:42:39.0937500' AS DateTime2), 59.66)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:44:39.0933333' AS DateTime2), 59.72)
GO
INSERT [dbo].[SIDataGroup] ([ReadingDateTime], [ReadingValue]) VALUES (CAST(N'2021-05-16T05:44:39.0937500' AS DateTime2), 59.72)
GO
October 6, 2021 at 2:01 pm
Maybe something like this. The 'median_dt' column calculation adds half the difference in seconds between the min and max ReadingDateTime values within the [Group] group to the min ReadingDateTime
;WITH cte_ntile AS (
SELECT *, NTILE(2) OVER(ORDER BY ReadingDateTime asc) AS [Group]
FROM #SIDataGroup
)
SELECT MIN(ReadingValue) AS MinValue, MAX(ReadingValue) AS MaxValue,
AVG(ReadingValue) AS AvgValue,
dateadd(second, datediff(second, min(ReadingDateTime),
max(ReadingDateTime))/2, min(ReadingDateTime)) median_dt,
[Group]
FROM cte_ntile
GROUP BY [Group];
MinValue MaxValue AvgValue median_dt Group
59.64 59.71 59.683 2021-05-16 05:15:39.0933333 1
59.65 59.72 59.686 2021-05-16 05:35:39.0933333 2
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
October 6, 2021 at 3:07 pm
Hi Steve
Thank you very much for that. Thats a very elegant solution because it captures the interval midway between the 10 and 11th record in each group
October 6, 2021 at 5:07 pm
Thanks for the feedback. Nice I'm happy if the code helps. Precision-wise it's maybe not ideal. I tried it with nanoseconds and there was an overflow error. Maybe microseconds would work. Also, integer division using the date difference potentially adds some tiny error too.
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
October 6, 2021 at 6:46 pm
That's not an accurate method to get a median value.
For example, if you have these values:
SELECT (MAX(value) - MIN(value)) / 2 + MIN(value)
FROM ( VALUES(1),(2),(3),(1000),(3000) ) AS data(value)
the median (middle) value is 3, but that formula yields 1500, which is not even in the list of values.
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
October 6, 2021 at 8:12 pm
It turns out the median is derived from the ordered set. My query only calculated the midpoint. If the number of rows in the [Group] (calculated as 'grp_count') is even this query returns the grp_count/2-th ordered row. If 'grp_count' is odd the query returns the (grp_count/2)+1-th ordered row
;with
cte_ntile(ReadingDateTime, ReadingValue, [Group]) AS (
select *, ntile(2) over(order by ReadingDateTime asc) AS [Group]
from #SIDataGroup),
cte_rn(ReadingDateTime, ReadingValue, [Group], rn) as (
select *,
row_number() over (partition by [Group] order by ReadingDateTime) rn
from cte_ntile),
cte_summary as (
select MIN(ReadingValue) AS MinValue,
MAX(ReadingValue) AS MaxValue,
AVG(ReadingValue) AS AvgValue,
dateadd(second, datediff(second, min(ReadingDateTime),
max(ReadingDateTime))/2, min(ReadingDateTime)) mid_point_dt,
count(*) grp_count,
[Group]
from cte_ntile cn
group by [Group])
select s.*, median.rv
from cte_summary s
cross apply (select ReadingDateTime
from cte_rn r
where s.[Group]=r.[Group]
and r.rn=(s.grp_count/2+s.grp_count%2)) median(rv);
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
October 6, 2021 at 8:15 pm
Actually, for an even number of rows, the median is the avg of the middle two (IIRC).
For example, for values:
1, 2, 3, 101, 200, 500 :: median is 52 ( (3+101)/2 ).
For an odd number of rows, it's just the middle row:
1, 2, 101, 200, 500 :: 101
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
October 6, 2021 at 8:59 pm
Maybe something like this
;with
cte_ntile(ReadingDateTime, ReadingValue, [Group]) AS (
select *, ntile(2) over(order by ReadingDateTime asc) AS [Group]
from #SIDataGroup),
cte_rn(ReadingDateTime, ReadingValue, [Group], rn) as (
select *,
row_number() over (partition by [Group] order by ReadingDateTime) rn
from cte_ntile),
cte_summary as (
select MIN(ReadingValue) AS MinValue,
MAX(ReadingValue) AS MaxValue,
AVG(ReadingValue) AS AvgValue,
dateadd(second, datediff(second, min(ReadingDateTime),
max(ReadingDateTime))/2, min(ReadingDateTime)) mid_point_dt,
count(*) grp_count,
[Group]
from cte_ntile cn
group by [Group])
select s.*, median.median_dt
from cte_summary s
cross apply (select dateadd(microsecond, datediff(microsecond, min(ReadingDateTime),
max(ReadingDateTime))/2, min(ReadingDateTime))
from cte_rn r
cross join (select top(iif(s.grp_count%2=1, 1, 2)) v.n
from (values (s.grp_count/2),(s.grp_count/2+1)) v(n)
order by v.n desc) x(n)
where s.[Group]=r.[Group]
and r.rn=x.n) median(median_dt);
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
Viewing 8 posts - 1 through 7 (of 7 total)
You must be logged in to reply to this topic. Login to reply