September 13, 2011 at 4:48 pm
struggling with this one now.
I have taken the script supllied earlier in thread and added some month_names to the figures in the test data.
I have also removed the declaring of @percentile as it is alsways going to be 95th (0.95)
I cant get the results to show the median of the figures for each month seperatly.
Can anyone point me in the right direction please. Thanks
IF OBJECT_ID('tempdb..#DATA') IS NOT NULL DROP TABLE #data
CREATE TABLE #data (number INT, Month_Name nvarchar (10))
INSERT INTO #data
SELECT 15 as number, 'jan' as Month_Name union all
SELECT 20 as number, 'jan' as Month_Name union all
SELECT 35 as number, 'jan' as Month_Name union all
SELECT 40 as number, 'jan' as Month_Name union all
SELECT 5 as number, 'feb' as Month_Name union all
SELECT 2 as number, 'feb' as Month_Name union all
SELECT 3 as number, 'feb' as Month_Name union all
SELECT 4 as number, 'feb' as Month_Name union all
SELECT 7 as number, 'feb' as Month_Name
;with percentile as
(
select
CAST(number AS DECIMAL(18,3)) as number
,row_number() OVER (PARTITION BY (SELECT NULL) order by number asc) as row
from #data
)
,percentile2 AS
(
SELECT
number
,row
,MAX(row) OVER (PARTITION BY (SELECT NULL)) as max_row
,(0.95 * (MAX(row) OVER (PARTITION BY (SELECT NULL)) - 1)) + 1 as percentile_row
FROM percentile
)
select
AVG(number) as percentile_value
from percentile2
WHERE ABS(percentile_row - row) < 1
September 13, 2011 at 5:15 pm
You say that you want the median for each month - is this instead of the 95th percentile or on top (i.e. does the query need to calculate both simultaneously?)
September 13, 2011 at 5:16 pm
Sean Lange (9/12/2011)
Well then....
select MAX(number) from
(
select top 95 percent number from PS_TestForOnline order by number
) MyAlias
You probably want a bit more sample data to prove this but it should be what you are looking for.
You would want to order that subquery by number desc
Jason...AKA CirqueDeSQLeil
_______________________________________________
I have given a name to my pain...MCM SQL Server, MVP
SQL RNNR
Posting Performance Based Questions - Gail Shaw[/url]
Learn Extended Events
September 13, 2011 at 7:04 pm
Statistics is really not my thing, but I thought I would mention a post by Rob Farley recently:
http://sqlblog.com/blogs/rob_farley/archive/2011/08/23/t-sql-thoughts-about-the-95th-percentile.aspx
Might be of interest.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
September 14, 2011 at 2:16 am
Ignore my reference to median ( i was thinking about another query i'm struggling with )
I need to get the 95th percentile for each months figures.
Thanks
September 14, 2011 at 2:33 am
I think this should work for the specific month figures:
;with percentile as
(
select
CAST(number AS DECIMAL(18,3)) as number
,month_name
,row_number() OVER (PARTITION BY month_name order by number asc) as row
from #data
)
,percentile2 AS
(
SELECT
number
,month_name
,row
,MAX(row) OVER (PARTITION BY (SELECT NULL)) as max_row
,(0.95 * (MAX(row) OVER (PARTITION BY (SELECT NULL)) - 1)) + 1 as percentile_row
FROM percentile
)
select
month_name
,AVG(number) as percentile_value
from percentile2
WHERE ABS(percentile_row - row) < 1
GROUP BY month_name
September 14, 2011 at 5:14 am
Now 100% sure this applies for your case (speed read the thread, but it seems unsolved at the moment).
Maybe this can inspire you the answer you need :
WITH Waits AS
(SELECT
wait_type,
wait_time_ms / 1000.0 AS WaitS,
(wait_time_ms - signal_wait_time_ms) / 1000.0 AS ResourceS,
signal_wait_time_ms / 1000.0 AS SignalS,
waiting_tasks_count AS WaitCount,
100.0 * wait_time_ms / SUM (wait_time_ms) OVER() AS Percentage,
ROW_NUMBER() OVER(ORDER BY wait_time_ms DESC) AS RowNum
FROM sys.dm_os_wait_stats
WHERE wait_type NOT IN (
'CLR_SEMAPHORE', 'LAZYWRITER_SLEEP', 'RESOURCE_QUEUE', 'SLEEP_TASK',
'SLEEP_SYSTEMTASK', 'SQLTRACE_BUFFER_FLUSH', 'WAITFOR', 'LOGMGR_QUEUE',
'CHECKPOINT_QUEUE', 'REQUEST_FOR_DEADLOCK_SEARCH', 'XE_TIMER_EVENT', 'BROKER_TO_FLUSH',
'BROKER_TASK_STOP', 'CLR_MANUAL_EVENT', 'CLR_AUTO_EVENT', 'DISPATCHER_QUEUE_SEMAPHORE',
'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT', 'XE_DISPATCHER_JOIN', 'BROKER_EVENTHANDLER',
'TRACEWRITE', 'FT_IFTSHC_MUTEX', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', 'BROKER_RECEIVE_WAITFOR')
)
SELECT
W1.wait_type AS WaitType,
CAST (W1.WaitS AS DECIMAL(14, 2)) AS Wait_S,
CAST (W1.WaitS * 1000 / W1.WaitCount AS DECIMAL(14, 2)) AS AvgWait_MS,
CAST (W1.ResourceS AS DECIMAL(14, 2)) AS Resource_S,
CAST (W1.SignalS AS DECIMAL(14, 2)) AS Signal_S,
W1.WaitCount AS WaitCount,
CAST (W1.Percentage AS DECIMAL(4, 2)) AS Percentage
FROM Waits AS W1
INNER JOIN Waits AS W2
ON W2.RowNum <= W1.RowNum
GROUP BY W1.RowNum, W1.wait_type, W1.WaitS, W1.ResourceS, W1.SignalS, W1.WaitCount, W1.Percentage
HAVING SUM (W2.Percentage) - W1.Percentage < 95 -- percentage threshold
AND W1.WaitCount > 0;
GO
September 16, 2011 at 5:30 am
Hi, I back and and I think I'm 95 percentile there. (excuse the pun!!!):-)
I have needed to replicate the percentile function in EXCEL which uses interpolation which gives slightly different figures.
The following works a treat to give me an overall percentile using a couple of functions and the final script. All below
As before, I'm struggling to group the result at the end by the Month_Name column in the results.
If anyone can help that would be great.
Thanks in advance
/*create function 1*/
CREATE FUNCTION dbo.LERP
(@value float, -- between low and high
@low float,
@high float,
@newlow float,
@newhigh float)
RETURNS float -- between newlow and newhigh
AS
BEGIN
RETURN CASE
WHEN @value between @low and @high and @newlow <= @newhigh
THEN @newlow+dbo.FDIV((@value-@low),
@high-@low)*(@newhigh-@newlow)
WHEN @value = @low and @newlow is not NULL THEN @newlow
WHEN @value = @high and @newhigh is not NULL THEN @newhigh
ELSE NULL
END
END
GO
-----------------------------------
/*create function 2*/
CREATE FUNCTION dbo.FDIV
(@numerator float,
@denominator float)
RETURNS float
AS
BEGIN
RETURN CASE WHEN @denominator = 0.0
THEN 0.0
ELSE @numerator / @denominator
END
END
GO
-----------------------------------
/*insert data*/
CREATE TABLE #data (number INT, Month_Name nvarchar (10))
INSERT INTO #data
SELECT 15 as number, 'jan' as Month_Name union all
SELECT 26 as number, 'jan' as Month_Name union all
SELECT 47 as number, 'jan' as Month_Name union all
SELECT 25 as number, 'jan' as Month_Name union all
SELECT 15 as number, 'jan' as Month_Name union all
SELECT 20 as number, 'jan' as Month_Name union all
SELECT 22 as number, 'jan' as Month_Name union all
SELECT 40 as number, 'jan' as Month_Name union all
SELECT 98 as number, 'mar' as Month_Name union all
SELECT 15 as number, 'mar' as Month_Name union all
SELECT 48 as number, 'mar' as Month_Name union all
SELECT 75 as number, 'mar' as Month_Name union all
SELECT 25 as number, 'mar' as Month_Name union all
SELECT 40 as number, 'mar' as Month_Name union all
SELECT 44 as number, 'mar' as Month_Name union all
SELECT 40 as number, 'mar' as Month_Name union all
SELECT 5 as number, 'feb' as Month_Name union all
SELECT 2 as number, 'feb' as Month_Name union all
SELECT 3 as number, 'feb' as Month_Name union all
SELECT 4 as number, 'feb' as Month_Name union all
SELECT 5 as number, 'feb' as Month_Name union all
SELECT 2 as number, 'feb' as Month_Name union all
SELECT 3 as number, 'feb' as Month_Name union all
SELECT 4 as number, 'feb' as Month_Name union all
SELECT 7 as number, 'feb' as Month_Name
select * from #data
----------------------------
--declare @pp float
--set @pp = .95
select
dbo.LERP(max(d), 0.0, 1.0, max(a.number), max(b.number)) as percentile
from
(
select floor(kf) as k, kf-floor(kf) as d
from(
select 1+0.95*(count(*)-1) as kf
from #data
) as x1
) as x2
join #data a
on
(select count(*) from #data aa
where aa.number < a.number) < k
join #data b
on
(select count(*) from #data bb
where bb.number < b.number) < k+1
--88888888888888888888888888888888888888888888888888888888888888888888
drop table #data
Viewing 8 posts - 16 through 22 (of 22 total)
You must be logged in to reply to this topic. Login to reply