95th percentile

  • struggling with this one now.

    I have taken the script supllied earlier in thread and added some month_names to the figures in the test data.

    I have also removed the declaring of @percentile as it is alsways going to be 95th (0.95)

    I cant get the results to show the median of the figures for each month seperatly.

    Can anyone point me in the right direction please. Thanks

    IF OBJECT_ID('tempdb..#DATA') IS NOT NULL DROP TABLE #data

    CREATE TABLE #data (number INT, Month_Name nvarchar (10))

    INSERT INTO #data

    SELECT 15 as number, 'jan' as Month_Name union all

    SELECT 20 as number, 'jan' as Month_Name union all

    SELECT 35 as number, 'jan' as Month_Name union all

    SELECT 40 as number, 'jan' as Month_Name union all

    SELECT 5 as number, 'feb' as Month_Name union all

    SELECT 2 as number, 'feb' as Month_Name union all

    SELECT 3 as number, 'feb' as Month_Name union all

    SELECT 4 as number, 'feb' as Month_Name union all

    SELECT 7 as number, 'feb' as Month_Name

    ;with percentile as

    (

    select

    CAST(number AS DECIMAL(18,3)) as number

    ,row_number() OVER (PARTITION BY (SELECT NULL) order by number asc) as row

    from #data

    )

    ,percentile2 AS

    (

    SELECT

    number

    ,row

    ,MAX(row) OVER (PARTITION BY (SELECT NULL)) as max_row

    ,(0.95 * (MAX(row) OVER (PARTITION BY (SELECT NULL)) - 1)) + 1 as percentile_row

    FROM percentile

    )

    select

    AVG(number) as percentile_value

    from percentile2

    WHERE ABS(percentile_row - row) < 1

  • You say that you want the median for each month - is this instead of the 95th percentile or on top (i.e. does the query need to calculate both simultaneously?)

  • Sean Lange (9/12/2011)


    Well then....

    select MAX(number) from

    (

    select top 95 percent number from PS_TestForOnline order by number

    ) MyAlias

    You probably want a bit more sample data to prove this but it should be what you are looking for.

    You would want to order that subquery by number desc

    Jason...AKA CirqueDeSQLeil
    _______________________________________________
    I have given a name to my pain...MCM SQL Server, MVP
    SQL RNNR
    Posting Performance Based Questions - Gail Shaw[/url]
    Learn Extended Events

  • Statistics is really not my thing, but I thought I would mention a post by Rob Farley recently:

    http://sqlblog.com/blogs/rob_farley/archive/2011/08/23/t-sql-thoughts-about-the-95th-percentile.aspx

    Might be of interest.

  • Ignore my reference to median ( i was thinking about another query i'm struggling with )

    I need to get the 95th percentile for each months figures.

    Thanks

  • I think this should work for the specific month figures:

    ;with percentile as

    (

    select

    CAST(number AS DECIMAL(18,3)) as number

    ,month_name

    ,row_number() OVER (PARTITION BY month_name order by number asc) as row

    from #data

    )

    ,percentile2 AS

    (

    SELECT

    number

    ,month_name

    ,row

    ,MAX(row) OVER (PARTITION BY (SELECT NULL)) as max_row

    ,(0.95 * (MAX(row) OVER (PARTITION BY (SELECT NULL)) - 1)) + 1 as percentile_row

    FROM percentile

    )

    select

    month_name

    ,AVG(number) as percentile_value

    from percentile2

    WHERE ABS(percentile_row - row) < 1

    GROUP BY month_name

  • Now 100% sure this applies for your case (speed read the thread, but it seems unsolved at the moment).

    Maybe this can inspire you the answer you need :

    WITH Waits AS

    (SELECT

    wait_type,

    wait_time_ms / 1000.0 AS WaitS,

    (wait_time_ms - signal_wait_time_ms) / 1000.0 AS ResourceS,

    signal_wait_time_ms / 1000.0 AS SignalS,

    waiting_tasks_count AS WaitCount,

    100.0 * wait_time_ms / SUM (wait_time_ms) OVER() AS Percentage,

    ROW_NUMBER() OVER(ORDER BY wait_time_ms DESC) AS RowNum

    FROM sys.dm_os_wait_stats

    WHERE wait_type NOT IN (

    'CLR_SEMAPHORE', 'LAZYWRITER_SLEEP', 'RESOURCE_QUEUE', 'SLEEP_TASK',

    'SLEEP_SYSTEMTASK', 'SQLTRACE_BUFFER_FLUSH', 'WAITFOR', 'LOGMGR_QUEUE',

    'CHECKPOINT_QUEUE', 'REQUEST_FOR_DEADLOCK_SEARCH', 'XE_TIMER_EVENT', 'BROKER_TO_FLUSH',

    'BROKER_TASK_STOP', 'CLR_MANUAL_EVENT', 'CLR_AUTO_EVENT', 'DISPATCHER_QUEUE_SEMAPHORE',

    'FT_IFTS_SCHEDULER_IDLE_WAIT', 'XE_DISPATCHER_WAIT', 'XE_DISPATCHER_JOIN', 'BROKER_EVENTHANDLER',

    'TRACEWRITE', 'FT_IFTSHC_MUTEX', 'SQLTRACE_INCREMENTAL_FLUSH_SLEEP', 'BROKER_RECEIVE_WAITFOR')

    )

    SELECT

    W1.wait_type AS WaitType,

    CAST (W1.WaitS AS DECIMAL(14, 2)) AS Wait_S,

    CAST (W1.WaitS * 1000 / W1.WaitCount AS DECIMAL(14, 2)) AS AvgWait_MS,

    CAST (W1.ResourceS AS DECIMAL(14, 2)) AS Resource_S,

    CAST (W1.SignalS AS DECIMAL(14, 2)) AS Signal_S,

    W1.WaitCount AS WaitCount,

    CAST (W1.Percentage AS DECIMAL(4, 2)) AS Percentage

    FROM Waits AS W1

    INNER JOIN Waits AS W2

    ON W2.RowNum <= W1.RowNum

    GROUP BY W1.RowNum, W1.wait_type, W1.WaitS, W1.ResourceS, W1.SignalS, W1.WaitCount, W1.Percentage

    HAVING SUM (W2.Percentage) - W1.Percentage < 95 -- percentage threshold

    AND W1.WaitCount > 0;

    GO

  • Hi, I back and and I think I'm 95 percentile there. (excuse the pun!!!):-)

    I have needed to replicate the percentile function in EXCEL which uses interpolation which gives slightly different figures.

    The following works a treat to give me an overall percentile using a couple of functions and the final script. All below

    As before, I'm struggling to group the result at the end by the Month_Name column in the results.

    If anyone can help that would be great.

    Thanks in advance

    /*create function 1*/

    CREATE FUNCTION dbo.LERP

    (@value float, -- between low and high

    @low float,

    @high float,

    @newlow float,

    @newhigh float)

    RETURNS float -- between newlow and newhigh

    AS

    BEGIN

    RETURN CASE

    WHEN @value between @low and @high and @newlow <= @newhigh

    THEN @newlow+dbo.FDIV((@value-@low),

    @high-@low)*(@newhigh-@newlow)

    WHEN @value = @low and @newlow is not NULL THEN @newlow

    WHEN @value = @high and @newhigh is not NULL THEN @newhigh

    ELSE NULL

    END

    END

    GO

    -----------------------------------

    /*create function 2*/

    CREATE FUNCTION dbo.FDIV

    (@numerator float,

    @denominator float)

    RETURNS float

    AS

    BEGIN

    RETURN CASE WHEN @denominator = 0.0

    THEN 0.0

    ELSE @numerator / @denominator

    END

    END

    GO

    -----------------------------------

    /*insert data*/

    CREATE TABLE #data (number INT, Month_Name nvarchar (10))

    INSERT INTO #data

    SELECT 15 as number, 'jan' as Month_Name union all

    SELECT 26 as number, 'jan' as Month_Name union all

    SELECT 47 as number, 'jan' as Month_Name union all

    SELECT 25 as number, 'jan' as Month_Name union all

    SELECT 15 as number, 'jan' as Month_Name union all

    SELECT 20 as number, 'jan' as Month_Name union all

    SELECT 22 as number, 'jan' as Month_Name union all

    SELECT 40 as number, 'jan' as Month_Name union all

    SELECT 98 as number, 'mar' as Month_Name union all

    SELECT 15 as number, 'mar' as Month_Name union all

    SELECT 48 as number, 'mar' as Month_Name union all

    SELECT 75 as number, 'mar' as Month_Name union all

    SELECT 25 as number, 'mar' as Month_Name union all

    SELECT 40 as number, 'mar' as Month_Name union all

    SELECT 44 as number, 'mar' as Month_Name union all

    SELECT 40 as number, 'mar' as Month_Name union all

    SELECT 5 as number, 'feb' as Month_Name union all

    SELECT 2 as number, 'feb' as Month_Name union all

    SELECT 3 as number, 'feb' as Month_Name union all

    SELECT 4 as number, 'feb' as Month_Name union all

    SELECT 5 as number, 'feb' as Month_Name union all

    SELECT 2 as number, 'feb' as Month_Name union all

    SELECT 3 as number, 'feb' as Month_Name union all

    SELECT 4 as number, 'feb' as Month_Name union all

    SELECT 7 as number, 'feb' as Month_Name

    select * from #data

    ----------------------------

    --declare @pp float

    --set @pp = .95

    select

    dbo.LERP(max(d), 0.0, 1.0, max(a.number), max(b.number)) as percentile

    from

    (

    select floor(kf) as k, kf-floor(kf) as d

    from(

    select 1+0.95*(count(*)-1) as kf

    from #data

    ) as x1

    ) as x2

    join #data a

    on

    (select count(*) from #data aa

    where aa.number < a.number) < k

    join #data b

    on

    (select count(*) from #data bb

    where bb.number < b.number) < k+1

    --88888888888888888888888888888888888888888888888888888888888888888888

    drop table #data

Viewing 8 posts - 16 through 22 (of 22 total)

You must be logged in to reply to this topic. Login to reply