Data distribution query

  • Hi All,

    See DDL and sample data below. What would be the easiest way to get the desired output without hard coding the values? Data in both tables may change over time.Thanks

    DECLARE @num AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Price MONEY

    )

    DECLARE @range AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Rng MONEY

    )

    INSERT INTO @num

    SELECT 10

    UNION ALL

    SELECT 5

    UNION ALL

    SELECT 15

    UNION ALL

    SELECT 50

    UNION ALL

    SELECT 25

    UNION ALL

    SELECT 15

    UNION ALL

    SELECT 10

    UNION ALL

    SELECT 35

    UNION ALL

    SELECT 12

    UNION ALL

    SELECT 45

    UNION ALL

    SELECT 5

    UNION ALL

    SELECT 10

    INSERT INTO @range

    SELECT 10

    UNION ALL

    SELECT 25

    UNION ALL

    SELECT 50

    SELECT *

    FROM @num

    SELECT *

    FROM @range

    -- Expected output

    SELECT 10 AS Limit

    ,2 AS NumberOfOccuranceInRange

    UNION ALL -- 0-9

    SELECT 25

    ,6

    UNION ALL -- 10-24

    SELECT 50

    ,4 -- 25-50

  • It looks like you want to get the number of rows in @num for each range defined in @range. To accomplish this, I'm going to change the structure of your @range table so it includes the upper and lower boundary of each range. This way, it's simple to get the number of elements in each range using a BETWEEN.

    DECLARE @num AS TABLE (

    ID INT IDENTITY(1, 1),

    Price Money);

    DECLARE @range AS TABLE (

    ID INT IDENTITY(1, 1),

    LowerBound Money,

    UpperBound Money);

    INSERT INTO @num(Price)

    SELECT 10 UNION ALL

    SELECT 5 UNION ALL

    SELECT 15 UNION ALL

    SELECT 50 UNION ALL

    SELECT 25 UNION ALL

    SELECT 15 UNION ALL

    SELECT 10 UNION ALL

    SELECT 35 UNION ALL

    SELECT 12 UNION ALL

    SELECT 45 UNION ALL

    SELECT 5 UNION ALL

    SELECT 10;

    INSERT INTO @Range(LowerBound, UpperBound)

    VALUES(1, 9),

    (10, 24),

    (25, 49),

    (50, 999);

    SELECT r.LowerBound, r.UpperBound, COUNT(n.ID) row_count

    FROM @num n

    CROSS JOIN @Range r

    WHERE n.Price BETWEEN r.LowerBound AND r.UpperBound

    GROUP BY r.LowerBound, r.UpperBound

    ORDER BY r.LowerBound;

  • Another version, similar to Ed's, this one uses CTEs to construct the ranges.

    😎

    USE tempdb;

    GO

    DECLARE @num AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Price MONEY

    )

    DECLARE @range AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Rng MONEY

    )

    INSERT INTO @num

    SELECT 10 UNION ALL SELECT 5 UNION ALL SELECT 15 UNION ALL

    SELECT 50 UNION ALL SELECT 25 UNION ALL SELECT 15 UNION ALL

    SELECT 10 UNION ALL SELECT 35 UNION ALL SELECT 12 UNION ALL

    SELECT 45 UNION ALL SELECT 5 UNION ALL SELECT 10

    INSERT INTO @range

    SELECT 10 UNION ALL SELECT 25 UNION ALL SELECT 50

    /*

    The RANGE_LIST CTE sets the range values in an

    ascending order and adds a sequence number.

    It also adds min money value as lowest and

    max money value as highest.

    */

    ;WITH RANGE_LIST AS

    (

    SELECT

    ROW_NUMBER() OVER

    (

    ORDER BY X.RNG ASC

    ) AS RRID

    ,COUNT(X.Rng) OVER

    (

    PARTITION BY (SELECT NULL)

    ) AS R_COUNT

    ,X.Rng

    FROM

    (

    SELECT

    (-922337203685477.5808) AS Rng

    UNION ALL

    SELECT

    R.Rng

    FROM @range R

    UNION ALL

    SELECT

    922337203685477.5807 AS Rng

    ) AS X

    )

    /*

    The COMB_RANGE CTE does two things, firstly it combines

    range values into from-too ranges.

    Secondly, it fixes a slight discrepancy in the logic. A range

    is defined (implied) for value X as (Low range value ) >= X < (High range value).

    This has to be adjusted for the last defined range value to

    (Low range value ) >= X <= (High range value).

    */

    ,COMB_RANGE AS

    (

    SELECT

    RFROM.RRID

    ,RTOO.Rng AS Limit

    ,CASE

    WHEN RFROM.RRID < (RFROM.R_COUNT - 1) THEN RFROM.Rng

    WHEN RFROM.RRID = (RFROM.R_COUNT - 1) THEN RFROM.Rng + 0.0001

    ELSE RFROM.Rng

    END AS R_FROM

    ,CASE

    WHEN RFROM.RRID < (RFROM.R_COUNT - 2) THEN RTOO.Rng

    WHEN RFROM.RRID = (RFROM.R_COUNT - 2) THEN RTOO.Rng + 0.0001

    ELSE RTOO.Rng

    END AS R_TOO

    FROM RANGE_LIST RFROM

    INNER JOIN RANGE_LIST RTOO

    ON RFROM.RRID = RTOO.RRID - 1

    )

    SELECT

    CR.Limit AS Limit

    ,COUNT(CR.RRID) AS NumberOfOccuranceInRange

    FROM @num NM

    OUTER APPLY COMB_RANGE CR

    WHERE

    (

    NM.Price >= CR.R_FROM

    AND

    NM.Price < CR.R_TOO

    )

    GROUP BY CR.RRID,CR.Limit;

    Results

    Limit NumberOfOccuranceInRange

    --------- ------------------------

    10.0000 2

    25.0000 6

    50.0000 4

  • Hi,

    Thank you for taking the time to respond!

    Consider the following:

    DECLARE @num AS TABLE (

    ID INT IDENTITY(1, 1),

    Price Money);

    DECLARE @range AS TABLE (

    ID INT IDENTITY(1, 1),

    LowerBound Money,

    UpperBound Money);

    INSERT INTO @num(Price)

    SELECT 10 UNION ALL

    SELECT 5 UNION ALL

    SELECT 15 UNION ALL

    SELECT 55 UNION ALL

    SELECT 75 UNION ALL

    SELECT 15 UNION ALL

    SELECT 10 UNION ALL

    SELECT 12 UNION ALL

    SELECT 5

    INSERT INTO @Range(LowerBound, UpperBound)

    VALUES(1, 9),

    (10, 24),

    (25, 49),

    (50, 999);

    SELECT r.LowerBound, r.UpperBound, COUNT(n.ID) row_count

    FROM @num n

    CROSS JOIN @Range r

    WHERE n.Price BETWEEN r.LowerBound AND r.UpperBound

    GROUP BY r.LowerBound, r.UpperBound

    ORDER BY r.LowerBound;

    This returns:

    SELECT 1.00 AS LowerBound, 9.00 AS UpprBound, 2 AS row_count UNION ALL

    SELECT 10.00, 24.00, 5 UNION ALL

    SELECT 50.00, 999.00, 2

    Is there a way to amend the query so that it returns the following result set?

    SELECT 1.00 AS LowerBound, 9.00 AS UpprBound, 2 AS row_count UNION ALL

    SELECT 10.00, 24.00, 5 UNION ALL

    SELECT 25.00, 49.00, 0 UNION ALL-- Return 0 if there are no numbers in range

    SELECT 50.00, 999.00, 2

  • clayman (6/8/2014)


    INSERT INTO @Range(LowerBound, UpperBound)

    VALUES(1, 9),

    (10, 24),

    (25, 49),

    (50, 999);

    You have to be careful here, looks like you are loosing the values between the ranges (9 - 10, 24 - 25, 49 - 50).

    The next lower must be equal to the current higher boundary.

    😎

  • Thank you for the heads up, I'll fix that! Any suggestions regarding the gap for the 25.00 - 49.00 range?

  • clayman (6/9/2014)


    Thank you for the heads up, I'll fix that! Any suggestions regarding the gap for the 25.00 - 49.00 range?

    Somewhat belts and braces method

    😎

    USE tempdb;

    GO

    USE tempdb;

    GO

    DECLARE @num AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Price MONEY

    )

    DECLARE @range AS TABLE (

    Id INT IDENTITY(1, 1)

    ,Rng MONEY

    )

    INSERT INTO @num(Price)

    SELECT 10 UNION ALL

    SELECT 5 UNION ALL

    SELECT 15 UNION ALL

    SELECT 55 UNION ALL

    SELECT 75 UNION ALL

    SELECT 15 UNION ALL

    SELECT 10 UNION ALL

    SELECT 12 UNION ALL

    SELECT 5

    INSERT INTO @range

    SELECT 10 UNION ALL SELECT 25 UNION ALL SELECT 50 UNION ALL SELECT 999

    /*

    The RANGE_LIST CTE sets the range values in an

    ascending order and adds a sequence number.

    It also adds min money value as lowest and

    max money value as highest.

    */

    ;WITH RANGE_LIST AS

    (

    SELECT

    ROW_NUMBER() OVER

    (

    ORDER BY X.RNG ASC

    ) AS RRID

    ,COUNT(X.Rng) OVER

    (

    PARTITION BY (SELECT NULL)

    ) AS R_COUNT

    ,X.Rng

    FROM

    (

    SELECT

    (-922337203685477.5808) AS Rng

    UNION ALL

    SELECT

    R.Rng

    FROM @range R

    UNION ALL

    SELECT

    922337203685477.5807 AS Rng

    ) AS X

    )

    /*

    The COMB_RANGE CTE does two things, firstly it combines

    range values into from-too ranges.

    Secondly, it fixes a slight discrepancy in the logic. A range

    is defined (implied) for value X as (Low range value ) >= X < (High range value).

    This has to be adjusted for the last defined range value to

    (Low range value ) >= X <= (High range value).

    */

    ,COMB_RANGE AS

    (

    SELECT

    RFROM.RRID

    ,RTOO.Rng AS Limit

    ,CASE

    WHEN RFROM.RRID < (RFROM.R_COUNT - 1) THEN RFROM.Rng

    WHEN RFROM.RRID = (RFROM.R_COUNT - 1) THEN RFROM.Rng + 0.0001

    ELSE RFROM.Rng

    END AS R_FROM

    ,CASE

    WHEN RFROM.RRID < (RFROM.R_COUNT - 2) THEN RTOO.Rng

    WHEN RFROM.RRID = (RFROM.R_COUNT - 2) THEN RTOO.Rng + 0.0001

    ELSE RTOO.Rng

    END AS R_TOO

    FROM RANGE_LIST RFROM

    INNER JOIN RANGE_LIST RTOO

    ON RFROM.RRID = RTOO.RRID - 1

    )

    ,INST_COUNT AS

    (

    SELECT

    CR.Limit AS Limit

    ,COUNT(CR.RRID) AS NumberOfOccuranceInRange

    FROM @num NM

    OUTER APPLY COMB_RANGE CR

    WHERE

    (

    NM.Price >= CR.R_FROM

    AND

    NM.Price < CR.R_TOO

    )

    GROUP BY CR.RRID,CR.Limit

    )

    /* Catching overflows, if any value is greater than the highest range */

    SELECT TOP ((SELECT COUNT(R.Rng) AS NUM_RANGES FROM @range R)

    + (SELECT SIGN(COUNT(*)) FROM @num N WHERE N.Price > (SELECT MAX(R.Rng) FROM @range R)))

    CR.Rng AS Limit

    ,ISNULL(IC.NumberOfOccuranceInRange,0) AS NumberOfOccuranceInRange

    FROM

    (

    SELECT

    CCR.Rng

    FROM @range CCR

    UNION ALL

    SELECT 922337203685477.5807 AS Rng

    ) AS CR

    LEFT OUTER JOIN INST_COUNT IC

    ON CR.Rng = IC.Limit

    Results

    Limit NumberOfOccuranceInRange

    ----------- ------------------------

    10.0000 2

    25.0000 5

    50.0000 0

    999.0000 2

  • Ed's version modified to show missing ranges.

    😎

    USE tempdb;

    GO

    DECLARE @num AS TABLE (

    ID INT IDENTITY(1, 1),

    Price Money);

    DECLARE @range AS TABLE (

    ID INT IDENTITY(1, 1),

    LowerBound Money,

    UpperBound Money);

    INSERT INTO @num(Price)

    SELECT 10 UNION ALL

    SELECT 5 UNION ALL

    SELECT 15 UNION ALL

    SELECT 55 UNION ALL

    SELECT 75 UNION ALL

    SELECT 15 UNION ALL

    SELECT 10 UNION ALL

    SELECT 12 UNION ALL

    SELECT 5

    INSERT INTO @Range(LowerBound, UpperBound)

    VALUES(1, 9.999),

    (10, 24.999),

    (25, 49.999),

    (50, 999.999);

    SELECT

    RN.LowerBound

    ,RN.UpperBound

    ,ISNULL(X.row_count,0) AS row_count

    FROM @Range RN

    LEFT OUTER JOIN

    (

    SELECT r.LowerBound, r.UpperBound, COUNT(n.ID) row_count

    FROM @num n

    CROSS JOIN @Range r

    WHERE n.Price BETWEEN r.LowerBound AND r.UpperBound

    GROUP BY r.LowerBound, r.UpperBound

    --ORDER BY r.LowerBound

    ) AS X

    ON RN.LowerBound = X.LowerBound

    Results

    LowerBound UpperBound row_count

    --------------------- --------------------- -----------

    1.00 9.999 2

    10.00 24.999 5

    25.00 49.999 0

    50.00 999.999 2

Viewing 8 posts - 1 through 7 (of 7 total)

You must be logged in to reply to this topic. Login to reply