Condensed resultset distinct

  • Hello, can you help me out?

    I'm having a bit of a brainfreeze.

    Looking for a way to distinct 2 fields but my left joins results in duplicates. Bonus sort on non-null values IN_ID, OUT_ID first.

    Thougt about rownumbering the seperate tables ARRIVED,PREPARED and join on that. But can't find the right formula

    Got:

    BASKET_ID IN_ID OUT_ID

    A "INO 1" "INO 111"

    A "INO 2 "INO 111" /* INO 111 is already mentioned in OUT_ID for this basket, show as null */

    B "INO 3" "INO 44"

    B "INO 3" INO 55 /* INO 3 is already mentioned in IN_ID for this basket, show as null */

    B "INO 3" "INO 66"  /* INO 3 is already mentioned in IN_ID for this basket, show as null */

    C NULL "INO 170"

    D "INO 8" NULL

    Wanted:

    BASKET_ID IN_ID OUT_ID SORT_NBR

    A "INO 1" "INO 111"

    A "INO 2" NULL

    B "INO 3" "INO 44"

    B NULL "INO 55"

    B NULL "INO 66"

    C NULL "INO 170"

    D "INO 8" NULL

     

    DROP TABLE IF EXISTS #BASKET;
    DROP TABLE IF EXISTS #ARRIVED;
    DROP TABLE IF EXISTS #PREPARED;
    DROP TABLE IF EXISTS #WantedResult;

    CREATE TABLE #BASKET
    (
    BASKET_ID nvarchar(15) PRIMARY KEY NOT NULL
    )
    ;
    CREATE TABLE #ARRIVED
    (
    BASKET_ID nvarchar(15) NOT NULL
    ,ITEM_ID nvarchar(15) NOT NULL
    )
    CREATE TABLE #PREPARED
    (
    BASKET_ID nvarchar(15) NOT NULL
    ,ITEM_ID nvarchar(15) NOT NULL
    )

    INSERT INTO #BASKET
    (BASKET_ID)
    SELECT 'A'
    UNION ALL SELECT 'B'
    UNION ALL SELECT 'C'
    UNION ALL SELECT 'D'
    ;

    INSERT INTO #ARRIVED
    (BASKET_ID, ITEM_ID)
    SELECT 'A','INO 1'
    UNION ALL SELECT 'A','INO 2'
    UNION ALL SELECT 'B','INO 3'
    UNION ALL SELECT 'D','INO 8'

    ;
    INSERT INTO #PREPARED
    (BASKET_ID, ITEM_ID)
    SELECT 'A','INO 111'
    UNION ALL SELECT 'B','INO 44'
    UNION ALL SELECT 'B','INO 55'
    UNION ALL SELECT 'B','INO 66'
    UNION ALL SELECT 'C','INO 170'
    ;
    /* Gives duplicates INO 111, INO 3 */
    SELECT
    BASKET.BASKET_ID
    ,ARRIVED.ITEM_ID IN_ID
    ,PREPARED.ITEM_ID OUT_ID
    FROM #BASKET BASKET
    LEFT JOIN #ARRIVED ARRIVED
    ON BASKET.BASKET_ID=ARRIVED.BASKET_ID
    LEFT JOIN #PREPARED PREPARED
    ON BASKET.BASKET_ID=PREPARED.BASKET_ID
    ORDER BY BASKET.BASKET_ID,ARRIVED.BASKET_ID,PREPARED.BASKET_ID
    ;


    CREATE TABLE #WantedResult
    (
    BASKET_ID nvarchar(15)
    ,IN_ID nvarchar(15)
    ,OUT_ID nvarchar(15)
    ,SORT_NBR int
    )
    INSERT INTO #WantedResult (BASKET_ID,IN_ID,OUT_ID,SORT_NBR)
    SELECT 'A' BASKET_ID,'INO 1' IN_ID,'INO 111' OUT_ID,1 SORT_NBR
    UNION ALL SELECT 'A' BASKET_ID,'INO 2' IN_ID,NULL OUT_ID,2 SORT_NBR
    UNION ALL SELECT 'B' BASKET_ID,NULL,'INO 55' OUT_ID,4 SORT_NBR
    UNION ALL SELECT 'B' BASKET_ID,NULL,'INO 66' OUT_ID,5 SORT_NBR
    UNION ALL SELECT 'B' BASKET_ID,'INO 3' IN_ID,'INO 44' OUT_ID,3 SORT_NBR
    UNION ALL SELECT 'C' BASKET_ID,NULL IN_ID,'INO 170' OUT_ID,6 SORT_NBR
    UNION ALL SELECT 'D' BASKET_ID,'INO 8' IN_ID,NULL OUT_ID,7 SORT_NBR
    ;
    SElECT BASKET_ID,IN_ID,OUT_ID,SORT_NBR
    FROM #WantedResult
    ORDER BY BASKET_ID,SORT_NBR

    ;

    /*
    SELECT
    BASKET.BASKET_ID
    ,ARRIVED.ITEM_ID IN_ID
    FROM #BASKET BASKET
    LEFT JOIN #ARRIVED ARRIVED
    ON BASKET.BASKET_ID=ARRIVED.BASKET_ID
    ORDER BY BASKET.BASKET_ID,ARRIVED.BASKET_ID
    ;
    SELECT
    BASKET.BASKET_ID
    ,PREPARED.ITEM_ID OUT_ID
    FROM #BASKET BASKET
    LEFT JOIN #PREPARED PREPARED
    ON BASKET.BASKET_ID=PREPARED.BASKET_ID
    ORDER BY BASKET.BASKET_ID,PREPARED.BASKET_ID
    ;
    */
  • *edit *

    • This reply was modified 1 year, 4 months ago by  Jo Pattyn. Reason: Possbile wrong answer
  • If you have a lot of rows, you'd likely get better performance putting the results (from the cte) into an indexed temp table, but for now, just see if this gives you the results you want:

    ;WITH cte_results AS (
    SELECT
    BASKET.BASKET_ID
    ,ARRIVED.ITEM_ID IN_ID
    ,PREPARED.ITEM_ID OUT_ID
    ,ROW_NUMBER() OVER(ORDER BY BASKET.BASKET_ID,ARRIVED.BASKET_ID,PREPARED.BASKET_ID) AS row_num
    FROM #BASKET BASKET
    LEFT JOIN #ARRIVED ARRIVED
    ON BASKET.BASKET_ID=ARRIVED.BASKET_ID
    LEFT JOIN #PREPARED PREPARED
    ON BASKET.BASKET_ID=PREPARED.BASKET_ID
    --ORDER BY BASKET.BASKET_ID,ARRIVED.BASKET_ID,PREPARED.BASKET_ID
    )
    SELECT
    c1.BASKET_ID,
    CASE WHEN EXISTS(SELECT 1 FROM cte_results c2 WHERE c2.BASKET_ID = c1.BASKET_ID AND c2.IN_ID = c1.IN_ID AND c2.row_num < c1.row_num) THEN NULL ELSE c1.IN_ID END,
    CASE WHEN EXISTS(SELECT 1 FROM cte_results c2 WHERE c2.BASKET_ID = c1.BASKET_ID AND c2.OUT_ID = c1.OUT_ID AND c2.row_num < c1.row_num) THEN NULL ELSE c1.OUT_ID END
    FROM cte_results c1
    ORDER BY c1.row_num

    SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".

  • The subqueries are disastrous for performance.  Here is a query that gives the same results (I also added the requested SORT_NBR column) with much better performance.

    SELECT b.BASKET_ID
    , CASE WHEN ROW_NUMBER() OVER(PARTITION BY b.BASKET_ID, a.ITEM_ID ORDER BY p.ITEM_ID) = 1 THEN a.ITEM_ID END AS IN_ID
    , CASE WHEN ROW_NUMBER() OVER(PARTITION BY b.BASKET_ID, p.ITEM_ID ORDER BY a.ITEM_ID) = 1 THEN p.ITEM_ID END AS OUT_ID
    , ROW_NUMBER() OVER(ORDER BY B.BASKET_ID, a.ITEM_ID, p.ITEM_ID) AS Sort_Nbr
    FROM #BASKET AS b
    LEFT OUTER JOIN #ARRIVED AS a
    ON b.BASKET_ID = a.BASKET_ID
    LEFT OUTER JOIN #PREPARED AS p
    ON b.BASKET_ID = p.BASKET_ID

    Here are the read stats.

    /*  Scott's query  */
    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#PREPARED__000000000B62'. Scan count 15, logical reads 75, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#ARRIVED___000000000B61'. Scan count 15, logical reads 60, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#BASKET____000000000B60'. Scan count 15, logical reads 30, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.

    /* Drew's query */
    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#PREPARED__000000000B62'. Scan count 1, logical reads 5, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#ARRIVED___000000000B61'. Scan count 1, logical reads 4, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.
    Table '#BASKET____000000000B60'. Scan count 1, logical reads 2, physical reads 0, page server reads 0, read-ahead reads 0, page server read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob page server reads 0, lob read-ahead reads 0, lob page server read-ahead reads 0.

    Note that Scott's query has 15 times the number of reads of my query.

    Drew

    J. Drew Allen
    Business Intelligence Analyst
    Philadelphia, PA

  • Your ordering is not guaranteed to be in the OUT ITEM order, since you don't sort by it.

    SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".

  • Thanks for the suggestions, but the dataset was too small. An expanded version generates too much nulls.

    Got a sufficient solution working posted below.

    Better dataset:

    DROP TABLE IF EXISTS #BASKET;
    DROP TABLE IF EXISTS #ARRIVED;
    DROP TABLE IF EXISTS #PREPARED;
    DROP TABLE IF EXISTS #WantedResult;
    CREATE TABLE #BASKET
    (
    BASKET_ID nvarchar(15) PRIMARY KEY NOT NULL
    )
    ;
    CREATE TABLE #ARRIVED
    (
    BASKET_ID nvarchar(15) NOT NULL
    ,ITEM_ID nvarchar(15) NOT NULL
    ,ITEM_ORDER int NOT NULL
    )
    ;
    CREATE TABLE #PREPARED
    (
    BASKET_ID nvarchar(15) NOT NULL
    ,ITEM_ID nvarchar(15) NOT NULL
    ,ITEM_ORDER int NOT NULL
    )
    ;

    INSERT INTO #BASKET
    (
    BASKET_ID
    )
    SELECT 'MYSHINYBASKET';

    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'0960' ITEM_ID,1 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'0961' ITEM_ID,2 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'0962' ITEM_ID,3 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'6515' ITEM_ID,4 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'6516' ITEM_ID,5 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'6517' ITEM_ID,6 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'6518' ITEM_ID,7 ITEM_ORDER;
    INSERT INTO #ARRIVED SELECT 'MYSHINYBASKET' BASKET_ID,'6519' ITEM_ID,8 ITEM_ORDER;

    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'4458' ITEM_ID,1 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'4459' ITEM_ID,2 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'4460' ITEM_ID,3 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'4462' ITEM_ID,4 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'4466' ITEM_ID,5 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'5150' ITEM_ID,6 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'6093' ITEM_ID,7 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'6097' ITEM_ID,8 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'6098' ITEM_ID,9 ITEM_ORDER;
    INSERT INTO #PREPARED SELECT 'MYSHINYBASKET' BASKET_ID,'9572' ITEM_ID,10 ITEM_ORDER;

    Current solution involves counting rows and pushing the order up to ARRIVED / PREPARED

    CREATE TABLE #BASKET_COUNT
    (
    BASKET_ID nvarchar(15) NOT NULL PRIMARY KEY
    ,COUNT_ARRIVED int
    ,COUNT_PREPARED int
    )
    ;

    INSERT INTO #BASKET_COUNT
    (
    BASKET_ID
    ,COUNT_ARRIVED
    ,COUNT_PREPARED
    )
    SELECT
    #BASKET.BASKET_ID
    ,COUNT(DISTINCT #ARRIVED.ITEM_ID) COUNT_ARRIVED
    ,COUNT(DISTINCT #PREPARED.ITEM_ID) COUNT_PREPARED
    FROM #BASKET
    LEFT JOIN #ARRIVED ON #BASKET.BASKET_ID=#ARRIVED.BASKET_ID
    LEFT JOIN #PREPARED ON #BASKET.BASKET_ID=#PREPARED.BASKET_ID
    GROUP BY #BASKET.BASKET_ID
    ;


    WITH cteTally AS
    (
    SELECT ones.n + 10*tens.n + 100*hundreds.n + 1000*thousands.n ROW_ORDER
    FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) ones(n),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) tens(n),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) hundreds(n),
    (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) thousands(n)
    )
    ,BASKET_SUPPORT AS
    (
    SELECT BASKET_COUNT.BASKET_ID
    ,cteTally.ROW_ORDER+1 ROW_ORDER
    FROM #BASKET_COUNT BASKET_COUNT
    LEFT JOIN cteTally
    ON cteTally.ROW_ORDER<(
    SELECT
    CASE WHEN ISNULL(BASKET_COUNT.COUNT_ARRIVED,0)<ISNULL(BASKET_COUNT.COUNT_PREPARED,0) THEN BASKET_COUNT.COUNT_PREPARED
    ELSE BASKET_COUNT.COUNT_ARRIVED
    END )
    AND (BASKET_COUNT.COUNT_ARRIVED > 0 OR BASKET_COUNT.COUNT_PREPARED > 0 )
    )
    SELECT
    BASKET_SUPPORT.BASKET_ID
    ,ARRIVED.ITEM_ID IN_ID
    ,PREPARED.ITEM_ID OUT_ID
    ,BASKET_SUPPORT.ROW_ORDER
    FROM BASKET_SUPPORT
    LEFT JOIN #ARRIVED ARRIVED
    ON BASKET_SUPPORT.BASKET_ID=ARRIVED.BASKET_ID
    and BASKET_SUPPORT.ROW_ORDER=ARRIVED.ITEM_ORDER
    LEFT JOIN #PREPARED PREPARED
    ON BASKET_SUPPORT.BASKET_ID=PREPARED.BASKET_ID
    and BASKET_SUPPORT.ROW_ORDER=PREPARED.ITEM_ORDER
    WHERE BASKET_SUPPORT.BASKET_ID='MYSHINYBASKET'
    ORDER BY BASKET_SUPPORT.BASKET_ID,BASKET_SUPPORT.ROW_ORDER
    ;

    BASKET_ID IN_ID OUT_ID ROW_ORDER

    MYSHINYBASKET "0960" "4458" 1

    MYSHINYBASKET "0961" "4459" 2

    MYSHINYBASKET "0962" "4460" 3

    MYSHINYBASKET "6515" "4462" 4

    MYSHINYBASKET "6516" "4466" 5

    MYSHINYBASKET "6517" "5150" 6

    MYSHINYBASKET "6518" "6093" 7

    MYSHINYBASKET "6519" "6097" 8

    MYSHINYBASKET NULL "6098" 9

    MYSHINYBASKET NULL "9572" 10

     

  • This was removed by the editor as SPAM

Viewing 7 posts - 1 through 6 (of 6 total)

You must be logged in to reply to this topic. Login to reply