Moving duplicate data to single unique rows

  • Hi

    I am looking to create a script that will move the duplicate data in the following table onto 1 unique row for each Person_ID and also add columns to this table to accommodate the moved data. (an example of each table is included below)

    Original table:

    Person_ID Review Initial_ReviewPKID

    165763 24/08/09 Yes 1

    165763 15/02/10 No 2

    182985 23/10/09 No 3

    182985 09/03/10 No 4

    204497 17/07/09 No 5

    204497 10/12/09 No 6

    214438 30/07/09 Yes 7

    214438 26/01/10 No 8

    217622 02/10/09 No 9

    217622 12/03/10 No 10

    220475 01/04/10 No 11

    232518 17/06/09 No 12

    232518 19/10/09 No 13

    232518 29/03/10 No 14

    385438 NULL NULL 15

    And this is how I’d like the data to look:

    Person_ID Review Initial_Review Review_2 Initial_Review_2 Review_3 Initial_Review_3

    165763 24/08/09No 15/02/10No

    182985 23/10/09No 09/03/10No

    204497 17/07/09No 10/12/09No

    214438 30/07/09No 26/01/10No

    217622 02/10/09No 12/03/10No

    220475 01/04/10Yes

    232518 17/06/09No 19/10/09No29/03/10 No

    385438 NULLNULL

    Here is the script to create the Sample table:

    CREATE TABLE #Reviews

    (

    Person_ID INT NOT NULL ,

    Review_Date DATETIME NULL ,

    Initial_Review TEXT NULL ,

    PKID INT NULL ,

    )

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(165763, '2009-08-24', 'Yes', 1)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(165763, '2010-02-15', 'No', 2)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(182985, '2009-10-23', 'No', 3)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(182985, '2010-03-09', 'No', 4)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(204497, '2009-07-17', 'No', 5)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(204497, '2009-12-10', 'No', 6)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(214438, '2009-07-30', 'Yes', 7)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(214438, '2010-01-26', 'No', 8)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(217622, '2009-10-02', 'No', 9)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(217622, '2010-03-12', 'No', 10)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(220475, '2010-04-01', 'No', 11)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2009-06-17', 'No', 12)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2009-10-19', 'No', 13)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2010-03-29', 'No', 14)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(385438, NULL, NULL, 15)

    Hope this is straight forward

    Thanks in advance for any help provided.

  • Probably not the most eloquent solution but maybe this'll help.

    CREATE TABLE #Reviews

    (

    Person_ID INT NOT NULL ,

    Review_Date DATETIME NULL ,

    Initial_Review VARCHAR(MAX) NULL ,

    PKID INT NULL ,

    )

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(165763, '2009-08-24', 'Yes', 1)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(165763, '2010-02-15', 'No', 2)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(182985, '2009-10-23', 'No', 3)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(182985, '2010-03-09', 'No', 4)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(204497, '2009-07-17', 'No', 5)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(204497, '2009-12-10', 'No', 6)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(214438, '2009-07-30', 'Yes', 7)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(214438, '2010-01-26', 'No', 8)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(217622, '2009-10-02', 'No', 9)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(217622, '2010-03-12', 'No', 10)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(220475, '2010-04-01', 'No', 11)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2009-06-17', 'No', 12)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2009-10-19', 'No', 13)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(232518, '2010-03-29', 'No', 14)

    insert into #Reviews (Person_ID, Review_Date, Initial_Review, PKID)

    values(385438, NULL, NULL, 15)

    DECLARE @reviews INT

    , @stmt NVARCHAR(MAX)

    , @stmt2 NVARCHAR(MAX)

    , @loop INT

    SELECT@reviews = MAX(r.[RANK])

    FROM(

    SELECTRANK() OVER (PARTITION BY Person_ID ORDER BY Review_Date ASC) as [RANK]

    FROM#Reviews

    ) r

    SET @stmt = 'SELECTr.Person_ID

    , max(case when r.[RANK] = 1 then Review_Date end) as Review

    , max(case when r.[RANK] = 1 then Initial_Review end) as Initial_Review

    , min(r.PKID) as PKID'

    SET @loop = 1

    WHILE @reviews >= @loop

    BEGIN

    SET @stmt = @stmt

    + ', max(case when r.[RANK] = ' + CAST(@loop AS CHAR(2)) + ' then Review_Date end) as Review_' + CAST(@loop AS CHAR(2))

    + ', max(case when r.[RANK] = ' + CAST(@loop AS CHAR(2)) + ' then Initial_Review end) as Initial_Review_' + CAST(@loop AS CHAR(2))

    SET @stmt2 = isnull(@stmt2, '') + 'ALTER TABLE #Reviews ADD Review_' + CAST(@loop AS CHAR(2)) + ' DATETIME' + char(10)

    SET @stmt2 = isnull(@stmt2, '') + 'ALTER TABLE #Reviews ADD Initial_Review_' + CAST(@loop AS CHAR(2)) + ' TEXT' + char(10)

    SET @loop = @loop + 1

    END

    SET @stmt = @stmt

    + ' INTO #temp

    FROM(

    SELECTPerson_ID, Review_Date, Initial_Review, PKID

    , RANK() OVER (PARTITION BY Person_ID ORDER BY Review_Date ASC) AS [RANK]

    FROM#Reviews

    ) r

    GROUP BY r.Person_ID'

    SET @stmt = @stmt

    + ' TRUNCATE TABLE #Reviews'

    SET @stmt = @stmt

    + ' INSERT INTO #Reviews

    SELECT*

    FROM#temp tmp'

    -- add columns

    EXEC SP_EXECUTESQL @stmt = @stmt2

    -- pivot and update data

    EXEC SP_EXECUTESQL @stmt = @stmt

    SELECT * FROM #Reviews

    DROP TABLE #Reviews

    _____________________________________________________________________
    - Nate

    @nate_hughes
  • Looks great! Works a treat.

    Many, many thanks

    Neil.

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply