Help needed with removing duplicates

  • I played with a temp table to see if I could get this to work.  It does.  Thanks for the script.

    create table #temp1(

     id int identity(1,1)

     , Name varchar(50)

    &nbsp

    go

    Insert #Temp1(Name) values('name1')

    Insert #Temp1(Name) values('name1')

    Insert #Temp1(Name) values('name1')

    Insert #Temp1(Name) values('name2')

    Insert #Temp1(Name) values('name2')

    Insert #Temp1(Name) values('name2')

    Insert #Temp1(Name) values('name2')

    Insert #Temp1(Name) values('name3')

    Insert #Temp1(Name) values('name4')

    Select

     t1.*

    From

     #Temp1 t1 ( nolock )

    Delete

     #Temp1

    From

     #Temp1 t1 ( nolock )

     Inner Join

     (

     Select

        id = Min(t2.id)

      , t2.Name

     From

      #Temp1 t2 ( nolock )

     Group By

      t2.Name

     Having

      Count(*) > 1

    &nbsp t3

     On

      t3.Name = t1.name

      and t3.id <> t1.id

    Select

     t1.*

    From

     #Temp1 t1 ( nolock )

    drop table #temp1

  • Hello All,

    I'm facing a similar situation. However, the value of every single column in my table are identical to the original row. for example:

    rec   last   first   email  

    1     Doe   John   jdoe@anything.com

    1     Doe   John   jdoe@anything.com

    2     Zest  Mary   mzest@anything.com

    2     Zest  Mary   mzest@anything.com

    how do i remove the dupe row?  it doesn't matter whether which row get remove. your help is greatly appreciated.

  • Crystal are you sure that every column is the same? If you have a primary key then this is not possible as a primary key must be unique. If you do not have a primary key you can use the scripts on this thread to delete the dups and then modify your table and add a primary key.

    Edited to add example.

    HTH Mike

    IF Object_ID('TempDB..#TestMe') > 0

     DROP TABLE #TestMe

    CREATE TABLE #TestMe

    (

     PK INT IDENTITY(1,1) Primary Key,

     SomeNumber int

    )

    GO

    /*****************************************************************************************

    Load some Data 200 rows 100 of which are duplicates of another row except for the PK

    *****************************************************************************************/

    DECLARE @Count int

    SET @Count = 1

    WHILE @Count < 101

    Begin

     INSERT INTO #TestMe(SomeNumber) Values(@Count)

     INSERT INTO #TestMe(SomeNumber) Values(@Count)

     SET @Count = @Count + 1

    END

    /*******************END LOADING TEST DATA***********************************************/

    GO

    --Check to see if dublicate rows exists. The number of row(s) affected is the number of

    --dublicate rows

    SELECT MIN(PK) AS PK,SomeNumber

    FROM #TestMe

    GROUP BY SomeNumber

    HAVING COUNT(*) > 1

     

    --DELETE duplicate rows

    --Logic:

    --Create an alias for the table that contains duplicates records

    --Delete rows from the base table when identical rows are found in

    --the alias. The WHERE clause should contain enough information to

    --insure that the records are in fact identical. A customer table

    --could have more than one customer with the same physical address or

    --email address.

    --If you want to keep the row with the highest PK use MAX(PK) in place of MIN(PK)

    DELETE

    FROM   #TestMe

    WHERE

    (

     PK >(

           SELECT

             MIN(PK)

           FROM

             #TestMe t1

           WHERE

             #TestMe.SomeNumber = t1.SomeNumber

         )

    )

    SELECT COUNT(*)

    FROM #TestMe

    --100 row(s) affected

    SELECT DISTINCT T.SomeNumber

    FROM #TestMe T

    --100 row(s) affected

    DROP TABLE #TestMe

Viewing 3 posts - 16 through 17 (of 17 total)

You must be logged in to reply to this topic. Login to reply