Query for duplicates

  • I'm using this to create a view for duplicates , but no of records coming out of this view are more than I'm passing in:(this query works fine in SQL server but when I try in postgresql it doesn't) this is little off-topic on this forum but if I get any help that would be wonderful.

    1.CREATE VIEW test AS

    2. (

    3. SELECT field1,

    4. field2,

    5. field3,field4,

    6. field5 ,

    7. field6,

    8. id,

    9. field7,

    10. COUNT(*) AS REC_COUNT

    11. FROM tablename GROUP BY

    12. field1,

    13. field2,

    14. field3 ,

    15. field4,

    16. field5,

    17. field6,

    18. id,

    19. field7

    20. HAVING COUNT(*) > 1

  • sqllearner1234 (10/3/2015)


    I'm using this to create a view for duplicates , but no of records coming out of this view are more than I'm passing in:(this query works fine in SQL server but when I try in postgresql it doesn't) this is little off-topic on this forum but if I get any help that would be wonderful.

    1.CREATE VIEW test AS

    2. (

    3. SELECT field1,

    4. field2,

    5. field3,field4,

    6. field5 ,

    7. field6,

    8. id,

    9. field7,

    10. COUNT(*) AS REC_COUNT

    11. FROM tablename GROUP BY

    12. field1,

    13. field2,

    14. field3 ,

    15. field4,

    16. field5,

    17. field6,

    18. id,

    19. field7

    20. HAVING COUNT(*) > 1

    Hi and welcome to the forums. Your code has two flaws, firstly the redundant opening parenthesis in line two which is a syntax error, secondly a logical error where the id column is included in the group by, if this is a unique row identity it will make all rows unique and the query will not return any rows.

    😎

    Here is a quick example you can experiment with

    USE tempdb;

    /**********************************************************

    Safe place

    **********************************************************/

    GO

    SET NOCOUNT ON;

    /**********************************************************

    Global parameters */

    /**********************************************************

    Set @SAMPLE_SIZE to the desired number of rows

    in the set

    */

    DECLARE @SAMPLE_SIZE INT = 1000;

    /**********************************************************

    @VARIANCE controls the number of distinct values for

    each column in the set, actual values produced range

    from 0 to (@VARIANCE - 1). Higher numbers produces

    fewer duplications, lower number produces higer number

    of duplications.

    **********************************************************/

    DECLARE @VARIANCE INT = 2;

    /**********************************************************

    Sample Set DDL

    Drops the table if it exists and (re)creates it.

    **********************************************************/

    IF OBJECT_ID(N'dbo.TBL_DUPE') IS NOT NULL DROP TABLE dbo.TBL_DUPE;

    CREATE TABLE dbo.TBL_DUPE

    (

    [ID] INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBL_DUPE_DUPE_ID PRIMARY KEY

    ,[FIELD01] INT NOT NULL

    ,[FIELD02] INT NOT NULL

    ,[FIELD03] INT NOT NULL

    ,[FIELD04] INT NOT NULL

    ,[FIELD05] INT NOT NULL

    ,[FIELD06] INT NOT NULL

    ,[FIELD07] INT NOT NULL

    );

    /**********************************************************

    Generate sample data

    **********************************************************/

    INSERT INTO dbo.TBL_DUPE(FIELD01,FIELD02,FIELD03,FIELD04,FIELD05,FIELD06,FIELD07)

    SELECT TOP(@SAMPLE_SIZE)

    ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    ,ABS(CHECKSUM(NEWID())) % @VARIANCE

    FROM sys.all_columns SAC1

    CROSS JOIN sys.all_columns SAC2

    CROSS JOIN sys.all_columns SAC3

    ;

    /**********************************************************

    Return all duplicated rows with count and min/max id

    of each distinct value duplicated.

    **********************************************************/

    SELECT

    MIN(D.ID) AS MIN_ID

    ,MAX(D.ID) AS CAX_ID

    ,COUNT(*) AS R_CNT

    ,D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    FROM dbo.TBL_DUPE D

    GROUP BY D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    HAVING COUNT(*) > 1;

    /**********************************************************

    Return all duplicated values, retaining the lowest ID

    value.

    **********************************************************/

    SELECT

    XD.ID

    ,XD.ID_RID

    ,XD.FIELD01

    ,XD.FIELD02

    ,XD.FIELD03

    ,XD.FIELD04

    ,XD.FIELD05

    ,XD.FIELD06

    ,XD.FIELD07

    FROM

    (

    SELECT

    D.ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    ORDER BY D.ID ASC

    ) AS ID_RID

    ,D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    FROM dbo.TBL_DUPE D

    ) AS XD

    WHERE XD.ID_RID > 1;

    /**********************************************************

    List the distinct rows from the set with the lowest

    ID value

    **********************************************************/

    SELECT

    XD.ID

    ,XD.ID_RID

    ,XD.FIELD01

    ,XD.FIELD02

    ,XD.FIELD03

    ,XD.FIELD04

    ,XD.FIELD05

    ,XD.FIELD06

    ,XD.FIELD07

    FROM

    (

    SELECT

    D.ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    ORDER BY D.ID ASC

    ) AS ID_RID

    ,D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    FROM dbo.TBL_DUPE D

    ) AS XD

    WHERE XD.ID_RID = 1;

    /**********************************************************

    List the distinct rows from the set with the highest

    ID value

    **********************************************************/

    SELECT

    XD.ID

    ,XD.ID_RID

    ,XD.FIELD01

    ,XD.FIELD02

    ,XD.FIELD03

    ,XD.FIELD04

    ,XD.FIELD05

    ,XD.FIELD06

    ,XD.FIELD07

    FROM

    (

    SELECT

    D.ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    ORDER BY D.ID DESC

    ) AS ID_RID

    ,D.FIELD01

    ,D.FIELD02

    ,D.FIELD03

    ,D.FIELD04

    ,D.FIELD05

    ,D.FIELD06

    ,D.FIELD07

    FROM dbo.TBL_DUPE D

    ) AS XD

    WHERE XD.ID_RID = 1;

Viewing 2 posts - 1 through 1 (of 1 total)

You must be logged in to reply to this topic. Login to reply