Delete Duplicate Households

  • CREATE

    TABLE [dbo].[PreloadedCustomer_Total2](

    [cifid] [int]

    NULL,

    [company] [varchar]

    (100) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [firstname] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [lastname] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [address1] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [address2] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [city] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [state] [varchar]

    (25) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [zip] [varchar]

    (75) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [homephone] [varchar]

    (255) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [workphone] [varchar]

    (255) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [cellphone] [varchar]

    (255) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [bizphone] [varchar]

    (255) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [emailaddr] [varchar]

    (255) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [storename] [varchar]

    (50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [storeid] [int]

    NULL,

    [lastdeal] [datetime]

    NULL,

    [lastsvc] [datetime]

    NULL,

    [balance] [int]

    NULL,

    [sortcol] [varchar]

    (8000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [dup] [int]

    NULL

    )

    ON [PRIMARY]

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('975 W. River Street',null,0,null,null,null, 'Some City' ,null,0,null, '1-stop Auto Body' , '1111111' ,null, 'Smith' ,null, '975wriver 54729' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('1008 7th St #2',null,0,null,null,null, 'Some City' ,null,0,null, 'A J Pyramid' , '1111111' ,null, 'Smith' ,null, '10087#2 54751' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('S237 County Rd VV',null,0,null,null,null, 'Some City' ,null,0,null, 'A. D. S. Of' , '1111111' ,null, 'Smith' ,null, 's237countyvv 54755' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('2402 W Cameron St',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron ' , '1111111' ,null, 'Smith' ,null, '2402wcameron 54703' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('4121 Davey Ct',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '4121dyct 54703' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('9163 Beaver Creek Rd',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '9163bercreek 54742' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('S7160 County Road Z',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, 's7160countyroadz 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('2536 Spooner Ave',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '2536spooner 54720' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('708 S 140th Ave',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron De' , '1111111' ,null, 'Smith' ,null, '708s140 54742' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('5175 US Hwy 53',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '5175ushwy53 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('1909 Goff Avenue',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '1909goff 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('15169 101st',null,0,null,null,null, 'Some City' ,null,1,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '15169101 54729' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('15169 101st Ave',null,50,null,null,null, 'Some City' ,null,1,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '15169101 54729' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('1824 FenZZck Avenue',null,0,null,null,null, 'Some City' ,null,1,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '1824fenZZck 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('1824 FenZZck Avenue',null,0,null,null,null, 'Some City' ,null,1,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '1824fenZZck 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('2408 Sessions St#4',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '2408sessions#4 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('639 ZZsconsin St',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '639ZZsconsin 54703' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('820 University Drive',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '820universitydrive 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('1211 S. Dewey St #8',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '1211sdewey#8 54701' , 'ZZ' ,5,null,null, '55555' )

    INSERT

    INTO preloadedcustomer_total2 (address1,address2,balance,bizphone,cellphone,cifid,city,company,dup,emailaddr,firstname,homephone,lastdeal,lastname,lastsvc,sortcol,state,storeid,storename,workphone,zip ) VALUES ('714 Gilbert Ave',null,0,null,null,null, 'Some City' ,null,0,null, 'Aaron' , '1111111' ,null, 'Smith' ,null, '714gilbert 54701' , 'ZZ' ,5,null,null, '55555' )

    update preloadedcustomer_total2 set balance=50 where sortcol='15169101 54729' and address1 like '%Ave%'

  • I'll try to boil this down as much as I can.  I think my brain stopped functioning an hour ago, and would appreciate any advice.

    I'm working on a project that will enroll customers.  I have to populate a table from 5 different customer databases (different databases, tables, data quality etc).   I've got this done.

    Once, I have it populated I need to delete duplicate households, since there will be one customer per household.  Each data source can assign a balance with each customer depending on their activity with their business.  So, I want to keep the customer record for the household with the hightest balance.  This is the tricky part since I could have 2 records with the same address and equal balances or unequal balances.

    I should mention when I brought in the data I added a column called sort column.  I concatenated address line 1 and zip code.  This will identify unique households.

    That's it, table create statement and test data to follow.  Thanks!!

  • I figured it out.  I added a temporary identity field (pkid).

    /* mark duplicates */

    update

    preloadedcustomer_total set dup=1 where sortcol in

    (select sortcol from preloadedcustomer_total group by sortcol having count(sortcol) >1)

    /* delete the records, may need to run many times.  If 10 people have the same address, run this 9 times

    because I'm deleting the minimum balance record */

    delete

    preloadedcustomer_total

    from

    preloadedcustomer_total pt2

    inner

    join ( --find minimum pkid

    select pt.sortcol,pt.balance,min(pt.pkid) pkid

    from preloadedcustomer_total pt

    inner join ( --find minimum balance

    select sortcol,min(balance) balance

    from preloadedcustomer_total

    where dup = 1

    group by sortcol

    having count(sortcol) >1

    ) hhdups on hhdups.balance=pt.balance and hhdups.sortcol=pt.sortcol

    group by pt.sortcol,pt.balance

    )

    moredups on moredups.pkid = pt2.pkid

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply