Delete duplication with where clause condition

Question

Post reply

Delete duplication with where clause condition

Viewing 15 posts - 16 through 30 (of 40 total)

You must be logged in to reply to this topic. Login to reply

Lynn Pettis SSC Guru Points: 442470 More actions · Answer 1

You mean something like this:

CREATE TABLE #temp_delete_duplicate

(

--emphist int identity(1,1),

empno int,

marital_status char(8),

status_change_date datetime default getdate()

);

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (1, 'SINGLE', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (2, 'MARRIED', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (3, 'SINGLE', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (3, 'DIVORCED', '31-DEC-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (4, 'WIDOWED', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (4, 'SINGLE', '30-SEP-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (5, 'WIDOWED', '01-JAN-2010');

SELECT *

FROM #temp_delete_duplicate

order by

empno,

status_change_date;

select

empno

from #temp_delete_duplicate

group by

empno

having

count(1) > 1;

with MaritalStatus (

MaritalStatusID,

MaritalStatusDesc

) as (

select

1, 'Single'

union all

select

2, 'Married'

union all

select

3, 'Widowed'

union all

select

4, 'Divorced'

),

EmpBase as (

select

tdd.empno,

tdd.marital_status,

tdd.status_change_date,

ROW_NUMBER() over (partition by tdd.empno order by ms.MaritalStatusID desc) rownum

from

#temp_delete_duplicate tdd

inner join MaritalStatus ms

on (UPPER(tdd.marital_status) = UPPER(MaritalStatusDesc))

)

select

empno,

marital_status,

status_change_date

from

EmpBase

where

rownum = 1;

with MaritalStatus (

MaritalStatusID,

MaritalStatusDesc

) as (

select

1, 'Single'

union all

select

2, 'Married'

union all

select

3, 'Widowed'

union all

select

4, 'Divorced'

),

EmpBase as (

select

tdd.empno,

tdd.marital_status,

tdd.status_change_date,

ROW_NUMBER() over (partition by tdd.empno order by ms.MaritalStatusID desc) rownum

from

#temp_delete_duplicate tdd

inner join MaritalStatus ms

on (UPPER(tdd.marital_status) = UPPER(MaritalStatusDesc))

)

delete from EmpBase

where

rownum > 1;

select * from #temp_delete_duplicate;

drop table #temp_delete_duplicate;

Too bad it doesn't keep the latest status change for the employee #4.

iBar SSCertifiable Points: 5872 More actions · Answer 2

Yes @Lynn Pettis,

That is the point that latest record of employee#4 is not correct. so delete that invalid data.

Error : However you have also deleted one record of employee#3 which is not needed because they are as per logical defined sequence. Emp3 was single then married/divorced so no problem with this sequence.

Please check my output where i have deleted only invalid data of emp4.

NOTE: before deletion 7 rows and after deletion 6 rows, just for clarity.

@adrian.facio: Please provide the DELETE statement, which is the actual requirement.

Thanks.

adrian.facio SSCrazy Points: 2406 More actions · Answer 3

c'mon IBar you just need

delete from mytable where empno in

( the last query i sent you)

adrian.facio SSCrazy Points: 2406 More actions · Answer 4

Ok, here it is:

;WITH CivilStatus (StatusSequenceNumber,[Marital_Status])

AS

(

SELECT 1,'SINGLE' UNION

SELECT 2,'MARRIED' UNION

SELECT 3,'DIVORCED' UNION

SELECT 3,'WIDOWED'

),

PersonCivilStatus

AS

(

SELECT d.empno,d.marital_status,d.status_change_date,

Sequence = row_number() over (partition by empno order by status_change_date),

StatusSequenceNumber

FROM #temp_delete_duplicate d

INNER JOIN CivilStatus cs

ON cs.Marital_Status = d.Marital_status

)

delete from #temp_delete_duplicate

where empno in

(

select os.empno

from PersonCivilStatus os

inner JOIN PersonCivilStatus fs

ON os.sequence = fs.sequence - 1 and

os.empno = fs.empno

where (fs.StatusSequenceNumber - os.StatusSequenceNumber) <> 1

)

Lynn Pettis SSC Guru Points: 442470 More actions · Answer 5

CREATE TABLE #temp_delete_duplicate

(

--emphist int identity(1,1),

empno int,

marital_status char(8),

status_change_date datetime default getdate()

);

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (1, 'SINGLE', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (2, 'MARRIED', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (3, 'SINGLE', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (3, 'DIVORCED', '31-DEC-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (4, 'WIDOWED', '01-JAN-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (4, 'SINGLE', '30-SEP-2010');

INSERT INTO #temp_delete_duplicate (empno, marital_status, status_change_date) VALUES (5, 'WIDOWED', '01-JAN-2010');

select * from #temp_delete_duplicate;

with MaritalStatus (

MaritalStatusID,

MaritalStatusDesc

) as (

select

1, 'Single'

union all

select

2, 'Married'

union all

select

3, 'Widowed'

union all

select

4, 'Divorced'

),

EmpBase as (

select

tdd.empno,

tdd.marital_status,

tdd.status_change_date,

ROW_NUMBER() over (partition by tdd.empno order by tdd.status_change_date asc) rownum,

ms.MaritalStatusID

from

#temp_delete_duplicate tdd

inner join MaritalStatus ms

on (UPPER(tdd.marital_status) = UPPER(MaritalStatusDesc))

)

DELETE FROM eb1

from

EmpBase eb1

LEFT OUTER JOIN EmpBase eb2

ON (eb1.empno = eb2.empno

AND eb1.rownum = eb2.rownum + 1)

WHERE

eb1.MaritalStatusID - eb2.MaritalStatusID < 0;

select * from #temp_delete_duplicate;

drop table #temp_delete_duplicate;

Here you go, but I still think it is wrong. Based on your rules, if I were employee 3 and I got remarried I would still be listed as Divorced.

adrian.facio SSCrazy Points: 2406 More actions · Answer 6

Ohh dude i think i understood the problem wrong, i made the query to delete the whole employee entries, not just the incorrect status changes.

adrian.facio SSCrazy Points: 2406 More actions · Answer 7

I make the following.

;WITH CivilStatus (StatusSequenceNumber,[Marital_Status])

AS

(

SELECT 1,'SINGLE' UNION

SELECT 2,'MARRIED' UNION

SELECT 3,'DIVORCED' UNION

SELECT 3,'WIDOWED'

),

PersonCivilStatus

AS

(

SELECT d.empno,d.marital_status,d.status_change_date,

Sequence = row_number() over (partition by empno order by status_change_date),

StatusSequenceNumber

FROM #temp_delete_duplicate d

INNER JOIN CivilStatus cs

ON cs.Marital_Status = d.Marital_status

)

delete #temp_delete_duplicate

from #temp_delete_duplicate dp

inner join

(

select os.empno, IncorrectStatusChange = fs.Marital_Status, IncorrectStatusChangeDate = fs.status_change_date

from PersonCivilStatus os

inner JOIN PersonCivilStatus fs

ON os.sequence = fs.sequence - 1 and

os.empno = fs.empno

where (fs.StatusSequenceNumber - os.StatusSequenceNumber) <> 1

) as isc

on dp.empno = isc.empno and

dp.marital_status = isc.IncorrectStatusChange and

dp.status_change_date = isc.IncorrectStatusChangeDate

select * from #temp_delete_duplicate

The problem is that for employee 4 keeps the original status "Widowed" and not the last status "Single" as your example suggest. How is that you know that New Status Single is the right one? It could be that the employee was indeed originally widowed and the new status change to "Single" is that one that is incorrect. How can we stablish a consistent rule to know what are the incorrect status changes?. The query i just wrote is based on the idea that the very first status recorded is always correct, and the cause of error migth be placed in newer status changes, it provides good consistency, but how can we adapt this to meet your needs?

adrian.facio SSCrazy Points: 2406 More actions · Answer 8

Employee number 5 is another example of this, according to the example he is ok being originally widowed, but if two days later some computer operator changes is status to Single by mistake, What is the good one??

adrian.facio SSCrazy Points: 2406 More actions · Answer 9

By the way, im not trying to probe that my point of view is right or better, im just giving a different perspective of what thing migth look like, if you give me yours , i migth realize im wrong and see thing differently or we can get to a solution that takes in consideration different perspectisves.

Lynn Pettis SSC Guru Points: 442470 More actions · Answer 10

adrian.facio (5/3/2012)
I make the following.
;WITH CivilStatus (StatusSequenceNumber,[Marital_Status])
AS
(
SELECT 1,'SINGLE' UNION
SELECT 2,'MARRIED' UNION
SELECT 3,'DIVORCED' UNION
SELECT 3,'WIDOWED'
),
PersonCivilStatus
AS
(
SELECT d.empno,d.marital_status,d.status_change_date,
Sequence = row_number() over (partition by empno order by status_change_date),
StatusSequenceNumber
FROM #temp_delete_duplicate d
INNER JOIN CivilStatus cs
ON cs.Marital_Status = d.Marital_status
)
delete #temp_delete_duplicate
from #temp_delete_duplicate dp
inner join
(
select os.empno, IncorrectStatusChange = fs.Marital_Status, IncorrectStatusChangeDate = fs.status_change_date
from PersonCivilStatus os
inner JOIN PersonCivilStatus fs
ON os.sequence = fs.sequence - 1 and
os.empno = fs.empno
where (fs.StatusSequenceNumber - os.StatusSequenceNumber) <> 1
) as isc
on dp.empno = isc.empno and
dp.marital_status = isc.IncorrectStatusChange and
dp.status_change_date = isc.IncorrectStatusChangeDate
select * from #temp_delete_duplicate
The problem is that for employee 4 keeps the original status "Widowed" and not the last status "Single" as your example suggest. How is that you know that New Status Single is the right one? It could be that the employee was indeed originally widowed and the new status change to "Single" is that one that is incorrect. How can we stablish a consistent rule to know what are the incorrect status changes?. The query i just wrote is based on the idea that the very first status recorded is always correct, and the cause of error migth be placed in newer status changes, it provides good consistency, but how can we adapt this to meet your needs?

Are you sure your code is right? You have Widowed and Divorced with the same ID, 3. Looking at the OP's posts, Widowed is 3 and Divorced is 4.

adrian.facio SSCrazy Points: 2406 More actions · Answer 11

adrian.facio

SSCrazy

Points: 2406

May 3, 2012 at 12:46 pm

#1483284

let me checlk. One minute

adrian.facio SSCrazy Points: 2406 More actions · Answer 12

I understand what you mean now. Yes man, that is rigth, that number is not actually an Id, is a step value, the 3 is because widowed and divorced can follow the step 2 "Married". i'll explain further.

adrian.facio SSCrazy Points: 2406 More actions · Answer 13

The idea is to order the status changes of a employee chronologically. Now, every status has the "Step Value", if two consetive items are different by 1, then sequence is ok,

Married (2) to Widowed(3), or married to Divorced(3). If the difference is not 1, then the sequence is not ok, Widowed(3) To Single(1) are differente by 2 (no good change). I just realize i have to tweak the condition to aceppt back steps Divorced To Married, but not allowing Married To Single.

adrian.facio SSCrazy Points: 2406 More actions · Answer 14

;WITH CivilStatus (StatusSequenceNumber,[Marital_Status])

AS

(

SELECT 1,'SINGLE' UNION

SELECT 2,'MARRIED' UNION

SELECT 3,'DIVORCED' UNION

SELECT 3,'WIDOWED'

),

PersonCivilStatus

AS

(

SELECT d.empno,d.marital_status,d.status_change_date,

Sequence = row_number() over (partition by empno order by status_change_date),

StatusSequenceNumber

FROM #temp_delete_duplicate d

INNER JOIN CivilStatus cs

ON cs.Marital_Status = d.Marital_status

)

delete #temp_delete_duplicate

from #temp_delete_duplicate dp

inner join

(

select os.empno, IncorrectStatusChange = fs.Marital_Status, IncorrectStatusChangeDate = fs.status_change_date

from PersonCivilStatus os

inner JOIN PersonCivilStatus fs

ON os.sequence = fs.sequence - 1 and

os.empno = fs.empno

where abs(fs.StatusSequenceNumber - os.StatusSequenceNumber) <> 1 or

(fs.StatusSequenceNumber = 1 and os.StatusSequenceNumber = 2 )

) as isc

on dp.empno = isc.empno and

dp.marital_status = isc.IncorrectStatusChange and

dp.status_change_date = isc.IncorrectStatusChangeDate

select * from #temp_delete_duplicate

Now, this is better rigth??? =D.

Lynn Pettis SSC Guru Points: 442470 More actions · Answer 15

adrian.facio (5/3/2012)
The idea is to order the status changes of a employee chronologically. Now, every status has the "Step Value", if two consetive items are different by 1, then sequence is ok,
Married (2) to Widowed(3), or married to Divorced(3). If the difference is not 1, then the sequence is not ok, Widowed(3) To Single(1) are differente by 2 (no good change). I just realize i have to tweak the condition to aceppt back steps Divorced To Married, but not allowing Married To Single.

Actually, not what the OP requested based on the discussion. From what I infer from the discussion you can go up the chain but not backward down the chain. You can't go from Divorced to Widowed, nor can you go from Divorced to Married. That is what was brought to the OP's attention and he was adamant that we code specifically to his rules and sample data.