January 5, 2024 at 5:20 am
Hi All,
Need some help on optimizing a stored proc. I see some of the update statements written multiple times. Instead of writing multiple updates , can we re-write in one single statement which might avoid reading same table multiple times. This stored proc is doing a lot of logical reads almost like 76TB of data. Do u see any anti-patterns. I am looking at indexing but I am more afraid the way this stored proc code been written for handling duplicate data.
This below SQL statement is taking a lot of time
INSERT INTO CONTACT_PERSON_ID
(
ROWID_OBJECT,
ROWID_OBJECT_MATCHED,
STATUS,
PERSON_ID,
EMAIL,
EMAIL_MATCHED,
CREATE_DATE,
LAST_UPDATE_DATE
)
SELECT d.ROWID_OBJECT,
d.min_object,
'NEW',
l.all_rowid,
c.COMM_VAL,
CC.COMM_VAL,
GETDATE(),
GETDATE()
FROM #potential_dups d
inner join #person_list l
on d.min_object = l.min_object
left outer join c_b_party_comm (nolock) c
on d.ROWID_OBJECT = c.PARTY_ID
and c.COMM_TYP_CD = 'EMAIL'
left outer join c_b_party_comm (nolock) cC
on d.min_object = cC.PARTY_ID
and cC.COMM_TYP_CD = 'EMAIL'
WHERE d.rowid_object <> d.min_object
union
SELECT d.rowid_object_matched,
d.min_object,
'NEW',
l.all_rowid,
c.COMM_VAL,
cc.COMM_VAL,
GETDATE(),
GETDATE()
FROM #potential_dups d
inner join #person_list l
on d.min_object = l.min_object
left outer join c_b_party_comm (nolock) C
on d.ROWID_OBJECT_MATCHED = C.PARTY_ID
and C.COMM_TYP_CD = 'EMAIL'
left outer join c_b_party_comm (nolock) CC
on d.min_object = CC.PARTY_ID
and CC.COMM_TYP_CD = 'EMAIL'
WHERE d.rowid_object_matched <> d.min_object
Complete stored proc code below. Please provide recommendations.
Looking for ways to simply this code and reduce the execution time.
ALTER PROCEDURE [dbo].[usp_check_potential_dups_person_id]
(
@STATUS_CODE VARCHAR(20) OUTPUT,
@STATUS_DESC VARCHAR(3000) OUTPUT
)
AS
declare @cnt int,
@inc int,
@rowid_object varchar(20),
@rowid_object_matched varchar(20),
@min_object varchar(20)
declare @min_obj varchar(20),
@last_update int,
@run char(1)
BEGIN
SET NOCOUNT ON
BEGIN TRY
IF OBJECT_ID('tempdb..#potential_dups') IS NOT NULL
DROP TABLE #potential_dups
IF OBJECT_ID('tempdb..#person_list') IS NOT NULL
DROP TABLE #person_list
-- delete record having same people id
delete d
from contact_potential_dups d,
c_b_party p,
c_b_party c
where d.ROWID_OBJECT = p.rowid_object
and d.ROWID_OBJECT_MATCHED = c.rowid_object
and p.people_id = c.people_id
-- Create Temp table which needs to be processed
select rowid_object,
rowid_object_matched,
rowid_object_matched as min_object,
orig_rowid_object,
orig_rowid_object_matched,
cast(null as varchar(8000)) person_id
into #potential_dups
from
(
select case
when cast(d.rowid_object as int) > cast(rowid_object_matched as int) then
ROWID_OBJECT_MATCHED
else
d.ROWID_OBJECT
end rowid_object_matched,
case
when cast(d.rowid_object as int) < cast(rowid_object_matched as int) then
ROWID_OBJECT_MATCHED
else
d.ROWID_OBJECT
end rowid_object,
d.ROWID_OBJECT orig_rowid_object,
ROWID_OBJECT_MATCHED orig_rowid_object_matched
from CONTACT_POTENTIAL_DUPS (nolock) d
) aa
set @cnt = @@ROWCOUNT;
select @inc = 0,
@last_update = @cnt,
@run = 'Y'
CREATE NONCLUSTERED INDEX inx_potential_dups_temp1
ON [dbo].[#potential_dups] ([rowid_object_matched])
INCLUDE ([min_object])
CREATE NONCLUSTERED INDEX inx_potential_dups_temp2
ON [dbo].[#potential_dups] ([rowid_object])
INCLUDE ([min_object])
-- Look will continue till either run='N' or increment is more than total recods
while @cnt > @inc and @run = 'Y'
begin
set @run = 'N'
-- Update min_object if matching (match_rowid=rowid) having higher min_object
update pd
set pd.min_object = sp.min_object
from #potential_dups pd
cross apply
(
select min(cast(sp.min_object as int)) min_object
from #potential_dups sp
where pd.rowid_object_matched = sp.rowid_object
) sp
where cast(pd.min_object as int) > cast(sp.min_object as int)
set @last_update = @@ROWCOUNT;
if @run = 'N'
and @last_update > 0
set @run = 'Y'
-- Update min_object if matching (rowid=match_rowid) having higher min_object
update pd
set pd.min_object = sp.min_object
from #potential_dups pd
cross apply
(
select min(cast(sp.min_object as int)) min_object
from #potential_dups sp
where pd.rowid_object = sp.rowid_object_matched
) sp
where cast(pd.min_object as int) > cast(sp.min_object as int)
set @last_update = @@ROWCOUNT;
if @run = 'N'
and @last_update > 0
set @run = 'Y'
-- Update min_object if matching (rowid=rowid) having higher min_object
update pd
set pd.min_object = sp.min_object
from #potential_dups pd
cross apply
(
select min(cast(sp.min_object as int)) min_object
from #potential_dups sp
where pd.rowid_object = sp.rowid_object
) sp
where cast(pd.min_object as int) > cast(sp.min_object as int)
set @last_update = @@ROWCOUNT;
if @run = 'N'
and @last_update > 0
set @run = 'Y'
-- Update min_object if matching (rowid=match_rowid) having higher min_object
update pd
set pd.min_object = sp.min_object
from #potential_dups pd
cross apply
(
select min(cast(sp.min_object as int)) min_object
from #potential_dups sp
where pd.rowid_object = sp.rowid_object_matched
) sp
where cast(pd.min_object as int) > cast(sp.min_object as int)
set @last_update = @@ROWCOUNT;
if @run = 'N'
and @last_update > 0
set @run = 'Y'
-- Update min_object if matching (match_rowid=match_rowid) having higher min_object
update pd
set pd.min_object = sp.min_object
from #potential_dups pd
cross apply
(
select min(cast(sp.min_object as int)) min_object
from #potential_dups sp
where pd.rowid_object_matched = sp.rowid_object_matched
) sp
where cast(pd.min_object as int) > cast(sp.min_object as int)
set @last_update = @@ROWCOUNT;
if @run = 'N'
and @last_update > 0
set @run = 'Y'
set @inc = @inc + 1;
end
-- Get list of connected rowid for each min object
SELECT min_object,
all_rowid = substring(
ltrim(STUFF(
(
SELECT distinct
', ' + cast(ltrim(rtrim(rowid_object)) as varchar)
from
(
select min_object,
rowid_object
from #potential_dups
union
select min_object,
rowid_object_matched
from #potential_dups
) b
WHERE b.min_object = a.min_object --ORDER BY CAST(cast(rowid_object as varchar) AS INT)
FOR XML path('')
),
1,
1,
''
)
),
1,
8000
)
into #person_list
FROM #potential_dups (NOLOCK) a
GROUP BY min_object
-- ORDER BY 1
create nonclustered index idx_person_1 on #person_list (min_object)
-- remove all previous data
truncate table CONTACT_PERSON_ID
INSERT INTO CONTACT_PERSON_ID
(
ROWID_OBJECT,
ROWID_OBJECT_MATCHED,
STATUS,
PERSON_ID,
EMAIL,
EMAIL_MATCHED,
CREATE_DATE,
LAST_UPDATE_DATE
)
SELECT d.ROWID_OBJECT,
d.min_object,
'NEW',
l.all_rowid,
c.COMM_VAL,
CC.COMM_VAL,
GETDATE(),
GETDATE()
FROM #potential_dups d
inner join #person_list l
on d.min_object = l.min_object
left outer join c_b_party_comm (nolock) c
on d.ROWID_OBJECT = c.PARTY_ID
and c.COMM_TYP_CD = 'EMAIL'
left outer join c_b_party_comm (nolock) cC
on d.min_object = cC.PARTY_ID
and cC.COMM_TYP_CD = 'EMAIL'
WHERE d.rowid_object <> d.min_object
union
SELECT d.rowid_object_matched,
d.min_object,
'NEW',
l.all_rowid,
c.COMM_VAL,
cc.COMM_VAL,
GETDATE(),
GETDATE()
FROM #potential_dups d
inner join #person_list l
on d.min_object = l.min_object
left outer join c_b_party_comm (nolock) C
on d.ROWID_OBJECT_MATCHED = C.PARTY_ID
and C.COMM_TYP_CD = 'EMAIL'
left outer join c_b_party_comm (nolock) CC
on d.min_object = CC.PARTY_ID
and CC.COMM_TYP_CD = 'EMAIL'
WHERE d.rowid_object_matched <> d.min_object
-------------------------- my comments - do a set based operations instead of updating column by column
-- Update person id back to NULL so latest person id value can be updated
update contact_potential_dups
set PERSON_ID = NULL
-- Update person id back to potential dups table
update d
set PERSON_ID = p.person_id
from contact_potential_dups d,
[CONTACT_PERSON_ID] p
where d.ROWID_OBJECT = p.ROWID_OBJECT
and d.person_id is null
update d
set PERSON_ID = p.person_id
from contact_potential_dups d,
[CONTACT_PERSON_ID] p
where d.ROWID_OBJECT_MATCHED = p.ROWID_OBJECT
and d.person_id is null
update d
set PERSON_ID = p.person_id
from contact_potential_dups d,
[CONTACT_PERSON_ID] p
where d.ROWID_OBJECT = p.ROWID_OBJECT_MATCHED
and d.person_id is null
update d
set PERSON_ID = p.person_id
from contact_potential_dups d,
[CONTACT_PERSON_ID] p
where d.ROWID_OBJECT_MATCHED = p.ROWID_OBJECT_MATCHED
and d.person_id is null
--------------------------
drop table #potential_dups
drop table #person_list
SET @status_code = 'SUCCESS';
SET @status_desc = 'Process Complete'
RETURN
END TRY
BEGIN CATCH
SET @status_code = 'ERROR' --'SQLERR-'+CAST(ERROR_NUMBER() AS VARCHAR)
SET @status_desc
= CONCAT(
'ERROR_SEVERITY : ',
CAST(ERROR_SEVERITY() AS VARCHAR),
', ERROR_STATE : ',
CAST(ERROR_STATE() AS VARCHAR),
', ERROR_PROCEDURE : ',
ERROR_PROCEDURE(),
', ERROR_LINE : ',
CAST(ERROR_LINE() AS VARCHAR),
', ERROR_MESSAGE : ',
ERROR_MESSAGE()
)
END CATCH
RETURN
END
go
Regards,
Sam
January 5, 2024 at 12:55 pm
I would initially try creating the temporary tables (#potential_dups and #person_list) before you insert into it and create the CLUSTERED indexes inx_potential_dups_temp1 and index idx_person_1 instead of NONCLUSTERED, and create that index before you insert into the table:
CREATE CLUSTERED INDEX inx_potential_dups_temp1
ON [dbo].[#potential_dups] ([rowid_object_matched], min_object);
create unique clustered index idx_person_1 on #person_list(min_object);
The second index you should include min_object in the key not in the include:
CREATE NONCLUSTERED INDEX inx_potential_dups_temp2
ON [dbo].[#potential_dups] ([rowid_object], [min_object]);
Also, don't cast the columns as int in the select this will make the query non-sargable just insert them as int when you initially insert them into the temporary table.
January 5, 2024 at 12:55 pm
duplicate
Viewing 3 posts - 1 through 2 (of 2 total)
You must be logged in to reply to this topic. Login to reply