December 6, 2024 at 5:55 pm
I have 2 tables which I need to cleanup old costkeys, based upon a driver table.
The driver table is [CostKeyDeletes] and used as input to the cstdetails for cleanup. The costkeys very in record counts so I need a batchsize limit. I would like see logging of the process showing which CostKey it working on and the deletes issued.
It will be cleaning up millions of records in the cstdetails table.
Thanks.
CREATE TABLE [dbo].[cstDetails](
[CompPartNbr] [varchar](20) NOT NULL,
[PartNbr] [varchar](20) NOT NULL,
[PlantCd] [varchar](9) NOT NULL,
[CostKey] [varchar](9) NOT NULL,
[usgQty] [decimal](16, 8) NOT NULL,
[scrQty] [decimal](16, 8) NOT NULL,
[totQty] [decimal](16, 8) NOT NULL,
[UnitCost] [decimal](16, 8) NOT NULL,
[usgCost] [decimal](16, 8) NOT NULL,
[scrCost] [decimal](16, 8) NOT NULL,
[totCost] [decimal](16, 8) NOT NULL,
[StrJCost] [decimal](16, 8) NOT NULL,
[StrMCost] [decimal](16, 8) NOT NULL,
[RequestedBy] [varchar](9) NULL,
[FreezeTime] [varchar](22) NULL,
[FreezeYN] [varchar](1) NULL,
[ApprovedBy] [varchar](9) NULL,
[SetUpQty] [decimal](16, 8) NULL,
[SetUpCost] [decimal](16, 8) NULL,
[CurrentOPRSetup] [decimal](16, 8) NULL,
CONSTRAINT [PK__cstDetails__145C0A3F] PRIMARY KEY CLUSTERED
(
[CostKey] ASC,
[PlantCd] ASC,
[PartNbr] ASC,
[CompPartNbr] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90) ON [PRIMARY]
) ON [PRIMARY]
GO
The driver
CREATE TABLE [dbo].[CostKeyDeletes](
[ID] [int] IDENTITY(1,1) NOT NULL,
[CostKey] [varchar](9) NOT NULL,
CONSTRAINT [PK_CostKey_Deletes] PRIMARY KEY CLUSTERED
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
December 6, 2024 at 7:19 pm
The CostKeyDeletes tables should be clustered on CostKey, not on ID. It's best for it to match the original table's key. You can keep the ID, if you want, just don't make it the clustering key.
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
December 6, 2024 at 10:53 pm
Is CostKey unique in the CostKeyDeletes table? If so then making it the CLUSTERED key would be preferable. To log the process you could add an OUTPUT clause to the DELETE statement
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
December 7, 2024 at 12:28 am
thanks any examples for the loop used and batch delete size
December 7, 2024 at 12:31 am
any examples using a loop and batchsize
Thanks.
December 7, 2024 at 1:41 am
I've changed table [dbo].[CostKeyDeletes]
. There is no need for and ID and CostKey
should be the clustered index. I've added an extra column to show if the CostKey
has been deleted from [cstDetails]
.
CREATE TABLE [dbo].[cstDetails]
(
[CompPartNbr] [varchar](20) NOT NULL,
[PartNbr] [varchar](20) NOT NULL,
[PlantCd] [varchar](9) NOT NULL,
[CostKey] [varchar](9) NOT NULL,
[usgQty] [decimal](16, 8) NOT NULL,
[scrQty] [decimal](16, 8) NOT NULL,
[totQty] [decimal](16, 8) NOT NULL,
[UnitCost] [decimal](16, 8) NOT NULL,
[usgCost] [decimal](16, 8) NOT NULL,
[scrCost] [decimal](16, 8) NOT NULL,
[totCost] [decimal](16, 8) NOT NULL,
[StrJCost] [decimal](16, 8) NOT NULL,
[StrMCost] [decimal](16, 8) NOT NULL,
[RequestedBy] [varchar](9) NULL,
[FreezeTime] [varchar](22) NULL,
[FreezeYN] [varchar](1) NULL,
[ApprovedBy] [varchar](9) NULL,
[SetUpQty] [decimal](16, 8) NULL,
[SetUpCost] [decimal](16, 8) NULL,
[CurrentOPRSetup] [decimal](16, 8) NULL,
CONSTRAINT [PK__cstDetails__145C0A3F] PRIMARY KEY CLUSTERED ([CostKey] ASC, [PlantCd] ASC, [PartNbr] ASC, [CompPartNbr] ASC)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90) ON [PRIMARY]
) ON [PRIMARY]
GO
DROP TABLE IF EXISTS [dbo].[CostKeyDeletes];
CREATE TABLE [dbo].[CostKeyDeletes]
(
[CostKey] [varchar](9) NOT NULL,
[Deleted] bit NOT NULL,
CONSTRAINT [PK_CostKey_Deletes] PRIMARY KEY CLUSTERED([CostKey] ASC, Deleted)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
Here is a script to set up some data (10 million rows) using Jeff Moden's fnTally TVF
TRUNCATE TABLE [dbo].[cstDetails]
-- Corrected script with CostKey values fitting varchar(9)
DECLARE @TotalRows INT = 10000000;
INSERT INTO [dbo].[cstDetails]
(
[CompPartNbr],
[PartNbr],
[PlantCd],
[CostKey],
[usgQty],
[scrQty],
[totQty],
[UnitCost],
[usgCost],
[scrCost],
[totCost],
[StrJCost],
[StrMCost]
)
SELECT 'Comp-' + CAST(ABS(CHECKSUM(NEWID())) % 10000 + 1 AS VARCHAR), -- Generates varying CompPartNbr
'Part-' + CAST(t.N AS VARCHAR), -- Unique PartNbr
'Plant-' + CAST((t.N % 100) + 1 AS VARCHAR), -- Random PlantCd
RIGHT('000000000' + CAST(t.N%10000 AS VARCHAR), 9), -- Unique CostKey padded to ensure length <= 9
100, -- Random usgQty
10, -- Random scrQty
200, -- Random totQty
50, -- Random UnitCost
500, -- Random usgCost
50, -- Random scrCost
1000, -- Random totCost
200, -- Random StrJCost
300 -- Random StrMCost
FROM dbo.fnTally(1, @TotalRows) t;
GO
SELECT COUNT(*) FROM [dbo].[cstDetails]
TRUNCATE TABLE [dbo].[CostKeyDeletes]
-- Populate CostKeyDeletes with a subset of CostKey values from cstDetails
INSERT INTO [dbo].[CostKeyDeletes] ([CostKey], Deleted)
SELECT DISTINCT [CostKey], 0
FROM [dbo].[cstDetails]
SELECT COUNT(*) FROM [dbo].[CostKeyDeletes]
Here is a stored procedure to delete the rows from [cstDetails]
:
-- Sample call:
-- EXEC dbo.[CleanupCostKeysWithPK] @BatchSize=500, @MaxDeletes=1000000, @LogProgress=0
CREATE OR ALTER PROCEDURE dbo.[CleanupCostKeysWithPK]
(
@BatchSize INT = 1000, -- Number of rows to process in each batch
@MaxDeletes INT = 1000000, -- Maximum number of rows to delete in total
@LogProgress BIT = 1 -- Enable or disable progress logging
)
AS
BEGIN
SET NOCOUNT ON;
DECLARE @StartTime DATETIME, @EndTime DATETIME, @ElapsedTime INT;
DECLARE @RowCount INT = 0;
DECLARE @TotalDeletedRows INT = 0;
-- Temporary table for storing primary keys of rows to be deleted
DROP TABLE IF EXISTS #RowsToDelete;
CREATE TABLE #RowsToDelete
(
CostKey VARCHAR(9) NOT NULL,
PlantCd VARCHAR(9) NOT NULL,
PartNbr VARCHAR(20) NOT NULL,
CompPartNbr VARCHAR(20) NOT NULL,
PRIMARY KEY CLUSTERED (CostKey, PlantCd, PartNbr, CompPartNbr)
);
DROP TABLE IF EXISTS #Batch;
CREATE TABLE #Batch
(
CostKey VARCHAR(9) NOT NULL,
PlantCd VARCHAR(9) NOT NULL,
PartNbr VARCHAR(20) NOT NULL,
CompPartNbr VARCHAR(20) NOT NULL,
PRIMARY KEY CLUSTERED (CostKey, PlantCd, PartNbr, CompPartNbr)
);
-- Logging table
IF @LogProgress = 1
BEGIN
RAISERROR('-- Starting Cleanup Process', 0, 1) WITH NOWAIT;
CREATE TABLE #Log (
LogID INT IDENTITY(1,1),
BatchNumber INT,
DeletedRows INT,
LogTime DATETIME DEFAULT GETDATE()
);
END;
-- Step 1: Populate temporary table with rows to delete
INSERT INTO #RowsToDelete (CostKey, PlantCd, PartNbr, CompPartNbr)
SELECT TOP(@MaxDeletes) cd.CostKey, cd.PlantCd, cd.PartNbr, cd.CompPartNbr
FROM [dbo].[cstDetails] cd
INNER JOIN [dbo].[CostKeyDeletes] ckd
ON cd.CostKey = ckd.CostKey
AND ckd.Deleted = 0;
-- Get the total count of rows to delete
SELECT @RowCount = COUNT(*) FROM #RowsToDelete;
RAISERROR('-- Total rows identified for deletion: %d', 0, 1, @RowCount) WITH NOWAIT;
-- Step 2: Batch delete process
WHILE EXISTS (SELECT 1 FROM #RowsToDelete) AND @TotalDeletedRows < @MaxDeletes BEGIN
BEGIN TRANSACTION;
SET @StartTime = GETUTCDATE();
TRUNCATE TABLE #Batch;
;WITH cte_OrderedJobs AS
(
SELECT TOP (@BatchSize) CostKey, PlantCd, PartNbr, CompPartNbr
FROM #RowsToDelete
)
DELETE
FROM cte_OrderedJobs
OUTPUT deleted.CostKey, deleted.PlantCd, deleted.PartNbr, deleted.CompPartNbr
INTO #Batch(CostKey, PlantCd, PartNbr, CompPartNbr);
-- Step 2.1: Delete rows in batches
DELETE cd
FROM [dbo].[cstDetails] cd
WHERE EXISTS (
SELECT 1
FROM #Batch rtd
WHERE cd.CostKey = rtd.CostKey
AND cd.PlantCd = rtd.PlantCd
AND cd.PartNbr = rtd.PartNbr
AND cd.CompPartNbr = rtd.CompPartNbr
);
SET @RowCount = @@ROWCOUNT;
SET @TotalDeletedRows += @RowCount;
-- Log progress
IF @LogProgress = 1
BEGIN
INSERT INTO #Log (BatchNumber, DeletedRows)
VALUES ((@TotalDeletedRows / @BatchSize), @RowCount);
END;
SET @EndTime = GETUTCDATE();
SET @ElapsedTime = DATEDIFF(ms, @StartTime, @EndTime);
RAISERROR('-- Batch processed %d rows in %d ms', 0, 1, @RowCount, @ElapsedTime) WITH NOWAIT;
COMMIT TRANSACTION;
WAITFOR DELAY '00:00:00.001' -- To let other process in
END;
UPDATE ckd
SET ckd.Deleted = 1
FROM [dbo].[CostKeyDeletes] ckd
WHERE NOT EXISTS(SELECT 1
FROM dbo.cstDetails d
WHERE d.CostKey = ckd.CostKey)
AND ckd.Deleted = 0
-- Display log
IF @LogProgress = 1 BEGIN
SELECT * FROM #Log;
DROP TABLE #Log;
END;
-- Cleanup
DROP TABLE IF EXISTS #RowsToDelete, #Batch;
RAISERROR('-- Total rows deleted: %d', 0, 1, @TotalDeletedRows) WITH NOWAIT;
END;
GO
Viewing 6 posts - 1 through 5 (of 5 total)
You must be logged in to reply to this topic. Login to reply