Identify Primary Key Violation Prior to Insert (Updated)
This script allows for testing if rows will cause a primary key violations prior to inserting the data. Offending rows are stored in a table so they can be used in subsequent queries or used to debug bad data issues.
This updated version adds a timestamp to the duplicates table so that identifying when the duplicate occurred is easier. This is really helpful if you have scheduled data uploads and can track when a bulk insert from a file was performed.
This can be called from any database and will identify duplicates in the select statement as well as the target table
Split and PrimaryKeyColumns functions are called by the stored procedure and are included in the post.
IF exists (SELECT * from dbo.sysobjects
WHERE id = object_id(N'[dbo].[Split]')
AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[Split]
GO
GO
CREATE FUNCTION dbo.Split (@vcDelimitedString varchar(8000),
@vcDelimitervarchar(100) )
/**************************************************************************
DESCRIPTION: Accepts a delimited string and splits it at the specified
delimiter points. Returns the individual items as a table data
type with the ElementID field as the array index and the Element
field as the data
PARAMETERS:
@vcDelimitedString- The string to be split
@vcDelimiter- String containing the delimiter where
delimited string should be split
RETURNS:
Table data type containing array of strings that were split with
the delimiters removed from the source string
USAGE:
SELECT ElementID, Element FROM Split('11111,22222,3333', ',') ORDER BY ElementID
AUTHOR:Karen Gayda
DATE: 05/31/2001
MODIFICATION HISTORY:
WHODATEDESCRIPTION
----------------------------------------------------------------
***************************************************************************/RETURNS @tblArray TABLE
(
ElementIDsmallintIDENTITY(1,1), --Array index
Elementvarchar(1000)--Array element contents
)
AS
BEGIN
DECLARE
@siIndexsmallint,
@siStartsmallint,
@siDelSizesmallint
SET @siDelSize= LEN(@vcDelimiter)
--loop through source string and add elements to destination table array
WHILE LEN(@vcDelimitedString) > 0
BEGIN
SET @siIndex = CHARINDEX(@vcDelimiter, @vcDelimitedString)
IF @siIndex = 0
BEGIN
INSERT INTO @tblArray VALUES(@vcDelimitedString)
BREAK
END
ELSE
BEGIN
INSERT INTO @tblArray VALUES(SUBSTRING(@vcDelimitedString, 1,@siIndex - 1))
SET @siStart = @siIndex + @siDelSize
SET @vcDelimitedString = SUBSTRING(@vcDelimitedString, @siStart , LEN(@vcDelimitedString) - @siStart + 1)
END
END
RETURN
END
GO
IF exists (SELECT * from dbo.sysobjects
WHERE id = object_id(N'[dbo].[PrimaryKeyColumns]')
AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[PrimaryKeyColumns]
GO
GO
CREATE FUNCTION dbo.PrimaryKeyColumns (@sysTableName sysname )
/**************************************************************************
DESCRIPTION: Finds the name and column id of primary key columns for a
table.
PARAMETERS:
@sysTableName- The name of the table for which primary
key info is sought
RETURNS:
Table data type list of primary key column names
USAGE:USE pubs
SELECT * from dbo.PrimaryKeyColumns( 'authors')
AUTHOR:Karen Gayda
DATE: 10/24/2003
MODIFICATION HISTORY:
WHODATEDESCRIPTION
----------------------------------------------------------------
***************************************************************************/RETURNS @tblCols TABLE
(
namesysname,-- Name of primary key column
colidsmallint-- System id for column
)
AS
BEGIN
INSERT INTO @tblCols
SELECT c.name, c.colid
FROM sysindexes i
INNER JOIN sysobjects t
ON i.id = t.id
INNER JOIN sysindexkeys k
ON i.indid = k.indid
AND i.id = k.ID
INNER JOIN syscolumns c
ON c.id = t.id
AND c.colid = k.colid
WHERE i.id = t.id
AND i.indid BETWEEN 1 And 254
AND (i.status & 2048) = 2048
AND t.id = OBJECT_ID(@sysTableName)
ORDER BY k.KeyNo
RETURN
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
IF EXISTS(SELECT * FROM dbo.sysobjects
WHERE id = object_id(N'[dbo].[sp_GetKeyViolations]')
AND OBJECTPROPERTY(id, N'IsProcedure') = 1
)
DROP PROCEDURE [dbo].[sp_GetKeyViolations]
GO
CREATE PROCEDURE dbo.sp_GetKeyViolations
@vcSelectStatementvarchar (8000),
@sysTargetTablesysname,
@sysViolationTablesysname,
@sysTargetDatabasesysname,
@bitEmptyViolationTable bit= 0
/***********************************************************************************
DESCRIPTION:
Evaluates whether a select statement will cause Primary Key violations
if data is inserted into a given target table. Key violation rows are
saved to the designated violation table in the target database.
PARAMETERS:
@vcSelectStatement - SQL statement that selects primary key columns for
records being tested for key violations.
IMPORTANT: Only key columns should
be included in select. If source table
columns are named differently than in the
target then they must be aliased with the
same name as the target. These restrictions
were necessary to make the procedure generic.
@sysTargetTable - table where data will be inserted if record is
not a duplicate.
@sysViolationTable - table where key violation rows will be saved
@sysTargetDatabase - Database where target table is located and where
key violation table exists and/or will be created
@bitEmptyViolationTable - Optional, indicates whether pre-existing key violation
rows should be deleted from violation table before adding new
records. If omitted, records will be preserved.
USAGE:
exec sp_GetKeyViolations 'SELECT TOP 10 CustomerID as CustomerID FROM Northwind.dbo.Customers WHERE ContactTitle LIKE ''Sales%''',
'Customers', --target table
'DupCustomers', -- table to store violation rows
'NORTHWIND', -- target database
0 -- do not clear violation table prior to insert
REMARKS: This is a very generic function that allows for identifying key violations
for data that will be inserted into a target table before the insert takes
place. The offending rows are saved to a caller-specified table so that the
records can be analyzed or used by a subsequent query to exclude those rows
from being inserted.
Duplicates are identified in the source select as well as duplicates that already exist in target.
DEPENDANCIES:
dbo.Split function must be installed in master database
dbo.PrimaryKeyColumns function must be installed in each target database
AUTHOR: Karen Gayda
DATE: 10/24/2003
MODIFICATIONS:
WhoDateWhat
-----------------------------------------------------------------
KMG10/30/2003Added ability to run this proc. from any database
KMG10/31/2003Added check for duplicates in source select in
addition to checking already existing data
KMG 01/04/2004Added timestamp column to duplicates table for easier
tracking of data
************************************************************************************/AS
SET NOCOUNT ON
DECLARE @vcSQL varchar(8000),
@vcColumnList varchar(300),
@sysColNamesysname
SET @vcSQL = 'DECLARE crCols CURSOR STATIC FOR SELECT DISTINCT
name FROM ' + @sysTargetDatabase + '.dbo.PrimaryKeyColumns(''' + @sysTargetTable + ''')'
EXEC(@vcSQL)
OPEN crCols
--For Each Column in PrimaryKey, concatonate to column list
SET @vcColumnList = ''
FETCH NEXT FROM crCols INTO @sysColName
WHILE(@@FETCH_STATUS = 0)
BEGIN
SET @vcColumnList = @vcColumnList + @sysColName + ','
FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcColumnList = SUBSTRING(@vcColumnList,1,LEN(@vcColumnList) -1) --remove trailing comma
CLOSE crCols
--Get select rows and save to temporary table
IF EXISTS (select * from sysobjects where id = object_id('##tempSourceRecords')
and OBJECTPROPERTY(id, N'IsTable') = 1)
DROP TABLE ##tempSourceRecords
SET @vcSQL = (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 1)
+ ' INTO ##tempSourceRecords FROM '
+ (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 2)
PRINT(@vcSQL)
EXEC (@vcSQL)
--If key violation table does not already exist, create it
DECLARE@nvcSQLnvarchar(4000),
@nvcParmDefnvarchar(500),
@bitExistsbit,
@vcDate varchar(25)
SET @vcDate = CONVERT(varchar(20),GETDATE(), 109)
SET @nvcSQL = 'IF EXISTS (select * from '+ @sysTargetDatabase+ '.dbo.sysobjects
where name = ''' + @sysViolationTable + ''' AND
xtype=''U'') ' +
'SET @bitExistsOUT = 1 ' +
'ELSE ' +
'SET @bitExistsOUT = 0'
SET @nvcParmDef = N'@bitExistsOUT bit OUTPUT'
EXEC sp_executesql @nvcSQL, @nvcParmDef, @bitExistsOUT=@bitExists OUTPUT
IF @bitExists = 0
BEGIN
--Create new table based upon supplied select list if needed
SET @vcSQL = 'SELECT TOP 0 ' + @vcColumnList + ',''' + @vcDate + ''' as ProcessedDate ' +
+ ' INTO ' + @sysTargetDatabase + '.dbo.'+ @sysViolationTable +
' FROM ##tempSourceRecords '
PRINT @vcSQL
EXEC (@vcSQL)
END
--Empty violation table if caller indicates it should be cleared
IF @bitEmptyViolationTable = 1
BEGIN
SET @vcSQL = 'DELETE FROM ' + @sysTargetDatabase + '.dbo.' + @sysViolationTable
PRINT @vcSQL
EXEC (@vcSQL)
END
--Insert duplicate rows from target into key violation table
SET @vcSQL = 'INSERT INTO ' + @sysTargetDatabase + '.dbo.' + @sysViolationTable +
' SELECT a.' + REPLACE(@vcColumnList, ',', ',a.') + ', ''' + @vcDate + '''' +
' FROM ##tempSourceRecords a' +
' INNER JOIN ' + @sysTargetDatabase + '.dbo.' + @sysTargetTable + ' b ' +
' ON '
OPEN crCols
FETCH NEXT FROM crCols INTO @sysColName --get inner join columns to add to SQL insert statement
WHILE(@@FETCH_STATUS = 0)
BEGIN
SET @vcSQL = @vcSQL + 'a.' + @sysColName + '=b.' + @sysColName + ' AND '
FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcSQL = SUBSTRING(@vcSQL,1,LEN(@vcSQL) -4) --remove trailing AND
--Add rows that are duplicate in the source select that may cause violation
SET @vcSQL = @vcSQL + ' UNION SELECT ' + @vcColumnList + + ',''' + @vcDate + ''' as ProcessedDate '
+ ' FROM ##TempSourceRecords ' +
'GROUP BY ' + @vcColumnList + ' HAVING COUNT(*) > 1'
CLOSE crCOLS
DEALLOCATE crCols
PRINT @vcSQL
EXEC (@vcSQL)
DROP TABLE ##tempSourceRecords
RETURN (0)
GO