Compare 2 datasets and output delta in one TSQL statement
It's a common problem we all share from time to time, and i thought my solution is worth a post π
You have 2 datasets and you wonder..... has anything changed, and if so, did it change in A or B?
In my example i show a way to compare 2 sets of employee data and output the changes in one TSQL statement
Table A
# | EmpNr | Name | HireDT | FireDT |
---|
1 | 1 | janssen | 2000-01-01 00:00:00.000 | {NULL} |
2 | 2 | de bruin | 2001-05-01 00:00:00.000 | 2005-12-31 00:00:00.000 |
3 | 3 | pieterssen | 2004-08-01 00:00:00.000 | {NULL} |
4 | 4 | de vries | 2003-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
5 | 5 | Klaassen | 2009-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
6 | 6 | van Speyk | 2009-09-01 00:00:00.000 | {NULL} |
7 | 7 | Klaassen | 2009-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
Table B
# | EmpNr | Name | HireDT | FireDT |
---|
1 | 2 | de bruin | 2001-05-01 00:00:00.000 | 2005-12-31 00:00:00.000 |
2 | 3 | pietersse | 2004-08-01 00:00:00.000 | {NULL} |
3 | 4 | de vries | 2003-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
4 | 5 | Klaassen | 2009-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
5 | 6 | van Speijk | 2009-09-01 00:00:00.000 | {NULL} |
6 | 7 | Klaassen | 2009-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
7 | 8 | de groot | 2003-09-01 00:00:00.000 | 2011-01-01 00:00:00.000 |
The differences
# | EmpNr_a | EmpNr_b | Name_a | Name_b | HireDT_a | HireDT_b | FireDT_a | FireDT_b |
---|
1 | 1 | {NULL} | janssen | {NULL} | 2000-01-01 00:00:00.000 | {NULL} | {NULL} | {NULL} |
2 | 3 | 3 | pieterssen | pietersse | 2004-08-01 00:00:00.000 | 2004-08-01 00:00:00.000 | {NULL} | {NULL} |
3 | 6 | 6 | van Speyk | van Speijk | 2009-09-01 00:00:00.000 | 2009-09-01 00:00:00.000 | {NULL} | {NULL} |
4 | {NULL} | 8 | {NULL} | de groot | {NULL} | 2003-09-01 00:00:00.000 | {NULL} | 2011-01-01 00:00:00.000 |
As you can see, the output shows additions, removals and changes.
Happy Coding!
Theo Ekelmans
Sr MS-SQL DBA @ Ordina.nl
/*
this example will show you how to compare 2 datasets and output the differences in one statement
Author:ββββTheo Ekelmans (theo@ekelmans.com)
Version: 1.0 2011-10-07
*/set nocount on
-- Original data
CREATE TABLE #tblA (
[EmpNr]ββββ [int], -- Primary key
[Name] [varchar](15) NULL,
[HireDT] [datetime] NULL,
[FireDT] [datetime] NULL,
CONSTRAINT [PK_tblA] PRIMARY KEY CLUSTERED ( [EmpNr] ASC )
)
-- Some copy you need to compare
CREATE TABLE #tblB (
[EmpNr]ββββ [int], -- Primary key
[Name] [varchar](15) NULL,
[HireDT] [datetime] NULL,
[FireDT] [datetime] NULL,
CONSTRAINT [PK_tblB] PRIMARY KEY CLUSTERED ( [EmpNr] ASC )
)
-- Create table to hold the Delta of tblA and tblB
CREATE TABLE #cmpAB (
[EmpNr_a]ββββ[varchar](10) NULL,
[EmpNr_b] ββββ[varchar](10) NULL,
[Name_a] [varchar](15) NULL,
[Name_b] [varchar](15) NULL,
[HireDT_a] ββββ[datetime] NULL,
[HireDT_b] ββββ[datetime] NULL,
[FireDT_a] ββββ[datetime] NULL,
[FireDT_b] ββββ[datetime] NULL
)
-- Fill tblA and tblB with some sample data (Epic Dutch names!)
INSERT INTO dbo.#tblA([EmpNr], [Name], [HireDT], [FireDT])
SELECT 1, N'janssen', '20000101 00:00:00.000', NULL UNION ALL
SELECT 2, N'de bruin', '20010501 00:00:00.000', '20051231 00:00:00.000' UNION ALL
SELECT 3, N'pieterssen', '20040801 00:00:00.000', NULL UNION ALL
SELECT 4, N'de vries', '20030901 00:00:00.000', '20110101 00:00:00.000' UNION ALL
SELECT 5, N'Klaassen', '20090901 00:00:00.000', '20110101 00:00:00.000' UNION ALL
SELECT 6, N'van Speyk', '20090901 00:00:00.000', null UNION ALL
SELECT 7, N'Klaassen', '20090901 00:00:00.000', '20110101 00:00:00.000'
INSERT INTO dbo.#tblB([EmpNr], [Name], [HireDT], [FireDT])
SELECT 2, N'de bruin', '20010501 00:00:00.000', '20051231 00:00:00.000' UNION ALL
SELECT 3, N'pietersse', '20040801 00:00:00.000', NULL UNION ALL -- n deleted
SELECT 4, N'de vries', '20030901 00:00:00.000', '20110101 00:00:00.000' UNION ALL
SELECT 5, N'Klaassen', '20090901 00:00:00.000', '20110101 00:00:00.000' UNION ALL
SELECT 6, N'van Speijk', '20090901 00:00:00.000', null UNION ALL -- y replaced by ij
SELECT 7, N'Klaassen', '20090901 00:00:00.000', '20110101 00:00:00.000' UNION ALL
SELECT 8, N'de groot', '20030901 00:00:00.000', '20110101 00:00:00.000'
-- Compare and insert delta into table, in one sql statement
Insert into #cmpAB
ββββ(ββββ[EmpNr_a],ββββ[EmpNr_b],
ββββββββ[Name_a],ββββ[Name_b],
ββββββββ[HireDT_a],ββββ[HireDT_b],
ββββββββ[FireDT_a],ββββ[FireDT_b]
ββββ)
select a.[EmpNr] as [EmpNr_a],ββββββββb.[EmpNr] as [EmpNr_b],
a.[Name] as [Name_a],ββββββββb.[Name] as [Name_b],
a.[HireDT] as [HireDT_a],ββββb.[HireDT] as [HireDT_b],
a.[FireDT] as [FireDT_a], ββββb.[FireDT] as [FireDT_b]
from (
-- Give all from B, if it's new or different
select [EmpNr], [Name], [HireDT], [FireDT]
fromββββ#tblB
except
select [EmpNr], [Name], [HireDT], [FireDT]
fromββββ#tblA
) b
full outer join -- Join the 2 delta sets
(
-- Give all from B, if it's an orphan or different
select [EmpNr], [Name], [HireDT], [FireDT]
fromββββ#tblA
except
select [EmpNr], [Name], [HireDT], [FireDT]
fromββββ#tblB
) a
onββββa.[EmpNr] = b.[EmpNr] -- Join on PK !
-- Show the tables
select * from #tblA
select * from #tblB
-- Show the Delta
select * from #cmpAB
-- Clean up
DROP TABLE #cmpAB
DROP TABLE #tblA
DROP TABLE #tblB