December 30, 2009 at 7:13 am
I am doing a project where I need to Mark similar records in a table with millions of records. By similar I mean some fields have similar values. I select all rows and then loop through all records in the table using cursors and update the record with duplicateid and type of match based on which fields were used to do the matching.
Here is the create statement
CREATE TABLE [dbo].[mjtest](
[InvoiceId] [int] IDENTITY(1,1) NOT NULL,
[ProjectId] [int] NOT NULL,
[CompanyCode] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[VendorNumber] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[CheckNumber] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[InvoiceNumber] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[VoucherNumber] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[DocumentNumber] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[Ap1] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[Ap2] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[Ap3] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[UserId] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[PurchaseOrdNumber] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[CheckStatus] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[VendorName] [varchar](2000) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[InvoiceDate] [datetime] NULL,
[InvoiceNo] [text] COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[CheckDate] [datetime] NULL,
[InvoiceAmount] [float] NULL,
[CheckAmount] [float] NULL,
[tagging] [varchar](250) COLLATE SQL_Latin1_General_CP1_CI_AS NULL CONSTRAINT [DF_mjtest_tagging] DEFAULT ('no'),
[auto_tag] [varchar](250) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[duplicate_type] [varchar](250) COLLATE SQL_Latin1_General_CP1_CI_AS NULL CONSTRAINT [DF_mjtest_duplicate_type] DEFAULT ('no'),
[convertion_done] [tinyint] NULL CONSTRAINT [DF_mjtest_convertion_done] DEFAULT ((0)),
[duplicate_with] [bigint] NULL CONSTRAINT [DF_mjtest_duplicate_with] DEFAULT ((0)),
[StatusId] [int] NULL CONSTRAINT [DF_mjtest_StatusId] DEFAULT ((0)),
[unmark] [tinyint] NULL,
[reason] [varchar](max) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[discount] [float] NULL,
[tagstatus] [int] NULL CONSTRAINT [DF_mjtest_tagstatus] DEFAULT ((0)),
[sortbyid] [bigint] NULL,
[chkfornegative] [tinyint] NULL CONSTRAINT [DF_mjtest_chkfornegative] DEFAULT ((0)),
[id] [bigint] NULL,
CONSTRAINT [PK_dbo.mjtest] PRIMARY KEY CLUSTERED
(
[InvoiceId] ASC
)WITH (IGNORE_DUP_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
Insert statements
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(1,50,'MTGI3','NOR03','A-286','286','16270','16270','a1414','b1211','c15281 ','11111','A134590','P','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Oct 16 2008 12:00:00:000AM',-5.220000000000000e+002,-5.220000000000000e+002,'TD','YES','TRUE DUPLICATE',0,1,0,0,NULL,4.800000000000000e+001,0,1,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(2,50,'MTGI3','NOR03','5315','286','16370','16370','a1415','b1212','c15282 ','11112','A134591','V','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Nov 14 2008 12:00:00:000AM',2.000000000000000e-002,2.000000000000000e-002,'TDCR','MATCHED','MATCHED CREDIT',0,2,0,0,NULL,4.800000000000000e+001,0,2,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(3,50,'MTGI3','NOR03','5315','286','16339','16339','a1416','b1213','c15283 ','11113','A134592','P','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Nov 13 2008 12:00:00:000AM',-2.000000000000000e-002,-2.000000000000000e-002,'TDCR','MATCHED','MATCHED CREDIT',0,2,0,0,NULL,4.800000000000000e+001,0,2,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(4,50,'MTGI3','NOR03','5328','286','16425','16425','a1417','b1214','c15284 ','11114','A134593','P','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Nov 26 2008 12:00:00:000AM',-5.220000000000000e+002,-5.220000000000000e+002,'TD','YES','TRUE DUPLICATE',0,1,0,0,NULL,4.800000000000000e+001,0,1,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(5,50,'MTGI3','NOR03','7899','DM00000286','16426','16426','a1418','b1215','c15285 ','11115','A134594','D','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Oct 15 2008 12:00:00:000AM',2.000000000000000e-002,2.000000000000000e-002,'no','no',NULL,0,0,0,0,NULL,4.800000000000000e+001,0,22,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(6,50,'MTGI3','NOR03','A-286','286','16353','16353','a1419','b1216','c15286 ','11116','A134595','V','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Nov 13 2008 12:00:00:000AM',5.220000000000000e+002,5.220000000000000e+002,'TD','YES','TRUE DUPLICATE',0,6,0,0,NULL,4.800000000000000e+001,0,1,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(7,50,'MTGI3','RAM03','5318','CK RQST 10/31/0','16342','16342','a1420','b1217','c15287 ','11117','A134596','P','FELIPE RAMIREZ','Oct 31 2008 12:00:00:000AM',NULL,'Nov 13 2008 12:00:00:000AM',-4.136400000000000e+002,-4.136400000000000e+002,'no','no',NULL,0,0,0,0,NULL,4.800000000000000e+001,0,22,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(8,50,'MTGI3','SYS03','101214','810220564','17146','17146','a1421','b1218','c15288 ','11118','A134597','P','SYSCO FOOD SVS OF ATLANTA','Nov 1 2008 12:00:00:000AM',NULL,'Dec 16 2008 12:00:00:000AM',-8.834999999999999e+001,-8.834999999999999e+001,'A2','YES','INVOICE DATE',0,8,0,0,NULL,4.800000000000000e+001,0,11,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(9,50,'MTGI3','NOR03','5328','286','16725','16725','a1422','b1219','c15289 ','11119','A134598','V','NORTHEAST SALES DIST INC','Oct 15 2008 12:00:00:000AM',NULL,'Nov 30 2008 12:00:00:000AM',5.220000000000000e+002,5.220000000000000e+002,'TD','YES','TRUE DUPLICATE',0,6,0,0,NULL,4.800000000000000e+001,0,1,0,0)
INSERT INTO [mjtest] ([InvoiceId],[ProjectId],[CompanyCode],[VendorNumber],[CheckNumber],[InvoiceNumber],[VoucherNumber],[DocumentNumber],[Ap1],[Ap2],[Ap3],[UserId],[PurchaseOrdNumber],[CheckStatus],[VendorName],[InvoiceDate],[InvoiceNo],[CheckDate],[InvoiceAmount],[CheckAmount],[tagging],[auto_tag],[duplicate_type],[convertion_done],[duplicate_with],[StatusId],[unmark],[reason],[discount],[tagstatus],[sortbyid],[chkfornegative],[id])VALUES(10,50,'MTGI3','SYS03','101218','812231389','17160','17160','a1423','b1220','c15290 ','11120','A134599','P','SYSCO FOOD SVS OF ATLANTA','Dec 23 2008 12:00:00:000AM',NULL,'Dec 30 2008 12:00:00:000AM',-9.509270000000000e+003,-9.509270000000000e+003,'no','no',NULL,0,0,0,0,NULL,4.800000000000000e+001,0,22,0,0)
The way I am doing this is running a select statement to get all records for that project.
Select id, VendorNumber, checknumber, invoicenumber, InvoiceDate from mjtest where projectid=50
Then I loop through them to find similar records and update a field duplicate_with with the matching record number and also check if it has a duplicate_with then not to update it. I cannot update anything where duplicate_with field has a value because I do different types of matching using field duplicate_type starting from matching most fields and then reducing them and so each one has a match type and I don't want to overwrite that.
update transactions set duplicate_with = @id, duplicate_type='P4' where duplicate_with is null AND VendorNumber like '%@VendorNumber%' AND checknumber like '%@checknumber%' AND invoicenumber like '%@invoicenumber%' AND InvoiceDate <= @InvoiceDate+15 AND AND InvoiceDate >= @InvoiceDate-15
THis takes very long as it has to loop through every record. Is there a faster way to do this.
December 30, 2009 at 7:16 am
I just posted an answer to this in your other post here:
http://www.sqlservercentral.com/Forums/Topic839275-338-1.aspx
For better, quicker answers, click on the following...
http://www.sqlservercentral.com/articles/Best+Practices/61537/
For better answers on performance questions, click on the following...
http://www.sqlservercentral.com/articles/SQLServerCentral/66909/
Viewing 2 posts - 1 through 1 (of 1 total)
You must be logged in to reply to this topic. Login to reply