December 5, 2005 at 9:35 am
I recently ran a Query which upset our DBA a bit.
as it took 100% of the CPU any tips on cleaning this out to make it that little bit processor heavy
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
SELECT a.ROW_ID,b.ROW_ID,a.NAME, b.NAME,
DIFFERENCE
(
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(a.NAME,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''',''),
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(b.NAME,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''','')
) as 'Diff2',
DIFFERENCE(
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(aADR.ADDR,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''',''),
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(bADR.ADDR,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''','')
) as 'Diff2',
aADR.ADDR,bADR.ADDR,SOUNDEX(a.NAME),SOUNDEX(b.NAME),CASE WHEN SUBSTRING(a.NAME,1,1) = SUBSTRING(b.NAME,1,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,2,1) = SUBSTRING(b.NAME,2,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,3,1) = SUBSTRING(b.NAME,3,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,4,1) = SUBSTRING(b.NAME,4,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,5,1) = SUBSTRING(b.NAME,5,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,6,1) = SUBSTRING(b.NAME,6,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,7,1) = SUBSTRING(b.NAME,7,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,8,1) = SUBSTRING(b.NAME,8,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,9,1) = SUBSTRING(b.NAME,9,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,10,1) = SUBSTRING(b.NAME,10,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,11,1) = SUBSTRING(b.NAME,11,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,12,1) = SUBSTRING(b.NAME,12,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,13,1) = SUBSTRING(b.NAME,13,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,14,1) = SUBSTRING(b.NAME,14,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,15,1) = SUBSTRING(b.NAME,15,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,16,1) = SUBSTRING(b.NAME,16,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,17,1) = SUBSTRING(b.NAME,17,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,18,1) = SUBSTRING(b.NAME,18,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,19,1) = SUBSTRING(b.NAME,19,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,20,1) = SUBSTRING(b.NAME,20,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,21,1) = SUBSTRING(b.NAME,21,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,22,1) = SUBSTRING(b.NAME,22,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,23,1) = SUBSTRING(b.NAME,23,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,24,1) = SUBSTRING(b.NAME,24,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,25,1) = SUBSTRING(b.NAME,25,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,26,1) = SUBSTRING(b.NAME,26,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,27,1) = SUBSTRING(b.NAME,27,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,28,1) = SUBSTRING(b.NAME,28,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,29,1) = SUBSTRING(b.NAME,29,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,30,1) = SUBSTRING(b.NAME,30,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,31,1) = SUBSTRING(b.NAME,31,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,32,1) = SUBSTRING(b.NAME,32,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,33,1) = SUBSTRING(b.NAME,33,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,34,1) = SUBSTRING(b.NAME,34,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,35,1) = SUBSTRING(b.NAME,35,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,36,1) = SUBSTRING(b.NAME,36,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,37,1) = SUBSTRING(b.NAME,37,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,38,1) = SUBSTRING(b.NAME,38,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,39,1) = SUBSTRING(b.NAME,39,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,40,1) = SUBSTRING(b.NAME,40,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,41,1) = SUBSTRING(b.NAME,41,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,42,1) = SUBSTRING(b.NAME,42,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,43,1) = SUBSTRING(b.NAME,43,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,44,1) = SUBSTRING(b.NAME,44,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,45,1) = SUBSTRING(b.NAME,45,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,46,1) = SUBSTRING(b.NAME,46,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,47,1) = SUBSTRING(b.NAME,47,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,48,1) = SUBSTRING(b.NAME,48,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(a.NAME,49,1) = SUBSTRING(b.NAME,49,1) THEN 1 ELSE 0 END as 'Score of Match',
LEN(a.NAME) as 'Length of aName'
,CASE WHEN SUBSTRING(aADR.ADDR,1,1) = SUBSTRING(bADR.ADDR,1,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,2,1) = SUBSTRING(bADR.ADDR,2,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,3,1) = SUBSTRING(bADR.ADDR,3,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,4,1) = SUBSTRING(bADR.ADDR,4,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,5,1) = SUBSTRING(bADR.ADDR,5,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,6,1) = SUBSTRING(bADR.ADDR,6,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,7,1) = SUBSTRING(bADR.ADDR,7,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,8,1) = SUBSTRING(bADR.ADDR,8,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,9,1) = SUBSTRING(bADR.ADDR,9,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,10,1) = SUBSTRING(bADR.ADDR,10,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,11,1) = SUBSTRING(bADR.ADDR,11,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,12,1) = SUBSTRING(bADR.ADDR,12,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,13,1) = SUBSTRING(bADR.ADDR,13,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,14,1) = SUBSTRING(bADR.ADDR,14,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,15,1) = SUBSTRING(bADR.ADDR,15,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,16,1) = SUBSTRING(bADR.ADDR,16,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,17,1) = SUBSTRING(bADR.ADDR,17,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,18,1) = SUBSTRING(bADR.ADDR,18,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,19,1) = SUBSTRING(bADR.ADDR,19,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,20,1) = SUBSTRING(bADR.ADDR,20,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,21,1) = SUBSTRING(bADR.ADDR,21,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,22,1) = SUBSTRING(bADR.ADDR,22,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,23,1) = SUBSTRING(bADR.ADDR,23,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,24,1) = SUBSTRING(bADR.ADDR,24,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,25,1) = SUBSTRING(bADR.ADDR,25,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,26,1) = SUBSTRING(bADR.ADDR,26,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,27,1) = SUBSTRING(bADR.ADDR,27,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,28,1) = SUBSTRING(bADR.ADDR,28,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,29,1) = SUBSTRING(bADR.ADDR,29,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,30,1) = SUBSTRING(bADR.ADDR,30,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,31,1) = SUBSTRING(bADR.ADDR,31,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,32,1) = SUBSTRING(bADR.ADDR,32,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,33,1) = SUBSTRING(bADR.ADDR,33,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,34,1) = SUBSTRING(bADR.ADDR,34,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,35,1) = SUBSTRING(bADR.ADDR,35,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,36,1) = SUBSTRING(bADR.ADDR,36,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,37,1) = SUBSTRING(bADR.ADDR,37,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,38,1) = SUBSTRING(bADR.ADDR,38,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,39,1) = SUBSTRING(bADR.ADDR,39,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,40,1) = SUBSTRING(bADR.ADDR,40,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,41,1) = SUBSTRING(bADR.ADDR,41,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,42,1) = SUBSTRING(bADR.ADDR,42,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,43,1) = SUBSTRING(bADR.ADDR,43,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,44,1) = SUBSTRING(bADR.ADDR,44,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,45,1) = SUBSTRING(bADR.ADDR,45,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,46,1) = SUBSTRING(bADR.ADDR,46,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,47,1) = SUBSTRING(bADR.ADDR,47,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,48,1) = SUBSTRING(bADR.ADDR,48,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,49,1) = SUBSTRING(bADR.ADDR,49,1) THEN 1 ELSE 0 END
+ CASE WHEN SUBSTRING(aADR.ADDR,50,1) = SUBSTRING(bADR.ADDR,50,1) THEN 1 ELSE 0 END as 'Score of Match',
LEN(a.NAME) as 'Length of bName'
FROM firm a,firm b,addresses aADR,addresses bADR
WHERE aADR.ROW_ID =a.PR_ADDR_ID
AND bADR.ROW_ID =b.PR_ADDR_ID
AND DIFFERENCE(
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(aADR.ADDR,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''',''),
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(bADR.ADDR,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''','')
) =4
AND DIFFERENCE(
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(a.NAME,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''',''),
Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(b.NAME,' ',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9',''),'0',''),'-',''),'_',''),'=',''),'+',''),'[',''),']',''),'(',''),')',''),'!',''),'#',''),'?',''),',',''),'&',''),'.',''),'/',''),'@',''),'''','')
) =4
AND a.ROW_ID <> b.ROW_ID
December 5, 2005 at 9:43 am
What the heck are you trying to do with that thing???
Sample data and output would be very usefull here...
December 5, 2005 at 11:41 am
WOW...how did you even come up with this query ?!?! I think sample data, output AND a brief explanation of requirements would not go amiss!
**ASCII stupid question, get a stupid ANSI !!!**
December 5, 2005 at 3:43 pm
Is this a joke?? If not, along with sample data, output, and a synopsis of what you are trying to accomplish, you may want to give us your create table DDL along with pertinent indexing info.
December 5, 2005 at 6:15 pm
Have you heard something about UDF?
All those terrible REPLACE and SUBSTRING = SUBSTRING must be encapsulated in 2 scalar UDF.
Within UDF you may have WHILE loop to avoid matching not existing charachters and not to limit yourself with length of 49 charachters.
And it will be much faster.
_____________
Code for TallyGenerator
December 6, 2005 at 2:32 am
see I thought it was a bit of genius that code, I would have written a UDF but I haven't written one yet , suppose this would give me a kick in the backside.
well for my organisation its pretty good as no one looks at deduplication
What it does is self join the firm table and then for every name on the data base it tries to find a match
The match is performed by first replacing the excess characters
then with the pure alpha characters remaining it tries to match using soundex as a guide (I know its old hat but its provides something still)
it does this for both (firms and addresses)
The second part tries to match the position of characters against each name and provides a scoring
at the end of the day the query is supposed to give us a good idea as to what firms are duplicates.
December 6, 2005 at 4:02 am
No this is not a joke, please bear in mind there are people who use this board who aren't as ofee with writing as many queries as you do that is why im asking for some help
thanks
December 6, 2005 at 4:20 am
source Data could be something in the firm.NAME
John Smiths Ltd.
John Smith Limited
Paul Spanners Mortgate Consultants
Paul Spanners Mort. Cons.
Westfield I.F.A.
Westfield Independant Fin. Adv.
Money-World.Com
110% Profit
110% Profit(London)
110% Profit(Glasgow)
110% Profit(Lnd.)
John Smyth Limited
like wise the details in Addresses are normal first line street addresses
December 6, 2005 at 8:23 am
Thanx for that info. But the real question is what do you have to do with that data???
December 6, 2005 at 8:35 am
As I understood the various explanations (I really didn't try to understand the SQL itself), the task is to find firms, whose name and address suggest possible duplicity, and create something as a suspect list. Then probably someone will check this list, decide whether it is a real duplicity and take some action. Is that so, hickymanz?
December 6, 2005 at 9:26 am
Hey, no offense intended. You have to admit, that is one wild looking query. I, like Vladan did not even try to read through it. You've given us an example of your source data. What would you expect your query to return based on the sample source data?
December 6, 2005 at 10:05 am
No worries , it did seem a little like a dig on those not so used to writing more refined queries.
As Vladan remarked this is purely for our sales team to eyeball so that they can identify the duplicates that they have created
We then have a tool for merging(somewhat archaic) which we will use to bring all the records together under one roof.
I guess I need some help in writing a scalar(* ive never found a good explantion for this term) UDF which hopefully will lower the processor usage of the query.
December 6, 2005 at 10:16 am
I do not have experience writting scalar functions, but you should be able to get your question answered by someone on this forum. There are some very advanced SQL statement experts out here. I think you will need to clarify exactly what it is you want as your return value. Again, you've provided sample data and mentioned something about scoring. If you could explain your scoring requirements more in depth and give an example of what return values you would expect given the sample you've provided, I think someone can get you an answer.
December 7, 2005 at 3:59 am
Okay So from the sample data below
John Smiths Ltd.
John Smith Limited
Paul Spanners Mortgate Consultants
Paul Spanners Mort. Cons.
Westfield I.F.A.
Westfield Independant Fin. Adv.
Money-World.Com
110% Profit
110% Profit(London)
110% Profit(Glasgow)
110% Profit(Lnd.)
John Smyth Limited
The query should return these results , columns further to the right would be Diff(firma,Firmb), soundex(firma),soundex(firmb)
JohnSmithsLtd JohnSmithLimited
JohnSmithsLtd JohnSmythLimited
JohnSmithLimited JohnSmithsLtd
JohnSmithLimited John Smyth Limited
PaulSpannersMortgateConsultants PaulSpannersMortCons.
PaulSpannersMortCons PaulSpannersMortgateConsultants
WestfieldIFA WestfieldIndependantFinAdv
WestfieldIndependantFinAdv WestfieldIFA
Profit ProfitLondon
Profit ProfitGlasgow
Profit ProfitLnd
ProfitLondon Profit
ProfitLondon ProfitGlasgow
ProfitLondon ProfitLnd
ProfitGlasgow Profit
ProfitGlasgow ProfitLondon
ProfitGlasgow ProfitLnd
ProfitLnd Profit
ProfitLnd ProfitGlasgow
ProfitLnd ProfitLondon
basically If someone can show me how I can Clean up a name using a UDF
removing anycharacters that are special or numeric
I would also like to be able to replace instances of:
" LTD." to "limited"
" LTD" to "limited"
" IFA " to IndependantFinancialAdvisor
" Mort. " to Mortgage
" Mort " to Mortgage
December 7, 2005 at 12:53 pm
IF EXISTS (SELECT * FROM sysobjects WHERE name = N'RefineString')
DROP FUNCTION RefineString
GO
CREATE FUNCTION dbo.RefineString
(@String nvarchar (4000))
RETURNS nvarchar
AS
BEGIN
SELECT @String = REPLACE(@String, ' LTD.', ' limited')
SELECT @String = REPLACE(@String, ' LTD', ' limited')
SELECT @String = REPLACE(@String, ' IFA ', ' IndependantFinancialAdvisor ')
SELECT @String = REPLACE(@String, ' Mort.', ' Mortgage')
SELECT @String = REPLACE(@String, ' Mort ', ' Mortgage ')
..............
..............
RETURN @String
END
GO
SELECT dbo.RefineString ('Paul Spanners Mort. Cons.')
_____________
Code for TallyGenerator
Viewing 15 posts - 1 through 15 (of 15 total)
You must be logged in to reply to this topic. Login to reply