October 28, 2014 at 7:22 am
Little clean up in the logic, now it looks pretty good
😎
/********************************************************************
-- Stripping out any non-numerical characters
-- EE 2014-10-28 Inital coding
-- EE Cleaned up comparison logic
********************************************************************/
ALTER FUNCTION dbo.STRIP_NUM_EE
(
@INSTR VARCHAR(8000)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH T(N) AS (SELECT N FROM ( VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,NUMS(N) AS (SELECT TOP(CONVERT(BIGINT,LEN(@INSTR),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)
SELECT
(
SELECT
CASE WHEN (ASCII(SUBSTRING(@INSTR COLLATE Latin1_General_BIN,NM.N,1)) - 48) BETWEEN 0 AND 9 THEN SUBSTRING(@INSTR,NM.N,1) END
FROM NUMS NM
FOR XML PATH('')
) AS OUT_STR
;
Test results
Beginning execution loop
========== Using nGrams ==========
1666
========== Using nGrams ==========
1680
========== Using nGrams ==========
1706
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
2830
========== Using PatExclude8K ==========
2756
========== Using PatExclude8K ==========
2750
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
1596
========== Jeff's Old Scalar Loop Function ==========
1556
========== Jeff's Old Scalar Loop Function ==========
1556
Batch execution completed 3 times.
Beginning execution loop
========== dbo.STRIP_NUM_EE Function ==========
1393
========== dbo.STRIP_NUM_EE Function ==========
1343
========== dbo.STRIP_NUM_EE Function ==========
1343
Batch execution completed 3 times.
October 28, 2014 at 8:59 pm
Eirikur Eiriksson (10/28/2014)
Little clean up in the logic, now it looks pretty good😎
/********************************************************************
-- Stripping out any non-numerical characters
-- EE 2014-10-28 Inital coding
-- EE Cleaned up comparison logic
********************************************************************/
ALTER FUNCTION dbo.STRIP_NUM_EE
(
@INSTR VARCHAR(8000)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH T(N) AS (SELECT N FROM ( VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,NUMS(N) AS (SELECT TOP(CONVERT(BIGINT,LEN(@INSTR),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)
SELECT
(
SELECT
CASE WHEN (ASCII(SUBSTRING(@INSTR COLLATE Latin1_General_BIN,NM.N,1)) - 48) BETWEEN 0 AND 9 THEN SUBSTRING(@INSTR,NM.N,1) END
FROM NUMS NM
FOR XML PATH('')
) AS OUT_STR
;
Test results
Beginning execution loop
========== Using nGrams ==========
1666
========== Using nGrams ==========
1680
========== Using nGrams ==========
1706
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
2830
========== Using PatExclude8K ==========
2756
========== Using PatExclude8K ==========
2750
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
1596
========== Jeff's Old Scalar Loop Function ==========
1556
========== Jeff's Old Scalar Loop Function ==========
1556
Batch execution completed 3 times.
Beginning execution loop
========== dbo.STRIP_NUM_EE Function ==========
1393
========== dbo.STRIP_NUM_EE Function ==========
1343
========== dbo.STRIP_NUM_EE Function ==========
1343
Batch execution completed 3 times.
Brilliant work sir - very well done!
What is a little weird is how PatExclude8K is performing so bad on your system. It is doing much better than the nGrams solution on my system. Other than that I get similar results.
Beginning execution loop
========== Using nGrams ==========
2416
========== Using nGrams ==========
2420
========== Using nGrams ==========
2390
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
2053
========== Using PatExclude8K ==========
2013
========== Using PatExclude8K ==========
2043
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
1840
========== Jeff's Old Scalar Loop Function ==========
1860
========== Jeff's Old Scalar Loop Function ==========
1840
Batch execution completed 3 times.
Beginning execution loop
========== dbo.STRIP_NUM_EE Function ==========
1690
========== dbo.STRIP_NUM_EE Function ==========
1690
========== dbo.STRIP_NUM_EE Function ==========
1673
Batch execution completed 3 times.
-- Itzik Ben-Gan 2001
October 28, 2014 at 9:08 pm
Eirikur Eiriksson (10/28/2014)
Little clean up in the logic, now it looks pretty good😎
/********************************************************************
-- Stripping out any non-numerical characters
-- EE 2014-10-28 Inital coding
-- EE Cleaned up comparison logic
********************************************************************/
ALTER FUNCTION dbo.STRIP_NUM_EE
(
@INSTR VARCHAR(8000)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH T(N) AS (SELECT N FROM ( VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,NUMS(N) AS (SELECT TOP(CONVERT(BIGINT,LEN(@INSTR),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)
SELECT
(
SELECT
CASE WHEN (ASCII(SUBSTRING(@INSTR COLLATE Latin1_General_BIN,NM.N,1)) - 48) BETWEEN 0 AND 9 THEN SUBSTRING(@INSTR,NM.N,1) END
FROM NUMS NM
FOR XML PATH('')
) AS OUT_STR
;
Test results
Beginning execution loop
========== Using nGrams ==========
1666
========== Using nGrams ==========
1680
========== Using nGrams ==========
1706
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
2830
========== Using PatExclude8K ==========
2756
========== Using PatExclude8K ==========
2750
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
1596
========== Jeff's Old Scalar Loop Function ==========
1556
========== Jeff's Old Scalar Loop Function ==========
1556
Batch execution completed 3 times.
Beginning execution loop
========== dbo.STRIP_NUM_EE Function ==========
1393
========== dbo.STRIP_NUM_EE Function ==========
1343
========== dbo.STRIP_NUM_EE Function ==========
1343
Batch execution completed 3 times.
You've just gotta love pure math instead of character based calculations. Thanks and well done, Eirikur. Now I have to test similar logic in a While Loop and see if that's any faster.
It also shows that general purpose functions have general purpose performance. Functions with a specific purpose are usually faster. It's a lesson that I learned a long time ago and have apparently lost my mind because I sure didn't apply it here. So, thinks for the well deserved kick in the head to shake me out of it.
--Jeff Moden
Change is inevitable... Change for the better is not.
October 28, 2014 at 9:29 pm
Jeff Moden (10/28/2014)
I took the two contenders from your good post, Alan, and added on of my own. Here's the code for the function I use for such things. Yeah... you'll be shocked. It's not only a scalar function but it also has a WHILE loop in it.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[CleanString]
/*******************************************************************************
Purpose:
Given a string and a pattern of characters to remove, remove the patterned
characters from the string.
Usage:
--===== Basic Syntax Example
SELECT CleanedString = dbo.CleanString(@pSomeString,@pPattern)
;
--===== Remove all but Alpha characters
SELECT CleanedString = dbo.CleanString(st.SomeString,'%[^A-Za-z]%');
FROM dbo.SomeTable st
;
--===== Remove all but Numeric digits
SELECT CleanedString = dbo.CleanString(st.SomeString,'%[^0-9]%');
FROM dbo.SomeTable st
;
Programmer Notes:
1. @pPattern is case sensitive.
2. The pattern set of characters must be for just one character.
Revision History:
Rev 00 - Circa 2007 - George Mastros?
- Initial find on the web
Rev 01 - 29 Mar 2007 - Jeff Moden
- Optimize to remove one instance of PATINDEX from the loop.
- Add code to use the pattern as a parameter.
Rev 02 - 26 May 2013 - Jeff Moden
- Add case sensitivity
*******************************************************************************/
(@pString VARCHAR(8000),@pPattern VARCHAR(100))
RETURNS VARCHAR(8000) AS
BEGIN
DECLARE @Pos SMALLINT;
SELECT @Pos = PATINDEX(@pPattern,@pString COLLATE Latin1_General_BIN);
WHILE @Pos > 0
SELECT @pString = STUFF(@pString,@Pos,1,''),
@Pos = PATINDEX(@pPattern,@pString COLLATE Latin1_General_BIN);
RETURN @pString;
END
;
Here's the test harness that I used. It runs each function through the 100K row table 3 times.
--===== Create the 100K row test table
IF OBJECT_ID('tempdb..#val') IS NOT NULL DROP TABLE #val
;
SELECT TOP 100000
txt = ISNULL(CONVERT(VARCHAR(36),NEWID()),'')
INTO #val
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2
;
ALTER TABLE #Val
ADD PRIMARY KEY CLUSTERED (txt)
;
--===== Do the tests. Had to use duration because one
-- of the tests is on the new scalar function and
-- SET STATISTICS doesn't report on those correctly.
GO
PRINT '========== Using nGrams ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @string = CleanedText
FROM #val
CROSS APPLY dbo.StripNonNumeric_itvf_ajb(txt)
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== Using PatExclude8K ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @string = newstring
FROM #val
CROSS APPLY dbo.PatExclude8K(txt, '[^0-9]')
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== Jeff''s Old Scalar Loop Function ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = dbo.CleanString(txt,'%[^0-9]%')
FROM #val
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
Here are the run results. Again, you'll be shocked. This is the one place where I've not been able to make a Tally Table solution able to beat it. Lord knows I and other good folks have tried.
(100000 row(s) affected)
Beginning execution loop
========== Using nGrams ==========
2916
========== Using nGrams ==========
2893
========== Using nGrams ==========
2890
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
2703
========== Using PatExclude8K ==========
2640
========== Using PatExclude8K ==========
2653
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
2413
========== Jeff's Old Scalar Loop Function ==========
2500
========== Jeff's Old Scalar Loop Function ==========
2466
Batch execution completed 3 times.
That is an excellent function Jeff, thanks for sharing that. It took a a few reads to understand how it works. Interesting to see a scalar function with a loop perform so well. I've tested Eirikur's solution on and it is getting the best performance. I started to make a version of Eirikur's that takes a pattern but I'm out of gas for the day. I am going to re-factor his code in the morning to take a pattern as a parameter and re-test the performance.
-- Itzik Ben-Gan 2001
October 29, 2014 at 9:18 am
So I made the following change to Eirikur's function to so that it takes a pattern as a parameter:
-- refactoring...
CREATE FUNCTION dbo.STRIP_NUM_EE_PAT
(
@INSTR VARCHAR(8000),
@PATTERN VARCHAR(50)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH T(N) AS (SELECT N FROM ( VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,NUMS(N) AS (SELECT TOP(CONVERT(BIGINT,LEN(@INSTR),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)
SELECT
(
SELECT
CASE WHEN PATINDEX(@PATTERN,SUBSTRING(@INSTR COLLATE Latin1_General_BIN,NM.N,1)) = 0 THEN SUBSTRING(@INSTR,NM.N,1) END
FROM NUMS NM
FOR XML PATH('')
) AS OUT_STR
;
GO
and ran the following test a few times:
--===== Create the 100K row test table
IF OBJECT_ID('tempdb..#val') IS NOT NULL DROP TABLE #val
;
SELECT TOP 100000
txt = ISNULL(CONVERT(VARCHAR(36),NEWID()),'')
INTO #val
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2
;
ALTER TABLE #Val
ADD PRIMARY KEY CLUSTERED (txt)
;
--===== Do the tests. Had to use duration because one
-- of the tests is on the new scalar function and
-- SET STATISTICS doesn't report on those correctly.
GO
PRINT '========== Using PatExclude8K ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @string = newstring
FROM #val
CROSS APPLY dbo.PatExclude8K(txt, '[^0-9]')
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== Jeff''s Old Scalar Loop Function ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = dbo.CleanString(txt,'%[^0-9]%')
FROM #val
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== EE ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @string = OUT_STR
FROM #val
CROSS APPLY dbo.STRIP_NUM_EE(txt);
;
PRINT DATEDIFF(ms,@StartTime,GETDATE());
GO 3
PRINT '========== EE_PAT ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @string = OUT_STR
FROM #val
CROSS APPLY dbo.STRIP_NUM_EE_PAT(txt,'[^0-9]');
;
PRINT DATEDIFF(ms,@StartTime,GETDATE());
GO 3
Though the new version appears to produce the exact some query plan it the version that uses PATINDEX seems to perform a little faster....
Beginning execution loop
========== Using PatExclude8K ==========
1993
========== Using PatExclude8K ==========
2073
========== Using PatExclude8K ==========
1993
Batch execution completed 3 times.
Beginning execution loop
========== Jeff's Old Scalar Loop Function ==========
1803
========== Jeff's Old Scalar Loop Function ==========
1840
========== Jeff's Old Scalar Loop Function ==========
1810
Batch execution completed 3 times.
Beginning execution loop
========== EE ==========
1766
========== EE ==========
1716
========== EE ==========
1703
Batch execution completed 3 times.
Beginning execution loop
========== EE_PAT ==========
1626
========== EE_PAT ==========
1576
========== EE_PAT ==========
1563
Batch execution completed 3 times.
-- Itzik Ben-Gan 2001
October 29, 2014 at 9:24 am
First, hats off to Eirikur Eiriksson for the reminders about the performance of dedicated rather than general purpose functions and the speed associated with integer comparisons rather than character-based comparisons. With that, I made a couple of tweaks to Eirikur's good code for another bit of performance. Here's the function I came up with as a result. As usual, details are in the code. If someone needs this for 2005, just change the VALUES function to 10 individual SELECT NULL UNION ALL statements (the last one not having UNION ALL) and it'll work in 2005.
CREATE FUNCTION dbo.DigitsOnly
/******************************************************************************************************************************
Purpose:
Given a VARCHAR(8000) or less string, return only the numeric digits from the string.
Programmer's Notes:
1. This is an iTVF (Inline Table Valued Function) that will be used as an iSF (Inline Scalar Function) in that it returns a
single value in the returned table and should normally be used in the FROM clause as with any other iTVF.
2. CHECKSUM returns an INT and will return the exact number given if given an INT to begin with. It's also faster than a CAST
or CONVERT and is used as a performance enhancer by changing the BIGINT of ROW_NUMBER() to a more appropriately sized INT.
3. Another performance enhancement is using a WHERE clause calculation to prevent the relatively expensive XML PATH
concatentation of empty strings normally determined by a CASE statement in the XML "loop".
4. Another performance enhancement is not making this function a generic function that could handle a pattern. That allows
us to use all integer math to do the comparison using the high speed ASCII function convert characters to their numeric
equivalent. ASCII characters 48 through 57 are the digit characters of 0 through 9.
Kudos:
1. Hats off to Eirikur Eiriksson for the ASCII conversion idea and for the reminders that dedicated functions will always
be faster than generic functions and that integer math beats the tar out of character comparisons that use LIKE or
PATINDEX.
2. Hats off to all of the good people that submitted and tested their code on the following thread. It's this type of
participation and interest that makes code better. You've just gotta love this commmunity.
http://www.sqlservercentral.com/Forums/Topic1585850-391-2.aspx#bm1629360
Usage:
--===== CROSS APPLY example
SELECT ca.DigitsOnly
FROM dbo.SomeTable
CROSS APPLY dbo.DigitsOnly(SomeVarcharCol) ca
;
Revision History:
Rev 00 - 29 Oct 2014 - Jeff Moden - Initial Creation
-
******************************************************************************************************************************/
--===== Declare the I/O for this function
(@pString VARCHAR(8000))
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (LEN(@pString)) CHECKSUM(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) FROM E1 a,E1 b,E1 c,E1 d)
SELECT DigitsOnly =
(
SELECT SUBSTRING(@pString,N,1)
FROM Tally
WHERE ASCII(SUBSTRING(@pString,N,1)) BETWEEN 48 AND 57
FOR XML PATH('')
)
;
Here's the test-harness code that I used for the performance tests. It uses the same table we've been using.
PRINT '========== Erikur''s Function ==========';
GO
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = OUT_STR
FROM #val
CROSS APPLY dbo.STRIP_NUM_EE(txt)
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== Jeff''s Modification of Erikur''s Function ==========';
GO
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = ca.DigitsOnly
FROM #val
CROSS APPLY dbo.DigitsOnly(txt) ca
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
Here are the run results from the same test harness that we've been using...
========== Erikur's Function ==========
Beginning execution loop
2626
2650
2563
Batch execution completed 3 times.
========== Jeff's Modification of Erikur's Function ==========
Beginning execution loop
2083
2073
2133
Batch execution completed 3 times.
--Jeff Moden
Change is inevitable... Change for the better is not.
October 29, 2014 at 9:26 am
Alan.B (10/29/2014)
So I made the following change to Eirikur's function to so that it takes a pattern as a parameter:
Interesting... (our posts crossed paths). I'll take a look at that tonight, Alan. Thanks.
--Jeff Moden
Change is inevitable... Change for the better is not.
October 29, 2014 at 11:29 pm
Thanks Alan and Jeff, very good job indeed!
This is really the SSC community at it's best and although I haven't had any time to look properly into this, Alan and Jeff have picked it up and pushed it further, Chapeau!
Looking at Jeff's brilliant improvements (nice trick with the checksum!), I can see an opportunity for further improvement, mainly the logical operator BETWEEN. It has roughly 1/4 of the total cost. I replaced it with a single comparison operator using a little bit-bashing and got the filter cost down to about 1/5.
😎
CREATE FUNCTION dbo.DigitsOnlyEE
/* Jeff's comment from his function goes here */
(@pString VARCHAR(8000))
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (LEN(@pString)) (CHECKSUM(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)))) FROM E1 a,E1 b,E1 c,E1 d)
SELECT DigitsOnly =
(
SELECT SUBSTRING(@pString,N,1)
FROM Tally
WHERE ((ASCII(SUBSTRING(@pString,N,1)) - 48) & 0x7FFF) < 10
FOR XML PATH('')
)
;
GO
Test result using Jeff's test harness
Beginning execution loop
========== dbo.DigitsOnly Function ==========
1306
========== dbo.DigitsOnly Function ==========
1286
========== dbo.DigitsOnly Function ==========
1310
Batch execution completed 3 times.
Beginning execution loop
========== dbo.DigitsOnlyEE Function ==========
1113
========== dbo.DigitsOnlyEE Function ==========
1123
========== dbo.DigitsOnlyEE Function ==========
1153
Batch execution completed 3 times.
Edit: missed the second byte 0x7FFF.
October 30, 2014 at 5:01 am
Tweaking the code a little bit more shaves off approximately 17 percent, here is an all in one code
😎
USE tempdb;
GO
SET NOCOUNT ON;
--===== Create the 100K row test table
IF OBJECT_ID('tempdb..#val') IS NOT NULL DROP TABLE #val
;
SELECT TOP 1000000
txt = ISNULL(CONVERT(VARCHAR(36),NEWID()),'')
INTO #val
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2
;
ALTER TABLE #Val
ADD PRIMARY KEY CLUSTERED (txt)
;
GO
/* Jeff's Modification of Erikur's Function */
IF OBJECT_ID('dbo.DigitsOnly') IS NOT NULL DROP FUNCTION dbo.DigitsOnly;
GO
CREATE FUNCTION dbo.DigitsOnly
/******************************************************************************************************************************
Purpose:
Given a VARCHAR(8000) or less string, return only the numeric digits from the string.
Programmer's Notes:
1. This is an iTVF (Inline Table Valued Function) that will be used as an iSF (Inline Scalar Function) in that it returns a
single value in the returned table and should normally be used in the FROM clause as with any other iTVF.
2. CHECKSUM returns an INT and will return the exact number given if given an INT to begin with. It's also faster than a CAST
or CONVERT and is used as a performance enhancer by changing the BIGINT of ROW_NUMBER() to a more appropriately sized INT.
3. Another performance enhancement is using a WHERE clause calculation to prevent the relatively expensive XML PATH
concatentation of empty strings normally determined by a CASE statement in the XML "loop".
4. Another performance enhancement is not making this function a generic function that could handle a pattern. That allows
us to use all integer math to do the comparison using the high speed ASCII function convert characters to their numeric
equivalent. ASCII characters 48 through 57 are the digit characters of 0 through 9.
Kudos:
1. Hats off to Eirikur Eiriksson for the ASCII conversion idea and for the reminders that dedicated functions will always
be faster than generic functions and that integer math beats the tar out of character comparisons that use LIKE or
PATINDEX.
2. Hats off to all of the good people that submitted and tested their code on the following thread. It's this type of
participation and interest that makes code better. You've just gotta love this commmunity.
http://www.sqlservercentral.com/Forums/Topic1585850-391-2.aspx#bm1629360
Usage:
--===== CROSS APPLY example
SELECT ca.DigitsOnly
FROM dbo.SomeTable
CROSS APPLY dbo.DigitsOnly(SomeVarcharCol) ca
;
Revision History:
Rev 00 - 29 Oct 2014 - Jeff Moden - Initial Creation
-
******************************************************************************************************************************/
--===== Declare the I/O for this function
(@pString VARCHAR(8000))
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (LEN(@pString)) CHECKSUM(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) FROM E1 a,E1 b,E1 c,E1 d)
SELECT DigitsOnly =
(
SELECT SUBSTRING(@pString,N,1)
FROM Tally
WHERE ASCII(SUBSTRING(@pString,N,1)) BETWEEN 48 AND 57
FOR XML PATH('')
)
;
/* Eirikur's modification of Jeff's Modification of Erikur's Function */
GO
IF OBJECT_ID('dbo.DigitsOnlyEE') IS NOT NULL DROP FUNCTION dbo.DigitsOnlyEE;
GO
CREATE FUNCTION dbo.DigitsOnlyEE
(@pString VARCHAR(8000))
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (LEN(@pString)) (ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) FROM E1 a,E1 b,E1 c,E1 d)
SELECT DigitsOnly =
(
SELECT SUBSTRING(@pString,N,1)
FROM Tally
WHERE ((ASCII(SUBSTRING(@pString,N,1)) - 48) & 0x7FFF) < 10
FOR XML PATH('')
)
;
GO
PRINT '========== Jeff''s Modification of Erikur''s Function ==========';
GO
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = ca.DigitsOnly
FROM #val
CROSS APPLY dbo.DigitsOnly(txt) ca
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
PRINT '========== Eirikur''s modification of Jeff''s Modification of Erikur''s Function ==========';
GO
DECLARE @String VARCHAR(36)
,@StartTime DATETIME
;
SELECT @StartTime = GETDATE()
;
SELECT @String = ca.DigitsOnly
FROM #val
CROSS APPLY dbo.DigitsOnlyEE(txt) ca
;
PRINT DATEDIFF(ms,@StartTime,GETDATE())
;
GO 3
Results
========== Jeff's Modification of Erikur's Function ==========
Beginning execution loop
11613
11630
11763
Batch execution completed 3 times.
========== Eirikur's modification of Jeff's Modification of Erikur's Function ==========
Beginning execution loop
9700
9666
9663
Batch execution completed 3 times.
October 30, 2014 at 9:58 pm
Now we're cooking with gas! Well done! I had eye surgery today (lens replacement) and can't see so well yet and can't spend much time in front of the computer but I'll run some additional tests over the weekend using variable length data and see what happens.
We also need to try the same thing on Alan's good pattern matching code.
As Eirikur suggests, this is an example of the SQL Server Community (same initials as SQL Server Central :-)) at it's best. Keep 'em coming folks!
BTW, that's one heck of a nice machine you have there, Eirikur!
--Jeff Moden
Change is inevitable... Change for the better is not.
November 1, 2014 at 4:40 pm
Adding some gas supplies for the stove, first there are five methods for replacing logical operators or comparison operator combination equivalent thereof with a single comparison operator.
😎
USE tempdb;
GO
SET NOCOUNT ON;
DECLARE @START_NUM INT = 48; -- ASCII Code for 0 (zero)
DECLARE @SAMPLE_SIZE INT = 256; -- Number of ASCII Characters, counting from 0 to 255
;WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (@SAMPLE_SIZE) (ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) -1 FROM E1 a,E1 b,E1 c,E1 d)
SELECT
T.N AS CH_No
,CHAR(T.N ) AS TChar
/* Less than 10 */
,((T.N ) - @START_NUM) & 0x7FFF AS LT_10
/* Equal to 0 */
,(((T.N ) - @START_NUM) & 0x7FFF) / 10 AS EQ_0
/* Equal to 9 */
,ABS(((T.N ) - 48)) + ABS((T.N) - 57) AS EQ_9
/* Equal to 1 */
,((T.N ) - 38) / 10 AS EQ_1
/* Equal to 5 */
,(T.N + 2) / 10 AS EQ_5
FROM Tally T;
Results
CH_No TChar LT_10 EQ_0 EQ_9 EQ_1 EQ_5
-------------------- ----- -------------------- -------------------- -------------------- -------------------- --------------------
0 32720 3272 105 -3 0
1 32721 3272 103 -3 0
2 32722 3272 101 -3 0
3 32723 3272 99 -3 0
4 32724 3272 97 -3 0
5 32725 3272 95 -3 0
6 32726 3272 93 -3 0
7 32727 3272 91 -3 0
8 32728 3272 89 -3 1
9 32729 3272 87 -2 1
10 32730 3273 85 -2 1
11 32731 3273 83 -2 1
12 32732 3273 81 -2 1
13 32733 3273 79 -2 1
14 32734 3273 77 -2 1
15 32735 3273 75 -2 1
16 32736 3273 73 -2 1
17 32737 3273 71 -2 1
18 32738 3273 69 -2 2
19 32739 3273 67 -1 2
20 32740 3274 65 -1 2
21 32741 3274 63 -1 2
22 32742 3274 61 -1 2
23 32743 3274 59 -1 2
24 32744 3274 57 -1 2
25 32745 3274 55 -1 2
26 32746 3274 53 -1 2
27 32747 3274 51 -1 2
28 32748 3274 49 -1 3
29 32749 3274 47 0 3
30 32750 3275 45 0 3
31 32751 3275 43 0 3
32 32752 3275 41 0 3
33 ! 32753 3275 39 0 3
34 " 32754 3275 37 0 3
35 # 32755 3275 35 0 3
36 $ 32756 3275 33 0 3
37 % 32757 3275 31 0 3
38 & 32758 3275 29 0 4
39 ' 32759 3275 27 0 4
40 ( 32760 3276 25 0 4
41 ) 32761 3276 23 0 4
42 * 32762 3276 21 0 4
43 + 32763 3276 19 0 4
44 , 32764 3276 17 0 4
45 - 32765 3276 15 0 4
46 . 32766 3276 13 0 4
47 / 32767 3276 11 0 4
48 0 0 0 9 1 5
49 1 1 0 9 1 5
50 2 2 0 9 1 5
51 3 3 0 9 1 5
52 4 4 0 9 1 5
53 5 5 0 9 1 5
54 6 6 0 9 1 5
55 7 7 0 9 1 5
56 8 8 0 9 1 5
57 9 9 0 9 1 5
58 : 10 1 11 2 6
59 ; 11 1 13 2 6
60 < 12 1 15 2 6
61 = 13 1 17 2 6
62 > 14 1 19 2 6
63 ? 15 1 21 2 6
64 @ 16 1 23 2 6
65 A 17 1 25 2 6
66 B 18 1 27 2 6
67 C 19 1 29 2 6
68 D 20 2 31 3 7
69 E 21 2 33 3 7
70 F 22 2 35 3 7
71 G 23 2 37 3 7
72 H 24 2 39 3 7
73 I 25 2 41 3 7
74 J 26 2 43 3 7
75 K 27 2 45 3 7
76 L 28 2 47 3 7
77 M 29 2 49 3 7
78 N 30 3 51 4 8
79 O 31 3 53 4 8
80 P 32 3 55 4 8
81 Q 33 3 57 4 8
82 R 34 3 59 4 8
83 S 35 3 61 4 8
84 T 36 3 63 4 8
85 U 37 3 65 4 8
86 V 38 3 67 4 8
87 W 39 3 69 4 8
88 X 40 4 71 5 9
89 Y 41 4 73 5 9
90 Z 42 4 75 5 9
91 [ 43 4 77 5 9
92 \ 44 4 79 5 9
93 ] 45 4 81 5 9
94 ^ 46 4 83 5 9
95 _ 47 4 85 5 9
96 ` 48 4 87 5 9
97 a 49 4 89 5 9
98 b 50 5 91 6 10
99 c 51 5 93 6 10
100 d 52 5 95 6 10
101 e 53 5 97 6 10
102 f 54 5 99 6 10
103 g 55 5 101 6 10
104 h 56 5 103 6 10
105 i 57 5 105 6 10
106 j 58 5 107 6 10
107 k 59 5 109 6 10
108 l 60 6 111 7 11
109 m 61 6 113 7 11
110 n 62 6 115 7 11
111 o 63 6 117 7 11
112 p 64 6 119 7 11
113 q 65 6 121 7 11
114 r 66 6 123 7 11
115 s 67 6 125 7 11
116 t 68 6 127 7 11
117 u 69 6 129 7 11
118 v 70 7 131 8 12
119 w 71 7 133 8 12
120 x 72 7 135 8 12
121 y 73 7 137 8 12
122 z 74 7 139 8 12
123 { 75 7 141 8 12
124 | 76 7 143 8 12
125 } 77 7 145 8 12
126 ~ 78 7 147 8 12
127 79 7 149 8 12
128 € 80 8 151 9 13
129 81 8 153 9 13
130 ‚ 82 8 155 9 13
131 ƒ 83 8 157 9 13
132 „ 84 8 159 9 13
133 … 85 8 161 9 13
134 † 86 8 163 9 13
135 ‡ 87 8 165 9 13
136 ˆ 88 8 167 9 13
137 ‰ 89 8 169 9 13
138 Š 90 9 171 10 14
139 ‹ 91 9 173 10 14
140 Œ 92 9 175 10 14
141 93 9 177 10 14
142 Ž 94 9 179 10 14
143 95 9 181 10 14
144 96 9 183 10 14
145 ‘ 97 9 185 10 14
146 ’ 98 9 187 10 14
147 “ 99 9 189 10 14
148 ” 100 10 191 11 15
149 • 101 10 193 11 15
150 – 102 10 195 11 15
151 — 103 10 197 11 15
152 ˜ 104 10 199 11 15
153 ™ 105 10 201 11 15
154 š 106 10 203 11 15
155 › 107 10 205 11 15
156 œ 108 10 207 11 15
157 109 10 209 11 15
158 ž 110 11 211 12 16
159 Ÿ 111 11 213 12 16
160 112 11 215 12 16
161 ¡ 113 11 217 12 16
162 ¢ 114 11 219 12 16
163 £ 115 11 221 12 16
164 ¤ 116 11 223 12 16
165 ¥ 117 11 225 12 16
166 ¦ 118 11 227 12 16
167 § 119 11 229 12 16
168 ¨ 120 12 231 13 17
169 © 121 12 233 13 17
170 ª 122 12 235 13 17
171 « 123 12 237 13 17
172 ¬ 124 12 239 13 17
173 125 12 241 13 17
174 ® 126 12 243 13 17
175 ¯ 127 12 245 13 17
176 ° 128 12 247 13 17
177 ± 129 12 249 13 17
178 ² 130 13 251 14 18
179 ³ 131 13 253 14 18
180 ´ 132 13 255 14 18
181 µ 133 13 257 14 18
182 ¶ 134 13 259 14 18
183 · 135 13 261 14 18
184 ¸ 136 13 263 14 18
185 ¹ 137 13 265 14 18
186 º 138 13 267 14 18
187 » 139 13 269 14 18
188 ¼ 140 14 271 15 19
189 ½ 141 14 273 15 19
190 ¾ 142 14 275 15 19
191 ¿ 143 14 277 15 19
192 À 144 14 279 15 19
193 Á 145 14 281 15 19
194 Â 146 14 283 15 19
195 Ã 147 14 285 15 19
196 Ä 148 14 287 15 19
197 Å 149 14 289 15 19
198 Æ 150 15 291 16 20
199 Ç 151 15 293 16 20
200 È 152 15 295 16 20
201 É 153 15 297 16 20
202 Ê 154 15 299 16 20
203 Ë 155 15 301 16 20
204 Ì 156 15 303 16 20
205 Í 157 15 305 16 20
206 Î 158 15 307 16 20
207 Ï 159 15 309 16 20
208 Ð 160 16 311 17 21
209 Ñ 161 16 313 17 21
210 Ò 162 16 315 17 21
211 Ó 163 16 317 17 21
212 Ô 164 16 319 17 21
213 Õ 165 16 321 17 21
214 Ö 166 16 323 17 21
215 × 167 16 325 17 21
216 Ø 168 16 327 17 21
217 Ù 169 16 329 17 21
218 Ú 170 17 331 18 22
219 Û 171 17 333 18 22
220 Ü 172 17 335 18 22
221 Ý 173 17 337 18 22
222 Þ 174 17 339 18 22
223 ß 175 17 341 18 22
224 à 176 17 343 18 22
225 á 177 17 345 18 22
226 â 178 17 347 18 22
227 ã 179 17 349 18 22
228 ä 180 18 351 19 23
229 å 181 18 353 19 23
230 æ 182 18 355 19 23
231 ç 183 18 357 19 23
232 è 184 18 359 19 23
233 é 185 18 361 19 23
234 ê 186 18 363 19 23
235 ë 187 18 365 19 23
236 ì 188 18 367 19 23
237 í 189 18 369 19 23
238 î 190 19 371 20 24
239 ï 191 19 373 20 24
240 ð 192 19 375 20 24
241 ñ 193 19 377 20 24
242 ò 194 19 379 20 24
243 ó 195 19 381 20 24
244 ô 196 19 383 20 24
245 õ 197 19 385 20 24
246 ö 198 19 387 20 24
247 ÷ 199 19 389 20 24
248 ø 200 20 391 21 25
249 ù 201 20 393 21 25
250 ú 202 20 395 21 25
251 û 203 20 397 21 25
252 ü 204 20 399 21 25
253 ý 205 20 401 21 25
254 þ 206 20 403 21 25
255 ÿ 207 20 405 21 25
Timing of different operators, functions and combinations thereof, gives only an indication though
USE tempdb;
GO
SET NOCOUNT ON;
DECLARE @START_NUM INT = 48;
DECLARE @RANGE INT = 10;
DECLARE @SAMPLE_SIZE INT = 10000000;
DECLARE @SAMPLE_DISTR INT = 256;
DECLARE @PATTERN VARCHAR(50) = '%[^0-9]%';
DECLARE @RV_PATTERN VARCHAR(50) = '%[0-9]%';
IF OBJECT_ID('dbo.TBL_TEST_SET') IS NULL
BEGIN
-- DROP TABLE dbo.TBL_TEST_SET;
CREATE TABLE dbo.TBL_TEST_SET
(
TS_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_TEST_SET_TS_ID PRIMARY KEY CLUSTERED WITH ( FILLFACTOR = 100, DATA_COMPRESSION = PAGE)
,TS_CHAR CHAR(1) NOT NULL
)
;WITH E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,Tally(N) AS (SELECT TOP (@SAMPLE_SIZE) (ROW_NUMBER() OVER (ORDER BY (SELECT NULL))) FROM E1 a,E1 b,E1 c,E1 d,E1 e,E1 f,E1 g,E1 h,E1 i)
INSERT INTO dbo.TBL_TEST_SET(TS_CHAR)
SELECT
CHAR(T.N % @SAMPLE_DISTR)
FROM Tally T;
END
DECLARE @INT_BUCKET INT = 0;
DECLARE @CHR_BUCKET CHAR(1) = '';
DECLARE @TIMING TABLE
(
TIMING_ID INT IDENTITY(1,1) NOT NULL
,TIMING_TD DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())
,TIMING_TXT VARCHAR(100) NOT NULL
);
/* BASE LINE */
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = T.TS_CHAR');
SELECT
@CHR_BUCKET = T.TS_CHAR
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = T.TS_CHAR');
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR)');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR)');
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = T.TS_CHAR #2');
SELECT
@CHR_BUCKET = T.TS_CHAR
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = T.TS_CHAR #2');
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) #2');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) #2');
/* CASE #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 ELSE 0 END');
SELECT
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 ELSE 0 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 ELSE 0 END');
/* CASE #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 END');
/* CASE #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) BETWEEN 48 AND 57 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) BETWEEN 48 AND 57 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) BETWEEN 48 AND 57 THEN 1 END');
/* CASE #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) < 10 THEN 1 ELSE 0 END');
SELECT
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) < 10 THEN 1 ELSE 0 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) < 10 THEN 1 ELSE 0 END');
/* CASE #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) > 10 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) > 10 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) > 10 THEN 1 END');
/* BITWISE AND #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7F');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7F
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7F');
/* BITWISE AND #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFF');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFF
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFF');
/* BITWISE AND #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFF');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFF
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFF');
/* BITWISE AND #4*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFFFF');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFFFF
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFFFF');
/* SUBTRACTION #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 1');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - 1
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 1');
/* SUBTRACTION #10*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 10');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - 10
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 10');
/* SUBTRACTION #48*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 48');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - 48
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 48');
/* SUBTRACTION #100*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 100');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - 100
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 100');
/* SUBTRACTION #1000*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 1000');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - 1000
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - 1000');
/* SUBTRACTION #@START_NUM*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - @START_NUM');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) - @START_NUM
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) - @START_NUM');
/* MODULO #10*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) % 10');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) % 10
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) % 10');
/* DIVISION #10*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) / 10');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) / 10
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) / 10');
/* MODULO #100*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) % 100');
SELECT
@INT_BUCKET = ASCII(T.TS_CHAR) % 100
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ASCII(T.TS_CHAR) % 100');
/* SUBTRACTION #48 MOD 0x7FFF*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFF');
SELECT
@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFF
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFF');
/* SUBTRACTION #48 MOD 0x7FFF*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF');
SELECT
@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF');
/* SUBTRACTION CASE #48 MOD 0x7FFF*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ((ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF) < 10 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN ((ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF) < 10 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN ((ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF) < 10 THEN 1 END');
/* CASE BETWEEN #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR BETWEEN ''0'' AND ''9'' THEN 1 END');
SELECT
@CHR_BUCKET = CASE WHEN T.TS_CHAR BETWEEN '0' AND '9' THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR BETWEEN ''0'' AND ''9'' THEN 1 END');
/* CASE BETWEEN #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN BETWEEN ''0'' AND ''9'' THEN 1 END');
SELECT
@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN BETWEEN '0' AND '9' THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN BETWEEN ''0'' AND ''9'' THEN 1 END');
/* CASE BETWEEN #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN2 BETWEEN ''0'' AND ''9'' THEN 1 END');
SELECT
@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN2 BETWEEN '0' AND '9' THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN2 BETWEEN ''0'' AND ''9'' THEN 1 END');
/* PATINDEX #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)');
SELECT
@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)');
/* PATINDEX #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2)');
SELECT
@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2)');
/* PATINDEX #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)');
SELECT
@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN)');
/* PATINDEX #4*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@@RV_PATTERN,SUBSTRING(T.TS_CHAR COLLATE Latin1_General_BIN2,N,1))');
SELECT
@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@@RV_PATTERN,SUBSTRING(T.TS_CHAR COLLATE Latin1_General_BIN2,N,1))');
/* PATINDEX #5*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR)');
SELECT
@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR)');
/* CASE PATINDEX #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END');
/* CASE PATINDEX #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END');
/* CASE PATINDEX #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END');
/* CASE PATINDEX #4*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END');
/* CASE PATINDEX #5*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR) = 0 THEN 1 END');
SELECT
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR) = 0 THEN 1 END
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR) = 0 THEN 1 END');
/* ARITHMETIC IDENTIFICATION #1*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ((ASCII(T.TS_CHAR) - 38) / 10 )');
SELECT
@INT_BUCKET = ((ASCII(T.TS_CHAR) - 38) / 10 )
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ((ASCII(T.TS_CHAR) - 38) / 10 )');
/* ARITHMETIC IDENTIFICATION #2*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ABS(ASCII(T.TS_CHAR) - 48) + ABS(ASCII(T.TS_CHAR) - 57)');
SELECT
@INT_BUCKET = ABS(ASCII(T.TS_CHAR) - 48) + ABS(ASCII(T.TS_CHAR) - 57)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = ABS(ASCII(T.TS_CHAR) - 48) + ABS(ASCII(T.TS_CHAR) - 57)');
/* ARITHMETIC IDENTIFICATION #3*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (((ASCII(T.TS_CHAR) - @START_NUM) & 0x7FFF) / 10)');
SELECT
@INT_BUCKET = (((ASCII(T.TS_CHAR) - @START_NUM) & 0x7FFF) / 10)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (((ASCII(T.TS_CHAR) - @START_NUM) & 0x7FFF) / 10)');
/* ARITHMETIC IDENTIFICATION #4*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (((ASCII(T.TS_CHAR) - 48) & 0x7FFF) / 10)');
SELECT
@INT_BUCKET = (((ASCII(T.TS_CHAR) - 48) & 0x7FFF) / 10)
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (((ASCII(T.TS_CHAR) - 48) & 0x7FFF) / 10)');
/* ARITHMETIC IDENTIFICATION #5*/
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) + 2) / 10');
SELECT
@INT_BUCKET = (ASCII(T.TS_CHAR) + 2) / 10
FROM TBL_TEST_SET T
INSERT INTO @TIMING(TIMING_TXT) VALUES('@INT_BUCKET = (ASCII(T.TS_CHAR) + 2) / 10');
SELECT
T.TIMING_TXT
,@SAMPLE_SIZE AS SAMPLE_SIZE
,@SAMPLE_DISTR AS SAMPLE_DISTR
,DATEDIFF(MICROSECOND,MIN(T.TIMING_TD),MAX(T.TIMING_TD)) / 1000.0 AS DURATION_MSEC
FROM @TIMING T
GROUP BY T.TIMING_TXT
ORDER BY (DATEDIFF(MICROSECOND,MIN(T.TIMING_TD),MAX(T.TIMING_TD)))
Timing results
TIMING_TXT SAMPLE_SIZE SAMPLE_DISTR DURATION_MSEC
---------------------------------------------------------------------------------------------------- ----------- ------------ ---------------------------------------
@CHR_BUCKET = T.TS_CHAR #2 10000000 256 1532.803000
@CHR_BUCKET = T.TS_CHAR 10000000 256 1546.402000
@INT_BUCKET = ASCII(T.TS_CHAR) 10000000 256 1626.403000
@INT_BUCKET = ASCII(T.TS_CHAR) #2 10000000 256 1640.003000
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7F 10000000 256 1702.403000
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFFFF 10000000 256 1705.404000
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFF 10000000 256 1710.404000
@INT_BUCKET = ASCII(T.TS_CHAR) - 10 10000000 256 1720.004000
@INT_BUCKET = ASCII(T.TS_CHAR) % 100 10000000 256 1733.603000
@INT_BUCKET = ASCII(T.TS_CHAR) - 48 10000000 256 1735.004000
@INT_BUCKET = ASCII(T.TS_CHAR) % 10 10000000 256 1735.603000
@INT_BUCKET = ASCII(T.TS_CHAR) - 1 10000000 256 1742.603000
@INT_BUCKET = ASCII(T.TS_CHAR) - 1000 10000000 256 1749.203000
@INT_BUCKET = ASCII(T.TS_CHAR) / 10 10000000 256 1751.203000
@INT_BUCKET = ASCII(T.TS_CHAR) & 0x7FFFFF 10000000 256 1753.203000
@INT_BUCKET = ASCII(T.TS_CHAR) - 100 10000000 256 1769.803000
@INT_BUCKET = ASCII(T.TS_CHAR) - @START_NUM 10000000 256 1782.404000
@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFF 10000000 256 1813.604000
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 END 10000000 256 1829.203000
@INT_BUCKET = (ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF 10000000 256 1844.803000
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) < 10 THEN 1 ELSE 0 END 10000000 256 1844.804000
@INT_BUCKET = (ASCII(T.TS_CHAR) + 2) / 10 10000000 256 1859.403000
@INT_BUCKET = ((ASCII(T.TS_CHAR) - 38) / 10 ) 10000000 256 1866.403000
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) > 10 THEN 1 END 10000000 256 1876.003000
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) = 1 THEN 1 ELSE 0 END 10000000 256 1876.004000
@INT_BUCKET = (((ASCII(T.TS_CHAR) - 48) & 0x7FFF) / 10) 10000000 256 2078.804000
@INT_BUCKET = CASE WHEN ASCII(T.TS_CHAR) BETWEEN 48 AND 57 THEN 1 END 10000000 256 2079.804000
@INT_BUCKET = (((ASCII(T.TS_CHAR) - @START_NUM) & 0x7FFF) / 10) 10000000 256 2129.005000
@INT_BUCKET = CASE WHEN ((ASCII(T.TS_CHAR) - 48) & 0x7FFFFFFF) < 10 THEN 1 END 10000000 256 2250.404000
@INT_BUCKET = ABS(ASCII(T.TS_CHAR) - 48) + ABS(ASCII(T.TS_CHAR) - 57) 10000000 256 2251.405000
@INT_BUCKET = PATINDEX(@@RV_PATTERN,SUBSTRING(T.TS_CHAR COLLATE Latin1_General_BIN2,N,1)) 10000000 256 2876.405000
@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) 10000000 256 2887.807000
@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) 10000000 256 2910.606000
@INT_BUCKET = PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) 10000000 256 2927.205000
@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN BETWEEN '0' AND '9' THEN 1 END 10000000 256 3379.007000
@CHR_BUCKET = CASE WHEN T.TS_CHAR COLLATE Latin1_General_BIN2 BETWEEN '0' AND '9' THEN 1 END 10000000 256 3395.207000
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END 10000000 256 3440.007000
@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END 10000000 256 3453.606000
@INT_BUCKET = CASE WHEN PATINDEX(@PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN) = 0 THEN 1 END 10000000 256 3460.607000
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR COLLATE Latin1_General_BIN2) = 0 THEN 1 END 10000000 256 3471.206000
@CHR_BUCKET = CASE WHEN T.TS_CHAR BETWEEN '0' AND '9' THEN 1 END 10000000 256 4021.411000
@INT_BUCKET = PATINDEX(@RV_PATTERN,T.TS_CHAR) 10000000 256 5740.211000
@INT_BUCKET = CASE WHEN PATINDEX(@RV_PATTERN,T.TS_CHAR) = 0 THEN 1 END 10000000 256 6080.412000
Edit: type and highlight.
November 1, 2014 at 4:54 pm
Jeff Moden (10/30/2014)
Now we're cooking with gas! Well done! I had eye surgery today (lens replacement) and can't see so well yet and can't spend much time in front of the computer but I'll run some additional tests over the weekend using variable length data and see what happens.We also need to try the same thing on Alan's good pattern matching code.
As Eirikur suggests, this is an example of the SQL Server Community (same initials as SQL Server Central :-)) at it's best. Keep 'em coming folks!
BTW, that's one heck of a nice machine you have there, Eirikur!
Good luck with the eye Jeff, hope every thing goes well.
😎
...yes the machine is a nifty little thingy, quad core xeon, 8Mb cache, makes a nice rumble;-)
November 3, 2014 at 2:32 pm
Jeff Moden (10/30/2014)
We also need to try the same thing on Alan's good pattern matching code.
Sorry for not checking back sooner - I spent the weekend dealing with some <sarcasm>very realistic and reasonable deadlines </sarcasm>.
This is the best I could do so far:
ALTER FUNCTION dbo.PatExclude8K
(
@String VARCHAR(8000),
@Pattern VARCHAR(50)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N)),
Itally(N) AS
(
SELECT TOP(CONVERT(BIGINT,LEN(@String),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E1 T1 CROSS JOIN E1 T2 CROSS JOIN E1 T3 CROSS JOIN E1 T4
)
SELECT
(
SELECT CASE WHEN PATINDEX(@Pattern,SUBSTRING(@String COLLATE Latin1_General_BIN,N,1)) = 0 THEN SUBSTRING(@String,N,1) END
FROM iTally
FOR XML PATH('')
) AS NewString;
GO
This get's me about a 15-20% improvement compared to the previous version:
(note: for brevity, I attached the old and new version with the test script)
Beginning execution loop
========== Using PatExclude8K_old ==========
2066
========== Using PatExclude8K_old ==========
2140
========== Using PatExclude8K_old ==========
2153
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
1603
========== Using PatExclude8K ==========
1620
========== Using PatExclude8K ==========
1660
Batch execution completed 3 times.
I don't know how I could refactor this:
WHERE ASCII(SUBSTRING(@pString,N,1)) BETWEEN 48 AND 57
to take a pattern... :crazy:
-- Itzik Ben-Gan 2001
November 3, 2014 at 6:25 pm
Adding a little bit of tinkering based on the operator cost results in the previous post, mainly changing from the CASE to a WHERE clause for filtering. Not a big gain but some.
😎
USE tempdb;
GO
SET NOCOUNT ON;
IF OBJECT_ID('temp1.dbo.PatEExclude8K') IS NOT NULL DROP FUNCTION dbo.PatEExclude8K;
IF OBJECT_ID('temp1.dbo.PatExclude8K') IS NOT NULL DROP FUNCTION dbo.PatExclude8K;
GO
CREATE FUNCTION dbo.PatEExclude8K
(
@String VARCHAR(8000),
@Pattern VARCHAR(50)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N)),
Itally(N) AS
(
SELECT TOP(CONVERT(INT,LEN(@String),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E1 T1 CROSS JOIN E1 T2 CROSS JOIN E1 T3 CROSS JOIN E1 T4
)
SELECT
(
SELECT SUBSTRING(@String,N,1)
FROM iTally
WHERE 0 = PATINDEX(@Pattern,SUBSTRING(@String COLLATE Latin1_General_BIN,N,1))
FOR XML PATH('')
) AS NewString;
GO
CREATE FUNCTION dbo.PatExclude8K
(
@String VARCHAR(8000),
@Pattern VARCHAR(50)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N)),
Itally(N) AS
(
SELECT TOP(CONVERT(BIGINT,LEN(@String),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E1 T1 CROSS JOIN E1 T2 CROSS JOIN E1 T3 CROSS JOIN E1 T4
)
SELECT
(
SELECT CASE WHEN PATINDEX(@Pattern,SUBSTRING(@String COLLATE Latin1_General_BIN,N,1)) = 0 THEN SUBSTRING(@String,N,1) END
FROM iTally
FOR XML PATH('')
) AS NewString;
GO
-- make sure the new version works
--SELECT * FROM dbo.PatExclude8K('123acb456!','[^0-9!]')
--SELECT * FROM dbo.PatEExclude8K('123acb456!','[^0-9!]')
--===== Create the 100K row test table
IF OBJECT_ID('tempdb..#val') IS NOT NULL DROP TABLE #val;
SELECT TOP 100000
txt = ISNULL(CONVERT(VARCHAR(36),NEWID()),'')
INTO #val
FROM sys.all_columns ac1
CROSS JOIN sys.all_columns ac2;
ALTER TABLE #Val
ADD PRIMARY KEY CLUSTERED (txt);
--===== Do the tests. Had to use duration because one
-- of the tests is on the new scalar function and
-- SET STATISTICS doesn't report on those correctly.
GO
PRINT '========== Using PatEExclude8K ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME;
SELECT @StartTime = GETDATE();
SELECT @string = newstring
FROM #val
CROSS APPLY dbo.PatEExclude8K(txt, '[^0-9]');
PRINT DATEDIFF(ms,@StartTime,GETDATE());
GO 3
PRINT '========== Using PatExclude8K ==========';
DECLARE @String VARCHAR(36)
,@StartTime DATETIME;
SELECT @StartTime = GETDATE();
SELECT @string = newstring
FROM #val
CROSS APPLY dbo.PatExclude8K(txt, '[^0-9]');
PRINT DATEDIFF(ms,@StartTime,GETDATE());
GO 3
Results
Beginning execution loop
========== Using PatEExclude8K ==========
1450
========== Using PatEExclude8K ==========
1470
========== Using PatEExclude8K ==========
1496
Batch execution completed 3 times.
Beginning execution loop
========== Using PatExclude8K ==========
1586
========== Using PatExclude8K ==========
1563
========== Using PatExclude8K ==========
1556
Batch execution completed 3 times.
November 4, 2014 at 10:18 am
Just dawned on me, kind of a deja vu, here is around six years old thread on the same subject.
😎
Viewing 15 posts - 16 through 30 (of 45 total)
You must be logged in to reply to this topic. Login to reply