November 1, 2017 at 4:07 am
Haven't had time to look deeper into this but I do have a hunch that this can be optimized further. Quite certain though that the length of the strings can and will dramatically alter the execution times and that in those terms, there is no one size that fits all.
π
November 1, 2017 at 5:08 am
ChrisM@Work - Wednesday, November 1, 2017 2:50 AMNice job, Eirikur. It's the fastest by a useful margin:
Haven't had time to look deeper into this but I do have a hunch that this can be optimized further. Quite certain though that the length of the strings can and will dramatically alter the execution times and that in those terms, there is no one size that fits all.
π
Oh, absolutely. It's not necessary to split out the whole string - the process can stop when the first recurrence is found...
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
November 1, 2017 at 3:14 pm
Eirikur Eiriksson - Wednesday, November 1, 2017 2:14 AMUntested and just for fun
π
USE TEEST;
GOIF object_id('tempdb..#ListOfStrings') IS NOT NULL DROP TABLE #ListOfStrings;
SELECT ROW_NUMBER() OVER (ORDER BY @@VERSION) AS ID,X.y
INTO #ListOfStrings
FROM (VALUES
('Ad'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb'),
('AdbicdA'),
('f1yf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Ad2bicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf3 Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbi4cdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbic5defghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Ad6bicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicde7fghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbi8cdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefg9hijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefgh1ijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicde2fghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghij3kiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefg4hijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijki5qtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiq6tfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghij7kiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtf8joen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijki9qtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjo0en gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb')
)X(y)-- Inline Tally
SET STATISTICS IO, TIME ON;
;WITH T(N) AS (SELECT X.N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0))X(N))
,POS_DIST AS
(
SELECT
LOS.ID
,NUMS.N
,SUBSTRING(LOS.Y,NUMS.N,1) AS XCHAR
,CHARINDEX(SUBSTRING(LOS.Y,NUMS.N,1),LOS.y,NUMS.N + 1) - NUMS.N AS DISTANCE
FROM #ListOfStrings LOS
CROSS APPLY
(
SELECT TOP(LEN(LOS.y)) ROW_NUMBER() OVER (ORDER BY @@VERSION) AS N
FROM T T1,T T22,T T3,T T4
) NUMS(N)
WHERE CHARINDEX(SUBSTRING(LOS.Y,NUMS.N,1),LOS.y,NUMS.N + 1) - NUMS.N > 0
)
,FIND_MATCH AS
(
SELECT
PD.ID
,ROW_NUMBER() OVER (PARTITION BY PD.ID ORDER BY PD.N + PD.DISTANCE) XRID
,PD.XCHAR
FROM POS_DIST PD
)
SELECT
FM.ID
,FM.XCHAR
FROM FIND_MATCH FM
WHERE FM.XRID = 1;SET STATISTICS IO, TIME OFF
The statistics
(37 row(s) affected)
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#ListOfStrings'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.SQL Server Execution Times:
CPU time = 15 ms, elapsed time = 29 ms.
Most interesting... I had been trying to find the right optimization for my 1 scan version, and when I saw yours, I realized I could make mine 10 times faster than where it was when I began. I borrowed your set of strings, and here's what I ended up with:IF OBJECT_ID(N'tempdb..#ListOfStrings', N'U') IS NOT NULL
BEGIN
DROP TABLE #ListOfStrings;
END;
GO
SELECT ROW_NUMBER() OVER (ORDER BY @@VERSION) AS ID,
CONVERT(varchar(100), X.y) AS y
INTO #ListOfStrings
FROM (VALUES
('Ad'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb'),
('AdbicdA'),
('f1yf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Ad2bicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf3 Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbi4cdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbic5defghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Ad6bicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicde7fghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbi8cdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefg9hijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefgh1ijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicde2fghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghij3kiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefg4hijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijki5qtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiq6tfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghij7kiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtf8joen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijki9qtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjo0en gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb')
) AS X(y);
SET STATISTICS IO, TIME ON;
WITH E1 AS (
SELECT X.N
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS X (N)
)
SELECT L.ID, L.y, X.THE_CHAR, X.MIN_POS, X.NEXT_POS
FROM #ListOfStrings AS L
CROSS APPLY (
SELECT TOP (1) X.*
FROM (
SELECT W.THE_CHAR, MIN(W.FIRST_POS) AS MIN_POS, MIN(W.NEXT_POS) AS NEXT_POS
FROM (
SELECT SUBSTRING(L.y, T.RN, 1) AS THE_CHAR,
CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y) AS FIRST_POS,
CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y, T.RN + 1) AS NEXT_POS
FROM (
SELECT TOP (LEN(L.y)) ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS RN
FROM E1 AS A, E1 AS B, E1 AS C, E1 AS D
) AS T
WHERE CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y) > 0
AND CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y, T.RN + 1) > 0
) AS W
GROUP BY W.THE_CHAR
) AS X
ORDER BY X.NEXT_POS, X.MIN_POS
) AS X;
SET STATISTICS IO, TIME OFF;
DROP TABLE #ListOfStrings;
And the stats: (38 row(s) affected)
(1 row(s) affected)
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 6 ms.
(37 row(s) affected)
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#ListOfStrings______________________________________________________________________________________________________00000007192B'. Scan count 1, logical reads 1, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
(1 row(s) affected)
SQL Server Execution Times:
CPU time = 15 ms, elapsed time = 11 ms.
It ran on SQL 2014, but I don't know the server specs.
Steve (aka sgmunson) π π π
Rent Servers for Income (picks and shovels strategy)
November 1, 2017 at 10:47 pm
Here's what I'm coming up with...
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 00:42:00.8331935 test name: EE FindPost: 1905501
= end time: 2017-11-02 00:42:00.8381931 duration: 5.000000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 00:42:02.2572747 test name: John FindPost: 1905507
= end time: 2017-11-02 00:42:02.2692792 duration: 12.005000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 00:42:03.6903656 test name: Chris FindPost: 1905507
= end time: 2017-11-02 00:42:03.7004021 duration: 10.037000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 00:42:05.1264601 test name: Steve FindPost: 1905734
= end time: 2017-11-02 00:42:05.1334424 duration: 6.982000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 00:42:06.5615921 test name: Jason FindPost: 1905752
= end time: 2017-11-02 00:42:06.5665196 duration: 4.927000 ms.
========================================================================================================================
IF object_id('tempdb..#ListOfStrings') IS NOT NULL DROP TABLE #ListOfStrings;
SELECT ROW_NUMBER() OVER (ORDER BY @@VERSION) AS ID,X.y
INTO #ListOfStrings
FROM (VALUES
('Ad'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb'),
('AdbicdA'),
('f1yf Adbicdefghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Ad2bicdefghijkiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf3 Adbicdefghijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbi4cdefghijkiqtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbic5defghijkiqtfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Ad6bicdefghijkiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicde7fghijkiqtfjoen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbi8cdefghijkiqtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefg9hijkiqtfjoen gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefgh1ijkiqtfjoen gfviuyhtbnhsghs1zxjhxcdkflg,gmdcj1shja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicde2fghijkiqtfjoen gfviuyhtbnh2sghszxjhxcdkflg,gm2dcjshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghij3kiqtfjoen gfviuyhtbnhsghs3zxjhxcdkflg,gmdcjsh3ja\zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefg4hijkiqtfjoen gfviuyhtbnhs4ghszxjhxcdkflg,gmdc4jshja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijki5qtfjoen gfviuyhtbnhsghszx5jhxcdkflg,gmdcjshja5\zhgatwevwnfdm,gflgotitheb'),
('Adbicdefghijkiq6tfjoen gfviuyhtbnhsghszxj6hxcdkflg,gmdcjshja\6zhgatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghij7kiqtfjoen gfviuyhtbnhsghs7zxjhxcdkflg,gmdcjsh7ja\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtf8joen gfviuyhtbnhsghszxjhx7cdkflg,gmdcjshja\zh8gatwevwnfdm,gflgotitheb'),
('fyf Adbicdefghijki9qtfjoen gfviuyhtbnhsghszx8jhxcdkflg,gmdcjshja9\zhgatwevwnfdm,gflgotitheb fyf'),
('Adbicdefghijkiqtfjo0en gfviuyhtbnhsghszxjhxcd9kflg,gmdcjshja\zhga10twevwnfdm,gflgotitheb')
)X(y)
SET NOCOUNT ON;
GO
--====================================================================================================================
--====================================================================================================================
DBCC DROPCLEANBUFFERS;
WAITFOR DELAY '00:00:01';
GO
--====================================================================================================================
--====================================================================================================================
DECLARE @start DATETIME2(7) = SYSDATETIME(); PRINT LEFT(CONCAT(REPLICATE('=', 120), CHAR(13), CHAR(10),
'= start time: ', @start, ' test name: EE FindPost: 1905501 ', REPLICATE(' ', 100)), 241) + '=';
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Inline Tally
;WITH T(N) AS (SELECT X.N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0))X(N))
,POS_DIST AS
(
SELECT
LOS.ID
,NUMS.N
,SUBSTRING(LOS.Y,NUMS.N,1) AS XCHAR
,CHARINDEX(SUBSTRING(LOS.Y,NUMS.N,1),LOS.y,NUMS.N + 1) - NUMS.N AS DISTANCE
FROM #ListOfStrings LOS
CROSS APPLY
(
SELECT TOP(LEN(LOS.y)) ROW_NUMBER() OVER (ORDER BY @@VERSION) AS N
FROM T T1,T T22,T T3,T T4
) NUMS(N)
WHERE CHARINDEX(SUBSTRING(LOS.Y,NUMS.N,1),LOS.y,NUMS.N + 1) - NUMS.N > 0
)
,FIND_MATCH AS
(
SELECT
PD.ID
,ROW_NUMBER() OVER (PARTITION BY PD.ID ORDER BY PD.N + PD.DISTANCE) XRID
,PD.XCHAR
FROM POS_DIST PD
)
SELECT
FM.ID
,FM.XCHAR
FROM FIND_MATCH FM
WHERE FM.XRID = 1;
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT CONCAT('= end time: ', SYSDATETIME(), LEFT(CONCAT(' duration: ',
DATEDIFF(mcs, @start, SYSDATETIME()) / 1000.0, ' ms.', REPLICATE(' ', 75)), 71), '=', CHAR(13), CHAR(10), REPLICATE('=', 120));
GO
--====================================================================================================================
--====================================================================================================================
DBCC DROPCLEANBUFFERS;
WAITFOR DELAY '00:00:01';
GO
--====================================================================================================================
--====================================================================================================================
GO
DECLARE @start DATETIME2(7) = SYSDATETIME(); PRINT LEFT(CONCAT(REPLICATE('=', 120), CHAR(13), CHAR(10),
'= start time: ', @start, ' test name: John FindPost: 1905507', REPLICATE(' ', 100)), 241) + '=';
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WITH Tally -- John
AS (SELECT CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS INT) AS n FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n)),
Characters AS (
SELECT
c.ID
--, c.Name
, SUBSTRING(c.y,n,1) AS CHR
, t.n
FROM Tally t
JOIN #ListOfStrings c
ON t.n <= LEN(c.y)
)
, Repeats AS (
SELECT
id
--, Name
, CHR
, ROW_NUMBER() OVER (PARTITION BY ID, CHR ORDER BY n) AS RowNo
, n AS CharacterSeq
FROM Characters
)
, Recurrences AS (
SELECT
ID
, CHR
, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY CharacterSeq) AS RecurSeq
FROM Repeats
WHERE RowNo = 2 -- first recurrence
)
SELECT
ID
, CHR
FROM Recurrences
WHERE RecurSeq = 1
ORDER BY ID;
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT CONCAT('= end time: ', SYSDATETIME(), LEFT(CONCAT(' duration: ',
DATEDIFF(mcs, @start, SYSDATETIME()) / 1000.0, ' ms.', REPLICATE(' ', 75)), 71), '=', CHAR(13), CHAR(10), REPLICATE('=', 120));
GO
--====================================================================================================================
--====================================================================================================================
DBCC DROPCLEANBUFFERS;
WAITFOR DELAY '00:00:01';
GO
--====================================================================================================================
--====================================================================================================================
GO
DECLARE @start DATETIME2(7) = SYSDATETIME(); PRINT LEFT(CONCAT(REPLICATE('=', 120), CHAR(13), CHAR(10),
'= start time: ', @start, ' test name: Chris FindPost: 1905507', REPLICATE(' ', 100)), 241) + '=';
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WITH Tally -- Chris
AS (SELECT CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS INT) AS n FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n))
SELECT
c.ID
, y.XCHAR
FROM #ListOfStrings c
CROSS APPLY (
SELECT TOP(1) [XCHAR] = SUBSTRING(c.y,n,1)
FROM Tally t
CROSS APPLY (SELECT NextPos = CHARINDEX(SUBSTRING(c.y,n,1),c.y,n+1)) x
WHERE LEN(c.y) >= n AND x.NextPos > 0
ORDER BY x.NextPos
) y
ORDER BY c.ID;
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT CONCAT('= end time: ', SYSDATETIME(), LEFT(CONCAT(' duration: ',
DATEDIFF(mcs, @start, SYSDATETIME()) / 1000.0, ' ms.', REPLICATE(' ', 75)), 71), '=', CHAR(13), CHAR(10), REPLICATE('=', 120));
GO
--====================================================================================================================
--====================================================================================================================
DBCC DROPCLEANBUFFERS;
WAITFOR DELAY '00:00:01';
GO
--====================================================================================================================
--====================================================================================================================
GO
DECLARE @start DATETIME2(7) = SYSDATETIME(); PRINT LEFT(CONCAT(REPLICATE('=', 120), CHAR(13), CHAR(10),
'= start time: ', @start, ' test name: Steve FindPost: 1905734', REPLICATE(' ', 100)), 241) + '=';
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WITH E1 AS (
SELECT X.N
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS X (N)
)
SELECT
L.ID,
--L.y,
X.THE_CHAR
--X.MIN_POS,
--X.NEXT_POS
FROM #ListOfStrings AS L
CROSS APPLY (
SELECT TOP (1) X.*
FROM (
SELECT W.THE_CHAR, MIN(W.FIRST_POS) AS MIN_POS, MIN(W.NEXT_POS) AS NEXT_POS
FROM (
SELECT SUBSTRING(L.y, T.RN, 1) AS THE_CHAR,
CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y) AS FIRST_POS,
CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y, T.RN + 1) AS NEXT_POS
FROM (
SELECT TOP (LEN(L.y)) ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS RN
FROM E1 AS A, E1 AS B, E1 AS C, E1 AS D
) AS T
WHERE CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y) > 0
AND CHARINDEX(SUBSTRING(L.y, T.RN, 1), L.y, T.RN + 1) > 0
) AS W
GROUP BY W.THE_CHAR
) AS X
ORDER BY X.NEXT_POS, X.MIN_POS
) AS X;
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT CONCAT('= end time: ', SYSDATETIME(), LEFT(CONCAT(' duration: ',
DATEDIFF(mcs, @start, SYSDATETIME()) / 1000.0, ' ms.', REPLICATE(' ', 75)), 71), '=', CHAR(13), CHAR(10), REPLICATE('=', 120));
GO
--====================================================================================================================
--====================================================================================================================
DBCC DROPCLEANBUFFERS;
WAITFOR DELAY '00:00:01';
GO
--====================================================================================================================
--====================================================================================================================
GO
DECLARE @start DATETIME2(7) = SYSDATETIME(); PRINT LEFT(CONCAT(REPLICATE('=', 120), CHAR(13), CHAR(10),
'= start time: ', @start, ' test name: Jason FindPost: 1905752 ', REPLICATE(' ', 100)), 241) + '=';
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WITH
cte_psd AS (
SELECT
los.id,
bin_val = CAST(p.pos AS BINARY(4)) + CAST(sv.sub_val AS BINARY(2)),
drnk = DENSE_RANK() OVER (PARTITION BY los.id, sv.sub_val ORDER BY p.pos)
FROM
#ListOfStrings los
CROSS APPLY (
SELECT TOP (LEN(los.y))
rn =ROW_NUMBER() OVER (ORDER BY n2.n)
FROM ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1) ) n1 (n)
CROSS APPLY ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1) ) n2 (n) ) p (pos)
CROSS APPLY ( VALUES (SUBSTRING(los.y, p.pos, 1)) ) sv (sub_val)
)
SELECT
psd.id,
CAST(SUBSTRING(MIN(psd.bin_val), 5, 2) AS VARCHAR(2))
FROM
cte_psd psd
WHERE
psd.drnk > 1
GROUP BY
psd.id;
--~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PRINT CONCAT('= end time: ', SYSDATETIME(), LEFT(CONCAT(' duration: ',
DATEDIFF(mcs, @start, SYSDATETIME()) / 1000.0, ' ms.', REPLICATE(' ', 75)), 71), '=', CHAR(13), CHAR(10), REPLICATE('=', 120));
GO
--====================================================================================================================
--====================================================================================================================
November 2, 2017 at 4:31 am
Thanks for the test harness, Jason. My pride was dented when I saw I finished last! I tried it out on three of my own servers and my results were somewhat different. The variations between the three could be a function of differences in hardware and configuration as much as SQL Server version. I set the Discard results after execution option so as not to include the time it takes to return and display the results.
SQL Server 2008 R2
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:19:37.6556372 test name: EE FindPost: 1905501
= end time: 2017-11-02 10:19:37.6806397 duration: 25.002000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:19:38.7077424 test name: John FindPost: 1905507
= end time: 2017-11-02 10:19:38.7297446 duration: 22.002000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:19:39.7548471 test name: Chris FindPost: 1905507
= end time: 2017-11-02 10:19:39.7808497 duration: 26.002000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:19:40.8059522 test name: Steve FindPost: 1905734
= end time: 2017-11-02 10:19:40.8399556 duration: 34.003000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:19:41.8700586 test name: Jason FindPost: 1905752
= end time: 2017-11-02 10:19:41.8820598 duration: 12.001000 ms.
========================================================================================================================
SQL Server 2014
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:07.5684560 test name: EE FindPost: 1905501
= end time: 2017-11-02 10:20:07.5834575 duration: 15.001000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:08.5895581 test name: John FindPost: 1905507
= end time: 2017-11-02 10:20:08.5965588 duration: 7.000000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:09.6016593 test name: Chris FindPost: 1905507
= end time: 2017-11-02 10:20:09.6166608 duration: 15.001000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:10.6227614 test name: Steve FindPost: 1905734
= end time: 2017-11-02 10:20:10.6417633 duration: 19.002000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:11.6488640 test name: Jason FindPost: 1905752
= end time: 2017-11-02 10:20:11.6568648 duration: 8.000000 ms.
========================================================================================================================
SQL Server 2016
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:56.6837777 test name: EE FindPost: 1905501
= end time: 2017-11-02 10:20:56.7150227 duration: 31.245000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:58.1213398 test name: John FindPost: 1905507
= end time: 2017-11-02 10:20:58.1369506 duration: 15.611000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:20:59.5588979 test name: Chris FindPost: 1905507
= end time: 2017-11-02 10:20:59.5745204 duration: 15.623000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:21:00.9965024 test name: Steve FindPost: 1905734
= end time: 2017-11-02 10:21:01.0276937 duration: 31.191000 ms.
========================================================================================================================
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========================================================================================================================
= start time: 2017-11-02 10:21:02.4340127 test name: Jason FindPost: 1905752
= end time: 2017-11-02 10:21:02.4496342 duration: 15.622000 ms.
========================================================================================================================
John
November 2, 2017 at 4:40 am
Interesting...I found similar results to Jason.
Here's another solution which so far appears to be quite fast:WITH Tally -- Chris2
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT
c.RowID,
XCHAR = SUBSTRING(c.name, x.q,1)
FROM #ListOfStrings c
CROSS APPLY (
SELECT q = MIN(n)
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
) x
ORDER BY c.RowID
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
November 2, 2017 at 5:22 am
ChrisM@Work - Thursday, November 2, 2017 4:40 AMInteresting...I found similar results to Jason.
Here's another solution which so far appears to be quite fast:WITH Tally -- Chris2
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT
c.RowID,
XCHAR = SUBSTRING(c.name, x.q,1)
FROM #ListOfStrings c
CROSS APPLY (
SELECT q = MIN(n)
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
) x
ORDER BY c.RowID
Impressive! Easily wins on 2008 R2 and 2014, and is about the same as the other top ones on 2016.
John
November 2, 2017 at 7:37 am
John Mitchell-245523 - Thursday, November 2, 2017 4:31 AMThanks for the test harness, Jason. My pride was dented when I saw I finished last! I tried it out on three of my own servers and my results were somewhat different. The variations between the three could be a function of differences in hardware and configuration as much as SQL Server version. I set the Discard results after execution option so as not to include the time it takes to return and display the results.John
That's what makes SSC different than any other forum (that I'm aware of)... Other forums, the first to post a working solution get the "green check mark"... Here, the first to post a working solution, sets the "time to beat"...
Anyone coming in after has the advantage, simply because they have your solution to benchmark off of.
The test harness itself... Jeff was kind enough to bless the usage of one of his older harnesses and provided another... This just my take on his good work .
Edit: Yea, I think these are sensitive to environment... When I running it, you & Chris were consistently neck & neck (Chris with a bit of an edge) and Eirikur and I consistently neck & neck w/ no predictable advantage.
I was testing done on SQL Server 2014 Dev Edition. Simple recovery model , Compat Level = 100 (2008), & CE = 70
November 2, 2017 at 7:53 am
John Mitchell-245523 - Thursday, November 2, 2017 5:22 AMChrisM@Work - Thursday, November 2, 2017 4:40 AMInteresting...I found similar results to Jason.
Here's another solution which so far appears to be quite fast:WITH Tally -- Chris2
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT
c.RowID,
XCHAR = SUBSTRING(c.name, x.q,1)
FROM #ListOfStrings c
CROSS APPLY (
SELECT q = MIN(n)
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
) x
ORDER BY c.RowIDImpressive! Easily wins on 2008 R2 and 2014, and is about the same as the other top ones on 2016.
John
Last shot - this one consistently comes out at 0ms in my little test harness, probably better tested using Jason's:WITH Tally -- Chris3
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT
c.RowID,
XCHAR = SUBSTRING(c.name, x.n, 1)
FROM #ListOfStrings c
CROSS APPLY (
SELECT TOP(1) n
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY @@VERSION)
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
ORDER BY n
) x
ORDER BY c.RowID
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
November 2, 2017 at 8:13 am
Comes out as 0 on two of mine, as well, and easily wins the one where it doesn't. Well done, Chris!
John
November 2, 2017 at 8:18 am
John Mitchell-245523 - Thursday, November 2, 2017 8:13 AMComes out as 0 on two of mine, as well, and easily wins the one where it doesn't. Well done, Chris!John
:blush: I learned everything I know from you, John!
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
November 2, 2017 at 8:20 am
We're still testing with only 40 rows though, correct?
--Jeff Moden
Change is inevitable... Change for the better is not.
November 2, 2017 at 8:26 am
Jeff Moden - Thursday, November 2, 2017 8:20 AMWe're still testing with only 40 rows though, correct?
Jeff
Yes. I did do a couple of tests (see earlier posts) against sys.all_columns. The solutions posted since then would need to be massaged a little in order to fit that test harness.
John
November 2, 2017 at 8:33 am
Jeff Moden - Thursday, November 2, 2017 8:20 AMWe're still testing with only 40 rows though, correct?
Not any more π
IF 0 = 1 BEGIN
DROP TABLE #ListOfStrings
SELECT TOP(100000)
[RowID] = ROW_NUMBER() OVER(ORDER BY (SELECT NULL)),
[name] = CAST(NEWID() AS VARCHAR(36))
INTO #ListOfStrings
FROM SYS.COLUMNS a, SYS.COLUMNS b
END;
PRINT ''
PRINT '== John ========================================================================================================================================================';
SET STATISTICS IO, TIME ON;
WITH Tally -- John
AS (SELECT CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS INT) AS n FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n), (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n)),
Characters AS (
SELECT
c.RowID
--, c.Name
, SUBSTRING(c.name,n,1) AS CHR
, t.n
FROM Tally t
JOIN #ListOfStrings c
ON t.n <= LEN(c.name)
)
, Repeats AS (
SELECT
RowID
--, Name
, CHR
, ROW_NUMBER() OVER (PARTITION BY RowID, CHR ORDER BY n) AS RowNo
, n AS CharacterSeq
FROM Characters
)
, Recurrences AS (
SELECT
RowID
, CHR
, ROW_NUMBER() OVER (PARTITION BY RowID ORDER BY CharacterSeq) AS RecurSeq
FROM Repeats
WHERE RowNo = 2 -- first recurrence
)
SELECT
RowID
, CHR
FROM Recurrences
WHERE RecurSeq = 1
ORDER BY RowID;
SET STATISTICS IO, TIME OFF;
PRINT ''
PRINT '== EE ========================================================================================================================================================';
SET STATISTICS IO, TIME ON;
;WITH T(N) AS (SELECT X.N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0))X(N))
,POS_DIST AS
(
SELECT
LOS.RowID
,NUMS.N
,SUBSTRING(LOS.[name],NUMS.N,1) AS XCHAR
,CHARINDEX(SUBSTRING(LOS.[name],NUMS.N,1),LOS.[name],NUMS.N + 1) - NUMS.N AS DISTANCE
FROM #ListOfStrings LOS
CROSS APPLY
(
SELECT TOP(LEN(LOS.[name])) ROW_NUMBER() OVER (ORDER BY @@VERSION) AS N
FROM T T1,T T22,T T3,T T4
) NUMS(N)
WHERE CHARINDEX(SUBSTRING(LOS.[name],NUMS.N,1),LOS.[name],NUMS.N + 1) - NUMS.N > 0
)
,FIND_MATCH AS
(
SELECT
PD.RowID
,ROW_NUMBER() OVER (PARTITION BY PD.RowID ORDER BY PD.N + PD.DISTANCE) XRID
,PD.XCHAR
FROM POS_DIST PD
)
SELECT
FM.RowID
,FM.XCHAR
FROM FIND_MATCH FM
WHERE FM.XRID = 1
ORDER BY FM.RowID;
SET STATISTICS IO, TIME OFF;
PRINT ''
PRINT '== Chris3 ========================================================================================================================================================';
SET STATISTICS IO, TIME ON;
WITH Tally -- Chris3
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT
c.RowID,
XCHAR = SUBSTRING(c.name, x.n, 1)
FROM #ListOfStrings c
CROSS APPLY (
SELECT TOP(1) n
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY @@VERSION)
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
ORDER BY n
) x
ORDER BY c.RowID
SET STATISTICS IO, TIME OFF;
PRINT ''
PRINT '== Jason ========================================================================================================================================================';
SET STATISTICS IO, TIME ON;
WITH
cte_psd AS (
SELECT
los.RowID,
bin_val = CAST(p.pos AS BINARY(4)) + CAST(sv.sub_val AS BINARY(2)),
drnk = DENSE_RANK() OVER (PARTITION BY los.RowID, sv.sub_val ORDER BY p.pos)
FROM
#ListOfStrings los
CROSS APPLY (
SELECT TOP (LEN(los.name))
rn =ROW_NUMBER() OVER (ORDER BY n2.n)
FROM ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1) ) n1 (n)
CROSS APPLY ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1) ) n2 (n) ) p (pos)
CROSS APPLY ( VALUES (SUBSTRING(los.name, p.pos, 1)) ) sv (sub_val)
)
SELECT
psd.RowID,
CAST(SUBSTRING(MIN(psd.bin_val), 5, 2) AS VARCHAR(2))
FROM
cte_psd psd
WHERE
psd.drnk > 1
GROUP BY
psd.RowID
ORDER BY
psd.RowID;
SET STATISTICS IO, TIME OFF;
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
November 2, 2017 at 6:32 pm
SELECT XCHar
FROM #ListOfStrings los
CROSS APPLY
(
SELECT TOP (1) token --, position
FROM dbo.ngrams8k(los.name,1)
WHERE CHARINDEX(token,SUBSTRING(los.name,1,CHECKSUM(position-1))) > 0
ORDER BY position
) ngEng(XCHar);
IF 1 = 1
BEGIN
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#ListOfStrings') IS NOT NULL DROP TABLE #ListOfStrings;
SELECT TOP(100000)
[RowID] = ROW_NUMBER() OVER(ORDER BY (SELECT NULL)),
[name] = CAST(NEWID() AS VARCHAR(36))
INTO #ListOfStrings
FROM SYS.COLUMNS a, SYS.COLUMNS b
END;
--PRINT char(10)+'== John '+REPLICATE('=',50);
SET STATISTICS IO, TIME ON;
PRINT char(10)+'== EE '+REPLICATE('========================================',2);
SET STATISTICS IO, TIME ON;
WITH T(N) AS (SELECT X.N FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) X(N)),
POS_DIST AS
(
SELECT
LOS.RowID
,NUMS.N
,SUBSTRING(LOS.[name],NUMS.N,1) AS XCHAR
,CHARINDEX(SUBSTRING(LOS.[name],NUMS.N,1),LOS.[name],NUMS.N + 1) - NUMS.N AS DISTANCE
FROM #ListOfStrings LOS
CROSS APPLY
(
SELECT TOP(LEN(LOS.[name])) ROW_NUMBER() OVER (ORDER BY @@VERSION) AS N
FROM T T1,T T22,T T3,T T4
) NUMS(N)
WHERE CHARINDEX(SUBSTRING(LOS.[name],NUMS.N,1),LOS.[name],NUMS.N + 1) - NUMS.N > 0
),
FIND_MATCH AS
(
SELECT
PD.RowID
,ROW_NUMBER() OVER (PARTITION BY PD.RowID ORDER BY PD.N + PD.DISTANCE) XRID
,PD.XCHAR
FROM POS_DIST PD
)
SELECT --FM.RowID,
FM.XCHAR
FROM FIND_MATCH FM
WHERE FM.XRID = 1
ORDER BY FM.RowID;
SET STATISTICS IO, TIME OFF;
PRINT char(10)+'== Chris3 '+REPLICATE('========================================',2);
SET STATISTICS IO, TIME ON;
WITH Tally -- Chris3
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT -- c.RowID,
XCHAR = SUBSTRING(c.name, x.n, 1)
FROM #ListOfStrings c
CROSS APPLY
(
SELECT TOP(1) n
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY @@VERSION)
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
ORDER BY n
) x
ORDER BY c.RowID;
SET STATISTICS IO, TIME OFF;
PRINT char(10)+'== Jason '+REPLICATE('========================================',2);
SET STATISTICS IO, TIME ON;
WITH
cte_psd AS
(
SELECT
los.RowID,
bin_val = CAST(p.pos AS BINARY(4)) + CAST(sv.sub_val AS BINARY(2)),
drnk = DENSE_RANK() OVER (PARTITION BY los.RowID, sv.sub_val ORDER BY p.pos)
FROM #ListOfStrings los
CROSS APPLY
(
SELECT TOP (LEN(los.name))
rn =ROW_NUMBER() OVER (ORDER BY n2.n)
FROM ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1) ) n1(n)
CROSS APPLY ( VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n2 (n) ) p (pos)
CROSS APPLY ( VALUES (SUBSTRING(los.name, p.pos, 1)) ) sv (sub_val)
)
SELECT --psd.RowID,
CAST(SUBSTRING(MIN(psd.bin_val), 5, 2) AS VARCHAR(2))
FROM cte_psd psd
WHERE psd.drnk > 1
GROUP BY psd.RowID
ORDER BY psd.RowID;
SET STATISTICS IO, TIME OFF;
PRINT char(10)+'== Alan '+REPLICATE('========================================',2);
SET STATISTICS IO, TIME ON;
SELECT XCHar
FROM #ListOfStrings los
CROSS APPLY
(
SELECT TOP (1) token --, position
FROM dbo.ngrams8k(los.name,1)
WHERE CHARINDEX(token,SUBSTRING(los.name,1,CHECKSUM(position-1))) > 0
ORDER BY position
) ngEng(XCHar);
SET STATISTICS IO, TIME OFF;
Results
== EE ================================================================================
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
SQL Server parse and compile time:
CPU time = 393 ms, elapsed time = 393 ms.
Table '#ListOfStrings______________________________________________________________________________________________________000000000013'. Scan count 9, logical reads 705, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 7283, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 7735 ms, elapsed time = 9586 ms.
== Chris3 ================================================================================
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 3 ms.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 135, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#ListOfStrings______________________________________________________________________________________________________000000000013'. Scan count 9, logical reads 705, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 407 ms, elapsed time = 579 ms.
== Jason ================================================================================
SQL Server parse and compile time:
CPU time = 74 ms, elapsed time = 74 ms.
Table '#ListOfStrings______________________________________________________________________________________________________000000000013'. Scan count 9, logical reads 705, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 8703 ms, elapsed time = 1534 ms.
== Alan ================================================================================
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 5 ms.
<NO WORKTABLE HERE>
Table '#ListOfStrings______________________________________________________________________________________________________000000000013'. Scan count 9, logical reads 705, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 347 ms, elapsed time = 462 ms.
As you can see, my solution is worktable free. While typing this up, however, I discovered a problem with Chris' solution - he includes ORDER BY c.RowID which adds overhead. Removing that removed the worktable on his. Here's the updated performance tests comparing Chris' and my solution. I ran them with serial and parallel execution plans:
PRINT char(10)+'== Chris3 (serial)'+REPLICATE('========================================',2);
GO
DECLARE @st datetime = getdate(), @x char(1);
WITH Tally -- Chris3
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT -- c.RowID,
@x = SUBSTRING(c.name, x.n, 1)
FROM #ListOfStrings c
CROSS APPLY
(
SELECT TOP(1) n
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY @@VERSION)
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
ORDER BY n
) x
OPTION (maxdop 1);
PRINT datediff(ms,@st,getdate());
GO 3
PRINT char(10)+'== Alan (serial)'+REPLICATE('========================================',2);
GO
DECLARE @st datetime = getdate(), @x char(1);
SELECT @x = XCHar
FROM #ListOfStrings los
CROSS APPLY
(
SELECT TOP (1) token --, position
FROM dbo.ngrams8k(los.name,1)
WHERE CHARINDEX(token,SUBSTRING(los.name,1,CHECKSUM(position-1))) > 0
ORDER BY position
) ngEng(XCHar)
OPTION (maxdop 1);
PRINT datediff(ms,@st,getdate());
GO 3
PRINT char(10)+'== Chris3 (parallel)'+REPLICATE('========================================',2);
GO
DECLARE @st datetime = getdate(), @x char(1);
WITH Tally -- Chris3
AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n))
SELECT -- c.RowID,
@x = SUBSTRING(c.name, x.n, 1)
FROM #ListOfStrings c
CROSS APPLY
(
SELECT TOP(1) n
FROM (
SELECT TOP(LEN(c.name)) n = ROW_NUMBER() OVER (ORDER BY @@VERSION)
FROM Tally a, Tally b, Tally c
) t
WHERE CHARINDEX(SUBSTRING(c.name, t.n, 1), LEFT(c.name, n-1)) > 0
ORDER BY n
) x
OPTION (querytraceon 8649);
PRINT datediff(ms,@st,getdate());
GO 3
PRINT char(10)+'== Alan (parallel)'+REPLICATE('========================================',2);
GO
DECLARE @st datetime = getdate(), @x char(1);
SELECT @x = XCHar
FROM #ListOfStrings los
CROSS APPLY
(
SELECT TOP (1) token --, position
FROM dbo.ngrams8k(los.name,1)
WHERE CHARINDEX(token,SUBSTRING(los.name,1,CHECKSUM(position-1))) > 0
ORDER BY position
) ngEng(XCHar)
OPTION (querytraceon 8649);
PRINT datediff(ms,@st,getdate());
GO 3
Results:== Chris3 (serial)================================================================================
Beginning execution loop
174
170
163
Batch execution completed 3 times.
== Alan (serial)================================================================================
Beginning execution loop
174
170
170
Batch execution completed 3 times.
== Chris3 (parallel)================================================================================
Beginning execution loop
66
56
63
Batch execution completed 3 times.
== Alan (parallel)================================================================================
Beginning execution loop
53
56
60
Batch execution completed 3 times.
Both identical. Ngrams just makes things simpler π
-- Itzik Ben-Gan 2001
Viewing 15 posts - 31 through 45 (of 46 total)
You must be logged in to reply to this topic. Login to reply