Fuzzy-String Search: Find misspelled information with T-SQL
An optimized Damerau-Levenshtein Distance (DLD) algorithm for "fuzzy" string matching in Transact-SQL 2000-2008
2014-01-10 (first published: 2012-09-18)
30,549 reads
/* This will name Ignite SQL hashes that have not been named yet, based on the object (or batch) they are within. ** Runs on MS-SQL2005+ (uses VARCHAR(MAX) to concatenate SQL sections). ** Run this script in ignite_repository or dpa_repository database (but if you do not, it will just error and exit without doing anything). ** @DOIT = 0 by default so no changes will be made; change @DOIT = 1 to actually add the names. ** It works best if you have a shorthand code for the object (or batch), with no spaces in it. ** Place a comment like this near the beginning of the batch (at the end of the CREATE line of the object): CREATE PROCEDURE [schema].[procname] -- Ignite=ShortObjCode VersionCode ** Place a comment like this within or directly after each statement: -- Ignite=ShortObjCode ShortStmtCode ** The hash for the statement will then be named "ShortObjCode ShortStmtCode VersionCode" as long as ** Ignite stored the entire object script (or batch), which it might not if it was very long. ** But it will work without such comments, by using the first 18 chars of the Object name. ** YYMMDD and the last 4 digits of the hash are used, if necessary, to enforce uniqueness. ** The last result set will be the SQL Text that still could not be named (not in an object, and no such comment). ** If you are unable to inject such comments into the SQL, you may be able to see a pattern in the SQL ** which would enable you to synthesize an 18-char "object name" yourself. Search for "synthesize" below to see where. ** The last column of both result sets is a link to the Historical Chart for that SQL, so you can check its usage. ** (That link is built assuming Ignite is running on localhost, but you can just replace localhost below.) ** Free to use, developed by tomkellerconsulting.com 14 Sep 2012. ** Submitted to Confio Support 13 March 2013. Submitted to sqlservercentral.com 25 Jan 2014. ** Updated 28 Feb 2014 to use LEFT(NEWID(), 4) -- random ** instead of RIGHT(CONVERT(VARCHAR(20), TS.H), 4) -- last 4 digits of the hash ** to avoid duplicates in large sets of un-named hashes. Also added "double-check not duplicate" to final insert. ** Also set @DOIT = 0 by default so no changes will be made unless requested, and select Note if no names inserted. ** Updated 17 Feb 2015 to handle 0 SS and -1 SE (specified statement start and end, replace with 1 and sql text length). ** Also mention dpa_repository as altenative to ignite_repository since Confio Ignite is now SolarWinds DPA. ** Also specify all column names in selects, so that STMT_Specified_Statement and ST_SQL_Text can be XML links. */DECLARE @dbid VARCHAR(3), @dbname NVARCHAR(100), @tbl_name SYSNAME , @sch SYSNAME, @SQL NVARCHAR(4000), @IgniteHost VARCHAR(100) , @P INT, @C VARCHAR(30), @L INT, @DOIT INT -- in case you are nervous about executing this script, SELECT @C = '-- Ignite=', @L = LEN( @C), @DOIT = 0 -- set @DOIT to 0 to just see what names it would assign , @IgniteHost = 'http://localhost:8123/iwc/sqlDisplay.iwc?' -- change localhost to where you access Ignite /*create temp tables*/ IF OBJECT_ID('tempdb..#tbl_csr', 'U') IS NOT NULL DROP TABLE #tbl_csr CREATE TABLE #tbl_csr (id SMALLINT NOT NULL PRIMARY KEY, name NVARCHAR(100) NOT NULL) IF OBJECT_ID('tempdb..#tmpSNsql', 'U') IS NOT NULL DROP TABLE #tmpSNsql CREATE TABLE #tmpSNsql (I INT IDENTITY(1,1) NOT NULL PRIMARY KEY , id SMALLINT NOT NULL, instance NVARCHAR(100) NOT NULL , H BIGINT NOT NULL UNIQUE , D DATETIME NOT NULL , P INT NOT NULL -- section number , SS INT NOT NULL -- start of specified statement , SE INT NOT NULL -- end of specified statement (-1 for rest of SQL Text) , STMTL INT NOT NULL -- length of specified statement , STL INT NOT NULL -- SQL Text Length , STMTP INT NULL -- position of @C comment in specified statement , STP INT NULL -- position of @C comment in SQL Text, should be on object CREATE statement , PNAME VARCHAR(30) NULL -- object that SQL Text should include CREATE statement of , STMTC VARCHAR(30) NULL -- @C comment in specified statement , STC VARCHAR(30) NULL -- @C comment in SQL Text , NM VARCHAR(30) NULL -- limited in Confio Ignite GUI, although ignite.CON_SQL_NAME.NAME allows 100 , STMT VARCHAR(3500) NULL -- specified statement , ST VARCHAR(MAX) NOT NULL -- SQL Text ) /*I am going to verify the schema that owns the Ignite objects, so that we make certain to qualify our object names*/ SET NOCOUNT ON SELECT @sch = SCHEMA_NAME(schema_id) FROM sys.objects WHERE name = 'COND' IF ( @@ROWCOUNT <> 1 ) BEGIN SET NOCOUNT OFF PRINT 'Please run this script in ignite_repository or dpa_repository database, where COND table is.' END ELSE BEGIN /*OK, insert the contents of COND into a table variable, and use that for outer cursor*/ SET @SQL = 'INSERT INTO #tbl_csr SELECT id, name FROM ' + @sch + '.COND' EXEC sp_executesql @SQL -- SELECT * FROM #tbl_csr /*build cursor to loop through registered dbs*/ DECLARE out_cursor CURSOR FOR SELECT id, name FROM #tbl_csr ORDER BY id OPEN out_cursor FETCH NEXT FROM out_cursor INTO @dbid, @dbname WHILE @@FETCH_STATUS = 0 BEGIN /*ok, we want to name the SQL for each registered db*/ SET @P = 0 -- get just first section of multi-segment SQL that was split PRINT 'SQL executed to gather hashes without names per instance:' SET @SQL = 'INSERT INTO #tmpSNsql (id, instance, H, D, P, SS, SE, STMTL, STL, PNAME, ST) SELECT ' + CONVERT(VARCHAR(5), @dbid) + ' AS id, ' + QUOTENAME(@dbname, '''') + ' AS instance , ST.H, ST.D, ST.P , ISNULL(NULLIF( ST.SS, 0), 1) AS SS , ISNULL(NULLIF( ST.SE, -1), LEN(ST.ST)) AS SE , ISNULL(NULLIF( ST.SE, -1), LEN(ST.ST)) -ISNULL(NULLIF( ST.SS, 0), 1) +1 AS STMTL -- SE less SS , LEN(ST.ST) AS STL , LEFT(RIGHT( ST.PNAME, CHARINDEX(''.'', REVERSE( ST.PNAME) + ''.'') -1), 30) AS PNAME -- just name, no schema , CAST(ST.ST AS VARCHAR(MAX)) AS ST FROM [' + @sch + '].[CONST_' + @dbid + '] ST LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN ON SN.[HASH] = ST.H LEFT OUTER JOIN #tmpSNsql TS ON TS.H = ST.H WHERE ST.P = ' + CONVERT(VARCHAR(10), @P) + ' -- just first section AND (ST.PNAME IS NOT NULL OR ST.ST IS NOT NULL) -- have object name or SQL Text AND SN.[HASH] IS NULL -- not yet named AND TS.H IS NULL -- not already collected (same hash can be on diff instance if db was restored across) ORDER BY D, H ' PRINT (@SQL) EXEC sp_executesql @SQL WHILE ( @@ROWCOUNT > 0 ) BEGIN -- append following sections of SQL that was split SET @P = @P +1 -- next section PRINT 'SQL executed to append following sections of SQL that was split, per instance, per section:' SET @SQL = 'UPDATE TS SET TS.STL = TS.STL + LEN(ST.ST) , TS.ST = TS.ST + ST.ST , TS.P = ST.P FROM #tmpSNsql TS JOIN [' + @sch + '].[CONST_' + @dbid + '] ST ON ST.H = TS.H AND ST.P = ' + CONVERT(VARCHAR(10), @P) + ' -- section number ' PRINT (@SQL) EXEC sp_executesql @SQL END FETCH NEXT FROM out_cursor INTO @dbid, @dbname END CLOSE out_cursor; DEALLOCATE out_cursor; /* get specified statement */ UPDATE #tmpSNsql SET STMT = LEFT(SUBSTRING( ST, SS, STMTL), 3500) WHERE STMTL > 0 AND SE <= STL; /* get position of @C comment in SQL Text, should be on object CREATE statement */ WITH A AS ( SELECT H, PATINDEX('%' + @C + '%', ST) AS STP FROM #tmpSNsql ) UPDATE TS SET TS.STP = A.STP FROM A JOIN #tmpSNsql TS ON TS.H = A.H WHERE A.STP > 0; /* get position of @C comment in specified statement */ WITH A AS ( SELECT H, PATINDEX('%' + @C + '%', STMT) AS STMTP FROM #tmpSNsql ) UPDATE TS SET TS.STMTP = A.STMTP FROM A JOIN #tmpSNsql TS ON TS.H = A.H WHERE A.STMTP > 0 /*get comment in SQL*/ UPDATE #tmpSNsql SET STC = LEFT(SUBSTRING( ST, STP +@L, CHARINDEX(CHAR(13), ST +CHAR(13), STP + @L) -STP -@L), 30) WHERE STP IS NOT NULL /*get comment in STMT*/ UPDATE #tmpSNsql SET STMTC = LEFT(SUBSTRING( STMT, STMTP +@L, CHARINDEX(CHAR(13), STMT +CHAR(13), STMTP + @L) -STMTP -@L), 30) WHERE STMTP IS NOT NULL; /*combine comments*/ WITH A AS ( SELECT H, STC, STMTC, CHARINDEX(' ', STMTC) AS STMTSP, CHARINDEX(' ', STC) AS STSP -- space positions FROM #tmpSNsql WHERE STMTC IS NOT NULL AND STC IS NOT NULL ), B AS ( SELECT H, STC, STMTC, STSP, LEFT( STC, STSP -1) AS STP, LEFT( STMTC, STMTSP -1) AS STMTP -- prefixes FROM A WHERE A.STMTSP = A.STSP ), C AS ( SELECT H, STP, RTRIM(RIGHT( STC, LEN( STC) -STSP)) AS STS, RTRIM(RIGHT( STMTC, LEN( STMTC) -STSP)) AS STMTS -- suffixes FROM B WHERE STMTP = STP -- compare prefixes, which should both be shorthand identifier for same object ) UPDATE TS SET TS.NM = C.STP + ' ' + LEFT( C.STMTS, 30 - LEN(C.STP) - LEN(C.STS) -2) + ' ' + C.STS FROM C JOIN #tmpSNsql TS ON TS.H = C.H WHERE C.STS LIKE 'v[0-9]%' /* synthesize an 18-char "object name" yourself if necessary */ UPDATE TS SET PNAME = LEFT('IgniteRestart', 18) FROM #tmpSNsql TS WHERE TS.NM IS NULL AND TS.PNAME IS NULL AND TS.ST LIKE '-- Isolate top waits for server instance since last restart or statistics clear%' /* add more like this as necessary: UPDATE TS SET PNAME = LEFT((constant or function of TS.ST), 18) FROM #tmpSNsql TS WHERE TS.NM IS NULL AND TS.PNAME IS NULL AND (some pattern is detected in TS.ST) */ PRINT 'SQL executed once, to add suffix where necessary for uniqueness:' SET @SQL = 'UPDATE TS SET -- SELECT TS.H, TS.NM, TS.STMTC, TS.STC, TS.PNAME, NM = LEFT(COALESCE( TS.NM, TS.STMTC, TS.STC, TS.PNAME), 30 -6-1 -4-1) + '' '' + RIGHT(CONVERT(CHAR(8), D, 112), 6) -- YYMMDD + '' '' + LEFT(NEWID(), 4) -- random FROM #tmpSNsql TS LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN ON SN.NAME = TS.NM LEFT OUTER JOIN (SELECT NM, COUNT(*) AS Dups FROM #tmpSNsql GROUP BY NM HAVING COUNT(*) > 1 ) D ON D.NM = TS.NM WHERE (TS.NM IS NULL -- no name calculated yet OR SN.NAME IS NOT NULL -- name already used OR D.NM IS NOT NULL -- duplicate in current set ) AND (TS.NM IS NOT NULL OR TS.PNAME IS NOT NULL) -- name calculated, or have object name ' PRINT (@SQL) EXEC sp_executesql @SQL SELECT 'Un-named SQLs with NM to be assigned' AS [First Set IS] SET NOCOUNT OFF -- show the count of names about to set SELECT TS.I, TS.id, TS.instance, TS.H, TS.D, TS.P, TS.SS, TS.SE, TS.STMTL, TS.STL, TS.STMTP, TS.STP, TS.PNAME, TS.STMTC, TS.STC, TS.NM , CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.STMT + NCHAR(13) + NCHAR(10) + N'--?>') AS STMT_Specified_Statement , CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.ST + NCHAR(13) + NCHAR(10) + N'--?>') AS ST_SQL_Text , @IgniteHost + 'db_id=' + CONVERT(VARCHAR(5), TS.id) + '&sql=' + CONVERT(VARCHAR(20), TS.H) AS Historical_Chart FROM #tmpSNsql TS WHERE TS.NM IS NOT NULL -- name calculated (maybe just object name) ORDER BY TS.I /* actually do the insert of new names */ SET @SQL = 'INSERT INTO [' + @sch + '].[CON_SQL_NAME] ([HASH], NAME) SELECT TS.H, TS.NM FROM #tmpSNsql TS LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN1 ON SN1.NAME = TS.NM LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN2 ON SN2.[HASH] = TS.H LEFT OUTER JOIN (SELECT NM, COUNT(*) AS Dups FROM #tmpSNsql GROUP BY NM HAVING COUNT(*) > 1 ) D ON D.NM = TS.NM WHERE SN1.NAME IS NULL -- double-check name not already used AND SN2.[HASH] IS NULL -- double-check not already named AND D.NM IS NULL -- double-check not duplicate AND TS.NM IS NOT NULL -- name calculated (maybe just object name) ' PRINT (@SQL) IF ( @DOIT = 1 ) BEGIN EXEC sp_executesql @SQL IF ( @@ROWCOUNT = 0 ) SELECT 'No hash names actually inserted' AS [Note] END ELSE SELECT 'Did not actually execute the SQL to insert hash names (set @DOIT=1 to do so).' AS [Note] SET NOCOUNT ON SELECT 'Un-named SQLs which could not be named (maybe you can synthesize an 18-char "object name" yourself)' AS [Last Set IS] SET NOCOUNT OFF -- show the count of names could not set SELECT TS.I, TS.id, TS.instance, TS.H, TS.D, TS.P, TS.SS, TS.SE, TS.STMTL, TS.STL, TS.STMTP, TS.STP, TS.PNAME, TS.STMTC, TS.STC, TS.NM , CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.STMT + NCHAR(13) + NCHAR(10) + N'--?>') AS STMT_Specified_Statement , CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.ST + NCHAR(13) + NCHAR(10) + N'--?>') AS ST_SQL_Text , @IgniteHost + 'db_id=' + CONVERT(VARCHAR(5), TS.id) + '&sql=' + CONVERT(VARCHAR(20), TS.H) AS Historical_Chart FROM #tmpSNsql TS WHERE TS.NM IS NULL -- name NOT calculated (no object name) ORDER BY TS.ST, TS.I END -- in ignite_repository or dpa_repository