June 27, 2014 at 9:38 am
hi
anyone got an idea for a CTE that takes a string, and puts it into a table with the words in any order.
so the variable would be 'tesco mixed fruit'
and the data column in the table would be
tesco mixed fruit
mixed fruit tesco
fruit tesco mixed
mixed tesco fruit
fruit mixed tesco
tesco fruit mixed.
easy enough to split - which I'd thought was the requirement! - but it's taken on a new level of complexity...
thanks
June 27, 2014 at 9:50 am
If you can split it, just order the results by NEW_ID() and reconcatenate.
John
June 27, 2014 at 10:01 am
but I need it any order, not just the order it came in, or the reverse, but any combination.
and the variable could be up to say six words
June 27, 2014 at 10:11 am
peter.cox (6/27/2014)
but I need it any order, not just the order it came in, or the reverse, but any combination.and the variable could be up to say six words
You can use the DelimitedSplit8K function for this.
declare @SomeString varchar(50) = 'tesco mixed fruit';
with SplitValues as
(
select s.Item
from dbo.DelimitedSplit8K(@SomeString, ' ') s
)
select *
from SplitValues s1
cross join SplitValues s2
where s1.Item <> s2.Item
_______________________________________________________________
Need help? Help us help you.
Read the article at http://www.sqlservercentral.com/articles/Best+Practices/61537/ for best practices on asking questions.
Need to split a string? Try Jeff Modens splitter http://www.sqlservercentral.com/articles/Tally+Table/72993/.
Cross Tabs and Pivots, Part 1 β Converting Rows to Columns - http://www.sqlservercentral.com/articles/T-SQL/63681/
Cross Tabs and Pivots, Part 2 - Dynamic Cross Tabs - http://www.sqlservercentral.com/articles/Crosstab/65048/
Understanding and Using APPLY (Part 1) - http://www.sqlservercentral.com/articles/APPLY/69953/
Understanding and Using APPLY (Part 2) - http://www.sqlservercentral.com/articles/APPLY/69954/
June 27, 2014 at 9:56 pm
Building off of Sean's idea - this gives you the same functionality with up to 6 words
Note: since the number of permutations will grow factorially as you expand the number of possible words in a given set, scale at your own risk.
declare @SomeString varchar(50) = 'tesco mixed fruit bob joe frank'
;with
shortTally as (select top 6 Row_number() over (order by (select null)) n from sys.columns),
SplitValues as
(
select Row_number() over (order by (select null)) TokenID, s.Item
from dbo.DelimitedSplit8K(@SomeString, ' ') s
),
TokenCount as (select SUM(n) C from shortTally where n<= (select COUNT(item) from SplitValues)),
Cartesian as (
select T1.n N1,
case when c>1 then T2.n else 0 end n2,
case when c>3 then T3.n else 0 end n3,
case when c>6 then T4.n else 0 end n4,
case when c>10 then T5.n else 0 end n5,
case when c>15 then T6.n else 0 end n6
from shortTally T1
join shortTally t2 on T1.n<>t2.n
join shortTally t3 on t1.n<>t3.n and t2.n<>t3.n
join shortTally t4 on t1.n<>t4.n and t2.n<>t4.n and t3.n<>t4.n
join shortTally t5 on t1.n<>t5.n and t2.n<>t5.n and t3.n<>t5.n and t4.n<>t5.n
join shortTally t6 on t1.n<>t6.n and t2.n<>t6.n and t3.n<>t6.n and t4.n<>t6.n and t5.n<>t6.n
cross join tokencount
)
select s1.item+
coalesce(' '+s2.item,'')+
coalesce(' '+s3.item,'')+
coalesce(' '+s4.item,'')+
coalesce(' '+s5.item,'')+
coalesce(' '+s6.item,'')
from Cartesian
left join SplitValues s1 on N1=s1.TokenID
left join SplitValues s2 on N2=s2.TokenID
left join SplitValues s3 on N3=s3.TokenID
left join SplitValues s4 on N4=s4.TokenID
left join SplitValues s5 on N5=s5.TokenID
left join SplitValues s6 on N6=s6.TokenID
where N1+n2+n3+n4+n5+n6 = (select C from TokenCount)
----------------------------------------------------------------------------------
Your lack of planning does not constitute an emergency on my part...unless you're my manager...or a director and above...or a really loud-spoken end-user..All right - what was my emergency again?
June 28, 2014 at 12:58 am
funnily enough, last night I came up with a function. I shall compare the two, but many thanks
GO
/****** Object: UserDefinedFunction [dbo].[split_test] Script Date: 28/06/2014 07:57:00 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[split_test]
(
@String NVARCHAR(4000),
@Delimiter NCHAR(1)
)
RETURNS TABLE
AS
RETURN
(
WITH Split(stpos,endpos)
AS(
SELECT 0 AS stpos, CHARINDEX(@Delimiter,@String) AS endpos
UNION ALL
SELECT endpos+1, CHARINDEX(@Delimiter,@String,endpos+1)
FROM Split
WHERE endpos > 0
)
---- SELECT 'Id' = ROW_NUMBER() OVER (ORDER BY (SELECT 1)),
---- 'Data' = ltrim(rtrim(SUBSTRING(@String,a.stpos,COALESCE(NULLIF(a.endpos,0),LEN(@String)+1)-a.stpos)))
---- FROM Split a
----CROSS apply split b
, basedata(id, [text])
AS ( SELECT 'Id' = ROW_NUMBER() OVER (ORDER BY (SELECT 1)),
'Data' = ltrim(rtrim(SUBSTRING(@String,a.stpos,COALESCE(NULLIF(a.endpos,0),LEN(@String)+1)-a.stpos)))
FROM Split a
CROSS apply split b),
cte(id, t, x)
AS (SELECT *,
CAST('<foo>' + REPLACE(REPLACE([text],'(','<bar>'),')','</bar>') + '</foo>' AS XML)
FROM basedata)
--SELECT * FROM basedata
SELECT DISTINCT a.text + ' '+ ISNULL(b.text,'') + ' ' + ISNULL(c.text,'') + ' ' + ISNULL(d.text,'') + ' ' + ISNULL(e.text,'') data FROM basedata a
LEFT outer JOIN basedata b ON a.text<>b.text --AND b.text<>c.text
LEFT outer JOIN basedata c ON a.text<>c.text AND c.text<>b.text
LEFT outer JOIN basedata d ON a.text<>d.text AND c.text<>d.text AND d.text<> b.text
LEFT outer JOIN basedata e ON a.text<>e.text AND b.text<>e.text AND d.text<> e.text AND c.text<> e.text
)
June 28, 2014 at 8:24 pm
peter.cox (6/27/2014)
hianyone got an idea for a CTE that takes a string, and puts it into a table with the words in any order.
so the variable would be 'tesco mixed fruit'
and the data column in the table would be
tesco mixed fruit
mixed fruit tesco
fruit tesco mixed
mixed tesco fruit
fruit mixed tesco
tesco fruit mixed.
easy enough to split - which I'd thought was the requirement! - but it's taken on a new level of complexity...
thanks
Now that you have a couple of possible solutions, why do you need to do this? What's the business reason?
p.s. That WHILE loop cleverly disguised as a recursive CTE probably isn't the best idea for a splitter.
--Jeff Moden
Change is inevitable... Change for the better is not.
December 10, 2014 at 11:59 pm
Two way street here. I know it' been several months but I'm still interested in why you needed to do this.
--Jeff Moden
Change is inevitable... Change for the better is not.
December 11, 2014 at 4:48 am
Sorry! Been ages.
It was just a business requirement to search a free form field. Obviously with that being the case, people have the ability to put the data in any order, with varying quantities of spaces etc. We need to be able to search everything, just to make sure any possibly variation is pulled up.
December 11, 2014 at 6:13 am
You may get some ideas from this:
;WITH
E1 AS (SELECT n = 0 FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n)),
iTally AS (SELECT n = ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) FROM E1 a, E1 b)
SELECT *
FROM iTally i
CROSS APPLY (
SELECT Word AS 'data()'
FROM (
SELECT TOP (ABS(n+CHECKSUM(NEWID()))%9+1) word
FROM (
SELECT word = CAST('the' AS VARCHAR(200)) UNION ALL
SELECT 'quick' UNION ALL
SELECT 'brown' UNION ALL
SELECT 'fox' UNION ALL
SELECT 'jumped' UNION ALL
SELECT 'over' UNION ALL
SELECT 'the' UNION ALL
SELECT 'lazy' UNION ALL
SELECT 'dog'
) Words ORDER BY NEWID()) u2
FOR XML PATH('')
) iTVF(Sentence)
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
December 11, 2014 at 7:15 am
peter.cox (12/11/2014)
Sorry! Been ages.It was just a business requirement to search a free form field. Obviously with that being the case, people have the ability to put the data in any order, with varying quantities of spaces etc. We need to be able to search everything, just to make sure any possibly variation is pulled up.
Quick thought, not certain that this permutation method is the best approach. Each "item" in the input can be matched individually as demonstrated in the code below. The code is slightly long winded in order to make it more readable/self explanatory.
π
USE tempdb;
GO
SET NOCOUNT ON;
/* The "input", that is the text to serch */
IF OBJECT_ID('dbo.TBL_INPUTSTRING') IS NOT NULL DROP TABLE dbo.TBL_INPUTSTRING;
CREATE TABLE dbo.TBL_INPUTSTRING
(
IS_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_INPUTSTRING_IS_ID PRIMARY KEY CLUSTERED
,IS_STR VARCHAR(50) NOT NULL
);
/* The match set, each group id (MTC_GRP) groups the individual values */
IF OBJECT_ID('dbo.TBL_MATCH') IS NOT NULL DROP TABLE dbo.TBL_MATCH;
CREATE TABLE dbo.TBL_MATCH
(
MTC_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_MATCH_MTC_ID PRIMARY KEY CLUSTERED
,MTC_GRP INT NOT NULL
,MTC_STR VARCHAR(50) NOT NULL
);
/* Sample input */
INSERT INTO dbo.TBL_INPUTSTRING(IS_STR)
VALUES ('tesco mixed fruit' )
,('mixed fruit tesco' )
,('fruit tesco mixed' )
,('mixed tesco fruit' )
,('fruit mixed asda' )
,(' fruit asda mixed ' )
,('fruit mixed tesco' )
,('waitrose has no fruit' )
,('morrison has mixed fruit not tesco')
,('fruit mixed' );
/* Sample match sets */
INSERT INTO dbo.TBL_MATCH(MTC_GRP,MTC_STR)
VALUES ( 1,'tesco')
,( 1,'mixed')
,( 1,'fruit')
,( 2,'mixed')
,( 2,'fruit')
,( 2,'asda' )
,( 3,'tesco')
,( 3,'mixed')
,( 4,'fruit')
,( 4,'mixed')
,( 5,'morrison')
,( 5,'has')
,( 5,'mixed')
,( 5,'fruit')
,( 5,'not')
,( 5,'tesco')
;
/* The input and the count of "items" in each */
;WITH BASE_DATA AS
(
SELECT
I.IS_ID
,LEN(LTRIM(RTRIM(I.IS_STR))) - LEN(LTRIM(RTRIM(REPLACE(I.IS_STR,CHAR(32),'')))) + 1 AS WRD_CNT
,CHAR(32) + IS_STR + CHAR(32) AS PDSTR
FROM dbo.TBL_INPUTSTRING I
)
/* The search set with count of item in each group */
,SEARCH_DATA AS
(
SELECT
M.MTC_ID
,M.MTC_GRP
,M.MTC_STR
,COUNT(*) OVER
(
PARTITION BY M.MTC_GRP
) AS GRP_CNT
FROM dbo.TBL_MATCH M
)
,MATCH_SET AS
(
SELECT
BD.IS_ID
,BD.PDSTR
,BD.WRD_CNT
,M.MTC_GRP
,M.GRP_CNT
,COUNT(M.MTC_ID) OVER
(
PARTITION BY BD.IS_ID,M.MTC_GRP
) AS MM_CNT
FROM BASE_DATA BD
CROSS APPLY SEARCH_DATA M
WHERE CHARINDEX(CHAR(32) + M.MTC_STR + CHAR(32),BD.PDSTR,1) > 0
)
SELECT
MS.IS_ID
,MS.PDSTR
,MS.MTC_GRP
FROM MATCH_SET MS
WHERE MS.WRD_CNT = MS.GRP_CNT
AND MS.WRD_CNT = MS.MM_CNT
GROUP BY MS.IS_ID
,MS.PDSTR
,MS.MTC_GRP;
Results
IS_ID PDSTR MTC_GRP
------ ------------------------------------- --------
1 tesco mixed fruit 1
2 mixed fruit tesco 1
3 fruit tesco mixed 1
4 mixed tesco fruit 1
5 fruit mixed asda 2
6 fruit asda mixed 2
7 fruit mixed tesco 1
9 morrison has mixed fruit not tesco 5
10 fruit mixed 4
December 12, 2014 at 6:00 am
I second searching for individual words, not permutations. The permutation approach scales horribly: 10 words give you 3.6M permutiations; 12 words give you 480M
Now you might assume the user will never enter more than 6 words, but users have ways of surprising you!
Viewing 12 posts - 1 through 11 (of 11 total)
You must be logged in to reply to this topic. Login to reply