January 9, 2015 at 11:58 pm
Hi,
I need some help with a split function.
i have records of multiple users, the last value of every record is a contact number (10 Digits- Numeric), I want a split function which can take the whole text and split the records on the basis of contact number.
In order words i want SQL to locate the contact number and move to the next record after that and so on till the end of the text.
create table
tbl_1
(txt varchar (max))
insert into tbl_1 values ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,
finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596
jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical
sales and support good 8137 91630 8.25 126 eileen oneal 8258364083
mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12
executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558
joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin
general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')
output
john jimenez 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596
jeffrey galione 57 allen dr wayne nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083
mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558
joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751
Regards,
Shafat Husain
🙂
And Your Lord Never Forgets...!! (64:19 -- Quran)
January 10, 2015 at 2:12 am
Something like this, i couldn't find any other way .
CREATE TABLE #temp
(
ID INT IDENTITY(1,1),
txt varchar (max)
)
DECLARE @txt1 varchar (max)
DECLARE @txt varchar (max)
SET @txt1 = 'john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,
finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596
jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical
sales and support good 8137 91630 8.25 126 eileen oneal 8258364083
mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12
executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558
joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin
general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751'
SET @txt = @txt1
WHILE LEN(@txt) > 0
BEGIN
SELECT @txt = SUBSTRING(@txt1,0, PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',@txt1)+10 ) --from tbl_1
INSERT #temp
SELECT @txt
SET @txt1 = SUBSTRING(@txt1,LEN(@txt)+1,LEN(@txt1)-LEN(@txt) )
--SELECT @txt1
END
SELECT * FROM #temp
DROP TABLE #temp
January 10, 2015 at 2:55 am
Quick but slightly verbose solution, does only scan the table once but that comes at an additional cost, would be a perfect case for LAG/LEAD on SQL Server 2012 or later.
😎
USE tempdb;
GO
SET NOCOUNT ON;
IF OBJECT_ID(N'dbo.TBL_USERTEXT') IS NOT NULL DROP TABLE dbo.TBL_USERTEXT;
create table dbo.TBL_USERTEXT
(
U_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED
,U_TXT VARCHAR(MAX) NOT NULL
);
insert into dbo.TBL_USERTEXT(U_TXT)
VALUES ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751');
;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,BASE_DATA AS
(
SELECT
UT.U_ID
,UT.U_TXT
,NM.N
,NM.N - ROW_NUMBER() OVER
(
PARTITION BY UT.U_ID
ORDER BY NM.N
) AS P_RID
FROM dbo.TBL_USERTEXT UT
CROSS APPLY
(
SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9
) AS NM
WHERE SUBSTRING(UT.U_TXT,NM.N,1) LIKE '[0-9]'
)
,FIND_END_PATTERN AS
(
SELECT
BD.U_ID
,MAX(BD.N) AS MAX_N
,BD.P_RID
,COUNT(BD.N) AS NCOUNT
,BD.U_TXT
FROM BASE_DATA BD
GROUP BY BD.U_ID,BD.P_RID,BD.U_TXT
)
,NUMBERED_GROUP AS
(
SELECT
FEP.U_ID
,ROW_NUMBER() OVER
(
PARTITION BY FEP.U_ID
ORDER BY FEP.MAX_N
) AS GRP_RID
,FEP.MAX_N
,FEP.U_TXT
FROM FIND_END_PATTERN FEP
WHERE FEP.NCOUNT = 10
)
,GROUP_POSITION AS
(
SELECT
NG.U_ID
,NG.GRP_RID
,MIN(NG.MAX_N) OVER (PARTITION BY (NG.GRP_RID + (NG.GRP_RID % 2))) AS POS_A
,MIN(NG.MAX_N) OVER (PARTITION BY (NG.GRP_RID - (NG.GRP_RID % 2))) AS POS_B
,(NG.GRP_RID % 2) AS POS_FLG
,NG.MAX_N
,NG.U_TXT
FROM NUMBERED_GROUP NG
)
SELECT
GP.U_ID
,GP.GRP_RID
,LTRIM(SUBSTRING(GP.U_TXT, CASE
WHEN GP.GRP_RID = 1 THEN 1
WHEN GP.POS_A > GP.POS_B THEN GP.POS_B + 1
ELSE GP.POS_A + 1
END
,(GP.MAX_N - CASE
WHEN GP.GRP_RID = 1 THEN 1
WHEN GP.POS_A > GP.POS_B THEN GP.POS_B
ELSE GP.POS_A
END) + 0)) AS STR_PART
FROM GROUP_POSITION GP;
Results
U_ID GRP_RID STR_PART
----------- -------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 1 john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 557562159
1 2 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083
1 3 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558
1 4 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751
Edit: Fixed error in the code
January 10, 2015 at 5:14 am
For completeness (and some fun :-)) here are two other solutions for 2012 and later
😎
Almost direct translation to window function version, uses LAG instead of aggregation for the cross row reference, around 25% improvement over the previous non window function version.
;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,BASE_DATA AS
(
SELECT
UT.U_ID
,UT.U_TXT
,NM.N
,NM.N - ROW_NUMBER() OVER
(
PARTITION BY (SELECT NULL)
ORDER BY NM.N
) AS P_RID
FROM dbo.TBL_USERTEXT UT
CROSS APPLY
(
SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9
) AS NM
WHERE SUBSTRING(UT.U_TXT,NM.N,1) LIKE '[0-9]'
)
,FIND_END_PATTERN AS
(
SELECT
BD.U_ID
,MAX(BD.N) AS MAX_N
,BD.P_RID
,COUNT(BD.N) AS NCOUNT
,BD.U_TXT
FROM BASE_DATA BD
GROUP BY BD.U_ID,BD.P_RID,BD.U_TXT
)
,GROUP_POSITION AS
(
SELECT
FEP.U_ID
,LAG(FEP.MAX_N,1,0) OVER
(
PARTITION BY FEP.U_ID
ORDER BY FEP.MAX_N
) + 1 AS START_POS
,FEP.MAX_N
,FEP.U_TXT
FROM FIND_END_PATTERN FEP
WHERE FEP.NCOUNT = 10
)
SELECT
GP.U_ID
,ROW_NUMBER() OVER
(
PARTITION BY GP.U_ID
ORDER BY GP.START_POS
) AS GRP_RID
,LTRIM(SUBSTRING(GP.U_TXT,GP.START_POS,(GP.MAX_N - GP.START_POS) + 1))
FROM GROUP_POSITION GP;
Slightly streamlined window function version, around 50% improvement over the previous non window function version.
;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))
,BASE_DATA AS
(
SELECT
UT.U_ID
,UT.U_TXT
,NM.N
,NM.N - ROW_NUMBER() OVER
(
PARTITION BY UT.U_ID
ORDER BY NM.N
) AS P_RID
FROM dbo.TBL_USERTEXT UT
CROSS APPLY
(
SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9
) AS NM
WHERE ASCII(SUBSTRING(UT.U_TXT,NM.N,1)) ^ 0x30 < 10
)
,GROUP_SET AS
(
SELECT
BD.U_ID
,MAX(BD.N)AS MAX_N
,BD.U_TXT
FROM BASE_DATA BD
GROUP BY BD.U_ID
,BD.P_RID
,BD.U_TXT
HAVING COUNT(BD.N) = 10
)
SELECT
GS.U_ID
,ROW_NUMBER() OVER
(
PARTITION BY GS.U_ID
ORDER BY GS.MAX_N
) AS GR_NO
,LTRIM(SUBSTRING(GS.U_TXT
,LAG(GS.MAX_N,1,-1) OVER ( ORDER BY GS.MAX_N ) + 1
,GS.MAX_N - LAG(GS.MAX_N,1,1) OVER ( ORDER BY GS.MAX_N ))
) AS TXT_PART
FROM GROUP_SET GS;
January 10, 2015 at 6:54 am
Why would anyone take the time to "split" the data, clean it, and not normalize it? :blink:
--Jeff Moden
Change is inevitable... Change for the better is not.
January 11, 2015 at 6:38 pm
Good question Jeff.
Another question: So I've been away from the forum for 6 months and everyone has already forgotten how to use a pattern splitter?
create table
#tbl_1
(txt varchar (max))
insert into #tbl_1 values ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive, finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751');
WITH DoTheSplit AS
(
SELECT txt, ItemNumber, Item, [Matched]
,n=CASE WHEN ItemNumber = 1 THEN ItemNumber
WHEN LEN(item) = 10 AND [Matched]=1 THEN ItemNumber
ELSE 0
END
, a=MAX(ItemNumber) OVER ()
FROM #tbl_1
CROSS APPLY dbo.PatternSplitCM(txt, '[0-9]')
)
SELECT LTRIM(
(
SELECT '' + b.Item
FROM DoTheSplit b
WHERE ItemNumber BETWEEN m1 AND m2
ORDER BY ItemNumber
FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'
))
FROM
(
SELECT m1=MIN(m), m2=MAX(m)
FROM
(
SELECT m, rn=(1+ROW_NUMBER() OVER (ORDER BY m))/2
FROM DoTheSplit a
CROSS APPLY
(
VALUES(CASE ItemNumber WHEN 1 THEN 0 WHEN a THEN 0 ELSE n+1 END)
,(n)
) c (m)
WHERE n <> 0 AND m <> 0
) a
GROUP BY rn
) a;
GO
DROP TABLE #tbl_1;
Details on PatternSplitCM can be found in my signature links.
My thought question: Have you ever been told that your query runs too fast?
My advice:
INDEXing a poor-performing query is like putting sugar on cat food. Yeah, it probably tastes better but are you sure you want to eat it?
The path of least resistance can be a slippery slope. Take care that fixing your fixes of fixes doesn't snowball and end up costing you more than fixing the root cause would have in the first place.
Need to UNPIVOT? Why not CROSS APPLY VALUES instead?[/url]
Since random numbers are too important to be left to chance, let's generate some![/url]
Learn to understand recursive CTEs by example.[/url]
[url url=http://www.sqlservercentral.com/articles/St
January 12, 2015 at 5:03 am
Although recursive ctes are known to have perfomance problems you may wish to try it as well. Having no joins it may perfom not so bad.
create table dbo.TBL_USERTEXT
(
U_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED
,U_TXT VARCHAR(MAX) NOT NULL
);
insert into dbo.TBL_USERTEXT(U_TXT)
VALUES ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')
, ('**john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 **jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 **mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 ***joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')
;
insert into dbo.TBL_USERTEXT(U_TXT)
select U_TXT from dbo.TBL_USERTEXT , sys.all_objects
;
with r1 as (
select
left(U_Txt, idx.n) res, substring(U_Txt,idx.n,len(U_Txt)) U_Txt
from dbo.TBL_USERTEXT
cross apply (select PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',U_txt)+10 as N ) idx
where idx.n>10
union all
select
left(U_Txt, idx.n) res, substring(U_Txt,idx.n,len(U_Txt)) U_Txt
from r1
cross apply (select PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',U_txt)+10 as N ) idx
where idx.n>10
)
select res from r1
Viewing 7 posts - 1 through 6 (of 6 total)
You must be logged in to reply to this topic. Login to reply