Split Function on the basis of Contact Number

  • Hi,

    I need some help with a split function.

    i have records of multiple users, the last value of every record is a contact number (10 Digits- Numeric), I want a split function which can take the whole text and split the records on the basis of contact number.

    In order words i want SQL to locate the contact number and move to the next record after that and so on till the end of the text.

    create table

    tbl_1

    (txt varchar (max))

    insert into tbl_1 values ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,

    finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596

    jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical

    sales and support good 8137 91630 8.25 126 eileen oneal 8258364083

    mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12

    executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558

    joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin

    general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')

    output

    john jimenez 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596

    jeffrey galione 57 allen dr wayne nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083

    mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558

    joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751

    Regards,
    Shafat Husain
    🙂
    And Your Lord Never Forgets...!! (64:19 -- Quran)

  • Something like this, i couldn't find any other way .

    CREATE TABLE #temp

    (

    ID INT IDENTITY(1,1),

    txt varchar (max)

    )

    DECLARE @txt1 varchar (max)

    DECLARE @txt varchar (max)

    SET @txt1 = 'john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,

    finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596

    jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical

    sales and support good 8137 91630 8.25 126 eileen oneal 8258364083

    mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12

    executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558

    joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin

    general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751'

    SET @txt = @txt1

    WHILE LEN(@txt) > 0

    BEGIN

    SELECT @txt = SUBSTRING(@txt1,0, PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',@txt1)+10 ) --from tbl_1

    INSERT #temp

    SELECT @txt

    SET @txt1 = SUBSTRING(@txt1,LEN(@txt)+1,LEN(@txt1)-LEN(@txt) )

    --SELECT @txt1

    END

    SELECT * FROM #temp

    DROP TABLE #temp

  • Quick but slightly verbose solution, does only scan the table once but that comes at an additional cost, would be a perfect case for LAG/LEAD on SQL Server 2012 or later.

    😎

    USE tempdb;

    GO

    SET NOCOUNT ON;

    IF OBJECT_ID(N'dbo.TBL_USERTEXT') IS NOT NULL DROP TABLE dbo.TBL_USERTEXT;

    create table dbo.TBL_USERTEXT

    (

    U_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

    ,U_TXT VARCHAR(MAX) NOT NULL

    );

    insert into dbo.TBL_USERTEXT(U_TXT)

    VALUES ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751');

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,BASE_DATA AS

    (

    SELECT

    UT.U_ID

    ,UT.U_TXT

    ,NM.N

    ,NM.N - ROW_NUMBER() OVER

    (

    PARTITION BY UT.U_ID

    ORDER BY NM.N

    ) AS P_RID

    FROM dbo.TBL_USERTEXT UT

    CROSS APPLY

    (

    SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9

    ) AS NM

    WHERE SUBSTRING(UT.U_TXT,NM.N,1) LIKE '[0-9]'

    )

    ,FIND_END_PATTERN AS

    (

    SELECT

    BD.U_ID

    ,MAX(BD.N) AS MAX_N

    ,BD.P_RID

    ,COUNT(BD.N) AS NCOUNT

    ,BD.U_TXT

    FROM BASE_DATA BD

    GROUP BY BD.U_ID,BD.P_RID,BD.U_TXT

    )

    ,NUMBERED_GROUP AS

    (

    SELECT

    FEP.U_ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY FEP.U_ID

    ORDER BY FEP.MAX_N

    ) AS GRP_RID

    ,FEP.MAX_N

    ,FEP.U_TXT

    FROM FIND_END_PATTERN FEP

    WHERE FEP.NCOUNT = 10

    )

    ,GROUP_POSITION AS

    (

    SELECT

    NG.U_ID

    ,NG.GRP_RID

    ,MIN(NG.MAX_N) OVER (PARTITION BY (NG.GRP_RID + (NG.GRP_RID % 2))) AS POS_A

    ,MIN(NG.MAX_N) OVER (PARTITION BY (NG.GRP_RID - (NG.GRP_RID % 2))) AS POS_B

    ,(NG.GRP_RID % 2) AS POS_FLG

    ,NG.MAX_N

    ,NG.U_TXT

    FROM NUMBERED_GROUP NG

    )

    SELECT

    GP.U_ID

    ,GP.GRP_RID

    ,LTRIM(SUBSTRING(GP.U_TXT, CASE

    WHEN GP.GRP_RID = 1 THEN 1

    WHEN GP.POS_A > GP.POS_B THEN GP.POS_B + 1

    ELSE GP.POS_A + 1

    END

    ,(GP.MAX_N - CASE

    WHEN GP.GRP_RID = 1 THEN 1

    WHEN GP.POS_A > GP.POS_B THEN GP.POS_B

    ELSE GP.POS_A

    END) + 0)) AS STR_PART

    FROM GROUP_POSITION GP;

    Results

    U_ID GRP_RID STR_PART

    ----------- -------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    1 1 john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 557562159

    1 2 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083

    1 3 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558

    1 4 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751

    Edit: Fixed error in the code

  • For completeness (and some fun :-)) here are two other solutions for 2012 and later

    😎

    Almost direct translation to window function version, uses LAG instead of aggregation for the cross row reference, around 25% improvement over the previous non window function version.

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,BASE_DATA AS

    (

    SELECT

    UT.U_ID

    ,UT.U_TXT

    ,NM.N

    ,NM.N - ROW_NUMBER() OVER

    (

    PARTITION BY (SELECT NULL)

    ORDER BY NM.N

    ) AS P_RID

    FROM dbo.TBL_USERTEXT UT

    CROSS APPLY

    (

    SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9

    ) AS NM

    WHERE SUBSTRING(UT.U_TXT,NM.N,1) LIKE '[0-9]'

    )

    ,FIND_END_PATTERN AS

    (

    SELECT

    BD.U_ID

    ,MAX(BD.N) AS MAX_N

    ,BD.P_RID

    ,COUNT(BD.N) AS NCOUNT

    ,BD.U_TXT

    FROM BASE_DATA BD

    GROUP BY BD.U_ID,BD.P_RID,BD.U_TXT

    )

    ,GROUP_POSITION AS

    (

    SELECT

    FEP.U_ID

    ,LAG(FEP.MAX_N,1,0) OVER

    (

    PARTITION BY FEP.U_ID

    ORDER BY FEP.MAX_N

    ) + 1 AS START_POS

    ,FEP.MAX_N

    ,FEP.U_TXT

    FROM FIND_END_PATTERN FEP

    WHERE FEP.NCOUNT = 10

    )

    SELECT

    GP.U_ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY GP.U_ID

    ORDER BY GP.START_POS

    ) AS GRP_RID

    ,LTRIM(SUBSTRING(GP.U_TXT,GP.START_POS,(GP.MAX_N - GP.START_POS) + 1))

    FROM GROUP_POSITION GP;

    Slightly streamlined window function version, around 50% improvement over the previous non window function version.

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,BASE_DATA AS

    (

    SELECT

    UT.U_ID

    ,UT.U_TXT

    ,NM.N

    ,NM.N - ROW_NUMBER() OVER

    (

    PARTITION BY UT.U_ID

    ORDER BY NM.N

    ) AS P_RID

    FROM dbo.TBL_USERTEXT UT

    CROSS APPLY

    (

    SELECT TOP(LEN(UT.U_TXT)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9

    ) AS NM

    WHERE ASCII(SUBSTRING(UT.U_TXT,NM.N,1)) ^ 0x30 < 10

    )

    ,GROUP_SET AS

    (

    SELECT

    BD.U_ID

    ,MAX(BD.N)AS MAX_N

    ,BD.U_TXT

    FROM BASE_DATA BD

    GROUP BY BD.U_ID

    ,BD.P_RID

    ,BD.U_TXT

    HAVING COUNT(BD.N) = 10

    )

    SELECT

    GS.U_ID

    ,ROW_NUMBER() OVER

    (

    PARTITION BY GS.U_ID

    ORDER BY GS.MAX_N

    ) AS GR_NO

    ,LTRIM(SUBSTRING(GS.U_TXT

    ,LAG(GS.MAX_N,1,-1) OVER ( ORDER BY GS.MAX_N ) + 1

    ,GS.MAX_N - LAG(GS.MAX_N,1,1) OVER ( ORDER BY GS.MAX_N ))

    ) AS TXT_PART

    FROM GROUP_SET GS;

  • Why would anyone take the time to "split" the data, clean it, and not normalize it? :blink:

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Good question Jeff.

    Another question: So I've been away from the forum for 6 months and everyone has already forgotten how to use a pattern splitter?

    create table

    #tbl_1

    (txt varchar (max))

    insert into #tbl_1 values ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive, finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751');

    WITH DoTheSplit AS

    (

    SELECT txt, ItemNumber, Item, [Matched]

    ,n=CASE WHEN ItemNumber = 1 THEN ItemNumber

    WHEN LEN(item) = 10 AND [Matched]=1 THEN ItemNumber

    ELSE 0

    END

    , a=MAX(ItemNumber) OVER ()

    FROM #tbl_1

    CROSS APPLY dbo.PatternSplitCM(txt, '[0-9]')

    )

    SELECT LTRIM(

    (

    SELECT '' + b.Item

    FROM DoTheSplit b

    WHERE ItemNumber BETWEEN m1 AND m2

    ORDER BY ItemNumber

    FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'

    ))

    FROM

    (

    SELECT m1=MIN(m), m2=MAX(m)

    FROM

    (

    SELECT m, rn=(1+ROW_NUMBER() OVER (ORDER BY m))/2

    FROM DoTheSplit a

    CROSS APPLY

    (

    VALUES(CASE ItemNumber WHEN 1 THEN 0 WHEN a THEN 0 ELSE n+1 END)

    ,(n)

    ) c (m)

    WHERE n <> 0 AND m <> 0

    ) a

    GROUP BY rn

    ) a;

    GO

    DROP TABLE #tbl_1;

    Details on PatternSplitCM can be found in my signature links.


    My mantra: No loops! No CURSORs! No RBAR! Hoo-uh![/I]

    My thought question: Have you ever been told that your query runs too fast?

    My advice:
    INDEXing a poor-performing query is like putting sugar on cat food. Yeah, it probably tastes better but are you sure you want to eat it?
    The path of least resistance can be a slippery slope. Take care that fixing your fixes of fixes doesn't snowball and end up costing you more than fixing the root cause would have in the first place.

    Need to UNPIVOT? Why not CROSS APPLY VALUES instead?[/url]
    Since random numbers are too important to be left to chance, let's generate some![/url]
    Learn to understand recursive CTEs by example.[/url]
    [url url=http://www.sqlservercentral.com/articles/St

  • Although recursive ctes are known to have perfomance problems you may wish to try it as well. Having no joins it may perfom not so bad.

    create table dbo.TBL_USERTEXT

    (

    U_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

    ,U_TXT VARCHAR(MAX) NOT NULL

    );

    insert into dbo.TBL_USERTEXT(U_TXT)

    VALUES ('john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')

    , ('**john asfasdf 535 summit ave franklin lks nj 15521 510_644_1079 na na 5,8/12 executive,finance finance and planning far 5537 21133 8.25 126 ronald d hensor jr. 5575621596 **jeffrey sdafas 57 allen dr sdfsasadf nj 15810 562_434_0710 na na 5,8/12 executive, technical sales and support good 8137 91630 8.25 126 eileen oneal 8258364083 **mary dizon 32 van wagene avenue jersey city nj 15327 562_438_3411 na na 12 executive, admin general admin good 6013 76801 8.5 160 earl cherry 1909876558 ***joanne t. tuohy 652 oradell ave. oradell nj 13851 707_226_6013 na na 7 executive, admin general admin far 8788 49095 9.25 156 florence ferrazzano 4665463751')

    ;

    insert into dbo.TBL_USERTEXT(U_TXT)

    select U_TXT from dbo.TBL_USERTEXT , sys.all_objects

    ;

    with r1 as (

    select

    left(U_Txt, idx.n) res, substring(U_Txt,idx.n,len(U_Txt)) U_Txt

    from dbo.TBL_USERTEXT

    cross apply (select PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',U_txt)+10 as N ) idx

    where idx.n>10

    union all

    select

    left(U_Txt, idx.n) res, substring(U_Txt,idx.n,len(U_Txt)) U_Txt

    from r1

    cross apply (select PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%',U_txt)+10 as N ) idx

    where idx.n>10

    )

    select res from r1

Viewing 7 posts - 1 through 6 (of 6 total)

You must be logged in to reply to this topic. Login to reply