''Chopping'' up a ntext field

Question

Post reply

''Chopping'' up a ntext field

James Knight

SSC Enthusiast

Points: 145
More actions
February 9, 2005 at 3:57 am

#92232

I need to cycle through a table and update address1, address2, address3 and address4 fields with the contents of a multiline field (ntext) named address. How can I use the carriage return as the separator to determine the different lines of the address field?
TIA
James Knight

Viewing 10 posts - 1 through 9 (of 9 total)

You must be logged in to reply to this topic. Login to reply

RW-206743 Newbie Points: 7 More actions · Answer 1

Hi...

in VB/VBA you can use the split function. You´ll get back an Array:

Dim vTmp as Variant

vTmp = Split(RS.Fields("address"), vbCrLf)

RS.Fields("address1) = vTmp(0)

RS.Fields("address2) = vTmp(1)

RS.Fields("address3) = vTmp(2)

RS.Fields("address4) = vTmp(3)

RS.Update

May this help you.

Greetings from Germany

Roland

vadba SSChampion Points: 11133 More actions · Answer 2

James,

Lots of ways to do this in T-SQL. Two quick-and-dirty ways:

METHOD 1:

DROP TABLE textaddr

DROP TABLE addresses

GO

CREATE TABLE textaddr

(

id int PRIMARY KEY IDENTITY(1,1),

addr ntext null

)

CREATE TABLE addresses

(

id int PRIMARY KEY,

address1 varchar(50),

address2 varchar(50),

address3 varchar(50),

address4 varchar(50)

)

SET NOCOUNT ON

INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

-- In this example, I'll assume that you already

-- got rows in the table, and that you want to update them.

INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

-- Get everything into the table, address1 ok, rest in address2

UPDATE addresses

SET address1 = Substring(t.addr, 1, CharIndex(Char(13), t.addr, 1) - 1),

address2 = Substring(t.addr, CharIndex(Char(13), t.addr, 1) + 1, 4000) + Char(13)+ Char(13)+ Char(13)

FROM addresses a JOIN textaddr t ON a.id = t.id

-- Split address2, store first line in address2, rest in addr3

UPDATE addresses

SET address3 = Substring(address2, CharIndex(Char(13), address2, 1) + 1, 4000),

address2 = Substring(address2, 1, CharIndex(Char(13), address2, 1) - 1)

-- Split address3, store first line in address3, rest in addr4

UPDATE addresses

SET address4 = Substring(address3, CharIndex(Char(13), address3, 1) + 1, 4000),

address3 = Substring(address3, 1, CharIndex(Char(13), address3, 1) - 1)

-- Split address4, store first line in address4, discard the rest

UPDATE addresses

SET address4 = Substring(address4, 1, CharIndex(Char(13), address4, 1) - 1)

SELECT * FROM addresses

SET NOCOUNT OFF

METHOD 2:

-- UDF required by method 2.

-- I use dbo.fGetToken all the time. I modified it for use here with

-- the nvarchar type, thus the N at the end of the function name.

CREATE FUNCTION dbo.fGetTokenN

(

@parm nvarchar(4000),

@delim nvarchar(100),

@whichOccur smallint

)

RETURNS varchar(8000)

AS

BEGIN

DECLARE @occur int, @spos int, @startPos int, @maxlen int

DECLARE @token nvarchar(4000)

DECLARE @delimLen int

-- Len() function ignores trailing blanks, so if our delimiter is a space,

-- or multiple spaces, Len() will returns zero.

IF Len(@delim) = 0 AND DataLength(@delim) > 0

SET @delimLen = DataLength(@delim) / 2 -- divide by 2 if using nvarchar

ELSE

SET @delimLen = Len(@delim)

SET @occur = 0

SET @startPos = 1

SET @spos = 1 - @delimLen

SET @maxLen = Len(@parm)

WHILE @occur < @whichOccur AND @parm <> N'' AND @parm IS NOT NULL AND @startPos < @maxLen

BEGIN

SET @occur = @occur + 1

SET @startPos = @spos + @delimLen

SET @spos = CHARINDEX( @delim , @parm, @startPos )

IF @spos = 0

BREAK

END

IF @occur <> @whichOccur

SET @token = N'' -- or NULL

ELSE

IF @spos = 0

SET @token = Substring(@parm, @startPos, 4000)

ELSE

SET @token = SubString( @parm, @startPos, @spos - @startPos)

RETURN @token

END

GO

-- This is example 2

DROP TABLE textaddr

DROP TABLE addresses

GO

CREATE TABLE textaddr

(

id int PRIMARY KEY IDENTITY(1,1),

addr ntext null

)

CREATE TABLE addresses

(

id int PRIMARY KEY,

address1 varchar(50),

address2 varchar(50),

address3 varchar(50),

address4 varchar(50)

)

SET NOCOUNT ON

INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

-- assume that you are updating existing records

INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

SET NOCOUNT OFF

UPDATE addresses

SET address1 = dbo.fGetTokenN(t.addr, Char(13), 1) ,

address2 = dbo.fGetTokenN(t.addr, Char(13), 2) ,

address3 = dbo.fGetTokenN(t.addr, Char(13), 3) ,

address4 = dbo.fGetTokenN(t.addr, Char(13), 4)

FROM addresses a JOIN textaddr t ON a.id = t.id

SELECT * FROM addresses ORDER BY id

James Knight SSC Enthusiast Points: 145 More actions · Answer 3

Thanks for that! I have a slight problem though.....There are some addresses that have a null value in the address field (in smaller towns, a company might be so big is doesn't even have a road name applied to it) and some which only have a single line (no char(13)). These seem to cause the script to fail.

Thanks,

James Knight

vadba SSChampion Points: 11133 More actions · Answer 4

James,

The update method has a problem with single line addresses. Nulls aren't a problem in my testing. However, the UDF method is cleaner looking, and works for all situations. I was just trying to provide a couple of different methods in case performance was an issue. Also, the UDF returns an empty string when the requested value doesn't exists (i.e. 3 lines, and you request line 4). Just change the function to return NULL instead of N'' if that's what you want.

IF @occur <> @whichOccur

SET @token = NULL

ELSE

IF @spos = 0 ....

Here's the code that I tested (just added more INSERTS to test)

DROP TABLE textaddr

DROP TABLE addresses

GO

CREATE TABLE textaddr

(

id int PRIMARY KEY IDENTITY(1,1),

addr ntext null

)

CREATE TABLE addresses

(

id int PRIMARY KEY,

address1 varchar(50),

address2 varchar(50),

address3 varchar(50),

address4 varchar(50)

)

SET NOCOUNT ON

INSERT textaddr (addr) VALUES ( 'Line 1a' + Char(13) + 'Line 2a' + Char(13) + 'Line 3a' + Char(13) + 'Line 4a' )

INSERT textaddr (addr) VALUES ( 'Line 1b' + Char(13) + 'Line 2b' + Char(13) + 'Line 3b' + Char(13) + 'Line 4b' )

INSERT textaddr (addr) VALUES ( 'Line 1c' + Char(13) + 'Line 2c' + Char(13) + 'Line 3c' + Char(13) + 'Line 4c' )

INSERT textaddr (addr) VALUES ( 'Line 1d' + Char(13) + 'Line 2d' + Char(13) + 'Line 3d' + Char(13) + 'Line 4d' )

INSERT textaddr (addr) VALUES ( null )

INSERT textaddr (addr) VALUES ( 'Line 1f' )

INSERT textaddr (addr) VALUES ( 'Line 1g' )

INSERT textaddr (addr) VALUES ( 'Line 1h' + Char(13) + 'Line 2h' )

INSERT textaddr (addr) VALUES ( 'Line 1j' + Char(13) + 'Line 2j' + Char(13) + 'Line 3j' + Char(13) + 'Line 4j' + Char(13) + 'Line 5j' )

-- assume that you are updating existing records

INSERT addresses (id) SELECT id FROM textaddr ORDER BY id

SET NOCOUNT OFF

UPDATE addresses

SET address1 = dbo.fGetTokenN(t.addr, Char(13), 1) ,

address2 = dbo.fGetTokenN(t.addr, Char(13), 2) ,

address3 = dbo.fGetTokenN(t.addr, Char(13), 3) ,

address4 = dbo.fGetTokenN(t.addr, Char(13), 4)

FROM addresses a JOIN textaddr t ON a.id = t.id

SELECT * FROM addresses ORDER BY id

/*

I got these results:

(9 row(s) affected)

id address1 address2 address3 address4

--- ---------- ---------- ---------- ----------

1 Line 1a Line 2a Line 3a Line 4a

2 Line 1b Line 2b Line 3b Line 4b

3 Line 1c Line 2c Line 3c Line 4c

4 Line 1d Line 2d Line 3d Line 4d

5

6 Line 1f

7 Line 1g

8 Line 1h Line 2h

9 Line 1j Line 2j Line 3j Line 4j

(9 row(s) affected

*/

James Knight SSC Enthusiast Points: 145 More actions · Answer 5

Works a treat apart from 1 thing.......it doesn't delete the char(13)'s while breaking up the field. I had a look at the udf, but the code is going way over my head (easily done I know!)

Cheers,

James

vadba SSChampion Points: 11133 More actions · Answer 6

Hmm..it deletes all the Char(13)'s in my tests. Does it do this for all rows, or just some?

Change the last SELECT statement in the example to read:

SELECT *

FROM addresses

WHERE CharIndex( Char(13), address1, 1) > 0

OR CharIndex( Char(13), address2, 1) > 0

OR CharIndex( Char(13), address3, 1) > 0

OR CharIndex( Char(13), address4, 1) > 0

ORDER BY id

That will list any rows for which anyone of the address columns contains a Char(13). Also, are the lines in the text column delimited with just a Char(13), or with Char(13)+Char(10) (carriage return + line feed)?

Run this:

SELECT *

FROM addresses

WHERE CharIndex( Char(10), address1, 1) > 0

OR CharIndex( Char(10), address2, 1) > 0

OR CharIndex( Char(10), address3, 1) > 0

OR CharIndex( Char(10), address4, 1) > 0

ORDER BY id

If the text is delimited with Char(13)+Char(10), just change the function call like this:

UPDATE addresses

SET address1 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 1) ,

address2 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 2) ,

address3 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 3) ,

address4 = dbo.fGetTokenN(t.addr, Char(13)+Char(10), 4)

FROM addresses a JOIN textaddr t ON a.id = t.id

James Knight SSC Enthusiast Points: 145 More actions · Answer 7

You were right, there were line feeds as well.

UPDATE address

SET addr_address1 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 1) ,

addr_address2 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 2) ,

addr_address3 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 3) ,

addr_address4 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 4) ,

addr_address5 = dbo.fGetTokenN(addr_address, Char(13) + Char(10), 5)

This worked perfectly. Now I just need to do some cleaning up and set all fields comprised of just spaces to NULL.

Many thanks,

James

vadba SSChampion Points: 11133 More actions · Answer 8

As I mentioned earlier, you can modify the fGetTokenN function to return NULL instead of the empty string for addresses that are less than 4 lines. Note the bold text near the bottom of the function.

DROP FUNCTION dbo.fGetTokenN

GO

CREATE FUNCTION dbo.fGetTokenN

(

@parm nvarchar(4000),

@delim nvarchar(100),

@whichOccur smallint

)

RETURNS nvarchar(4000)

AS

BEGIN

DECLARE @occur int, @spos int, @startPos int, @maxlen int

DECLARE @token nvarchar(4000)

DECLARE @delimLen int

-- Len() function ignores trailing blanks, so if our delimiter is a space,

-- or multiple spaces, Len() will returns zero.

IF Len(@delim) = 0 AND DataLength(@delim) > 0

SET @delimLen = DataLength(@delim) / 2 -- divide by 2 if using nvarchar

ELSE

SET @delimLen = Len(@delim)

SET @occur = 0

SET @startPos = 1

SET @spos = 1 - @delimLen

SET @maxLen = Len(@parm)

WHILE @occur < @whichOccur AND @parm <> N'' AND @parm IS NOT NULL AND @startPos < @maxLen

BEGIN

SET @occur = @occur + 1

SET @startPos = @spos + @delimLen

SET @spos = CHARINDEX( @delim , @parm, @startPos )

IF @spos = 0

BREAK

END

IF @occur <> @whichOccur

SET @token = NULL -- instead of N''

ELSE

IF @spos = 0

SET @token = Substring(@parm, @startPos, 4000)

ELSE

SET @token = SubString( @parm, @startPos, @spos - @startPos)

RETURN @token

END

James Knight SSC Enthusiast Points: 145 More actions · Answer 9

Hi,

I did make that amendment, I was just saying that I need to clean up the data somewhat. Thanks for your help.

Much appreciated,

James Knight