February 23, 2010 at 5:15 am
Hi,
I have some address data that has been populated sequentially into a set of seven fields. I need to extract the postcode and place this into a separate field, removing the data from the field it was contained in along the way. The data looks something like this:
/*
UniqueId Postal_Address_Line_1 Postal_Address_Line_2 Postal_Address_Line_3 Postal_Address_Line_4 Postal_Address_Line_5 Postal_Address_Line_6 Postal_Address_Line_7 Postal_Address_Postcode
----------- --------------------- --------------------- --------------------- --------------------- --------------------- --------------------- --------------------- -----------------------
1 1 Any Street AN1 1AN NULL
2 2 Any Street Any Town AN1 1AN NULL NULL NULL NULL NULL
3 3 Any Street Any Road Any Town AN1 1AN NULL
4 5 Any Street Any Road Any Town Any County AN1 1AN NULL NULL NULL
5 Flat 1 6 Any Street Any Road Any Town Any County AN1 1AN NULL
6 Dunroamin Flat 2 7 Any Street Any Road Any Town Any County AN1 1AN NULL
*/
Note that there is mixture of null and blank fields where I have no data.
I need to make it like this:
/*
UniqueId Postal_Address_Line_1 Postal_Address_Line_2 Postal_Address_Line_3 Postal_Address_Line_4 Postal_Address_Line_5 Postal_Address_Line_6 Postal_Address_Line_7 Postal_Address_Postcode
----------- --------------------- --------------------- --------------------- --------------------- --------------------- --------------------- --------------------- -----------------------
1 1 Any Street AN1 1AN
2 2 Any Street Any Town NULL NULL NULL NULL AN1 1AN
3 3 Any Street Any Road Any Town AN1 1AN
4 5 Any Street Any Road Any Town Any County NULL NULL AN1 1AN
5 Flat 1 6 Any Street Any Road Any Town Any County AN1 1AN
6 Dunroamin Flat 2 7 Any Street Any Road Any Town Any County AN1 1AN
*/
I've written the following (apologies, tabs have gone a bit screwy):
if isnull(object_id('tempdb.dbo.#address_data'),0) <> 0
begin
drop table #address_data
end
-- create temp table
create table #address_data (
uniqueId int identity primary key
, Postal_Address_Line_1 varchar(12)
, Postal_Address_Line_2 varchar(12)
, Postal_Address_Line_3 varchar(12)
, Postal_Address_Line_4 varchar(12)
, Postal_Address_Line_5 varchar(12)
, Postal_Address_Line_6 varchar(12)
, Postal_Address_Line_7 varchar(12)
, Postal_Address_Postcode varchar(10)
)
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('1 Any Street','AN1 1AN','','','','','')
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('2 Any Street','Any Town','AN1 1AN',null,null,null,null)
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('3 Any Street','Any Road','Any Town','AN1 1AN','','','')
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('5 Any Street','Any Road','Any Town','Any County','AN1 1AN',null,null)
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('Flat 1','6 Any Street','Any Road','Any Town','Any County','AN1 1AN','')
insert #address_data (Postal_Address_Line_1,Postal_Address_Line_2,Postal_Address_Line_3,Postal_Address_Line_4,Postal_Address_Line_5,Postal_Address_Line_6,Postal_Address_Line_7) values ('Dunroamin','Flat 2','7 Any Street','Any Road','Any Town','Any County','AN1 1AN')
-- by converting nulls to 1 and anything else to 0, the sum of the R fields will tell me the last populated field
;with cte as (
select UniqueId
, case when isnull(Postal_Address_Line_2,'')='' then 1 else 0 end as R1
, case when isnull(Postal_Address_Line_3,'')='' then 1 else 0 end as R2
, case when isnull(Postal_Address_Line_4,'')='' then 1 else 0 end as R3
, case when isnull(Postal_Address_Line_5,'')='' then 1 else 0 end as R4
, case when isnull(Postal_Address_Line_6,'')='' then 1 else 0 end as R5
, case when isnull(Postal_Address_Line_7,'')='' then 1 else 0 end as R6
, Postal_Address_Line_2
, Postal_Address_Line_3
, Postal_Address_Line_4
, Postal_Address_Line_5
, Postal_Address_Line_6
, Postal_Address_Line_7
, Postal_Address_Postcode
from #address_data
-- only interested where postcode isn't already populated
where isnull(Postal_Address_Postcode,'') = ''
)
update cte
set Postal_Address_Postcode =
case when R1+R2+R3+R4+R5+R6 = 0
-- only interested in stuff that looks like a postcode
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_7)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_7)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_7))) < 10
then Postal_Address_Line_7
when R1+R2+R3+R4+R5+R6 = 1
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_6)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_6)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_6))) < 10
then Postal_Address_Line_6
when R1+R2+R3+R4+R5+R6 = 2
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_5)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_5)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_5))) < 10
then Postal_Address_Line_5
when R1+R2+R3+R4+R5+R6 = 3
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_4)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_4)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_4))) < 10
then Postal_Address_Line_4
when R1+R2+R3+R4+R5+R6 = 4
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_3)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_3)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_3))) < 10
then Postal_Address_Line_3
when R1+R2+R3+R4+R5+R6 = 5
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_2)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_2)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_2))) < 10
then Postal_Address_Line_2
end
, Postal_Address_Line_7 = case when R1+R2+R3+R4+R5+R6 = 0
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_7)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_7)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_7))) < 10
then '' else Postal_Address_Line_7 end
, Postal_Address_Line_6 = case when R1+R2+R3+R4+R5+R6 = 1
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_6)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_6)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_6))) < 10
then '' else Postal_Address_Line_6 end
, Postal_Address_Line_5 = case when R1+R2+R3+R4+R5+R6 = 2
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_5)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_5)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_5))) < 10
then '' else Postal_Address_Line_5 end
, Postal_Address_Line_4 = case when R1+R2+R3+R4+R5+R6 = 3
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_4)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_4)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_4))) < 10
then '' else Postal_Address_Line_4 end
, Postal_Address_Line_3 = case when R1+R2+R3+R4+R5+R6 = 4
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_3)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_3)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_3))) < 10
then '' else Postal_Address_Line_3 end
, Postal_Address_Line_2= case when R1+R2+R3+R4+R5+R6 = 5
and (patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line_2)) = 1 or patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line_2)) = 1)
and len(ltrim(rtrim(Postal_Address_Line_7))) < 10
then '' else Postal_Address_Line_2 end
from cte
select UniqueId,Postal_Address_Line_1
,Postal_Address_Line_2
,Postal_Address_Line_3
,Postal_Address_Line_4
,Postal_Address_Line_5
,Postal_Address_Line_6
,Postal_Address_Line_7
, Postal_Address_Postcode
from #address_data
Given that this is abit meaty (and consequently a bit slow) I was wondering if anyone had solved the same problem before and had a more efficient solution or if anyone could suggest any enhancements to my solution above?
Assumptions used:
Addresses will not have gaps, i.e be populated Adr1, Null, Adr2 etc
I'm not worried about postcode validity at this stage
Records with a populated postcode do not need to be processed
Notes:
I'm working on a local SQL 2008 instance on a desktop and this is where the solution will stay.
Any suggestions welcome...
Regards, Iain
February 24, 2010 at 11:50 am
maybe, it's help you
;with cte as (
select
coalesce(Postal_Address_Line_7, Postal_Address_Line_6, Postal_Address_Line_5, Postal_Address_Line_4,Postal_Address_Line_3, Postal_Address_Line_2, Postal_Address_Line_1) as Postal_Address_Line
, Postal_Address_Postcode
from #address_data
-- only interested where postcode isn't already populated
where isnull(Postal_Address_Postcode,'') = ''
)
update cte
set Postal_Address_Postcode = Postal_Address_Line
from cte
where
-- only interested in stuff that looks like a postcode
(patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line)) = 1 or
patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line)) = 1)
and len(ltrim(rtrim(Postal_Address_Line))) < 10
I Have Nine Lives You Have One Only
THINK!
February 24, 2010 at 2:26 pm
To be honest, I didn't disgest your code in its entirety. But from a strictly philosophical standpoint, if you want to make it faster, consider moving it to C#. Also, you can look at using regular expressions, if you have skill in that arena.
February 25, 2010 at 2:19 am
Hi Tim,
Unfortunately what I know about C# could be comfortably squeezed onto the back of a postage stamp. I've been telling myself for ages that I need to get onto it - and not getting much further than that 🙂
The regular expression route is one I tried, but I found the performance was terrible. I think it might be because I'm working on a local instance?
Ta, Iain
February 25, 2010 at 2:21 am
Hi Handkot,
That would have been my ideal choice too, if I didn't have to remove the value from the old field too. Because of that, I need to know which field the data came from.
Cheers, Iain
February 25, 2010 at 5:01 am
what trouble?
;with cte as (
select
coalesce(Postal_Address_Line_7, Postal_Address_Line_6, Postal_Address_Line_5, Postal_Address_Line_4, Postal_Address_Line_3, Postal_Address_Line_2) as Postal_Address_Line
, Postal_Address_Postcode
from #address_data
-- only interested where postcode isn't already populated
where isnull(Postal_Address_Postcode,'') = ''
)
update cte
set
Postal_Address_Postcode = Postal_Address_Line
, replace(Postal_Address_Line_7, Postal_Address_Line, '')
, replace(Postal_Address_Line_6, Postal_Address_Line, '')
, replace(Postal_Address_Line_5, Postal_Address_Line, '')
, replace(Postal_Address_Line_4, Postal_Address_Line, '')
, replace(Postal_Address_Line_3, Postal_Address_Line, '')
, replace(Postal_Address_Line_2, Postal_Address_Line, '')
from cte
where
-- only interested in stuff that looks like a postcode
(patindex('[A-Z][0-9]%',ltrim(Postal_Address_Line)) = 1 or
patindex('[A-Z][A-Z][0-9]%',ltrim(Postal_Address_Line)) = 1)
and len(ltrim(rtrim(Postal_Address_Line))) < 10
it works if only one column has PostCode
in other case we need to add null of every rows and use case to define column for update
...
coalesce(Postal_Address_Line_7, Postal_Address_Line_6, ...) as Postal_Address_Line,
case when Postal_Address_Line_7 is null then 1 else 0 end +case when Postal_Address_Line_6 is null then 1 else 0 end+... as NULL_COUNT
...
Postal_Address_Line_7 = case when NULL_COUNT = 0 then '' else Postal_Address_Line_7 end,
Postal_Address_Line_6 = case when NULL_COUNT = 1 then '' else Postal_Address_Line_ 6end,
...
I Have Nine Lives You Have One Only
THINK!
Viewing 6 posts - 1 through 5 (of 5 total)
You must be logged in to reply to this topic. Login to reply