May 9, 2008 at 10:22 am
I have two tables and here is the structure for both:
CREATE TABLE [dbo].[WData](
[empl_id] [varchar](12) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[proj_id] [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[org_id] [varchar](20) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[plc] [varchar](6) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[PROJ_END_DT] [datetime] NULL,
[PROJ_START_DT] [datetime] NULL
)
CREATE TABLE [dbo].[tempProjtable](
[Proj_ID] [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[PROJ_START_DT] [datetime] NULL,
[PROJ_END_DT] [datetime] NULL
)
table wdata and tempProjtable both have one column that they can be joined on which is proj_id. what I want to do is update two columns in wdata table when I find a match in tempprojtable for a proj_id and when I find a value for both PROJ_START_DT and PROJ_END_DT. if any of these fields has a null or are empty in tempprojtable then I want to look for value in the parent proj_id.
Proj_id can have upto 5 levels. e.g. for proj_id='05591.0035.YR04.7051.L36G' in wdata table I want to update PROJ_START_DT and PROJ_END_DT columns from tempprojtable upon match on proj_id. if PROJ_START_DT and PROJ_END_DT are null or are empty in tempprojtable then I want to move up one level in proj_id string in tempprojtable ( so I would match on '05591.0035.YR04.7051') and check if PROJ_START_DT and PROJ_END_DT are both populated, if they are then update PROJ_START_DT and PROJ_END_DT in wdata table. if they both are not populate then I go up another level '05591.0035.YR04' and look if both fields are populated. I will do this till I find a value for both fields. and If I dont find a value for both fields after searching upto top level then I want to leave both PROJ_START_DT and PROJ_END_DT fields in wdata table as null.
here is some test data for both tables.
Insert Into wdata Select empl_id='778', proj_id='00029.9999', org_id='1.56.06106.21', plc='6CF1', PROJ_END_DT='2008-09-30 00:00:00.000', PROJ_START_DT='2008-02-29 00:00:00.000'
Insert Into wdata Select empl_id='794', proj_id='00035.9999', org_id='1.56.06106.21', plc='6CE1', PROJ_END_DT='2008-09-30 00:00:00.000', PROJ_START_DT='2008-02-29 00:00:00.000'
Insert Into wdata Select empl_id='489', proj_id='00043.0001.0001', org_id='1.51.02684.20', plc='B9BP', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'
Insert Into wdata Select empl_id='492', proj_id='05591.0035.YR04.7051.L36G', org_id='1.51.02685.20', plc='B8CE', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'
Insert Into wdata Select empl_id='498', proj_id='00053.9999', org_id='1.51.02685.20', plc='B8CH', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00029', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00029.9999', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00035', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00035.9999', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00040', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00040.0002', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00043', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00043.0001', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00043.0001.0001', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00043.0001.0002', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00043.0001.0003', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='05591.0035.YR04.7051', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='05591.0035.YR04.7051.L36G', PROJ_START_DT=null, PROJ_END_DT=null
Insert Into tempProjtable Select Proj_ID='00053', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'
Insert Into tempProjtable Select Proj_ID='00053.9999', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT=null
hope its clear.
May 9, 2008 at 11:06 am
If I've understood you correctly, here's one idea...
; with
a as (select *, case when charindex('.', Proj_ID) > 0 then left(Proj_ID, len(Proj_ID) - charindex('.', reverse(Proj_ID))) end as Parent_Proj_ID from tempProjtable a), --add Parent_Proj_ID column
b as (select * from a where PROJ_START_DT is not null and PROJ_END_DT is not null --get all valid rows
union all
select a.Proj_Id, b.PROJ_START_DT, b.PROJ_END_DT, a.Parent_Proj_ID from b inner join a on b.Proj_Id = a.Parent_Proj_ID where a.PROJ_START_DT is null or a.PROJ_END_DT is null) --get all invalid children of valid rows and give them the dates of their parents
update a set PROJ_START_DT = b.PROJ_START_DT, PROJ_END_DT = b.PROJ_END_DT
from wdata a left outer join b on a.Proj_ID = b.Proj_ID -- join up and update
select * from wdata
/* Results
empl_id proj_id org_id plc PROJ_END_DT PROJ_START_DT
------------ ------------------------------ -------------------- ------ ----------------------- -----------------------
778 00029.9999 1.56.06106.21 6CF1 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000
794 00035.9999 1.56.06106.21 6CE1 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000
489 00043.0001.0001 1.51.02684.20 B9BP 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000
492 05591.0035.YR04.7051.L36G 1.51.02685.20 B8CE NULL NULL
498 00053.9999 1.51.02685.20 B8CH 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000
*/
Ryan Randall
Solutions are easy. Understanding the problem, now, that's the hard part.
May 9, 2008 at 11:09 am
Perhaps you should change the title to walking down the tree 😀
Ryan Randall
Solutions are easy. Understanding the problem, now, that's the hard part.
May 9, 2008 at 2:21 pm
Another way to join the tables together to get this is:
select *
from WData
inner join tempProjtable
on WData.proj_ID like tempprojtable.proj_id + '%'
where tempprojtable.proj_start_dt is not null
and tempprojtable.proj_end_dt is not null
Then you just have to pick the one with the longest tempprojtable.proj_id from that and use that for the update.
- Gus "GSquared", RSVP, OODA, MAP, NMVP, FAQ, SAT, SQL, DNA, RNA, UOI, IOU, AM, PM, AD, BC, BCE, USA, UN, CF, ROFL, LOL, ETC
Property of The Thread
"Nobody knows the age of the human race, but everyone agrees it's old enough to know better." - Anon
May 14, 2008 at 8:45 am
Ryan
Your query does exactly what I wanted. However its taking forever to run with actual data sets. What can I do to make it faster?
thanks
May 14, 2008 at 9:37 am
I can only suggest things to try. You'll then have to test them (if they are feasible) to see if they improve the performance on your system and on the dataset you have.
To start with, let's try some indexes on our important columns...
alter table tempProjtable alter column Proj_ID [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL --make Proj_ID not null
alter table tempProjtable add primary key clustered (Proj_ID) --make Proj_ID primary key
alter table tempProjtable add Parent_Proj_ID as case when charindex('.', Proj_ID) > 0 then left(Proj_ID, len(Proj_ID) - charindex('.', reverse(Proj_ID))) end --add computed column for Parent_Proj_ID
create index ix_tempProjtable__Parent_Proj_ID on tempProjtable (Parent_Proj_ID) --index computed Parent_Proj_ID column
create index ix_tempProjtable__Proj_ID__Parent_Proj_ID__PROJ_START_DT__PROJ_END_DT on tempProjtable (Proj_ID, Parent_Proj_ID, PROJ_START_DT, PROJ_END_DT) --covering index
alter table WData alter column Proj_ID [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL --make not null
create index ix_WData__Proj_ID on WData (Proj_ID) --index Proj_ID column
; with
b as (select * from tempProjtable where PROJ_START_DT is not null and PROJ_END_DT is not null --get all valid rows
union all
select a.Proj_Id, b.PROJ_START_DT, b.PROJ_END_DT, a.Parent_Proj_ID from b inner join tempProjtable a on b.Proj_Id = a.Parent_Proj_ID where a.PROJ_START_DT is null or a.PROJ_END_DT is null) --get all invalid children of valid rows and give them the dates of their parents
update a set PROJ_START_DT = b.PROJ_START_DT, PROJ_END_DT = b.PROJ_END_DT
from wdata a left outer join b on a.Proj_ID = b.Proj_ID -- join up and update
select * from wdata
If that doesn't help, post the query plan...
Edit: minor error + one more index
Ryan Randall
Solutions are easy. Understanding the problem, now, that's the hard part.
May 20, 2008 at 8:17 am
Ryan .. that worked like a charm.. thanks a bunch.
May 20, 2008 at 8:20 am
Glad to help, thanks for the feedback 🙂
Ryan Randall
Solutions are easy. Understanding the problem, now, that's the hard part.
Viewing 8 posts - 1 through 7 (of 7 total)
You must be logged in to reply to this topic. Login to reply