walking up the tree - interesting scenario

  • I have two tables and here is the structure for both:

    CREATE TABLE [dbo].[WData](

    [empl_id] [varchar](12) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [proj_id] [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [org_id] [varchar](20) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [plc] [varchar](6) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [PROJ_END_DT] [datetime] NULL,

    [PROJ_START_DT] [datetime] NULL

    )

    CREATE TABLE [dbo].[tempProjtable](

    [Proj_ID] [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,

    [PROJ_START_DT] [datetime] NULL,

    [PROJ_END_DT] [datetime] NULL

    )

    table wdata and tempProjtable both have one column that they can be joined on which is proj_id. what I want to do is update two columns in wdata table when I find a match in tempprojtable for a proj_id and when I find a value for both PROJ_START_DT and PROJ_END_DT. if any of these fields has a null or are empty in tempprojtable then I want to look for value in the parent proj_id.

    Proj_id can have upto 5 levels. e.g. for proj_id='05591.0035.YR04.7051.L36G' in wdata table I want to update PROJ_START_DT and PROJ_END_DT columns from tempprojtable upon match on proj_id. if PROJ_START_DT and PROJ_END_DT are null or are empty in tempprojtable then I want to move up one level in proj_id string in tempprojtable ( so I would match on '05591.0035.YR04.7051') and check if PROJ_START_DT and PROJ_END_DT are both populated, if they are then update PROJ_START_DT and PROJ_END_DT in wdata table. if they both are not populate then I go up another level '05591.0035.YR04' and look if both fields are populated. I will do this till I find a value for both fields. and If I dont find a value for both fields after searching upto top level then I want to leave both PROJ_START_DT and PROJ_END_DT fields in wdata table as null.

    here is some test data for both tables.

    Insert Into wdata Select empl_id='778', proj_id='00029.9999', org_id='1.56.06106.21', plc='6CF1', PROJ_END_DT='2008-09-30 00:00:00.000', PROJ_START_DT='2008-02-29 00:00:00.000'

    Insert Into wdata Select empl_id='794', proj_id='00035.9999', org_id='1.56.06106.21', plc='6CE1', PROJ_END_DT='2008-09-30 00:00:00.000', PROJ_START_DT='2008-02-29 00:00:00.000'

    Insert Into wdata Select empl_id='489', proj_id='00043.0001.0001', org_id='1.51.02684.20', plc='B9BP', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'

    Insert Into wdata Select empl_id='492', proj_id='05591.0035.YR04.7051.L36G', org_id='1.51.02685.20', plc='B8CE', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'

    Insert Into wdata Select empl_id='498', proj_id='00053.9999', org_id='1.51.02685.20', plc='B8CH', PROJ_END_DT='2010-09-30 00:00:00.000', PROJ_START_DT='2000-09-22 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00029', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00029.9999', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00035', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00035.9999', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00040', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00040.0002', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00043', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00043.0001', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00043.0001.0001', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00043.0001.0002', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00043.0001.0003', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='05591.0035.YR04.7051', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='05591.0035.YR04.7051.L36G', PROJ_START_DT=null, PROJ_END_DT=null

    Insert Into tempProjtable Select Proj_ID='00053', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT='2002-08-15 00:00:00.000'

    Insert Into tempProjtable Select Proj_ID='00053.9999', PROJ_START_DT='2000-01-31 00:00:00.000', PROJ_END_DT=null

    hope its clear.

  • If I've understood you correctly, here's one idea...

    ; with

    a as (select *, case when charindex('.', Proj_ID) > 0 then left(Proj_ID, len(Proj_ID) - charindex('.', reverse(Proj_ID))) end as Parent_Proj_ID from tempProjtable a), --add Parent_Proj_ID column

    b as (select * from a where PROJ_START_DT is not null and PROJ_END_DT is not null --get all valid rows

    union all

    select a.Proj_Id, b.PROJ_START_DT, b.PROJ_END_DT, a.Parent_Proj_ID from b inner join a on b.Proj_Id = a.Parent_Proj_ID where a.PROJ_START_DT is null or a.PROJ_END_DT is null) --get all invalid children of valid rows and give them the dates of their parents

    update a set PROJ_START_DT = b.PROJ_START_DT, PROJ_END_DT = b.PROJ_END_DT

    from wdata a left outer join b on a.Proj_ID = b.Proj_ID -- join up and update

    select * from wdata

    /* Results

    empl_id proj_id org_id plc PROJ_END_DT PROJ_START_DT

    ------------ ------------------------------ -------------------- ------ ----------------------- -----------------------

    778 00029.9999 1.56.06106.21 6CF1 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000

    794 00035.9999 1.56.06106.21 6CE1 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000

    489 00043.0001.0001 1.51.02684.20 B9BP 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000

    492 05591.0035.YR04.7051.L36G 1.51.02685.20 B8CE NULL NULL

    498 00053.9999 1.51.02685.20 B8CH 2002-08-15 00:00:00.000 2000-01-31 00:00:00.000

    */

    Ryan Randall

    Solutions are easy. Understanding the problem, now, that's the hard part.

  • Perhaps you should change the title to walking down the tree 😀

    Ryan Randall

    Solutions are easy. Understanding the problem, now, that's the hard part.

  • Another way to join the tables together to get this is:

    select *

    from WData

    inner join tempProjtable

    on WData.proj_ID like tempprojtable.proj_id + '%'

    where tempprojtable.proj_start_dt is not null

    and tempprojtable.proj_end_dt is not null

    Then you just have to pick the one with the longest tempprojtable.proj_id from that and use that for the update.

    - Gus "GSquared", RSVP, OODA, MAP, NMVP, FAQ, SAT, SQL, DNA, RNA, UOI, IOU, AM, PM, AD, BC, BCE, USA, UN, CF, ROFL, LOL, ETC
    Property of The Thread

    "Nobody knows the age of the human race, but everyone agrees it's old enough to know better." - Anon

  • Ryan

    Your query does exactly what I wanted. However its taking forever to run with actual data sets. What can I do to make it faster?

    thanks

  • I can only suggest things to try. You'll then have to test them (if they are feasible) to see if they improve the performance on your system and on the dataset you have.

    To start with, let's try some indexes on our important columns...

    alter table tempProjtable alter column Proj_ID [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL --make Proj_ID not null

    alter table tempProjtable add primary key clustered (Proj_ID) --make Proj_ID primary key

    alter table tempProjtable add Parent_Proj_ID as case when charindex('.', Proj_ID) > 0 then left(Proj_ID, len(Proj_ID) - charindex('.', reverse(Proj_ID))) end --add computed column for Parent_Proj_ID

    create index ix_tempProjtable__Parent_Proj_ID on tempProjtable (Parent_Proj_ID) --index computed Parent_Proj_ID column

    create index ix_tempProjtable__Proj_ID__Parent_Proj_ID__PROJ_START_DT__PROJ_END_DT on tempProjtable (Proj_ID, Parent_Proj_ID, PROJ_START_DT, PROJ_END_DT) --covering index

    alter table WData alter column Proj_ID [varchar](30) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL --make not null

    create index ix_WData__Proj_ID on WData (Proj_ID) --index Proj_ID column

    ; with

    b as (select * from tempProjtable where PROJ_START_DT is not null and PROJ_END_DT is not null --get all valid rows

    union all

    select a.Proj_Id, b.PROJ_START_DT, b.PROJ_END_DT, a.Parent_Proj_ID from b inner join tempProjtable a on b.Proj_Id = a.Parent_Proj_ID where a.PROJ_START_DT is null or a.PROJ_END_DT is null) --get all invalid children of valid rows and give them the dates of their parents

    update a set PROJ_START_DT = b.PROJ_START_DT, PROJ_END_DT = b.PROJ_END_DT

    from wdata a left outer join b on a.Proj_ID = b.Proj_ID -- join up and update

    select * from wdata

    If that doesn't help, post the query plan...

    Edit: minor error + one more index

    Ryan Randall

    Solutions are easy. Understanding the problem, now, that's the hard part.

  • Ryan .. that worked like a charm.. thanks a bunch.

  • Glad to help, thanks for the feedback 🙂

    Ryan Randall

    Solutions are easy. Understanding the problem, now, that's the hard part.

Viewing 8 posts - 1 through 7 (of 7 total)

You must be logged in to reply to this topic. Login to reply