Joining a Dim to fact table where the dim table key exists in multiple fact columns

  • This has probably been asked 100 times but I haven't found anything good on it.

    Say you have a fact table with a few columns that all reference the same key column in a dimension table, you want to write a view to return the information for those keys, what is the best way?

    hopefully this will explain better...

    USE MyTestDB;

    GO

    SET NOCOUNT ON;

    IF OBJECT_ID ('dbo.FactTemp' ,'U') IS NOT NULL

    DROP TABLE dbo.FactTemp;

    IF OBJECT_ID ('dbo.DimTemp', 'U') IS NOT NULL

    DROP TABLE dbo.DimTemp;

    CREATE TABLE dbo.DimTemp

    ( DimTempID INT IDENTITY(1,1) NOT NULL

    , DimTempName NVARCHAR(100) NOT NULL

    , CONSTRAINT PK_DimTemp_DimTempID PRIMARY KEY (DimTempID)

    );

    CREATE TABLE dbo.FactTemp

    ( FactTempID INT IDENTITY (1,1) NOT NULL

    , CaseID INT NOT NULL

    , AssignedTo INT NULL

    , LoggedBy INT NOT NULL

    , LoggedFor INT NOT NULL

    , DateLogged DATETIME NOT NULL

    , CONSTRAINT PK_FactTemp_FactTempID PRIMARY KEY (FactTempID)

    , CONSTRAINT FK_FactTemp_AssignedTo FOREIGN KEY (AssignedTo) REFERENCES dbo.DimTemp(DimTempID)

    , CONSTRAINT FK_FactTemp_LoggedBy FOREIGN KEY (LoggedBy) REFERENCES dbo.DimTemp(DimTempID)

    , CONSTRAINT FK_FactTemp_LoggedFor FOREIGN KEY (LoggedFor) REFERENCES dbo.DimTemp(DimTempID)

    );

    INSERTdbo.DimTemp(DimTempName)

    VALUES('John'), ('Andrew'), ('Kev'), ('Brian'), ('Keith'), ('Lisa'), ('Laura'), ('Jane'), ('Alice');

    --SELECT * FROM dbo.DimTemp;

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(1, NULL, 2, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -1, GETDATE())))); -- LOGG FOR YESTERDAY, 5 HOURS EARLIER

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(1, 4, 2, 3, (DATEADD(DAY, -1, GETDATE()))); -- LOGG FOR YESTERDAY

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(1, 1, 2, 3, GETDATE());

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(2, 9, 8, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -3, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(2, 5, 8, 7, (DATEADD(DAY, -3, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(2, 6, 8, 7, GETDATE());

    --Cross over

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(3, 4, 2, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(3, NULL, 2, 7, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(3, 6, 2, 7, GETDATE());

    --Cross over with null

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(4, 4, 8, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(4, NULL, 8, 3, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES(4, NULL, 8, 3, GETDATE());

    --SELECT * FROM dbo.FactTemp ORDER BY CaseID ASC, DateLogged DESC;

    --Get lates record;

    IF OBJECT_ID ('tempdb..#GetLatest' ,'U') IS NOT NULL

    DROP TABLE dbo.#GetLatest;

    SELECTFactTempID

    , CaseID

    , AssignedTo

    , LoggedBy

    , LoggedFor

    , DateLogged

    , ROW_NUMBER() OVER(PARTITION BY CaseID ORDER BY CaseID ASC, DateLogged DESC) as RwNum

    INTOdbo.#GetLatest

    FROMdbo.FactTemp;

    CREATE CLUSTERED INDEX CIX_#GetLatest_All ON dbo.#GetLatest (FactTempID, CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged, RwNum);

    --GO

    DBCC FREESESSIONCACHE

    DBCC FREEPROCCACHE

    DBCC DROPCLEANBUFFERS

    GO

    SELECTFactTempID

    , CaseID

    , AssignedTo

    , dtTo.DimTempName as AssignedToName

    , LoggedBy

    , dtBy.DimTempID as LoggedByName

    , LoggedFor

    , dtFor.DimTempID as LoggedForName

    , DateLogged

    FROMdbo.#GetLatest as gl

    LEFT JOIN dbo.DimTemp as dtTo

    ONgl.AssignedTo = dtTo.DimTempID

    LEFT JOIN dbo.DimTemp as dtBy

    ONgl.LoggedBy = dtBy.DimTempID

    LEFT JOIN dbo.DimTemp as dtFor

    ONgl.LoggedFor = dtFor.DimTempID

    WHERERwNum = 1

    GO

    SELECTgl.FactTempID

    , gl.CaseID

    --, c.AssignedTo

    , MAX(CASE WHEN gl.AssignedTo = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName

    --, c.LoggedBy

    , MAX(CASE WHEN gl.LoggedBy = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS LoggedByName

    --, c.LoggedFor

    , MAX(CASE WHEN gl.LoggedFor = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName

    --, c.DateLogged

    FROMdbo.#GetLatest as gl

    JOINdbo.DimTemp as dt

    ON(gl.AssignedTo = dt.DimTempID

    ORgl.LoggedBy = dt.DimTempID

    ORgl.LoggedFor = dt.DimTempID)

    WHERERwNum = 1

    GROUP BYFactTempID

    , CaseID;

    GO

    I'm using very small data at the moment, and the query plan and statistics don't really say which way. Has anyone encountered this and found the best solution when the data grows? I'm a bit dubious about the OR join as I seem to remember having a problem with it years ago.

    Thanks,

  • Your fact key columns should always have matching entries in their associated dim tables, even if that entry is -1 (unknown, N/A, or whatever default you choose).

    Once you have this in place, you can always use inner joins.

    The absence of evidence is not evidence of absence.
    Martin Rees

    You can lead a horse to water, but a pencil must be lead.
    Stan Laurel

  • For production they will, at the the moment I was looking at the issue of joining into the multiple matching columns

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply