October 26, 2015 at 3:43 am
This has probably been asked 100 times but I haven't found anything good on it.
Say you have a fact table with a few columns that all reference the same key column in a dimension table, you want to write a view to return the information for those keys, what is the best way?
hopefully this will explain better...
USE MyTestDB;
GO
SET NOCOUNT ON;
IF OBJECT_ID ('dbo.FactTemp' ,'U') IS NOT NULL
DROP TABLE dbo.FactTemp;
IF OBJECT_ID ('dbo.DimTemp', 'U') IS NOT NULL
DROP TABLE dbo.DimTemp;
CREATE TABLE dbo.DimTemp
( DimTempID INT IDENTITY(1,1) NOT NULL
, DimTempName NVARCHAR(100) NOT NULL
, CONSTRAINT PK_DimTemp_DimTempID PRIMARY KEY (DimTempID)
);
CREATE TABLE dbo.FactTemp
( FactTempID INT IDENTITY (1,1) NOT NULL
, CaseID INT NOT NULL
, AssignedTo INT NULL
, LoggedBy INT NOT NULL
, LoggedFor INT NOT NULL
, DateLogged DATETIME NOT NULL
, CONSTRAINT PK_FactTemp_FactTempID PRIMARY KEY (FactTempID)
, CONSTRAINT FK_FactTemp_AssignedTo FOREIGN KEY (AssignedTo) REFERENCES dbo.DimTemp(DimTempID)
, CONSTRAINT FK_FactTemp_LoggedBy FOREIGN KEY (LoggedBy) REFERENCES dbo.DimTemp(DimTempID)
, CONSTRAINT FK_FactTemp_LoggedFor FOREIGN KEY (LoggedFor) REFERENCES dbo.DimTemp(DimTempID)
);
INSERTdbo.DimTemp(DimTempName)
VALUES('John'), ('Andrew'), ('Kev'), ('Brian'), ('Keith'), ('Lisa'), ('Laura'), ('Jane'), ('Alice');
--SELECT * FROM dbo.DimTemp;
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(1, NULL, 2, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -1, GETDATE())))); -- LOGG FOR YESTERDAY, 5 HOURS EARLIER
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(1, 4, 2, 3, (DATEADD(DAY, -1, GETDATE()))); -- LOGG FOR YESTERDAY
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(1, 1, 2, 3, GETDATE());
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(2, 9, 8, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -3, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(2, 5, 8, 7, (DATEADD(DAY, -3, GETDATE()))); -- LOGG FOR 3 DAYS AGO
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(2, 6, 8, 7, GETDATE());
--Cross over
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(3, 4, 2, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(3, NULL, 2, 7, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(3, 6, 2, 7, GETDATE());
--Cross over with null
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(4, 4, 8, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(4, NULL, 8, 3, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO
INSERTdbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)
VALUES(4, NULL, 8, 3, GETDATE());
--SELECT * FROM dbo.FactTemp ORDER BY CaseID ASC, DateLogged DESC;
--Get lates record;
IF OBJECT_ID ('tempdb..#GetLatest' ,'U') IS NOT NULL
DROP TABLE dbo.#GetLatest;
SELECTFactTempID
, CaseID
, AssignedTo
, LoggedBy
, LoggedFor
, DateLogged
, ROW_NUMBER() OVER(PARTITION BY CaseID ORDER BY CaseID ASC, DateLogged DESC) as RwNum
INTOdbo.#GetLatest
FROMdbo.FactTemp;
CREATE CLUSTERED INDEX CIX_#GetLatest_All ON dbo.#GetLatest (FactTempID, CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged, RwNum);
--GO
DBCC FREESESSIONCACHE
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS
GO
SELECTFactTempID
, CaseID
, AssignedTo
, dtTo.DimTempName as AssignedToName
, LoggedBy
, dtBy.DimTempID as LoggedByName
, LoggedFor
, dtFor.DimTempID as LoggedForName
, DateLogged
FROMdbo.#GetLatest as gl
LEFT JOIN dbo.DimTemp as dtTo
ONgl.AssignedTo = dtTo.DimTempID
LEFT JOIN dbo.DimTemp as dtBy
ONgl.LoggedBy = dtBy.DimTempID
LEFT JOIN dbo.DimTemp as dtFor
ONgl.LoggedFor = dtFor.DimTempID
WHERERwNum = 1
GO
SELECTgl.FactTempID
, gl.CaseID
--, c.AssignedTo
, MAX(CASE WHEN gl.AssignedTo = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName
--, c.LoggedBy
, MAX(CASE WHEN gl.LoggedBy = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS LoggedByName
--, c.LoggedFor
, MAX(CASE WHEN gl.LoggedFor = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName
--, c.DateLogged
FROMdbo.#GetLatest as gl
JOINdbo.DimTemp as dt
ON(gl.AssignedTo = dt.DimTempID
ORgl.LoggedBy = dt.DimTempID
ORgl.LoggedFor = dt.DimTempID)
WHERERwNum = 1
GROUP BYFactTempID
, CaseID;
GO
I'm using very small data at the moment, and the query plan and statistics don't really say which way. Has anyone encountered this and found the best solution when the data grows? I'm a bit dubious about the OR join as I seem to remember having a problem with it years ago.
Thanks,
October 26, 2015 at 4:28 am
Your fact key columns should always have matching entries in their associated dim tables, even if that entry is -1 (unknown, N/A, or whatever default you choose).
Once you have this in place, you can always use inner joins.
The absence of evidence is not evidence of absence.
Martin Rees
You can lead a horse to water, but a pencil must be lead.
Stan Laurel
October 26, 2015 at 4:31 am
For production they will, at the the moment I was looking at the issue of joining into the multiple matching columns
Viewing 3 posts - 1 through 2 (of 2 total)
You must be logged in to reply to this topic. Login to reply