November 10, 2017 at 1:31 pm
Hi there
I'm rather new to SQL-Server. I need to convert historic data that is stored in the form of a change log into a time series table, similar to SCD2 with a start and end date. It might have to do with the Pivot function (that's how it's done in Oracle) but I didn't master this on Microsoft.
Please find an example (three tables) of what I mean below:
create table live (
row_id int,
created datetime2,
lastupd datetime2,
attr1 varchar(50),
attr2 varchar(50),
attr3 varchar(50),
attr4 varchar(50),
attr5 varchar(50)
)
;
-- this is how the record currently looks like (live table)
insert into live
(row_id, created, lastupd, attr1, attr2, attr3, attr4, attr5)
values
(1, cast('2017-06-01T10:00:00' as datetime2), cast('2017-06-07T14:45:00' as datetime2), 'attr1 version 1', 'attr2 version 2', 'attr3 version 1', 'attr4 version 1', 'attr5 version 1')
;
create table history (
row_id int,
created datetime2,
record_id int,
fieldName varchar(50),
-- the system also provided the old value, which is not needed for this purpose
oldValue varchar(50),
newValue varchar(50)
)
;
-- this was the first (original) version of the record
insert into history
(row_id, created, record_id, fieldName, oldValue, newValue)
values
(1, cast('2017-06-01T10:00:00' as datetime2), 1, 'attr1', null, 'attr1 version1')
;
insert into history
(row_id, created, record_id, fieldName, oldValue, newValue)
values
(2, cast('2017-06-01T10:00:00' as datetime2), 1, 'attr2', null, 'attr2 version1')
;
insert into history
(row_id, created, record_id, fieldName, oldValue, newValue)
values
(3, cast('2017-06-01T10:00:00' as datetime2), 1, 'attr3', null, 'attr3 version1')
;
-- attr4 was not there from the beginning, but added later
insert into history
(row_id, created, record_id, fieldName, oldValue, newValue)
values
(4, cast('2017-06-05T12:30:00' as datetime2), 1, 'attr4', null, 'attr4 version1')
;
-- attr2 was changed later
insert into history
(row_id, created, record_id, fieldName, oldValue, newValue)
values
(5, cast('2017-06-07T14:45:00' as datetime2), 1, 'attr2', 'attr 2 version 1', 'attr2 version2')
;
-- expectation (new table)
create table timeSeries (
row_id int,
TS_Start datetime2,
TS_End datetime2,
attr1 varchar(50),
attr2 varchar(50),
attr3 varchar(50),
attr4 varchar(50),
attr5 varchar(50)
)
;
-- this represent the states of the record in the form of "valid from/valid to"
insert into timeSeries
(row_id, TS_Start, TS_End, attr1, attr2, attr3, attr4, attr5)
values
(1, cast('2017-06-01T10:00:00' as datetime2), cast('2017-06-05T12:29:59' as datetime2), 'attr1 version1', 'attr2 version1', 'attr3 version1', null, null)
;
-- notice, the end date/time will be -1 second from the next start date/time)
insert into timeSeries
(row_id, TS_Start, TS_End, attr1, attr2, attr3, attr4, attr5)
values
(1, cast('2017-06-05T12:30:00' as datetime2), cast('2017-06-07T14:44:59' as datetime2), 'attr1 version1', 'attr2 version1', 'attr3 version1', 'attr4 version1', null)
;
insert into timeSeries
(row_id, TS_Start, TS_End, attr1, attr2, attr3, attr4, attr5)
values
(1, cast('2017-06-07T14:45:00' as datetime2), cast('9999-12-31T23:59:59' as datetime2), 'attr1 version1', 'attr2 version2', 'attr3 version1', 'attr4 version1', null)
;
Thanks for any Help!
Roger
November 10, 2017 at 2:21 pm
Your history table seems to be missing an entry for attr5. You have entries for all other attributes when they were changed from NULL to something.
The following gives your expected results based on the table you provided.;
WITH NewValues AS
(
SELECT
created AS TS_Start
, DATEADD(s, -1, LEAD(created, 1, '9999-12-31 23:59:59') OVER(PARTITION BY record_id ORDER BY created)) AS TS_End
, record_id
, CASE WHEN fieldName = 'attr1' THEN ISNULL(newValue, '') END AS attr1
, CASE WHEN fieldName = 'attr2' THEN ISNULL(newValue, '') END AS attr2
, CASE WHEN fieldName = 'attr3' THEN ISNULL(newValue, '') END AS attr3
, CASE WHEN fieldName = 'attr4' THEN ISNULL(newValue, '') END AS attr4
, CASE WHEN fieldName = 'attr5' THEN ISNULL(newValue, '') END AS attr5
FROM #history
)
, pivoted AS
(
SELECT
TS_Start
, TS_End
, CAST(
SUBSTRING(
MAX(CAST(TS_Start AS BINARY(12)) + CAST(attr1 AS BINARY(50)))
OVER(PARTITION BY record_id ORDER BY TS_Start ROWS UNBOUNDED PRECEDING)
, 13, 50)
AS VARCHAR(50)) AS attr1
, CAST(
SUBSTRING(
MAX(CAST(TS_Start AS BINARY(12)) + CAST(attr2 AS BINARY(50)))
OVER(PARTITION BY record_id ORDER BY TS_Start ROWS UNBOUNDED PRECEDING)
, 13, 50)
AS VARCHAR(50)) AS attr2
, CAST(
SUBSTRING(
MAX(CAST(TS_Start AS BINARY(12)) + CAST(attr3 AS BINARY(50)))
OVER(PARTITION BY record_id ORDER BY TS_Start ROWS UNBOUNDED PRECEDING)
, 13, 50)
AS VARCHAR(50)) AS attr3
, CAST(
SUBSTRING(
MAX(CAST(TS_Start AS BINARY(12)) + CAST(attr4 AS BINARY(50)))
OVER(PARTITION BY record_id ORDER BY TS_Start ROWS UNBOUNDED PRECEDING)
, 13, 50)
AS VARCHAR(50)) AS attr4
, CAST(
SUBSTRING(
MAX(CAST(TS_Start AS BINARY(12)) + CAST(attr5 AS BINARY(50)))
OVER(PARTITION BY record_id ORDER BY TS_Start ROWS UNBOUNDED PRECEDING)
, 13, 50)
AS VARCHAR(50)) AS attr5
FROM NewValues
)
SELECT *
FROM pivoted
WHERE ts_Start < TS_End
;
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
November 10, 2017 at 2:42 pm
Hey Drew
Thanks for the real quick answer. You looked pretty closely though. That's a mistake in my example. I shouldn't have provided any value for attr5 in the live table, since I wanted to demonstrate a column that remained empty over time. Your solution perfectly fits my needs 🙂
Viewing 3 posts - 1 through 2 (of 2 total)
You must be logged in to reply to this topic. Login to reply