January 8, 2016 at 7:52 pm
Eirikur Eiriksson (1/3/2016)
Timing the different methods using the previously posted test data set😎
Test harness
-- TEST: Different ways of calculating duration
-- DATA SET: http://www.sqlservercentral.com/Forums/FindPost1749100.aspx
DECLARE @timer TABLE (T_TEXT VARCHAR(100) NOT NULL, T_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME()));
DECLARE @MT_ID INT = 0;
DECLARE @INT_BUCKET_01 INT = 0;
DECLARE @INT_BUCKET_02 INT = 0;
DECLARE @INT_BUCKET_03 INT = 0;
DECLARE @INT_BUCKET_04 INT = 0;
DECLARE @INT_BUCKET_05 INT = 0;
DECLARE @DATETIME_BUCKET_01 DATETIME = 0;
DECLARE @DATETIME_BUCKET_02 DATETIME = 0;
DECLARE @DATETIME2_7_BUCKET_01 DATETIME2(7) = CONVERT( DATETIME2(7), '2016-01-01',120);
DECLARE @DATETIME2_7_BUCKET_02 DATETIME2(7) = CONVERT( DATETIME2(7), '2016-01-01',120);
DECLARE @BIGINT_BUCKET_01 BIGINT = 0;
DECLARE @FLOAT_BUCKET FLOAT = 0.0;
--/*
INSERT INTO @timer(T_TEXT) VALUES ('DRY RUN');
SELECT
@INT_BUCKET_01 = TC.TIME_CARD_ID
,@INT_BUCKET_02 = TC.DAY_OFFSET
,@INT_BUCKET_03 = TC.START_TIME
,@INT_BUCKET_04 = TC.UTC_OFFSET_S
,@INT_BUCKET_05 = TC.DURATION_SEC
,@DATETIME_BUCKET_01 = TC.START_DATE_DT
,@DATETIME_BUCKET_02 = TC.END_DATE_DT
,@DATETIME2_7_BUCKET_01 = TC.START_DATE_DT2
,@DATETIME2_7_BUCKET_02 = TC.END_DATE_DT2
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('DRY RUN');
-- */
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 1');
SELECT
@FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 1');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 2');
SELECT
@FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 2');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 3');
SELECT
@FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 3');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 1');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 1');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 2');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 2');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 3');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 3');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 1');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 1');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 2');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 2');
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 3');
SELECT
@FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 3');
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 1');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 1');
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 2');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 2');
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 3');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 3');
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 1');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 1');
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 2');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 2');
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 3');
SELECT
@FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC;
INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 3');
-- Test results
SELECT
T.T_TEXT
,DATEDIFF(MICROSECOND,MIN(T.T_TS),MAX(T.T_TS)) AS DURATION
FROM @timer T
GROUP BY T.T_TEXT
ORDER BY DURATION ASC;
-- Verifying the calculations
SELECT
'ACTUAL' AS METHOD
,SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600 AS VALUE
FROM dbo.TBL_TIME_CARD TC
UNION ALL
SELECT
'DATETIME'
,SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600
FROM dbo.TBL_TIME_CARD TC
UNION ALL
SELECT
'DATETIME2'
,SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600
FROM dbo.TBL_TIME_CARD TC
UNION ALL
SELECT
'DECIMAL'
,SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )
FROM dbo.TBL_TIME_CARD TC
UNION ALL
SELECT
'FLOAT'
,SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24)
FROM dbo.TBL_TIME_CARD TC;
Timing results
T_TEXT DURATION
-------------------- -----------
FLOAT_TRICK 3 308017
FLOAT_TRICK 2 315018
FLOAT_TRICK 1 316018
DRY RUN 331019
SUM_DURATION 1 424024
SUM_DURATION 3 425024
SUM_DURATION 2 431025
SUM_DATEDIFF_DT 3 543031
SUM_DATEDIFF_DT 2 595034
SUM_DATEDIFF_DT 1 626036
SUM_DATEDIFF_DT2 1 648037
SUM_DATEDIFF_DT2 2 649037
SUM_DATEDIFF_DT2 3 771044
DECIMAL_TRICK 3 1375079
DECIMAL_TRICK 1 1392080
DECIMAL_TRICK 2 1433082
Method verification
METHOD VALUE
--------- ----------------------
ACTUAL 10997743.1033333
DATETIME 10997743.103333
DATETIME2 10997743.103333
DECIMAL 10997743.1033333
FLOAT 10997743.1033331
Very cool. Great test, Eirikur. I've not gone through it stone by stone but it looks great. I'll try to go through it this weekend.
--Jeff Moden
Change is inevitable... Change for the better is not.
January 10, 2016 at 4:13 pm
drew.allen (12/31/2015)
GREAT! But suppose your company is going multi-national and they want to record dates using DATETIMEOFFSET. Update your code to use a DATETIMEOFFSET field with offsets ranging between -14:00 and +14:00.Drew
DATETIMEOFFSET is not a relational data type.
It contains 2 different values in 1 placeholder, which is prohibited by Relational Theory.
Values with delimiters ("T" in this case) belong to the world of XML, JSON, other NoSQL data models, but they do not fit relational model.
In correctly designed relational database DateTime and TimeZoneOffset must be stored in separate columns.
Since DATETIMEOFFSET data type does not a place in a relational database there is no point to discuss any use of it. 😎
_____________
Code for TallyGenerator
January 11, 2016 at 3:13 pm
Sergiy (1/10/2016)
DATETIMEOFFSET is not a relational data type.It contains 2 different values in 1 placeholder, which is prohibited by Relational Theory.
Values with delimiters ("T" in this case) belong to the world of XML, JSON, other NoSQL data models, but they do not fit relational model.
You are confusing content with representation. If the presence of a delimiter is key, then you could equally well argue that date is non-relational (delimiter being "-" or "/" depending on date format used), or that time is non-relational (delimiter being ":").
Relational theory forbids combining multiple atomic values in a single column, but the definition of atomic is not as strict as some people think. (Heck, over two decades ago I had a long discussion on Usenet with someone who built a database that seperated strings into characters, then replaced each character with a pointer to the single place where that character was stored - because that was how he interpreted the normalization rules. It was gruesome).
I define atomic in the context of an application. Many people automatically have separate columns for street, number, postal code, city, and country. That may often be correct but it doesn't have to be. If all the application ever does is combine those elements and slap them on an envelope for a mailing, then you could have a single column "postal address" with the fully formatted address, and it's still atomic - because the constituting parts are never considered. On the other hand, I have also worked on a mass mailing system where better rates were gotten because the application sorted the post be elements of the postal code - if I remember correctly, the first two digits of the postal code were used in sorting. So in that system, storing postal code in a single column was a violation of the relational model.
For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.
On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.
(And on a more practical note, this forum is for users of SQL Server, which falls short of the relational model in many ways, so I will happily discuss all its features. My goal is to build and maintain working systems, not to be a relational purist - even though I know my relational theory better than most people think).
January 11, 2016 at 7:14 pm
Hugo Kornelis (1/11/2016)
You are confusing content with representation. If the presence of a delimiter is key, then you could equally well argue that date is non-relational (delimiter being "-" or "/" depending on date format used), or that time is non-relational (delimiter being ":").
No, I'm not confusing anything.
DATETIMEOFFSET is indeed made up from the different values having totally different meaning.
For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.
No, just 2:
- Datetime;
- TimeZone.
3rd part - precision - is a property of the data type and must be stored in table definition.
Splitting Datetime into parts would be wrong (another wrong doing from MS - introducing DATE and TIME data types).
Why?
Because adding 10 ms to a datetime value may change not only its millisecond part, but second, minute, hour, day, month and even year parts.
So, datetime is a single value representing a measure of a continuous entity.
But adding any time portion to the value won't change its time zone. Same as changing time zone won't necessarily change datetime part.
Correlation between datetime and time zone is defined by business logic, not by the nature of the data.
Therefore, time zone a separate value and must be stored separately from datetime part.
On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.
You never work with combination of those three.
To operate DATETIMEOFFSET you need to split datetime from timezone first, bring datetime to a common timezone offset, do whatever operation you need to do on datetime part, choose which time zone to use for the result, then merge them back together and save in the table.
Actually, having time zone stored separately makes such operation simpler, as the splitting-merging steps can be omitted.
(And on a more practical note, this forum is for users of SQL Server, which falls short of the relational model in many ways, so I will happily discuss all its features. My goal is to build and maintain working systems, not to be a relational purist - even though I know my relational theory better than most people think).
I heard complains about my too academic approach from many developers many times in my life.
Just several month (sometimes weeks) before the very same developers were asking me to resolve data consistency and/or performance issues, or to help them find a way around of too complicated report queries.
You know, most of the times normalising data was exactly what was needed to resolve all the issues.
🙂
_____________
Code for TallyGenerator
January 12, 2016 at 3:51 pm
Sergiy (1/11/2016)
No, I'm not confusing anything.DATETIMEOFFSET is indeed made up from the different values having totally different meaning.
If you think that datetimeoffset violates relational theory, then you should not take arguments from implementation internals. Implementation is deliberately left out of relational theory. (Or coming from a different angle - if Microsoft had decided to implement this in a different way, would you have had another opinion)?
For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.
No, just 2:
- Datetime;
- TimeZone.
3rd part - precision - is a property of the data type and must be stored in table definition.
So if you have a single datetime column and every query that uses it has to extract the date part and/or the time part individually, you still think you have an atomic attribute?
What data type would you use to store the time of a recurring appointment in a database that backs an agenda application?
What data type would you use to represent the dates for Pentecoast in the next ten years?
(To the last question, the best possible answer would probably be interval, but that ANSI-standard datatype has not been implemented in SQL Server yet - though I doubt if you would agree on interval as an atomic datatype).
Splitting Datetime into parts would be wrong (another wrong doing from MS - introducing DATE and TIME data types).
Why?
Because adding 10 ms to a datetime value may change not only its millisecond part, but second, minute, hour, day, month and even year parts.
So, datetime is a single value representing a measure of a continuous entity.
Yeah, and travelling ten miles if you happen to live near a timezone boundary can affect hour, day, month, and year parts, and in some areas of the world even minutes. What's the point you are making?
On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.
You never work with combination of those three.
Don't mistake your work experience as the full truth. You (and that not specifically addresses you, but everyone reading this) have simply no idea of the variety of applications that are built on top of relational databases.
To operate DATETIMEOFFSET you need to split datetime from timezone first, bring datetime to a common timezone offset, do whatever operation you need to do on datetime part, choose which time zone to use for the result, then merge them back together and save in the table.
Actually, having time zone stored separately makes such operation simpler, as the splitting-merging steps can be omitted.
I think that's the point I was making all the time - *if* you are running an application where you constantly need to split these, then use separate columns.
January 12, 2016 at 4:50 pm
Hugo Kornelis (1/12/2016)
Sergiy (1/11/2016)
No, I'm not confusing anything.DATETIMEOFFSET is indeed made up from the different values having totally different meaning.
If you think that datetimeoffset violates relational theory, then you should not take arguments from implementation internals. Implementation is deliberately left out of relational theory. (Or coming from a different angle - if Microsoft had decided to implement this in a different way, would you have had another opinion)?
I do not see any significantly other way of implementing this kind of data type.
And I do not want to speculate what datetimeoffset would be in some other world.
It is what it is:
1. 6-8 bytes of datetime2 value representing UTC date-time;
2. 2 bytes of smallint value representing time shift in minutes for the time zone.
These 2 values have nothing to do with each other.
Time Zone is used for presentation purposes only and is not participating in datetime operations.
DECLARE @Time1 datetimeoffset(7), @Time2 datetimeoffset(7)
SET @Time1 = '2010-12-15 21:04:03.1234567 +03:00'
SET @Time2 = '2010-12-15 18:04:03.1234567 +00:00'
IF @Time1 = @Time2
PRINT 'Matching UTC only'
ELSE
PRINT 'Matching Time Zone as well'
This example shows that SQL Server splits datetimeoffset values and uses only datetime2 part, ignoring time zone completely.
Necessity of splitting stored value on fly clearly indicates a violation of normalisation rules.
_____________
Code for TallyGenerator
January 12, 2016 at 5:05 pm
Hugo Kornelis (1/12/2016)
Yeah, and travelling ten miles if you happen to live near a timezone boundary can affect hour, day, month, and year parts, and in some areas of the world even minutes. What's the point you are making?
Apparently it's you who's confusing content and its representation.
Gotcha! 😛
Travelling 10 miles may change the time zone you're in, changing the rules of time representation, but it won't change the time itself.
See the code example I posted above.
_____________
Code for TallyGenerator
January 12, 2016 at 5:06 pm
If you insist on basing your ideas on relational theory on implementation choices made by Microsoft, then I guess we'll have to agree to disagree on this.
January 12, 2016 at 5:10 pm
Hugo Kornelis (1/12/2016)
If you insist on basing your ideas on relational theory on implementation choices made by Microsoft, then I guess we'll have to agree to disagree on this.
Agree to disagree on what?
_____________
Code for TallyGenerator
Viewing 9 posts - 31 through 38 (of 38 total)
You must be logged in to reply to this topic. Login to reply