query for datetime

  • Eirikur Eiriksson (1/3/2016)


    Timing the different methods using the previously posted test data set

    😎

    Test harness

    -- TEST: Different ways of calculating duration

    -- DATA SET: http://www.sqlservercentral.com/Forums/FindPost1749100.aspx

    DECLARE @timer TABLE (T_TEXT VARCHAR(100) NOT NULL, T_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME()));

    DECLARE @MT_ID INT = 0;

    DECLARE @INT_BUCKET_01 INT = 0;

    DECLARE @INT_BUCKET_02 INT = 0;

    DECLARE @INT_BUCKET_03 INT = 0;

    DECLARE @INT_BUCKET_04 INT = 0;

    DECLARE @INT_BUCKET_05 INT = 0;

    DECLARE @DATETIME_BUCKET_01 DATETIME = 0;

    DECLARE @DATETIME_BUCKET_02 DATETIME = 0;

    DECLARE @DATETIME2_7_BUCKET_01 DATETIME2(7) = CONVERT( DATETIME2(7), '2016-01-01',120);

    DECLARE @DATETIME2_7_BUCKET_02 DATETIME2(7) = CONVERT( DATETIME2(7), '2016-01-01',120);

    DECLARE @BIGINT_BUCKET_01 BIGINT = 0;

    DECLARE @FLOAT_BUCKET FLOAT = 0.0;

    --/*

    INSERT INTO @timer(T_TEXT) VALUES ('DRY RUN');

    SELECT

    @INT_BUCKET_01 = TC.TIME_CARD_ID

    ,@INT_BUCKET_02 = TC.DAY_OFFSET

    ,@INT_BUCKET_03 = TC.START_TIME

    ,@INT_BUCKET_04 = TC.UTC_OFFSET_S

    ,@INT_BUCKET_05 = TC.DURATION_SEC

    ,@DATETIME_BUCKET_01 = TC.START_DATE_DT

    ,@DATETIME_BUCKET_02 = TC.END_DATE_DT

    ,@DATETIME2_7_BUCKET_01 = TC.START_DATE_DT2

    ,@DATETIME2_7_BUCKET_02 = TC.END_DATE_DT2

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('DRY RUN');

    -- */

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 1');

    SELECT

    @FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 1');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 2');

    SELECT

    @FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 2');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 3');

    SELECT

    @FLOAT_BUCKET = SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DURATION 3');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 1');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 1');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 2');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 2');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 3');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT 3');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 1');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 1');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 2');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 2');

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 3');

    SELECT

    @FLOAT_BUCKET = SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('SUM_DATEDIFF_DT2 3');

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 1');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 1');

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 2');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 2');

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 3');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('DECIMAL_TRICK 3');

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 1');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 1');

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 2');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 2');

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 3');

    SELECT

    @FLOAT_BUCKET = SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC;

    INSERT INTO @timer(T_TEXT) VALUES ('FLOAT_TRICK 3');

    -- Test results

    SELECT

    T.T_TEXT

    ,DATEDIFF(MICROSECOND,MIN(T.T_TS),MAX(T.T_TS)) AS DURATION

    FROM @timer T

    GROUP BY T.T_TEXT

    ORDER BY DURATION ASC;

    -- Verifying the calculations

    SELECT

    'ACTUAL' AS METHOD

    ,SUM(CONVERT(DECIMAL(28,17),TC.DURATION_SEC,0)) / 3600 AS VALUE

    FROM dbo.TBL_TIME_CARD TC

    UNION ALL

    SELECT

    'DATETIME'

    ,SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT,TC.END_DATE_DT)) / 3600

    FROM dbo.TBL_TIME_CARD TC

    UNION ALL

    SELECT

    'DATETIME2'

    ,SUM(0.0 + DATEDIFF(SECOND,TC.START_DATE_DT2,TC.END_DATE_DT2)) / 3600

    FROM dbo.TBL_TIME_CARD TC

    UNION ALL

    SELECT

    'DECIMAL'

    ,SUM((CONVERT(DECIMAL(28,17),TC.END_DATE_DT,0)-CONVERT(DECIMAL(28,17),TC.START_DATE_DT,0)) * 24 )

    FROM dbo.TBL_TIME_CARD TC

    UNION ALL

    SELECT

    'FLOAT'

    ,SUM((CONVERT(FLOAT,TC.END_DATE_DT,0)-CONVERT(FLOAT,TC.START_DATE_DT,0)) * 24)

    FROM dbo.TBL_TIME_CARD TC;

    Timing results

    T_TEXT DURATION

    -------------------- -----------

    FLOAT_TRICK 3 308017

    FLOAT_TRICK 2 315018

    FLOAT_TRICK 1 316018

    DRY RUN 331019

    SUM_DURATION 1 424024

    SUM_DURATION 3 425024

    SUM_DURATION 2 431025

    SUM_DATEDIFF_DT 3 543031

    SUM_DATEDIFF_DT 2 595034

    SUM_DATEDIFF_DT 1 626036

    SUM_DATEDIFF_DT2 1 648037

    SUM_DATEDIFF_DT2 2 649037

    SUM_DATEDIFF_DT2 3 771044

    DECIMAL_TRICK 3 1375079

    DECIMAL_TRICK 1 1392080

    DECIMAL_TRICK 2 1433082

    Method verification

    METHOD VALUE

    --------- ----------------------

    ACTUAL 10997743.1033333

    DATETIME 10997743.103333

    DATETIME2 10997743.103333

    DECIMAL 10997743.1033333

    FLOAT 10997743.1033331

    Very cool. Great test, Eirikur. I've not gone through it stone by stone but it looks great. I'll try to go through it this weekend.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • drew.allen (12/31/2015)


    GREAT! But suppose your company is going multi-national and they want to record dates using DATETIMEOFFSET. Update your code to use a DATETIMEOFFSET field with offsets ranging between -14:00 and +14:00.

    Drew

    DATETIMEOFFSET is not a relational data type.

    It contains 2 different values in 1 placeholder, which is prohibited by Relational Theory.

    Values with delimiters ("T" in this case) belong to the world of XML, JSON, other NoSQL data models, but they do not fit relational model.

    In correctly designed relational database DateTime and TimeZoneOffset must be stored in separate columns.

    Since DATETIMEOFFSET data type does not a place in a relational database there is no point to discuss any use of it. 😎

    _____________
    Code for TallyGenerator

  • Sergiy (1/10/2016)


    DATETIMEOFFSET is not a relational data type.

    It contains 2 different values in 1 placeholder, which is prohibited by Relational Theory.

    Values with delimiters ("T" in this case) belong to the world of XML, JSON, other NoSQL data models, but they do not fit relational model.

    You are confusing content with representation. If the presence of a delimiter is key, then you could equally well argue that date is non-relational (delimiter being "-" or "/" depending on date format used), or that time is non-relational (delimiter being ":").

    Relational theory forbids combining multiple atomic values in a single column, but the definition of atomic is not as strict as some people think. (Heck, over two decades ago I had a long discussion on Usenet with someone who built a database that seperated strings into characters, then replaced each character with a pointer to the single place where that character was stored - because that was how he interpreted the normalization rules. It was gruesome).

    I define atomic in the context of an application. Many people automatically have separate columns for street, number, postal code, city, and country. That may often be correct but it doesn't have to be. If all the application ever does is combine those elements and slap them on an envelope for a mailing, then you could have a single column "postal address" with the fully formatted address, and it's still atomic - because the constituting parts are never considered. On the other hand, I have also worked on a mass mailing system where better rates were gotten because the application sorted the post be elements of the postal code - if I remember correctly, the first two digits of the postal code were used in sorting. So in that system, storing postal code in a single column was a violation of the relational model.

    For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.

    On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.

    (And on a more practical note, this forum is for users of SQL Server, which falls short of the relational model in many ways, so I will happily discuss all its features. My goal is to build and maintain working systems, not to be a relational purist - even though I know my relational theory better than most people think).


    Hugo Kornelis, SQL Server/Data Platform MVP (2006-2016)
    Visit my SQL Server blog: https://sqlserverfast.com/blog/
    SQL Server Execution Plan Reference: https://sqlserverfast.com/epr/

  • Hugo Kornelis (1/11/2016)


    You are confusing content with representation. If the presence of a delimiter is key, then you could equally well argue that date is non-relational (delimiter being "-" or "/" depending on date format used), or that time is non-relational (delimiter being ":").

    No, I'm not confusing anything.

    DATETIMEOFFSET is indeed made up from the different values having totally different meaning.

    http://weblogs.sqlteam.com/peterl/archive/2010/12/15/the-internal-storage-of-a-datetimeoffset-value.aspx

    For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.

    No, just 2:

    - Datetime;

    - TimeZone.

    3rd part - precision - is a property of the data type and must be stored in table definition.

    Splitting Datetime into parts would be wrong (another wrong doing from MS - introducing DATE and TIME data types).

    Why?

    Because adding 10 ms to a datetime value may change not only its millisecond part, but second, minute, hour, day, month and even year parts.

    So, datetime is a single value representing a measure of a continuous entity.

    But adding any time portion to the value won't change its time zone. Same as changing time zone won't necessarily change datetime part.

    Correlation between datetime and time zone is defined by business logic, not by the nature of the data.

    Therefore, time zone a separate value and must be stored separately from datetime part.

    On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.

    You never work with combination of those three.

    To operate DATETIMEOFFSET you need to split datetime from timezone first, bring datetime to a common timezone offset, do whatever operation you need to do on datetime part, choose which time zone to use for the result, then merge them back together and save in the table.

    Actually, having time zone stored separately makes such operation simpler, as the splitting-merging steps can be omitted.

    (And on a more practical note, this forum is for users of SQL Server, which falls short of the relational model in many ways, so I will happily discuss all its features. My goal is to build and maintain working systems, not to be a relational purist - even though I know my relational theory better than most people think).

    I heard complains about my too academic approach from many developers many times in my life.

    Just several month (sometimes weeks) before the very same developers were asking me to resolve data consistency and/or performance issues, or to help them find a way around of too complicated report queries.

    You know, most of the times normalising data was exactly what was needed to resolve all the issues.

    🙂

    _____________
    Code for TallyGenerator

  • Sergiy (1/11/2016)


    No, I'm not confusing anything.

    DATETIMEOFFSET is indeed made up from the different values having totally different meaning.

    http://weblogs.sqlteam.com/peterl/archive/2010/12/15/the-internal-storage-of-a-datetimeoffset-value.aspx

    If you think that datetimeoffset violates relational theory, then you should not take arguments from implementation internals. Implementation is deliberately left out of relational theory. (Or coming from a different angle - if Microsoft had decided to implement this in a different way, would you have had another opinion)?

    For datetimeoffset, the same applies. If the application code constantly needs to break it up in its constituting parts, then I agree with you that they should be split up in different columns - two, three, of perhaps even eight if all elements are used independently in the application.

    No, just 2:

    - Datetime;

    - TimeZone.

    3rd part - precision - is a property of the data type and must be stored in table definition.

    So if you have a single datetime column and every query that uses it has to extract the date part and/or the time part individually, you still think you have an atomic attribute?

    What data type would you use to store the time of a recurring appointment in a database that backs an agenda application?

    What data type would you use to represent the dates for Pentecoast in the next ten years?

    (To the last question, the best possible answer would probably be interval, but that ANSI-standard datatype has not been implemented in SQL Server yet - though I doubt if you would agree on interval as an atomic datatype).

    Splitting Datetime into parts would be wrong (another wrong doing from MS - introducing DATE and TIME data types).

    Why?

    Because adding 10 ms to a datetime value may change not only its millisecond part, but second, minute, hour, day, month and even year parts.

    So, datetime is a single value representing a measure of a continuous entity.

    Yeah, and travelling ten miles if you happen to live near a timezone boundary can affect hour, day, month, and year parts, and in some areas of the world even minutes. What's the point you are making?

    On the other hand, if your database has columns for date, time, and timezone, and you constantly need to query all three and combine them because you only work with the combination of the three, then a single column is better.

    You never work with combination of those three.

    Don't mistake your work experience as the full truth. You (and that not specifically addresses you, but everyone reading this) have simply no idea of the variety of applications that are built on top of relational databases.

    To operate DATETIMEOFFSET you need to split datetime from timezone first, bring datetime to a common timezone offset, do whatever operation you need to do on datetime part, choose which time zone to use for the result, then merge them back together and save in the table.

    Actually, having time zone stored separately makes such operation simpler, as the splitting-merging steps can be omitted.

    I think that's the point I was making all the time - *if* you are running an application where you constantly need to split these, then use separate columns.


    Hugo Kornelis, SQL Server/Data Platform MVP (2006-2016)
    Visit my SQL Server blog: https://sqlserverfast.com/blog/
    SQL Server Execution Plan Reference: https://sqlserverfast.com/epr/

  • Hugo Kornelis (1/12/2016)


    Sergiy (1/11/2016)


    No, I'm not confusing anything.

    DATETIMEOFFSET is indeed made up from the different values having totally different meaning.

    http://weblogs.sqlteam.com/peterl/archive/2010/12/15/the-internal-storage-of-a-datetimeoffset-value.aspx

    If you think that datetimeoffset violates relational theory, then you should not take arguments from implementation internals. Implementation is deliberately left out of relational theory. (Or coming from a different angle - if Microsoft had decided to implement this in a different way, would you have had another opinion)?

    I do not see any significantly other way of implementing this kind of data type.

    And I do not want to speculate what datetimeoffset would be in some other world.

    It is what it is:

    1. 6-8 bytes of datetime2 value representing UTC date-time;

    2. 2 bytes of smallint value representing time shift in minutes for the time zone.

    These 2 values have nothing to do with each other.

    Time Zone is used for presentation purposes only and is not participating in datetime operations.

    DECLARE @Time1 datetimeoffset(7), @Time2 datetimeoffset(7)

    SET @Time1 = '2010-12-15 21:04:03.1234567 +03:00'

    SET @Time2 = '2010-12-15 18:04:03.1234567 +00:00'

    IF @Time1 = @Time2

    PRINT 'Matching UTC only'

    ELSE

    PRINT 'Matching Time Zone as well'

    This example shows that SQL Server splits datetimeoffset values and uses only datetime2 part, ignoring time zone completely.

    Necessity of splitting stored value on fly clearly indicates a violation of normalisation rules.

    _____________
    Code for TallyGenerator

  • Hugo Kornelis (1/12/2016)


    Yeah, and travelling ten miles if you happen to live near a timezone boundary can affect hour, day, month, and year parts, and in some areas of the world even minutes. What's the point you are making?

    Apparently it's you who's confusing content and its representation.

    Gotcha! 😛

    Travelling 10 miles may change the time zone you're in, changing the rules of time representation, but it won't change the time itself.

    See the code example I posted above.

    _____________
    Code for TallyGenerator

  • If you insist on basing your ideas on relational theory on implementation choices made by Microsoft, then I guess we'll have to agree to disagree on this.


    Hugo Kornelis, SQL Server/Data Platform MVP (2006-2016)
    Visit my SQL Server blog: https://sqlserverfast.com/blog/
    SQL Server Execution Plan Reference: https://sqlserverfast.com/epr/

  • Hugo Kornelis (1/12/2016)


    If you insist on basing your ideas on relational theory on implementation choices made by Microsoft, then I guess we'll have to agree to disagree on this.

    Agree to disagree on what?

    _____________
    Code for TallyGenerator

Viewing 9 posts - 31 through 38 (of 38 total)

You must be logged in to reply to this topic. Login to reply