March 22, 2016 at 8:12 am
DECLARE @data TABLE (
PrimaryKeyID INT NOT NULL IDENTITY(1,1),
DateStart DATETIME NOT NULL,
DateEnd DATETIME NOT NULL,
Price INT NOT NULL
)
INSERT INTO @data
(
DateStart,
DateEnd,
Price
)
SELECT '20160425', '20160426', 620 UNION ALL
SELECT '20160426', '20160427', 630 UNION ALL
SELECT '20160427', '20160428', 640 UNION ALL
SELECT '20160428', '20160429', 630 UNION ALL
SELECT '20160429', '20160430', 620 UNION ALL
SELECT '20160425', '20160430', 2587 UNION ALL
SELECT '20160425', '20160502', 3979 UNION ALL
SELECT '20160429', '20160502', 3583 UNION ALL
SELECT '20160430', '20160502', 2787 UNION ALL
SELECT '20160502', '20160503', 720 UNION ALL
SELECT '20160503', '20160504', 730 UNION ALL
SELECT '20160504', '20160505', 740 UNION ALL
SELECT '20160502', '20160506', 2388 UNION ALL
SELECT '20160502', '20160507', 2587 UNION ALL
SELECT '20160502', '20160509', 3979 UNION ALL
SELECT '20160506', '20160509', 3583 UNION ALL
SELECT '20160507', '20160509', 2787 UNION ALL
SELECT '20160509', '20160513', 2388 UNION ALL
SELECT '20160509', '20160514', 2587 UNION ALL
SELECT '20160509', '20160516', 3979 UNION ALL
SELECT '20160513', '20160516', 3583 UNION ALL
SELECT '20160514', '20160516', 2787
We have a new requirement to find the best matching/covering period/s for any search done
For e.g.
If I search for ‘20160425’ to ‘20160426’ the result should be (one exact match)
PrimaryKeyIDDateStartDateEnd Price
12016-04-25 00:00:00.0002016-04-26 00:00:00.000620
If I search for ‘20160425’ to ‘20160428’ the result should be (3 periods span)
PrimaryKeyIDDateStartDateEnd Price
12016-04-25 00:00:00.0002016-04-26 00:00:00.000620
22016-04-26 00:00:00.0002016-04-27 00:00:00.000630
32016-04-27 00:00:00.0002016-04-28 00:00:00.000640
If I search for ‘20160425’ to ‘20160430’ the result should be (one exact match)
PrimaryKeyIDDateStartDateEnd Price
62016-04-25 00:00:00.0002016-04-30 00:00:00.0002587
If I search for ‘20160425’ to ‘20160501’ the result should be (one period covering the search)
PrimaryKeyIDDateStartDateEnd Price
72016-04-25 00:00:00.0002016-05-02 00:00:00.0003979
Until three levels, I solved it using 3 Cross Joins, but I am stuck how to go forward with multiple levels
For e.g.
If I search for ‘20160426’ to ‘20160515’ the result should be
PrimaryKeyIDDateStart DateEnd Price
2 2016-04-26 00:00:00.0002016-04-27 00:00:00.000630
3 2016-04-27 00:00:00.0002016-04-28 00:00:00.000640
4 2016-04-28 00:00:00.0002016-04-29 00:00:00.000630
5 2016-04-29 00:00:00.0002016-04-30 00:00:00.000620
9 2016-04-30 00:00:00.0002016-05-02 00:00:00.0002787
15 2016-05-02 00:00:00.0002016-05-09 00:00:00.0003979
20 2016-05-09 00:00:00.0002016-05-16 00:00:00.0003979
Can anyone please help me solving this? Thanks in advance
March 22, 2016 at 8:59 am
SELECT *
FROM @data
WHERE (DateStart = @DateStart AND DateEnd = @DateEnd)
OR (DateStart >= @DateStart AND DateEnd <= @DateEnd)
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 22, 2016 at 9:07 am
I would be very happy if there will be such a simple solution, but this is not giving the desired results. 🙁
March 22, 2016 at 11:45 am
jewel.sacred (3/22/2016)
I would be very happy if there will be such a simple solution, but this is not giving the desired results. 🙁
How about the following:
CREATE TABLE #Data (
PrimaryKeyID INT NOT NULL IDENTITY(1,1) PRIMARY KEY CLUSTERED,
DateStart DATETIME NOT NULL,
DateEnd DATETIME NOT NULL,
Price INT NOT NULL
);
CREATE INDEX IDX_tempdb_Data_DateStart_DateEnd_INCLUDES_Price ON #Data
(
DateStart ASC,
DateEnd ASC
)
INCLUDE
(
Price
);
CREATE INDEX IDX_tempdb_Data_DateEnd_DateStart_INCLUDES_Price ON #Data
(
DateEnd ASC,
DateStart ASC
)
INCLUDE
(
Price
);
INSERT INTO #Data
(
DateStart,
DateEnd,
Price
)
SELECT '20160425', '20160426', 620 UNION ALL
SELECT '20160426', '20160427', 630 UNION ALL
SELECT '20160427', '20160428', 640 UNION ALL
SELECT '20160428', '20160429', 630 UNION ALL
SELECT '20160429', '20160430', 620 UNION ALL
SELECT '20160425', '20160430', 2587 UNION ALL
SELECT '20160425', '20160502', 3979 UNION ALL
SELECT '20160429', '20160502', 3583 UNION ALL
SELECT '20160430', '20160502', 2787 UNION ALL
SELECT '20160502', '20160503', 720 UNION ALL
SELECT '20160503', '20160504', 730 UNION ALL
SELECT '20160504', '20160505', 740 UNION ALL
SELECT '20160502', '20160506', 2388 UNION ALL
SELECT '20160502', '20160507', 2587 UNION ALL
SELECT '20160502', '20160509', 3979 UNION ALL
SELECT '20160506', '20160509', 3583 UNION ALL
SELECT '20160507', '20160509', 2787 UNION ALL
SELECT '20160509', '20160513', 2388 UNION ALL
SELECT '20160509', '20160514', 2587 UNION ALL
SELECT '20160509', '20160516', 3979 UNION ALL
SELECT '20160513', '20160516', 3583 UNION ALL
SELECT '20160514', '20160516', 2787;
DECLARE @DateStart AS datetime = '20160426', @DateEnd AS datetime = '20160515';
WITH EXACT_MATCHES AS (
SELECT TOP (1) *, ROW_NUMBER() OVER (ORDER BY PrimaryKeyID) AS RowNum
FROM #Data
WHERE (DateStart = @DateStart AND DateEnd = @DateEnd)
ORDER BY PrimaryKeyID
),
RANGE_MAXES AS (
SELECT D.DateStart, MAX(D.DateEnd) AS MaxDateEnd, ROW_NUMBER() OVER (ORDER BY D.DateStart) AS RowNum
FROM #Data AS D
WHERE D.DateStart >= @DateStart
AND D.DateEnd <= @DateEnd
GROUP BY D.DateStart
),
RANGE_GROUPS AS (
SELECT D.*, M.RowNum
FROM #Data AS D
INNER JOIN RANGE_MAXES AS M
ON D.DateStart = M.DateStart
AND D.DateEnd = M.MaxDateEnd
WHERE NOT EXISTS (
SELECT 1
FROM RANGE_MAXES AS M2
WHERE
(
(
M2.DateStart <= M.DateStart
AND
M2.MaxDateEnd > M.MaxDateEnd
)
OR
(
M2.DateStart < M.DateStart
AND
M2.MaxDateEnd >= M.MaxDateEnd
)
AND M2.RowNum < M.RowNum
)
)
)
SELECT *
FROM EXACT_MATCHES
WHERE (SELECT COUNT(*) FROM EXACT_MATCHES) = 1
UNION
SELECT *
FROM RANGE_GROUPS AS R
WHERE (SELECT COUNT(*) FROM RANGE_GROUPS) > 1
AND (SELECT COUNT(*) FROM EXACT_MATCHES) = 0
ORDER BY DateStart;
DROP TABLE #Data;
Please poke holes in it. Test it, and make sure it meets all the criteria.
Steve (aka sgmunson) 🙂 🙂 🙂
Rent Servers for Income (picks and shovels strategy)
March 22, 2016 at 12:10 pm
Thanks. Very interesting indeed. But it fails for the following criteria
SELECT @DateStart = '20160425', @DateEnd = '20160501';
The result should have been
PrimaryKeyIDDateStartDateEnd Price
72016-04-25 00:00:00.0002016-05-02 00:00:00.0003979
March 22, 2016 at 12:40 pm
jewel.sacred (3/22/2016)
Thanks. Very interesting indeed. But it fails for the following criteriaSELECT @DateStart = '20160425', @DateEnd = '20160501';
The result should have been
PrimaryKeyIDDateStartDateEnd Price
72016-04-25 00:00:00.0002016-05-02 00:00:00.0003979
This is called "scope creep" and it's irritating when it's from your own BA team, let alone a stranger on a forum seeking help for free. Be nice. Please list any other deviations you know of.
For better assistance in answering your questions, please read this[/url].
Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]
March 22, 2016 at 12:55 pm
Yes, I can fully understand this as I have experienced it as well in past. 🙂 I really appreciate the help been provided. But I already mentioned this criteria in the question. Anyways, I will try to come up with any more variations if possible. Thanks.
March 22, 2016 at 1:09 pm
jewel.sacred (3/22/2016)
Yes, I can fully understand this as I have experienced it as well in past. 🙂 I really appreciate the help been provided. But I already mentioned this criteria in the question. Anyways, I will try to come up with any more variations if possible. Thanks.
Got it, humble apologies.
For better assistance in answering your questions, please read this[/url].
Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]
March 22, 2016 at 1:13 pm
jewel.sacred (3/22/2016)
Yes, I can fully understand this as I have experienced it as well in past. 🙂 I really appreciate the help been provided. But I already mentioned this criteria in the question. Anyways, I will try to come up with any more variations if possible. Thanks.
Examples are quite helpful indeed; in this case, though, the actual business rules defining the "best" match would be even more helpful.
So far, it seems like this based on the examples (of course correct me if I have any of it wrong):
1) For the date range input, return a set of ranges from the table. The ranges returned from the table should include all dates included by the input range.
2) An exact match to the input range is preferred over other covering sets.
3) The number of ranges from the table used to cover the input range should be as small as possible (i.e., if the input range can be exactly matched by 2 rows with larger ranges or 5 rows with smaller ranges, choose the former).
Now, 2 and 3 might end up being satisfied by different solutions, so some sort of priority needs to be specified. If I can cover the input range with 1 range from the table, but it's not an exact match, or with 5 ranges from the table that match it exactly, which is to be chosen?
Also, when constructing the sequences of ranges, must each subsequent range in sequence begin where the previous one ends, or can there be overlap within the returned set of ranges? In other words, could an input range of '20160401'-'20160420' be matched by '20160401'-'20160415' and '20160412'-'20160422'?
In the examples all the sets shown have subsequent ranges starting where the previous ranges ended, but I'm not sure if that's a rule or a coincidence.
If we can hammer out the precise rules required, this shouldn't be too hard to put together.
Cheers!
March 22, 2016 at 1:17 pm
Yes, I can fully understand this as I have experienced it as well in past. Smile I really appreciate the help been provided. But I already mentioned this criteria in the question. Anyways, I will try to come up with any more variations if possible. Thanks.
Can you provide the actual rules used to come up with those result sets? It looks like some of the results are contradictory.
March 22, 2016 at 1:51 pm
Jacob Wilkins (3/22/2016)
jewel.sacred (3/22/2016)
Yes, I can fully understand this as I have experienced it as well in past. 🙂 I really appreciate the help been provided. But I already mentioned this criteria in the question. Anyways, I will try to come up with any more variations if possible. Thanks.Examples are quite helpful indeed; in this case, though, the actual business rules defining the "best" match would be even more helpful.
So far, it seems like this based on the examples (of course correct me if I have any of it wrong):
1) For the date range input, return a set of ranges from the table. The ranges returned from the table should include all dates included by the input range.
2) An exact match to the input range is preferred over other covering sets.
Yes, that is correct. Exact match has the highest priority. If there is an exact match, rest of the date ranges falling in or covering the search criteria should be discarded
3) The number of ranges from the table used to cover the input range should be as small as possible (i.e., if the input range can be exactly matched by 2 rows with larger ranges or 5 rows with smaller ranges, choose the former).
Yes, this is correct as well
Now, 2 and 3 might end up being satisfied by different solutions, so some sort of priority needs to be specified. If I can cover the input range with 1 range from the table, but it's not an exact match, or with 5 ranges from the table that match it exactly, which is to be chosen?
if we cannot make a contiguous exact matching date range, then covering range should be provided as we need to return the date ranges falling in/ covering the search criteria in all cases.
Example being
If I search for ‘20160425’ to ‘20160428’ the result should be (3 periods span)
PrimaryKeyIDDateStartDateEnd Price
12016-04-25 00:00:00.0002016-04-26 00:00:00.000620
22016-04-26 00:00:00.0002016-04-27 00:00:00.000630
32016-04-27 00:00:00.0002016-04-28 00:00:00.000640
The contiguous date range has the priority here, as we do have the covering ranges in form of "20160425-20160502" and "20160425-20160430", but the latter ranges become invalid in this case.
Also, when constructing the sequences of ranges, must each subsequent range in sequence begin where the previous one ends, or can there be overlap within the returned set of ranges? In other words, could an input range of '20160401'-'20160420' be matched by '20160401'-'20160415' and '20160412'-'20160422'?
No, that is not possible.
In the examples all the sets shown have subsequent ranges starting where the previous ranges ended, but I'm not sure if that's a rule or a coincidence.
It is a rule. The subsequent ranges should start exactly where the previous range ends
If we can hammer out the precise rules required, this shouldn't be too hard to put together.
Cheers!
I hope this will clarify things a bit further. Thanks.
March 22, 2016 at 3:10 pm
This code doesn't look the greatest but it might do what you want. But how do you plan to handle it when there are multiple solutions? For example based on the sample data posted there are 18 possible solution that start on 4/26/2016 and end on 5/16/2016, the solution below is picking the one closest to the end date with the least number of jumps but there are lots of ways that could be prioritized.
DECLARE @start_date datetime
DECLARE @end_date datetime
SET @start_date = '2016-04-26';
SET @end_date = '2016-05-15';
IF EXISTS(SELECT * FROM #Data WHERE DateStart = @start_date AND DateEnd = @end_date)
BEGIN
SELECT * FROM #Data WHERE DateStart = @start_date AND DateEnd = @end_date
END
ELSE
BEGIN
WITH TEMP_CTE AS(
SELECT DATA_ONE.*, 1 AS TREE_LEVEL, CAST('|' + CAST(PrimaryKeyID AS varchar(30)) + '|' AS varchar(100)) AS TREE_PATH FROM #Data DATA_ONE WHERE DATA_ONE.DateStart = @start_date
UNION ALL
SELECT #Data.*, TEMP_CTE.TREE_LEVEL + 1, CAST(TEMP_CTE.TREE_PATH + '|' + CAST(#Data.PrimaryKeyID AS varchar(30)) + '|' AS varchar(100)) FROM #Data, TEMP_CTE WHERE #Data.DateStart < @end_date AND #Data.DateStart = TEMP_CTE.DateEnd AND #Data.DateStart > @start_date
), TEMP_CTE_TWO AS(
SELECT *, ROW_NUMBER() OVER(PARTITION BY 1 ORDER BY DATEDIFF(day, @end_date, DateEnd) ASC, TREE_LEVEL ASC) AS ROW_NUM FROM TEMP_CTE WHERE DateEnd >= @end_date
)
--SELECT DISTINCT TREE_PATH FROM TEMP_CTE_TWO --WHERE ROW_NUM = 1
SELECT #Data.*, TEMP_CTE_TWO.TREE_PATH FROM #Data, TEMP_CTE_TWO WHERE CHARINDEX('|' + CAST(#Data.PrimaryKeyID AS varchar(30)) + '|', TEMP_CTE_TWO.TREE_PATH) > 0 AND TEMP_CTE_TWO.ROW_NUM = 1
END
March 23, 2016 at 6:27 am
ZZartin (3/22/2016)
This code doesn't look the greatest but it might do what you want. But how do you plan to handle it when there are multiple solutions? For example based on the sample data posted there are 18 possible solution that start on 4/26/2016 and end on 5/16/2016, the solution below is picking the one closest to the end date with the least number of jumps but there are lots of ways that could be prioritized.
DECLARE @start_date datetime
DECLARE @end_date datetime
SET @start_date = '2016-04-26';
SET @end_date = '2016-05-15';
IF EXISTS(SELECT * FROM #Data WHERE DateStart = @start_date AND DateEnd = @end_date)
BEGIN
SELECT * FROM #Data WHERE DateStart = @start_date AND DateEnd = @end_date
END
ELSE
BEGIN
WITH TEMP_CTE AS(
SELECT DATA_ONE.*, 1 AS TREE_LEVEL, CAST('|' + CAST(PrimaryKeyID AS varchar(30)) + '|' AS varchar(100)) AS TREE_PATH FROM #Data DATA_ONE WHERE DATA_ONE.DateStart = @start_date
UNION ALL
SELECT #Data.*, TEMP_CTE.TREE_LEVEL + 1, CAST(TEMP_CTE.TREE_PATH + '|' + CAST(#Data.PrimaryKeyID AS varchar(30)) + '|' AS varchar(100)) FROM #Data, TEMP_CTE WHERE #Data.DateStart < @end_date AND #Data.DateStart = TEMP_CTE.DateEnd AND #Data.DateStart > @start_date
), TEMP_CTE_TWO AS(
SELECT *, ROW_NUMBER() OVER(PARTITION BY 1 ORDER BY DATEDIFF(day, @end_date, DateEnd) ASC, TREE_LEVEL ASC) AS ROW_NUM FROM TEMP_CTE WHERE DateEnd >= @end_date
)
--SELECT DISTINCT TREE_PATH FROM TEMP_CTE_TWO --WHERE ROW_NUM = 1
SELECT #Data.*, TEMP_CTE_TWO.TREE_PATH FROM #Data, TEMP_CTE_TWO WHERE CHARINDEX('|' + CAST(#Data.PrimaryKeyID AS varchar(30)) + '|', TEMP_CTE_TWO.TREE_PATH) > 0 AND TEMP_CTE_TWO.ROW_NUM = 1
END
Thanks a lot for your help. I was thinking of the recursive CTE routine, but was not successful. I am tweaking your code a bit to sort out all the variations. For e.g. (Must say that this criteria was not mention in the question so sorry for that)
For searching '20160501-20160502' the covering period is
PrimaryKeyIDDateStartDateEnd Price
92016-04-30 00:00:00.0002016-05-02 00:00:00.0002787
But once again thanks for giving your time. Really appreciated.
Viewing 13 posts - 1 through 12 (of 12 total)
You must be logged in to reply to this topic. Login to reply