October 11, 2023 at 12:23 am
Hello,
I have a dataset of route data (this is just a single route) like below:
CREATE TABLE [dbo].[___routeSampleData](
[costCenterCode] [varchar](12) NULL,
[scheduleDate] [date] NULL,
[tripid] [int] NOT NULL,
[routeId] [int] NULL,
[routename] [varchar](254) NULL,
[activityId] [int] NULL,
[isFirstStop] [int] NOT NULL,
[isLastStop] [int] NOT NULL,
[eventOrder] [int] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 4, 0, 0, 1)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 8, 0, 0, 7846152)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737192, 83, N'217', 0, 0, 0, 25000009)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737194, 83, N'217', 0, 0, 0, 43749997)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737444, 83, N'217', 0, 0, 0, 57812476)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737444, 83, N'217', 1, 0, 0, 68359353)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737194, 83, N'217', 1, 0, 0, 76269519)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24737192, 83, N'217', 1, 0, 0, 82202169)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 5, 0, 0, 86651634)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746012, 83, N'217', 0, 0, 0, 86654914)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24749828, 83, N'217', 0, 0, 0, 86655516)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746012, 83, N'217', 1, 0, 0, 86657358)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24749828, 83, N'217', 1, 0, 0, 87490209)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24736980, 83, N'217', 0, 0, 0, 88114823)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24736980, 83, N'217', 1, 0, 0, 88583313)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746168, 83, N'217', 0, 0, 0, 88934664)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746168, 83, N'217', 1, 0, 0, 89198171)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 7, 0, 0, 89988705)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746127, 83, N'217', 0, 0, 0, 91897595)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 16, 0, 0, 92255532)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24746127, 83, N'217', 1, 0, 0, 93329276)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 5, 0, 0, 97624269)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24751743, 83, N'217', 0, 0, 0, 97661368)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24751743, 83, N'217', 1, 0, 0, 97689227)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 5, 0, 0, 97710077)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24745979, 83, N'217', 0, 0, 0, 97772735)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24745979, 83, N'217', 1, 0, 0, 97884092)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24749888, 83, N'217', 0, 0, 0, 98078969)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), 24749888, 83, N'217', 1, 0, 0, 98225153)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 6, 0, 0, 98663677)
GO
INSERT [dbo].[___routeSampleData] ([costCenterCode], [scheduleDate], [tripid], [routeId], [routename], [activityId], [isFirstStop], [isLastStop], [eventOrder]) VALUES (N'424', CAST(N'2023-10-10' AS Date), -1, 83, N'217', 3, 0, 0, 99999999)
GO
I'm trying to add another column that tells me which event on the route(s) is the first passenger event, which is represented with an activityId = 0. The code I'm using has Lag to identify the pick event (activityId = 0) following the route pullout (activityId = 4) but because sometimes the first actual event is not a passenger event, it's not returning what I need. Below is the query I'm using to find/designate the First Stop (and also same thing for Last Stop):
SELECT [costCenterCode]
,[scheduleDate]
,[tripid]
,[routeId]
,[routename]
,[activityId]
,CASE WHEN [activityId] = 0 AND LAG([activityId]) OVER (PARTITION BY [costCenterCode]
,[scheduleDate]
,[routeId]
ORDER BY [eventOrder] ASC) = 4 THEN 1 ELSE 0 END [isFirstStop]
,CASE WHEN [activityId] IN (0, 1) AND LEAD([activityId]) OVER (PARTITION BY [costCenterCode]
,[scheduleDate]
,[routeId]
ORDER BY [eventOrder] ASC) = 3 THEN 1 ELSE 0 END [isLastStop]
,[eventOrder]
FROM [dbo].[___routeSampleData]
If I could somehow direct the Lead/Lag to skip over any records where the activityId is not 0 (or 0,1 for isLastStop), that should give me what I'm looking for. But not sure how this can be done, or if there's a better way to get what I need(?). Can't pull this out into any temp tables/table variables, as it needs to be in the single SELECT statement.
Does anyone have any thoughts on this? Any help would be very much appreciated!
Thanks
October 11, 2023 at 1:57 pm
For the benefit of others, here is a consumable (and simplified) version of your set-up code. Please note in future how dates should be in 'YYYYMMDD' format (no need to CAST them) and the N'' form is used when inserting to NVARCHAR columns.
DROP TABLE IF EXISTS #routeSampleData;
CREATE TABLE #routeSampleData
(
costCenterCode VARCHAR(12) NULL
,scheduleDate DATE NULL
,tripid INT NOT NULL
,routeId INT NULL
,routename VARCHAR(254) NULL
,activityId INT NULL
,isFirstStop INT NOT NULL
,isLastStop INT NOT NULL
,eventOrder INT NULL
);
INSERT #routeSampleData
(
costCenterCode
,scheduleDate
,tripid
,routeId
,routename
,activityId
,isFirstStop
,isLastStop
,eventOrder
)
VALUES
('424', '20231010', -1, 83, '217', 4, 0, 0, 1)
,('424', '20231010', -1, 83, '217', 8, 0, 0, 7846152)
,('424', '20231010', 24737192, 83, '217', 0, 0, 0, 25000009)
,('424', '20231010', 24737194, 83, '217', 0, 0, 0, 43749997)
,('424', '20231010', 24737444, 83, '217', 0, 0, 0, 57812476)
,('424', '20231010', 24737444, 83, '217', 1, 0, 0, 68359353)
,('424', '20231010', 24737194, 83, '217', 1, 0, 0, 76269519)
,('424', '20231010', 24737192, 83, '217', 1, 0, 0, 82202169)
,('424', '20231010', -1, 83, '217', 5, 0, 0, 86651634)
,('424', '20231010', 24746012, 83, '217', 0, 0, 0, 86654914)
,('424', '20231010', 24749828, 83, '217', 0, 0, 0, 86655516)
,('424', '20231010', 24746012, 83, '217', 1, 0, 0, 86657358)
,('424', '20231010', 24749828, 83, '217', 1, 0, 0, 87490209)
,('424', '20231010', 24736980, 83, '217', 0, 0, 0, 88114823)
,('424', '20231010', 24736980, 83, '217', 1, 0, 0, 88583313)
,('424', '20231010', 24746168, 83, '217', 0, 0, 0, 88934664)
,('424', '20231010', 24746168, 83, '217', 1, 0, 0, 89198171)
,('424', '20231010', -1, 83, '217', 7, 0, 0, 89988705)
,('424', '20231010', 24746127, 83, '217', 0, 0, 0, 91897595)
,('424', '20231010', -1, 83, '217', 16, 0, 0, 92255532)
,('424', '20231010', 24746127, 83, '217', 1, 0, 0, 93329276)
,('424', '20231010', -1, 83, '217', 5, 0, 0, 97624269)
,('424', '20231010', 24751743, 83, '217', 0, 0, 0, 97661368)
,('424', '20231010', 24751743, 83, '217', 1, 0, 0, 97689227)
,('424', '20231010', -1, 83, '217', 5, 0, 0, 97710077)
,('424', '20231010', 24745979, 83, '217', 0, 0, 0, 97772735)
,('424', '20231010', 24745979, 83, '217', 1, 0, 0, 97884092)
,('424', '20231010', 24749888, 83, '217', 0, 0, 0, 98078969)
,('424', '20231010', 24749888, 83, '217', 1, 0, 0, 98225153)
,('424', '20231010', -1, 83, '217', 6, 0, 0, 98663677)
,('424', '20231010', -1, 83, '217', 3, 0, 0, 99999999);
SELECT *
FROM #routeSampleData rsd;
The absence of evidence is not evidence of absence.
Martin Rees
You can lead a horse to water, but a pencil must be lead.
Stan Laurel
October 11, 2023 at 2:44 pm
Thanks for your feedback Phil,
The reason for the layout of my sample data was just for simplicity. I inserted my sample data into a physical table and then just used SSMS Generate Scripts to script out the table schema and structure all at once. So it just took seconds to generate all of it for this purpose.
October 11, 2023 at 3:17 pm
I'm not sure I fully understand the requirements.
Do you need the first and last [activityId] = 0 / IN (0, 1) in the partition regardless, or does some other row value have to precede that 0/1 entry for it to count? That is, can we just select the first passenger event, period, or do we also have to insure that a certain other activity, such as 4, precedes it?
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
October 11, 2023 at 3:24 pm
Thanks for your feedback Phil,
The reason for the layout of my sample data was just for simplicity. I inserted my sample data into a physical table and then just used SSMS Generate Scripts to script out the table schema and structure all at once. So it just took seconds to generate all of it for this purpose.
Understood, but a few more minutes of your time is all that is required to make it simpler for your unpaid audience here.
The absence of evidence is not evidence of absence.
Martin Rees
You can lead a horse to water, but a pencil must be lead.
Stan Laurel
October 11, 2023 at 3:28 pm
I need just the passenger events (0 or o,1). But I think I may have gotten the answer from a colleague:
SELECT
costCenterCode,
scheduleDate,
tripid,
routeId,
routename,
activityId,
eventOrder,
CASE WHEN eventOrder = MIN(CASE WHEN activityId = 0 THEN eventOrder END) OVER(PARTITION BY costCenterCode, scheduleDate, routeId) THEN 1 ELSE 0 END AS isFirstStop,
CASE WHEN eventOrder = MAX(CASE WHEN activityId IN (0,1) THEN eventOrder END) OVER(PARTITION BY costCenterCode, scheduleDate, routeId) THEN 1 ELSE 0 END AS isLastStop
FROM [dbo].[___routeSampleData]
Thanks for everyone's time to review.
October 11, 2023 at 3:38 pm
Thanks for your feedback Phil,
The reason for the layout of my sample data was just for simplicity. I inserted my sample data into a physical table and then just used SSMS Generate Scripts to script out the table schema and structure all at once. So it just took seconds to generate all of it for this purpose.
You misspelled laziness. The code that SSMS Generate Scripts generates has not been updated in decades and is horrendous as a result. Even if you use SSMS Generate Scripts, you should go in and clean it up afterward, if nothing else to change permanent tables to temp tables.
To answer your original question, LAG/LEAD only ever look a specific number of records before/after the current record. You cannot specify a variable number of records. What you can do, is specify the ORDER BY in such a way that you don't need to skip addiitonal records. I believe the following will give you what you're looking for, although the lack of multiple routes and lack of expected results makes that unclear.
SELECT r.costCenterCode
, r.scheduleDate
, r.tripid
, r.routeId
, r.routename
, r.activityId
, CASE WHEN r.activityId = 0 AND LAG(r.activityId) OVER(PARTITION BY r.costCenterCode, r.scheduleDate, r.routeId ORDER BY CASE WHEN r.activityId IN (4,0) THEN 1 ELSE 2 END, r.eventOrder) = 4 THEN 1 ELSE 0 END AS isFirstStop
, CASE WHEN r.activityId IN (0, 1) AND LEAD(r.activityId) OVER(PARTITION BY r.costCenterCode, r.scheduleDate, r.routeId ORDER BY CASE WHEN r.activityId IN (0, 1, 3) THEN 1 ELSE 0 END, r.eventOrder) = 3 THEN 1 ELSE 0 END AS isLastStop
, r.eventOrder
FROM #routeSampleData AS r
ORDER BY r.costCenterCode, r.scheduleDate, r.routeId, r.eventOrder
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
October 11, 2023 at 3:57 pm
I need just the passenger events (0 or o,1). But I think I may have gotten the answer from a colleague:
SELECT costCenterCode, scheduleDate, tripid, routeId, routename, activityId, eventOrder, CASE WHEN eventOrder = MIN(CASE WHEN activityId = 0 THEN eventOrder END) OVER(PARTITION BY costCenterCode, scheduleDate, routeId) THEN 1 ELSE 0 END AS isFirstStop, CASE WHEN eventOrder = MAX(CASE WHEN activityId = 1 THEN eventOrder END) OVER(PARTITION BY costCenterCode, scheduleDate, routeId) THEN 1 ELSE 0 END AS isLastStop FROM [dbo].[___routeSampleData]
Thanks for everyone's time to review.
I was thinking along the same lines, but wanted to be sure of the requirements before I posted a response. Making that the initial requirements would have considerably simplified the q.
FWIW, I don't object all that much to the method you used to post data. It's vastly better than not posting data at all.
SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".
Viewing 8 posts - 1 through 7 (of 7 total)
You must be logged in to reply to this topic. Login to reply