November 19, 2014 at 6:15 pm
I've got a working query which returns all leads within a supplied proximity to a city. I followed a tutorial I googled a couple months ago (can't find it now). It works, but would love others to look the query over (provided DDL and sample data) and tell me if it's as it should be.
Two things I don't like about query:
1. I have to do a UNION to another query that retrieves everything that is in the same city in order to have complete results.
2. very slow to retrieve results (> 1 minute)
Sample DDL: 2 tables
create table dim_lead
(
date_created datetime,
[contact_first_name] varchar(20),
[contact_last_name] varchar(20),
lead_id int,
[contact_city] varchar(20),
[contact_state] varchar(2),
[contact_zip] varchar(10),
latitude float,
longitude float,
sub_status varchar(5));
insert into dim_lead values
('2014-11-12 00:00:00.000', 'Leah', 'Loan', 1234, 'Boulder', 'CO', '80302', 40.015739, -105.279243, 'R'),
('2014-11-12 00:00:00.000', 'Mary', 'Morris', 4321, 'Boston', 'MA', '02133', 42.358631, -71.056702, 'R'),
('2014-11-12 00:00:00.000', 'Nancy', 'Nham', 3241, 'Colorado Springs', 'CO', '80949', 38.83345, -104.821808, 'R'),
('2014-11-12 00:00:00.000', 'Olga', 'Ofram', 2314, 'Denver', 'CO', '80201', 39.740009, -104.992264, 'R'),
('2014-11-12 00:00:00.000', 'Patty', 'Phillips', 3421, 'Greeley', 'CO', '80631', 40.423279, -104.694458, 'R'),
('2014-11-12 00:00:00.000', 'Queen', 'Quon', 2143, 'New York', 'NY', '10025', 40.71455, -74.007118, 'R'),
('2014-11-12 00:00:00.000', 'Rebecca', 'Rhode', 2431, 'Pueblo', 'CO', '81001', 38.263859, -104.612373, 'R');
--i got this generate script using SSMS generate script capability
CREATE TABLE [dbo].[dim_geography](
[dim_geography_key] [int] IDENTITY(1,1) NOT NULL,
[zip] [varchar](10) NOT NULL,
[city] [varchar](50) NULL,
[state] [varchar](2) NULL,
[country] [varchar](2) NULL,
[county] [varchar](40) NULL,
[latitude] [float] NULL,
[longitude] [float] NULL,
[rn] [bigint] NULL
) ON [PRIMARY]
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (47683, N'02133', N'Boston', N'MA', N'US', N'Suffolk', 42.358631, -71.056702, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (65413, N'80302', N'Boulder', N'CO', N'US', N'Boulder', 40.015739, -105.279243, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (254402, N'80949', N'Colorado Springs', N'CO', N'US', N'El Paso', 38.83345, -104.821808, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (111044, N'80201', N'Denver', N'CO', N'US', N'Denver', 39.740009, -104.992264, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (60117, N'80631', N'Greeley', N'CO', N'US', N'Weld', 40.423279, -104.694458, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (2851992, N'10025', N'New York', N'NY', N'US', N'New York', 40.71455, -74.007118, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (204835, N'81001', N'Pueblo', N'CO', N'US', N'Pueblo', 38.263859, -104.612373, 1)
SET IDENTITY_INSERT [dbo].[dim_geography] OFF
--My query
IF OBJECT_ID('tempdb..#LEADS_LAT_LONG') IS NOT NULL
DROP TABLE #LEADS_LAT_LONG;
SELECT DISTINCT
l.date_created
, l.[contact_first_name] + ' ' + l.[contact_last_name] as Name
, l.lead_id
, l.sub_status
, l.[contact_city] lead_city
, l.[contact_state] lead_state
, l.[contact_zip] lead_zip
, geo.latitude AS lead_lat
, geo.longitude AS lead_long
INTO #LEADS_LAT_LONG
FROM sandbox.dbo.dim_lead l
LEFT JOIN sandbox.dbo.dim_geography geo
ON l.[contact_zip] = geo.zip
WHERE 1=1
AND l.date_created > GETDATE() -30
AND l.sub_status = 'R'
AND geo.latitude IS NOT NULL
AND geo.longitude IS NOT NULL;
--select * from #LEADS_LAT_LONG;
SELECT distinct
lll.Name as Lead
, lll.lead_city Lead_City
, lll.lead_state Lead_State
, lll.lead_zip Lead_Zip
, lll.sub_status
--, geo.zip
, geo.city as 'City Center'
, geo.state
--, lll.lead_lat
--, lll.lead_long
--, geo.latitude AS 'city_lat'
--, geo.longitude AS 'city_long'
, distance_miles = Geography::Point(COALESCE(geo.latitude,0), COALESCE(geo.longitude,0), 4326).STDistance(Geography::Point(COALESCE(lll.lead_lat,0),COALESCE(lll.lead_long,0), 4326))/1609.34
FROM #LEADS_LAT_LONG lll
JOIN sandbox.dbo.dim_geography geo
on lll.lead_zip <> geo.zip
AND lll.lead_city <> 'Denver'
AND geo.city = 'Denver'
AND geo.state = 'CO'
AND Geography::Point(COALESCE(geo.latitude,0), COALESCE(geo.longitude,0), 4326).STDistance(Geography::Point(COALESCE(lll.lead_lat,0),COALESCE(lll.lead_long,0), 4326))/1609.34 < 250
UNION ALL
SELECT distinct
lll.Name as Lead
, lll.lead_city Lead_City
, lll.lead_state Lead_State
, lll.lead_zip Lead_Zip
, lll.sub_status
, lll.lead_city as 'City Center'
, lll.lead_state as state
, distance_miles = 0
FROM #LEADS_LAT_LONG lll
WHERE lll.lead_city = 'Denver'
AND lll.lead_state = 'CO'
Is this good way?
November 19, 2014 at 10:55 pm
Quick thought, if this type of queries are frequent, consider adding a geography column to the tables.
😎
Here is a far more efficient way of producing the same results
USE tempdb;
GO
SET NOCOUNT ON;
;WITH LEAD_DATA AS
(
SELECT
DL.contact_first_name
,DL.contact_last_name
,DL.contact_city
,DL.contact_state
,DL.contact_zip
,DL.sub_status
,Geography::Point(COALESCE(DL.latitude,0),COALESCE(DL.longitude,0), 4326) AS GEO_POINT
FROM dbo.dim_lead DL
)
,GEO_DATA AS
(
SELECT
DG.city
,DG.state
,Geography::Point(COALESCE(DG.latitude,0),COALESCE(DG.longitude,0), 4326) AS GEO_POINT
FROM dbo.dim_geography DG
WHERE DG.city = 'Denver'
AND DG.state = 'CO'
)
,FINAL_SET AS
(
SELECT
LD.contact_first_name + CHAR(32) + LD.contact_last_name AS FULL_NAME
,LD.contact_city
,LD.contact_state
,LD.contact_zip
,LD.sub_status
,GD.city
,GD.state
--,(LD.GEO_POINT.STDistance(GD.GEO_POINT) * 0.001) AS DIST_METER
,(LD.GEO_POINT.STDistance(GD.GEO_POINT) * 0.0006213) AS DIST_MILE
FROM LEAD_DATA LD
CROSS JOIN GEO_DATA GD
)
SELECT
FS.FULL_NAME
,FS.contact_city
,FS.contact_state
,FS.contact_zip
,FS.sub_status
,FS.city
,FS.state
,FS.DIST_MILE
FROM FINAL_SET FS
WHERE FS.DIST_MILE < 250;
Results
FULL_NAME contact_city contact_state contact_zip sub_status city state DIST_MILE
--------------- -------------------- ------------- ----------- ---------- ------- ----- ------------------
Leah Loan Boulder CO 80302 R Denver CO 24.3812946182232
Nancy Nham Colorado Springs CO 80949 R Denver CO 63.1959785838668
Olga Ofram Denver CO 80201 R Denver CO 0
Patty Phillips Greeley CO 80631 R Denver CO 49.7081523070755
Rebecca Rhode Pueblo CO 81001 R Denver CO 103.84809970528
November 20, 2014 at 1:45 am
You may wish to add
and geo.latitude between lll.lead_lat + 250*1610/110.0 and lll.lead_lat - 250*1610/110.0
see
http://en.wikipedia.org/wiki/Latitude#Length_of_a_degree_of_latitude
and create index on geo.latitude.
November 20, 2014 at 8:09 am
well, I'm so excited to have gotten these quality answers and will be literally happily pouring over them in my free times today. thanks so much.
November 20, 2014 at 6:44 pm
eirikur, the query you provided is good. Not sure it performs faster in myenvironment tho. Once query execution plan is cached it's about same, but it is still better in one more way which makes me favor it.
serg, why this ?
and geo.latitude between lll.lead_lat + 250*1610/110.0 and lll.lead_lat - 250*1610;
Next I will explore using geography type.
November 21, 2014 at 1:43 am
KoldCoffee (11/20/2014)
eirikur, the query you provided is good. Not sure it performs faster in myenvironment tho. Once query execution plan is cached it's about same, but it is still better in one more way which makes me favor it.serg, why this ?
and geo.latitude between lll.lead_lat + 250*1610/110.0 and lll.lead_lat - 250*1610;
Next I will explore using geography type.
When playing with geography pay attention to spatial indexes. Personally i have no experience using them but it looks promising.
My point was when you have bare Lat, Lon numeric columns you may wish first select points in a rectangle by sargable expressions. It requires some coordinate calculatuions, see the reference in my previuos post. Namely, provided you'll never be looking for points closer then @dmiles miles to North or South pole, latitude constraint for the rectangle 2*@dMiles width is as simple as (not tested)
declare @targetCity varchar(100) ='Denver';
declare @targetState varchar(100) ='CO';
declare @dmiles float = 250.;
with tl as(
select top(1) l.*
, geo.latitude AS lead_lat
, geo.longitude AS lead_long
from sandbox.dbo.dim_lead l
join sandbox.dbo.dim_geography geo
on l.[contact_zip] = geo.zip
and geo.city = @targetCity
and geo.state = @targetState
)
SELECT DISTINCT
l.date_created
, l.[contact_first_name] + ' ' + l.[contact_last_name] as Name
, l.lead_id
, l.sub_status
, l.[contact_city] lead_city
, l.[contact_state] lead_state
, l.[contact_zip] lead_zip
, geo.latitude AS lead_lat
, geo.longitude AS lead_long
INTO #LEADS_LAT_LONG
FROM sandbox.dbo.dim_lead l
--LEFT --why OUTER JOIN if then it follows geo.latitude IS NOT NULL ?
JOIN sandbox.dbo.dim_geography geo
ON l.[contact_zip] = geo.zip
AND l.date_created > GETDATE() -30
AND l.sub_status = 'R'
AND geo.city != @targetCity
JOIN tl on geo.latitude between tl.lead_lat + @dmiles*1.610/110.0 and tl.lead_lat - @dmiles*1.610/110.0
110 km/lat degree is a bit lesser then minimum length of 1 degree meridian arc to keep the constraint a bit less restrictive.
Longitude constraint is more complicated as it depends on latitude and should take into account points near 0, +-180 meridians.
November 21, 2014 at 11:56 pm
KoldCoffee (11/20/2014)
eirikur, the query you provided is good. Not sure it performs faster in myenvironment tho. Once query execution plan is cached it's about same, but it is still better in one more way which makes me favor it.
It should perform better as it halves the number of Geography CLR function calls compared to the original query, adding spatial columns, hence using only one CLR call, would be a further improvement, which adds the possibility of introducing a spatial index.
serg, why this ?
and geo.latitude between lll.lead_lat + 250*1610/110.0 and lll.lead_lat - 250*1610;
Next I will explore using geography type.
This in fact can be considered as a rough sketch of what a spatial index would do, segments the set into a grid/units which limits the search/scan needed to exhaust all possibilities.
😎
November 22, 2014 at 4:09 pm
Erirukur,
the reason I use your query is because it doesn't eliminate lead records having a NULL value in the lead_city column. The top query of *my* final UNION will do this undesireable thing when it can't evaluate true for:
AND lll.lead_city <> @city_center
Because you use a CROSS JOIN I can include any referrals for whom we've at least collected zip codes. I like that improvement in accuracy!
Still, the query takes between 1 1/2 to 2 minutes to run (depending on how many leads there are in given city), because it has to evaluate each lead row against the Geography::Point syntax.
Serg, I am fine with the lowered accuracy given I am not querying for locations at either north or south pole but I can't get that sample query to work.
Since a spatial index can only be applied against geography column and I can't make serg's query work for me I will look at another database we have (operational) which has a table populated with geography data types. It's hard to associate that one with the lead tables but if I can break through that barrier maybe that is only way to speed up this report.
November 22, 2014 at 8:12 pm
ps. serg, thanks for showing me how the LEFT JOIN is redundant. appreciate.
November 24, 2014 at 1:53 am
KoldCoffee (11/22/2014)
Serg, I am fine with the lowered accuracy given I am not querying for locations at either north or south pole but I can't get that sample query to work.
Since a spatial index can only be applied against geography column and I can't make serg's query work for me I will look at another database we have (operational) which has a table populated with geography data types. It's hard to associate that one with the lead tables but if I can break through that barrier maybe that is only way to speed up this report.
Can you share at least tables definitions ?
November 24, 2014 at 8:46 am
Serg,
The actual life tables are similar to what I initially posted, with the exception that dim_lead doesn't actually contain latitude and longitude columns. I'm don't know why I included lat/long in the original sample ddl, because I handled for the association of leads to lat/long in a temporary query.
Dim_leads = all attributes of leads
Dim_geography = zip, city, state, latitude, longitude
Goal: return the leads and cities in which they dwell that are within given distance (report parameter #1) to given city center (report parameter#2) and state (report parameter #3).
Table definitions and data:
create table dim_lead
(
dim_lead_key int,
date_created datetime,
[contact_first_name] varchar(20),
[contact_last_name] varchar(20),
lead_id int,
[contact_city] varchar(20),
[contact_state] varchar(2),
[contact_zip] varchar(10),
sub_status varchar(5));
insert into dim_lead values
('2014-11-12 00:00:00.000', 'Leah', 'Loan', 1234, 'Boulder', 'CO', '80302', 'R'),
('2014-11-12 00:00:00.000', 'Mary', 'Morris', 4321, 'Boston', 'MA', '02133', 'R'),
('2014-11-12 00:00:00.000', 'Nancy', 'Nham', 3241, 'Colorado Springs', 'CO', '80949','R'),
('2014-11-12 00:00:00.000', 'Olga', 'Ofram', 2314, 'Denver', 'CO', '80201', 'R'),
('2014-11-12 00:00:00.000', 'Patty', 'Phillips', 3421, 'Greeley', 'CO', '80631', 'R'),
('2014-11-12 00:00:00.000', 'Queen', 'Quon', 2143, 'New York', 'NY', '10025', 'R'),
('2014-11-12 00:00:00.000', 'Rebecca', 'Rhode', 2431, 'Pueblo', 'CO', '81001', 'R');
CREATE TABLE [dbo].[dim_geography](
[dim_geography_key] [int] IDENTITY(1,1) NOT NULL,
[zip] [varchar](10) NOT NULL,
[city] [varchar](50) NULL,
[state] [varchar](2) NULL,
[country] [varchar](2) NULL,
[county] [varchar](40) NULL,
[latitude] [float] NULL,
[longitude] [float] NULL,
[rn] [bigint] NULL
) ON [PRIMARY]
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (47683, N'02133', N'Boston', N'MA', N'US', N'Suffolk', 42.358631, -71.056702, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (65413, N'80302', N'Boulder', N'CO', N'US', N'Boulder', 40.015739, -105.279243, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (254402, N'80949', N'Colorado Springs', N'CO', N'US', N'El Paso', 38.83345, -104.821808, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (111044, N'80201', N'Denver', N'CO', N'US', N'Denver', 39.740009, -104.992264, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (60117, N'80631', N'Greeley', N'CO', N'US', N'Weld', 40.423279, -104.694458, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (2851992, N'10025', N'New York', N'NY', N'US', N'New York', 40.71455, -74.007118, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (204835, N'81001', N'Pueblo', N'CO', N'US', N'Pueblo', 38.263859, -104.612373, 1)
SET IDENTITY_INSERT [dbo].[dim_geography] OFF
I am using eririkur's cross join query because it doesn't exclude leads having a city = NULL...but it takes 1 1/2 = 2 minutes given the volume of leads it must search.
query
;WITH LEAD_DATA AS
(
SELECT
DL.contact_first_name
,DL.contact_last_name
,DL.contact_city
,DL.contact_state
,DL.contact_zip
,DL.sub_status
,Geography::Point(COALESCE(DL.latitude,0),COALESCE(DL.longitude,0), 4326) AS GEO_POINT
FROM dbo.dim_lead DL
)
,GEO_DATA AS
(
SELECT
DG.city
,DG.state
,Geography::Point(COALESCE(DG.latitude,0),COALESCE(DG.longitude,0), 4326) AS GEO_POINT
FROM dbo.dim_geography DG
WHERE DG.city = 'Denver'
AND DG.state = 'CO'
)
,FINAL_SET AS
(
SELECT
LD.contact_first_name + CHAR(32) + LD.contact_last_name AS FULL_NAME
,LD.contact_city
,LD.contact_state
,LD.contact_zip
,LD.sub_status
,GD.city
,GD.state
--,(LD.GEO_POINT.STDistance(GD.GEO_POINT) * 0.001) AS DIST_METER
,(LD.GEO_POINT.STDistance(GD.GEO_POINT) * 0.0006213) AS DIST_MILE
FROM LEAD_DATA LD
CROSS JOIN GEO_DATA GD
)
SELECT
FS.FULL_NAME
,FS.contact_city
,FS.contact_state
,FS.contact_zip
,FS.sub_status
,FS.city
,FS.state
,FS.DIST_MILE
FROM FINAL_SET FS
WHERE FS.DIST_MILE < 250;
November 25, 2014 at 6:13 am
It's my fault in previous script. Between predicate should be
JOIN tl on geo.latitude between tl.lead_lat - @dmiles*1.610/110.0 and tl.lead_lat + @dmiles*1.610/110.0
Now if you are still interested in geo calculations below is the script which selects candidate locations by geo "rectangle", both lan and lon coordinates. I'm afraid concerning perfomance the longitude part is of little help due to ORs. (It can be simplified though if no locations around 180 meridian are needed.) Nevertheless here it goes
create table dim_lead (
dim_lead_key int IDENTITY(1,1) NOT NULL,
date_created datetime,
[contact_first_name] varchar(20),
[contact_last_name] varchar(20),
lead_id int,
[contact_city] varchar(20),
[contact_state] varchar(2),
[contact_zip] varchar(10),
sub_status varchar(5));
insert into dim_lead values
('2014-11-12 00:00:00.000', 'Leah', 'Loan', 1234, 'Boulder', 'CO', '80302', 'R'),
('2014-11-12 00:00:00.000', 'Mary', 'Morris', 4321, 'Boston', 'MA', '02133', 'R'),
('2014-11-12 00:00:00.000', 'Nancy', 'Nham', 3241, 'Colorado Springs', 'CO', '80949','R'),
('2014-11-12 00:00:00.000', 'Olga', 'Ofram', 2314, 'Denver', 'CO', '80201', 'R'),
('2014-11-12 00:00:00.000', 'Patty', 'Phillips', 3421, 'Greeley', 'CO', '80631', 'R'),
('2014-11-12 00:00:00.000', 'Queen', 'Quon', 2143, 'New York', 'NY', '10025', 'R'),
('2014-11-12 00:00:00.000', 'Rebecca', 'Rhode', 2431, 'Pueblo', 'CO', '81001', 'R');
CREATE TABLE [dbo].[dim_geography](
[dim_geography_key] [int] IDENTITY(1,1) NOT NULL,
[zip] [varchar](10) NOT NULL,
[city] [varchar](50) NULL,
[state] [varchar](2) NULL,
[country] [varchar](2) NULL,
[county] [varchar](40) NULL,
[latitude] [float] NULL,
[longitude] [float] NULL,
[rn] [bigint] NULL
) ON [PRIMARY]
SET IDENTITY_INSERT [dbo].[dim_geography] ON;
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (47683, N'02133', N'Boston', N'MA', N'US', N'Suffolk', 42.358631, -71.056702, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (65413, N'80302', N'Boulder', N'CO', N'US', N'Boulder', 40.015739, -105.279243, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (254402, N'80949', N'Colorado Springs', N'CO', N'US', N'El Paso', 38.83345, -104.821808, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (111044, N'80201', N'Denver', N'CO', N'US', N'Denver', 39.740009, -104.992264, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (60117, N'80631', N'Greeley', N'CO', N'US', N'Weld', 40.423279, -104.694458, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (2851992, N'10025', N'New York', N'NY', N'US', N'New York', 40.71455, -74.007118, 1)
INSERT [dbo].[dim_geography] ([dim_geography_key], [zip], [city], [state], [country], [county], [latitude], [longitude], [rn]) VALUES (204835, N'81001', N'Pueblo', N'CO', N'US', N'Pueblo', 38.263859, -104.612373, 1)
SET IDENTITY_INSERT [dbo].[dim_geography] OFF
declare @targetCity varchar(100) = 'Denver' --'Greeley' -- 'Boulder';
declare @targetState varchar(100) = 'CO';
declare @dmiles float = 250.;
with tl as( --http://en.wikipedia.org/wiki/Latitude#Length_of_a_degree_of_latitude
select top(1) l.*
, geo.latitude AS lead_lat
, geo.longitude AS lead_long
, geo.latitude - @dmiles*1.610/110.0 as minlat
, geo.latitude + @dmiles*1.610/110.0 as maxlat
, case when geo.longitude-lp.dLon >= -180 then geo.longitude-lp.dLon else 360 +(geo.longitude-lp.dLon) end as lon1
, case when geo.longitude+lp.dLon <= 180 then geo.longitude+lp.dLon else geo.longitude+lp.dLon - 360 end as lon2
from dbo.dim_lead l
join dbo.dim_geography geo
on l.[contact_zip] = geo.zip
and geo.city = @targetCity
and geo.state = @targetState
cross apply ( select dLon = @dmiles*1.610/case when abs(geo.latitude) <= 15 then 107.
when abs(geo.latitude) <= 30 then 96.
when abs(geo.latitude) <= 45 then 78.
when abs(geo.latitude) <= 60 then 55.
when abs(geo.latitude) <= 75 then 28.5
else null end ) lp
) --select * from tl
SELECT DISTINCT
l.date_created
, l.[contact_first_name] + ' ' + l.[contact_last_name] as Name
, l.lead_id
, l.sub_status
, l.[contact_city] lead_city
, l.[contact_state] lead_state
, l.[contact_zip] lead_zip
, geo.latitude AS lead_lat
, geo.longitude AS lead_lon
INTO #LEADS_LAT_LONG
FROM dbo.dim_lead l
JOIN dbo.dim_geography geo
ON l.[contact_zip] = geo.zip
AND l.date_created > GETDATE() -30
AND l.sub_status = 'R'
AND geo.city != @targetCity
JOIN tl
ON geo.latitude BETWEEN tl.minlat AND tl.maxlat
AND( tl.lon1 IS NULL
OR( (tl.lon1 < tl.lon2 AND geo.longitude BETWEEN tl.lon1 AND tl.lon2)
OR(tl.lon1 > tl.lon2 AND (geo.longitude > tl.lon1 OR geo.longitude < tl.lon2) ) ))
;
SELECT * FROM #LEADS_LAT_LONG;
DROP TABLE #LEADS_LAT_LONG;
go
DROP TABLE [dbo].[dim_lead];
DROP TABLE [dbo].[dim_geography];
go
November 25, 2014 at 7:55 pm
Serg, It works quickly against sample data but against real life database I cancelled this query after 4 minutes 41 seconds because it didn't return anything.
I think I should find out how to use geography datatype. If it is so great, it should also be in datawarehouse.
November 26, 2014 at 1:14 pm
I've played with some real data (half a million cities around the world from http://dev.maxmind.com/geoip/legacy/geolite/) and have really got great perfomance gain when using sargable geo rectangle predicate against plain point1.STDistance(point2) < ...
Nothing unuexpected, just optimizer can use indexes. It requierd some dynamic sql.
Let me know if it is intresting for you.
Viewing 14 posts - 1 through 13 (of 13 total)
You must be logged in to reply to this topic. Login to reply