February 28, 2014 at 5:27 am
Hello,
I have a challange with a table with around 100 million rows which look like:
CREATE TABLE [dbo].[BRFCLSeaWeekHis](
[Year] [smallint] NOT NULL,
[Week] [smallint] NOT NULL,
[PortFrom] [char](5) NOT NULL,
[PortTo] [char](5) NOT NULL,
[CarrierID] [char](4) NOT NULL,
[ConType] [varchar](4) NOT NULL,
[ConSize] [smallint] NOT NULL,
[Commodity] [nvarchar](30) NOT NULL,
[IncotermCode] [varchar](4) NOT NULL,
[QuoteType] [varchar](2) NOT NULL,
[CurrencyCode] [char](3) NOT NULL,
[RateTotal] [decimal](18, 2) NOT NULL,
[SeaTotal] [decimal](18, 2) NOT NULL,
[Total] [decimal](18, 2) NOT NULL,
CONSTRAINT [PK_BRFCLSeaWeekHis] PRIMARY KEY CLUSTERED
(
[Year] ASC,
[Week] ASC,
[PortFrom] ASC,
[PortTo] ASC,
[CarrierID] ASC,
[ConType] ASC,
[ConSize] ASC,
[Commodity] ASC,
[IncotermCode] ASC,
[QuoteType] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]
(
[QuoteType] ASC
)
INCLUDE ( [Year],
[Week],
[PortFrom],
[PortTo],
[CarrierID],
[ConType],
[ConSize],
[Commodity],
[IncotermCode],
[CurrencyCode],
[RateTotal],
[SeaTotal],
[Total]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
The SP I run looks more or less like this, and takes aprox. 50 secs
CREATE PROCEDURE [dbo].[spSchBuyingRatesHistoryWeek]
(
@TradeLanes varchar(6000),
@Containers varchar(4000),
@QuoteType nvarchar(2),
@Incoterms nvarchar(500),
@CurrencyCode char(3),
@DateFrom smalldatetime,
@DateTo smalldatetime,
@Commodities nvarchar(4000),
@Carriers varchar(4000),
@IncludeCheapest bit = 1,
@IncludeMostExpensive bit = 1,
@OrderBy varchar(10) = 'SeaTotal',
@IsIndicatorsCommodityDependant bit = 0
)
AS
BEGIN
SET NOCOUNT ON
--SET @TradeLanes = '<ROOT><TradeLane PortFrom="CNSHA" PortTo="DKCPH" /></ROOT>'
----SET @Containers = '<ROOT><Container ConType="DV" ConSize="20" /><Container ConType="DV" ConSize="40" /><Container ConType="HC" ConSize="40" /></ROOT>'
--SET @Containers = '<ROOT><Container ConType="DV" ConSize="20" /></ROOT>'
--SET @QuoteType = 'I'
--SET @Incoterms = '<ROOT><Incoterm Code="FOB" /></ROOT>'
--SET @CurrencyCode = 'USD'
--SET @DateFrom = '2010-01-01'
--SET @DateTo = '2012-12-31'
--SET @Commodities = '<ROOT><Commodity Name="FAK" /></ROOT>'
--SET @Carriers = '<ROOT><Carrier Code="KKLU" /></ROOT>'
--SET@IncludeCheapest = 1
--SET@IncludeMostExpensive = 1
--SET@OrderBy = 'SeaTotal'
DECLARE @tradeLanesHandle int
DECLARE @containerHandle int
DECLARE @carrierHandle int
DECLARE @commodityHandle int
DECLARE @incotermsHandle int
--Iniciate the xml handlers
EXEC sp_xml_preparedocument @tradeLanesHandle OUTPUT, @TradeLanes
EXEC sp_xml_preparedocument @containerHandle OUTPUT, @Containers
EXEC sp_xml_preparedocument @carrierHandle OUTPUT, @Carriers
EXEC sp_xml_preparedocument @commodityHandle OUTPUT, @Commodities
EXEC sp_xml_preparedocument @incotermsHandle OUTPUT, @Incoterms
SELECT PortFrom, PortTo INTO #TradeLanes FROM OPENXML(@tradeLanesHandle, '/ROOT/TradeLane') WITH (PortFrom nchar(5),PortTo nchar(5)) AS tradelanes;
CREATE UNIQUE CLUSTERED INDEX IX_2 on #TradeLanes (PortFrom, PortTo);
SELECT coms.Name INTO #Commodities FROM OPENXML(@commodityHandle, '/ROOT/Commodity') WITH ([Name] nvarchar(30)) AS coms;
CREATE UNIQUE CLUSTERED INDEX IX_4 on #Commodities ([Name]);
SELECT cars.Code INTO #Carriers FROM OPENXML(@carrierHandle, '/ROOT/Carrier') WITH (Code nchar(4)) AS cars;
CREATE UNIQUE CLUSTERED INDEX IX_5 on #Carriers (Code);
SELECT ConType, consize INTO #Cons FROM OPENXML (@containerHandle, '/ROOT/Container') WITH (ConType nvarchar(4), ConSize smallint) AS cons;
CREATE UNIQUE CLUSTERED INDEX IX_6 on #Cons (ConType, consize);
SELECT Code INTO #Incoterms FROM OPENXML (@incotermsHandle, '/ROOT/Incoterm') WITH (Code nvarchar(4)) AS cons;
CREATE UNIQUE CLUSTERED INDEX IX_7 on #Incoterms (Code);
DECLARE @WeekFrom int,@WeekTo int, @YearFrom int, @YearTo int;
SET @WeekFrom = dbo.sf_IsoWeekOfYear(@DateFrom);
SET @WeekTo = dbo.sf_IsoWeekOfYear(@DateTo);
SET @YearFrom = DATEPART(yy, @DateFrom);
SET @YearTo = DATEPART(yy, @DateTo);
SELECT dbo.sf_WeekdayFromIsoWeekNumber([Year], [Week],7) AS [Day],
[Year], [Week], FCL.PortFrom, FCL.PortTo, FCL.CarrierID, FCL.ConType, FCL.ConSize, FCL.Commodity, FCL.IncotermCode, FCL.QuoteType, FCL.CurrencyCode, RateTotal, SeaTotal, Total
FROM
BRFCLSeaWeekHis AS FCL
INNER JOIN #cons c ON fcl.ConType = c.ConType AND fcl.ConSize = c.ConSize
INNER JOIN #TradeLanes as tl on tl.PortFrom = FCL.portFrom AND tl.PortTo = FCL.portTo
INNER JOIN #Incoterms as i ON i.Code = FCL.IncotermCode
WHERE QuoteType = @QuoteType AND
(CASE WHEN FCL.Year > @YearFrom THEN 1 WHEN (FCL.Week >= @WeekFrom AND FCL.Year = @YearFrom) THEN 1 ELSE 0 END) = 1
AND (CASE WHEN FCL.Year < @YearTo THEN 1 WHEN (FCL.Week <= @WeekTo AND FCL.Year = @YearTo) THEN 1 ELSE 0 END) = 1
AND (CASE WHEN @Commodities IS NULL THEN 1 WHEN Commodity IN (SELECT Name FROM #Commodities) THEN 1 ELSE 0 END) = 1
AND (CASE WHEN @Carriers IS NULL THEN 1 WHEN CarrierID IN (SELECT Code FROM #Carriers) THEN 1 ELSE 0 END) = 1
--Remove tmp tables
IF OBJECT_ID('tempdb..#Tradelanes') IS NOT NULL
DROP TABLE #Tradelanes
IF OBJECT_ID('tempdb..#Commodities') IS NOT NULL
DROP TABLE #Commodities;
IF OBJECT_ID('tempdb..#Carriers') IS NOT NULL
DROP TABLE #Carriers;
IF OBJECT_ID('tempdb..#cons') IS NOT NULL
DROP TABLE #cons;
IF OBJECT_ID('tempdb..#Incoterms') IS NOT NULL
DROP TABLE #Incoterms
--Remove handles
EXEC sp_xml_removedocument @tradeLanesHandle
EXEC sp_xml_removedocument @commodityHandle
EXEC sp_xml_removedocument @carrierHandle
EXEC sp_xml_removedocument @containerHandle
EXEC sp_xml_removedocument @incotermsHandle
END
I attached the xml of the execution plan. Any ideas on how I could optimize this?
February 28, 2014 at 6:45 am
Hi Mark
Two or three points to consider.
Firstly this is a "Catch-all query". In a nutshell, the number of rows returned could vary dramatically depending upon the parameters passed in. You'll have a minimum number of rows of n and a maximum of about 25 million. No single execution plan could be optimal for both. Gail Shaw has an excellent article here [/url]describing how you can deal with this type of query.
Secondly, your ordinary index has most of the columns of the clustered index as INCLUDE columns. This is unnecessary because nonclustered indexes contain the cluster keys - they're already included behind the scenes.
Thirdly, and this point follows on from 2: since every nonclustered index contains the cluster keys, it might be advantageous for you to use a single surrogate key instead, such as an identity column.
SSC has an excellent indexing article by David Durant here[/url].
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
February 28, 2014 at 6:49 am
For a quick win, change your non-clustered index like so:
CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]
(
[QuoteType] ASC, Year
)
INCLUDE (
[CurrencyCode],
[RateTotal],
[SeaTotal],
[Total])
and your query like so:
WHERE QuoteType = @QuoteType
AND FCL.Year >= @YearFrom
AND FCL.Year <= @YearTo
AND (CASE WHEN FCL.Year > @YearFrom THEN 1
WHEN (FCL.Week >= @WeekFrom AND FCL.Year = @YearFrom) THEN 1
ELSE 0 END) = 1
AND (CASE WHEN FCL.Year < @YearTo THEN 1 WHEN (FCL.Week <= @WeekTo AND FCL.Year = @YearTo) THEN 1 ELSE 0 END) = 1
AND (CASE WHEN @Commodities IS NULL THEN 1 WHEN FCL.Commodity IN (SELECT Name FROM #Commodities) THEN 1 ELSE 0 END) = 1
AND (CASE WHEN @Carriers IS NULL THEN 1 WHEN FCL.CarrierID IN (SELECT Code FROM #Carriers) THEN 1 ELSE 0 END) = 1
with OPTION RECOMPILE.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
February 28, 2014 at 7:32 am
Hi Chris,
Thanks for the leads, but even with recompile and changed index, the time is the same.
In relation to your first post, then I will not likely get anymore than a few 1000 rows as result (maybe 10t-20t), as it will not include more than 5 trade lanes and there are around 250.000 different tradelanes in the table. Does that make it easier to optimize for that specific purpose?
Furthermore, where can I see if the CPU or the disk (or something third) is the bottleneck?
February 28, 2014 at 7:49 am
mark.kristensen (2/28/2014)
Hi Chris,Thanks for the leads, but even with recompile and changed index, the time is the same.
In relation to your first post, then I will not likely get anymore than a few 1000 rows as result (maybe 10t-20t), as it will not include more than 5 trade lanes and there are around 250.000 different tradelanes in the table. Does that make it easier to optimize for that specific purpose?
Furthermore, where can I see if the CPU or the disk (or something third) is the bottleneck?
Thanks for the feedback, Mark. Can you post the actual execution plan please?
Efficient filtering for those tradelanes is the first thing I'd pursue. You currently have way too many rows coming off the big table. That stacks up as logical reads if the data is in cache, physical reads if not.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
February 28, 2014 at 8:12 am
I have attached a file to the first post - is that what you need or if not, then please tell me how to get it 🙂
February 28, 2014 at 8:19 am
I have this from the stats
Table '#Result_000000050139'. Scan count 0, logical reads 51, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Carriers_00000005013C'. Scan count 0, logical reads 170, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Commodities_00000005013B'. Scan count 0, logical reads 1066, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Cons_00000005013D'. Scan count 0, logical reads 3366, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Incoterms_00000005013E'. Scan count 0, logical reads 13464, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'BRFCLSeaWeekHis'. Scan count 1, logical reads 806044, physical reads 0, read-ahead reads 800135, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#TradeLanes_00000005013A'. Scan count 1, logical reads 2, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 36020 ms, elapsed time = 77212 ms.
February 28, 2014 at 8:31 am
mark.kristensen (2/28/2014)
I have attached a file to the first post - is that what you need or if not, then please tell me how to get it 🙂
Execution plan with the mods I suggested?
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
February 28, 2014 at 9:21 am
Ah - here it is
February 28, 2014 at 9:28 am
Thanks.
Try placing the nonclustered index keys the other way around:
CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]
(
Year, [QuoteType] ASC
)
INCLUDE (
[CurrencyCode],
[RateTotal],
[SeaTotal],
[Total])
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 2, 2014 at 3:41 pm
Thanks for the suggestion, but pretty much the same result
Are there any other tools I can use to dig deeper?
Table '#Result____00000005022E'. Scan count 0, logical reads 51, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Carriers______00000005022B'. Scan count 0, logical reads 170, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Commodities____00000005022A'. Scan count 0, logical reads 1066, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Cons____00000005022C'. Scan count 0, logical reads 3366, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#Incoterms____00000005022D'. Scan count 0, logical reads 13464, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'BRFCLSeaWeekHis'. Scan count 1, logical reads 778057, physical reads 1, read-ahead reads 775951, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#TradeLanes____000000050229'. Scan count 1, logical reads 2, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 36410 ms, elapsed time = 73626 ms.
March 2, 2014 at 6:30 pm
I guess my first recommendation would be to make the PK a nonclustered index. It's currently way too wide to be of any practical use as a clustered index and will cause BTree of the index on quotetype column to bloat quite badly (remember that all the columns of a clustered index are added to all nonclustered indexes) making it read many more pages per lookup than it should need to. I would put a new clustered index on year and week because that seems to be predominate in all of your queries.
--Jeff Moden
Change is inevitable... Change for the better is not.
March 3, 2014 at 1:57 am
A significant number of implicit conversions are interfering with optimisation of this query. Here's a few recommendations to start with:
Cast @YearFrom and @YearTo to same datatype as BRFCLSeaWeekHis.Year
Cast @QuoteType to same datatype as BRFCLSeaWeekHis.QuoteType
Temp table #TradeLanes: cast PortFrom and PortTo to same datatype as BRFCLSeaWeekHis.PortFrom and BRFCLSeaWeekHis.PortTo
Temp table #Incoterms: cast Code to same datatype as BRFCLSeaWeekHis.IncotermCode
Get these done, run the query again and post the execution plan.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
March 4, 2014 at 9:21 am
Ok, now I think we are getting close. Down to 47 secs, but now it also suggests a new index:
CREATE NONCLUSTERED INDEX [<Name of Missing Index, sysname,>]
ON [dbo].[BRFCLSeaWeekHis] ([PortFrom],[PortTo],[QuoteType],[Year])
INCLUDE ([Week],[CarrierID],[ConType],[ConSize],[Commodity],[IncotermCode],[CurrencyCode],[RateTotal],[SeaTotal],[Total])
@jeff, thanks for the suggestion. I was not aware that all columns got copied down.
March 4, 2014 at 9:42 am
Once more with details:
[Expr1031] = Scalar Operator(CONVERT_IMPLICIT(int,[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Year] as [FCL].[Year],0)),
[Expr1032] = Scalar Operator(CONVERT_IMPLICIT(int,[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Week] as [FCL].[Week],0)),
[Expr1033] = Scalar Operator(CONVERT_IMPLICIT(nchar(3),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[CurrencyCode] as [FCL].[CurrencyCode],0)),
[Expr1034] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[RateTotal] as [FCL].[RateTotal],0)),
[Expr1035] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[SeaTotal] as [FCL].[SeaTotal],0)),
[Expr1036] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Total] as [FCL].[Total],0)),
[Expr1037] = Scalar Operator(CONVERT_IMPLICIT(nvarchar(4),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[IncotermCode] as [FCL].[IncotermCode],0)),
[Expr1038] = Scalar Operator(CONVERT_IMPLICIT(nvarchar(4),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[ConType] as [FCL].[ConType],0))
Warnings: Type conversion in expression (CONVERT_IMPLICIT(nvarchar(4),[FCL].[ConType],0)) may affect "CardinalityEstimate" in query plan choice, Type conversion in expression (CONVERT_IMPLICIT(nvarchar(4),[FCL].[IncotermCode],0)) may affect "CardinalityEstimate" in query plan choice
Set the datatype of your variables and temp table columns to be the same as the columns they are matching in BRFCLSeaWeekHis.
Then post the actual execution plan.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
Viewing 15 posts - 1 through 15 (of 39 total)
You must be logged in to reply to this topic. Login to reply