August 2, 2010 at 3:53 am
Hi,
Can anyone optimize (physically as well as logically) the following code?
--*******************************************************************
declare @emp table(empid int, apr int, may int, jun int, jul int, aug int, sep int, oct int
,nov int, dec int, jan int, feb int, mar int)
insert into @emp
select 1,40,50,60,70,80,90,100,110,120,10,20,30 union all
select 2,42,52,62,72,82,92,102,112,122,12,22,32
--select * from @emp
Select empid,Isnull(apr,0) apr, Isnull(may,0) may, Isnull(jun,0) jun, Isnull(jul,0) jul, Isnull(aug,0) aug, Isnull(sep,0) sep
, Isnull(oct,0) oct, Isnull(nov,0) nov, Isnull(dec,0) dec, Isnull(jan,0) jan, Isnull(feb,0) feb, Isnull(mar,0) mar
,incidents, Tscore,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60],Tscored,[T%]
, ( Case When [T%] >= 90 then 'A+'
When [T%] >= 85 and [T%] < 90 then 'A'
When [T%] >= 65 and [T%] < 85 then 'B+'
When [T%] >= 40 and [T%] < 65 then 'B'
When [T%] >= 20 and [T%] < 40 then 'C+'
When [T%] < 20 then 'C'
end ) Rating
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents, Tscore,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60],Tscored,( (Tscored * 100) / Tscore ) 'T%'
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents
, Tscore
,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60]
, (Isnull([<=0],0) * 5 + Isnull([>0 and <=30],0) * 4 + Isnull([>30 and <=45],0) * 3 + Isnull([>45 and <=60],0) * 2 + Isnull([>60],0) * 1) Tscored
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents
, 5 * incidents Tscore
, (case when apr <=0 then 1 else 0 end) + (case when may <=0 then 1 else 0 end)
+ (case when jun <=0 then 1 else 0 end) + (case when jul <=0 then 1 else 0 end)
+ (case when aug <=0 then 1 else 0 end) + (case when sep <=0 then 1 else 0 end)
+ (case when oct <=0 then 1 else 0 end) + (case when nov <=0 then 1 else 0 end)
+ (case when dec <=0 then 1 else 0 end) + (case when jan <=0 then 1 else 0 end)
+ (case when feb <=0 then 1 else 0 end) + (case when mar <=0 then 1 else 0 end) '<=0'
, (case when apr > 0 and apr <= 30 then 1 else 0 end) + (case when may > 0 and may <= 30 then 1 else 0 end)
+ (case when jun > 0 and jun <= 30 then 1 else 0 end) + (case when jul > 0 and jul <= 30 then 1 else 0 end)
+ (case when aug > 0 and aug <= 30 then 1 else 0 end) + (case when sep > 0 and sep <= 30 then 1 else 0 end)
+ (case when oct > 0 and oct <= 30 then 1 else 0 end) + (case when nov > 0 and nov <= 30 then 1 else 0 end)
+ (case when dec > 0 and dec <= 30 then 1 else 0 end) + (case when jan > 0 and jan <= 30 then 1 else 0 end)
+ (case when feb > 0 and feb <= 30 then 1 else 0 end) + (case when mar > 0 and mar <= 30 then 1 else 0 end) '>0 and <=30'
, (case when apr > 30 and apr <= 45 then 1 else 0 end) + (case when may > 30 and may <= 45 then 1 else 0 end)
+ (case when jun > 30 and jun <= 45 then 1 else 0 end) + (case when jul > 30 and jul <= 45 then 1 else 0 end)
+ (case when aug > 30 and aug <= 45 then 1 else 0 end) + (case when sep > 30 and sep <= 45 then 1 else 0 end)
+ (case when oct > 30 and oct <= 45 then 1 else 0 end) + (case when nov > 30 and nov <= 45 then 1 else 0 end)
+ (case when dec > 30 and dec <= 45 then 1 else 0 end) + (case when jan > 30 and jan <= 45 then 1 else 0 end)
+ (case when feb > 30 and feb <= 45 then 1 else 0 end) + (case when mar > 30 and mar <= 45 then 1 else 0 end) '>30 and <=45'
, (case when apr > 45 and apr <= 60 then 1 else 0 end) + (case when may > 45 and may <= 60 then 1 else 0 end)
+ (case when jun > 45 and jun <= 60 then 1 else 0 end) + (case when jul > 45 and jul <= 60 then 1 else 0 end)
+ (case when aug > 45 and aug <= 60 then 1 else 0 end) + (case when sep > 45 and sep <= 60 then 1 else 0 end)
+ (case when oct > 45 and oct <= 60 then 1 else 0 end) + (case when nov > 45 and nov <= 60 then 1 else 0 end)
+ (case when dec > 45 and dec <= 60 then 1 else 0 end) + (case when jan > 45 and jan <= 60 then 1 else 0 end)
+ (case when feb > 45 and feb <= 60 then 1 else 0 end) + (case when mar > 45 and mar <= 60 then 1 else 0 end) '>45 and <=60'
, (case when apr >60 then 1 else 0 end) + (case when may >60 then 1 else 0 end)
+ (case when jun >60 then 1 else 0 end) + (case when jul >60 then 1 else 0 end)
+ (case when aug >60 then 1 else 0 end) + (case when sep >60 then 1 else 0 end)
+ (case when oct >60 then 1 else 0 end) + (case when nov >60 then 1 else 0 end)
+ (case when dec >60 then 1 else 0 end) + (case when jan >60 then 1 else 0 end)
+ (case when feb >60 then 1 else 0 end) + (case when mar >60 then 1 else 0 end) '>60'
from
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,(case when apr is not null then 1 else 0 end) + (case when may is not null then 1 else 0 end)
+(case when jun is not null then 1 else 0 end) + (case when jul is not null then 1 else 0 end)
+(case when aug is not null then 1 else 0 end) + (case when sep is not null then 1 else 0 end)
+(case when oct is not null then 1 else 0 end) + (case when nov is not null then 1 else 0 end)
+(case when dec is not null then 1 else 0 end) + (case when jan is not null then 1 else 0 end)
+(case when feb is not null then 1 else 0 end) + (case when mar is not null then 1 else 0 end)
incidents
from @emp
) A -- A: incidents = count of non-null month values
) B -- B: Tscore (5 * incidents) and the five range-bucket counts
) C -- C: Tscored = weighted sum of the bucket counts
) D -- D: [T%] = Tscored * 100 / Tscore; the outer select derives the Rating
--*******************************************************************
Thanks in advance..
[font="Comic Sans MS"]Praveen Goud[/font]
August 2, 2010 at 4:57 am
You can store the CASE conditions in a table (so-called metadata) and fetch the conditions from there.
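For example, something along these lines (a rough sketch only; the table and column names are mine, and the boundaries and weights are taken from the original query):
create table dbo.ScoreRange (MinValue int, MaxValue int, BucketName varchar(20), Weight int)
insert into dbo.ScoreRange values(-2147483648, 0, '<=0', 5)
insert into dbo.ScoreRange values(1, 30, '>0 and <=30', 4)
insert into dbo.ScoreRange values(31, 45, '>30 and <=45', 3)
insert into dbo.ScoreRange values(46, 60, '>45 and <=60', 2)
insert into dbo.ScoreRange values(61, 2147483647, '>60', 1)
-- each month value can then be classified with: value between MinValue and MaxValue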
-------Bhuvnesh----------
I work only to learn Sql Server...though my company pays me for getting their stuff done;-)
August 2, 2010 at 5:15 am
Can you explain a little more clearly, please?
If possible, can you rewrite the code?
[font="Comic Sans MS"]Praveen Goud[/font]
August 2, 2010 at 6:24 am
This definitely looks like you could join it against a Tally table or Date table.
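One way to get that row-per-month shape (a sketch of the general idea only, assuming SQL Server 2008 or later; not necessarily the exact tally-table form the poster had in mind) is to join each @emp row against a small twelve-row set so the month columns become rows:
-- sketch: unpivot the twelve month columns into rows via CROSS APPLY (VALUES ...)
select e.empid, v.mon, v.num
from @emp e
cross apply (values ('apr', e.apr), ('may', e.may), ('jun', e.jun), ('jul', e.jul),
                    ('aug', e.aug), ('sep', e.sep), ('oct', e.oct), ('nov', e.nov),
                    ('dec', e.[dec]), ('jan', e.jan), ('feb', e.feb), ('mar', e.mar)
            ) v(mon, num)
The bucket counts then become simple conditional sums over v.num.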
August 2, 2010 at 8:40 am
If you want to reduce the use of CASE WHEN statements, I would suggest the following:
1. Create a function to calculate your Range count:
create function dbo.udf_CalcRange(@minR int, @maxR int
, @m01 int, @m02 int, @m03 int, @m04 int, @m05 int, @m06 int
, @m07 int, @m08 int, @m09 int, @m10 int, @m11 int, @m12 int)
returns int
with schemabinding
as
begin
set @minR = isnull(@minR, -2147483648) -- minimum int value
set @maxR = isnull(@maxR, 2147483647) -- maximum int value
return
(case when @m01 between @minR and @maxR then 1 else 0 end) + (case when @m02 between @minR and @maxR then 1 else 0 end) +
(case when @m03 between @minR and @maxR then 1 else 0 end) + (case when @m04 between @minR and @maxR then 1 else 0 end) +
(case when @m05 between @minR and @maxR then 1 else 0 end) + (case when @m06 between @minR and @maxR then 1 else 0 end) +
(case when @m07 between @minR and @maxR then 1 else 0 end) + (case when @m08 between @minR and @maxR then 1 else 0 end) +
(case when @m09 between @minR and @maxR then 1 else 0 end) + (case when @m10 between @minR and @maxR then 1 else 0 end) +
(case when @m11 between @minR and @maxR then 1 else 0 end) + (case when @m12 between @minR and @maxR then 1 else 0 end)
end
Then, with a few more modifications, your query will look like this:
declare @emp table(empid int, apr int, may int, jun int, jul int, aug int, sep int, oct int
,nov int, dec int, jan int, feb int, mar int)
insert into @emp
select 1,40,50,60,70,80,90,100,110,120,10,20,30 union all
select 2,42,52,62,72,82,92,102,112,122,12,22,32
Select empid,Isnull(apr,0) apr, Isnull(may,0) may, Isnull(jun,0) jun, Isnull(jul,0) jul, Isnull(aug,0) aug, Isnull(sep,0) sep
, Isnull(oct,0) oct, Isnull(nov,0) nov, Isnull(dec,0) dec, Isnull(jan,0) jan, Isnull(feb,0) feb, Isnull(mar,0) mar
,incidents, Tscore,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60],Tscored,[T%]
,R.Rating
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents, Tscore,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60],Tscored,((Tscored * 100) / Tscore ) 'T%'
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents
, Tscore
,[<=0],[>0 and <=30],[>30 and <=45],[>45 and <=60],[>60]
, (Isnull([<=0],0) * 5 + Isnull([>0 and <=30],0) * 4 + Isnull([>30 and <=45],0) * 3 + Isnull([>45 and <=60],0) * 2 + Isnull([>60],0) * 1) Tscored
From
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
,incidents
, 5 * incidents Tscore
, dbo.udf_CalcRange(null,0,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,[dec]) '<=0'
, dbo.udf_CalcRange(1,30,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,[dec]) '>0 and <=30'
, dbo.udf_CalcRange(31,45,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,[dec]) '>30 and <=45'
, dbo.udf_CalcRange(46,60,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,[dec]) '>45 and <=60'
, dbo.udf_CalcRange(61,null,jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,[dec]) '>60'
from
(
select empid, apr, may, jun, jul, aug, sep, oct, nov, dec, jan, feb, mar
, isnull(cast(jan as bit),0)/1 + isnull(cast(feb as bit),0) + isnull(cast(mar as bit),0)
+ isnull(cast(apr as bit),0) + isnull(cast(may as bit),0) + isnull(cast(jun as bit),0)
+ isnull(cast(jul as bit),0) + isnull(cast(aug as bit),0) + isnull(cast(sep as bit),0)
+ isnull(cast(oct as bit),0) + isnull(cast(nov as bit),0) + isnull(cast(dec as bit),0)
incidents
from @emp
) A
) B
) C
) D
join (values (-2147483648,19,'C'),(20,39,'C+'),(40,64,'B'),(65,84,'B+'),(85,89,'A'),(90,2147483647,'A+')) R(minT,MaxT,Rating)
on [T%] between R.minT and R.maxT
PLEASE NOTE:
1. In calculating the ranges you don't check for NULL values in the month columns, so NULLs would not be counted in any of the ranges. I've created the function exactly as per your code. If you want to count NULLs as zero, use ISNULL, as in: (case when isnull(@m01,0) between @minR and @maxR then 1 else 0 end) + ...
2. The query may now run slower than the one using multiple CASE WHEN expressions, even though the UDF is defined with the SCHEMABINDING option to make it deterministic.
3. Division by 1 is important when calculating [incidents], as it makes SQL Server convert the bit back to an integer so it can be summed.
4. Rating is now calculated by joining to the rating-range subquery. You may try to define it as a table variable (a small sketch follows below), but I don't believe it will affect performance much...
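A minimal sketch of that table-variable alternative (same boundaries as the inline VALUES list above):
declare @Rating table (minT int, maxT int, Rating varchar(2))
insert into @Rating values (-2147483648,19,'C'),(20,39,'C+'),(40,64,'B'),(65,84,'B+'),(85,89,'A'),(90,2147483647,'A+')
-- then: join @Rating R on [T%] between R.minT and R.maxT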
August 2, 2010 at 8:46 am
My guess is that the code posted above will be several degrees slower than the original code. CASE statements, while they can be a mess to read, are quite fast. Scalar UDFs are quite slow. Performance could be increased by using a table-valued function, but that would be an awkward fit at best.
I'm not sure exactly what the main function is doing, but just looking at it screams "look-up table" to me. Put the range of values into a lookup table, join, and grab the result from the lookup table. It's fast, elegant, and easy to read.
August 2, 2010 at 8:55 am
Introducing the tables (for the range values) does not really help in reducing the length of the code,
and I agree with you about introducing the UDF, because it is going to be a performance issue.
Can anyone help with this? (Please try to rewrite the code instead of giving hints.)
That will help not only me but everyone.
[font="Comic Sans MS"]Praveen Goud[/font]
August 2, 2010 at 9:11 am
jvanderberg (8/2/2010)
My guess is that the code posted above will be several degrees slower than the original code. CASE statements, while they can be a mess to read, are quite fast. Scalar UDFs are quite slow. Performance could be increased by using a table-valued function, but that would be an awkward fit at best....
What this "would be" table-value function would return? UDF created will be quite fast as it is defined to be deterministic however CLR function would be better.
Praveen,
Have you tested the UDF? Can you tell us what the "degree of slowness" is, or is it just a guess?
If I am right, your question was "Too many CASE statements...how to avoid??". It may be that the right answer in your case is that you should not avoid them; yes, there are a lot of them, but they are quite fast.
I wonder, does the original data come in such a "pivoted" form? I would think that you might have the same data somewhere in a slightly different layout, for example with the months not as columns but as values on rows. In that case it would be possible to write a much more optimal query to calculate what you want, without multiple CASE WHEN expressions and UDFs.
August 2, 2010 at 10:12 am
I looked a little deeper, and a look-up table wouldn't be as elegant as I thought. It could still be done, and it would be fast, but it might involve a little weirdness to make it fit the solution. You'd actually have to pivot (cross-tab) the lookup table to get what you want. It might look something like this:
CREATE TABLE lookup_table (
MinValue INT,
MaxValue INT,
ValueLessThan0 INT,
Value0to30 INT,
Value30to45 INT,
Value45to60 INT,
ValueMoreThan60 INT
)
INSERT INTO lookup_table VALUES(-2147483648, 0, 1, 0, 0, 0, 0)
INSERT INTO lookup_table VALUES(1, 30, 0, 1, 0, 0, 0)
INSERT INTO lookup_table VALUES(31, 45, 0, 0, 1, 0, 0)
INSERT INTO lookup_table VALUES(46, 60, 0, 0, 0, 1, 0)
INSERT INTO lookup_table VALUES(61, 2147483647, 0, 0, 0, 0, 1)
You'd then have to join this multiple times:
LEFT JOIN lookup_table ltapr ON apr BETWEEN ltapr.MinValue AND ltapr.MaxValue
LEFT JOIN lookup_table ltmay ON may BETWEEN ltmay.MinValue AND ltmay.MaxValue
...
And then you'd have to sum all of those:
ltapr.ValueLessThan0 + ltmay.ValueLessThan0 + ltjun.ValueLessThan0 + ltjul.ValueLessThan0 ...
, ltapr.Value0to30 + ltmay.Value0to30 + ltjun.Value0to30 + ltjul.Value0to30 ...
As you can see, this won't really result in less code, and it doesn't really solve the only problem I see with your original code: if you need to change the buckets for any reason, you have to change a lot of things. Also, if you don't change them all correctly, the report could generate false data. Because the lookup table has been pivoted, you lose some of the true flexibility of a lookup table. You can't just insert a new value into the table and expect it to keep working. If you wanted to add a new bucket, you'd have to add a new row and then change the code.
---
I think Eugene is on to something. I think the data needs to be de-pivoted. Something like:
DECLARE @depivot TABLE (
empid INT,
monthnum INT,
incidents INT
)
INSERT INTO @depivot
SELECT empid, 4, apr FROM @emp
UNION ALL SELECT empid, 5, may FROM @emp
UNION ALL SELECT empid, 6, jun FROM @emp
...
After you do that, your CASE statements are reduced to:
SUM(CASE WHEN incidents <= 0 THEN 1 ELSE 0 END),
SUM(CASE WHEN incidents > 0 AND incidents <= 30 THEN 1 ELSE 0 END),
...
The depivot should be pretty fast, and can even be done in a CTE to make it one fluid statement.
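A rough end-to-end sketch of that idea, assuming the @emp table variable from the original post (it only reproduces the five bucket counts, not the whole report):
;WITH depivot AS (
    SELECT empid, 4 AS monthnum, apr AS incidents FROM @emp
    UNION ALL SELECT empid, 5, may FROM @emp
    UNION ALL SELECT empid, 6, jun FROM @emp
    UNION ALL SELECT empid, 7, jul FROM @emp
    UNION ALL SELECT empid, 8, aug FROM @emp
    UNION ALL SELECT empid, 9, sep FROM @emp
    UNION ALL SELECT empid, 10, oct FROM @emp
    UNION ALL SELECT empid, 11, nov FROM @emp
    UNION ALL SELECT empid, 12, [dec] FROM @emp
    UNION ALL SELECT empid, 1, jan FROM @emp
    UNION ALL SELECT empid, 2, feb FROM @emp
    UNION ALL SELECT empid, 3, mar FROM @emp
)
SELECT empid,
       SUM(CASE WHEN incidents <= 0 THEN 1 ELSE 0 END)                     AS [<=0],
       SUM(CASE WHEN incidents > 0  AND incidents <= 30 THEN 1 ELSE 0 END) AS [>0 and <=30],
       SUM(CASE WHEN incidents > 30 AND incidents <= 45 THEN 1 ELSE 0 END) AS [>30 and <=45],
       SUM(CASE WHEN incidents > 45 AND incidents <= 60 THEN 1 ELSE 0 END) AS [>45 and <=60],
       SUM(CASE WHEN incidents > 60 THEN 1 ELSE 0 END)                     AS [>60]
FROM depivot
GROUP BY empid
Note that NULL month values fall into none of the buckets here, which matches the original query's behaviour.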
August 2, 2010 at 11:11 am
A big part of your problem is that your data is not normalized. Having normalized data will greatly reduce the need for CASE statements. I was able to write a partial query using only one CASE statement.
declare @emp table(empid int, apr int, may int, jun int, jul int, aug int, sep int, oct int
,nov int, dec int, jan int, feb int, mar int)
insert into @emp
select 1,40,50,60,70,80,90,100,110,120,10,20,30 union all
select 2,42,52,62,72,82,92,102,112,122,12,22,32
;
WITH Normalized AS (
SELECT
empid
, CASE
WHEN Num > 60 THEN '>60'
WHEN Num > 45 THEN '>45 and <=60'
WHEN Num > 30 THEN '>30 and <=45'
WHEN Num > 0 THEN '>0 and <=30'
ELSE '<=0'
END AS Category
, mon
, num
, 1 AS Cnt
FROM (
SELECT empid, apr, may, jun, jul, aug, sep, oct, nov, [dec], jan, feb, mar
FROM @emp
) AS p
UNPIVOT (
num
FOR mon IN ( apr, may, jun, jul, aug, sep, oct, nov, [dec], jan, feb, mar )
) AS u
)
SELECT empid, [<=0], [>0 and <=30], [>30 and <=45], [>45 and <=60]
FROM (
SELECT empid, Category, Cnt
FROM Normalized
) AS p
PIVOT (
Sum(Cnt)
FOR Category IN ( [<=0], [>0 and <=30], [>30 and <=45], [>45 and <=60] )
) AS pvt
It doesn't return everything that you need, but it is a good starting point.
Also, your CASE statement for the [T%] rating is more complicated than it needs to be. If a value is not NULL and the predicate [T%] >= 90 returns FALSE, then [T%] < 90 MUST BE TRUE and you do not need to test for it. What you have written is equivalent to:
Case When [T%] >= 90 then 'A+'
When [T%] >= 85 and 1=1 then 'A'
When [T%] >= 65 and 1=1 then 'B+'
When [T%] >= 40 and 1=1 then 'B'
When [T%] >= 20 and 1=1 then 'C+'
When [T%] < 20 then 'C'
end
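So the expression can be reduced to the following (keeping the final WHEN rather than an ELSE, so that a NULL [T%] still yields NULL as in the original):
Case When [T%] >= 90 then 'A+'
When [T%] >= 85 then 'A'
When [T%] >= 65 then 'B+'
When [T%] >= 40 then 'B'
When [T%] >= 20 then 'C+'
When [T%] < 20 then 'C'
end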
Drew
J. Drew Allen
Business Intelligence Analyst
Philadelphia, PA
August 3, 2010 at 12:16 am
Eugene Elutin (8/2/2010)
I wonder, does the original data come in such a "pivoted" form? I would think that you might have the same data somewhere in a slightly different layout, for example with the months not as columns but as values on rows. In that case it would be possible to write a much more optimal query to calculate what you want, without multiple CASE WHEN expressions and UDFs.
-----------------------------------------------------------------------------
Yes, your guess is correct.
The data I posted is the result of pivoting (i.e. apr, may, jun, and so on).
[font="Comic Sans MS"]Praveen Goud[/font]
August 3, 2010 at 2:16 am
Praveen Goud Kotha (8/3/2010)
Eugene Elutin (8/2/2010)
I wonder, does the original data come in such a "pivoted" form? I would think that you might have the same data somewhere in a slightly different layout, for example with the months not as columns but as values on rows. In that case it would be possible to write a much more optimal query to calculate what you want, without multiple CASE WHEN expressions and UDFs.
-----------------------------------------------------------------------------
Yes, your guess is correct.
The data I posted is the result of pivoting (i.e. apr, may, jun, and so on).
I've looked through the posts and have not found one where you mention that your data is the result of pivoting; it just looks like it is...
Could you please provide the original structure "before pivoting"?
August 3, 2010 at 3:09 am
Eugene Elutin (8/3/2010)
I've looked through the posts and have not found one where you mention that your data is the result of pivoting; it just looks like it is...
Could you please provide the original structure "before pivoting"?
I mean to say, the derived table I declared is the result of pivoting my original data.
Here is my actual data structure:
Memberid, amount, amount_paid, invoice_dt, invoice_duedate, paid_date (these are the columns).
And my requirement is:
1) Fetch the number of days taken by the memberid to clear the invoices raised against him.
2) This data is fetched month-wise (as mentioned/declared in the derived table; a rough sketch of this step is below),
and the rest of the steps follow as shown in my code.
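To illustrate that first step (a very rough sketch only; the table name dbo.Invoice is made up, and I am assuming the elapsed days run from invoice_dt to paid_date; adjust to the real table and rules):
-- hypothetical: days taken to clear invoices, summarised per member and month
SELECT  Memberid,
        MONTH(invoice_dt) AS invoice_month,
        AVG(DATEDIFF(day, invoice_dt, paid_date)) AS avg_days_to_clear
FROM    dbo.Invoice                 -- made-up name; replace with the real table
WHERE   paid_date IS NOT NULL       -- unpaid invoices need their own rule
GROUP BY Memberid, MONTH(invoice_dt)
From a row-per-month result like this, the bucket counts can be computed with grouped SUM(CASE...) expressions instead of one CASE per month column.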
[font="Comic Sans MS"]Praveen Goud[/font]
August 3, 2010 at 3:40 am
If you are still looking for a solution, posting the table DDL and an insert script for some test data would help...
August 3, 2010 at 10:24 am
There are some ideas in here...I've run out of time.
Oops, wrong thread.
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden