March 31, 2010 at 7:09 am
Each
I have been tasked with writing a query to get running totals for each customer. The snag is that they must be banded, and balances move across band in each period.
The sample DDL and DML to populate the two tables code is here:
if object_ID('tempdb..#transactions') is not null
drop Table #transactions
if object_ID('tempdb..#customers') is not null
drop table #customers
create table #customers (ID int not null identity(1,1) primary key,
Acc_No int not null,
band int not null,
Period_Yr int not null,
Period_Mth int not null)
Create Table #transactions (Id int not null identity (1,1) primary key,
Acc_No int not null,
amount money not null,
Period_Yr int null,
Period_Mth int null,
Tran_dt datetime not null)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,5,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,3,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,3,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,5,12,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,2,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,2,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,3,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,3,5,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,12,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,5,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,3,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,3,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,5,12,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,5,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,12,2009)
Insert into #transactions (Acc_No, amount, Tran_dt)
select top 500 111, round(Cast(Abs(Checksum(newId()))%100000/1000.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 222, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 333, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 444, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
update #transactions
set period_yr = datepart(yy, tran_dt), period_mth = datepart(mm, tran_dt)
delete from #transactions where period_yr <> 2009
-- select * from #customers
-- select * from #transactions order by tran_dt
As you can see, this is a 2 table conundrum, and the tables are #customer, where the band the customer is in is held, and the #transactions table, where individual purchases are recorded.
As a picture is worth a thousand words, the result I'm after should look like this:
Yr Mnth band_1_Total band_2_Total band_3_Total band_4_Total etc...
----- ----- -------------- --------------- --------------- --------------
2009 1 1008.99 0.00 0.00 0.00
2009 2 1444.40 489.91 0.00 0.00
2009 3 1956.29 575.39 0.00 0.00
2009 4 881.14 1523.75 948.33 0.00
2009 5 0.00 1867.78 1220.08 0.00
2009 6 0.00 2909.71 0.00 1504.37
2009 7 0.00 1419.54 2093.45 1881.24
2009 8 0.00 1460.76 2310.24 2208.79
2009 9 0.00 0.00 0.00 3859.20
2009 10 0.00 0.00 1724.87 3122.93
2009 11 0.00 0.00 1745.27 1698.63
2009 12 0.00 0.00 00.00 0.00
So what's going on? Each customer starts the year in band 1. This is reset each year. As they make purchases, the move into higher bands. This is held in the customer table and each month a record for that period for each customer is created. This tells us where to allocate the totals for that customer in a given month. The thing is, when a customer moves up a band (they hardly ever go down), the balance that was in say band 1 moves with them into band 2 when they make the move. So if a customer has a total balance of 200 in period 2, but moves into band 2 in period 3, then 200 must move from band 1 to band 2 in period 3 onwards.
I have written a crosstab, see the code below, but cannot think of a way to present balances in the correct band for a given customer without using a loop! *ducks* 😉
The code below isn't complete, but as expected takes an age to run as is.
Anyone seen this before? Any strategies?
Edit: To try and make this easier to see, here's an example from the code below with the Where Clause uncommented, so as to process only one account
/*------------------------
select r1.* from #results r1
------------------------*/
ID Yr Mnth Acc_no band_1 band_1_Total band_2 band_2_Total band_3 band_3_Total band_4 band_4_Total band_5 band_5_Total
----------- ----------- ----------- ----------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- --------------------------------------- ---------------------------------------
1 2009 1 444 196.13 196.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
2 2009 2 444 177.18 373.31 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
3 2009 3 444 68.37 441.68 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
4 2009 4 444 135.58 577.26 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
5 2009 5 444 0.00 0.00 230.30 807.56 0.00 0.00 0.00 0.00 0.00 0.00
6 2009 6 444 0.00 0.00 192.32 999.88 0.00 0.00 0.00 0.00 0.00 0.00
7 2009 7 444 0.00 0.00 150.68 1150.56 0.00 0.00 0.00 0.00 0.00 0.00
8 2009 8 444 0.00 0.00 564.01 1714.57 0.00 0.00 0.00 0.00 0.00 0.00
9 2009 9 444 0.00 0.00 0.00 0.00 116.42 1830.99 0.00 0.00 0.00 0.00
10 2009 10 444 0.00 0.00 0.00 0.00 37.91 1868.90 0.00 0.00 0.00 0.00
11 2009 11 444 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.43 1969.33
12 2009 12 444 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 300.28 2269.61
The Band_n columns contain period totals for account 444, and band_n_Total is the running total. When the band changes, the running total moves with it. I hope that is clearer.
Thanks
Dave J
if object_ID('tempdb..#results') is not null
drop table #results
create table #results
(ID int not null identity(1,1),
Yr int not null,
Mnth int not null,
Acc_no int null,
band_1 decimal(11,2)not null default 0,
band_1_Total decimal(11,2) not null default 0,
band_2 decimal(11,2)not null default 0,
band_2_Total decimal(11,2)not null default 0,
band_3 decimal(11,2)not null default 0,
band_3_Total decimal(11,2)not null default 0,
band_4 decimal(11,2)not null default 0,
band_4_Total decimal(11,2)not null default 0,
band_5 decimal(11,2)not null default 0,
band_5_Total decimal(11,2)not null default 0,
Constraint PK_PrimaryID
Primary Key NONCLUSTERED (ID)
)
CREATE CLUSTERED INDEX IX_CL_ProdAcctNo_Yr_Mnth
on #results (Acc_no, Yr, Mnth)
SET NoCount On
DECLARE @ACCT_NO INT
SELECT @ACCT_NO = 111
-- select * from #results
Insert into #results (Yr ,
Mnth ,
Acc_no,
band_1,
band_2,
band_3,
band_4,
band_5
)
SELECT
ACCTS.[Period_Yr],
ACCTS.[Period_Mth],
TRANS.Acc_no,
SUM(CASE ACCTS.[band] WHEN 1 THEN TRANS.amount ELSE 0 END) 'band 1',
SUM(CASE ACCTS.[band] WHEN 2 THEN TRANS.amount ELSE 0 END) 'band 2',
SUM(CASE ACCTS.[band] WHEN 3 THEN TRANS.amount ELSE 0 END) 'band 3',
SUM(CASE ACCTS.[band] WHEN 4 THEN TRANS.amount ELSE 0 END) 'band 4',
SUM(CASE ACCTS.[band] WHEN 5 THEN TRANS.amount ELSE 0 END) 'band 5'
FROM
#transactions AS TRANS
INNER JOIN #customers AS ACCTS
ON TRANS.[Period_Yr] = ACCTS.[Period_Yr] AND
TRANS.[Period_Mth] = ACCTS.[Period_Mth] AND
TRANS.[Acc_no] = ACCTS.[Acc_no]
--WHERE TRANS.[Acc_no] = @ACCT_NO
Group by
ACCTS.[Period_Yr],
ACCTS.[Period_Mth],
TRANS.Acc_no,
TRANS.[Period_Mth]
Order by
ACCTS.[Period_Yr],
ACCTS.[Period_Mth]
--
--
--===== Declare the cursor storage variables
DECLARE @band_1 MONEY
DECLARE @band_2 MONEY
DECLARE @band_3 MONEY
DECLARE @band_4 MONEY
DECLARE @band_5 MONEY
DECLARE @CurAccountID INT
--===== Declare the working variables
DECLARE @PrevAccountID INT
DECLARE @AccountRunningTotal MONEY
--===== Create the cursor with rows sorted in the correct
-- order to do the running balance by account
DECLARE curRunningTotal CURSOR LOCAL FORWARD_ONLY
FOR
SELECT Acc_no, band_1, band_2, band_3, band_4, band_5
FROM dbo.#results
-- WHERE AccountID <= 10 --Uncomment for "short" testing
ORDER BY Acc_no, Mnth
OPEN curRunningTotal
--===== Read the information from the first row of the cursor
FETCH NEXT FROM curRunningTotal
INTO @CurAccountID, @band_1, @band_2, @band_3, @band_4, @band_5
--===== For each account, update the account running total
-- column until we run out of rows. Notice that the
-- CASE statement resets the running total at the
-- start of each account.
WHILE @@FETCH_STATUS = 0
BEGIN
--===== Calculate the running total for this row
-- and remember this AccountID for the next row
if @band_1 <> 0
Begin
SELECT @AccountRunningTotal = CASE
WHEN @CurAccountID = @PrevAccountID
THEN @AccountRunningTotal + @band_1
ELSE @band_1
END,
@PrevAccountID = @CurAccountID
UPDATE #results
SET band_1_total = @AccountRunningTotal
WHERE CURRENT OF curRunningTotal
End
if @band_2 <> 0
Begin
SELECT @AccountRunningTotal = CASE
WHEN @CurAccountID = @PrevAccountID
THEN @AccountRunningTotal + @band_2
ELSE @band_2
END,
@PrevAccountID = @CurAccountID
UPDATE #results
SET band_2_total = @AccountRunningTotal + band_1_total, band_1_total = 0
WHERE CURRENT OF curRunningTotal
End
if @band_3 <> 0
Begin
SELECT @AccountRunningTotal = CASE
WHEN @CurAccountID = @PrevAccountID
THEN @AccountRunningTotal + @band_3
ELSE @band_3
END,
@PrevAccountID = @CurAccountID
UPDATE #results
SET band_3_total = @AccountRunningTotal + band_2_total, band_2_total = 0
WHERE CURRENT OF curRunningTotal
End
if @band_4 <> 0
Begin
SELECT @AccountRunningTotal = CASE
WHEN @CurAccountID = @PrevAccountID
THEN @AccountRunningTotal + @band_4
ELSE @band_4
END,
@PrevAccountID = @CurAccountID
UPDATE #results
SET band_4_total = @AccountRunningTotal + band_3_total, band_3_total = 0
WHERE CURRENT OF curRunningTotal
End
if @band_5 <> 0
Begin
SELECT @AccountRunningTotal = CASE
WHEN @CurAccountID = @PrevAccountID
THEN @AccountRunningTotal + @band_5
ELSE @band_5
END,
@PrevAccountID = @CurAccountID
UPDATE #results
SET band_5_total = @AccountRunningTotal + band_4_total, band_4_total = 0
WHERE CURRENT OF curRunningTotal
End
--===== Read the information from the next row of the cursor
FETCH NEXT FROM curRunningTotal
INTO @CurAccountID, @band_1, @band_2, @band_3, @band_4, @band_5
END --End of the cursor
--======== Housekeeping
CLOSE curRunningTotal
DEALLOCATE curRunningTotal
GO
select Yr ,
Mnth ,
sum(band_1_Total) band_1_Total,
sum(band_2_Total) band_2_Total,
sum(band_3_Total) band_3_Total,
sum(band_4_Total) band_4_Total,
sum(band_5_Total) band_5_Total
from #results
group by Yr ,
Mnth
March 31, 2010 at 7:54 am
You may find a technique to achieve this in Jeff Moden's article on running totals:
http://www.sqlservercentral.com/articles/T-SQL/68467/
Hope this helps
-- Gianluca Sartori
March 31, 2010 at 8:06 am
Thanks Gianluca,
if you looks closely, that's where my cursor code is adapted from 🙂
The difficulty is moving balances across bands for a given period, so I can't work out how to do a "quirky update" in this scenario.
Dave J
March 31, 2010 at 12:16 pm
I would do it in three steps:
step 1: build an intermediate table with preaggregated values
step 2: do the quirky update
step 3: display the pivoted data using crossTab
Something like this (side note: I cannot verify your test results due to the usage of NEWID() ...):
-- Step 1: build intermediate table
IF OBJECT_ID('tempdb..#intermed') IS NOT NULL
DROP TABLE #intermed
SELECT
c.period_yr,
c.period_mth,
band,
SUM(amount) AS sub_total
INTO #intermed
FROM #transactions t
INNER JOIN #customers c
ON t.acc_no=c.acc_no
AND t.period_yr=c.period_yr
AND t.period_mth=c.period_mth
GROUP BY c.period_yr,c.period_mth,band
-- Step 2: quirky update
ALTER TABLE #intermed ADD run_total MONEY
CREATE CLUSTERED INDEX CX_#intermed ON #intermed(period_yr,period_mth,band)
DECLARE
@Prevband INT,
@Run MONEY
SELECT
@Prevband=0,
@Run=0
UPDATE #intermed
SET
@Run = run_total =
CASE WHEN band <> @Prevband
THEN sub_total
ELSE @Run+sub_total END,
@Prevband=band
FROM #intermed WITH(TABLOCKX)
OPTION (MAXDOP 1)
-- Step 3: CrossTab pivoted data
SELECT
period_yr,
period_mth,
MAX(CASE WHEN band =1 THEN run_total ELSE 0.00 END) AS band_1_Total,
MAX(CASE WHEN band =2 THEN run_total ELSE 0.00 END) AS band_2_Total,
MAX(CASE WHEN band =3 THEN run_total ELSE 0.00 END) AS band_3_Total,
MAX(CASE WHEN band =4 THEN run_total ELSE 0.00 END) AS band_4_Total,
MAX(CASE WHEN band =5 THEN run_total ELSE 0.00 END) AS band_5_Total
FROM #intermed
GROUP BY period_yr, period_mth
April 1, 2010 at 5:55 am
David,
Looks like Lutz may have is sussed but I'll ask anyway... are you all set on this one?
--Jeff Moden
Change is inevitable... Change for the better is not.
April 1, 2010 at 7:26 am
Kind of. I have jumped on his suggestion but Lutz doesn't move the balances over in the way that I need. Run it for a single account and look at my second example output to suss out what I mean. So currently the code looks like this:
DECLARE @PrevAccountID INT
DECLARE @AccountRunningTotal money
SELECT @AccountRunningTotal = 0
--===== Update the running total for this row using the "Quirky Update"
-- and a "Pseudo-cursor"
UPDATE #results
SET @AccountRunningTotal = band_1_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal+band_1
ELSE band_1
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
UPDATE r1
SET @AccountRunningTotal = band_2_total = CASE
WHEN r1.Acc_no = @PrevAccountID
THEN @AccountRunningTotal+r1.band_2
ELSE r1.band_2
END,
@PrevAccountID = r1.Acc_no
FROM #results r1 WITH (TABLOCKX)
OPTION (MAXDOP 1)
UPDATE #results
SET @AccountRunningTotal = band_3_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal+band_3
ELSE band_3
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
UPDATE #results
SET @AccountRunningTotal = band_4_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal+band_4
ELSE band_4
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
UPDATE #results
SET @AccountRunningTotal = band_5_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal+band_5
ELSE band_5
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
update #results
set band_1_total = 0
where band_1 = 0
update #results
set band_2_total = 0
where band_2 = 0
update #results
set band_3_total = 0
where band_3 = 0
update #results
set band_4_total = 0
where band_4 = 0
update #results
set band_5_total = 0
where band_5 = 0
select Yr ,
Mnth ,
sum(band_1_Total) band_1_Total,
sum(band_2_Total) band_2_Total,
sum(band_3_Total) band_3_Total,
sum(band_4_Total) band_4_Total,
sum(band_5_Total) band_5_Total
from #results
group by Yr ,
Mnth
The problem I am stuggling with is that if a person does not make a purchase, there is a missing record. So I'm trying to come up with a insert into #results missing records with the previous months balance or 0 if it is period 1, for columns band_1...band_5.
IDYrMnthAcc_noband_1band_1_Totalband_2band_2_Totalband_3band_3_Totalband_4band_4_Totalband_5band_5_Total
16200981110.000.000.000.0048.9948.990.000.000.000.00
222009111110.000.000.000.000.000.00127.36127.360.000.00
242009121110.000.000.000.000.000.000.000.00138.62138.62
So Records for periods 9 & 10 would be inserted with a balance of 0 except band_3 which would be 48.99 (Sorry, I can't make it line up 😉 )
I will in sert missing records, then run the code I've posted above, as soon as I work how the heck to do it...
does that make sense?
Dave
April 1, 2010 at 10:59 am
Please provide sample data that will describe the scenario you're strugglnig with.
Based on your vague description it is unclear whether you would have missing rows in #customers for a specific period and/or Acc_No and/or band or if you just have missing data in #transactions.
If the latter, you need to use a left join from #customers on #transactions instead of my inner join to build the #intermed table together with a ISNULL(amount,0).
If the former, I'd recommend using a calendar table and a left outer join on #customers.
In order to come up with a working solution please help us help you by providing data that actually describe your scenario.
April 1, 2010 at 9:22 pm
Sorry David... I've been staring at your latest problem with this and I just don't get it especially since your latest example doesn't actually match with the test data you gave previously (or maybe I've done it that badly that I don't see how it matches).
--Jeff Moden
Change is inevitable... Change for the better is not.
April 2, 2010 at 4:36 am
Jeff Moden (4/1/2010)
Sorry David... I've been staring at your latest problem with this and I just don't get it especially since your latest example doesn't actually match with the test data you gave previously (or maybe I've done it that badly that I don't see how it matches).
No worries Jeff. The data is different as I'm using NewId to seed the transaction table with random values, a trick I pinched from you! 😉 The second example moves the running total across all the bands, if that makes it any clearer. Have a look at the second expected output in my first post, it was run for ONE account.
I was struggling to populate the #results tables with 'missing' values before I do the quirky update, but I think I've cracked that now. The moving balance bit I seem to have lost along the way, but on the drive home last night I resolved it. Or at least I have in theory, the data and SQL are at work, so I can't test until I get back on Tuesday. I'll post when I crack it.
Happy Easter! 😛
Dave J
April 2, 2010 at 4:49 am
lmu92 (4/1/2010)
Please provide sample data that will describe the scenario you're strugglnig with.Based on your vague description it is unclear whether you would have missing rows in #customers for a specific period and/or Acc_No and/or band or if you just have missing data in #transactions.
If the latter, you need to use a left join from #customers on #transactions instead of my inner join to build the #intermed table together with a ISNULL(amount,0).
If the former, I'd recommend using a calendar table and a left outer join on #customers.
In order to come up with a working solution please help us help you by providing data that actually describe your scenario.
I know, I'm terrible at describing what I need :blush: I think I nearly have it, and as and when I get there I'll post and hopefully all will become clear 😀
What I meant to say was there are gaps in the transactions table, clients have transactions in periods 1,2,4,5,7,8,9,11,12 for arguments sake, so I need to generate records for periods 3, 6, and 10, with the appropriate balance in the right band. But I think I've cracked that now.
I'd appreciate comments on my solution when I post it on Tuesday.
Thanks for the input, and Happy Easter! 🙂
Dave J
April 2, 2010 at 11:56 am
Happy Easter to you, too, David. C'mon back if you can't put a tack in it. I just want to make sure you get an answer you can work with but if you think you might have it, I'll "relax" a bit.
--Jeff Moden
Change is inevitable... Change for the better is not.
April 3, 2010 at 12:20 pm
Has a play with this, and came up with the following, as a proof of concept.
This creates 100,000 transactions for a customer, and produces the required output.
Setup
-- =================
-- Test object drops
-- =================
IF OBJECT_ID(N'tempdb.dbo.CustomerMonthRunningTotal', N'V')
IS NOT NULL
DROP VIEW dbo.CustomerMonthRunningTotal;
IF OBJECT_ID(N'tempdb.dbo.CustomerMonthTotal', N'V')
IS NOT NULL
DROP VIEW dbo.CustomerMonthTotal;
IF OBJECT_ID(N'tempdb.dbo.Numbers', N'IF')
IS NOT NULL
DROP FUNCTION dbo.Numbers;
IF OBJECT_ID(N'tempdb.dbo.Transact', N'U')
IS NOT NULL
DROP TABLE dbo.Transact;
IF OBJECT_ID(N'tempdb.dbo.Period', N'U')
IS NOT NULL
DROP TABLE dbo.Period;
IF OBJECT_ID(N'tempdb.dbo.Band', N'U')
IS NOT NULL
DROP TABLE dbo.Band;
-- =================
-- Create tables
-- =================
CREATE TABLE dbo.Band
(
band_id INTEGER NOT NULL
PRIMARY KEY,
low MONEY NOT NULL
CHECK (low >= 0),
high MONEY NOT NULL
CHECK (high >= 0),
CHECK (high > low),
UNIQUE (low, high)
);
CREATE TABLE dbo.Period
(
period_id AS period_year * 100 + period_month
PERSISTED NOT NULL
PRIMARY KEY,
period_year INTEGER NOT NULL
CHECK (period_year BETWEEN 2000 AND 2099),
period_month INTEGER NOT NULL
CHECK (period_month BETWEEN 1 AND 12),
period_start AS
CONVERT(DATETIME,
CONVERT(CHAR(4), period_year) +
RIGHT(100 + period_month, 2) +
'01', 112)
PERSISTED,
period_end AS
DATEADD(MONTH, 1,
CONVERT(DATETIME,
CONVERT(CHAR(4), period_year) +
RIGHT(100 + period_month, 2) +
'01', 112))
PERSISTED,
);
CREATE TABLE dbo.Transact
(
transaction_id INTEGER NOT NULL
IDENTITY (1,1)
PRIMARY KEY,
transaction_dt DATETIME NOT NULL
CHECK (transaction_dt >= '2000-01-01T00:00:00'),
customer_id INTEGER NOT NULL,
amount MONEY NOT NULL,
);
GO
-- =================
-- Tally table function
-- =================
GO
CREATE FUNCTION [dbo].[Numbers]
(@N BIGINT)
RETURNS TABLE
WITH SCHEMABINDING
AS RETURN
WITH
N1 AS (SELECT N = 1 UNION ALL SELECT 1),
N2 AS (SELECT N = 1 FROM N1 T, N1),
N3 AS (SELECT N = 1 FROM N2 T, N2),
N4 AS (SELECT N = 1 FROM N3 T, N3),
N5 AS (SELECT N = 1 FROM N4 T, N4),
N6 AS (SELECT N = 1 FROM N5 T, N5),
NM AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS N FROM N6)
SELECT N
FROM NM
WHERE N <= @N;
GO
-- =================
-- Sample bands
-- =================
INSERT dbo.Band (band_id, low, high) VALUES (1, $0.00, $100.00);
INSERT dbo.Band (band_id, low, high) VALUES (2, $100.00, $250.00);
INSERT dbo.Band (band_id, low, high) VALUES (3, $250.00, $500.00);
INSERT dbo.Band (band_id, low, high) VALUES (4, $500.00, $750.00);
INSERT dbo.Band (band_id, low, high) VALUES (5, $750.00, $1000.00);
INSERT dbo.Band (band_id, low, high) VALUES (6, $1000.00, $1500.00);
INSERT dbo.Band (band_id, low, high) VALUES (7, $1500.00, $1000000.00);
GO
-- =================
-- Create periods
-- =================
INSERT dbo.Period
(period_year, period_month)
SELECT 2000 + ((Numbers.n - 1) / 12),
1 + ((Numbers.n - 1) % 12)
FROM dbo.Numbers (120);
GO
-- =================
-- Add 100,000 transactions for one customer
-- =================
INSERT dbo.Transact
(transaction_dt, customer_id, amount)
SELECT DATEADD(HOUR, Numbers.N, CONVERT(DATETIME, '2000-01-01T00:00:00', 126)),
1,
RAND(CHECKSUM(NEWID())) * $0.25
FROM dbo.Numbers (100000);
GO
CREATE NONCLUSTERED INDEX nc1
ON dbo.Transact (transaction_dt) INCLUDE (customer_id, amount);
GO
-- =================
-- Running total view
-- =================
CREATE VIEW dbo.CustomerMonthTotal
WITH SCHEMABINDING
AS
SELECT T.customer_id,
P.period_month,
P.period_year,
month_total = SUM(T.amount),
tran_count = COUNT_BIG(*)
FROM dbo.Transact T
JOIN dbo.Period P
ON T.transaction_dt >= P.period_start
AND T.transaction_dt < P.period_end
GROUP BY
T.customer_id,
P.period_month,
P.period_year;
GO
-- =================
-- Materialise the view
-- =================
CREATE UNIQUE CLUSTERED INDEX c
ON dbo.CustomerMonthTotal (customer_id, period_year, period_month);
GO
-- =================
-- Magic & Dragons
-- =================
CREATE VIEW dbo.CustomerMonthRunningTotal
WITH SCHEMABINDING
AS
SELECT MT1.customer_id,
MT1.period_year,
MT1.period_month,
running_total = SUM(MT2.month_total)
FROM dbo.CustomerMonthTotal MT1 WITH (NOEXPAND)
INNER
HASH
JOIN dbo.CustomerMonthTotal MT2 WITH (NOEXPAND)
ON MT2.customer_id = MT1.customer_id
AND MT2.period_year = MT1.period_year
AND MT2.period_month <= MT1.period_month
GROUP BY
MT1.customer_id,
MT1.period_year,
MT1.period_month;
Test run
SET STATISTICS IO, TIME ON;
WITH SourceData
AS (
SELECT T.customer_id,
T.period_year,
T.period_month,
T.running_total,
B.band_id
FROM dbo.CustomerMonthRunningTotal T
JOIN dbo.Band B
ON T.running_total >= B.low
AND T.running_total < B.high
WHERE customer_id = 1
)
SELECT yr = P.period_year,
mth = P.period_month,
band_1 = ISNULL(P.[1], 0),
band_2 = ISNULL(P.[2], 0),
band_3 = ISNULL(P.[3], 0),
band_4 = ISNULL(P.[4], 0),
band_5 = ISNULL(P.[5], 0),
band_6 = ISNULL(P.[6], 0),
band_7 = ISNULL(P.[7], 0)
FROM SourceData SD
PIVOT (
SUM(running_total)
FOR band_id IN ([1],[2],[3],[4],[5],[6],[7])
) P
ORDER BY
P.period_year,
P.period_month;
SET STATISTICS IO, TIME OFF;
Results:
(120 row(s) affected)
Table 'Band'. Scan count 120, logical reads 240
Table 'CustomerMonthTotal'. Scan count 2, logical reads 4
CPU time = 0 ms, elapsed time = 79 ms.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
April 7, 2010 at 4:50 am
Phew Paul, that's some proof of concept. 😉
I have persevered with mine and while it's not as elegant as yours, it seems to do the job, and as a bonus it should work in SQL 2000 too.
Setup code:
Set nocount On
if object_ID('tempdb..#transactions') is not null
drop Table #transactions
if object_ID('tempdb..#customers') is not null
drop table #customers
if object_ID('tempdb..#interim') is not null
drop Table #interim
create table #customers (ID int not null identity(1,1) primary key,
Acc_No int not null,
band int not null,
Period_Yr int not null,
Period_Mth int not null)
Create Table #transactions (Id int not null identity (1,1) primary key,
Acc_No int not null,
amount money not null,
Period_Yr int null,
Period_Mth int null,
Tran_dt datetime not null)
-- Create customer records and miss out certain periods to mimic no transactions for a given period
-- by inner joining the trans to customer tables
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,1,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,5,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,2,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,3,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,3,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,9,2009)
---Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,4,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(111,5,12,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,2,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,2,3,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,3,4,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,3,5,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,6,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,7,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,4,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,10,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,11,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(222,5,12,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,1,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,1,3,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,4,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,5,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,2,6,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,3,7,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,3,8,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,9,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,10,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,4,11,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(333,5,12,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,1,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,2,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,3,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,1,4,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,5,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,6,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,7,2009)
--Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,2,8,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,9,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,3,10,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,5,11,2009)
Insert into #customers (acc_no, band, Period_Mth, Period_yr) values(444,5,12,2009)
Insert into #transactions (Acc_No, amount, Tran_dt)
select top 500 111, round(Cast(Abs(Checksum(newId()))%100000/1000.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 222, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 333, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
union all
select top 500 444, round(Cast(Abs(Checksum(newId()))%10000/100.0 as money),2), Cast(Abs(Checksum(newId()))%3653.0+36524.0 as datetime) from master.sys.syscolumns
update #transactions
set period_yr = datepart(yy, tran_dt), period_mth = datepart(mm, tran_dt)
delete from #transactions where period_yr <> 2009
-- select * from #customers
-- select * from #transactions order by tran_dt
Initial grab:
if object_ID('tempdb..#results') is not null
drop table #results
create table #results
(ID int not null identity(1,1),
Yr int not null,
Mnth int not null,
Acc_no int null,
band_1 money not null default 0,
band_1_Total money not null default 0,
band_2 money not null default 0,
band_2_Total money not null default 0,
band_3 money not null default 0,
band_3_Total money not null default 0,
band_4 money not null default 0,
band_4_Total money not null default 0,
band_5 money not null default 0,
band_5_Total money not null default 0,
Constraint PK_PrimaryID
Primary Key NONCLUSTERED (ID)
)
SET NoCount On
DECLARE @ACCT_NO INT
SELECT @ACCT_NO = 444
-- select * from #results
Insert into #results (Yr ,
Mnth ,
Acc_no,
band_1,
band_2,
band_3,
band_4,
band_5
)
SELECT
ACCTS.[Period_Yr],
ACCTS.[Period_Mth],
TRANS.Acc_no,
SUM(CASE ACCTS.[band] WHEN 1 THEN TRANS.amount ELSE 0 END) 'band 1',
SUM(CASE ACCTS.[band] WHEN 2 THEN TRANS.amount ELSE 0 END) 'band 2',
SUM(CASE ACCTS.[band] WHEN 3 THEN TRANS.amount ELSE 0 END) 'band 3',
SUM(CASE ACCTS.[band] WHEN 4 THEN TRANS.amount ELSE 0 END) 'band 4',
SUM(CASE ACCTS.[band] WHEN 5 THEN TRANS.amount ELSE 0 END) 'band 5'
FROM
#transactions AS TRANS
INNER JOIN #customers AS ACCTS
ON TRANS.[Period_Yr] = ACCTS.[Period_Yr] AND
TRANS.[Period_Mth] = ACCTS.[Period_Mth] AND
TRANS.[Acc_no] = ACCTS.[Acc_no]
WHERE TRANS.[Acc_no] = @ACCT_NO
Group by
ACCTS.[Period_Yr],
ACCTS.[Period_Mth],
TRANS.[Acc_no]
Order by
ACCTS.[Period_Yr],
ACCTS.[Period_Mth]
create missing rows:
if object_ID('tempdb..#Numbers_1') is not null
drop Table #Numbers_1
CREATE TABLE #Numbers_1 (
[Num] [int] NOT NULL
) ON [PRIMARY]
GO
ALTER TABLE #Numbers_1 WITH NOCHECK ADD
CONSTRAINT [PK_Numbers_Loop] PRIMARY KEY CLUSTERED
([Num]) ON [PRIMARY]
GO
Declare @i int
Declare @i_Max int
Select @i_Max = 12
Select @i = 1
While @i <= @i_Max
Begin
If Not exists (Select 1 from #Numbers_1 where Num = @i)
Begin
Insert INTO #Numbers_1(Num)
select @i
End
Select @i = @i + 1;
End;
select 1
while @@rowcount > 0
begin
insert into #results (yr, mnth, acc_no)
select r.yr, n.num, x.acc_no
from #numbers_1 n
cross join (select acc_no from #results group by acc_no) x
left join (select acc_no , mnth from #results ) m
on m.acc_no = x.acc_no and n.num = m.mnth
inner join #results r on r.acc_no = x.acc_no and r.mnth = n.num - 1
where m.mnth is null
end
select 1
--create missing rows, for periods less than the first transaction
while @@rowcount > 0
begin
insert into #results (yr, mnth, acc_no)
select r.yr, n.num, x.acc_no
from #numbers_1 n
cross join (select acc_no from #results group by acc_no) x
left join (select acc_no , mnth from #results ) m
on m.acc_no = x.acc_no and n.num = m.mnth
inner join #results r on r.acc_no = x.acc_no and r.mnth = n.num + 1
where m.mnth is null
end
--set rowcount 0
CREATE CLUSTERED INDEX IX_CL_ProdAcctNo_Yr_Mnth
on #results (Acc_no, Yr, Mnth)
Do quirky update
DECLARE @PrevAccountID INT
DECLARE @AccountRunningTotal money
SELECT @AccountRunningTotal = 0
--===== Update the running total for this row using the "Quirky Update"
-- and a "Pseudo-cursor"
UPDATE #results
SET @AccountRunningTotal = band_1_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal + band_1
ELSE band_1
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
SELECT @AccountRunningTotal = 0
UPDATE #results
SET @AccountRunningTotal = band_2_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal + band_2
ELSE band_2 --+band_1_total
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
SELECT @AccountRunningTotal = 0
UPDATE #results
SET @AccountRunningTotal = band_3_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal + band_3
ELSE band_3 --+band_2_total
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
SELECT @AccountRunningTotal = 0
UPDATE #results
SET @AccountRunningTotal = band_4_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal + band_4
ELSE band_4 --+band_3_total
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
SELECT @AccountRunningTotal = 0
UPDATE #results
SET @AccountRunningTotal = band_5_total = CASE
WHEN Acc_no = @PrevAccountID
THEN @AccountRunningTotal + band_5
ELSE band_5 --+band_4_total
END,
@PrevAccountID = Acc_no
FROM #results WITH (TABLOCKX)
OPTION (MAXDOP 1)
Get rid of balances we don't want:
update #results
set band_1_total = 0
where band_1 = 0
and band_2_total + band_3_total + band_4_total + band_5_total <> 0
update #results
set band_2_total = 0
where band_2 = 0
and band_1_total + band_3_total + band_4_total + band_5_total <> 0
update #results
set band_3_total = 0
where band_3 = 0
and band_1_total + band_2_total + band_4_total + band_5_total <> 0
update #results
set band_4_total = 0
where band_4 = 0
and band_1_total + band_2_total + band_3_total + band_5_total <> 0
update #results
set band_5_total = 0
where band_5 = 0
and band_1_total + band_2_total + band_3_total + band_4_total <> 0
update #results
set band_5_total = 0
where band_5 = 0
and band_1_total + band_2_total + band_3_total + band_4_total + Band_5 = 0
move the balances across: Note: There is a slight issue here in that amount can go down, so I don't really want the max amount, I want the amount in the last period a transaction was made for a given band... still needs work.
select acc_no, max(band_1_total) b1T, max(band_2_total) b2T, max(band_3_total) b3T, max(band_4_total) b4T, max(band_5_total) b5T
into #interim
from #results
group by acc_no
update r
set band_2_total = band_2_total + b1T
from #results r
inner join #interim i
on r.acc_no = i.acc_no
where band_2_total <> 0
update r
set band_3_total = band_3_total + b2T + b1T
from #results r
inner join #interim i
on r.acc_no = i.acc_no
where band_3_total <> 0
update r
set band_4_total = band_4_total + b3T + b2T + b1T
from #results r
inner join #interim i
on r.acc_no = i.acc_no
where band_4_total <> 0
update r
set band_5_total = band_5_total + b4T + b3T + b2T + b1T
from #results r
inner join #interim i
on r.acc_no = i.acc_no
where band_5_total <> 0
do the final select:
select Yr ,
Mnth ,
sum(band_1_Total) band_1_Total,
sum(band_2_Total) band_2_Total,
sum(band_3_Total) band_3_Total,
sum(band_4_Total) band_4_Total,
sum(band_5_Total) band_5_Total
from #results
group by Yr ,
Mnth
Thanks for all the replies, once again.
HTH
Dave J
April 7, 2010 at 5:05 am
David Jackson (4/7/2010)
Phew Paul, that's some proof of concept. 😉I have persevered with mine and while it's not as elegant as yours, it seems to do the job, and as a bonus it should work in SQL 2000 too.
Hey Dave,
Sorry if it was a bit tangential - I just had an idea and it sort of ran away with me 🙂
Compatibility is always a good thing, I'll take a look over it now - just for my own benefit.
Glad you got it sorted.
Paul
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
April 7, 2010 at 5:38 am
This is the code needed at the start of block 6, move the balances across. This handles negative amounts.
select acc_no, sum(band_1) b1T, sum(band_2) b2T, sum(band_3) b3T, sum(band_4) b4T, sum(band_5) b5T
into #interim
from #results
group by acc_no
Dave J
Viewing 15 posts - 1 through 14 (of 14 total)
You must be logged in to reply to this topic. Login to reply