September 24, 2006 at 3:24 am
This seems to be a tough one because I can't figure out where to start. I'll try to simplify the example. Daily I need to look through 200,000 records in 20,000 accounts (average 10 per account). The table has three fields: AccountNumber, DateIn, Amount.
How would I query?
a) If there are more than 4 records in any consecutive 60 minute period.
b) If there are more than 4 records in any consecutive 60 minute period where the average amount of those transactions is more than $20.
I'm thinking this is an impossible feat. Thank you to anyone smart enough to figure this puzzle out.
September 24, 2006 at 12:26 pm
You should try this one or at least understand the logic and try to implement your own changes. Few things:
1. I answered only on the first one, but I think it will be easy to go on to the second question.
2. You can create indexes in order to improve performance.
SET NOCOUNT ON
--8888888888888888888888888888888888888888888888888888
-- Simulate data
CREATE TABLE #Period (
AccountNumber INT,
DateIn DATETIME,
Amount INT)
INSERT #Period VALUES (1,'1/1/05 00:00',10)
INSERT #Period VALUES (1,'1/1/05 00:01',20)
INSERT #Period VALUES (1,'1/1/05 00:02',30)
INSERT #Period VALUES (1,'1/1/05 00:03',40)
INSERT #Period VALUES (1,'1/1/05 10:10',50) -- not within 60 min
INSERT #Period VALUES (2,'1/1/05 00:00',100)
INSERT #Period VALUES (2,'1/1/05 00:01',200)
INSERT #Period VALUES (2,'1/1/05 00:02',300)
INSERT #Period VALUES (2,'1/1/05 01:20',400) -- not within 60 min
INSERT #Period VALUES (3,'1/1/05 00:00',1000)
INSERT #Period VALUES (3,'1/1/05 00:01',2000)
INSERT #Period VALUES (3,'1/1/05 00:02',3000)
INSERT #Period VALUES (3,'1/1/05 10:03',4000) -- not within 60 min
INSERT #Period VALUES (4,'1/1/05 01:00',10000)
INSERT #Period VALUES (4,'1/1/05 01:01',20000)
INSERT #Period VALUES (4,'1/1/05 02:02',30000)
INSERT #Period VALUES (4,'1/1/05 03:00',40000)
-- Select data from original table
SELECT *
FROM #Period
--8888888888888888888888888888888888888888888888888888
-- starting...
-- Insert all data into temp table (with row id) ordered by date
SELECT IDENTITY(INT,1,1)AS ROW_ID, AccountNumber, DateIn, Amount
INTO #Temp_Period
FROM #Period
ORDER BY DateIn
--**************************
-- This is the query you need to run:
SELECT A.AccountNumber,
COUNT(DISTINCT A.ROW_ID) AS COUNT_ROW_ID
FROM #Temp_Period A
JOIN #Temp_Period B ON A.AccountNumber = B.AccountNumber
AND A.ROW_ID <> B.ROW_ID
AND A.DateIn BETWEEN DATEADD(HH,-1,B.DateIn) AND DATEADD(HH,1,B.DateIn)
GROUP BY A.AccountNumber
ORDER BY A.AccountNumber
--**************************
-- All data ordered by account number --> this one will help you understand the logic
SELECT *
FROM #Temp_Period
ORDER BY AccountNumber
--8888888888888888888888888888888888888888888888888888
DROP TABLE #Period
DROP TABLE #Temp_Period
September 24, 2006 at 6:34 pm
I hope this is what u are looking for:
Select * from
(
SELECT #Period.AccountNumber,#Period.DateIn,--#Period.Amount,
(SELECT sum(Amount) FROM #Period S
where #Period.AccountNumber = S.AccountNumber
and S.DateIn between #Period.DateIn and DateAdd(mi,60,#Period.DateIn)) as Amount,
(SELECT Count(*) FROM #Period S
where #Period.AccountNumber = S.AccountNumber
and S.DateIn between #Period.DateIn and DateAdd(mi,60,#Period.DateIn)) as Counts
FROM #Period
where
--Count greater than or equal to 4
(SELECT Count(*) FROM #Period S
where #Period.AccountNumber = S.AccountNumber
and S.DateIn between #Period.DateIn and DateAdd(mi,60,#Period.DateIn)) >= 4
  A
where Cast(A.Amount as decimal(15,2))/Cast(A.Counts as decimal(15,2)) > 20.0
September 24, 2006 at 8:04 pm
I goofed up:
a) If there are more than 4 records in any consecutive 60 minute period.
should be
a) If there are more than 4 records in any consecutive 60 minute period FOR THAT ACCOUNT ITSELF (looking at records on an account basis)
Sreejith, I'm getting an error when I run your query, think the smiley face may have covered some of your code?
Thanks guys (i'm still confused, this is more complicated than most stuff)
September 25, 2006 at 1:26 am
September 25, 2006 at 4:50 am
I changed the test data a bit. We are not looking at dollar amount in this. Account C and D had 5 records within an hour.
The result of this query is A=6, B=8, C=7, D=5. I'm not seeing how I'd know A+B did not have 5, and C+D did have 5.
Thanks again!!!
-----------------------------------------------
CREATE TABLE #Period
(AccountNumber VARCHAR(1), DateIn DATETIME, Amount INT)
INSERT #Period VALUES ('A','1/1/05 00:01',1)
INSERT #Period VALUES ('A','1/1/05 00:02',1)
INSERT #Period VALUES ('A','1/1/05 00:03',1)
INSERT #Period VALUES ('A','1/1/05 00:04',1)
INSERT #Period VALUES ('A','1/1/05 02:05',1)
INSERT #Period VALUES ('A','1/1/05 10:02',1)
INSERT #Period VALUES ('A','1/1/05 10:03',1) -- only had 4 within 60 minutes
INSERT #Period VALUES ('B','1/1/05 00:00',1)
INSERT #Period VALUES ('B','1/1/05 01:01',1)
INSERT #Period VALUES ('B','1/1/05 01:02',1)
INSERT #Period VALUES ('B','1/1/05 01:03',1)
INSERT #Period VALUES ('B','1/1/05 01:04',1)
INSERT #Period VALUES ('B','1/1/05 10:01',1)
INSERT #Period VALUES ('B','1/1/05 10:02',1) -- only had 4 within 60 minutes
INSERT #Period VALUES ('C','1/1/05 00:01',1)
INSERT #Period VALUES ('C','1/1/05 00:02',1)
INSERT #Period VALUES ('C','1/1/05 00:03',1)
INSERT #Period VALUES ('C','1/1/05 00:04',1)
INSERT #Period VALUES ('C','1/1/05 00:05',1)
INSERT #Period VALUES ('C','1/1/05 10:02',1)
INSERT #Period VALUES ('C','1/1/05 10:03',1) -- had 5 within 60 minutes, $ amt not average $20
INSERT #Period VALUES ('D','1/1/05 00:01',1)
INSERT #Period VALUES ('D','1/1/05 10:02',21)
INSERT #Period VALUES ('D','1/1/05 10:03',21)
INSERT #Period VALUES ('D','1/1/05 10:04',21)
INSERT #Period VALUES ('D','1/1/05 10:05',21)
INSERT #Period VALUES ('D','1/1/05 10:06',21)
INSERT #Period VALUES ('D','1/1/05 12:03',1) -- had 5 within 60 minutes, $ amt average $21
-- insert all data into temp table (with row id) ordered by date
SELECT
IDENTITY(INT,1,1)AS ROW_ID,
AccountNumber, DateIn, Amount
INTO #Temp_Period
FROM #Period
ORDER BY DateIn
SELECT
A.AccountNumber,
COUNT(DISTINCT A.ROW_ID) AS COUNT_ROW_ID
FROM #Temp_Period A
JOIN #Temp_Period B ON A.AccountNumber = B.AccountNumber
AND A.ROW_ID <> B.ROW_ID
AND A.DateIn BETWEEN DATEADD(HH,-1,B.DateIn) AND DATEADD(HH,1,B.DateIn)
GROUP BY A.AccountNumber
ORDER BY A.AccountNumber
DROP TABLE #Period
DROP TABLE #Temp_Period
/*
SELECT * FROM #Period
SELECT * FROM #Temp_Period ORDER BY AccountNumber
*/
September 25, 2006 at 5:52 am
This one will give you the result you asked for.
You should change the query to return whatever you need.
SET
NOCOUNT ON
CREATE
TABLE #Period
(
AccountNumber VARCHAR(1), DateIn DATETIME, Amount INT)
INSERT
#Period VALUES ('A','1/1/05 00:01',1)
INSERT
#Period VALUES ('A','1/1/05 00:02',1)
INSERT
#Period VALUES ('A','1/1/05 00:03',1)
INSERT
#Period VALUES ('A','1/1/05 00:04',1)
INSERT
#Period VALUES ('A','1/1/05 02:05',1)
INSERT
#Period VALUES ('A','1/1/05 10:02',1)
INSERT
#Period VALUES ('A','1/1/05 10:03',1) -- only had 4 within 60 minutes
INSERT
#Period VALUES ('B','1/1/05 00:00',1)
INSERT
#Period VALUES ('B','1/1/05 01:01',1)
INSERT
#Period VALUES ('B','1/1/05 01:02',1)
INSERT
#Period VALUES ('B','1/1/05 01:03',1)
INSERT
#Period VALUES ('B','1/1/05 01:04',1)
INSERT
#Period VALUES ('B','1/1/05 10:01',1)
INSERT
#Period VALUES ('B','1/1/05 10:02',1) -- only had 4 within 60 minutes
INSERT
#Period VALUES ('C','1/1/05 00:01',1)
INSERT
#Period VALUES ('C','1/1/05 00:02',1)
INSERT
#Period VALUES ('C','1/1/05 00:03',1)
INSERT
#Period VALUES ('C','1/1/05 00:04',1)
INSERT
#Period VALUES ('C','1/1/05 00:05',1)
INSERT
#Period VALUES ('C','1/1/05 10:02',1)
INSERT
#Period VALUES ('C','1/1/05 10:03',1) -- had 5 within 60 minutes, $ amt not average $20
INSERT
#Period VALUES ('D','1/1/05 00:01',1)
INSERT
#Period VALUES ('D','1/1/05 10:02',21)
INSERT
#Period VALUES ('D','1/1/05 10:03',21)
INSERT
#Period VALUES ('D','1/1/05 10:04',21)
INSERT
#Period VALUES ('D','1/1/05 10:05',21)
INSERT
#Period VALUES ('D','1/1/05 10:06',21)
INSERT
#Period VALUES ('D','1/1/05 12:03',1) -- had 5 within 60 minutes, $ amt average $21
-- insert all data into temp table (with row id) ordered by date
SELECT
IDENTITY
(INT,1,1)AS ROW_ID,
AccountNumber
, DateIn, Amount
INTO
#Temp_Period
FROM
#Period
ORDER
BY DateIn
SELECT
DISTINCT AccountNumber, COUNT_ROW_ID
FROM
(SELECT A.ROW_ID,
A
.AccountNumber,
COUNT(A.ROW_ID)+1 AS COUNT_ROW_ID
FROM #Temp_Period A
JOIN #Temp_Period B ON A.AccountNumber = B.AccountNumber
AND A.ROW_ID <> B.ROW_ID
AND A.DateIn BETWEEN DATEADD(HH,-1,B.DateIn) AND DATEADD(HH,1,B.DateIn)
GROUP BY A.AccountNumber, A.ROW_ID
HAVING COUNT(A.ROW_ID)+1 >= 4
) Help_Table
DROP
TABLE #Period
DROP
TABLE #Temp_Period
Viewing 7 posts - 1 through 6 (of 6 total)
You must be logged in to reply to this topic. Login to reply