January 6, 2014 at 9:45 am
Hello All,
I've one complex situation with a query. I've two tables table_master and table_details. In table_master, ID, Book, Party are unique(There might also be a possibility of having duplicates based on Desc & Type). in table_details, i've attributes for the IDs present in table_master. Any of the ID can have upto 3 attributes (Attr1, Attr2, Attr3). No i want to make some join to have data in my third and final table called table_final. The rules are:
a. All the IDs in master table should be present in final table (irrespective of the availability status in details table)
b. In master_table, ABC & DEF are the only eligible types. If ID has only one record with either of them, then that ID has to be included. If any ID has both of the types, then the one with "ABC" should be considered.'
c. There might be some IDs which are all same but only change is in less valued Desc column. In such scenario, pick any one randomly.
Here is the sample data:
table_master
IDBookPartyDescType
1234X12DY1828This is First BookABC
8888ROME56513Class ActABC
9999ASL7223412RoutineDEF
345612LM322891Not SingleABC
345612LM322891Not SingleDEF
44445NNK227823Different KindABC
44445NNK227823Different BookABC
3489345H834345New ThingABC
3489345H834345New ThingDEF
3489345H834345New BookABC
table_details
IDBookPartyAttributeValue
1234X12DY1828Attr1Red
1234X12DY1828Attr2Round
1234X12DY1828Attr3Big
9999ASL7223412Attr1Blue
9999ASL7223412Attr2Square
345612LM322891Attr1White
345612LM322891Attr2Square
44445NNK227823Attr1Blue
44445NNK227823Attr2Rectangle
3489345H834345Attr1Black
3489345H834345Attr2Round
table_final
IDBookPartyDescTypeCol1Col1_ValCol2Col2_ValCol3Col3_Val
1234X12DY1828This is First BookABCAttr1RedAttr2RoundAttr3Big
8888ROME56513Class ActABCNULLNULLNULLNULLNULLNULL
9999ASL7223412RoutineDEFAttr1BlueAttr2SquareNULLNULL
345612LM322891Not SingleABCAttr1WhiteAttr2SquareNULLNULL
44445NNK227823Different KindABCAttr1BlueAttr2RectangleNULLNULL
3489345H834345New ThingABCAttr1BlackAttr2RoundNULLNULL
January 6, 2014 at 10:10 am
You really should post consumable DDL and sample data. I'm in a good mood so I did it this time, but you've been here long enough to know how to do it.
Here's one option for you:
CREATE TABLE #Master(
IDint,
BookCHAR(5),
Partyint,
[Desc]varchar( 50),
[Type] char(3))
INSERT #Master VALUES(
1234,'X12DY',1828,'This is First Book','ABC'),(
8888,'ROME5',6513,'Class Act','ABC'),(
9999,'ASL72',23412,'Routine','DEF'),(
3456,'12LM3',22891,'Not Single','ABC'),(
3456,'12LM3',22891,'Not Single','DEF'),(
4444,'5NNK2',27823,'Different Kind','ABC'),(
4444,'5NNK2',27823,'Different Book','ABC'),(
3489,'345H8',34345,'New Thing','ABC'),(
3489,'345H8',34345,'New Thing','DEF'),(
3489,'345H8',34345,'New Book','ABC')
CREATE TABLE #Details(
IDint,
BookCHAR(5),
Partyint,
Attributechar(5),
Valuevarchar(15))
INSERT #Details VALUES(
1234,'X12DY',1828,'Attr1','Red'),(
1234,'X12DY',1828,'Attr2','Round'),(
1234,'X12DY',1828,'Attr3','Big'),(
9999,'ASL72',23412,'Attr1','Blue'),(
9999,'ASL72',23412,'Attr2','Square'),(
3456,'12LM3',22891,'Attr1','White'),(
3456,'12LM3',22891,'Attr2','Square'),(
4444,'5NNK2',27823,'Attr1','Blue'),(
4444,'5NNK2',27823,'Attr2','Rectangle'),(
3489,'345H8',34345,'Attr1','Black'),(
3489,'345H8',34345,'Attr2','Round');
WITH cteMaster AS(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ID, Book, Party ORDER BY [Type]) rn
FROM #Master
WHERE [Type] IN ('ABC', 'DEF')
),
cteDetails AS(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ID, Book, Party ORDER BY Attribute) rn
FROM #Details
)
SELECT m.ID,
m.Book,
m.Party,
m.[Desc],
m.[Type],
MAX(CASE WHEN d.rn = 1 THEN d.Attribute END) Col1,
MAX(CASE WHEN d.rn = 1 THEN d.Value END) Col1_Val,
MAX(CASE WHEN d.rn = 2 THEN d.Attribute END) Col2,
MAX(CASE WHEN d.rn = 2 THEN d.Value END) Col2_Val,
MAX(CASE WHEN d.rn = 3 THEN d.Attribute END) Col3,
MAX(CASE WHEN d.rn = 3 THEN d.Value END) Col3_Val
FROM cteMaster m
LEFT
JOIN cteDetails d ON m.ID = d.ID AND m.Book = d.Book AND m.Party = d.Party
WHERE m.rn = 1
GROUP BY m.ID,
m.Book,
m.Party,
m.[Desc],
m.[Type]
DROP TABLE #Master
DROP TABLE #Details
January 6, 2014 at 10:10 am
Please post ddl and insert scripts for your sample data.
What have you tried so far?
For better assistance in answering your questions, please read this[/url].
Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]
January 6, 2014 at 10:33 am
Hi Chris, so far i have written this query:
;with cte_1
as(SELECT ID, Book, Party, count(*) as SumCount FROM table_master nolock
GROUP BY ID, Book, Party having count(*) = 1)
insert into table_final
select tm.ID, tm.Book, tm.Party, tm.LDesc, tm.LType,
td.Attribute, td.Val, td1.Attribute, td1.Val, td2.Attribute, td2.Val
from table_master tm inner join cte_1 on tm.ID = cte_1.id and tm.Book = cte_1.Book and tm.Party = cte_1.Party
left outer join table_details td on tm.ID = td.id and tm.Book = td.Book and tm.Party = td.Party and td.Attribute ='Attr1'
left outer join table_details td1 on tm.ID = td1.id and tm.Book = td1.Book and tm.Party = td1.Party and td1.Attribute ='Attr2'
left outer join table_details td2 on tm.ID = td2.id and tm.Book = td2.Book and tm.Party = td2.Party and td2.Attribute ='Attr3'
go
;with cte_2
as(SELECT ID, Book, Party, count(*) as SumCount FROM table_master nolock
GROUP BY ID, Book, Party having count(*) > 1),
cte_3 as
(select id, book, party, Ldesc, ltype,
row_number() over(partition by id, book, party order by ltype) as sno
from table_master
where id in(select id from cte_2))
insert into table_final
select tm.ID, tm.Book, tm.Party, tm.LDesc, tm.LType,
td.Attribute, td.Val, td1.Attribute, td1.Val, td2.Attribute, td2.Val
from table_master tm inner join cte_3 on tm.ID = cte_3.id and tm.Book = cte_3.Book and tm.Party = cte_3.Party and cte_3.sno = 1
left outer join table_details td on tm.ID = td.id and tm.Book = td.Book and tm.Party = td.Party and td.Attribute ='Attr1'
left outer join table_details td1 on tm.ID = td1.id and tm.Book = td1.Book and tm.Party = td1.Party and td1.Attribute ='Attr2'
left outer join table_details td2 on tm.ID = td2.id and tm.Book = td2.Book and tm.Party = td2.Party and td2.Attribute ='Attr3'
go
January 6, 2014 at 10:33 am
Thanks for this query Luiz. Ket me check this out.
January 6, 2014 at 10:48 am
Why do you have all those nolock hints? Are your users aware that they might get incorrect data? Are they okay with that?
January 6, 2014 at 10:58 am
It's basically for OLAP environment so no worruies about using nolock. Your query seems good. But what do you think about the performance from the bulk (which i mentioned) perspective ?
January 6, 2014 at 11:07 am
Luiz, One thing to add here. The master table has around 2 billion records and Details have around 3 billion records.
How will be the performance of your query ?
January 6, 2014 at 11:18 am
There seems to be a catch in your query Luiz.
Say in any case the data does not have any Attribute at all (say Attr2 in our example). In this case, all Attr3 values will go to Attr2.
January 6, 2014 at 11:43 am
sqlnaive (1/6/2014)
It's basically for OLAP environment so no worruies about using nolock. Your query seems good. But what do you think about the performance from the bulk (which i mentioned) perspective ?
If it's an OLAP environment, then there shouldn't be any need to use nolock hints that might give you inconsistent data.
About the bulk perspective, I'm not sure what you're talking about.
For performance, you should test yourself. If there's a problem, then alternatives can be considered.
There seems to be a catch in your query Luiz.
Say in any case the data does not have any Attribute at all (say Attr2 in our example). In this case, all Attr3 values will go to Attr2.
I'm sure that you can figure that out if you understand the query. If you don't, check the following article and come back for specific questions. http://www.sqlservercentral.com/articles/T-SQL/63681/
January 6, 2014 at 11:57 am
Luis Cazares (1/6/2014)
sqlnaive (1/6/2014)
It's basically for OLAP environment so no worruies about using nolock. Your query seems good. But what do you think about the performance from the bulk (which i mentioned) perspective ?If it's an OLAP environment, then there shouldn't be any need to use nolock hints that might give you inconsistent data.
About the bulk perspective, I'm not sure what you're talking about.
For performance, you should test yourself. If there's a problem, then alternatives can be considered.
There seems to be a catch in your query Luiz.
Say in any case the data does not have any Attribute at all (say Attr2 in our example). In this case, all Attr3 values will go to Attr2.
I'm sure that you can figure that out if you understand the query. If you don't, check the following article and come back for specific questions. http://www.sqlservercentral.com/articles/T-SQL/63681/
Luiz, May be i was not specific to where i am getting an issue. I already have a table (in this case table_final) which has fixed number of columns Col1, Col2 and Col3 (along with the associated values). Now value Attr1 should always come to Col1 or else NULL, Attr2 should always come to Col2 or else NULL and Attr3 should always come to Col3 or else NULL.
In this query, in case there are just Attr2 and Attr3 for any ID, then their associated rn will be 1 and 2. In final query, these will be going to Col1 & Col2 respectively and not in Col2 & Col3.
MAX(CASE WHEN d.rn = 1 THEN d.Attribute END) Col1,
MAX(CASE WHEN d.rn = 1 THEN d.Value END) Col1_Val,
MAX(CASE WHEN d.rn = 2 THEN d.Attribute END) Col2,
MAX(CASE WHEN d.rn = 2 THEN d.Value END) Col2_Val,
MAX(CASE WHEN d.rn = 3 THEN d.Attribute END) Col3,
MAX(CASE WHEN d.rn = 3 THEN d.Value END) Col3_Val
Pleas ehelp me understand if i'm incorrect here.
January 6, 2014 at 12:12 pm
It seems that you're facing the same problem as your previous post.
http://www.sqlservercentral.com/Forums/Topic1524935-392-1.aspx
And it was solved over there.
January 7, 2014 at 5:59 am
I'm getting the required data from the following query.
WITH cteMaster AS(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ID, Book, Party ORDER BY [Type]) rn
FROM #Master
WHERE [Type] IN ('ABC', 'DEF')
),
ctenew AS(
SELECT * FROM cteMaster
where rn = 1
)
SELECT m.ID,
m.Book,
m.Party,
m.[Desc],
m.[Type],
d.Attribute,d.Value,
d1.Attribute,d1.Value,
d2.Attribute,d2.Value
FROM ctenew m
LEFT JOIN cteDetails d ON m.ID = d.ID AND m.Book = d.Book AND m.Party = d.Party and d.Attribute = 'Attr1'
LEFT JOIN cteDetails d1 ON m.ID = d1.ID AND m.Book = d1.Book AND m.Party = d1.Party and d1.Attribute = 'Attr2'
LEFT JOIN cteDetails d2 ON m.ID = d2.ID AND m.Book = d2.Book AND m.Party = d2.Party and d2.Attribute = 'Attr3'
The only thing which worries me with your query is that the row numbering is happening dynamically and based on that, we are allocating Attribute values to different columns. While I've a table already with fix columns. Otherwise this query seems fantastic. Please still feel free to let me understand if this query can handle the situation (or there is somethign which i am not able to pick here).
I'll update my other chain and close that. That totally got out of my mind. It's a bad practice. I admit it and feel sorry for that.
January 7, 2014 at 6:46 am
Working on a completely different principle, this will perform differently to Luiz' fine solution. Test them and assuming they return correct values, choose the fastest - on the assumption that current indexing is optimal for both.
WITH Masters AS (
SELECT
rn = ROW_NUMBER() OVER(PARTITION BY ID, Book, Party ORDER BY [Type], [Desc]),
ID, Book, Party, [Desc], [Type]
FROM #Master
WHERE [Type] IN ('ABC', 'DEF')
)
SELECT ID, Book, Party, [Desc], [Type], x.*
FROM Masters m
CROSS APPLY (
SELECT
Col1 = MAX(CASE WHEN Attribute = 'Attr1' THEN Attribute ELSE NULL END),
Col1_Val = MAX(CASE WHEN Attribute = 'Attr1' THEN Value ELSE NULL END),
Col2 = MAX(CASE WHEN Attribute = 'Attr2' THEN Attribute ELSE NULL END),
Col2_Val = MAX(CASE WHEN Attribute = 'Attr2' THEN Value ELSE NULL END),
Col3 = MAX(CASE WHEN Attribute = 'Attr3' THEN Attribute ELSE NULL END),
Col3_Val = MAX(CASE WHEN Attribute = 'Attr3' THEN Value ELSE NULL END)
FROM #Details d
WHERE d.ID = m.ID
AND d.Book = m.Book
AND d.Party = m.Party
) x
WHERE rn = 1;
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden
January 8, 2014 at 6:25 pm
other solution is using the CTE and PIVOT like the follow script, in the sample I define the tables, but you can use your defined tables and only use the CTE statement, I wish resolve the problem :
DECLARE @master TABLE (
ID INT ,
Book CHAR (5) ,
Party INT ,
[Desc] VARCHAR (50),
[Type] CHAR (3) );
INSERT @master ([ID], [Book], [Party], [Desc], [Type])
VALUES (1234, 'X12DY', 1828, 'This is First Book', 'ABC'),
(8888, 'ROME5', 6513, 'Class Act', 'ABC'),
(9999, 'ASL72', 23412, 'Routine', 'DEF'),
(3456, '12LM3', 22891, 'Not Single', 'ABC'),
(3456, '12LM3', 22891, 'Not Single', 'DEF'),
(4444, '5NNK2', 27823, 'Different Kind', 'ABC'),
(4444, '5NNK2', 27823, 'Different Book', 'ABC'),
(3489, '345H8', 34345, 'New Thing', 'ABC'),
(3489, '345H8', 34345, 'New Thing', 'DEF'),
(3489, '345H8', 34345, 'New Book', 'ABC');
DECLARE @Details TABLE (
ID INT ,
Book CHAR (5) ,
Party INT ,
Attribute CHAR (5) ,
[Value] VARCHAR (15));
INSERT @Details ([ID], [Book], [Party], [Attribute], [value])
VALUES (1234, 'X12DY', 1828, 'Attr1', 'Red'),
(1234, 'X12DY', 1828, 'Attr2', 'Round'),
(1234, 'X12DY', 1828, 'Attr3', 'Big'),
(9999, 'ASL72', 23412, 'Attr1', 'Blue'),
(9999, 'ASL72', 23412, 'Attr2', 'Square'),
(3456, '12LM3', 22891, 'Attr1', 'White'),
(3456, '12LM3', 22891, 'Attr2', 'Square'),
(4444, '5NNK2', 27823, 'Attr1', 'Blue'),
(4444, '5NNK2', 27823, 'Attr2', 'Rectangle'),
(3489, '345H8', 34345, 'Attr1', 'Black'),
(3489, '345H8', 34345, 'Attr2', 'Round');
WITH cteMaster
AS (SELECT TOP 100 PERCENT ROW_NUMBER() OVER (PARTITION BY ID, Book, Party ORDER BY [Type]) AS rn,
[ID],
[Book],
[Party],
[Desc],
[Type]
FROM @master
ORDER BY [Type], id),
pivoting
AS (SELECT [ID],
[Book],
[Party],
CASE [Attr1]
WHEN 1 THEN (SELECT [Attribute]
FROM @Details AS d
WHERE [Attribute] = 'Attr1'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col1],
CASE [Attr1]
WHEN 1 THEN (SELECT [value]
FROM @Details AS d
WHERE [Attribute] = 'Attr1'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col1_Val],
CASE [Attr2]
WHEN 1 THEN (SELECT [Attribute]
FROM @Details AS d
WHERE [Attribute] = 'Attr2'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col2],
CASE [Attr2]
WHEN 1 THEN (SELECT [value]
FROM @Details AS d
WHERE [Attribute] = 'Attr2'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col2_Val],
CASE [Attr3]
WHEN 1 THEN (SELECT [Attribute]
FROM @Details AS d
WHERE [Attribute] = 'Attr3'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col3],
CASE [Attr3]
WHEN 1 THEN (SELECT [value]
FROM @Details AS d
WHERE [Attribute] = 'Attr3'
AND [d].[ID] = [pvt].[ID]) ELSE NULL
END AS [Col3_Val]
FROM (SELECT [ID],
[Book],
[Party],
[Attribute],
[Value]
FROM @Details) AS t PIVOT (COUNT ([value]) FOR attribute IN ([Attr1], [Attr2], [Attr3])) AS pvt)
SELECT [m].[ID], [m].[Book], [m].[Party], [m].[Desc], [m].[Type],
[p].[Col1], [p].[Col1_Val], [p].[Col2], [p].[Col2_Val], [p].[Col3], [p].[Col3_Val]
FROM [cteMaster] AS m LEFT OUTER JOIN
[pivoting] AS p
ON [p].[ID] = [m].[ID]
AND [p].[Book] = [m].[Book]
AND [p].[Party] = [m].[Party]
WHERE [rn] = 1;
Viewing 15 posts - 1 through 15 (of 18 total)
You must be logged in to reply to this topic. Login to reply