March 19, 2010 at 9:55 am
Hi Guys,
here is a table sturucture for managing a chain link(binary tree)application.
user_registration_tbl
(id int identity,
user_first_name varchar(100) not null,
parentid int not null, ---coresponding ID of parent
Node int not null, ----'0' if member is leftside of parent and '1' if right.
creationtime datetime null,
primary key(id));
Id U_f_name parentid Node creationtime
1 john 0 0 NULL
2 jack 1 0 NULL
3 jam 1 1 NULL
4 sam 2 0 NULL
5 sat 2 1 NULL
6 jay 3 0
7 rai 3 1
8 ram 4 0
9 dan 4 1
So can any one please let me know a stored procedure logic for getting a total left count and right count for a particaulr Id.
e.g. if i pass Id 2 the procedure should give its
total number of left nodes =3
total number of riht nodes = 1
Thanks in advance
March 19, 2010 at 11:03 am
i think SELECT COUNT(DISTINCT fieldname) will do what you want:
CREATE TABLE user_registration_tbl
(id int identity,
user_first_name varchar(100) not null,
parentid int not null, ---coresponding ID of parent
Node int not null, ----'0' if member is leftside of parent and '1' if right.
creationtime datetime null,
primary key(id))
set identity_insert user_registration_tbl on
insert into user_registration_tbl (Id,user_first_name, parentid, Node )
SELECT 1,'john',0,0 UNION ALL
SELECT 2,'jack',1,0 UNION ALL
SELECT 3,'jam',1,1 UNION ALL
SELECT 4,'sam',2,0 UNION ALL
SELECT 5,'sat',2,1 UNION ALL
SELECT 6,'jay',3,0 UNION ALL
SELECT 7,'rai',3,1 UNION ALL
SELECT 8,'ram',4,0 UNION ALL
SELECT 9,'dan',4,1
set identity_insert user_registration_tbl off
select count(distinct Node) from user_registration_tbl where Node <> 0
select count(distinct parentid) from user_registration_tbl
Lowell
March 19, 2010 at 11:51 am
I think I could not explain my requirements clearly. Here I have attached an image file which may give u clear understanding.
March 19, 2010 at 11:55 am
How do you come to the value of 3 for left nodes with a value of 2 passed in? The way I understand it. ID 2 is a left (that’s one) and it’s parent is ID 1 which is also a left, so that would be two left and 0 right nodes, right?
Is this recursive through your hierarchy?
So if you passed in ID 9, you would get (1 right for 9) + (1 left for it’s owner 4) + (1 left for 4’s owner 2) + (1 left for 2’s owner 1) or 1 right and 3 left.
______________________________________________________________________
Personal Motto: Why push the envelope when you can just open it?
If you follow the direction given HERE[/url] you'll likely increase the number and quality of responses you get to your question.
Jason L. SelburgMarch 19, 2010 at 12:11 pm
when I am sending a value that will be considered as root node. Then I will need to count the left side nodes and Right sides node of that root. eg, I am sending 1 it will return Left = 5 and Right = 3, when sending 2 it will return Left = 3 and right = 1
March 19, 2010 at 2:11 pm
Well, probably not very scalable but maybe this one will do:
if object_id('dbo.user_registration_tbl', 'u') is not null
drop table dbo.user_registration_tbl
create table dbo.user_registration_tbl
(
id int identity,
user_first_name varchar(100) not null,
parentid int not null, ---coresponding ID of parent
Node int not null, ----'0' if member is leftside of parent and '1' if right.
creationtime datetime null,
primary key(id)
);
set identity_insert dbo.user_registration_tbl on
insert into user_registration_tbl
(id, user_first_name, parentid, Node)
values
(1, 'john', 0, 0),
(2, 'jack', 1, 0),
(3, 'jam', 1, 1),
(4, 'sam', 2, 0),
(5, 'sat', 2, 1),
(6, 'jay', 3, 0),
(7, 'rai', 3, 1),
(8, 'ram', 4, 0),
(9, 'dan', 4, 1)
set identity_insert dbo.user_registration_tbl off;
with Traverse as
(
select
id, cast(Node as varchar(max)) Nodes
from
dbo.user_registration_tbl c
where
parentid = 0
union all
select
c.id, r.Nodes + CAST(c.Node as varchar)
from
dbo.user_registration_tbl c
join
Traverse r on r.id = c.parentid
)
select
t1.id, u.parentid, t1.Nodes, u.user_first_name, u.creationtime,
(select COUNT(*) from Traverse t2 where t1.Nodes + '0' = LEFT(t2.Nodes, len(t1.Nodes) + 1)) LeftCount,
(select COUNT(*) from Traverse t2 where t1.Nodes + '1' = LEFT(t2.Nodes, len(t1.Nodes) + 1)) RightCount
from
Traverse t1
join
dbo.user_registration_tbl u on u.id = t1.id
order by
t1.id
option (maxrecursion 0)
The recursive CTE is used to compute a string of Node values along the path from the root to each respective id. The prefix (with a length of the depth of the id in the tree) of this string is then used to count the number of nodes in the left and right branche for each id.
HTH,
Peter
Edit: removed parentid from the select-list in the CTE
March 20, 2010 at 9:37 am
An implementation using hierarchyid:
Setup:
USE tempdb;
GO
IF OBJECT_ID(N'dbo.Tree', N'U')
IS NOT NULL
DROP TABLE dbo.Tree;
GO
-- Test table
CREATE TABLE dbo.Tree
(
node_id INTEGER NOT NULL,
name VARCHAR(20) NOT NULL,
hid HIERARCHYID NOT NULL,
parent AS hid.GetAncestor(1) PERSISTED,
level AS hid.GetLevel(),
path AS hid.ToString(),
);
-- Indexes and constraints
CREATE UNIQUE CLUSTERED INDEX [CUQ dbo.Tree depth_first] ON dbo.Tree (hid);
CREATE UNIQUE INDEX [UQ dbo.Tree node_id] ON dbo.Tree (node_id);
CREATE INDEX [IX dbo.Tree parent] ON dbo.Tree (parent);
ALTER TABLE dbo.Tree ADD FOREIGN KEY (parent) REFERENCES dbo.Tree (hid);
-- Nodes
INSERT dbo.Tree(node_id, name, hid) VALUES (1, 'Node 1', hierarchyid::GetRoot());
INSERT dbo.Tree(node_id, name, hid) VALUES (2, 'Node 2', hierarchyid::Parse(N'/1/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (3, 'Node 3', hierarchyid::Parse(N'/2/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (4, 'Node 4', hierarchyid::Parse(N'/1/1/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (5, 'Node 5', hierarchyid::Parse(N'/1/2/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (6, 'Node 6', hierarchyid::Parse(N'/2/1/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (7, 'Node 7', hierarchyid::Parse(N'/2/2/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (8, 'Node 8', hierarchyid::Parse(N'/1/1/1/'));
INSERT dbo.Tree(node_id, name, hid) VALUES (9, 'Node 9', hierarchyid::Parse(N'/1/1/2/'));
Solution:
DECLARE @root HIERARCHYID,
@left HIERARCHYID,
@right HIERARCHYID;
SELECT @root = hid FROM dbo.Tree WHERE node_id = 1;
SELECT @left = (SELECT MIN(hid) FROM dbo.Tree WHERE hid.GetAncestor(1) = @root),
@right = (SELECT MAX(hid) FROM dbo.Tree WHERE hid.GetAncestor(1) = @root);
SELECT left_count =
(
SELECT COUNT_BIG(*)
FROM dbo.Tree T
WHERE T.hid.IsDescendantOf(@left) = 1
),
right_count =
(
SELECT COUNT_BIG(*)
FROM dbo.Tree T
WHERE T.hid.IsDescendantOf(@right) = 1
);
GO
-- Tidy up
DROP TABLE dbo.Tree;
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi
March 20, 2010 at 10:59 am
Unfortunately we don't have a realistic testset (and I'm too lazy now to generate one), but I'm almost certain that Paul's solution outperforms my solution by a factor of 'a large number'. Well done, Paul
March 20, 2010 at 12:23 pm
Excellent! I got the perfect one. I really thank you very much.
May 10, 2010 at 11:51 pm
Dear Peter,
Now I need little bit more support. You helped me to count the left and right side child nodes. Now I need the list of those left and right side members. Here I have given the output that I need by using your table
ID | LEFT_Members | RIGHT_Members
------------------------------------------------------
1 | jack, sam, sat, ram, dan | jam, jay, rai
------------------------------------------------------
2 | sam, ram, dan | sat
------------------------------------------------------
3 | jay | rai
------------------------------------------------------
4 | ram | dan
------------------------------------------------------
5 | |
------------------------------------------------------
6 | |
------------------------------------------------------
7 | |
------------------------------------------------------
May 11, 2010 at 2:22 am
mahbub422 (5/10/2010)
Dear Peter,Now I need little bit more support. You helped me to count the left and right side child nodes. Now I need the list of those left and right side members. Here I have given the output that I need by using your table
ID | LEFT_Members | RIGHT_Members
------------------------------------------------------
1 | jack, sam, sat, ram, dan | jam, jay, rai
------------------------------------------------------
2 | sam, ram, dan | sat
------------------------------------------------------
3 | jay | rai
------------------------------------------------------
4 | ram | dan
------------------------------------------------------
5 | |
------------------------------------------------------
6 | |
------------------------------------------------------
7 | |
------------------------------------------------------
Use the same recursive CTE (extended with user_first_name) and the FOR XML PATH trick for string concatenation will do it.
if object_id('dbo.user_registration_tbl', 'u') is not null
drop table dbo.user_registration_tbl
create table dbo.user_registration_tbl
(
id int identity,
user_first_name varchar(100) not null,
parentid int not null, ---coresponding ID of parent
Node int not null, ----'0' if member is leftside of parent and '1' if right.
creationtime datetime null,
primary key(id)
);
set identity_insert dbo.user_registration_tbl on
insert into user_registration_tbl
(id, user_first_name, parentid, Node)
values
(1, 'john', 0, 0),
(2, 'jack', 1, 0),
(3, 'jam', 1, 1),
(4, 'sam', 2, 0),
(5, 'sat', 2, 1),
(6, 'jay', 3, 0),
(7, 'rai', 3, 1),
(8, 'ram', 4, 0),
(9, 'dan', 4, 1)
set identity_insert dbo.user_registration_tbl off;
with Traverse as
(
select
id, user_first_name, cast(Node as varchar(max)) Nodes
from
dbo.user_registration_tbl c
where
parentid = 0
union all
select
c.id, c.user_first_name, r.Nodes + CAST(c.Node as varchar)
from
dbo.user_registration_tbl c
join
Traverse r on r.id = c.parentid
)
select
t1.id, u.parentid, t1.Nodes, u.user_first_name, u.creationtime,
stuff((
select
', ' + t2.user_first_name
from
Traverse t2
where
t1.Nodes + '0' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
for xml path(''),type
).value('.', 'varchar(101)'), 1, 1, '') LeftMembers,
stuff((
select
', ' + t2.user_first_name
from
Traverse t2
where
t1.Nodes + '1' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
for xml path(''),type
).value('.', 'varchar(101)'), 1, 1, '') RightMembers
from
Traverse t1
join
dbo.user_registration_tbl u on u.id = t1.id
order by
t1.id
option (maxrecursion 0)
Edit: changed varchar(max) to varchar(101) (length of user_first_name plus comma)
May 11, 2010 at 11:44 am
Dear Thanks for your excellent job. It's working nicely, but it's taking huge time to execute. I have 1200 records and it's taking around 50 seconds to compute. I am worried when my records will be above 10,000 then what will happen. Can you suggest me what I can do to make it faster? I am also worried about the size of LeftMember and RightMember field. When my records will increase it will not be able to hold all the names. Then how I will solve that.
May 11, 2010 at 12:13 pm
mahbub422 (5/11/2010)
Dear Thanks for your excellent job. It's working nicely, but it's taking huge time to execute. I have 1200 records and it's taking around 50 seconds to compute. I am worried when my records will be above 10,000 then what will happen. Can you suggest me what I can do to make it faster?
As I already said in my first post, I didn't expected the recursive CTE solution to be very scalable. There's some room for improvement but it will probably remain slow as your input data set grows. As I also mentioned, try out Paul White's suggestion and use the hierarchyid data type.
mahbub422 (5/11/2010)
I am also worried about the size of LeftMember and RightMember field. When my records will increase it will not be able to hold all the names. Then how I will solve that.
What do you mean by 'it will not be able to hold all names'. Is the output to be stored into another column in your database. Why don't you let the front end application (if there is any) deal with concatenating names in the first place? Be a little more specific about your requirements.
Peter
May 11, 2010 at 2:41 pm
One simple way the speed things up is to dump the outcome of the CTE into a temporary table and then use this table in the final solution. Using a temporary table avoids the execution of the CTE several time. I created an index on column parentid to speed up the execution of the CTE. On my desktop it takes less then a second to create the temporary table and fill it from the CTE. Then the final solution takes 27s too complete with an input of 10239 rows. Surprisingly, the solution takes 1:36min to complete if you create a clustered index on the id. I also changed the data type of the Nodes column in the CTE to varchar(900) instead of varchar(max). This speeds things up considerably. It also makes it possible to create an index on it in the temporary table although it seems useless in this test environment. Here is the code for the test set up and the solution:
-- Test setup
if object_id('dbo.user_registration_tbl', 'u') is not null
drop table dbo.user_registration_tbl
create table dbo.user_registration_tbl
(
id int identity,
user_first_name varchar(100) not null,
parentid int not null, ---coresponding ID of parent
Node int not null, ----'0' if member is leftside of parent and '1' if right.
creationtime datetime null,
primary key(id)
);
create index IX_user_registration_tbl on dbo.user_registration_tbl(parentid)
set identity_insert dbo.user_registration_tbl on
insert into user_registration_tbl
(id, user_first_name, parentid, Node)
values
(1, 'john', 0, 0),
(2, 'jack', 1, 0),
(3, 'jam', 1, 1),
(4, 'sam', 2, 0),
(5, 'sat', 2, 1),
(6, 'jay', 3, 0),
(7, 'rai', 3, 1),
(8, 'ram', 4, 0),
(9, 'dan', 4, 1)
set identity_insert dbo.user_registration_tbl off
declare @i int = 1
while @i <= 10
begin
insert into
dbo.user_registration_tbl(user_first_name, parentid, Node)
select
user_first_name + CAST(@i * 10 + x.Node as varchar), id, x.Node
from
dbo.user_registration_tbl t
cross join
(
select 0 Node union all select 1
) x
where
id not in (select parentid from dbo.user_registration_tbl)
set @i += 1
end
-- The solution
set statistics io on
set statistics time on
;with Traverse as
(
select
id, user_first_name, cast(Node as varchar(900)) Nodes
from
dbo.user_registration_tbl c
where
parentid = 0
union all
select
c.id, c.user_first_name, cast(r.Nodes + CAST(c.Node as varchar) as varchar(900))
from
dbo.user_registration_tbl c
join
Traverse r on r.id = c.parentid
)
select
*
into
#Traverse
from
Traverse
--create clustered index PK_user_registration_tbl on #Traverse(id)
select
t1.id, u.parentid, t1.Nodes, u.user_first_name, u.creationtime,
stuff((
select
', ' + t2.user_first_name
from
#Traverse t2
where
t1.Nodes + '0' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
for xml path(''),type
).value('.', 'varchar(102)'), 1, 2, '') LeftMembers,
stuff((
select
',' + t2.user_first_name
from
#Traverse t2
where
t1.Nodes + '1' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
for xml path(''),type
).value('.', 'varchar(102)'), 1, 2, '') RightMembers
from
#Traverse t1
join
dbo.user_registration_tbl u on u.id = t1.id
order by
t1.id
option (maxrecursion 0)
drop table #Traverse
set statistics io off
set statistics time off
Peter
Edit: initial space removed from LeftMembers and RightMembers
May 12, 2010 at 3:02 am
Just another major performance improvement. Create an index on the temporary table #Traverse on column Nodes and include user_first_name. Then make the join condition in subqueries to concatenate user_first_name SARGable. It now runs in 1s on my desktop including creating of the temporary table/index.
-- The solution
set statistics io on
set statistics time on
;with Traverse as
(
select
id, user_first_name, cast(Node as varchar(900)) Nodes
from
dbo.user_registration_tbl c
where
parentid = 0
union all
select
c.id, c.user_first_name, cast(r.Nodes + CAST(c.Node as varchar) as varchar(900))
from
dbo.user_registration_tbl c
join
Traverse r on r.id = c.parentid
)
select
*
into
#Traverse
from
Traverse
option (maxrecursion 0)
--create clustered index PK_user_registration_tbl on #Traverse(id)
create index IX_Traverse_Nodes on #Traverse(Nodes) include(user_first_name)
select
t1.id, u.parentid, t1.Nodes, u.user_first_name, u.creationtime,
stuff((
select
', ' + t2.user_first_name
from
#Traverse t2
where
-- t1.Nodes + '0' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
t2.Nodes LIKE t1.Nodes + '0%'
for xml path(''),type
).value('.', 'varchar(max)'), 1, 2, '') LeftMembers,
stuff((
select
', ' + t2.user_first_name
from
#Traverse t2
where
-- t1.Nodes + '1' = LEFT(t2.Nodes, len(t1.Nodes) + 1)
t2.Nodes LIKE t1.Nodes + '1%'
for xml path(''),type
).value('.', 'varchar(max)'), 1, 2, '') RightMembers
from
#Traverse t1
join
dbo.user_registration_tbl u on u.id = t1.id
order by
t1.id
drop table #Traverse
set statistics io off
set statistics time off
Edit: moved 'option (maxrecursion 0)' to the select/into query
Edit: fixed a bug with truncation of LeftMembers and RightMembers. Changed varchar(102) back to varchar(max)
Viewing 15 posts - 1 through 15 (of 61 total)
You must be logged in to reply to this topic. Login to reply