Problem using LEFT JOIN - not showing all possible results

Hello all,

I am trying to cross-reference the results of two tables, and it has been driving me nuts for some time.

EDIT: As requested, here is a runnable example that shows the problem I am facing.

As you can see, at least one student (e.g. 477) is in the groups list, but in the final query it is not associated with any group.

    CREATE TABLE #temp

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_id bigint

    )

    CREATE TABLE #groups

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_ids varchar(1000)

    )

    --Insert some data into #temp

    INSERT INTO #temp (alunos_id)

    SELECT 623 UNION ALL

    SELECT 476 UNION ALL

    SELECT 458 UNION ALL

    SELECT 469 UNION ALL

    SELECT 466 UNION ALL

    SELECT 475 UNION ALL

    SELECT 363 UNION ALL

    SELECT 473 UNION ALL

    SELECT 477 UNION ALL

    SELECT 482 UNION ALL

    SELECT 790 UNION ALL

    SELECT 467 UNION ALL

    SELECT 565 UNION ALL

    SELECT 564 UNION ALL

    SELECT 557 UNION ALL

    SELECT 556 UNION ALL

    SELECT 551

    INSERT INTO #groups (alunos_ids)

    SELECT '458, 565, 487, 531, 486, 492, 493, 495, 512, 544, 545' UNION ALL

    SELECT '476, 543, 575, 527, 526, 530, 489, 501, 555, 561, 471' UNION ALL

    SELECT '623, 477, 556, 507, 535, 537, 560, 566, 483, 470, 478' UNION ALL

    SELECT '473, 511, 491, 534, 523, 360, 520, 515, 361, 571, 481' UNION ALL

    SELECT '475, 357, 574, 503, 502, 538, 443, 504, 541, 562, 474' UNION ALL

    SELECT '547, 494, 713, 516, 522, 589, 510, 549, 716, 567, 554'

    select #temp.alunos_id, #groups.ID from #temp

    left join #groups on (','+#groups.alunos_ids+',' LIKE '%,'+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

    drop table #temp

    drop table #groups

What am I missing here?

Thank you in advance.

  • Hello and welcome to SSC!

I'd like to be able to help, but it seems you've forgotten to post readily consumable sample data and DDL scripts.

If you could read this article about the best way to post DDL and sample data, then edit your post to include it, it would be extremely useful for the unpaid volunteers of this site.

    Thanks!


    Forever trying to learn
    My blog - http://www.cadavre.co.uk/
    For better, quicker answers on T-SQL questions, click on the following...http://www.sqlservercentral.com/articles/Best+Practices/61537/
    For better, quicker answers on SQL Server performance related questions, click on the following...http://www.sqlservercentral.com/articles/SQLServerCentral/66909/

  • Done, as requested.

Because you are using a string compare (the LIKE), whitespace is important.

If you look at your data, there is a preceding space before every number except the first, so your join criteria only ever matches the first value in each string, because it is comparing [comma][space][value][comma]

to [comma][value][comma].
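You can see the mismatch with a quick one-off test (illustrative values only, not your real tables):

SELECT CASE WHEN ',' + '623, 477, 556' + ',' LIKE '%,' + '477' + ',%'
            THEN 'match' ELSE 'no match' END AS result;
-- returns 'no match', because the wrapped list contains ', 477,' rather than ',477,'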

What you want to do instead is use a SPLIT() function to split your values on the comma.

    CREATE FUNCTION dbo.DelimitedSplit8K

    --===== Define I/O parameters

    (@pString VARCHAR(8000), @pDelimiter CHAR(1))

    RETURNS TABLE WITH SCHEMABINDING AS

    RETURN

    --===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...

    -- enough to cover VARCHAR(8000)

    WITH E1(N) AS (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ), --10E+1 or 10 rows

    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows

    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max

    cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front

    -- for both a performance gain and prevention of accidental "overruns"

    SELECT 0 UNION ALL

    SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4

    ),

    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)

    SELECT t.N+1

    FROM cteTally t

    WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)

    )

    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.

    SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),

    Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000))

    FROM cteStart s

    ;

    GO

    CREATE TABLE #temp

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_id bigint

    )

    CREATE TABLE #groups

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_ids varchar(1000)

    )

    --Insert some data into #temp

    INSERT INTO #temp (alunos_id)

    SELECT 623 UNION ALL

    SELECT 476 UNION ALL

    SELECT 458 UNION ALL

    SELECT 469 UNION ALL

    SELECT 466 UNION ALL

    SELECT 475 UNION ALL

    SELECT 363 UNION ALL

    SELECT 473 UNION ALL

    SELECT 477 UNION ALL

    SELECT 482 UNION ALL

    SELECT 790 UNION ALL

    SELECT 467 UNION ALL

    SELECT 565 UNION ALL

    SELECT 564 UNION ALL

    SELECT 557 UNION ALL

    SELECT 556 UNION ALL

    SELECT 551

    INSERT INTO #groups (alunos_ids)

    SELECT '458, 565, 487, 531, 486, 492, 493, 495, 512, 544, 545' UNION ALL

    SELECT '476, 543, 575, 527, 526, 530, 489, 501, 555, 561, 471' UNION ALL

    SELECT '623, 477, 556, 507, 535, 537, 560, 566, 483, 470, 478' UNION ALL

    SELECT '473, 511, 491, 534, 523, 360, 520, 515, 361, 571, 481' UNION ALL

    SELECT '475, 357, 574, 503, 502, 538, 443, 504, 541, 562, 474' UNION ALL

    SELECT '547, 494, 713, 516, 522, 589, 510, 549, 716, 567, 554'

    select

    #temp.alunos_id,

    myfn.*

    from #temp

    CROSS JOIN #groups

    CROSS APPLY dbo.DelimitedSplit8K (#groups.alunos_ids,',') myfn

    WHERE myfn.Item = #temp.alunos_id

    drop table #temp

    drop table #groups

    drop FUNCTION dbo.DelimitedSplit8K

    Lowell


--help us help you! If you post a question, make sure you include a CREATE TABLE... statement and INSERT INTO... statement into that table to give the volunteers here representative data. With your description of the problem, we can provide a tested, verifiable solution to your question! Asking the question the right way gets you a tested answer the fastest way possible!

  • a20213 (2/6/2012)


    Done, as requested.

    Perfect, thanks very much 😀

    Does this help?

    BEGIN TRAN

    CREATE TABLE #temp

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_id bigint

    )

    CREATE TABLE #groups

    (

    ID int IDENTITY(1,1)PRIMARY KEY CLUSTERED,

    alunos_ids varchar(1000)

    )

    --Insert some data into #temp

    INSERT INTO #temp (alunos_id)

    SELECT 623 UNION ALL

    SELECT 476 UNION ALL

    SELECT 458 UNION ALL

    SELECT 469 UNION ALL

    SELECT 466 UNION ALL

    SELECT 475 UNION ALL

    SELECT 363 UNION ALL

    SELECT 473 UNION ALL

    SELECT 477 UNION ALL

    SELECT 482 UNION ALL

    SELECT 790 UNION ALL

    SELECT 467 UNION ALL

    SELECT 565 UNION ALL

    SELECT 564 UNION ALL

    SELECT 557 UNION ALL

    SELECT 556 UNION ALL

    SELECT 551

    INSERT INTO #groups (alunos_ids)

    SELECT '458, 565, 487, 531, 486, 492, 493, 495, 512, 544, 545' UNION ALL

    SELECT '476, 543, 575, 527, 526, 530, 489, 501, 555, 561, 471' UNION ALL

    SELECT '623, 477, 556, 507, 535, 537, 560, 566, 483, 470, 478' UNION ALL

    SELECT '473, 511, 491, 534, 523, 360, 520, 515, 361, 571, 481' UNION ALL

    SELECT '475, 357, 574, 503, 502, 538, 443, 504, 541, 562, 474' UNION ALL

    SELECT '547, 494, 713, 516, 522, 589, 510, 549, 716, 567, 554'

    GO

    CREATE FUNCTION dbo.DelimitedSplit8K

    --===== Define I/O parameters

    (@pString VARCHAR(8000), @pDelimiter CHAR(1))

    RETURNS TABLE WITH SCHEMABINDING AS

    RETURN

    --===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...

    -- enough to cover VARCHAR(8000)

    WITH E1(N) AS (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ), --10E+1 or 10 rows

    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows

    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max

    cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front

    -- for both a performance gain and prevention of accidental "overruns"

    SELECT 0 UNION ALL

    SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4

    ),

    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)

    SELECT t.N+1

    FROM cteTally t

    WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)

    )

    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.

    SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),

    Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000))

    FROM cteStart s

    ;

    GO

    SELECT a.*, b.*

    FROM #temp a

    LEFT OUTER JOIN (SELECT Item, alunos_ids, ID

    FROM #groups

    CROSS APPLY dbo.DelimitedSplit8k(alunos_ids,', ') split) b ON a.alunos_id = b.Item

    ROLLBACK

Read this article by Jeff Moden for how it works.


    --edit--

    Lowell beat me to it 😉


    Forever trying to learn
    My blog - http://www.cadavre.co.uk/
    For better, quicker answers on T-SQL questions, click on the following...http://www.sqlservercentral.com/articles/Best+Practices/61537/
    For better, quicker answers on SQL Server performance related questions, click on the following...http://www.sqlservercentral.com/articles/SQLServerCentral/66909/

Looking at the data, you have a leading space in front of all the numbers, but that isn't replicated in the SELECT.

So when looking for 565, you are trying to match ',565,' against ', 565,'.

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (','+#groups.alunos_ids+','

    LIKE '%,'+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

So by adding a space at the start of #groups.alunos_ids (after the leading comma) and a space to the LIKE '%,' pattern,

    the code should read

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (', '+#groups.alunos_ids+','

    LIKE '%, '+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

After adding this I get 477 with an ID of 3.

EDIT: I do like Cadavre's solution; it's very elegant and robust.

    _________________________________________________________________________
    SSC Guide to Posting and Best Practices

But if I do

    select #groups.ID from #groups where alunos_ids like '%477%'

it does not return NULL.

  • Jason-299789 (2/6/2012)


Looking at the data, you have a leading space in front of all the numbers, but that isn't replicated in the SELECT.

So when looking for 565, you are trying to match ',565,' against ', 565,'.

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (','+#groups.alunos_ids+','

    LIKE '%,'+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

So by adding a space at the start of #groups.alunos_ids (after the leading comma) and a space to the LIKE '%,' pattern,

    the code should read

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (', '+#groups.alunos_ids+','

    LIKE '%, '+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

After adding this I get 477 with an ID of 3.

EDIT: I do like Cadavre's solution; it's very elegant and robust.

Hmm, is it possible to match with or without the space in my query?

@cadavre and @lowell, I shall try the function now.

The Split function works great 😛

Still, I did

update #groups set alunos_ids = replace(alunos_ids, ' ', '')

and my previous query worked well, so I think I just have to make sure I don't have spaces between the IDs.
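(Alternatively, I suppose the spaces could be stripped inside the join itself rather than updating the data; something like this, untested:)

select #temp.alunos_id, #groups.ID
from #temp
left join #groups on ',' + REPLACE(#groups.alunos_ids, ' ', '') + ','
    LIKE '%,' + CAST(#temp.alunos_id AS VARCHAR(32)) + ',%'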

Thank you both for the help.

It is possible, but it needs a regular-expression-style pattern in the LIKE, so you could code the select as follows:

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (','+#groups.alunos_ids+','

    LIKE '%[, |,]'+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

The use of square brackets in the "LIKE '%[, |,]' +" portion ensures you catch either situation (comma and space, or comma only).

It's not foolproof, which is why the other solution is so robust.
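For example, a quick check of the two cases with literal values (illustration only):

SELECT CASE WHEN ', 477, 556,' LIKE '%[, |,]477,%' THEN 'match' ELSE 'no match' END AS with_space,
       CASE WHEN ',477, 556,'  LIKE '%[, |,]477,%' THEN 'match' ELSE 'no match' END AS without_space;
-- both return 'match'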

    _________________________________________________________________________
    SSC Guide to Posting and Best Practices

  • Jason-299789 (2/6/2012)


It is possible, but it needs a regular-expression-style pattern in the LIKE, so you could code the select as follows:

    select #temp.alunos_id, #groups.ID

    from #temp

    left join #groups on (','+#groups.alunos_ids+','

    LIKE '%[, |,]'+CAST(#temp.alunos_id AS VARCHAR(32))+',%')

The use of square brackets in the "LIKE '%[, |,]' +" portion ensures you catch either situation (comma and space, or comma only).

It's not foolproof, which is why the other solution is so robust.

    🙂

Nice. I am not strong with regular expressions, but it's good to know that there are several ways of solving my problem.

I'm not a regex specialist either, and normally only resort to using them when the need is desperate.

    _________________________________________________________________________
    SSC Guide to Posting and Best Practices

  • a20213 (2/6/2012)


The Split function works great 😛

Still, I did

update #groups set alunos_ids = replace(alunos_ids, ' ', '')

and my previous query worked well, so I think I just have to make sure I don't have spaces between the IDs.

Thank you both for the help.

The split function will be faster than your previous method on large tables; here's a test on a small data set to prove it.

    BEGIN TRAN

    SET NOCOUNT ON

    IF object_id('tempdb..#groups') IS NOT NULL

    BEGIN

    DROP TABLE #groups

    END

    IF object_id('tempdb..#temp') IS NOT NULL

    BEGIN

    DROP TABLE #temp

    END

    CREATE TABLE #groups (ID INT IDENTITY(1, 1) PRIMARY KEY CLUSTERED, alunos_ids VARCHAR(1000))

    CREATE TABLE #temp (ID INT IDENTITY(1, 1) PRIMARY KEY CLUSTERED, alunos_id BIGINT)

    --Create 10 groups with some items each

    ;WITH CTE(n) AS (SELECT 1 UNION ALL SELECT 1),

    CTE2(n) AS (SELECT x.n FROM CTE x, CTE y),

    CTE3(n) AS (SELECT x.n FROM CTE2 x, CTE2 y),

    CTE4(n) AS (SELECT x.n FROM CTE3 x, CTE3 y),

    TALLY(n) AS (SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM CTE4 x, CTE4 y),

    TALLY2(n) AS (SELECT n

    FROM TALLY

    WHERE n >= 100 AND n <= 999),

    TALLY3(n,randomJoin) AS (SELECT n, (ABS(CHECKSUM(NEWID())) % 10) + 1

    FROM TALLY2),

    GROUPS (randomJoin, n) AS (SELECT randomJoin, STUFF((SELECT ', ' + CONVERT(VARCHAR(MAX),n)

    FROM TALLY3 t2

    WHERE t2.randomJoin = t1.randomJoin

    FOR XML PATH('')),1,1,'')

    FROM TALLY3 t1

    GROUP BY randomJoin)

    INSERT INTO #groups

    SELECT n

    FROM GROUPS

    --Create some items

    ;WITH CTE(n) AS (SELECT 1 UNION ALL SELECT 1),

    CTE2(n) AS (SELECT x.n FROM CTE x, CTE y),

    CTE3(n) AS (SELECT x.n FROM CTE2 x, CTE2 y),

    CTE4(n) AS (SELECT x.n FROM CTE3 x, CTE3 y),

    TALLY(n) AS (SELECT TOP 10000 ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM CTE4 x, CTE4 y),

    TALLY2(n) AS (SELECT n

    FROM TALLY

    WHERE n >= 100 AND n <= 999)

    INSERT INTO #temp

    SELECT TOP 100 n

    FROM TALLY2

    ORDER BY NEWID()

    PRINT 'LIKE'

    SET STATISTICS TIME ON

    SELECT #temp.alunos_id, #groups.ID

    FROM #temp

    LEFT JOIN #groups ON (',' + #groups.alunos_ids + ',' LIKE '%[, |,]' + CAST(#temp.alunos_id AS VARCHAR(32)) + ',%')

    SET STATISTICS TIME OFF

    PRINT REPLICATE('-',80)

    GO

    CREATE FUNCTION dbo.DelimitedSplit8K

    --===== Define I/O parameters

    (@pString VARCHAR(8000), @pDelimiter CHAR(1))

    RETURNS TABLE WITH SCHEMABINDING AS

    RETURN

    --===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...

    -- enough to cover VARCHAR(8000)

    WITH E1(N) AS (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ), --10E+1 or 10 rows

    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows

    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max

    cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front

    -- for both a performance gain and prevention of accidental "overruns"

    SELECT 0 UNION ALL

    SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4

    ),

    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)

    SELECT t.N+1

    FROM cteTally t

    WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)

    )

    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.

    SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),

    Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000))

    FROM cteStart s

    ;

    GO

    PRINT '8K Splitter'

    SET STATISTICS TIME ON

    SELECT a.alunos_id, b.ID

    FROM #temp a

    LEFT OUTER JOIN (SELECT Item, alunos_ids, ID

    FROM #groups

    CROSS APPLY dbo.DelimitedSplit8k(alunos_ids,', ') split) b ON a.alunos_id = b.Item

    SET STATISTICS TIME OFF

    PRINT REPLICATE('-',80)

    ROLLBACK

    LIKE

    SQL Server Execution Times:

    CPU time = 109 ms, elapsed time = 105 ms.

    --------------------------------------------------------------------------------

    8K Splitter

    SQL Server Execution Times:

    CPU time = 0 ms, elapsed time = 5 ms.

    --------------------------------------------------------------------------------


    Forever trying to learn
    My blog - http://www.cadavre.co.uk/
    For better, quicker answers on T-SQL questions, click on the following...http://www.sqlservercentral.com/articles/Best+Practices/61537/
    For better, quicker answers on SQL Server performance related questions, click on the following...http://www.sqlservercentral.com/articles/SQLServerCentral/66909/

This is a good example of "don't rely on casting" when modeling

(and implicit casting is even worse).

Since this appears to be only some kind of academic exercise (student tables are a classic),

there's no big deal.

But don't be fooled.

When implementing something like this for real, use a relational table to join students and groups.

Avoid implementations that rely on keeping multiple IDs in a string.
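For example, a rough sketch of that kind of design (table and column names here are just illustrative, not from the original post):

CREATE TABLE dbo.Students (student_id bigint PRIMARY KEY);
CREATE TABLE dbo.Groups (group_id int IDENTITY(1,1) PRIMARY KEY, group_name varchar(100));
CREATE TABLE dbo.StudentGroups
(
    student_id bigint NOT NULL REFERENCES dbo.Students (student_id),
    group_id   int    NOT NULL REFERENCES dbo.Groups (group_id),
    PRIMARY KEY (student_id, group_id)
);

-- The original question then becomes a plain equality join, with no string parsing:
SELECT s.student_id, sg.group_id
FROM dbo.Students s
LEFT JOIN dbo.StudentGroups sg ON sg.student_id = s.student_id;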
