July 6, 2015 at 3:48 am
Hi All,
I am having a column which is ntext and contains below type of data.
{ "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}
above is a single row. There are many such rows are present in the table. Now I want to find all alias and GPA and display as below.
idGPA
111111.75
111111.43
333331.43
444441.43
In oracle it can be done by using REGEXP_SUBSTR. But how can we do this in SQL? Please help.
Thanks,
Abhas.
July 6, 2015 at 7:23 am
abhas (7/6/2015)
Hi All,I am having a column which is ntext and contains below type of data.
{ "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}
above is a single row. There are many such rows are present in the table. Now I want to find all alias and GPA and display as below.
idGPA
111111.75
111111.43
333331.43
444441.43
In oracle it can be done by using REGEXP_SUBSTR. But how can we do this in SQL? Please help.
Thanks,
Abhas.
If the maximum length of the data is less than or equal to 8,000 characters for a single ntext field, then this should work:
CREATE FUNCTION dbo.fnDelimitedSplit8K (
@pString VARCHAR(8000),
@pDelimiter CHAR(1)
)
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1 UNION ALL
SELECT 1
),--10E+1 or 10 rows
E2(N) AS (
SELECT 1
FROM E1 AS a, E1 AS b
),--10E+2 or 100 rows
E4(N) AS (
SELECT 1
FROM E2 AS a, E2 AS b
),--10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1
UNION ALL
SELECT t.N+1
FROM cteTally AS t
WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
SELECT s.N1, ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1), Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l;
GO
DECLARE @NTEXT_TABLE AS TABLE (
NTEXT_COLUMN_NAME ntext
);
INSERT INTO @NTEXT_TABLE (NTEXT_COLUMN_NAME)
VALUES ('{ "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}');
WITH SOURCE_DATA_ALIAS AS (
SELECT DS.Item, DS.ItemNumber AS RN
FROM @NTEXT_TABLE AS NT
CROSS APPLY dbo.fnDelimitedSplit8K(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(CAST(NTEXT_COLUMN_NAME AS varchar(8000)), '{', ''), '}', ''), '[', ''), ']', ''), '"', ''), ' ', ''), ',') AS DS
WHERE DS.ItemNumber > 3
AND DS.Item LIKE 'alias%'
),
SOURCE_DATA_GPA AS (
SELECT DS.Item, DS.ItemNumber AS RN
FROM @NTEXT_TABLE AS NT
CROSS APPLY dbo.fnDelimitedSplit8K(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(CAST(NTEXT_COLUMN_NAME AS varchar(8000)), '{', ''), '}', ''), '[', ''), ']', ''), '"', ''), ' ', ''), ',') AS DS
WHERE DS.ItemNumber > 3
AND DS.Item LIKE 'GPA%'
)
SELECT RIGHT(A.Item, LEN(A.Item) - CHARINDEX(':', A.Item)) AS Alias, RIGHT(G.Item, LEN(G.Item) - CHARINDEX(':', G.Item)) AS GPA
FROM SOURCE_DATA_ALIAS AS A
INNER JOIN SOURCE_DATA_GPA AS G
ON A.RN = G.RN - 1;
It makes use of Jeff Moden's string splitter function, which is a highly performant piece of code that is ideal for this.
Steve (aka sgmunson) 🙂 🙂 🙂
Rent Servers for Income (picks and shovels strategy)
Viewing 2 posts - 1 through 1 (of 1 total)
You must be logged in to reply to this topic. Login to reply