July 8, 2013 at 11:00 am
Hi,
I have a xml document and I need to shred it and store it into a table. The difficulty for me is that each element in same level has different element name.
Here is a part of the xml, which has 4 level hierarchy.
<root>
<elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101010>
<elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101020>
</elem10>
<elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">
<elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">
<elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />
<elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151010>
<elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">
<elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151020>
<elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">
<elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />
</elem151030>
</elem15>
<elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201010>
<elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201020>
</elem20>
</root>
Here is the expected table with result:(Sorry I cannot edit the expected output in good format, so I attached a image which is a well formatted output)
ElementLV1ElementLV2ElementLV3Attr01Attr02Attr03Attr04Attr05Attr06
elem10NULLNULL999999
elem10elem101010NULL999999
elem10elem101010elem10101010999999
elem10elem101010elem10101020999999
elem10elem101020NULL999999
elem10elem101020elem10102010999999
elem10elem101020elem10102020999999
elem10elem101020elem10102030999999
elem10elem101020elem10102040999999
elem10elem101020elem10102050999999
elem15NULLNULL0.08-0.059974
elem15elem151010NULL0.03-0.019921
elem15elem151010elem151010100.02-0.019911
elem15elem151010elem151010200.0109910
elem15elem151010elem15101030009900
elem15elem151010elem15101040009900
elem15elem151010elem15101050009900
elem15elem151020NULL009900
elem15elem151020elem15102010009900
elem15elem151030NULL0.02-0.029931
elem15elem151030elem151030100.01-0.029911
elem15elem151030elem151030200.0109920
elem20NULLNULL999999
elem20elem201010NULL999999
elem20NULLelem20101010999999
elem20elem201020NULL999999
elem20NULLelem20102010999999
Could anyone help me out? Thanks,
Tao
July 8, 2013 at 12:48 pm
Hi, I've just come up with this... does this help with what you need:
declare @xml xml = '<root>
<elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101010>
<elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101020>
</elem10>
<elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">
<elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">
<elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />
<elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151010>
<elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">
<elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151020>
<elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">
<elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />
</elem151030>
</elem15>
<elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201010>
<elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201020>
</elem20>
</root>'
select l1.c.query('local-name(.)') as ElementLVL1
, null as ElementLVL2
, null as ElementLVL3
, l1.c.value('@Attr01', 'decimal(12,2)') as Attr01
, l1.c.value('@Attr02', 'decimal(12,2)') as Attr02
, l1.c.value('@Attr03', 'decimal(12,2)') as Attr03
, l1.c.value('@Attr04', 'decimal(12,2)') as Attr04
, l1.c.value('@Attr05', 'decimal(12,2)') as Attr05
, l1.c.value('@Attr06', 'decimal(12,2)') as Attr06
from @xml.nodes('/root/*') l1(c)
union all
select l1.c.query('local-name(.)')
, l2.c.query('local-name(.)')
, null
, l2.c.value('@Attr01', 'decimal(12,2)')
, l2.c.value('@Attr02', 'decimal(12,2)')
, l2.c.value('@Attr03', 'decimal(12,2)')
, l2.c.value('@Attr04', 'decimal(12,2)')
, l2.c.value('@Attr05', 'decimal(12,2)')
, l2.c.value('@Attr06', 'decimal(12,2)')
from @xml.nodes('/root/*') l1(c)
outer apply l1.c.nodes('*') l2(c)
union all
select l1.c.query('local-name(.)')
, l2.c.query('local-name(.)')
, l3.c.query('local-name(.)')
, l3.c.value('@Attr01', 'decimal(12,2)')
, l3.c.value('@Attr02', 'decimal(12,2)')
, l3.c.value('@Attr03', 'decimal(12,2)')
, l3.c.value('@Attr04', 'decimal(12,2)')
, l3.c.value('@Attr05', 'decimal(12,2)')
, l3.c.value('@Attr06', 'decimal(12,2)')
from @xml.nodes('/root/*') l1(c)
outer apply l1.c.nodes('*') l2(c)
outer apply l2.c.nodes('*') l3(c)
July 8, 2013 at 3:07 pm
Here's a stored procedure that will shred the XML into a table. I had to add a set of <body></body> tags below the root but otherwise your XML parsed perfectly.
[The code for creating stored procedure is below with attached png file output sample.]
EXEC dbo.ParseXML_ByXMLInput
N'<root>
<body>
<elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101010>
<elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101020>
</elem10>
<elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">
<elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">
<elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />
<elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151010>
<elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">
<elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151020>
<elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">
<elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />
</elem151030>
</elem15>
<elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201010>
<elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201020>
</elem20>
</body>
</root>'
,'root'
,'/body'
,1
,0
[/code]
CREATE PROCEDURE [dbo].[ParseXML_ByXMLInput]
@strXML NVARCHAR(MAX)
,@schemanode NVARCHAR(255)
,@rootnode NVARCHAR(255)
,@showAll BIT = 0
,@debug BIT = 0
AS
BEGIN
SET NOCOUNT ON
DECLARE
@idoc INT
,@id INT
,@parentid INT
,@SoapEnvOpen NVARCHAR(MAX)
,@SoapEnvClose NVARCHAR(MAX)
,@SoapBodyOpen NVARCHAR(MAX)
,@SoapBodyClose NVARCHAR(MAX)
,@SchemaNodeOpen NVARCHAR(MAX)
,@SchemaNodeClose NVARCHAR(MAX)
,@checkSoap INT
,@isSoap BIT = 0
,@checkSchema INT
,@isSchema BIT = 0
,@checkXMLHeader INT
,@isHeader BIT = 0
,@XMLHeader NVARCHAR(MAX)
IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL
DROP TABLE #ChildList
CREATE TABLE #ChildList (
[RowNum] INT IDENTITY(1,1) NOT NULL,
[parentid] INT NULL,
[id] INT NULL,
PRIMARY KEY (RowNum),
UNIQUE (RowNum))
IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL
DROP TABLE #NodeList
CREATE TABLE #NodeList (
[RowNum] INT NOT NULL,
[id] INT NULL,
[parentid] INT NULL,
[nodetype] INT NULL,
[localname] NVARCHAR(MAX) NULL,
[text] NVARCHAR(MAX) NULL,
PRIMARY KEY (RowNum),
UNIQUE (RowNum))
SET @id = 1
SET @parentid = NULL
/* Check to see if any XML exists */
SET @strXML = NULLIF(@strXML,'')
IF @strXML IS NULL
BEGIN
SELECT
NULL AS RowNum
,NULL AS id
,NULL AS parentid
,NULL AS nodepath
,NULL AS nodetype
,NULL AS nodename
,NULL AS property
,NULL AS value
,NULL AS nodecontents
,'No XML to process' AS XMLStatus
RETURN
END
ELSE
BEGIN
-- Get rid of tabs and extra spaces
SELECT @strXML = dbo.svfRemoveExcessSpacesFromXML(@strXML)
IF CHARINDEX('<',@strXML,1) > 0
BEGIN
SET @strXML = REPLACE(@strXML,'<','<')
SET @strXML = REPLACE(@strXML,'>','>')
IF @debug = 1
SELECT @strXML AS ReplacedXML
END
END
IF @debug = 1
SELECT @strXML AS InputXML
/* Check to see if the XML has a header */
SET @checkXMLHeader = CHARINDEX('<?xml version',@strXML,0)
IF @checkXMLHeader > 0
SET @isHeader = 1
/* If the XML has a header then remove it */
IF @isHeader = 1
BEGIN
SET @XMLHeader = SUBSTRING(@strXML,CHARINDEX('<?xml version',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@XMLHeader,'')
END
IF @debug = 1
SELECT @XMLHeader AS XMLHeader
/* Check to see if the XML has a SOAP wrapper */
SET @checkSoap = CHARINDEX('<soapenv:Envelope',@strXML,0)
IF @checkSoap > 0
SET @isSoap = 1
/* If the XML has a SOAP wrapper then remove it */
IF @isSoap = 1
BEGIN
SET @SoapEnvOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Envelope',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SoapEnvOpen,'')
SET @SoapBodyOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Body',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SoapBodyOpen,'')
SET @SoapEnvClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Envelope>',@strXML,0),LEN('</soapenv:Envelope>'))
SET @strXML = REPLACE(@strXML,@SoapEnvClose,'')
SET @SoapBodyClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Body>',@strXML,0),LEN('</soapenv:Body>'))
SET @strXML = REPLACE(@strXML,@SoapBodyClose,'')
END
IF @debug = 1
BEGIN
SELECT @SoapEnvOpen AS Soap_Wrapper
SELECT @SoapBodyOpen AS Soap_Body
SELECT @strXML AS XML_Without_SoapWrapper
END
--for testing
--SELECT @strXML
--RETURN
/* Check to see if the XML has a schema definition node */
SET @checkSchema = CHARINDEX('<'+@schemanode,@strXML,0)
IF @checkSchema > 0
SET @isSchema = 1
/* If a schema definition node exists remove it */
IF @isSchema = 1
BEGIN
SET @SchemaNodeOpen = SUBSTRING(@strXML,CHARINDEX('<'+@schemanode,@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SchemaNodeOpen,'')
SET @SchemaNodeClose = SUBSTRING(@strXML,CHARINDEX('</'+@schemanode+'>',@strXML,0),LEN('</'+@schemanode+'>'))
SET @strXML = REPLACE(@strXML,@SchemaNodeClose,'')
END
BEGIN TRY
EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML
END TRY
BEGIN CATCH
BEGIN
IF @debug = 1
BEGIN
SELECT @strXML AS ModifiedXML
SELECT CONVERT(XML,@strXML) AS FormattedXML
END
SELECT
NULL AS RowNum
,NULL AS id
,NULL AS parentid
,NULL AS nodepath
,NULL AS nodetype
,NULL AS nodename
,NULL AS property
,NULL AS value
,NULL AS nodecontents
,'Invalid XML' AS XMLStatus
RETURN
END
END CATCH
;WITH cte
AS (
SELECT
CAST(p1.parentid AS INT) AS parentid
,CAST(p1.id AS INT) AS id
FROM
OPENXML (@idoc,@rootnode,2) AS p1
UNION ALL
SELECT
CAST(p2.parentid AS INT) AS parentid
,CAST(p2.id AS INT) AS id
FROM
OPENXML (@idoc,@rootnode,2) AS p2
JOIN
cte
ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT)
WHERE
CAST(p2.parentid AS INT) = @parentid
)
INSERT INTO #ChildList
SELECT *
FROM cte
INSERT INTO #NodeList
SELECT
#ChildList.RowNum
,xmllist.id
,xmllist.parentid
,xmllist.nodetype
,xmllist.localname
,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text]
FROM #ChildList
INNER JOIN
OPENXML (@idoc,@rootnode,2) AS xmllist
ON #ChildList.id = xmllist.id
WHERE
#ChildList.RowNum > 0
--for testing
--SELECT * FROM #NodeList
--RETURN
;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype)
AS (
SELECT
#NodeList.RowNum
,#NodeList.id
,#NodeList.parentid
,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath
,#NodeList.localname
,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text]
,0 AS nodetype
FROM #ChildList
INNER JOIN
#NodeList
ON #ChildList.id = #NodeList.id
WHERE
#NodeList.parentid IS NULL
AND #ChildList.RowNum > 0
AND #NodeList.RowNum > 0
UNION ALL
SELECT
n.RowNum
,n.id
,n.parentid
,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath
,n.localname
,n.[text]
,n.nodetype
FROM #NodeList AS n
INNER JOIN
RecursiveNodes AS r
ON n.parentid = r.id
WHERE
n.RowNum > 0
AND r.RowNum > 0
AND n.parentid >= 0
)
SELECT
ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum
,Result.id
,Result.parentid
,Result.nodepath
,Result.nodetype
,Result.nodename
,Result.property
,Result.value
,Result.nodecontents
,'OK' AS XMLStatus
FROM
(
SELECT
rn.RowNum
,rn.id
,rn.parentid
,rn.nodepath
,(CASE
WHEN rn.nodetype = 0 THEN 'Root'
WHEN rn.nodetype = 1 THEN 'Node'
WHEN rn.nodetype = 2 THEN 'Property'
ELSE 'Data'
END) AS nodetype
,(CASE
WHEN rn.nodetype = 0 THEN rn.localname
WHEN rn.nodetype = 1 THEN rn.localname
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid)
ELSE NULL
END) AS nodename
,(CASE
WHEN rn.nodetype = 2 THEN rn.localname
ELSE NULL
END) AS property
,(CASE
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)
ELSE NULL
END) AS value
,(CASE
WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null)
ELSE NULL
END) AS nodecontents
FROM
RecursiveNodes AS rn
WHERE
rn.localname <> '#text'
) AS Result
WHERE
Result.id >= 0
AND (@ShowAll = 0
AND (Result.id = 0
OR property IS NOT NULL
OR value IS NOT NULL
OR nodecontents IS NOT NULL))
OR (@ShowAll = 1)
END
July 8, 2013 at 10:50 pm
Hi arthurolcot,
Thanks, this solution works for me.
Tao
July 8, 2013 at 10:55 pm
Hi Steven,
Thank you very much, the solution works for me.
Steven Willis (7/8/2013)
Here's a stored procedure that will shred the XML into a table. I had to add a set of <body></body> tags below the root but otherwise your XML parsed perfectly.[The code for creating stored procedure is below with attached png file output sample.]
EXEC dbo.ParseXML_ByXMLInput
N'<root>
<body>
<elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101010>
<elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
<elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem101020>
</elem10>
<elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">
<elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">
<elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />
<elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
<elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151010>
<elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">
<elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />
</elem151020>
<elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">
<elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />
<elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />
</elem151030>
</elem15>
<elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201010>
<elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">
<elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />
</elem201020>
</elem20>
</body>
</root>'
,'root'
,'/body'
,1
,0
[/code]
CREATE PROCEDURE [dbo].[ParseXML_ByXMLInput]
@strXML NVARCHAR(MAX)
,@schemanode NVARCHAR(255)
,@rootnode NVARCHAR(255)
,@showAll BIT = 0
,@debug BIT = 0
AS
BEGIN
SET NOCOUNT ON
DECLARE
@idoc INT
,@id INT
,@parentid INT
,@SoapEnvOpen NVARCHAR(MAX)
,@SoapEnvClose NVARCHAR(MAX)
,@SoapBodyOpen NVARCHAR(MAX)
,@SoapBodyClose NVARCHAR(MAX)
,@SchemaNodeOpen NVARCHAR(MAX)
,@SchemaNodeClose NVARCHAR(MAX)
,@checkSoap INT
,@isSoap BIT = 0
,@checkSchema INT
,@isSchema BIT = 0
,@checkXMLHeader INT
,@isHeader BIT = 0
,@XMLHeader NVARCHAR(MAX)
IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL
DROP TABLE #ChildList
CREATE TABLE #ChildList (
[RowNum] INT IDENTITY(1,1) NOT NULL,
[parentid] INT NULL,
[id] INT NULL,
PRIMARY KEY (RowNum),
UNIQUE (RowNum))
IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL
DROP TABLE #NodeList
CREATE TABLE #NodeList (
[RowNum] INT NOT NULL,
[id] INT NULL,
[parentid] INT NULL,
[nodetype] INT NULL,
[localname] NVARCHAR(MAX) NULL,
[text] NVARCHAR(MAX) NULL,
PRIMARY KEY (RowNum),
UNIQUE (RowNum))
SET @id = 1
SET @parentid = NULL
/* Check to see if any XML exists */
SET @strXML = NULLIF(@strXML,'')
IF @strXML IS NULL
BEGIN
SELECT
NULL AS RowNum
,NULL AS id
,NULL AS parentid
,NULL AS nodepath
,NULL AS nodetype
,NULL AS nodename
,NULL AS property
,NULL AS value
,NULL AS nodecontents
,'No XML to process' AS XMLStatus
RETURN
END
ELSE
BEGIN
-- Get rid of tabs and extra spaces
SELECT @strXML = dbo.svfRemoveExcessSpacesFromXML(@strXML)
IF CHARINDEX('<',@strXML,1) > 0
BEGIN
SET @strXML = REPLACE(@strXML,'<','<')
SET @strXML = REPLACE(@strXML,'>','>')
IF @debug = 1
SELECT @strXML AS ReplacedXML
END
END
IF @debug = 1
SELECT @strXML AS InputXML
/* Check to see if the XML has a header */
SET @checkXMLHeader = CHARINDEX('<?xml version',@strXML,0)
IF @checkXMLHeader > 0
SET @isHeader = 1
/* If the XML has a header then remove it */
IF @isHeader = 1
BEGIN
SET @XMLHeader = SUBSTRING(@strXML,CHARINDEX('<?xml version',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@XMLHeader,'')
END
IF @debug = 1
SELECT @XMLHeader AS XMLHeader
/* Check to see if the XML has a SOAP wrapper */
SET @checkSoap = CHARINDEX('<soapenv:Envelope',@strXML,0)
IF @checkSoap > 0
SET @isSoap = 1
/* If the XML has a SOAP wrapper then remove it */
IF @isSoap = 1
BEGIN
SET @SoapEnvOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Envelope',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SoapEnvOpen,'')
SET @SoapBodyOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Body',@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SoapBodyOpen,'')
SET @SoapEnvClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Envelope>',@strXML,0),LEN('</soapenv:Envelope>'))
SET @strXML = REPLACE(@strXML,@SoapEnvClose,'')
SET @SoapBodyClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Body>',@strXML,0),LEN('</soapenv:Body>'))
SET @strXML = REPLACE(@strXML,@SoapBodyClose,'')
END
IF @debug = 1
BEGIN
SELECT @SoapEnvOpen AS Soap_Wrapper
SELECT @SoapBodyOpen AS Soap_Body
SELECT @strXML AS XML_Without_SoapWrapper
END
--for testing
--SELECT @strXML
--RETURN
/* Check to see if the XML has a schema definition node */
SET @checkSchema = CHARINDEX('<'+@schemanode,@strXML,0)
IF @checkSchema > 0
SET @isSchema = 1
/* If a schema definition node exists remove it */
IF @isSchema = 1
BEGIN
SET @SchemaNodeOpen = SUBSTRING(@strXML,CHARINDEX('<'+@schemanode,@strXML,0),CHARINDEX('>',@strXML,0))
SET @strXML = REPLACE(@strXML,@SchemaNodeOpen,'')
SET @SchemaNodeClose = SUBSTRING(@strXML,CHARINDEX('</'+@schemanode+'>',@strXML,0),LEN('</'+@schemanode+'>'))
SET @strXML = REPLACE(@strXML,@SchemaNodeClose,'')
END
BEGIN TRY
EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML
END TRY
BEGIN CATCH
BEGIN
IF @debug = 1
BEGIN
SELECT @strXML AS ModifiedXML
SELECT CONVERT(XML,@strXML) AS FormattedXML
END
SELECT
NULL AS RowNum
,NULL AS id
,NULL AS parentid
,NULL AS nodepath
,NULL AS nodetype
,NULL AS nodename
,NULL AS property
,NULL AS value
,NULL AS nodecontents
,'Invalid XML' AS XMLStatus
RETURN
END
END CATCH
;WITH cte
AS (
SELECT
CAST(p1.parentid AS INT) AS parentid
,CAST(p1.id AS INT) AS id
FROM
OPENXML (@idoc,@rootnode,2) AS p1
UNION ALL
SELECT
CAST(p2.parentid AS INT) AS parentid
,CAST(p2.id AS INT) AS id
FROM
OPENXML (@idoc,@rootnode,2) AS p2
JOIN
cte
ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT)
WHERE
CAST(p2.parentid AS INT) = @parentid
)
INSERT INTO #ChildList
SELECT *
FROM cte
INSERT INTO #NodeList
SELECT
#ChildList.RowNum
,xmllist.id
,xmllist.parentid
,xmllist.nodetype
,xmllist.localname
,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text]
FROM #ChildList
INNER JOIN
OPENXML (@idoc,@rootnode,2) AS xmllist
ON #ChildList.id = xmllist.id
WHERE
#ChildList.RowNum > 0
--for testing
--SELECT * FROM #NodeList
--RETURN
;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype)
AS (
SELECT
#NodeList.RowNum
,#NodeList.id
,#NodeList.parentid
,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath
,#NodeList.localname
,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text]
,0 AS nodetype
FROM #ChildList
INNER JOIN
#NodeList
ON #ChildList.id = #NodeList.id
WHERE
#NodeList.parentid IS NULL
AND #ChildList.RowNum > 0
AND #NodeList.RowNum > 0
UNION ALL
SELECT
n.RowNum
,n.id
,n.parentid
,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath
,n.localname
,n.[text]
,n.nodetype
FROM #NodeList AS n
INNER JOIN
RecursiveNodes AS r
ON n.parentid = r.id
WHERE
n.RowNum > 0
AND r.RowNum > 0
AND n.parentid >= 0
)
SELECT
ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum
,Result.id
,Result.parentid
,Result.nodepath
,Result.nodetype
,Result.nodename
,Result.property
,Result.value
,Result.nodecontents
,'OK' AS XMLStatus
FROM
(
SELECT
rn.RowNum
,rn.id
,rn.parentid
,rn.nodepath
,(CASE
WHEN rn.nodetype = 0 THEN 'Root'
WHEN rn.nodetype = 1 THEN 'Node'
WHEN rn.nodetype = 2 THEN 'Property'
ELSE 'Data'
END) AS nodetype
,(CASE
WHEN rn.nodetype = 0 THEN rn.localname
WHEN rn.nodetype = 1 THEN rn.localname
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid)
ELSE NULL
END) AS nodename
,(CASE
WHEN rn.nodetype = 2 THEN rn.localname
ELSE NULL
END) AS property
,(CASE
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)
ELSE NULL
END) AS value
,(CASE
WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)
WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null)
ELSE NULL
END) AS nodecontents
FROM
RecursiveNodes AS rn
WHERE
rn.localname <> '#text'
) AS Result
WHERE
Result.id >= 0
AND (@ShowAll = 0
AND (Result.id = 0
OR property IS NOT NULL
OR value IS NOT NULL
OR nodecontents IS NOT NULL))
OR (@ShowAll = 1)
END
Viewing 5 posts - 1 through 4 (of 4 total)
You must be logged in to reply to this topic. Login to reply