Reading an XML File into a TABLE

  • Hi,

    I am tasked with an investigation, cross referencing SQL 2000 Data with XML Logs to determine which Customers appear in both.

    Below is my methodology:

    1. Loop through each XML File [there are 500+]

    1a. Read each XML file contents into a #Table

    2. Loop thru each row within the #Table

    2a. determine each Node name and [determine whether to] create a new column based on this name.

    2b. Insert the Node Value into this column

    Hopefully this would give me a table with all the XML data, within the relevant column name..

    Problem is Step 2a. Because the XML files have different Nodes [based upon the Log entry activity], I cannot guarantee that Node "PhoneNumber" will be in all Log files.. So.. How can I iterate thru the Nodes using its index? Is there a way of counting the number of Nodes within an XML string?

    My SQL so far:

    BEGIN TRAN

    -- Loop Vars

    DECLARE @FileID int; set @FileID = 1;

    DECLARE @MaxFileID int; set @MaxFileID = 2

    -- XML File Vars

    DECLARE @FileName varchar(255)

    DECLARE @ExecCmd VARCHAR(255)

    DECLARE @y INT

    DECLARE @x INT

    DECLARE @XMLData VARCHAR(8000)

    CREATE TABLE #tempXML(PK INT NOT NULL IDENTITY(1,1), ThisLine VARCHAR(255))

    CREATE TABLE #TempXMLData ( XMLID INT NOT NULL IDENTITY(1,1), FileID INT NOT NULL, XMLData VARCHAR(8000) )

    WHILE @FileID < @MaxFileID

    BEGIN

    -- Get XML file

    SET @FileName = 'C:\Temp\XML\log' + cast(@FileID as varchar(4)) + '.xml'

    SET @ExecCmd = 'type ' + @FileName

    SET @XMLData = ''

    -- Execute xp_cmdshell to get XML text

    INSERT INTO #tempXML EXEC master.dbo.xp_cmdshell @ExecCmd

    SELECT @y = count(*) from #tempXML

    -- Grab each 'line' of the XML from #tempXML

    SET @x = 0

    WHILE @x <> @y

    BEGIN

    SET @x = @x + 1

    SELECT @XMLData = @XMLData + ThisLine from #tempXML WHERE PK = @x

    END

    -- INSERT Values

    insert into #TempXMLData ( FileID, XMLData )

    select @FileID, @XMLData

    -- Clear temp data

    DELETE FROM #tempXML

    -- Reseed #tempXML

    DBCC CHECKIDENT (#tempXML, RESEED, 0)

    -- Move Next

    SET @FileID = @FileID + 1

    END

    -- Data Cleaning

    ALTER TABLE #TempXMLData DROP COLUMN XMLID;

    GO

    DELETE FROM #TempXMLData WHERE ISNULL(LEN(XMLData), '') < 1;

    GO

    ALTER TABLE #TempXMLData ADD XMLRowID INT NOT NULL IDENTITY(1,1);

    GO

    -- Check #TempXMLData Records

    SELECT * FROM #TempXMLData

    /* Loop thru each row in the table and extract the NODE NAME, create a COLUMN [if needed]

    based on this value, then insert the VALUE of this NODE in that column..

    */

    -- Create XML Data Table

    CREATE TABLE #ComplexXMLData ( CXD_ID INT NOT NULL IDENTITY(1,1), FileID INT NOT NULL )

    -- Set Up Loop Vars

    DECLARE @XMLRow VARCHAR(8000)

    DECLARE @XMLRowID INT; SET @XMLRowID = 1;

    DECLARE @MaxXMLRowID INT;

    SELECT MAX(XMLRowID) FROM #TempXMLData

    WHILE @XMLRowID < @MaxXMLRowID

    BEGIN

    SELECT @XMLRow = XMLData FROM #TempXMLData WHERE XMLRowID = @XMLRowID

    /* ################### I NEED HELP WITH THIS BIT.. ################### */

    SET @XMLRowID = @XMLRowID + 1

    END

    -- Housekeeping

    DROP TABLE #ComplexXMLData

    DROP TABLE #TempXMLData

    DROP TABLE #tempXML

    ROLLBACK

    Example XML below:

    <soapenv:Envelope>

    <soapenv:Header/>

    <soapenv:Body>

    <man:createContact>

    <inputContact>

    <Caller>

    <dom:UserLogin>ewingb</dom:UserLogin>

    </Caller>

    <SystemCredentials>

    <dom:SourceSystemName>ABC</dom:SourceSystemName>

    <dom:ServiceName>createContact</dom:ServiceName>

    <dom:SystemDateTime>2011-01-21T13:27:08.73</dom:SystemDateTime>

    </SystemCredentials>

    <identification>

    <sourceType>SWB</sourceType>

    <contact>5890000000310</contact>

    <retailCentre>

    <code>100001</code>

    </retailCentre>

    </identification>

    <status>

    <status>Contact Opened</status>

    <by>ewingb</by>

    <dateTime>2011-01-21T13:27:08.73</dateTime>

    </status>

    <consultant>

    <secondName>Bobby Ewing</secondName>

    <loginId>ewingb</loginId>

    <effectiveDate>2011-01-21T13:27:08.73</effectiveDate>

    </consultant>

    <customerContact>

    <workPhone>9999999</workPhone>

    </customerContact>

    <communication>

    <enquiryType>New Sale</enquiryType>

    <method>Store</method>

    </communication>

    </inputContact>

    </man:createContact>

    </soapenv:Body>

    </soapenv:Envelope>

  • When I say "using its index", I mean of course something like:

    value[0], value[1] etc.

    😉

  • Some work on the final WHILE..

    WHILE @XMLRowID < @MaxXMLRowID

    BEGIN

    -- Fetch XML Row

    SELECT @XMLRow = XMLData FROM #TempXMLData WHERE XMLRowID = @XMLRowID

    -- Create XML

    EXEC sp_xml_preparedocument @hdocument OUTPUT, @XMLRow

    INSERT INTO #TempComplexXMLData ( id, parentid, nodetype, localname, prefix, namespaceuri, datatype, prev, [text] )

    SELECT id, parentid, nodetype, localname, prefix, namespaceuri, datatype, prev, convert(VARCHAR(200), [text])

    FROM OPENXML( @hdocument, '//*',2)

    -- Destroy XML

    EXEC sp_xml_removedocument @hdocument

    SELECT A.localname, B.[text] FROM #TempComplexXMLData A

    INNER JOIN #TempComplexXMLData B ON A.id = B.parentid

    -- Still need to perform some Jiggery Pokery here..

    DELETE FROM #TempComplexXMLData

    SET @XMLRowID = @XMLRowID + 1

    END

  • Okay, solved it! 😀

    Given a little more info, I was able to determine that I only needed one value from the XML Log files, as this would/could lead to the Customer [and hence any changes to his/her profile] via a Foreign Key.. Nice.. 😉

    DECLARE @SalesLeadIDs TABLE ( SalesLeadID VARCHAR(20) )

    WHILE @XMLRowID < @MaxXMLRowID

    BEGIN

    -- Fetch XML Row

    SELECT @XMLRow = XMLData FROM #TempXMLData WHERE XMLRowID = @XMLRowID

    -- Create XML

    EXEC sp_xml_preparedocument @hdocument OUTPUT, @XMLRow

    INSERT INTO #TempComplexXMLData ( id, parentid, nodetype, localname, prefix, namespaceuri, datatype, prev, [text] )

    SELECT id, parentid, nodetype, localname, prefix, namespaceuri, datatype, prev, convert(VARCHAR(200), [text])

    FROM OPENXML( @hdocument, '//*',2)

    -- Destroy XML

    EXEC sp_xml_removedocument @hdocument

    INSERT INTO @SalesLeadIDs ( SalesLeadID )

    SELECT DISTINCT B.[text] -- This is the SalesLeadID

    FROM #TempComplexXMLData A

    INNER JOIN #TempComplexXMLData B ON A.id = B.parentid

    WHERE A.localname = 'contact'

    DELETE FROM #TempComplexXMLData

    SET @XMLRowID = @XMLRowID + 1

    END

    -- Now we can see who was affected from the XML Log files.. BOOM!

    SELECT DISTINCT tS.SalesLeadID, S.SalesLeadID, C.ID, C.Title + ' ' + C.Forename + ' ' + C.Surname + ' ' AS [Customer Name]

    FROM @SalesLeadIDs tS

    INNER JOIN SalesLeads S on tS.SalesLeadID = S.SalesLeadID

    INNER JOIN Customer C on S.CustomerID = C.ID

Viewing 4 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply