sqlsql-serverxmlopenxmlsql-server-openxml

How to store data in a table using openxml with While loop in SQL for multiple XMLs


I have a temp table with 3 XMLs in it:

CREATE TABLE #XMLwithOpenXML
(
    Id INT IDENTITY PRIMARY KEY,
    XMLData XML,
    LoadedDateTime DATETIME
);
go

INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime) 
    SELECT 
        CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE() 
    FROM 
        OPENROWSET(BULK 'D:\Test\Test1.xml', SINGLE_BLOB) AS x;

INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime) 
    SELECT 
        CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE() 
    FROM 
        OPENROWSET(BULK 'D:\Test\Test2.xml', SINGLE_BLOB) AS x;

INSERT INTO #XMLwithOpenXML(XMLData, LoadedDateTime) 
    SELECT 
        CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE() 
    FROM 
        OPENROWSET(BULK 'D:\Test\Test2.xml', SINGLE_BLOB) AS x;
go

I'm trying to extract some data from it into another temp table and I'm successful in doing so with the below code:

DECLARE @XML AS XML, @hDoc as int, @SQL nvarchar(max)

SELECT @XML = XMLData 
FROM #XMLwithOpenXML

SELECT @XML

EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML

EXEC Rm #DocNameForCapsilonFieldId

SELECT
    DocTypeName = typeName, DataPointName, FieldId = FieldName, ValueData
INTO
    #DocNameForCapsilonFieldId
FROM
    OPENXML(@hDoc, 'documents/document/dataPoints/dataPoint/field/value')
    WITH
        (
            typeName [varchar](256) '../../../../typeName',
            DataPointName [varchar](256) '../../@name',
            FieldName [varchar](256) '../@name',
            ValueData [varchar](256) '../value'
        ) AS DocTypeName

EXEC sp_xml_removedocument @hDoc
go

But the problem is that it picks up only the first XML and extracts its data. I want to extract data from all the 3 XMLs and store it in #DocNameForCapsilonFieldId or any table in database. I know this is done through While loop but I don't really know how to implement it. Can someone help me out with this or maybe even make this a stored procedure?


Solution

  • First of all: FROM OPENXML, together with the SPs to prepare and to remove a document, is outdated and should not be used any more (rare exceptions exist).

    Always try to use the native XML methods provided by the XML data type. Namely .nodes(), to retrieve repeating elements as many rows in a derived set, and .value() to fetch scalar values.

    This is not really a new answer, more an addition to the answer by @AlwaysLearning.

    The backward navigation (the parent axis, used in your example with multiple ../) is performing very badly. Better use cascades of APPLY with .nodes(), diving deeper and deeper over realtive XPaths:

    (Credits for DDL and sample: @AlwaysLearning's answer)

    drop table if exists #XMLwithOpenXML;
    create table #XMLwithOpenXML (
      XMLData xml
    );
    insert #XMLwithOpenXML values
    (N'<documents>
     <document>
      <typeName>Example1</typeName>
      <dataPoints>
       <dataPoint name="dp11">
        <field name="foo">
         <value>42</value>
        </field>
       </dataPoint>
       <dataPoint name="dp12">
        <field name="bar">
         <value>47</value>
        </field>
       </dataPoint>
      </dataPoints>
     </document>
    </documents>'),
    (N'<documents>
     <document>
      <typeName>Example2</typeName>
      <dataPoints>
       <dataPoint name="dp21">
        <field name="baz">
         <value>21</value>
        </field>
       </dataPoint>
       <dataPoint name="dp22">
        <field name="chaz">
         <value>22</value>
        </field>
       </dataPoint>
      </dataPoints>
     </document>
    </documents>');
    

    --The query:

    SELECT A.doc.value('(typeName/text())[1]','nvarchar(max)') AS TypeName
          ,B.dp.value('@name','nvarchar(max)') AS DataPoint_Name
          ,C.fld.value('@name','nvarchar(max)') AS Field_Name
          ,C.fld.value('(value/text())[1]','int') AS Field_Value
    FROM #XMLwithOpenXML t
    CROSS APPLY XMLData.nodes('/documents/document') A(doc)
    OUTER APPLY A.doc.nodes('dataPoints/dataPoint') B(dp)
    OUTER APPLY B.dp.nodes('field') C(fld);
    

    The result

    TypeName    DataPoint_Name  Field_Name  Field_Value
    Example1    dp11            foo         42
    Example1    dp12            bar         47
    Example2    dp21            baz         21
    Example2    dp22            chaz        22