htmldoctypemsxml

How to set the doctype of an XMLDOMDocument?


How can I set the doctype of a DOMDocument60 object?

For example, I tried:

IXMLDOMDocument60 doc = new DOMDocument60();
doc.doctype.name = "html";

except that IXMLDOMDocumentType.name is read-only:

IXMLDOMDocumentType = interface(IXMLDOMNode)
{
   ['{2933BF8B-7B36-11D2-B20E-00C04F983E60}']
   string Get_name();
   ...
   property String name read Get_name;
}

and IXMLDOMDocument60.doctype is read-only:

IXMLDOMDocument = interface(IXMLDOMNode)
{
   ['{2933BF81-7B36-11D2-B20E-00C04F983E60}']
   IXMLDOMDocumentType Get_doctype();
   ...
   property IXMLDOMDocumentType doctype read Get_doctype;
}

So how can i set the doctype of an XML document?


Bonus Question: How can I create a DOMDocument60 object with a specified doctype?


Note: You see no mention of XSLT, because there is none. I'm building an HTML DOM tree in MSXML.


Solution

  • For performance and security reasons, Microsoft does not usually allow <!DOCTYPE> (aka the Document Type Definition). Because of this, you must use the loadXML method to set the <!DOCTYPE>. So, it cannot be set after a document has been created or imported.

    On top of that, because of default security settings in MSXML6, you normally cannot import XML that has a <!DOCTYPE>. So, you must disable the ProhibitDTD setting on the object.

    Edit: You should know that HTML5 is not XML. Also, the <!DOCTYPE> is considered optional for XHTML5.

    First, let's start with the desired output.

    <!DOCTYPE html>
    <html />
    

    Based on the syntax, I'm assuming you are using C# and have added a reference to msxml6.dll. The following code will allow you to create that document type declaration and root element.

    var doc = new MSXML2.DOMDocument60();

    // MSXML6 rejects any <!DOCTYPE> by default; lift that restriction first
    doc.setProperty("ProhibitDTD", false);
    // There is no DTD to validate against, so skip validation while parsing
    doc.validateOnParse = false;
    // The doctype can only be introduced by parsing markup that contains it
    doc.loadXML("<!DOCTYPE html><html />");
    // Serialize the resulting document and print it
    string serialized = doc.xml;
    Console.WriteLine(serialized);
    

    Here's a copy of the code written in VBScript, as well.

    Set doc = CreateObject("MSXML2.DOMDocument.6.0")
    
    ' Disable validation when importing the XML
    doc.validateOnParse = False
    ' Enable the ability to import XML that contains <!DOCTYPE>
    doc.setProperty "ProhibitDTD", False
    ' Perform the import
    doc.loadXML "<!DOCTYPE html><html />"
    ' Display the imported XML (read it from doc, the only object created above)
    WScript.Echo doc.xml
    

    Finally, here's a copy of the code written in C++.

    #include <comutil.h>
    #pragma comment(lib, "comsuppw.lib")
    #include <msxml6.h>
    #pragma comment(lib, "msxml6.lib")
    
    int main(int argc, char* argv[])
    {
    
        HRESULT hr = S_OK;
        VARIANT_BOOL success = VARIANT_TRUE;
        // IXMLDOMDocument2 is needed for setProperty
        IXMLDOMDocument2 *doc;
    
        // Initialize COM
        hr = CoInitialize(NULL);
        if (SUCCEEDED(hr))
        {
            // Create the object
            hr = CoCreateInstance(CLSID_DOMDocument60, NULL, CLSCTX_INPROC_SERVER, IID_IXMLDOMDocument2, (void**)&doc);
            if (SUCCEEDED(hr))
            {
                // Disable validation when importing the XML
                hr = doc->put_validateOnParse(VARIANT_FALSE);
                // Enable the ability to import XML that contains <!DOCTYPE>
                hr = doc->setProperty(_bstr_t(L"ProhibitDTD"), _variant_t(VARIANT_FALSE));
                // Perform the import
                hr = doc->loadXML(_bstr_t(L"<!DOCTYPE html><html />"), &success);
                // Retrieve the XML
                _bstr_t output{};
                hr = doc->get_xml(output.GetAddress());
                // Display the imported XML
                MessageBoxW(NULL, output, NULL, 0);
            }
            // Cleanup COM
            CoUninitialize();
        }
        return 0;
    }