javaxmlxsltsaxon

Saxon and XMLFilter: omit-xml-declaration=“yes” has no effect


We use Saxon 12.5 to generate some export files. These transformations usually consist of multiple XSLT transformations applied one after the other. In order to increase performance I introduced code that uses XMLFilters to prevent serializing results to strings and parse these strings again for the next transformation.

The coding works and also shows a significant performance boost.

However, some exports are not XML, but text based. And I cannot get rid of the XML declaration, although omit-xml-declaration="yes" is set in the last XSLT.

The old implementation which also uses Saxon, produces the same output without the XML declaration, although slower.

I stipped down both implementations as much as possible and created a unit test, hoping to see which difference would cause the effect. But no luck so far. What am I missing?

Acual transformations are done in saxonWithDestinations() and saxonWithXMLFilters()

import net.sf.saxon.lib.Feature;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import org.junit.Assert;
import org.junit.Test;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Templates;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

public class SaxonXMLFilterTest {
    @Test
    public void compareTransformationResultsForXml() throws Exception {
        compareTransformations("nop", false);
    }

    @Test
    public void compareTransformationResultsForText() throws Exception {
        compareTransformations("text", false);
    }

    @Test
    public void compareTransformationResultsForTextWithHackEnabled() throws Exception {
        compareTransformations("text", true);
    }

    private void compareTransformations(String styleSheetName, boolean cutOffXmlDeclaration) throws ParserConfigurationException, TransformerException, SAXException, SaxonApiException, IOException {
        String resultStringWithDestination = saxonWithDestinations(getXMLData(), getStylesheetSource(styleSheetName));
        String resultStringWithXMLFilters = saxonWithXMLFilters(getXMLData(), getStylesheetSource(styleSheetName));

        // This hack removes the XML declaration before comparing the results
        if (cutOffXmlDeclaration == true) {
            resultStringWithXMLFilters = resultStringWithDestination.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "");
        }

        Assert.assertEquals(resultStringWithDestination, resultStringWithXMLFilters);
    }

    public String saxonWithDestinations(InputStream dataStream, StreamSource stylesheetSource) throws SaxonApiException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        Processor processor = buildProcessor();
        Serializer destination = processor.newSerializer(out);

        net.sf.saxon.s9api.XsltTransformer transformer = processor.newXsltCompiler().compile(stylesheetSource).load();
        transformer.setSource(new StreamSource(dataStream));
        transformer.setDestination(destination);
        transformer.transform();

        return out.toString();
    }

    private String saxonWithXMLFilters(InputStream dataStream, StreamSource stylesheetSource) throws ParserConfigurationException, SAXException, TransformerException {
        String implementationClassName = "net.sf.saxon.TransformerFactoryImpl"; // Force the usage of Saxon-HE
        SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(implementationClassName, null);

        XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();

        // Build Filter and use reader as first parent
        Templates template = transformerFactory.newTemplates(stylesheetSource);
        XMLFilter filter = transformerFactory.newXMLFilter(template);
        filter.setParent(reader);

        //
        // More filters to be added here
        //

        XMLFilter lastFilter = filter;

        // Push data through filter chain into ByteArrayOutputStream
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        transformerFactory.newTransformer().transform(new SAXSource(lastFilter, new InputSource(dataStream)), new StreamResult(out));

        return out.toString();
    }

    public static Processor buildProcessor() {
        Processor processor = new Processor(false);
        processor.setConfigurationProperty(Feature.TIMING, false);
        processor.setConfigurationProperty(Feature.COMPILE_WITH_TRACING, false);

        return processor;
    }

    private static StreamSource getStylesheetSource(String styleSheetName) throws IOException {

        switch (styleSheetName) {
            case "nop":
                return stringToStreamSource(
                        """
                                <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
                                                                
                                    <xsl:template match="@*|node()">
                                        <xsl:copy>
                                            <xsl:apply-templates select="@*|node()"/>
                                        </xsl:copy>
                                    </xsl:template>
                                                                
                                </xsl:stylesheet>
                                """
                );

            case "text":
                return stringToStreamSource(
                        """
                                <?xml version="1.0" encoding="UTF-8"?>
                                <xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
                                    <xsl:output encoding="utf-8" method="text" indent="no" omit-xml-declaration="yes" xml:space="default"/>
                                                    
                                    <xsl:template match="/">
                                        <xsl:apply-templates select="Data/Measurement" />
                                    </xsl:template>
                                                    
                                    <xsl:template match="Data/Measurement">
                                        <xsl:apply-templates select="x"/><xsl:text>|</xsl:text>
                                        <xsl:apply-templates select="y"/><xsl:text>|</xsl:text>
                                        <xsl:apply-templates select="z"/><xsl:text>&#xa;</xsl:text>
                                    </xsl:template>
                                                    
                                    <xsl:template match="x|y|z">
                                        <xsl:apply-templates select="*"/>
                                    </xsl:template>
                                                    
                                    <xsl:template match="*">
                                        <xsl:apply-templates select="text()"/>
                                        <xsl:if test="not(position() = last())">
                                            <xsl:text>;</xsl:text>
                                        </xsl:if>
                                    </xsl:template>
                                                    
                                </xsl:stylesheet>
                                """
                );

            default:
                throw new IllegalArgumentException("Unknown stylesheet '" + styleSheetName + "'");
        }
    }

    private InputStream getXMLData() {
        return stringToInputStream(
                """
                        <?xml version="1.0" encoding="UTF-8"?>
                        <Data>
                            <Measurement>
                                <x>
                                    <max>60.00</max>
                                    <value>50.00</value>
                                    <min>-40.00</min>
                                    <unit>mm</unit>
                                </x>
                                <y>
                                    <max>3.00</max>
                                    <value>0.00</value>
                                    <min>-3.00</min>
                                    <unit>kN</unit>
                                </y>
                                <z>
                                    <max>0.00</max>
                                    <value>0.00</value>
                                    <min>0.00</min>
                                    <unit>ms</unit>
                                </z>
                            </Measurement>
                            <Measurement>
                                <x>
                                    <max>260.00</max>
                                    <value>250.00</value>
                                    <min>160.00</min>
                                    <unit>mm</unit>
                                </x>
                                <y>
                                    <max>203.00</max>
                                    <value>200.00</value>
                                    <min>197.00</min>
                                    <unit>kN</unit>
                                </y>
                                <z>
                                    <max>600000.00</max>
                                    <value>400000.00</value>
                                    <min>200000.00</min>
                                    <unit>ms</unit>
                                </z>
                            </Measurement>

                        </Data>
                        """
        );
    }

    private static StreamSource stringToStreamSource(String s) {
        return new StreamSource(stringToInputStream(s));
    }

    private static InputStream stringToInputStream(String s) {
        return new ByteArrayInputStream(s.getBytes(Charset.defaultCharset()));
    }
}

Solution

  • First observation is that omit-xml-declaration has no effect with method="text" since the text output method doesn't emit an XML declaration.

    In fact the final serialization is not being done by any XSLT process, it is being done by the JAXP identity transformer:

    transformerFactory.newTransformer().transform(
      new SAXSource(lastFilter, new InputSource(dataStream)), 
      new StreamResult(out));
    

    which may or may not be using Saxon depending on your classpath. You can set serialization properties on this final transformer using the JAXP API (Transformer.setOutputProperty()) but you can't do it from XSLT, since there is no XSLT involved.

    I would suggest using the s9api API for this as Martin Honnen suggests.