xslt-2.0

Duplicate content while wrapping in xslt 2.0


Hi, I have an XML document whose elements need to be nested based on their level attribute, but somehow I am missing a step and my stylesheet creates duplicates. The wrapping logic is: nest each wrap element according to its @level.

My input xml

<root>
   <wrap level="101" name="pgrp">
       <title>0</title>
   </wrap>
   <wrap level="201" name="p">
       <text>2</text>
   </wrap>
   <wrap level="301" name="quote">
       <qp>q2</qp>
   </wrap>
   <wrap level="301" name="quote">
       <qp>q3</qp>
   </wrap>
   <wrap level="102" name="pgrp">
       <title>0</title>
   </wrap>
   <wrap level="201" name="p">
       <text>3</text>
   </wrap>
   <wrap level="201" name="p">
       <text>4</text>
   </wrap>
   <wrap level="301" name="list">
       <qp>q4</qp>
   </wrap>
   <wrap level="301" name="list">
       <qp>q9</qp>
   </wrap>
   <wrap level="201" name="p">
       <text>5</text>
   </wrap>
   <wrap level="301" name="quote">
       <qp>q5</qp>
   </wrap>
   <wrap level="302" name="list">
       <ql>ql51</ql>
   </wrap>
   <wrap level="202" name="p">
       <text>1a</text>
   </wrap>
   <wrap level="202" name="p">
       <text>1aa</text>
   </wrap>
   
</root>

XSLT

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    version="2.0" exclude-result-prefixes="#all">
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>
    
    <!--
        Entry point for the source document's root element.
        Builds two temporary trees, writes the second one to a side file for
        inspection, and finally applies templates to it inside a literal
        secondwrap result element.
    -->
    <xsl:template match="root">
        <!-- Pass 1: the children of root as returned by the template rules of this
             stylesheet (the rule for root//wrap adds a posid attribute to each wrap). -->
        <xsl:variable name="initial">
            <xsl:apply-templates select="node()"/>
        </xsl:variable>
        
        <!-- Pass 2: regroup the pass-1 elements below a firstwrap element. -->
        <xsl:variable name="wrapper">
            <firstwrap>
                <!-- Each run of adjacent elements with the same name becomes one wrapper element. -->
                <xsl:for-each-group select="$initial/*" group-adjacent="name(.)">
                    <wrapper>
                        <!-- Within a run, adjacent elements sharing both @name and @level form a group. -->
                        <xsl:for-each-group select="current-group()" group-adjacent="concat(@name, '|', @level)">
                            <xsl:choose>
                                <!-- A group of two or more 'list' or 'quote' wraps is merged into a single
                                     wrap holding the children of all members. Only level and name are written
                                     on the merged wrap, so it carries no posid attribute. -->
                                <xsl:when test="current-group()/@name = ('list', 'quote') and count(current-group()) gt 1">
                                    <wrap level="{current-group()[1]/@level}" name="{current-group()[1]/@name}">
                                        <xsl:copy-of select="current-group()/node()"/>
                                    </wrap>
                                </xsl:when>
                                <!-- Every other group is copied unchanged. -->
                                <xsl:otherwise>
                                    <xsl:copy-of select="current-group()"/>
                                </xsl:otherwise>
                            </xsl:choose>
                        </xsl:for-each-group>
                    </wrapper>
                </xsl:for-each-group>
            </firstwrap>
        </xsl:variable>
        
        <!-- Dump of the pass-2 tree to a secondary output file. -->
        <xsl:result-document href="tempoutput/000wrapper.xml">
            <xsl:copy-of select="$wrapper"></xsl:copy-of>
        </xsl:result-document>
        
        <!-- secondwrap is a literal result element; its content is whatever the
             template rules return for the pass-2 tree. -->
        <secondwrap>
            <xsl:apply-templates select="$wrapper"/>
        </secondwrap>
    
    </xsl:template>
    
    <!-- Identity rule: shallow-copies any node or attribute, then processes its
         attributes followed by its children with the stylesheet's template rules. -->
    <xsl:template match="@* | node()">
        <xsl:copy>
            <xsl:apply-templates select="@*"/>
            <xsl:apply-templates select="node()"/>
        </xsl:copy>
    </xsl:template>
    
    <!-- Copies every wrap below root, keeping its attributes and children as
         processed by the other rules and adding a posid attribute that holds the
         generated id of the source wrap. -->
    <xsl:template match="root//wrap">
        <xsl:copy>
            <xsl:apply-templates select="@*"/>
            <xsl:attribute name="posid">
                <xsl:value-of select="generate-id()"/>
            </xsl:attribute>
            <xsl:apply-templates select="node()"/>
        </xsl:copy>
    </xsl:template>
    

    <!-- firstwrap and secondwrap are not copied themselves; only their children
         are processed. -->
    <xsl:template match="secondwrap | firstwrap">
        <xsl:apply-templates select="child::node()"/>
    </xsl:template>
    
    <!-- Empty rule: drops any element below a secondwrap element whose tempid
         equals the tempid of an element that precedes it in document order.
         NOTE(review): in this stylesheet secondwrap is only written as a literal
         result element and tempid is only written by the named template
         'wrapper'; neither is handed to xsl:apply-templates afterwards, so this
         rule looks like it never fires. Confirm before relying on it. -->
    <xsl:template match="secondwrap//*[@tempid = preceding::*/@tempid]" priority="10"/>
    
    <!-- Empty rule: drops tempid attributes below a secondwrap element.
         NOTE(review): same caveat as the rule above; the posted output still
         contains tempid attributes. -->
    <xsl:template match="secondwrap//@tempid"/>
        
    <!-- A wrapper element hands its first wrap child to the named template
         'wrapper'; a wrapper without any wrap child just has its children
         processed. -->
    <xsl:template match="wrapper" priority="10">
        <xsl:variable name="first-wrap" select="wrap[1]"/>
        <xsl:choose>
            <xsl:when test="empty($first-wrap)">
                <xsl:apply-templates select="node()"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:call-template name="wrapper">
                    <xsl:with-param name="wrap" select="$first-wrap"/>
                </xsl:call-template>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
    
    <!--
        Named template 'wrapper': converts one wrap element into a result element.

        Parameters:
          wrap             the wrap element to convert.
          wrappercallsame  boolean passed by the callers; it is not read here
                           because the xsl:if that tested it is commented out.

        Output: an element named after $wrap/@name with a tempid attribute and a
        copy of $wrap's children, optionally followed (inside the element) by the
        output of 'wraplevelnest' and (after the element) by the output of
        'wraplevelsame'.

        NOTE(review): @level is untyped here, so 'gt', 'le' and '=' compare the
        values as strings, not numbers. That gives the numeric order only while
        all levels have the same number of digits (101, 201, 301 ...). Confirm.
    -->
    <xsl:template name="wrapper">
        <xsl:param name="wrap"/>
        <xsl:param name="wrappercallsame" select="false()"/>
        <!-- $level is declared but not referenced anywhere in this template. -->
        <xsl:variable name="level" select="$wrap/@level"/>
        <!-- Empty for the merged list/quote wraps, which are built without posid;
             tempid is then written as an empty string. -->
        <xsl:variable name="wrapidentifier" select="$wrap/@posid"/>
        
        <xsl:element name="{$wrap/@name}">
            <xsl:attribute name="tempid" select="$wrapidentifier"/>
            <xsl:copy-of select="$wrap/node()"/>
            <!-- Nest only when the immediately following wrap has a greater level. -->
            <xsl:if test="$wrap/following-sibling::wrap[1][@level gt $wrap/@level]">
                <xsl:call-template name="wraplevelnest">
                    <xsl:with-param name="wrap" select="$wrap"/>
                </xsl:call-template>
            </xsl:if>
        </xsl:element>
        <!--<xsl:if test="not($wrappercallsame)">-->
            <!-- Continue after the element when some following sibling has a level
                 less than or equal to $wrap's and its nearest preceding wrap at
                 $wrap's level has one of the posid values found on $wrap or its
                 descendants (the // step also looks below $wrap). -->
            <xsl:if test="$wrap/following-sibling::*[@level le $wrap/@level][preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrap//@posid]]">
                <xsl:call-template name="wraplevelsame">
                    <xsl:with-param name="wrap" select="$wrap"/>
                </xsl:call-template>
            </xsl:if>
        <!--</xsl:if>-->
        
    </xsl:template>
    
    <!--
        Named template 'wraplevelnest': emits the siblings that are nested inside
        the element created for $wrap.

        Parameter:
          wrap  the wrap element whose nested content is being produced.

        Visits every following sibling of $wrap that has a greater level and whose
        nearest preceding wrap at $wrap's level is $wrap itself (matched through
        posid), and calls 'wrapper' for the ones that pass the check below.
    -->
    <xsl:template name="wraplevelnest">
        <xsl:param name="wrap"/>
        <xsl:variable name="wrapidentifier" select="$wrap/@posid"/>
        <xsl:for-each select="$wrap/following-sibling::*[@level gt $wrap/@level][not(@level = $wrap/@level)][preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrapidentifier]]">
            <!-- $currlevel is declared but not referenced below. -->
            <xsl:variable name="currlevel" select="@level"/>
            <xsl:choose>
                <!-- Some earlier sibling of the current node also has $wrap as its
                     nearest preceding wrap at $wrap's level, i.e. there are wraps
                     between $wrap and the current node. -->
                <xsl:when test="preceding-sibling::wrap[preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrapidentifier]]">
                    <!-- Temporary tree holding the level of each of those wraps. -->
                    <xsl:variable name="level">
                        <xsl:for-each select="preceding-sibling::wrap[preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrapidentifier]]">
                            <lev><xsl:value-of select="@level"/></lev>
                        </xsl:for-each>
                    </xsl:variable>
                    <!-- Emit the current node only if its level is numerically lower
                         than every collected level; @level here is the level of the
                         xsl:for-each context node. -->
                    <xsl:if test="every $l in $level//lev satisfies number(@level) lt number($l)">
                        <xsl:call-template name="wrapper">
                            <xsl:with-param name="wrap" select="."/>
                            <xsl:with-param name="wrappercallsame" select="false()"/>
                        </xsl:call-template>
                    </xsl:if>
                </xsl:when>
                <!-- No wraps in between: emit the current node directly. -->
                <xsl:otherwise>
                    <xsl:call-template name="wrapper">
                        <xsl:with-param name="wrap" select="."/>
                        <xsl:with-param name="wrappercallsame" select="false()"/>
                    </xsl:call-template>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each>
    </xsl:template>
    
    <!--
        Named template 'wraplevelsame': emits siblings that follow the element
        created for $wrap rather than being nested inside it.

        Parameter:
          wrap  the wrap element whose element has just been written.

        Visits every following sibling of $wrap whose level is less than or equal
        to $wrap's and whose nearest preceding wrap at $wrap's level carries one
        of the posid values found on $wrap or its descendants, and calls 'wrapper'
        for the ones that pass the check below.

        NOTE(review): 'le' also selects siblings with a LOWER level than $wrap
        (for example a 102 wrap that follows a 201 wrap). Such a sibling can be
        selected by 'wraplevelnest' of an enclosing wrap as well, which would
        write it twice; this looks like the source of the duplicated output.
        Verify.
    -->
    <xsl:template name="wraplevelsame">
        <xsl:param name="wrap"/>
        <xsl:variable name="wrapidentifier" select="$wrap/@posid"/>
        <xsl:for-each select="$wrap/following-sibling::*[@level le $wrap/@level][preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrap//@posid]]">
            <!-- $currlevel is declared but not referenced below. -->
            <xsl:variable name="currlevel" select="@level"/>
            <xsl:choose>
                <!-- There are wraps between $wrap and the current node. -->
                <xsl:when test="preceding-sibling::wrap[preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrapidentifier]]">
                    <!-- Temporary tree holding the level of each of those wraps. -->
                    <xsl:variable name="level">
                        <xsl:for-each select="preceding-sibling::wrap[preceding-sibling::wrap[@level = $wrap/@level][1][@posid = $wrapidentifier]]">
                            <lev><xsl:value-of select="@level"/></lev>
                        </xsl:for-each>
                    </xsl:variable>
                    <!-- Emit the current node only if its level is numerically lower
                         than every collected level. -->
                    <xsl:if test="every $l in $level//lev satisfies number(@level) lt number($l)">
                        <xsl:call-template name="wrapper">
                            <xsl:with-param name="wrap" select="."/>
                            <xsl:with-param name="wrappercallsame" select="true()"/>
                        </xsl:call-template>
                    </xsl:if>
                </xsl:when>
                <!-- No wraps in between: emit the current node directly. -->
                <xsl:otherwise>
                    <xsl:call-template name="wrapper">
                        <xsl:with-param name="wrap" select="."/>
                        <xsl:with-param name="wrappercallsame" select="true()"/>
                    </xsl:call-template>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:for-each>
    </xsl:template>
    
    
    <!-- Empty rule: a wrap child of wrapper that is not the first wrap child
         produces no output when templates are applied to it.
         NOTE(review): the rule for wrapper calls the named template 'wrapper'
         instead of applying templates to its wrap children, so this rule may
         never be reached. Confirm. -->
    <xsl:template match="wrapper/wrap[position() ne 1]"/>
        
    
    
    
</xsl:stylesheet>

current output

<?xml version="1.0" encoding="UTF-8"?>
<secondwrap>
   <pgrp tempid="d1e2">
      <title>0</title>
      <p tempid="d1e4">
         <text>2</text>
         <quote tempid="">
            <qp>q2</qp>
            <qp>q3</qp>
         </quote>
      </p>
      <pgrp tempid="d1e10">
         <title>0</title>
         <p tempid="d1e12">
            <text>3</text>
         </p>
         <p tempid="d1e14">
            <text>4</text>
            <list tempid="">
               <qp>q4</qp>
               <qp>q9</qp>
            </list>
         </p>
         <p tempid="d1e20">
            <text>5</text>
            <quote tempid="d1e22">
               <qp>q5</qp>
               <list tempid="d1e25">
                  <ql>ql51</ql>
               </list>
               <p tempid="d1e27">
                  <text>1a</text>
               </p>
               <p tempid="d1e29">
                  <text>1aa</text>
               </p>
            </quote>
            <p tempid="d1e27">
               <text>1a</text>
            </p>
            <p tempid="d1e29">
               <text>1aa</text>
            </p>
            <p tempid="d1e27">
               <text>1a</text>
            </p>
            <p tempid="d1e29">
               <text>1aa</text>
            </p>
         </p>
      </pgrp>
      <pgrp tempid="d1e10">
         <title>0</title>
         <p tempid="d1e12">
            <text>3</text>
         </p>
         <p tempid="d1e14">
            <text>4</text>
            <list tempid="">
               <qp>q4</qp>
               <qp>q9</qp>
            </list>
         </p>
         <p tempid="d1e20">
            <text>5</text>
            <quote tempid="d1e22">
               <qp>q5</qp>
               <list tempid="d1e25">
                  <ql>ql51</ql>
               </list>
               <p tempid="d1e27">
                  <text>1a</text>
               </p>
               <p tempid="d1e29">
                  <text>1aa</text>
               </p>
            </quote>
            <p tempid="d1e27">
               <text>1a</text>
            </p>
            <p tempid="d1e29">
               <text>1aa</text>
            </p>
            <p tempid="d1e27">
               <text>1a</text>
            </p>
            <p tempid="d1e29">
               <text>1aa</text>
            </p>
         </p>
      </pgrp>
   </pgrp>
</secondwrap>


Expected output

<?xml version="1.0" encoding="UTF-8"?>
<secondwrap>
   <pgrp tempid="d1e2">
      <title>0</title>
      <p tempid="d1e4">
         <text>2</text>
         <quote tempid="">
            <qp>q2</qp>
            <qp>q3</qp>
         </quote>
      </p>
      <pgrp tempid="d1e10">
         <title>0</title>
         <p tempid="d1e12">
            <text>3</text>
         </p>
         <p tempid="d1e14">
            <text>4</text>
            <list tempid="">
               <qp>q4</qp>
               <qp>q9</qp>
            </list>
         </p>
         <p tempid="d1e20">
            <text>5</text>
            <quote tempid="d1e22">
               <qp>q5</qp>
               <list tempid="d1e25">
                  <ql>ql51</ql>
               </list>
            </quote>
            <p tempid="d1e27">
               <text>1a</text>
            </p>
            <p tempid="d1e29">
               <text>1aa</text>
            </p>
         </p>
      </pgrp>
   </pgrp>
</secondwrap>

Thanks in Advance for your help


Solution

  • I would try to solve this through recursive, nested grouping. The following code sample uses XSLT 3 rather than 2, as I think in 2025 the supported versions of e.g. Saxon or Altova support XSLT 3 anyway:

    <?xml version="1.0" encoding="utf-8"?>
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      version="3.0"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      exclude-result-prefixes="#all"
      xmlns:mf="http://example.com/mf"
      expand-text="yes">
      
      <!--
        mf:nest: turns a flat sequence of levelled elements into a nested tree.
        Parameter:
          nodes  the elements to nest; each is expected to carry an integer
                 level attribute and a name attribute.
        Returns: for every element at the lowest level present, a new element
        named after its name attribute, holding its level attribute, its
        children and, recursively nested, the elements that follow it up to the
        next element at that lowest level. Elements standing before the first
        lowest-level element are nested among themselves.
        Elements are returned unchanged when none of them has a level attribute.
      -->
      <xsl:function name="mf:nest" as="node()*">
        <xsl:param name="nodes" as="node()*"/>
        <xsl:variable name="min-level" select="min($nodes/@level/xs:integer(.))"/>
        <xsl:choose>
          <!-- Without any level no group can ever start with a minimum-level
               element, and the recursion below would call itself with the same
               nodes forever; pass such nodes through as they are. -->
          <xsl:when test="empty($min-level)">
            <xsl:sequence select="$nodes"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:for-each-group select="$nodes" group-starting-with="*[@level = $min-level]">
              <xsl:choose>
                <xsl:when test="self::*[@level = $min-level]">
                  <xsl:element name="{@name}">
                    <xsl:copy-of select="@level, node()"/>
                    <!-- Everything after the group leader belongs inside it. -->
                    <xsl:sequence select="mf:nest(tail(current-group()))"/>
                  </xsl:element>
                </xsl:when>
                <!-- Leading group that contains no minimum-level element. -->
                <xsl:otherwise>
                  <xsl:sequence select="mf:nest(current-group())"/>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:for-each-group>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:function>
      
      
      <!--
        mf:merge: merges runs of adjacent 'list' or 'quote' elements.
        Parameter:
          nodes  the sibling nodes to process.
        Returns: the nodes with every run of adjacent no-namespace 'list' or
        'quote' elements that share name and level replaced by one element of
        that name, without the level attribute, whose content is the merged
        content of all members. All other nodes are processed in mode 'merge'.
      -->
      <xsl:function name="mf:merge" as="node()*">
        <xsl:param name="nodes" as="node()*"/>
        <xsl:for-each-group select="$nodes" composite="yes" group-adjacent="node-name(), @level, node-name() = (QName('', 'list'), QName('', 'quote'))">
          <xsl:choose>
            <!-- The boolean flag is always the last item of the composite key.
                 Empty parts (no level, or no node name for text nodes) drop out
                 of the key, so a fixed position such as [3] misses the flag. -->
            <xsl:when test="current-grouping-key()[last()]">
              <xsl:copy>
                <xsl:sequence select="@* except @level, mf:merge(current-group()/node())"/>
              </xsl:copy>
            </xsl:when>
            <xsl:otherwise>
              <xsl:apply-templates select="current-group()" mode="merge"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </xsl:function>
      
      <xsl:mode name="merge" on-no-match="shallow-copy"/>
      
      <!-- Mode 'merge', any element: shallow copy (attributes are not copied)
           whose content is the element's children after mf:merge has merged
           adjacent list/quote runs among them. -->
      <xsl:template match="*" mode="merge">
        <xsl:variable name="merged-children" select="mf:merge(child::node())"/>
        <xsl:copy>
          <xsl:sequence select="$merged-children"/>
        </xsl:copy>
      </xsl:template>
      
      <!-- Entry point: nests the child elements of root by level, merges adjacent
           list/quote runs in the nested result and wraps it in secondwrap. -->
      <xsl:template match="root">
        <secondwrap>
          <xsl:sequence select="child::* => mf:nest() => mf:merge()"/>
        </secondwrap>
      </xsl:template>
    
      <xsl:output method="xml" indent="yes"/>
    
      <xsl:mode on-no-match="shallow-copy"/>
    
    </xsl:stylesheet>
    

    I get e.g.

    <secondwrap>
       <pgrp>
          <title>0</title>
          <p>
             <text>2</text>
             <quote>
                <qp>q2</qp>
                <qp>q3</qp>
             </quote>
          </p>
          <pgrp>
             <title>0</title>
             <p>
                <text>3</text>
             </p>
             <p>
                <text>4</text>
                <list>
                   <qp>q4</qp>
                   <qp>q9</qp>
                </list>
             </p>
             <p>
                <text>5</text>
                <quote>
                   <qp>q5</qp>
                   <list>
                      <ql>ql51</ql>
                   </list>
                </quote>
                <p>
                   <text>1a</text>
                </p>
                <p>
                   <text>1aa</text>
                </p>
             </p>
          </pgrp>
       </pgrp>
    </secondwrap>
    

    which I think has the result structure you want, with the exception of the tempid attributes, which I hope you can add in where you need them.

    As for an XSLT 2.0 version, I would hope that

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      version="2.0"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      exclude-result-prefixes="#all"
      xmlns:mf="http://example.com/mf">
    
      <!-- Identity rule for every mode: shallow-copies the node, then processes
           its attributes and its children in the mode currently in effect. -->
      <xsl:template match="node() | @*" mode="#all">
        <xsl:copy>
          <xsl:apply-templates select="@*" mode="#current"/>
          <xsl:apply-templates select="node()" mode="#current"/>
        </xsl:copy>
      </xsl:template>
    
      
      <!--
        mf:nest: turns a flat sequence of levelled elements into a nested tree.
        Parameter:
          nodes  the elements to nest; each is expected to carry an integer
                 level attribute and a name attribute.
        Returns: for every element at the lowest level present, a new element
        named after its name attribute, holding its level attribute, its
        children and, recursively nested, the elements that follow it up to the
        next element at that lowest level. Elements standing before the first
        lowest-level element are nested among themselves.
        Elements are returned unchanged when none of them has a level attribute.
      -->
      <xsl:function name="mf:nest" as="node()*">
        <xsl:param name="nodes" as="node()*"/>
        <xsl:variable name="min-level" select="min($nodes/@level/xs:integer(.))"/>
        <xsl:choose>
          <!-- Without any level no group can ever start with a minimum-level
               element, and the recursion below would call itself with the same
               nodes forever; pass such nodes through as they are. -->
          <xsl:when test="empty($min-level)">
            <xsl:sequence select="$nodes"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:for-each-group select="$nodes" group-starting-with="*[@level = $min-level]">
              <xsl:choose>
                <xsl:when test="self::*[@level = $min-level]">
                  <xsl:element name="{@name}">
                    <xsl:copy-of select="@level, node()"/>
                    <!-- Everything after the group leader belongs inside it. -->
                    <xsl:sequence select="mf:nest(subsequence(current-group(), 2))"/>
                  </xsl:element>
                </xsl:when>
                <!-- Leading group that contains no minimum-level element. -->
                <xsl:otherwise>
                  <xsl:sequence select="mf:nest(current-group())"/>
                </xsl:otherwise>
              </xsl:choose>
            </xsl:for-each-group>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:function>
      
      
      <!--
        mf:merge: merges runs of adjacent 'list' or 'quote' elements.
        Parameter:
          nodes  the sibling nodes to process.
        Returns: the nodes with every run of adjacent no-namespace 'list' or
        'quote' elements that share name and level replaced by one element of
        that name, without the level attribute, whose content is the merged
        content of all members. All other nodes are processed in mode 'merge'.
      -->
      <xsl:function name="mf:merge" as="node()*">
        <xsl:param name="nodes" as="node()*"/>
        <!-- The key is 'name|level|flag'. node-name(.) is written with its
             argument because the zero-argument form is not part of XPath 2.0. -->
        <xsl:for-each-group select="$nodes" group-adjacent="concat(node-name(.), '|', @level, '|', node-name(.) = (QName('', 'list'), QName('', 'quote')))">
          <xsl:choose>
            <!-- The last key token is the list/quote flag. -->
            <xsl:when test="tokenize(current-grouping-key(), '\|')[last()] = 'true'">
              <xsl:copy>
                <xsl:sequence select="@* except @level, mf:merge(current-group()/node())"/>
              </xsl:copy>
            </xsl:when>
            <xsl:otherwise>
              <xsl:apply-templates select="current-group()" mode="merge"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:for-each-group>
      </xsl:function>
      
    
      <!-- Mode 'merge', any element: shallow copy (attributes are not copied)
           whose content is the element's children after mf:merge has merged
           adjacent list/quote runs among them.
           The explicit priority is needed because '*' and the 'node()' branch
           of the mode="#all" identity rule both default to priority -0.5; without
           it the match is ambiguous and only resolved by declaration order. -->
      <xsl:template mode="merge" match="*" priority="1">
        <xsl:copy>
          <xsl:sequence select="mf:merge(node())"/>
        </xsl:copy>
      </xsl:template>
      
      <!-- Entry point: nests the child elements of root by level, merges adjacent
           list/quote runs in the nested result and wraps it in secondwrap. -->
      <xsl:template match="root">
        <xsl:variable name="nested" select="mf:nest(child::*)"/>
        <secondwrap>
          <xsl:sequence select="mf:merge($nested)"/>
        </secondwrap>
      </xsl:template>
    
      <xsl:output method="xml" indent="yes"/>
    
    </xsl:stylesheet>
    

    runs with an XSLT 2.0 processor and gives the same result as the XSLT 3 version run with an XSLT 3 processor.