xslt

How to use XSLT to group consecutive following elements satisfying a certain condition (colspan)


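See the XML code of a table below. My task is to add two attributes (NAMEST and NAMEEND) to every ENTRY element that is immediately followed by one or more consecutive ENTRY elements whose CONTENT is the text "##colspan##". Those "##colspan##" ENTRY elements are then dropped from the output (see the desired table below).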

The NAMEST attribute should contain the colname of the start of the colspan and the NAMEEND attribute should contain the colname of the end of the colspan.

I've been experimenting with for-each-group and for-each but haven't managed to find a solution yet. My main problem is stopping at the first gap: I want to count only the ENTRY elements that follow the current one consecutively and satisfy the condition, not all following ENTRY elements in the row that satisfy it.

Current table:

<TABLE>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>42</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>155</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT></CONTENT>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="5" COLNAME="col5">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="6" COLNAME="col6">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>147</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>jalla</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="6" COLNAME="col6">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>147</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>300</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="5" COLNAME="col5">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="6" COLNAME="col6">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>1814</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>1905</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="6" COLNAME="col6">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
     <ENTRY COLNUM="6" COLNAME="col6">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
  </ROW>
</TABLE>

Desired table:

 <TABLE DEFINITION="false">
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>42</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>155</CONTENT>
     </ENTRY>
     <ENTRY NAMEST="col3" NAMEEND="col6" COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>147</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT>jalla</CONTENT>
     </ENTRY>
     <ENTRY NAMEST="col3" NAMEEND="col4" COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY NAMEST="col5" NAMEEND="col6" COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY NAMEST="col1" NAMEEND="col2" COLNUM="1" COLNAME="col1">
        <CONTENT>147</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY NAMEST="col4" NAMEEND="col6" COLNUM="4" COLNAME="col4">
        <CONTENT>300</CONTENT>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY NAMEST="col1" NAMEEND="col3" COLNUM="1" COLNAME="col1">
        <CONTENT>1814</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="4" COLNAME="col4">
        <CONTENT>1905</CONTENT>
     </ENTRY>
     <ENTRY NAMEST="col5" NAMEEND="col6" COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
  </ROW>
  <ROW>
     <ENTRY COLNUM="1" COLNAME="col1">
        <CONTENT>##colspan##</CONTENT>
     </ENTRY>
     <ENTRY COLNUM="2" COLNAME="col2">
        <CONTENT/>
     </ENTRY>
     <ENTRY NAMEST="col3" NAMEEND="col4" COLNUM="3" COLNAME="col3">
        <CONTENT/>
     </ENTRY>
     <ENTRY NAMEST="col5" NAMEEND="col6" COLNUM="5" COLNAME="col5">
        <CONTENT/>
     </ENTRY>
  </ROW>
</TABLE>

I have tried the following code but it doesn't manage to stop when it comes to the end of a group of ENTRY elements with ##colspan##:

<!-- Marker text: an ENTRY whose CONTENT equals this value is a column-span placeholder. -->
<xsl:variable name="colspanText" as="xs:string" select="'##colspan##'"/>

<!--
  Attempted ENTRY template. It does not yet produce the desired table; the
  NOTE(review) comments below point out where its behaviour differs from the goal.
  Goal: drop placeholder entries and give the entry in front of an unbroken run
  of placeholders the attributes NAMEST (own COLNAME) and NAMEEND (COLNAME of the
  last placeholder in that run).
-->
<xsl:template match="ENTRY">

    <!-- COLNAME of the ENTRY being processed; written out as NAMEST further down. -->
    <xsl:variable name="myColName" as="xs:string" select="@COLNAME"/>

    <!-- Keep every ENTRY except those whose CONTENT is the ##colspan## marker. The first ENTRY should always be kept though. -->
    <!-- NOTE(review): position() here is the context position within whatever sequence the
         calling xsl:apply-templates selected, not the index among ENTRY siblings. The value 2
         presumably allows for a whitespace text node in front of the first ENTRY; confirm
         against the caller. -->

    <xsl:if test="not(CONTENT = $colspanText and not(position() = 2))">
        <xsl:copy>

            <!-- Add NAMEST and NAMEEND attributes if we have a colspan -->
            <!-- NOTE(review): two reasons this does not stop at the end of the first run:
                 1. ALL following siblings are grouped, so every later run of placeholders
                    (not only a run that starts directly after this ENTRY) has a true grouping
                    key and writes the attributes again; the last run wins because a later
                    xsl:attribute with the same name replaces the earlier one.
                 2. Inside xsl:for-each-group the context item is the FIRST item of the current
                    group, so @COLNAME is the first placeholder of the run, not the last
                    (current-group()[last()]/@COLNAME would address the last one). -->

            <xsl:for-each-group
                select="following-sibling::ENTRY" group-adjacent="CONTENT = $colspanText">
                <xsl:if test="current-grouping-key()">
                    <xsl:attribute name="NAMEST"  select="$myColName"/>
                    <xsl:attribute name="NAMEEND" select="@COLNAME"  />
                </xsl:if>
            </xsl:for-each-group>
            
            
            <!-- Copy the original attributes and child nodes -->
            <xsl:apply-templates select="@* | node()"/>

        </xsl:copy>
    </xsl:if>

</xsl:template>

Solution

  • I think that this will do it for you.

    It produces something very similar to what you posted as the desired output, although not identical.

    <?xml version="1.0" encoding="utf-8"?>
    <!--
      Collapses runs of placeholder cells into column spans.

      An ENTRY whose CONTENT equals $colspanText is a placeholder. Every placeholder
      except the first ENTRY of its parent is removed from the output. An ENTRY that
      is kept and is directly followed by one or more placeholders receives
        NAMEST  = its own COLNAME
        NAMEEND = the COLNAME of the last placeholder in that unbroken run.
      Everything else is copied unchanged.
    -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      version="3.0"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      xmlns:l="local:functions"
      exclude-result-prefixes="#all">
      
      <xsl:output indent="yes" />

      <!-- Discard layout whitespace inside the structural elements. Without this the
           whitespace text nodes that surrounded a removed ENTRY are copied through and
           show up as blank lines. CONTENT is deliberately not listed, so its text is
           never touched. -->
      <xsl:strip-space elements="TABLE ROW ENTRY" />
    
      <!-- Text that marks an ENTRY as a column-span placeholder. -->
      <xsl:variable name="colspanText" as="xs:string" select="'##colspan##'"/>
    
      <!-- Identity rule. Matching node() (rather than only *) also keeps comments and
           processing instructions, which the built-in rules would silently drop. -->
      <xsl:template match="@* | node()" >
        <xsl:copy>
          <xsl:apply-templates select="@* | node()" />
        </xsl:copy>
      </xsl:template>
    
      <!-- Placeholder entries are absorbed into the preceding span and produce no output.
           position() inside a pattern predicate counts among the ENTRY children of the
           parent, so the first ENTRY is never removed, even if it carries the marker.
           The predicate gives this rule a higher default priority than the plain ENTRY
           rule below. -->
      <xsl:template match="ENTRY[l:isSpanPlaceholder(.) and position() ne 1]" />
    
      <!-- Every ENTRY that is kept. Matching plain ENTRY (instead of testing CONTENT with
           "ne") also covers an ENTRY that has no CONTENT child at all. -->
      <xsl:template match="ENTRY">
    
        <!-- COLNAME of the last placeholder in the run that starts right after this
             ENTRY; empty when the next ENTRY is not a placeholder. -->
        <xsl:variable name="endSpan" as="xs:string?" 
                      select="l:endSpanIfStartsWithNext(.)" />
    
        <xsl:copy>
          
          <!-- Attributes have to be written before any child node is copied. -->
          <xsl:if test="exists($endSpan)" >
            <xsl:attribute name="NAMEST"  select="@COLNAME"/>
            <xsl:attribute name="NAMEEND" select="$endSpan"  />
          </xsl:if>
    
          <xsl:apply-templates select="@* | node()"/>
    
        </xsl:copy>
    
      </xsl:template>
    
      <!-- True when $entry has a CONTENT child equal to $colspanText.
           The general comparison "=" is used on purpose: it yields false for an absent
           entry or absent CONTENT, and does not raise a type error should an ENTRY ever
           carry more than one CONTENT child (the value comparison "eq" would). -->
      <xsl:function name="l:isSpanPlaceholder" as="xs:boolean" >
        <xsl:param name="entry" as="element(ENTRY)?" />
        
        <xsl:sequence select="$entry/CONTENT = $colspanText" />
      </xsl:function>
    
      <!-- Returns the COLNAME that ends the span starting at $entry, or the empty
           sequence when the ENTRY directly after $entry is not a placeholder. -->
      <xsl:function name="l:endSpanIfStartsWithNext" as="xs:string?" >
        <xsl:param name="entry" as="element(ENTRY)" />
        
        <xsl:variable name="next" as="element(ENTRY)?" select="$entry/following-sibling::ENTRY[1]" />
        
        <xsl:sequence select="if (l:isSpanPlaceholder($next))
                              then l:findLastInChainOfColSpans($next)
                              else ()" />
    
      </xsl:function>
    
      <!-- $entry is a placeholder. Walks forward one sibling at a time while the next
           ENTRY is also a placeholder, and returns the COLNAME of the last one reached.
           The walk stops at the first non-placeholder or at the end of the row, so
           placeholders after a gap are never counted.
           The declared result type is xs:string, so a placeholder without a COLNAME
           attribute is reported as a type error rather than silently ignored. -->
      <xsl:function name="l:findLastInChainOfColSpans" as="xs:string" >
        <xsl:param name="entry" as="element(ENTRY)" />
        
        <xsl:variable name="next" as="element(ENTRY)?" select="$entry/following-sibling::ENTRY[1]" />
        
        <xsl:sequence select="if (l:isSpanPlaceholder($next))
                              then l:findLastInChainOfColSpans($next)
                              else $entry/@COLNAME" />
      </xsl:function>
      
    </xsl:stylesheet>