xml xslt counting xslt-grouping

Output available content from Grouped elements using XSLT 2.0


This follows up on the solution provided to this post (credit to Martin Honnen), where I needed to group the elements in the XML by id and count the duplicates based on some rules.

Now I'm having an issue with grouped elements that have different content. It is expected that, for the same case id with cont="n | n", one page can have content that is not present in the other page.

Source XML:

<?xml version="1.0" encoding="utf-8"?>
<cases>
    <case id="1" cont="1 | 2">
        <serial>111</serial>
        <content total="10">
          <misc val="5" />
          <misc val="5" />
        </content>
    </case>
    <case id="1" cont="2 | 2">
        <serial>111</serial>
        <message>this is a note 1</message>
    </case>
    <case id="2" cont="">
        <serial>222</serial>
        <content total="8">
          <misc val="3" />
          <misc val="5" />
        </content>
        <message>this is a note 2</message>
    </case>
</cases>

XSLT 2.0:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Groups the `case` children of `cases` and writes one `val` element per group.

  Only cases whose @cont is empty, or whose two @cont numbers are equal
  (for example "2 | 2"), take part in the grouping. Every value written into
  `val` is read from the first case of the group, so child elements that exist
  only on a case that was filtered out (for example `content` on a "1 | 2"
  case) are not visible here.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="#all">

    <xsl:output method="xml" indent="yes"/>

    <!-- Regular expression for the separator between the two numbers of @cont ("1 | 2"). -->
    <xsl:variable name="page-separator" select="'\s*\|\s*'"/>

    <xsl:template match="cases">
        <output>
            <!-- Outer grouping: the retained cases, one group per @id. -->
            <xsl:for-each-group
                select="case[@cont = '' or count(distinct-values(tokenize(@cont, $page-separator))) = 1]"
                group-by="@id">
                <!-- Inner grouping: '' for an empty @cont, otherwise the second number of @cont. -->
                <xsl:for-each-group
                    select="current-group()"
                    group-by="if (@cont = '') then '' else tokenize(@cont, $page-separator)[2]">
                    <!-- Every member of the group after the first one counts as a duplicate. -->
                    <xsl:variable name="duplicate-count" select="count(current-group()) - 1"/>
                    <val id="{@id}">
                        <duplicates>
                            <xsl:value-of select="$duplicate-count"/>
                        </duplicates>
                        <content total="{content/@total}">
                            <xsl:apply-templates select="content/misc" mode="item"/>
                        </content>
                        <message val="{message}"/>
                    </val>
                </xsl:for-each-group>
            </xsl:for-each-group>
        </output>
    </xsl:template>

    <!-- Writes the @val of one `misc` element as an `item`. -->
    <xsl:template match="misc" mode="item">
        <item>
            <xsl:value-of select="@val"/>
        </item>
    </xsl:template>

</xsl:stylesheet>

Expected output:

<?xml version="1.0" encoding="UTF-8"?>
<output>
   <val id="1">
      <duplicates>0</duplicates>
      <content total="10">
         <item>5</item>
         <item>5</item>
      </content>
      <message val="this is a note 1"/>
   </val>
   <val id="2">
      <duplicates>0</duplicates>
      <content total="8">
         <item>3</item>
         <item>5</item>
      </content>
      <message val="this is a note 2"/>
   </val>
</output>

Right now the content in the output for id="1" comes out empty: <content total=""/>. I guess some changes need to be made to the grouping part: when I output what each group contains, the content from case id="1" does not show up at all.

What changes do I need to make so that, for the same case id with cont="n | n", the output has all the content that is present in all the pages?

From the proposed solution:

Source XML:

<?xml version="1.0" encoding="utf-8"?>
<cases>
    <case id="2" cont="">
        <serial>222</serial>
        <content total="8">
          <misc val="3" />
          <misc val="5" />
        </content>
        <message>this is a note 2</message>
    </case>
    <case id="1" cont="1 | 2">
        <serial>111</serial>
        <content total="10">
          <misc val="5" />
          <misc val="5" />
        </content>
    </case>
    <case id="1" cont="2 | 2">
        <serial>111</serial>
        <message>this is a note 1</message>
    </case>
    <case id="2" cont="">
        <serial>222</serial>
        <content total="8">
          <misc val="3" />
          <misc val="5" />
        </content>
        <message>this is a note 2</message>
    </case>
</cases>

Output:

<?xml version="1.0" encoding="UTF-8"?>
<output>
   <val id="2">
      <duplicates>1</duplicates>
      <content total="8 10">
         <item>3</item>
         <item>5</item>
         <item>5</item>
         <item>5</item>
      </content>
      <message val="this is a note 2"/>
   </val>
   <val id="1">
      <duplicates>0</duplicates>
      <content total="8 10">
         <item>3</item>
         <item>5</item>
         <item>5</item>
         <item>5</item>
      </content>
      <message val="this is a note 1"/>
   </val>
</output>

This other input data is not working:

<?xml version="1.0" encoding="utf-8"?>
<cases>
    <case id="1" cont="1 | 2">
        <serial>111</serial>
        <content total="10">
          <misc val="5" />
          <misc val="5" />
        </content>
    </case>
    <case id="1" cont="2 | 2">
        <serial>111</serial>
        <message>this is a note 1</message>
    </case>
    <case id="1" cont="1 | 2">
        <serial>111</serial>
        <content total="10">
          <misc val="5" />
          <misc val="5" />
        </content>
    </case>
    <case id="1" cont="2 | 2">
        <serial>111</serial>
        <message>this is a note 1</message>
    </case>
    <case id="2" cont="">
        <serial>222</serial>
        <content total="8">
          <misc val="3" />
          <misc val="5" />
        </content>
        <message>this is a note 2</message>
    </case>
</cases>

Output:

<?xml version="1.0" encoding="UTF-8"?>
<output>
   <val id="1">
      <duplicates>1</duplicates>
      <content total="10 10">
         <item>5</item>
         <item>5</item>
         <item>5</item>
         <item>5</item>
      </content>
      <message val="this is a note 1"/>
   </val>
   <val id="2">
      <duplicates>0</duplicates>
      <content total="8">
         <item>3</item>
         <item>5</item>
      </content>
      <message val="this is a note 2"/>
   </val>
</output>

Now the content is getting duplicated for the duplicated elements.


Solution

  • With XSLT 3 (as supported by the currently supported versions of Saxon Java, Saxon .NET, SaxonC and SaxonJS) you could use the following code. It first processes all cases to identify groups of adjacent cases, each group starting with a case whose @cont is empty or starts with 1 (i.e. 1 | n). For each group, the collected elements are stored in the group property of a map. Then the code does the grouping from the previous solution, i.e. it only looks at the cases with @cont being empty or being n | n (the last page). Inside the grouping, to refer back to the other items, the code searches the stored sequence of groups for the one that contains the current element; that is done with the XPath node identity operator `is`. All the elements of that group are then used to look up content/@total and content/misc:

    <!--
      Groups `case` elements and writes one `val` per group, reading `content`
      from every page of the case instead of only from the last page.

      Step 1 ($groups): split all cases, in document order, into runs that start
      at a case whose @cont is empty or whose first page number is 1. Each run is
      kept as a map with a single `group` entry holding the case elements of the run.

      Step 2: group only the cases with an empty @cont or with both page numbers
      equal (n | n) by the composite key (@id, second page number).

      Step 3: for the first case of each group, find the stored run that contains
      that very node and read content/@total and content/misc from all cases of
      that run.
    -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="#all"
        expand-text="yes"
        version="3.0">

      <xsl:mode on-no-match="shallow-skip"/>

      <xsl:output method="xml" indent="yes"/>

      <xsl:template match="cases">
        <output>
          <!-- One map per run of pages. Only the members are stored; @id is not
               cast to a number, so non-numeric ids are handled as well. -->
          <xsl:variable name="groups" as="map(xs:string, element(case)+)*">
              <xsl:for-each-group select="case" group-starting-with="case[@cont = '' or tokenize(@cont, '\s*\|\s*')[1] = '1']">
                <xsl:sequence select="map { 'group' : current-group() }"/>
              </xsl:for-each-group>
          </xsl:variable>
          <!-- Composite key: (@id, '') for an empty @cont, otherwise (@id, second page number). -->
          <xsl:for-each-group
              select="case[@cont = '' or count(distinct-values(tokenize(@cont, '\s*\|\s*'))) = 1]"
              composite="yes"
              group-by="if (@cont = '') then (@id, '') else (@id, tokenize(@cont, '\s*\|\s*')[2])">
            <val id="{@id}">
               <!-- Every member of the group after the first one counts as a duplicate. -->
               <duplicates>{count(current-group()) - 1}</duplicates>
               <!-- current() is the first case of the current group; `is` compares node
                    identity, so exactly the run that holds this node is picked and the
                    runs of duplicate cases are not consulted. -->
               <xsl:variable name="complete-group" select="$groups[some $case in ?group satisfies current() is $case]"/>
               <content total="{$complete-group?group/content/@total}">
                  <xsl:for-each select="$complete-group?group/content/misc">
                      <item>
                          <xsl:value-of select="@val"/>
                      </item>
                  </xsl:for-each>
               </content>
               <message val="{message}" />
            </val>
          </xsl:for-each-group>
        </output>
      </xsl:template>

    </xsl:stylesheet>
    

    I have no clear idea/description of the wanted result for total and misc if there are duplicates.