Tags: xslt-2.0, xslt-3.0

XSLT: deleting duplicate segments does not work


I have an issue deleting duplicate segments. The input has multiple HEADER/SUBHEADER/ORDER segments, and within each ORDER segment separately I need to check for duplicate OGS11 segments, ignoring the value of OGS11/SEG/SEQ1. If all the other sub-nodes under two OGS11 segments are the same, the later one is a duplicate and should be deleted.

I used composite grouping (composite="yes" on xsl:for-each-group) because there are a lot of sub-segments under OGS11, but it is not deleting the duplicates. Please review it.

Input:

<?xml version="1.0" encoding="UTF-8"?>
<ns0:HEADER xmlns:ns0="http://idenity.com">
    <ns0:SUBHEADER>
        <ORDER>
            <HN>
                <field1>1233</field1>
            </HN>
            <GS1>
                <field1>1</field1>
            </GS1>
            <OGS11>
                <SEG>
                    <SEQ1>21</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <OGS11>
                <SEG>
                    <SEQ1>22</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <OGS11>
                <SEG>
                    <SEQ1>23</SEQ1>
                    <SEQ>
                        <FIELD1>1234</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>1414</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <NEWSEGMENT>
                <FIELD1>123</FIELD1>
            </NEWSEGMENT>
            <NEWSEG1>
                <FIELD1>COUNT</FIELD1>
            </NEWSEG1>
        </ORDER>
        <ORDER>
            <HN>
                <field1>1234</field1>
            </HN>
            <GS1>
                <field1>1</field1>
            </GS1>
            <OGS11>
                <SEG>
                    <SEQ1>21</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <OGS11>
                <SEG>
                    <SEQ1>22</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <NEWSEGMENT>
                <FIELD1>123</FIELD1>
            </NEWSEGMENT>
            <NEWSEG1>
                <FIELD1>COUNT</FIELD1>
            </NEWSEG1>
        </ORDER>
    </ns0:SUBHEADER>
</ns0:HEADER>



Desired output:

<?xml version="1.0" encoding="UTF-8"?>
<ns0:HEADER xmlns:ns0="http://idenity.com">
    <ns0:SUBHEADER>
        <ORDER>
            <HN>
                <field1>1233</field1>
            </HN>
            <GS1>
                <field1>1</field1>
            </GS1>
            <OGS11>
                <SEG>
                    <SEQ1>21</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <OGS11>
                <SEG>
                    <SEQ1>23</SEQ1>
                    <SEQ>
                        <FIELD1>1234</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>1414</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <NEWSEGMENT>
                <FIELD1>123</FIELD1>
            </NEWSEGMENT>
            <NEWSEG1>
                <FIELD1>COUNT</FIELD1>
            </NEWSEG1>
        </ORDER>
        <ORDER>
            <HN>
                <field1>1234</field1>
            </HN>
            <GS1>
                <field1>1</field1>
            </GS1>
            <OGS11>
                <SEG>
                    <SEQ1>21</SEQ1>
                    <SEQ>
                        <FIELD1>123</FIELD1>
                        <FIELD2>EN</FIELD2>
                    </SEQ>
                </SEG>
                <SEG2>
                    <FIELD1>5</FIELD1>
                    <SEQ>
                        <FIELD2>123</FIELD2>
                        <FIELD3>AS</FIELD3>
                    </SEQ>
                </SEG2>
            </OGS11>
            <NEWSEGMENT>
                <FIELD1>123</FIELD1>
            </NEWSEGMENT>
            <NEWSEG1>
                <FIELD1>COUNT</FIELD1>
            </NEWSEG1>
        </ORDER>
    </ns0:SUBHEADER>
</ns0:HEADER>


XSLT I used is below:

<?xml version="1.0" encoding="utf-8"?>
<!--
  Attempt from the question: copy the document unchanged (shallow-copy mode)
  and collapse OGS11 elements that share the same composite grouping key.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="3.0"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  exclude-result-prefixes="#all">
<!--
  The pattern matches any element that has an ORDER child which in turn has
  an OGS11 child, i.e. the PARENT of ORDER, not ORDER itself.
  NOTE(review): in the sample input OGS11 is a child of ORDER, so with the
  parent of ORDER as context the select="OGS11" below selects nothing and no
  grouping takes place; the ORDER elements are only copied through by the
  shallow-copy mode. This looks like the reason no duplicates are removed.
-->
<xsl:template match="*[ORDER/OGS11]">
<xsl:copy>  
<!-- Attributes and every child other than OGS11 are processed first. -->
<xsl:apply-templates select="@*, * except OGS11"/>    
<!--
  Composite key: the atomized values of all grandchildren of the OGS11 whose
  name is not SEQ1.
  NOTE(review): the groups are written after the other children, so any
  OGS11 output would follow the remaining siblings instead of keeping its
  original position.
-->
<xsl:for-each-group select="OGS11" composite="yes" group-by="*/*[name() != 'SEQ1']">    
<!-- The context item is the first member of the current group. -->
<xsl:sequence select="."/>
</xsl:for-each-group>
</xsl:copy>
</xsl:template>
 
 
  <xsl:output method="xml" indent="yes"/>
 
  <!-- Nodes without a matching template are copied and their children processed. -->
  <xsl:mode on-no-match="shallow-copy"/>
 
</xsl:stylesheet>


   
        

Solution

  • It might be necessary to push the child elements of OGS11 through a mode that removes the SEG/SEQ1 element and then to use deep-equal on the result to eliminate duplicates; the following stylesheet attempts that:

    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      version="3.0"
      xmlns:xs="http://www.w3.org/2001/XMLSchema"
      xmlns:map="http://www.w3.org/2005/xpath-functions/map"
      xmlns:mf="http://example.com/mf"
      exclude-result-prefixes="#all"
      expand-text="yes">

      <!--
        Removes duplicate OGS11 children from each ORDER element.
        Two OGS11 elements are duplicates when their content is deep-equal
        once SEG/SEQ1 has been removed; the first one of each set is kept.
        Everything else in the document is copied unchanged.
      -->

      <!--
        mf:delete
          $elements  the elements to normalize (here: the children of one OGS11)
          returns    copies of $elements in which every SEQ1 child of a SEG
                     has been removed; the copies serve only as comparison keys
      -->
      <xsl:function name="mf:delete" as="element()*">
        <xsl:param name="elements" as="element()*"/>
        <xsl:apply-templates select="$elements" mode="delete"/>
      </xsl:function>

      <!-- Mode "delete" is an identity copy ... -->
      <xsl:mode name="delete" on-no-match="shallow-copy"/>

      <!-- ... except that SEG/SEQ1 produces no output. -->
      <xsl:template mode="delete" match="SEG/SEQ1"/>

      <xsl:template match="ORDER">
        <xsl:copy>
          <!-- generate-id of each OGS11 mapped to its SEQ1-free children, so the
               normalization runs once per OGS11 and not once per comparison. -->
          <xsl:variable name="child-map" select="map:merge(OGS11!map { generate-id(): mf:delete(*) })"/>
          <!-- Walk the OGS11 elements in document order and keep one only if no
               already kept OGS11 has deep-equal normalized content. -->
          <xsl:variable name="unique-OGS11" as="element(OGS11)*"
            select="fold-left(
                      OGS11, 
                      (), 
                      function($r, $o) { 
                         if (some $o1 in $r 
                             satisfies 
                               deep-equal(
                                 $child-map($o1!generate-id()),
                                 $child-map($o!generate-id())
                               )
                             ) 
                             then $r 
                             else ($r, $o) 
                      }
                    )"/>
          <!-- Process every child except the duplicate OGS11 elements, in document
               order. Selecting relative to ORDER (and not relative to the first or
               last OGS11) keeps the children of an ORDER that has no OGS11 at all
               and keeps elements located between two OGS11 elements. -->
          <xsl:apply-templates select="@*, node() except (OGS11 except $unique-OGS11)"/>
        </xsl:copy>
      </xsl:template>

      <!-- Whitespace-only text nodes are stripped from the input, so indentation
           does not take part in the deep-equal comparison. -->
      <xsl:strip-space elements="*"/>
      <xsl:output indent="yes"/>

      <!-- Default mode: identity copy for everything without a template. -->
      <xsl:mode on-no-match="shallow-copy"/>

    </xsl:stylesheet>
    

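    As for the problem raised in a comment (child elements of ORDER getting lost if there are no OGS11 elements), here is a different way to write the ORDER template: it selects the non-OGS11 children and the unique OGS11 elements separately and processes their union, which keeps them in document order: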

      <!--
        ORDER: copy the element, dropping every OGS11 child whose content,
        normalized by mf:delete, is deep-equal to that of an earlier OGS11
        child that was kept. All other children are processed unchanged and
        the original document order is preserved.
        Requires the mf:delete function and the "map" namespace prefix to be
        declared in the stylesheet this template is placed in.
      -->
      <xsl:template match="ORDER">
        <xsl:copy>
          <xsl:apply-templates select="@*"/>
          <!-- One entry per OGS11: its generate-id mapped to its normalized children. -->
          <xsl:variable name="normalized"
            select="map:merge(
                      for $ogs in OGS11
                      return map:entry(generate-id($ogs), mf:delete($ogs/*))
                    )"/>
          <!-- Fold over the OGS11 children in document order; a candidate is
               appended only when no element kept so far has the same
               normalized content. -->
          <xsl:variable name="kept"
            select="fold-left(
                      OGS11,
                      (),
                      function($seen, $candidate) {
                        let $content := $normalized(generate-id($candidate))
                        return
                          if (exists($seen[deep-equal($normalized(generate-id(.)), $content)]))
                          then $seen
                          else ($seen, $candidate)
                      }
                    )"/>
          <!-- The union operator returns its result in document order. -->
          <xsl:apply-templates select="*[not(self::OGS11)] | $kept"/>
        </xsl:copy>
      </xsl:template>