I have an issue deleting duplicate segments. The input has multiple HEADER/SUBHEADER/ORDER segments; within each ORDER segment separately, I need to check for duplicate OGS11 segments while ignoring the OGS11/SEG/SEQ1 value. If all the other sub-nodes under two OGS11 segments are the same, the later one is a duplicate and should be deleted.
I used a composite grouping key (xsl:for-each-group with composite="yes") because there are a lot of sub-segments under OGS11, but it is not deleting the duplicates. Please review it.
Input:
<?xml version="1.0" encoding="UTF-8"?>
<ns0:HEADER xmlns:ns0="http://idenity.com">
<ns0:SUBHEADER>
<ORDER>
<HN>
<field1>1233</field1>
</HN>
<GS1>
<field1>1</field1>
</GS1>
<OGS11>
<SEG>
<SEQ1>21</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<OGS11>
<SEG>
<SEQ1>22</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<OGS11>
<SEG>
<SEQ1>23</SEQ1>
<SEQ>
<FIELD1>1234</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>1414</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<NEWSEGMENT>
<FIELD1>123</FIELD1>
</NEWSEGMENT>
<NEWSEG1>
<FIELD1>COUNT</FIELD1>
</NEWSEG1>
</ORDER>
<ORDER>
<HN>
<field1>1234</field1>
</HN>
<GS1>
<field1>1</field1>
</GS1>
<OGS11>
<SEG>
<SEQ1>21</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<OGS11>
<SEG>
<SEQ1>22</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<NEWSEGMENT>
<FIELD1>123</FIELD1>
</NEWSEGMENT>
<NEWSEG1>
<FIELD1>COUNT</FIELD1>
</NEWSEG1>
</ORDER>
</ns0:SUBHEADER>
</ns0:HEADER>
Desired output:
<?xml version="1.0" encoding="UTF-8"?>
<ns0:HEADER xmlns:ns0="http://idenity.com">
<ns0:SUBHEADER>
<ORDER>
<HN>
<field1>1233</field1>
</HN>
<GS1>
<field1>1</field1>
</GS1>
<OGS11>
<SEG>
<SEQ1>21</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<OGS11>
<SEG>
<SEQ1>23</SEQ1>
<SEQ>
<FIELD1>1234</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>1414</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<NEWSEGMENT>
<FIELD1>123</FIELD1>
</NEWSEGMENT>
<NEWSEG1>
<FIELD1>COUNT</FIELD1>
</NEWSEG1>
</ORDER>
<ORDER>
<HN>
<field1>1234</field1>
</HN>
<GS1>
<field1>1</field1>
</GS1>
<OGS11>
<SEG>
<SEQ1>21</SEQ1>
<SEQ>
<FIELD1>123</FIELD1>
<FIELD2>EN</FIELD2>
</SEQ>
</SEG>
<SEG2>
<FIELD1>5</FIELD1>
<SEQ>
<FIELD2>123</FIELD2>
<FIELD3>AS</FIELD3>
</SEQ>
</SEG2>
</OGS11>
<NEWSEGMENT>
<FIELD1>123</FIELD1>
</NEWSEGMENT>
<NEWSEG1>
<FIELD1>COUNT</FIELD1>
</NEWSEG1>
</ORDER>
</ns0:SUBHEADER>
</ns0:HEADER>
XSLT I used is below:
<?xml version="1.0" encoding="utf-8"?>
<!--
  Removes duplicate OGS11 children from every ORDER element.

  Two OGS11 elements of the same ORDER count as duplicates when all of their
  leaf elements (elements without element children) agree in name and string
  value, with SEG/SEQ1 left out of the comparison. The first OGS11 of each
  set of duplicates is kept; everything else in the document is copied as is.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="3.0"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="#all">

  <xsl:output method="xml" indent="yes"/>

  <!-- Identity transformation for every node no explicit template matches. -->
  <xsl:mode on-no-match="shallow-copy"/>

  <!--
    The template has to match ORDER itself: OGS11 is a child of ORDER, so the
    relative selections below (OGS11, * except OGS11) only find anything when
    ORDER is the context node.
  -->
  <xsl:template match="ORDER">
    <xsl:copy>
      <!--
        Group the OGS11 children on a composite key made of name/value pairs
        of all leaf elements except SEG/SEQ1, and keep the first member of
        each group. The "as" attribute keeps the variable a sequence of the
        original input nodes, so they can be merged with their siblings in
        document order afterwards.
      -->
      <xsl:variable name="unique-OGS11" as="element(OGS11)*">
        <xsl:for-each-group select="OGS11" composite="yes"
                            group-by="(descendant::*[not(*)] except SEG/SEQ1) ! (name(), string())">
          <xsl:sequence select="."/>
        </xsl:for-each-group>
      </xsl:variable>
      <!--
        The union puts the surviving OGS11 elements back between their
        original siblings instead of appending them after the other children.
      -->
      <xsl:apply-templates select="@*, (* except OGS11) | $unique-OGS11"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
It might be necessary to push the child elements of OGS11 through a mode that removes the SEG/SEQ1 element and then to use deep-equal on the result to eliminate duplicates; the following stylesheet attempts that:
<!--
  Removes duplicate OGS11 children from every ORDER element.

  Two OGS11 elements of the same ORDER count as duplicates when their child
  elements are deep-equal after SEG/SEQ1 has been removed from them. The
  first OGS11 of each set of duplicates is kept.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="3.0"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:map="http://www.w3.org/2005/xpath-functions/map"
                xmlns:mf="http://example.com/mf"
                exclude-result-prefixes="#all"
                expand-text="yes">

  <!-- Whitespace-only text nodes are stripped from the input. -->
  <xsl:strip-space elements="*"/>
  <xsl:output indent="yes"/>

  <!-- Default (unnamed) mode: identity transformation. -->
  <xsl:mode on-no-match="shallow-copy"/>

  <!-- Mode "delete": identity transformation, except that SEG/SEQ1 is dropped. -->
  <xsl:mode name="delete" on-no-match="shallow-copy"/>
  <xsl:template mode="delete" match="SEG/SEQ1"/>

  <!--
    mf:delete($elements)
    Returns copies of the given elements with every SEG/SEQ1 element removed,
    produced by applying templates in mode "delete".
  -->
  <xsl:function name="mf:delete" as="element()*">
    <xsl:param name="elements" as="element()*"/>
    <xsl:apply-templates select="$elements" mode="delete"/>
  </xsl:function>

  <!--
    Only ORDER elements that actually contain OGS11 are handled here. An
    ORDER without OGS11 falls through to the shallow-copy default; otherwise
    both sibling selections below would be empty and all of its children
    would be lost.
    NOTE: elements located between two OGS11 siblings are not copied by this
    template; it relies on the OGS11 elements being adjacent, as they are in
    the sample input.
  -->
  <xsl:template match="ORDER[OGS11]">
    <xsl:copy>
      <!-- Everything in front of the first OGS11. -->
      <xsl:apply-templates select="@*, OGS11[1]/preceding-sibling::*"/>
      <!-- generate-id() of each OGS11, mapped to its children without SEG/SEQ1. -->
      <xsl:variable name="child-map" select="map:merge(OGS11!map { generate-id(): mf:delete(*) })"/>
      <!--
        Walk the OGS11 elements in document order and collect in $r those
        whose reduced children are not deep-equal to the reduced children of
        an element collected earlier.
      -->
      <xsl:sequence
        select="fold-left(
                  OGS11,
                  (),
                  function($r, $o) {
                    if (some $o1 in $r
                        satisfies
                        deep-equal(
                          $child-map($o1!generate-id()),
                          $child-map($o!generate-id())
                        )
                       )
                    then $r
                    else ($r, $o)
                  }
                )"/>
      <!-- Everything after the last OGS11. -->
      <xsl:apply-templates select="OGS11[last()]/following-sibling::*"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
As for the problem raised in a comment about elements getting lost if there are no OGS11
elements, here is a different way to handle that:
<!--
  Copies an ORDER element, keeping only the first OGS11 child of every set of
  OGS11 children whose content is deep-equal once SEG/SEQ1 has been removed
  by mf:delete. All other child elements are processed unchanged, so an ORDER
  without any OGS11 keeps all of its children.
-->
<xsl:template match="ORDER">
  <xsl:copy>
    <xsl:apply-templates select="@*"/>
    <!-- generate-id() of each OGS11, mapped to its children as returned by mf:delete. -->
    <xsl:variable name="reduced-by-id"
                  select="map:merge(for $segment in OGS11
                                    return map { generate-id($segment): mf:delete($segment/*) })"/>
    <!-- An OGS11 is kept when no earlier OGS11 sibling has deep-equal reduced children. -->
    <xsl:variable name="first-occurrences"
                  select="OGS11[
                            let $own := $reduced-by-id(generate-id())
                            return not(preceding-sibling::OGS11[
                                         deep-equal($reduced-by-id(generate-id()), $own)
                                       ])
                          ]"/>
    <!-- The union returns both node sets in document order. -->
    <xsl:apply-templates select="(* except OGS11) | $first-occurrences"/>
  </xsl:copy>
</xsl:template>