xmlxsltodt

XSL change structure of ODT XML file


I am trying to convert an fodt file into a different structure using XSLT.

I have the following xml (extracted from an fodt file)

<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"> 
    <office:blablaTags>
        officeblabla
    </office:blablaTags>

    <office:body>
        <office:text text:use-soft-page-breaks="true">
            <text:variable-decls>
officeBlabla
            </text:variable-decls>
            <text:h text:outline-level="1">1. Chapter<text:variable-set/>
            </text:h>
            <text:p>paragraphe 1</text:p>
            <text:p>paragraphe 2</text:p>
            <text:h text:outline-level="1">2. Chapter<text:variable-set/>
            </text:h>
            <text:p>paragraphe 1</text:p>
            <text:h text:outline-level="2">
                <text:soft-page-break/>2.1. Chapter</text:h>
            <text:h text:outline-level="3">2.1.1. Chapter</text:h>
            <text:p>liste :</text:p>
            <text:list>
                <text:list-item>
                    <text:p>item 1</text:p>
                </text:list-item>
                <text:list-item>
                    <text:p>item 2</text:p>
                </text:list-item>
                <text:list-item>
                    <text:p>item 3</text:p>
                </text:list-item>
            </text:list>
            <text:h text:outline-level="3">2.1.2. Chapter</text:h>
            <text:p>paragraphe 1</text:p>
            <text:h text:outline-level="2">
                <text:bookmark/>2.2. Chapter<text:variable-set/>
            </text:h>
            <text:p>paragraphe 1</text:p>
            <text:p>
                <draw:frame draw:name="Image1">
                    <draw:image>
                        <office:binary-data>53dgh5436dfgh54
      </office:binary-data>
                    </draw:image>
                </draw:frame>
            </text:p>
            <text:h text:outline-level="1">3. Chapter<text:variable-set/>
            </text:h>
            <text:p>Paragraphe 1</text:p>
            <text:p>Paragraphe 2</text:p>
            <text:p>Paragraphe 3</text:p>
        </office:text>
    </office:body>
</office:document>

The document is a flat sequence of chapter headings (text:h, whose text:outline-level attribute gives the depth), each followed by its content: paragraphs, lists or images (text:p, text:list or draw:frame).

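What I would like to do is:

  1. group each chapter heading with the content that follows it into its own block;
  2. convert the ODT tags of that content into HTML;
  3. nest the chapter blocks inside each other according to their outline level.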

I managed to do the first part using the answer to the question "XSLT: Select following-sibling until reaching a specified tag".

-EDIT- Thanks to the help of Bryn, I am now able to convert the FODT tags into HTML.

So now all I need is to nest each chapter block inside its parent chapter (according to the heading's outline level), and I have no idea how to do this.

Here is my XSLT:

    <xsl:stylesheet version="1.0"
                    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                    xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
                    xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
                    xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
                    xmlns:plu="https://cnig.gouv.fr/reglementDU">

        <!-- Serialise as indented XML; whitespace-only text nodes of the input are ignored. -->
        <xsl:output method="xml" indent="yes"/>
        <xsl:strip-space elements="*"/>

        <!-- Document root: everything produced below ends up inside one plu:ReglementDU. -->
        <xsl:template match="/">
            <plu:ReglementDU>
                <xsl:apply-templates/>
            </plu:ReglementDU>
        </xsl:template>

        <!--
            Suppress every element that is not a heading and still has a text:h
            somewhere after it in document order. This removes the material ahead
            of the first heading and also the content lying between headings,
            which the text:h template below pulls in by itself.
        -->
        <xsl:template match="*[following::text:h and not(self::text:h)]"/>

        <!--
            One flat plu:Titre per heading: the heading text goes into an h1, then
            every following element whose nearest preceding text:h sibling is this
            very heading is handed to convertHTML. No nesting is attempted here.
        -->
        <xsl:template match="text:h">
            <plu:Titre>
                <h1>
                    <xsl:apply-templates/>
                </h1>
                <xsl:call-template name="convertHTML">
                    <xsl:with-param name="content"
                                    select="following::*[generate-id(preceding-sibling::text:h[1]) = generate-id(current())]"/>
                </xsl:call-template>
            </plu:Titre>
        </xsl:template>

        <!-- Paragraphs not caught above (those after the last heading) were already
             copied by the text:h template, so they must not be emitted a second time. -->
        <xsl:template match="text:p"/>

        <!--
            convertHTML: maps each node of $content to HTML.
              * text:p whose first child element is not a draw:frame  ->  div with the string value
              * text:list                                              ->  ul with one li per child element
              * any node whose first child element is a draw:frame     ->  a holding the frame's draw:name
            The three tests are independent xsl:if instructions, not an exclusive choice.
        -->
        <xsl:template name="convertHTML">
            <xsl:param name="content"/>
            <xsl:for-each select="$content">
                <xsl:if test="name() = 'text:p' and not(name(*[1]) = 'draw:frame')">
                    <div><xsl:value-of select="."/></div>
                </xsl:if>
                <xsl:if test="name() = 'text:list'">
                    <ul>
                        <xsl:for-each select="*">
                            <li><xsl:value-of select="."/></li>
                        </xsl:for-each>
                    </ul>
                </xsl:if>
                <xsl:if test="name(*[1]) = 'draw:frame'">
                    <a><xsl:value-of select="draw:frame/@draw:name"/></a>
                </xsl:if>
            </xsl:for-each>
        </xsl:template>

    </xsl:stylesheet>

Here is my current result :

<plu:ReglementDU xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:plu="https://cnig.gouv.fr/reglementDU">
    <plu:Titre>
        <h1>1. Titre</h1>
        <div>paragraphe 1</div>
        <div>paragraphe 2</div>
    </plu:Titre>
    <plu:Titre>
        <h1>2. Titre</h1>
        <div>paragraphe 1</div>
    </plu:Titre>
    <plu:Titre>
        <h1>2.1. Titre</h1>
    </plu:Titre>
    <plu:Titre>
        <h1>2.1.1. Titre</h1>
        <div>liste :</div>
        <ul>
            <li>item 1</li>
            <li>item 2</li>
            <li>item 3</li>
        </ul>
    </plu:Titre>
    <plu:Titre>
        <h1>2.1.2. Titre</h1>
        <div>paragraphe 1</div>
    </plu:Titre>
    <plu:Titre>
        <h1>2.2. Titre</h1>
        <div>paragraphe 1</div>
        <a>Image1</a>
    </plu:Titre>
    <plu:Titre>
        <h1>3. Titre</h1>
        <div>Paragraphe 1</div>
        <div>Paragraphe 2</div>
        <div>Paragraphe 3</div>
    </plu:Titre>
</plu:ReglementDU>

Here is the expected result :

<?xml version="1.0" encoding="UTF-8"?>
<plu:ReglementDU xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:plu="https://cnig.gouv.fr/reglementDU">
    <plu:Titre>
        <h1>1. Titre</h1>
        <div>paragraphe 1</div>
        <div>paragraphe 2</div>
    </plu:Titre>
    <plu:Titre>
        <h1>2. Titre</h1>
        <div>paragraphe 1</div>
        <plu:Titre>
            <h1>2.1. Titre</h1>
            <plu:Titre>
                <h1>2.1.1. Titre</h1>
                <div>liste :</div>
                <ul>
                    <li>item 1</li>
                    <li>item 2</li>
                    <li>item 3</li>
                </ul>
            </plu:Titre>
            <plu:Titre>
                <h1>2.1.2. Titre</h1>
                <div>paragraphe 1</div>
            </plu:Titre>
        </plu:Titre>
        <plu:Titre>
            <h1>2.2. Titre</h1>
            <div>paragraphe 1</div>
            <a>Image1</a>
        </plu:Titre>
    </plu:Titre>
    <plu:Titre>
        <h1>3. Titre</h1>
        <div>Paragraphe 1</div>
        <div>Paragraphe 2</div>
        <div>Paragraphe 3</div>
    </plu:Titre>
</plu:ReglementDU>

Thanks


Solution

  • Thanks to Bryn, and to Martin Honnen's answer in the thread "How to convert flat xml data to hierarchical data xml 2", here is a working solution (it requires an XSLT 3.0 processor):

    <xsl:stylesheet version="3.0"
                    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                    xmlns:xs="http://www.w3.org/2001/XMLSchema"
                    xmlns:mf="http://example.com/mf"
                    xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
                    xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
                    xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
                    xmlns:plu="https://cnig.gouv.fr/reglementDU"
                    exclude-result-prefixes="xs mf office text draw">
        <!--
            Turns the flat heading/content sequence of an ODT office:text into nested
            plu:Titre elements, one per heading, nested by text:outline-level.
            "xs" is listed in exclude-result-prefixes as well, so that no stylesheet-only
            namespace declaration is copied onto the result elements.
        -->

        <!-- Ignore whitespace-only text nodes of the input. -->
        <xsl:strip-space elements="*"/>
        <!-- Indent the result XML. -->
        <xsl:output indent="yes"/>

        <!-- ===================== Templates ===================== -->

        <!-- Drop every non-heading element that still has a text:h after it in
             document order (the office:* material placed before the body). -->
        <xsl:template match="*[following::text:h[1] and not(self::text:h[1])]"/>
        <!-- Drop text:* elements that have no text:p, text:h, text:list or text:list-item child.
             NOTE(review): the predicate tests children, not the element itself; a self:: test
             may have been intended. With the templates below, office:text never applies
             templates to its content, so this rule does not appear to be reached. Confirm. -->
        <xsl:template match="text:*[not(text:p or text:h or text:list or text:list-item)]"/>

        <!-- Catch-all: walk down the tree without copying anything to the result. -->
        <xsl:template match="node()|@*">
            <xsl:apply-templates/>
        </xsl:template>

        <!-- Main template: the body text becomes the plu:ReglementDU root, filled by
             grouping its child elements from outline level 1 downwards. It matches
             office:text only, so an attribute can never produce a second root wrapper. -->
        <xsl:template match="office:text">
            <plu:ReglementDU>
                <xsl:sequence select="mf:group(*, 1)"/>
            </plu:ReglementDU>
        </xsl:template>

        <!--
            convertHTML: ODT to HTML conversion of each node in $content.
              * text:p whose first child element is not a draw:frame  ->  div with the string value
              * text:list                                              ->  ul with one li per child element
              * any node whose first child element is a draw:frame     ->  a holding the frame's draw:name
            Elements are recognised with self:: tests, i.e. by namespace and local name, so the
            result does not depend on the prefixes chosen in the source document.
            Anything else (text:h included) produces no output.
        -->
        <xsl:template name="convertHTML">
            <xsl:param name="content"/>
            <xsl:for-each select="$content">
                <xsl:if test="self::text:p and not(*[1][self::draw:frame])">
                    <div>
                        <xsl:value-of select="."/>
                    </div>
                </xsl:if>
                <xsl:if test="self::text:list">
                    <ul>
                        <xsl:for-each select="*">
                            <li>
                                <xsl:value-of select="."/>
                            </li>
                        </xsl:for-each>
                    </ul>
                </xsl:if>
                <xsl:if test="*[1][self::draw:frame]">
                    <a>
                        <xsl:value-of select="draw:frame/@draw:name"/>
                    </a>
                </xsl:if>
            </xsl:for-each>
        </xsl:template>

        <!-- ====================== Functions ======================== -->

        <!--
            mf:group: builds the heading hierarchy.
              $elements  sibling elements to organise, in document order
              $level     outline level handled by this call
            Returns the result elements for $elements:
              * a group that starts with a text:h of $level becomes
                plu:Titre niveau="$level" holding an h{$level} title, followed by the
                rest of the group processed recursively at $level + 1;
              * a group without such a heading that still contains deeper headings
                (an outline level was skipped, e.g. level 3 directly under level 1)
                is processed again at $level + 1 so that those headings are kept;
              * any other group is plain content and goes through convertHTML.
        -->
        <xsl:function name="mf:group" as="element()*">
            <xsl:param name="elements" as="element()*"/>
            <xsl:param name="level" as="xs:integer"/>
            <xsl:for-each-group select="$elements" group-starting-with="text:h[@text:outline-level = $level]">
                <xsl:choose>
                    <xsl:when test="self::text:h[@text:outline-level = $level]">
                        <plu:Titre niveau="{$level}">
                            <xsl:element name="{concat('h', $level)}">
                                <xsl:value-of select="."/>
                            </xsl:element>
                            <xsl:sequence select="mf:group(current-group() except ., $level + 1)"/>
                        </plu:Titre>
                    </xsl:when>
                    <!-- Recurse only while a strictly deeper heading exists, which bounds the recursion. -->
                    <xsl:when test="current-group()[self::text:h][@text:outline-level &gt; $level]">
                        <xsl:sequence select="mf:group(current-group(), $level + 1)"/>
                    </xsl:when>
                    <xsl:otherwise>
                        <!-- where-populated discards empty result elements (e.g. an empty div). -->
                        <xsl:where-populated>
                            <xsl:call-template name="convertHTML">
                                <xsl:with-param name="content" select="current-group()"/>
                            </xsl:call-template>
                        </xsl:where-populated>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:for-each-group>
        </xsl:function>
    </xsl:stylesheet>