haskellxslthxtarrow-abstraction

Transform nodes with HXT using the number of <section> ancestor nodes


I'm looking to replace all title elements with h1, h2, ... , h6 elements depending on how many ancestors are section elements. Example input/output:

Input.xml

<document>
    <section>
        <title>Title A</title>
        <section>
            <title>Title B</title>
        </section>
        <section>
            <title>Title C</title>
            <section>
                <title>Title D</title>
            </section>
        </section>
    </section>
</document>

Output.xml

<document>
    <section>
        <h1>Title A</h1>
        <section>
            <h2>Title B</h2>
        </section>
        <section>
            <h2>Title C</h2>
            <section>
                <h3>Title D</h3>
            </section>
        </section>
    </section>
</document>

I can replace all titles with h1s using something like this

swapTitles :: ArrowXml a => a XmlTree XmlTree
swapTitles = processTopDown $
             (changeQName . const $ mkName "h1")
             `when`
             (isElem >>> (hasQName $ mkName "title"))

I believe I should be using ArrowState, but I've not been able to figure out how. Can someone point me in the right direction?


Solution

  • Using XSL with package hxt-xslt. Standards make life easier :-)

    {-# LANGUAGE Arrows, PackageImports #-} 
    
    import System.Environment ( getArgs )
    import System.Exit (exitSuccess, exitWith, ExitCode(..))
    
    import Control.Arrow
    import "hxt" Text.XML.HXT.Core 
    import "hxt" Text.XML.HXT.DOM.XmlKeywords 
    import "hxt-xslt" Text.XML.HXT.XSLT.XsltArrows
    import "hxt" Text.XML.HXT.Arrow.XmlState.TraceHandling (withTraceLevel)
    
    process :: String -> String -> IO [String]
    process xslStylesheetPath xmlDocPath = do
    
        -- compile stylesheet
    
        compiledStyleSheetResults <- runX $ 
            arr (const xslStylesheetPath) 
            >>> readXSLTDoc [ withValidate yes, withInputEncoding utf8]   -- withTrace 2 
            >>> {- withTraceLevel 2 -} xsltCompileStylesheet
    
        case compiledStyleSheetResults of
             [] -> return ["error compiling " ++ xslStylesheetPath] 
             compiledStyleSheet : _ -> do
    
                 -- apply compiled stylesheet to xml doc 
    
                 runX $ arr (const xmlDocPath) 
                     >>> readXSLTDoc [ withValidate yes, withInputEncoding utf8] -- withTrace 2
                     >>> xsltApplyStylesheet compiledStyleSheet
                     >>> writeDocumentToString [withOutputEncoding utf8, 
                                                withXmlPi yes, withIndent yes]
    
    -- readXSLTDoc from internals of module Text.XML.HXT.XSLT.XsltArrows
    
    readXSLTDoc :: SysConfigList -> IOSArrow String XmlTree
    readXSLTDoc options
        = readFromDocument (options ++ defaultOptions)
        where
        defaultOptions
            = [ withCheckNamespaces yes
              , withValidate        no
              , withPreserveComment no
              ]         
    
    main = do
     args <- getArgs
     case args of 
       [arg1, arg2] -> do
           results <- process arg1 arg2
           case results of
                [] -> putStrLn "errors"
                result : _ -> putStrLn result
    
           exitSuccess
    
       _ -> do 
                putStrLn "missing parameters: xslStylesheetPath xmlDocPath"
                exitWith $ ExitFailure 1
    

    with XSL file "mystyle.xsl"

    <?xml version="1.0" encoding="UTF-8"?>
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output indent="yes"/>
    <xsl:strip-space elements="*"/>
    
    <xsl:template match="/">
      <xsl:for-each select="document">
        <xsl:copy>
          <xsl:call-template name="myloop">
            <xsl:with-param name="nesting" select="0"/>
          </xsl:call-template>
        </xsl:copy>
      </xsl:for-each>
    </xsl:template>
    
    <xsl:template name="myloop">
      <xsl:param name="nesting"/>
    
      <xsl:if test="title">
       <xsl:element name="{concat('h',string($nesting))}">
         <xsl:value-of select="title" /> 
       </xsl:element>
      </xsl:if> 
    
      <xsl:for-each select="section">
        <xsl:copy>
          <xsl:call-template name="myloop">
            <xsl:with-param name="nesting" select="$nesting+1"/>
          </xsl:call-template>
        </xsl:copy>
      </xsl:for-each>
    
    </xsl:template>
    </xsl:stylesheet>
    

    with "yourdata.xml"

    <?xml version="1.0" encoding="UTF-8"?>
    <document>
        <section>
            <title>Title A</title>
            <section>
                <title>Title B</title>
            </section>
            <section>
                <title>Title C</title>
                <section>
                    <title>Title D</title>
                </section>
            </section>
        </section>
    </document>
    

    running

    runhaskell test.hs mystyle.xsl yourdata.xml
    

    result:

    <?xml version="1.0" encoding="UTF-8"?>
    <document>
      <section>
        <h1>Title A</h1>
        <section>
          <h2>Title B</h2>
        </section>
        <section>
          <h2>Title C</h2>
          <section>
            <h3>Title D</h3>
          </section>
        </section>
      </section>
    </document>