I'm looking to replace all title
elements with h1
, h2
, ... , h6
elements depending on how many ancestors are section
elements. Example input/output:
Input.xml
<document>
<section>
<title>Title A</title>
<section>
<title>Title B</title>
</section>
<section>
<title>Title C</title>
<section>
<title>Title D</title>
</section>
</section>
</section>
</document>
Output.xml
<document>
<section>
<h1>Title A</h1>
<section>
<h2>Title B</h2>
</section>
<section>
<h2>Title C</h2>
<section>
<h3>Title D</h3>
</section>
</section>
</section>
</document>
I can replace all title
s with h1
s using something like this
swapTitles :: ArrowXml a => a XmlTree XmlTree
swapTitles = processTopDown $
(changeQName . const $ mkName "h1")
`when`
(isElem >>> (hasQName $ mkName "title"))
I believe I should be using ArrowState, but I've not been able to figure out how. Can someone point me in the right direction?
Using XSL with package hxt-xslt. Standards make life easier :-)
{-# LANGUAGE Arrows, PackageImports #-}
import System.Environment ( getArgs )
import System.Exit (exitSuccess, exitWith, ExitCode(..))
import Control.Arrow
import "hxt" Text.XML.HXT.Core
import "hxt" Text.XML.HXT.DOM.XmlKeywords
import "hxt-xslt" Text.XML.HXT.XSLT.XsltArrows
import "hxt" Text.XML.HXT.Arrow.XmlState.TraceHandling (withTraceLevel)
process :: String -> String -> IO [String]
process xslStylesheetPath xmlDocPath = do
-- compile stylesheet
compiledStyleSheetResults <- runX $
arr (const xslStylesheetPath)
>>> readXSLTDoc [ withValidate yes, withInputEncoding utf8] -- withTrace 2
>>> {- withTraceLevel 2 -} xsltCompileStylesheet
case compiledStyleSheetResults of
[] -> return ["error compiling " ++ xslStylesheetPath]
compiledStyleSheet : _ -> do
-- apply compiled stylesheet to xml doc
runX $ arr (const xmlDocPath)
>>> readXSLTDoc [ withValidate yes, withInputEncoding utf8] -- withTrace 2
>>> xsltApplyStylesheet compiledStyleSheet
>>> writeDocumentToString [withOutputEncoding utf8,
withXmlPi yes, withIndent yes]
-- readXSLTDoc from internals of module Text.XML.HXT.XSLT.XsltArrows
readXSLTDoc :: SysConfigList -> IOSArrow String XmlTree
readXSLTDoc options
= readFromDocument (options ++ defaultOptions)
where
defaultOptions
= [ withCheckNamespaces yes
, withValidate no
, withPreserveComment no
]
main = do
args <- getArgs
case args of
[arg1, arg2] -> do
results <- process arg1 arg2
case results of
[] -> putStrLn "errors"
result : _ -> putStrLn result
exitSuccess
_ -> do
putStrLn "missing parameters: xslStylesheetPath xmlDocPath"
exitWith $ ExitFailure 1
with XSL file "mystyle.xsl"
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output indent="yes"/>
<xsl:strip-space elements="*"/>
<xsl:template match="/">
<xsl:for-each select="document">
<xsl:copy>
<xsl:call-template name="myloop">
<xsl:with-param name="nesting" select="0"/>
</xsl:call-template>
</xsl:copy>
</xsl:for-each>
</xsl:template>
<xsl:template name="myloop">
<xsl:param name="nesting"/>
<xsl:if test="title">
<xsl:element name="{concat('h',string($nesting))}">
<xsl:value-of select="title" />
</xsl:element>
</xsl:if>
<xsl:for-each select="section">
<xsl:copy>
<xsl:call-template name="myloop">
<xsl:with-param name="nesting" select="$nesting+1"/>
</xsl:call-template>
</xsl:copy>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>
with "yourdata.xml"
<?xml version="1.0" encoding="UTF-8"?>
<document>
<section>
<title>Title A</title>
<section>
<title>Title B</title>
</section>
<section>
<title>Title C</title>
<section>
<title>Title D</title>
</section>
</section>
</section>
</document>
running
runhaskell test.hs mystyle.xsl yourdata.xml
result:
<?xml version="1.0" encoding="UTF-8"?>
<document>
<section>
<h1>Title A</h1>
<section>
<h2>Title B</h2>
</section>
<section>
<h2>Title C</h2>
<section>
<h3>Title D</h3>
</section>
</section>
</section>
</document>