Purpose: I have a xml document with a lot of mixed content cdata elements that I need to programmatically edit. Annoyingly beacause the cdata elements have other/mixed content, the default ",cdata" tagging does not function correctly (to xml spec). If you have questions on the specifics on this then please let me know.
Issue: In simplified example below I have marked the element with cdata in it as ",innerxml" in order to handle the prefix/suffix myself. Everything works as expected with the unmarshalling, however with the marshalling (encoding) the special characters are escaped. Why does the EncodeElement method escape the special characters when the tag specifically says not to (via the ",innerxml" tag)? When I read about this method in the docs it refers me to the xml.Marshal method where it says the following:
a field with tag ",innerxml" is written verbatim, not subject to the usual marshaling procedure.
Example:
Here is the code (also available at https://go.dev/play/p/MH_ONAVaG_1):
package main
import (
"encoding/xml"
"fmt"
"strings"
)
var xmlFile string = `<?xml version="1.0" encoding="UTF-8"?>
<statusdb>
<status date="today">
<![CDATA[today is < yesterday]]>
</status>
<status date="yesterday">
<![CDATA[PM,
1. there are issues with the marshaller
2. i don't know how to solve them]]>
</status>
</statusdb>`
type statusDB struct {
Status []*status `xml:"status"`
}
type status struct {
Text string `xml:",innerxml"`
Date string `xml:"date,attr"`
}
type statusMarshaller status
func main() {
var projectStatus statusDB
err := xml.Unmarshal([]byte(xmlFile), &projectStatus)
if err != nil {
fmt.Println(err)
return
}
fmt.Println("In Go: \"" + projectStatus.Status[0].Text + "\"")
fmt.Println("In Go: \"" + projectStatus.Status[1].Text + "\"")
x, err := xml.MarshalIndent(projectStatus, "", " ")
if err != nil {
fmt.Println(err)
return
}
//why this is not printing properly
fmt.Printf("%s\n", x)
}
func (tagElement *status) UnmarshalXML(d *xml.Decoder, se xml.StartElement) error {
temp := statusMarshaller{}
d.DecodeElement(&temp, &se)
temp.Text = strings.TrimSpace(temp.Text)
temp.Text = strings.TrimPrefix(temp.Text, "<![CDATA[")
temp.Text = strings.TrimSuffix(temp.Text, "]]>")
*tagElement = status(temp)
return nil
}
func (tagElement status) MarshalXML(d *xml.Encoder, se xml.StartElement) error {
tagElement.Text = "<![CDATA[" + tagElement.Text + "]]>"
temp, _ := xml.Marshal(statusMarshaller(tagElement))
return d.EncodeElement(temp, se)
}
This code returns the following:
In Go: "today is < yesterday"
In Go: "PM,
1. there are issues with the marshaller
2. i don't know how to solve them"
<statusDB>
<status><statusMarshaller date="today"><![CDATA[today is < yesterday]]></statusMarshaller></status>
<status><statusMarshaller date="yesterday"><![CDATA[PM,
 1. there are issues with the marshaller
 2. i don't know how to solve them]]></statusMarshaller></status>
</statusDB>
Program exited.
Conclusion: Can someone please explain why the xml package is doing this, and what a potential workaround could be?
Thank you!
of course it would be nice if the cdata in the package allowed for mixed elements, but for now I have found my workaround which is the above code with the small change to not call 'marshal' on the statusMarshaller type in the marhshalXML func. Instead I only cast the tagElement to the statusMarshaller type and then encode that element. see the details below:
Rev history:
package main
import (
"encoding/xml"
"fmt"
"strings"
)
var xmlFile string = `<?xml version="1.0" encoding="UTF-8"?>
<statusdb>
<status date="today">
<![CDATA[today is < yesterday]]>
</status>
<status date="yesterday">
<![CDATA[PM,
1. there are issues with the marshaller
2. i don't know how to solve them]]>
</status>
</statusdb>`
type statusDB struct {
Status []*status `xml:"status"`
}
type status struct {
XMLName xml.Name
Text string `xml:",innerxml"`
Date string `xml:"date,attr"`
}
type statusMarshaller status
func main() {
var projectStatus statusDB
err := xml.Unmarshal([]byte(xmlFile), &projectStatus)
if err != nil {
fmt.Println(err)
return
}
fmt.Println("In Go: \"" + projectStatus.Status[0].Text + "\"")
fmt.Println("In Go: \"" + projectStatus.Status[1].Text + "\"")
x, err := xml.MarshalIndent(projectStatus, "", " ")
if err != nil {
fmt.Println(err)
return
}
//why this is not printing properly
fmt.Printf("%s\n", x)
}
func (tagElement *status) UnmarshalXML(d *xml.Decoder, se xml.StartElement) error {
temp := statusMarshaller{}
d.DecodeElement(&temp, &se)
temp.Text = strings.TrimSpace(temp.Text)
temp.Text = strings.TrimPrefix(temp.Text, "<![CDATA[")
temp.Text = strings.TrimSuffix(temp.Text, "]]>")
*tagElement = status(temp)
return nil
}
func (tagElement status) MarshalXML(d *xml.Encoder, se xml.StartElement) error {
tagElement.Text = "<![CDATA[" + tagElement.Text + "]]>"
temp := statusMarshaller(tagElement)
return d.EncodeElement(temp, se)
}