I'm writing an xml file using the stax and the dom api.
The stax api writes the newline unchanged.
The dom api escapes the newline in the attribute to `&#10;`.
Why is there a difference? How do I force stax to also escape the newline?
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Small demo that writes the same attribute value to an XML file once with StAX and once
 * with DOM, then prints each file together with the attribute value read back by both APIs.
 */
public class WriteXml {

    public static void main(String[] args) throws Exception {
        // Author's observation: both readers return "foo SPACE bar" here, because
        // writeWithStax writes the newline into the attribute literally.
        printRoundTrip("staxFile = ", writeWithStax("foo\nbar"));

        // Author's observation: both readers return "foo NEWLINE bar" here, because
        // writeWithDom escapes the newline to "&#10;".
        printRoundTrip("domFile = ", writeWithDom("foo\nbar"));
    }

    /**
     * Prints the raw file content, then the attribute as read with StAX and with DOM,
     * followed by an empty line.
     *
     * @param prefix  text printed in front of the raw file content
     * @param xmlFile the file to dump and read back
     */
    private static void printRoundTrip(String prefix, Path xmlFile) throws Exception {
        System.out.println(prefix + Files.readString(xmlFile));
        System.out.println("attribute read with stax = " + readWithStax(xmlFile));
        System.out.println("attribute read with dom = " + readWithDom(xmlFile));
        System.out.println();
    }

    /**
     * Writes {@code <element attribute="text"/>} to a new temp file using an XMLStreamWriter.
     *
     * @param text the attribute value, passed to {@code writeAttribute} unchanged
     * @return the temp file that was written
     */
    private static Path writeWithStax(String text) throws Exception {
        Path target = Files.createTempFile("stax", ".xml");
        try (OutputStream sink = Files.newOutputStream(target)) {
            XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(sink);
            xml.writeStartDocument();
            xml.writeStartElement("element");
            xml.writeAttribute("attribute", text);
            xml.writeEndElement();
            xml.writeEndDocument();
            xml.flush();
            xml.close();
        }
        return target;
    }

    /**
     * Builds a DOM document with a single {@code element} carrying the attribute and
     * serializes it to a new temp file through a default Transformer.
     *
     * @param text the attribute value set on the element
     * @return the temp file that was written
     */
    private static Path writeWithDom(String text) throws Exception {
        Path target = Files.createTempFile("dom", ".xml");
        try (OutputStream sink = Files.newOutputStream(target)) {
            Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            Element root = doc.createElement("element");
            root.setAttribute("attribute", text);
            doc.appendChild(root);

            TransformerFactory.newInstance()
                    .newTransformer()
                    .transform(new DOMSource(doc), new StreamResult(sink));
        }
        return target;
    }

    /**
     * Reads the file with an XMLStreamReader and returns the {@code attribute} value of the
     * first {@code element} start tag.
     *
     * @param file the XML file to read
     * @return the attribute value, or {@code null} when no {@code element} start tag is found
     */
    private static String readWithStax(Path file) throws Exception {
        try (InputStream source = Files.newInputStream(file)) {
            XMLStreamReader events = XMLInputFactory.newInstance().createXMLStreamReader(source);
            while (events.hasNext()) {
                boolean startTag = events.next() == XMLStreamReader.START_ELEMENT;
                if (startTag && "element".equals(events.getLocalName())) {
                    return events.getAttributeValue(null, "attribute");
                }
            }
        }
        return null;
    }

    /**
     * Parses the file into a DOM document and returns the {@code attribute} value of the
     * document element.
     *
     * @param file the XML file to read
     * @return the value reported by {@code Element.getAttribute}
     */
    private static String readWithDom(Path file) throws Exception {
        try (InputStream source = Files.newInputStream(file)) {
            Document parsed =
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source);
            return parsed.getDocumentElement().getAttribute("attribute");
        }
    }
}
Based on the first answer I have now written this workaround. This feels like a hack, but I guess it works.
/**
 * Workaround variant: writes {@code <element attribute="text"/>} to a new temp file, routing
 * the attribute through {@code writeAttributePreservingWhitespace} instead of calling
 * {@code XMLStreamWriter.writeAttribute} directly.
 *
 * @param text the attribute value
 * @return the temp file that was written
 */
private static Path writeWithStax(String text) throws Exception {
    Path target = Files.createTempFile("stax", ".xml");
    try (Writer sink = Files.newBufferedWriter(target, UTF_8)) {
        XMLStreamWriter xml = XMLOutputFactory.newInstance().createXMLStreamWriter(sink);
        xml.writeStartDocument();
        xml.writeStartElement("element");
        // Hack to preserve whitespace: the helper also receives the underlying Writer so it
        // can emit the attribute itself when the value contains tab, LF or CR.
        writeAttributePreservingWhitespace(sink, xml, "attribute", text);
        xml.writeEndElement();
        xml.writeEndDocument();
        xml.flush();
        xml.close();
    }
    return target;
}
/**
 * Writes one attribute so that tab, line feed and carriage return in its value are kept
 * when the file is parsed again (see https://www.w3.org/TR/xml/#AVNormalize).
 *
 * <p>Values without those characters are delegated to {@code writer.writeAttribute}.
 * Otherwise the stream writer is flushed and the attribute is written straight to
 * {@code out} using {@code writeXMLContent} for both name and value.
 * NOTE(review): this presumably relies on the stream writer not having closed the start
 * tag yet after {@code flush()}; that is implementation-dependent, so verify it.
 *
 * @param out    the Writer that {@code writer} was created on
 * @param writer the stream writer, positioned inside a start element
 * @param name   the attribute name
 * @param value  the attribute value
 */
private static void writeAttributePreservingWhitespace(Writer out, XMLStreamWriter writer, String name, String value) throws XMLStreamException, IOException {
    boolean hasNormalizedWhitespace =
            value.indexOf('\t') >= 0 || value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0;
    if (!hasNormalizedWhitespace) {
        writer.writeAttribute(name, value);
        return;
    }

    // Push out what the stream writer has buffered before writing behind its back.
    writer.flush();
    out.write(" ");
    writeXMLContent(out, name);
    out.write("=\"");
    writeXMLContent(out, value);
    out.write("\"");
}
private static void writeXMLContent(Writer out, String text) throws IOException {
for (char ch : text.toCharArray()) {
switch (ch) {
case '<' -> out.write("<");
case '&' -> out.write("&");
case '>' -> out.write(">");
case '"' -> out.write(""");
case '\t' -> out.write("	");
case '\n' -> out.write("
");
case '\r' -> out.write("
");
default -> out.write(ch);
}
}
}
Newlines have to be escaped manually, because XMLStreamWriter will not do that. Its Javadoc states:
The XMLStreamWriter does not perform well formedness checking on its input. However the writeCharacters method is required to escape & , < and > For attribute values the writeAttribute method will escape the above characters plus " to ensure that all character content and attribute values are well formed.
Newlines are not among the characters listed for escaping.