Tags: java, xml, dom, xml-parsing, jdom

Java code snippet to replace single quotes (') with double quotes in a whole XML file


I have an XML file with nested tags, and I can use either a DOM or a JDOM parser. I want to replace every single quote (') inside the text of every tag with a double quote, across the whole XML file. Tags can also be nested inside other tags. I want a loop that visits every tag and replaces its value, e.g. HYPER SHIPPING'SDN BHD_First_Page --> HYPER SHIPPING''SDN BHD_First_Page

Sample code

    /**
     * Recursively walks {@code parentNode} and, in every element that has no child
     * elements, replaces each single quote (') in its text with a double quote (").
     * Each updated value is logged.
     *
     * @param parentNode element to start from; its descendant elements are visited
     *                   depth-first
     */
    public void iterateChildNodes(org.jdom.Element parentNode) {
        List<Element> children = parentNode.getChildren();
        if (children.isEmpty()) {
            String text = parentNode.getText();
            if (text.contains("'")) {
                // Note: "\'" is the very same one-character string as "'" in Java, so it
                // cannot serve as the replacement. String.replace is used (not
                // replaceAll) because no regular expression is needed.
                // NOTE(review): change the replacement to "''" if a doubled single
                // quote is what is actually wanted.
                parentNode.setText(text.replace("'", "\""));
                LOGGER.info("*************  Below Value updated");
                LOGGER.info(parentNode.getText());
            }
            return;
        }
        for (Element child : children) {
            iterateChildNodes(child);
        }
    }

Sample XML File

    <Document>
        <Identifier>DOC1</Identifier>
        <Type>HYPER SHIPPING SDN BHD</Type>
        <Description>HYPER SHIPPING SDN BHD</Description>
        <Confidence>33.12</Confidence>
        <ConfidenceThreshold>10.0</ConfidenceThreshold>
        <Valid>true</Valid>
        <Reviewed>true</Reviewed>
        <ReviewedBy>SYSTEM</ReviewedBy>
        <ValidatedBy>SYSTEM</ValidatedBy>
        <ErrorMessage/>
        <Value>HYPER SHIPPING'SDN BHD_First_Page</Value>  //Value to be replaced here
        <DocumentDisplayInfo/>
        <DocumentLevelFields/>
        <Pages>
            <Page>
                <Identifier>PG0</Identifier>
                <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                <NewFileName>BI2E7_0.tif</NewFileName>
                <SourceFileID>1</SourceFileID>
                <PageLevelFields>
                    <PageLevelField>
                        <Name>Search_Engine_Classification</Name>
                        <Value>Park Street '10 road</Value>     //Value to be replaced here
                        <Type/>
                        <Confidence>66.23</Confidence>
                        <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                        <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                        <OcrConfidence>0.0</OcrConfidence>
                        <FieldOrderNumber>0</FieldOrderNumber>
                        <ForceReview>false</ForceReview>
                    </PageLevelField>
                </PageLevelFields>
            </Page>
        </Pages>
    </Document>

Solution

  • This code replaces every ' with " in the text of an XML file.

    No detailed description is added here; read through the code step by step. It is very easy to understand.

    (Updated)

    Part 1: Using JDOM

    import java.util.ArrayList;
    import java.util.List;
    
    import org.w3c.dom.NodeList;
    import org.jdom2.input.SAXBuilder;
    import org.jdom2.transform.JDOMSource;
    import org.w3c.dom.*;
    
    import java.io.*;
    
    /**
     * Replaces every single quote (') found in the text of {@code document.xml} with a
     * double quote (") using JDOM 2, and writes the result to
     * {@code updated-document-jdom.xml}.
     */
    public class XmlModificationJDom {

        /** File the XML is read from, resolved against the working directory. */
        private static final String INPUT_FILE = "document.xml";

        /** File the modified XML is written to. */
        private static final String OUTPUT_FILE = "updated-document-jdom.xml";

        public static void main(String[] args) {
            XmlModificationJDom xmlModificationJDom = new XmlModificationJDom();
            xmlModificationJDom.updateXmlAndSaveJDom();
        }

        /**
         * Parses the input file, rewrites the quotes in its text and saves the result.
         * Any failure is reported on standard error; nothing is thrown to the caller.
         */
        public void updateXmlAndSaveJDom() {
            try {
                File inputFile = new File(INPUT_FILE);
                SAXBuilder saxBuilder = new SAXBuilder();
                org.jdom2.Document xmlDocument = saxBuilder.build(inputFile);
                org.jdom2.Element rootElement = xmlDocument.getRootElement();

                iterateAndUpdateElementsUsingJDom(rootElement);

                saveUpdatedXmlUsingJDomSource(xmlDocument);
            } catch (Exception ex) {
                // Top-level boundary of this small tool: report the failure and return.
                ex.printStackTrace();
            }
        }

        /**
         * Recursively replaces ' with " in every text (and CDATA) node below
         * {@code element}.
         *
         * <p>Text nodes are edited in place instead of calling
         * {@code element.setText(...)}, so text that sits next to child elements is
         * covered as well and sibling content such as comments is left untouched.
         *
         * @param element element whose content is scanned; must not be {@code null}
         */
        public void iterateAndUpdateElementsUsingJDom(org.jdom2.Element element) {
            // Only node values change, never the content list itself, so iterating the
            // live list is safe.
            for (org.jdom2.Content content : element.getContent()) {
                if (content instanceof org.jdom2.Text) {
                    org.jdom2.Text textNode = (org.jdom2.Text) content;
                    String value = textNode.getText();
                    if (value.indexOf('\'') >= 0) {
                        // Plain character replacement; no regular expression needed.
                        textNode.setText(value.replace('\'', '"'));
                    }
                } else if (content instanceof org.jdom2.Element) {
                    iterateAndUpdateElementsUsingJDom((org.jdom2.Element) content);
                }
            }
        }

        /**
         * Serializes {@code xmlDocument} to {@code updated-document-jdom.xml} through a
         * JAXP identity transformer fed by a {@link JDOMSource}.
         *
         * @param xmlDocument the JDOM document to write
         * @throws javax.xml.transform.TransformerException if the transformer cannot be
         *         created or the document cannot be written
         */
        public void saveUpdatedXmlUsingJDomSource(org.jdom2.Document xmlDocument)
                throws javax.xml.transform.TransformerException {
            // Fully-qualified names keep this class independent of extra imports.
            javax.xml.transform.Transformer transformer =
                    javax.xml.transform.TransformerFactory.newInstance().newTransformer();
            transformer.transform(
                    new JDOMSource(xmlDocument),
                    new javax.xml.transform.stream.StreamResult(new File(OUTPUT_FILE)));
        }
    }
    

    Part 2: Using DOM

    import java.io.*;
    import java.util.ArrayList;
    import java.util.List;
    import javax.xml.parsers.*;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.Node;
    import org.w3c.dom.NodeList;
    
    /**
     * Replaces every single quote (') found in the text of {@code document.xml} with a
     * double quote (") using the W3C DOM API, and writes the result to
     * {@code updated-document.xml}.
     */
    public class XmlModificationDom {

        public static void main(String[] args) {
            XmlModificationDom xmlModificationDom = new XmlModificationDom();
            xmlModificationDom.updateXmlAndSave();
        }

        /**
         * Parses the input file, rewrites the quotes in its text and saves the result.
         * Any failure is reported on standard error; nothing is thrown to the caller.
         */
        public void updateXmlAndSave() {
            try {
                File inputFile = new File("document.xml");
                DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
                DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
                Document document = dBuilder.parse(inputFile);
                // Merge adjacent text nodes so each run of text is handled once.
                document.getDocumentElement().normalize();

                // Start from the document node itself: its first child is not guaranteed
                // to be the root element (it may be a comment or processing instruction).
                iterateChildNodesAndUpate(document);

                writeAndSaveXML(document);
            } catch (Exception ex) {
                // Top-level boundary of this small tool: report the failure and return.
                ex.printStackTrace();
            }
        }

        /**
         * Serializes {@code document} to {@code updated-document.xml} with a JAXP
         * identity transformer.
         *
         * @param document the DOM document to write
         * @throws Exception if the transformer cannot be created or the write fails
         */
        public void writeAndSaveXML(Document document) throws Exception {
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            DOMSource source = new DOMSource(document);
            StreamResult result = new StreamResult(new File("updated-document.xml"));
            transformer.transform(source, result);
        }

        /**
         * Recursively replaces ' with " in every text and CDATA node below
         * {@code parentNode}.
         *
         * <p>Text nodes are edited directly rather than guessing from the child count
         * whether an element is a leaf. Calling {@code setTextContent} on an element
         * that holds a single child element would delete that child, and text that
         * shares its parent with comments or elements would otherwise be skipped.
         *
         * @param parentNode node whose descendants are scanned; {@code null} is ignored
         */
        public void iterateChildNodesAndUpate(Node parentNode) {
            if (parentNode == null) {
                return;
            }

            NodeList children = parentNode.getChildNodes();
            for (int index = 0; index < children.getLength(); index++) {
                Node child = children.item(index);
                short type = child.getNodeType();

                if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                    String value = child.getNodeValue();
                    if (value != null && value.indexOf('\'') >= 0) {
                        // Plain character replacement; no regular expression needed.
                        child.setNodeValue(value.replace('\'', '"'));
                    }
                } else if (type == Node.ELEMENT_NODE) {
                    iterateChildNodesAndUpate(child);
                }
            }
        }
    }
    

    Input file document.xml:

    <Document>
            <Identifier>DOC1</Identifier>
            <Type>HYPER SHIPPING SDN BHD</Type>
            <Description>HYPER SHIPPING SDN BHD</Description>
            <Confidence>33.12</Confidence>
            <ConfidenceThreshold>10.0</ConfidenceThreshold>
            <Valid>true</Valid>
            <Reviewed>true</Reviewed>
            <ReviewedBy>SYSTEM</ReviewedBy>
            <ValidatedBy>SYSTEM</ValidatedBy>
            <ErrorMessage/>
            <Value>HYPER SHIPPING'SDN BHD_First_Page</Value>  //Value to be replaced here
            <DocumentDisplayInfo/>
            <DocumentLevelFields/>
            <Pages>
                <Page>
                    <Identifier>PG0</Identifier>
                    <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                    <NewFileName>BI2E7_0.tif</NewFileName>
                    <SourceFileID>1</SourceFileID>
                    <PageLevelFields>
                        <PageLevelField>
                            <Name>Search_Engine_Classification</Name>
                            <Value>Park Street '10 road</Value>     //Value to be replaced here
                            <Type/>
                            <Confidence>66.23</Confidence>
                            <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                            <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                            <OcrConfidence>0.0</OcrConfidence>
                            <FieldOrderNumber>0</FieldOrderNumber>
                            <ForceReview>false</ForceReview>
                        </PageLevelField>
                    </PageLevelFields>
                </Page>
            </Pages>
    </Document>
    

    Output updated-document.xml/updated-document-jdom.xml:

    <?xml version="1.0" encoding="UTF-8" standalone="no"?>
    <Document>
            <Identifier>DOC1</Identifier>
            <Type>HYPER SHIPPING SDN BHD</Type>
            <Description>HYPER SHIPPING SDN BHD</Description>
            <Confidence>33.12</Confidence>
            <ConfidenceThreshold>10.0</ConfidenceThreshold>
            <Valid>true</Valid>
            <Reviewed>true</Reviewed>
            <ReviewedBy>SYSTEM</ReviewedBy>
            <ValidatedBy>SYSTEM</ValidatedBy>
            <ErrorMessage/>
            <Value>HYPER SHIPPING"SDN BHD_First_Page</Value><DocumentDisplayInfo/>
            <DocumentLevelFields/>
            <Pages>
                <Page>
                    <Identifier>PG0</Identifier>
                    <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                    <NewFileName>BI2E7_0.tif</NewFileName>
                    <SourceFileID>1</SourceFileID>
                    <PageLevelFields>
                        <PageLevelField>
                            <Name>Search_Engine_Classification</Name>
                            <Value>Park Street "10 road</Value><Type/>
                            <Confidence>66.23</Confidence>
                            <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                            <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                            <OcrConfidence>0.0</OcrConfidence>
                            <FieldOrderNumber>0</FieldOrderNumber>
                            <ForceReview>false</ForceReview>
                        </PageLevelField>
                    </PageLevelFields>
                </Page>
            </Pages>
    </Document>
    

    For more detailed code, visit this repo.