excelcheckboxapache-poixssfapache-poi-4

"Floating" checkbox/control boolean value not supported?


At this point I simply want to retrieve all checkboxes in a sheet of a .xlsx excel file. I found an older answer, but my file doesn't seem to include the x:Checked attribute, thus it retrieves the correct amount of checkboxes, however their values are all false (even though they shouldn't be).

More context: My .xlsx file was originally a .xls file and I converted it via LibreOffice, maybe that might be the problem here, not sure, so happy to get help from anyone more experienced.

My code is very similar to the one in this answer: https://stackoverflow.com/a/48972176/13600212


Solution

  • So... I wrote a "simple" .xlsx parser only for getting the checkboxes since I couldn't get it working with apache poi, here you go.

    The code can:

    1. Read regular and floating checkboxes from .xlsx files.
    2. Get the checkbox position (row/column, 0-based index)
    3. The returned checkboxes list is sorted by sheet name and then checkbox positions, meaning the checkboxes for the same sheet are grouped together and are sorted by row/column small->big.
    package com.osiris.danielmanager.excel;
    
    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.NodeList;
    
    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.transform.OutputKeys;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.stream.StreamResult;
    import java.io.*;
    import java.nio.charset.StandardCharsets;
    import java.util.*;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipInputStream;
    
    public class ExcelCheckboxesParser {
    
        private final Map<String, Document> cacheParsedXml = new HashMap<>();
    
        public static void main(String[] args) {
            try {
                File file = new File("example.xlsx");
                var parser = new ExcelCheckboxesParser();
                List<CheckboxInfo> checkboxes = parser.parseXLSX(file);
                for (CheckboxInfo checkbox : checkboxes) {
                    System.out.println(checkbox);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        public static String toString(Document doc) {
            try {
                StringWriter sw = new StringWriter();
                TransformerFactory tf = TransformerFactory.newInstance();
                Transformer transformer = tf.newTransformer();
                transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
                transformer.setOutputProperty(OutputKeys.METHOD, "xml");
                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    
                transformer.transform(new DOMSource(doc), new StreamResult(sw));
                return sw.toString();
            } catch (Exception ex) {
                throw new RuntimeException("Error converting to String", ex);
            }
        }
    
        private static void parsePosition(CheckboxInfo checkbox) {
            String ref = checkbox.cellReference;
            StringBuilder colStr = new StringBuilder();
            StringBuilder rowStr = new StringBuilder();
    
            // Split reference into column letters and row number
            int i = 0;
            while (i < ref.length() && Character.isLetter(ref.charAt(i))) {
                colStr.append(ref.charAt(i));
                i++;
            }
            while (i < ref.length() && Character.isDigit(ref.charAt(i))) {
                rowStr.append(ref.charAt(i));
                i++;
            }
    
            // Convert column letters to number (A=1, B=2, AA=27, etc.)
            String col = colStr.toString().toUpperCase();
            int column = 0;
            for (int j = 0; j < col.length(); j++) {
                column = column * 26 + (col.charAt(j) - 'A' + 1);
            }
    
            checkbox.column = column;
            checkbox.row = Integer.parseInt(rowStr.toString());
        }
    
        public List<CheckboxInfo> parseXLSX(File file) throws Exception {
            List<CheckboxInfo> checkboxes = new ArrayList<>();
            Map<String, String> sheetNames = new HashMap<>();
            Map<String, List<Relation>> sheetAndRelationshipPaths = new HashMap<>();
    
            try (ZipInputStream zis = new ZipInputStream(new FileInputStream(file))) {
                ZipEntry entry;
                Map<String, String> xmlFiles = new HashMap<>();
    
                // Extract XML files from .xlsx
                while ((entry = zis.getNextEntry()) != null) {
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buffer = new byte[1024];
                    int length;
                    while ((length = zis.read(buffer)) > 0) {
                        baos.write(buffer, 0, length);
                    }
                    xmlFiles.put(entry.getName(), baos.toString(StandardCharsets.UTF_8));
                    // Uncomment if you need to print the complete contents of the .xlsx file:
    //                try{
    //                    System.out.println("\n\n\n"+entry.getName()+"\n\n"+toString(parseXml(xmlFiles.get(entry.getName()))));
    //                } catch (Exception e) {
    //                    System.err.println("Failed to debug show: "+entry.getName()+" error: "+e.getMessage());
    //                    e.printStackTrace();
    //                }
    
                }
    
                // Parse sheet names and relationships
                if (xmlFiles.containsKey("xl/workbook.xml")) {
                    String workbookXml = xmlFiles.get("xl/workbook.xml");
                    Document doc = parseXml(workbookXml);
                    NodeList sheets = doc.getElementsByTagName("sheet");
                    for (int i = 0; i < sheets.getLength(); i++) {
                        Element sheet = (Element) sheets.item(i);
                        String sheetId = sheet.getAttribute("sheetId");
                        String sheetName = sheet.getAttribute("name");
                        sheetNames.put(sheetId, sheetName);
    
                        // Find the corresponding relationship for each sheet
                        String sheetRelsPath = "xl/worksheets/_rels/sheet" + sheetId + ".xml.rels";
                        if (xmlFiles.containsKey(sheetRelsPath)) {
                            String relsXml = xmlFiles.get(sheetRelsPath);
                            Document relsDoc = parseXml(relsXml);
                            NodeList relationships = relsDoc.getElementsByTagName("Relationship");
                            for (int j = 0; j < relationships.getLength(); j++) {
                                Element relationship = (Element) relationships.item(j);
                                String type = relationship.getAttribute("Type");
                                if (type.contains("ctrlProp")) {
                                    String absolutePath = relationship.getAttribute("Target").replace("../ctrlProps/", "xl/ctrlProps/");
                                    var list = sheetAndRelationshipPaths.get(sheetId);
                                    if (list == null) {
                                        list = new ArrayList<>();
                                        sheetAndRelationshipPaths.put(sheetId, list);
                                    }
                                    list.add(new Relation(sheetId, relationship.getAttribute("Id"), absolutePath));
                                }
                            }
                        }
                    }
                }
    
                // Parse checkboxes in each sheet
                for (String sheetId : sheetNames.keySet()) {
                    String sheetName = sheetNames.get(sheetId);
                    var docSheet = parseXml(xmlFiles.get("xl/worksheets/sheet" + sheetId + ".xml"));
                    if (sheetAndRelationshipPaths.containsKey(sheetId)) {
                        // Extract the control properties xml for checkboxes
                        for (var rel : sheetAndRelationshipPaths.get(sheetId)) {
                            String ctrlPropsXml = xmlFiles.get(rel.pathToXml);
                            Objects.requireNonNull(ctrlPropsXml);
                            Document ctrlDoc = parseXml(ctrlPropsXml);
    
                            NodeList formControls = ctrlDoc.getElementsByTagName("formControlPr");
                            for (int i = 0; i < formControls.getLength(); i++) {
                                Element formControl = (Element) formControls.item(i);
                                if ("CheckBox".equals(formControl.getAttribute("objectType"))) {
                                    CheckboxInfo checkboxInfo = new CheckboxInfo();
                                    checkboxInfo.sheetName = sheetName;
                                    checkboxInfo.isChecked = "Checked".equalsIgnoreCase(formControl.getAttribute("checked"));
                                    checkboxInfo.cellReference = formControl.getAttribute("cellReference");
                                    if (!checkboxInfo.cellReference.isEmpty())
                                        parsePosition(checkboxInfo);
                                    else {
    //                                    // 1. search for a "control" element in docSheet with r:id = rel.id and get the sub element "controlPr" altText attribute
    //                                    // 2. also search for xdr:row, xdr:rowOff, xdr:col, xdr:colOff elements within "control", get their contents
    //                                    // 3. if all are == 0 means this is a floating checkbox and we cannot use the values, otherwise set to checkbox.column/row accordingly
    //                                    // 4. if floating checkbox continue with checking .vml files, like shown below:
    //                                    for(Map.Entry<String, String> e : xmlFiles.entrySet()){
    //                                        if(e.getKey().endsWith(".vml")){
    //                                            var docVml = parseXml(e.getValue());
    //
    //                                            // 5. in docVml search for a v:shape element that has id = altText
    //                                            // 6. get the sub element named x:Anchor and its content, which will be a integer list separated by commas
    //                                            // 7. set checkbox.column to list[0] and set checkbox.row to list[2]
    //                                        }
    //                                    }
    
                                        // Search for control element in worksheet
                                        NodeList controls = docSheet.getElementsByTagName("control");
                                        for (int j = 0; j < controls.getLength(); j++) {
                                            Element control = (Element) controls.item(j);
                                            if (control.getAttribute("r:id").equals(rel.id)) {
                                                // Get position from control element
                                                NodeList controlPrs = control.getElementsByTagName("controlPr");
                                                if (controlPrs.getLength() > 0) {
                                                    Element controlPr = (Element) controlPrs.item(0);
                                                    String altText = controlPr.getAttribute("altText");
    
                                                    // Check if it's a floating checkbox
                                                    NodeList rows = control.getElementsByTagName("xdr:row");
                                                    NodeList cols = control.getElementsByTagName("xdr:col");
                                                    NodeList rowOffs = control.getElementsByTagName("xdr:rowOff");
                                                    NodeList colOffs = control.getElementsByTagName("xdr:colOff");
    
                                                    if (rows.getLength() > 0 && cols.getLength() > 0) {
                                                        int row = Integer.parseInt(rows.item(0).getTextContent());
                                                        int col = Integer.parseInt(cols.item(0).getTextContent());
                                                        long rowOff = Long.parseLong(rowOffs.item(0).getTextContent());
                                                        long colOff = Long.parseLong(colOffs.item(0).getTextContent());
    
                                                        checkboxInfo.row = row;
                                                        checkboxInfo.column = col;
                                                        if (row != 0 && col != 0 && rowOff != 0 && colOff != 0)
                                                            continue;
                                                    }
    
                                                    // If we get here, it's a floating checkbox - check VML files
                                                    for (Map.Entry<String, String> e : xmlFiles.entrySet()) {
                                                        if (e.getKey().endsWith(".vml")) {
                                                            Document docVml = parseXml(e.getValue());
                                                            NodeList shapes = docVml.getElementsByTagName("v:shape");
    
                                                            for (int k = 0; k < shapes.getLength(); k++) {
                                                                Element shape = (Element) shapes.item(k);
                                                                if (shape.getAttribute("id").equals(altText)) {
                                                                    NodeList anchors = shape.getElementsByTagName("x:Anchor");
                                                                    if (anchors.getLength() > 0) {
                                                                        String[] coords = anchors.item(0).getTextContent().split(",");
                                                                        if (coords.length >= 4) {
                                                                            checkboxInfo.column = Integer.parseInt(coords[0].trim());
                                                                            checkboxInfo.row = Integer.parseInt(coords[2].trim());
                                                                        }
                                                                    }
                                                                    break;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                                break;
                                            }
                                        }
                                    }
                                    checkboxes.add(checkboxInfo);
                                }
                            }
                        }
                    }
                }
            }
    
            // Sort checkboxes by row and column
            Collections.sort(checkboxes);
            return checkboxes;
        }
    
        private synchronized Document parseXml(String xmlContent) throws Exception {
            synchronized (cacheParsedXml) {
                var doc = cacheParsedXml.get(xmlContent);
                if (doc != null) return doc;
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                DocumentBuilder builder = factory.newDocumentBuilder();
                doc = builder.parse(new ByteArrayInputStream(xmlContent.getBytes()));
                cacheParsedXml.put(xmlContent, doc);
                return doc;
            }
        }
    
        /**
         * Is equivalent to something like this: <br>
         * <pre>
         *     <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/ctrlProp" Target="../ctrlProps/ctrlProps2.xml"/>
         * </pre>
         */
        public static class Relation {
            public String sheetId;
            public String id;
            public String pathToXml;
    
            public Relation(String sheetId, String id, String pathToXml) {
                this.sheetId = sheetId;
                this.id = id;
                this.pathToXml = pathToXml;
            }
        }
    
        public static class CheckboxInfo implements Comparable<CheckboxInfo> {
            public String sheetName;
            public boolean isChecked;
            public String cellReference;
            public int row;
            public int column;
    
            @Override
            public String toString() {
                return "Checkbox [Sheet: " + sheetName + ", Checked: " + isChecked +
                        ", Cell: " + cellReference + ", Row: " + row + ", Column: " + column + "]";
            }
    
            @Override
            public int compareTo(CheckboxInfo other) {
                // First compare by sheet name
                int sheetCompare = this.sheetName.compareTo(other.sheetName);
                if (sheetCompare != 0) return sheetCompare;
    
                // Then by row
                int rowCompare = Integer.compare(this.row, other.row);
                if (rowCompare != 0) return rowCompare;
    
                // Finally by column
                return Integer.compare(this.column, other.column);
            }
        }
    }
    

    enter image description here