javams-wordapache-poidocxdoc

APACHE POI: adding comment to existing word document


I am working on a legacy product in which some features need to be implemented. I am trying to use apache-poi 5.2.2 to add comments to an existing Word document, based on a search criterion. Basically, if a word in the docx document matches the originalText that is defined in the action, a comment needs to be added.

I have been able to add a comment to the document.

I have however been unable to add a comment range start and end to the comment (at the text that needs the comment). I am assuming that it needs some form of annotation as well. When I use for example a document with a pre-existing comment, I notice that the text at that location looks like:

<w:commentRangeStart w:id="0"/><w:r><w:rPr><w:b/><w:sz w:val="27"/></w:rPr><w:t>Júlio</w:t></w:r><w:r><w:rPr><w:b/><w:spacing w:val="-3"/><w:sz w:val="27"/></w:rPr><w:t xml:space="preserve"> </w:t></w:r><w:r><w:rPr><w:b/><w:sz w:val="27"/></w:rPr><w:t>César</w:t></w:r><w:commentRangeEnd w:id="0"/><w:r w:rsidR="00B43339"><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r><w:r>

And that the XML for the comment, which I understand is in a separate CommentsDocument looks like:

        //<xml-fragment w:id="0" w:author="<NAME OF COMMENT CREATOR>" w:date="2024-03-13T10:11:00Z" w:initials="<HERE COME THE INITIALS>" xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:oel="http://schemas.microsoft.com/office/2019/extlst" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" 
xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
        //  <w:p w14:paraId="4A10B938" w14:textId="77777777" w:rsidR="00B43339" w:rsidRDefault="00B43339" w:rsidP="00B43339">
        //    <w:r>
        //      <w:rPr>
        //        <w:rStyle w:val="CommentReference"/>
        //      </w:rPr>
        //      <w:annotationRef/>
        //    </w:r>
        //    <w:r>
        //      <w:rPr>
        //        <w:color w:val="000000"/>
        //        <w:sz w:val="20"/>
        //        <w:szCs w:val="20"/>
        //      </w:rPr>
        //      <w:t>This is a pre-annotation existing comment</w:t>
        //    </w:r>
        //  </w:p>
        //</xml-fragment>

Taking into account some of these posts (Adding comment to a specific word or run in docx document using Apache POI) I have tried a few things:

I am guessing that things need to be a combination of both. For now I want to focus on just adding the comment on a specific word.

if(paragraph.getText().contains(action.getOriginalText())){
                //SINCE NOT THE ENTIRE PARAGRAPH NEEDS TO BE ANNOTATED, WE NEED TO LOOK AT THE RUNS INSIDE THE PARAGRAPH
                for(int runIndex = internalParagraphRunIndex; runIndex < paragraph.getRuns().size(); runIndex++) {
                    XWPFRun run = paragraph.getRuns().get(runIndex);

                    if (run.text().equals(action.getOriginalText())) {
                        

                        //THE ENTIRE RUN NEEDS TO BE ANNOTATED
                        throw new RuntimeException("Not yet implemented");
                    } else if (run.text().contains(action.getOriginalText())) {
                        System.out.println("Part of the run needs to be annotated");
                        //THE TEXT THAT NEEDS TO BE ANNOTATED IS PART OF THE RUN
                        //Getting the comments from the document
                        XWPFComments comments = wordDocument.getDocComments();

                        CTComments existingCtComments = comments.getCtComments();

                        //Creating CTComment
                        CTComment newCTComment = existingCtComments.addNewComment();
                        newCTComment.setId(getCommentId(existingCtComments));

                        String[] splittedText = splitRunTextIntoParts(run, action.getOriginalText());

                        int indexOfTextThatNeedsToBeAnnotatedInSplittedText = findLocationOfTextInSplittedText(splittedText, action.getOriginalText());

                        for (int z = 0; z < splittedText.length; z++) {
                            if (indexOfTextThatNeedsToBeAnnotatedInSplittedText == -1) {
                                throw new RuntimeException("The text that needs to be annotated is not found in the splitted run.");
                            } else {
                                if (z == indexOfTextThatNeedsToBeAnnotatedInSplittedText) {
                                    //the exact word that needs to be annotated
                                    XWPFRun runToInsert = paragraph.insertNewRun(runIndex + z);

                                    //insert part of the text of the run
                                    runToInsert.setText(splittedText[z]);

                                    paragraph.getCTP().addNewCommentRangeEnd().setId(newCTComment.getId());

                                    //add the comment reference AFTER the text
                                    runToInsert.getCTR().addNewCommentReference().setId(newCTComment.getId());

                                    //TODO: remove styling
                                    runToInsert.setBold(true);

                                } else if (z == indexOfTextThatNeedsToBeAnnotatedInSplittedText - 1) {

                                    XWPFRun runToInsert = paragraph.insertNewRun(runIndex + z);

                                    //insert part of the text of the run
                                    runToInsert.setText(splittedText[z]);

                                    //add the range start after the text of the run
                                    CTMarkupRange rangeStartMarkupRange = paragraph.getCTP().addNewCommentRangeStart();
                                    rangeStartMarkupRange.setId(newCTComment.getId());
                                    newCTComment.setCommentRangeStartArray(new CTMarkupRange[]{rangeStartMarkupRange});

                                    //TODO: remove styling
                                    runToInsert.setItalic(true);


                                } else if (z == indexOfTextThatNeedsToBeAnnotatedInSplittedText + 1) {


                                    //add the range end before the text of the run
                                    CTMarkupRange rangeEndeMarkupRange = paragraph.getCTP().addNewCommentRangeEnd();
                                    rangeEndeMarkupRange.setId(newCTComment.getId());

                                    newCTComment.setCommentRangeEndArray(new CTMarkupRange[]{rangeEndeMarkupRange});
                                    //newCTComment.setCommentRangeEndArray(new CTMarkupRange[]{rangeEndeMarkupRange});

                                    //insert new run
                                    XWPFRun runToInsert = paragraph.insertNewRun(runIndex + z);
                                    //insert part of the text of the run
                                    runToInsert.setText(splittedText[z]);

                                    //TODO: remove styling
                                    runToInsert.setUnderline(UnderlinePatterns.SINGLE);
                                }
                            }
                        }

                        //remove original run
                        paragraph.removeRun(runIndex + splittedText.length); //the  new runs are put in front of the old run

                        //Creating the new XWPFComment based on the CTComment
                        XWPFComment newComment = new XWPFComment(newCTComment, comments);
                        newComment.setAuthor("Teradactor");
                        newComment.setDate(new GregorianCalendar());
                        newComment.setInitials("TD");
                        newComment.createParagraph().createRun().setText(action.getAnnotationText());


                        comments.createComment(BigInteger.valueOf(Long.parseLong(newComment.getId())));


                        setParagraphIndex(paragraphToLookAt);
                        setInternalParagraphRunIndex(runIndex + 3); // the next time we want to start from the runs after th, since this one is already annotated
                        setActionFound(true);
                        //because we are splitting the run into several runs, we need to decrease the index of the tnsmap text

                        break;

                    }

This method successfully adds a comment reference after the word that needs to be annotated. I can also see that the text before or after the word that needs to be annotated is styled (italic or underline) and that the word itself is bold. However, there is no proper comment reference on the word itself. If an annotation reference is necessary, it would be good to know that, and how to set it up.


Solution

  • So your main question is:

    How to comment single text parts in an existing Word document either being single text runs already or being within long text runs?

    This is very broad. Too broad to answer here. To get single text parts as their own text runs, see "How to Highlight Replaced Word Using Apache POI". Please also read "The value "name" and "surname" aren't read apache poi" and "Apache POI: ${my_placeholder} is treated as three different runs", as these provide a bug fix for XWPFParagraph.searchText.

    Aside from the question of how to get single text parts as their own text runs, the question of how to comment single text runs can be answered as follows:

    Each commented text run looks like so in word/document.xml:

    ...
    <w:commentRangeStart w:id="1"/>
    <w:r>
     <w:rPr>
     ...
     </w:rPr>
     <w:t>run text</w:t>
    </w:r>
    <w:commentRangeEnd w:id="1"/>
    <w:r>
     <w:commentReference w:id="1"/>
    </w:r>
    ..
    

    There is a commentRangeStart immediately before the text run and a commentRangeEnd immediately after the text run, which in turn is immediately followed by a text run containing only the commentReference.

    To create this we need to use the org.openxmlformats.schemas.wordprocessingml.x2006.main.* classes and native XML methods like org.apache.xmlbeans.XmlCursor, as Apache POI does not provide methods to do this in XWPF.

    Complete example which simply comments each single text run to show that it works.

    import java.io.*;
    
    import org.apache.poi.xwpf.usermodel.*;
    
    import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
    import org.apache.xmlbeans.XmlCursor;
    
    import java.math.BigInteger;
    import java.util.GregorianCalendar;
    import java.util.Locale;
    
    /**
     * Demonstrates how to attach a Word comment to individual text runs of an existing
     * {@code *.docx} file.
     *
     * <p>For each commented run the following structure is produced in
     * {@code word/document.xml}: a {@code commentRangeStart} immediately before the run, a
     * {@code commentRangeEnd} immediately after it, and directly after that a new run which
     * contains only the {@code commentReference}. The comment itself is stored in the
     * comments part obtained from the document.
     */
    public class WordCommentTextRuns {
    
     /**
      * Returns the comments part of the given document, creating a new one when the document
      * does not have one yet.
      *
      * @param document the document whose comments part is requested
      * @return the existing or newly created comments part
      */
     private static XWPFComments createCommentsDocument(XWPFDocument document) throws Exception {
      XWPFComments commentsDocument = document.getDocComments();
      if (commentsDocument == null) {
       commentsDocument = document.createComments();
       System.out.println("comments document created");
      }
      return commentsDocument;
     }
    
     /**
      * Computes the next free comment id: one more than the largest id currently present,
      * or {@code 1} when there are no comments.
      *
      * @param comments the comment container to scan
      * @return an id greater than every id already in {@code comments}
      */
     private static BigInteger getCommentId(CTComments comments) {
      BigInteger highestId = BigInteger.ZERO;
      for (CTComment ctComment : comments.getCommentList()) {
       BigInteger id = ctComment.getId();
       // compareTo only guarantees a positive value for "greater", so test the sign, not == 1;
       // a comment without an id attribute is skipped instead of causing a NullPointerException
       if (id != null && id.compareTo(highestId) > 0) {
        highestId = id;
       }
      }
      return highestId.add(BigInteger.ONE);
     }
    
     /**
      * Inserts a {@code commentRangeStart} element immediately before the given run.
      *
      * @param run the run that the comment range starts at
      * @return the newly inserted element; the caller still has to set its id
      */
     private static CTMarkupRange insertCommentRangeStartBefore(XWPFRun run) {
      String uri = CTMarkupRange.type.getName().getNamespaceURI();
      String localPart = "commentRangeStart";
      // the cursor is released when the block exits; the returned object stays valid
      try (XmlCursor cursor = run.getCTR().newCursor()) {
       // the new element is inserted at the cursor position, i.e. in front of the run
       cursor.beginElement(localPart, uri);
       // move from inside the new element back onto the element itself
       cursor.toParent();
       return (CTMarkupRange) cursor.getObject();
      }
     }
     
     /**
      * Inserts a {@code commentRangeEnd} element immediately after the given run.
      *
      * @param run the run that the comment range ends at
      * @return the newly inserted element; the caller still has to set its id
      */
     private static CTMarkupRange insertCommentRangeEndAfter(XWPFRun run) {
      String uri = CTMarkupRange.type.getName().getNamespaceURI();
      String localPart = "commentRangeEnd";
      try (XmlCursor cursor = run.getCTR().newCursor()) {
       // step to the run's end token and then past it, so the insert lands after the run
       cursor.toEndToken();
       cursor.toNextToken();
       cursor.beginElement(localPart, uri);
       cursor.toParent();
       return (CTMarkupRange) cursor.getObject();
      }
     }
    
     /**
      * Inserts a new run that contains only a {@code commentReference} with the given id,
      * immediately after the given {@code commentRangeEnd} element.
      *
      * @param commentRangeEnd the element after which the reference run is placed
      * @param cId the id of the comment being referenced
      */
     private static void insertCommentReferenceAfter(CTMarkupRange commentRangeEnd, BigInteger cId) {
      String uri = CTR.type.getName().getNamespaceURI();
      String localPart = "r";
      try (XmlCursor cursor = commentRangeEnd.newCursor()) {
       // step past the end of commentRangeEnd so the new run follows it directly
       cursor.toEndToken();
       cursor.toNextToken();
       cursor.beginElement(localPart, uri);
       cursor.toParent();
       CTR ctr = (CTR) cursor.getObject();
       ctr.addNewCommentReference().setId(cId);
      }
     }
    
     /**
      * Creates a new comment with the given text and anchors it to the given run.
      *
      * @param run the run to comment
      * @param comments the container the new comment is added to
      * @param commentText the text of the comment
      */
     private static void commentTextRun(XWPFRun run, CTComments comments, String commentText) {
      // determine the id before adding the new comment, which has no id yet
      BigInteger cId = getCommentId(comments);
    
      // the comment itself
      CTComment ctComment = comments.addNewComment();
      ctComment.setAuthor("Axel Ríchter");
      ctComment.setInitials("AR");
      ctComment.setDate(new GregorianCalendar(Locale.US));
      ctComment.addNewP().addNewR().addNewT().setStringValue(commentText);
      ctComment.setId(cId);
    
      // the anchor in the document body: range start, range end, then the reference run;
      // all three carry the same id as the comment
      CTMarkupRange commentRangeStart = insertCommentRangeStartBefore(run);
      commentRangeStart.setId(cId);
    
      CTMarkupRange commentRangeEnd = insertCommentRangeEndAfter(run);
      commentRangeEnd.setId(cId);
      insertCommentReferenceAfter(commentRangeEnd, cId);
     }
    
     /**
      * Reads {@code ./WordDocument.docx}, comments every text run of every body paragraph and
      * writes the result to {@code ./WordDocumentWithComments.docx}.
      */
     public static void main(String[] args) throws Exception {
    
      // try-with-resources closes the input stream and the document even when a step fails
      try (FileInputStream in = new FileInputStream("./WordDocument.docx");
           XWPFDocument document = new XWPFDocument(in)) {
    
       XWPFComments commentsDocument = createCommentsDocument(document);
       CTComments comments = commentsDocument.getCtComments();
    
       for (XWPFParagraph paragraph : document.getParagraphs()) {
        for (XWPFRun run : paragraph.getRuns()) {
         // simply comment each single text run to show that it works
         commentTextRun(run, comments, "Comment text");
        }
       }
    
       try (FileOutputStream out = new FileOutputStream("./WordDocumentWithComments.docx")) {
        document.write(out);
       }
      }
    
     }
    }