I have code that uses the PDFBox API to highlight words in a PDF, but when I convert the highlighted PDF pages to images, everything I highlighted disappears from the image.
The screenshot below shows the PDF with the highlighted text; for highlighting I used PDFBox's PDAnnotationTextMarkup class:
Below is the image after converting the pdf page to image:
Highlighted PDF Page Image after converting
Below is the code I have used for converting PDF to Image:
// Render every page of the PDF to a 300 DPI RGB image and write it as
// "<pdfFilename>-<pageIndex>.png" (page indices start at 0).
// try-with-resources closes the document even if rendering or writing throws.
try (PDDocument document = PDDocument.load(new File(pdfFilename))) {
    PDFRenderer pdfRenderer = new PDFRenderer(document);
    int pageCount = document.getNumberOfPages();
    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        BufferedImage bim = pdfRenderer.renderImageWithDPI(pageIndex, 300, ImageType.RGB);
        ImageIOUtil.writeImage(bim, pdfFilename + "-" + pageIndex + ".png", 300);
    }
}
Please suggest what is wrong here: why is PDFRenderer not able to render the PDF page image along with the highlighted red box?
Below is the code I used to highlight the text in PDF:
/**
 * Adds a red highlight annotation for every collected word and saves the
 * document to {@code highlightedPdfFilePath}.
 *
 * <p>The input is the file at {@code highlightedPdfFilePath} when it already
 * exists, otherwise the file at {@code pdfFilePath}. Word positions are read
 * from the {@code coordinates} and {@code tokenStream} fields after the text
 * stripper has run over the document.
 *
 * <p>An {@link IOException} raised while loading, stripping or saving is
 * caught and printed to standard output; it is not rethrown.
 *
 * @param pdfFilePath            path of the original PDF
 * @param highlightedPdfFilePath path the highlighted PDF is read from (if
 *                               present) and written to
 */
private void highlightText(String pdfFilePath, String highlightedPdfFilePath) {
    // Prefer the already-highlighted copy as input; fall back to the original.
    File file = new File(highlightedPdfFilePath);
    if (!file.exists()) {
        file = new File(pdfFilePath);
    }
    // try-with-resources closes the document even when stripping or saving fails.
    try (PDDocument document = PDDocument.load(file)) {
        // extended PDFTextStripper class
        PDFTextStripper stripper = new PDFTextHighlighter();
        // Get number of pages
        int number_of_pages = document.getDocumentCatalog().getPages().getCount();
        // writeText is called for its side effects only; the written text is
        // discarded. (The original comment says it invokes an overridden
        // writeString.)
        Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
        stripper.writeText(document, dummy);
        // Print collected information
        System.out.println("tokenStream:::" + tokenStream);
        System.out.println("tokenStream size::" + tokenStream.size());
        System.out.println("coordinates size::" + coordinates.size());
        // Color used for every highlight
        PDColor red = new PDColor(new float[] { 1, 0, 0 }, PDDeviceRGB.INSTANCE);
        // scan each page and highlight all the words inside them
        for (int page_index = 0; page_index < number_of_pages; page_index++) {
            // get current page
            PDPage page = document.getPage(page_index);
            // Get annotations for the selected page
            List<PDAnnotation> annotations = page.getAnnotations();
            // Page height and width
            double page_height = page.getMediaBox().getHeight();
            double page_width = page.getMediaBox().getWidth();
            // NOTE(review): this records every page index whenever any
            // coordinates exist, not only pages that receive a highlight.
            // Behavior kept as in the original; confirm it is intended.
            if (coordinates.size() > 0 && !differencePgaeNumber.contains(page_index)) {
                differencePgaeNumber.add(page_index);
            }
            // Scan collected coordinates
            for (int i = 0; i < coordinates.size(); i++) {
                // index 4 is compared against the 1-based page number;
                // entries for other pages are skipped
                if ((int) coordinates.get(i)[4] != (page_index + 1)) {
                    continue;
                }
                double width, height, minx, maxx, miny, maxy;
                // get rotation of the page...portrait..landscape..
                int rotation = (int) coordinates.get(i)[7];
                if (rotation == 90) {
                    // page rotated of 90degrees
                    height = coordinates.get(i)[5];
                    width = coordinates.get(i)[6];
                    width = (page_height * width) / page_width;
                    // define coordinates of a rectangle
                    maxx = coordinates.get(i)[1];
                    minx = coordinates.get(i)[1] - height;
                    miny = coordinates.get(i)[0];
                    maxy = coordinates.get(i)[0] + width;
                } else {
                    // the cases -90/-180 degrees are not handled separately
                    // and fall through to this branch
                    height = coordinates.get(i)[5];
                    minx = coordinates.get(i)[0];
                    maxx = coordinates.get(i)[2];
                    miny = page_height - coordinates.get(i)[1];
                    maxy = page_height - coordinates.get(i)[3] + height;
                }
                // Add an annotation for each scanned word
                PDAnnotationTextMarkup txtMark = new PDAnnotationTextMarkup(
                        PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                txtMark.setColor(red);
                txtMark.setConstantOpacity((float) 0.3); // 30% transparent
                PDRectangle position = new PDRectangle();
                position.setLowerLeftX((float) minx);
                position.setLowerLeftY((float) miny);
                position.setUpperRightX((float) maxx);
                position.setUpperRightY((float) ((float) maxy + height));
                txtMark.setRectangle(position);
                float[] quads = new float[8];
                quads[0] = position.getLowerLeftX(); // x1
                quads[1] = position.getUpperRightY() - 2; // y1
                quads[2] = position.getUpperRightX(); // x2
                quads[3] = quads[1]; // y2
                quads[4] = quads[0]; // x3
                quads[5] = position.getLowerLeftY() - 2; // y3
                quads[6] = quads[2]; // x4
                quads[7] = quads[5]; // y4
                txtMark.setQuadPoints(quads);
                txtMark.setContents(tokenStream.get(i).toString());
                // Build the annotation's visual appearance so the highlight
                // also shows up when the page is rendered to an image.
                txtMark.constructAppearances(document);
                annotations.add(txtMark);
            }
        }
        // Saving the document in a new file
        File highlighted_doc = new File(highlightedPdfFilePath);
        document.save(highlighted_doc);
    } catch (IOException e) {
        System.out.println(e);
    }
}
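PDFRenderer draws an annotation from its appearance stream, and the highlight annotations created above do not have one yet. You need to construct the visual appearance of each annotation (before saving the document) with this call: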
// Constructs the visual appearance of the highlight annotation so that it is drawn when the page is rendered.
txtMark.constructAppearances(document);