Tags: java, pdf, pdf-generation, pdfbox, ghost4j

Convert a PDF file to image


I wanted to convert PDF document into image. I was using Ghost4j.

Problem: Ghost4J needs gsdll32.dll file at runtime, and I do not want to use the dll file.

Question 1: Is there any way in Ghost4J to convert a PDF to an image without the DLL?

Question 2: I found a solution in the PDFBox API. `org.apache.pdfbox.pdmodel.PDPage` has a method `convertToImage()` which converts a PDF page to an image.

// Open the PDF located at /document.pdf.
PDDocument doc = PDDocument.load(new File("/document.pdf"));
// Fetch the document's pages and pick the first one.
List<PDPage>pages = doc.getDocumentCatalog().getAllPages();
PDPage page = pages.get(0);
// Render that page to an in-memory image (this is the call that appears in the stack trace below).
BufferedImage image = page.convertToImage();
// Write the rendered page out as a PNG file.
File outputfile = new File("/image.png");
ImageIO.write(image, "png", outputfile);
// NOTE(review): close() is skipped if any call above throws — a try/finally would avoid that.
doc.close();

The PDF document contains only text, and I get the following exception when I run this code:

Aug 12, 2013 6:00:24 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
INFO: unsupported/disabled operation: BDC
Exception in thread "main" java.lang.ExceptionInInitializerError
    at org.apache.pdfbox.pdmodel.font.PDTrueTypeFont.getawtFont(PDTrueTypeFont.java:481)
    at org.apache.pdfbox.pdmodel.font.PDSimpleFont.drawString(PDSimpleFont.java:109)
    at org.apache.pdfbox.pdfviewer.PageDrawer.processTextPosition(PageDrawer.java:235)
    at org.apache.pdfbox.util.PDFStreamEngine.processEncodedText(PDFStreamEngine.java:496)
    at org.apache.pdfbox.util.operator.ShowTextGlyph.process(ShowTextGlyph.java:62)
    at org.apache.pdfbox.util.PDFStreamEngine.processOperator(PDFStreamEngine.java:554)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:268)
    at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:235)
    at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:215)
    at org.apache.pdfbox.pdfviewer.PageDrawer.drawPage(PageDrawer.java:125)
    at org.apache.pdfbox.pdmodel.PDPage.convertToImage(PDPage.java:781)
    at org.apache.pdfbox.pdmodel.PDPage.convertToImage(PDPage.java:712)
    at ge.eid.esignature.adessa.pades.sign.PDFtoImage.main(PDFtoImage.java:25)
Caused by: java.lang.IllegalArgumentException
    at java.nio.Buffer.position(Buffer.java:216)
    at sun.font.TrueTypeFont.lookupName(TrueTypeFont.java:1153)
    at sun.font.TrueTypeFont.getPostscriptName(TrueTypeFont.java:1205)
    at java.awt.Font.getPSName(Font.java:1156)
    at org.apache.pdfbox.pdmodel.font.FontManager.loadFonts(FontManager.java:101)
    at org.apache.pdfbox.pdmodel.font.FontManager.<clinit>(FontManager.java:53)
    ... 13 more

Solution

  • You can easily convert 04-Request-Headers.pdf file pages into image format.

    Convert all pdf pages into image format in Java using PDF Box.

    Solution for Apache PDFBox 1.8.* version:

    Jar required pdfbox-1.8.3.jar

    or the maven dependency

    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>1.8.3</version>
    </dependency>
    

    Here is the solution:

    package com.pdf.pdfbox.examples;
    
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.util.List;
    
    import javax.imageio.ImageIO;
    
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.pdmodel.PDPage;
    
    /**
     * Renders every page of a PDF document to its own PNG file using the
     * Apache PDFBox 1.8.x API ({@code PDPage.convertToImage()}).
     *
     * <p>Output files are named {@code <pdf-name>_<page-number>.png}, with page
     * numbers starting at 1, and are written into the destination folder.
     */
    public class ConvertPDFPagesToImages {

        /**
         * Converts the hard-coded source PDF into one PNG per page.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String sourceDir = "C:/Documents/04-Request-Headers.pdf"; // path of the PDF file to convert
            String destinationDir = "C:/Documents/Converted_PdfFiles_to_Image/"; // converted images are saved here

            File sourceFile = new File(sourceDir);
            File destinationFile = new File(destinationDir);

            if (!destinationFile.exists()) {
                // mkdirs() also creates missing parent folders; report success only when it really happened.
                if (!destinationFile.mkdirs()) {
                    System.err.println("Could not create folder -> " + destinationFile.getAbsolutePath());
                    return;
                }
                System.out.println("Folder Created -> " + destinationFile.getAbsolutePath());
            }

            if (!sourceFile.exists()) {
                System.err.println(sourceFile.getName() + " File not exists");
                return;
            }

            PDDocument document = null;
            try {
                System.out.println("Images copied to Folder: " + destinationFile.getName());
                document = PDDocument.load(sourceDir);

                // getAllPages() returns a raw List in PDFBox 1.8, hence the narrowly scoped suppression.
                @SuppressWarnings("unchecked")
                List<PDPage> pages = document.getDocumentCatalog().getAllPages();
                System.out.println("Total files to be converted -> " + pages.size());

                String fileName = stripPdfExtension(sourceFile.getName());
                int pageNumber = 1;
                for (PDPage page : pages) {
                    BufferedImage image = page.convertToImage();
                    File outputfile = new File(destinationFile, fileName + "_" + pageNumber + ".png");
                    // ImageIO.write returns false (without throwing) when no suitable writer exists.
                    if (ImageIO.write(image, "png", outputfile)) {
                        System.out.println("Image Created -> " + outputfile.getName());
                    } else {
                        System.err.println("No image writer available for -> " + outputfile.getName());
                    }
                    pageNumber++;
                }
                System.out.println("Converted Images are saved at -> " + destinationFile.getAbsolutePath());
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Always release the document, even when rendering or writing failed part-way.
                if (document != null) {
                    try {
                        document.close();
                    } catch (Exception closeFailure) {
                        closeFailure.printStackTrace();
                    }
                }
            }
        }

        /** Removes a trailing ".pdf" (any letter case) from a file name; other names are returned unchanged. */
        private static String stripPdfExtension(String name) {
            String extension = ".pdf";
            int start = name.length() - extension.length();
            if (start >= 0 && name.regionMatches(true, start, extension, 0, extension.length())) {
                return name.substring(0, start);
            }
            return name;
        }
    }
    

    Possible conversions of image into jpg, jpeg, png, bmp, gif format.

    Note: I mentioned the mainly used image formats.

    // Alternatives, not a sequence: each line writes the same rendered page in a different
    // format. The second argument is the ImageIO format name and matches the file extension.
    // NOTE(review): ImageIO.write returns false when no writer can handle the image for the
    // requested format — check the result rather than assuming the file was written.
    ImageIO.write(image , "jpg", new File( destinationDir +fileName+"_"+pageNumber+".jpg" ));
    ImageIO.write(image , "jpeg", new File( destinationDir +fileName+"_"+pageNumber+".jpeg" ));
    ImageIO.write(image , "png", new File( destinationDir +fileName+"_"+pageNumber+".png" ));
    ImageIO.write(image , "bmp", new File( destinationDir +fileName+"_"+pageNumber+".bmp" ));
    ImageIO.write(image , "gif", new File( destinationDir +fileName+"_"+pageNumber+".gif" ));
    

    Console Output:

    Images copied to Folder: Converted_PdfFiles_to_Image
    Total files to be converted -> 13
    Aug 06, 2014 1:35:49 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_1.png
    Aug 06, 2014 1:35:50 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_2.png
    Aug 06, 2014 1:35:51 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_3.png
    Aug 06, 2014 1:35:51 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_4.png
    Aug 06, 2014 1:35:52 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_5.png
    Aug 06, 2014 1:35:52 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_6.png
    Aug 06, 2014 1:35:53 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_7.png
    Aug 06, 2014 1:35:53 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_8.png
    Aug 06, 2014 1:35:54 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_9.png
    Aug 06, 2014 1:35:54 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_10.png
    Aug 06, 2014 1:35:54 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_11.png
    Aug 06, 2014 1:35:55 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_12.png
    Aug 06, 2014 1:35:55 PM org.apache.pdfbox.util.PDFStreamEngine processOperator
    INFO: unsupported/disabled operation: i
    Image Created -> 04-Request-Headers_13.png
    Converted Images are saved at -> C:\Documents\Converted_PdfFiles_to_Image
    

    Solution for Apache PDFBox 2.0.* version:

    Required Jars pdfbox-2.0.16.jar, fontbox-2.0.16.jar, commons-logging-1.2.jar

    or from the pom.xml dependencies

    <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.16</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>fontbox</artifactId>
        <version>2.0.16</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-logging/commons-logging -->
    <dependency>
        <groupId>commons-logging</groupId>
        <artifactId>commons-logging</artifactId>
        <version>1.2</version>
    </dependency>
    

    Solution for 2.0.16 version:

    package com.pdf.pdfbox.examples;
    
    import java.awt.image.BufferedImage;
    import java.io.File;
    
    import javax.imageio.ImageIO;
    
    import org.apache.pdfbox.pdmodel.PDDocument;
    import org.apache.pdfbox.rendering.ImageType;
    import org.apache.pdfbox.rendering.PDFRenderer;
    
    /**
     * Renders every page of a PDF document to its own image file using the
     * Apache PDFBox 2.0.x rendering API ({@code PDFRenderer}).
     *
     * <p>Output files are named {@code <pdf-name>_<page-number>.<extension>}, with
     * page numbers starting at 1, and are written into the destination folder.
     *
     * @author venkataudaykiranp
     *
     * @version 2.0.16(Apache PDFBox version support)
     */
    public class ConvertPDFPagesToImages {

        /**
         * Converts the hard-coded source PDF into one image per page.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            String sourceDir = "C:\\Users\\venkataudaykiranp\\Downloads\\04-Request-Headers.pdf"; // path of the PDF file to convert
            String destinationDir = "C:\\Users\\venkataudaykiranp\\Downloads\\Converted_PdfFiles_to_Image/"; // converted images are saved here

            File sourceFile = new File(sourceDir);
            File destinationFile = new File(destinationDir);

            if (!destinationFile.exists()) {
                // mkdirs() also creates missing parent folders; report success only when it really happened.
                if (!destinationFile.mkdirs()) {
                    System.err.println("Could not create folder -> " + destinationFile.getAbsolutePath());
                    return;
                }
                System.out.println("Folder Created -> " + destinationFile.getAbsolutePath());
            }

            if (!sourceFile.exists()) {
                System.err.println(sourceFile.getName() + " File not exists");
                return;
            }

            PDDocument document = null;
            try {
                System.out.println("Images copied to Folder Location: " + destinationFile.getAbsolutePath());
                document = PDDocument.load(sourceFile);
                PDFRenderer pdfRenderer = new PDFRenderer(document);

                int numberOfPages = document.getNumberOfPages();
                System.out.println("Total files to be converting -> " + numberOfPages);

                String fileName = stripPdfExtension(sourceFile.getName());
                String fileExtension = "png";
                /*
                 * Higher DPI gives a sharper image but a larger file: doubling the DPI
                 * quadruples the pixel count. The author's sample measurements:
                 *   1. At 300 dpi, 04-Request-Headers_2.png was 797 KB
                 *   2. At 600 dpi, 04-Request-Headers_2.png was 2.42 MB
                 */
                int dpi = 300; // lower this to save disk space; raise it when more detail is needed

                for (int pageIndex = 0; pageIndex < numberOfPages; ++pageIndex) {
                    // Page indexes are zero-based for the renderer; file names are numbered from 1.
                    File outPutFile = new File(destinationFile, fileName + "_" + (pageIndex + 1) + "." + fileExtension);
                    BufferedImage bImage = pdfRenderer.renderImageWithDPI(pageIndex, dpi, ImageType.RGB);
                    // ImageIO.write returns false (without throwing) when no suitable writer exists.
                    if (!ImageIO.write(bImage, fileExtension, outPutFile)) {
                        System.err.println("No image writer available for -> " + outPutFile.getName());
                    }
                }

                System.out.println("Converted Images are saved at -> " + destinationFile.getAbsolutePath());
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Always release the document, even when rendering or writing failed part-way.
                if (document != null) {
                    try {
                        document.close();
                    } catch (Exception closeFailure) {
                        closeFailure.printStackTrace();
                    }
                }
            }
        }

        /** Removes a trailing ".pdf" (any letter case) from a file name; other names are returned unchanged. */
        private static String stripPdfExtension(String name) {
            String extension = ".pdf";
            int start = name.length() - extension.length();
            if (start >= 0 && name.regionMatches(true, start, extension, 0, extension.length())) {
                return name.substring(0, start);
            }
            return name;
        }
    }