javaitext

How to convert HTML to PDF using iText


import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import com.itextpdf.text.Document;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Minimal iText example that writes a one-paragraph PDF to {@code E:\Test.pdf}.
 *
 * <p>The HTML string is passed to {@link Paragraph} as ordinary text, so the
 * markup is not interpreted; the tags end up in the PDF verbatim.
 */
public class GeneratePDF {
    /**
     * Creates the PDF and prints a stack trace if anything goes wrong.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String html = "<html><body> This is my Project </body></html>";

        // try-with-resources guarantees the file handle is released even when
        // an iText call throws before the document has been closed.
        try (OutputStream file = new FileOutputStream(new File("E:\\Test.pdf"))) {
            Document document = new Document();
            PdfWriter.getInstance(document, file);

            document.open();
            document.add(new Paragraph(html));
            // Closing the document finishes the PDF; it must happen before
            // the underlying stream is closed.
            document.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

This is my code to convert HTML to PDF. The conversion runs, but the PDF contains the raw HTML markup, whereas I need only the text to be displayed: <html><body> This is my Project </body></html> is written to the PDF, when it should contain only This is my Project.


Solution

  • You can do it with the HTMLWorker class (deprecated) like this:

    import com.itextpdf.text.html.simpleparser.HTMLWorker;
    //...
    // try-with-resources releases the file handle even if parsing fails.
    try (OutputStream file = new FileOutputStream(new File("C:\\Test.pdf"))) {
        String k = "<html><body> This is my Project </body></html>";
        Document document = new Document();
        PdfWriter.getInstance(document, file);
        document.open();
        // Feed the markup through HTMLWorker instead of adding it to the
        // document as a plain Paragraph.
        HTMLWorker htmlWorker = new HTMLWorker(document);
        htmlWorker.parse(new StringReader(k));
        // Close the document before the stream so the PDF is completed.
        document.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
    

    Or use XMLWorker (distributed as a separate jar that must be on the classpath) with this code:

    import com.itextpdf.tool.xml.XMLWorkerHelper;
    //...
    try {
        String k = "<html><body> This is my Project </body></html>";
        OutputStream file = new FileOutputStream(new File("C:\\Test.pdf"));
        Document document = new Document();
        PdfWriter writer = PdfWriter.getInstance(document, file);
        document.open();
        InputStream is = new ByteArrayInputStream(k.getBytes());
        XMLWorkerHelper.getInstance().parseXHtml(writer, document, is);
        document.close();
        file.close();
    } catch (Exception e) {
        e.printStackTrace();
    }