javascript html5-canvas getimagedata pdf.js putimagedata

Render .pdf to single Canvas using pdf.js and ImageData


I am trying to read an entire .pdf Document using PDF.js and then render all the pages on a single canvas.

My idea: render each page onto a canvas, get its ImageData (context.getImageData()), clear the canvas, and move on to the next page. I store all the ImageData objects in an array, and once all pages are in there I want to put all of them from the array onto a single canvas.

// Attempt to load the PDF at `url`, snapshot every page as ImageData using the
// canvas with id 'cv', and then stack all snapshots vertically on that canvas.
var pdf = null;
PDFJS.disableWorker = true;
// Holds one ImageData per page; filled via pages[i-1] in the getPage callback.
var pages = new Array();
    //Prepare some things
    var canvas = document.getElementById('cv');
    var context = canvas.getContext('2d');
    var scale = 1.5;
    PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
        pdf = _pdf;
        //Render all the pages on a single canvas
        // NOTE(review): `i` is declared with `var`, so every getPage callback
        // shares this one binding. If a callback runs after the loop has moved
        // on, `i` no longer matches the page that was requested - confirm.
        for(var i = 1; i <= pdf.numPages; i ++){
            pdf.getPage(i).then(function getPage(page){
                var viewport = page.getViewport(scale);
                // Resize the shared canvas to this page's viewport.
                canvas.width = viewport.width;
                canvas.height = viewport.height;
                // NOTE(review): the return value of page.render() is not used and
                // the snapshot is taken on the very next line. If pdf.js renders
                // asynchronously, the snapshot is taken before the page is drawn -
                // confirm against the pdf.js API.
                page.render({canvasContext: context, viewport: viewport});
                pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
                context.clearRect(0, 0, canvas.width, canvas.height);
                p.Out("pre-rendered page " + i);
            });
        }

    //Now we have all 'dem Pages in "pages" and need to render 'em out
    // NOTE(review): this part runs inside the getDocument callback directly
    // after the loop above has issued its getPage() calls; it does not wait
    // for the getPage callbacks, so `pages` may still be empty here - confirm.
    canvas.height = 0;
    // Y offset at which the next page snapshot is placed.
    var start = 0;
    for(var i = 0; i < pages.length; i++){
        // Grow the canvas to the widest page and to the accumulated height.
        // NOTE(review): the canvas is resized before each putImageData call;
        // presumably resizing resets what was drawn earlier - verify.
        if(canvas.width < pages[i].width) canvas.width = pages[i].width;
        canvas.height = canvas.height + pages[i].height;
        context.putImageData(pages[i], 0, start);
        start += pages[i].height;
    }
    });

So from the way I understand things, this should work, right? When I run this I end up with a canvas that is big enough to contain all the pages of the PDF, but it doesn't show the PDF...

Thank you for helping.


Solution

  • I can’t speak to the part of your code that renders the pdf into a canvas, but I do see some problems.

    So to get you started, I would start by changing your code to this (very, very untested!):

    // Render every page of the PDF at `url`, one after another, onto the shared
    // canvas 'cv', snapshot each rendered page as ImageData, and finally stack
    // all snapshots vertically on that same canvas.
    var pdf = null;
    PDFJS.disableWorker = true;
    var pages = new Array();
    //Prepare some things
    var canvas = document.getElementById('cv');
    var context = canvas.getContext('2d');
    var scale = 1.5;
    // size of the final display canvas: widest page x sum of all page heights
    var canvasWidth=0;
    var canvasHeight=0;
    // pageStarts[n] is the "Y" starting position of pages[n] on the final canvas
    var pageStarts=new Array();
    pageStarts[0]=0;
    
    PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
        pdf = _pdf;

        // Surface load/render failures instead of silently dropping them.
        function reportFailure(reason){
            console.error("could not render pdf", reason);
        }

        // Draw all collected page snapshots onto the resized display canvas.
        // Must only run after every page has been snapshotted.
        function drawAllPages(){
            canvas.width = canvasWidth;
            canvas.height = canvasHeight;  // this auto-clears all canvas contents
            for(var n = 0; n < pages.length; n++){
                context.putImageData(pages[n], 0, pageStarts[n]);
            }
        }

        // Render page `pageNumber` (1-based), snapshot it, then continue with
        // the next page. Pages are processed strictly one at a time because
        // they all share the same canvas; passing the page number as an
        // argument also gives each callback its own copy of it.
        function renderPage(pageNumber){
            if(pageNumber > pdf.numPages){
                // all pages are in "pages" - now it is safe to draw them
                drawAllPages();
                return;
            }
            pdf.getPage(pageNumber).then(function getPage(page){
                var viewport = page.getViewport(scale);
                // changing canvas.width and/or canvas.height auto-clears the canvas
                canvas.width = viewport.width;
                canvas.height = viewport.height;
                var renderTask = page.render({canvasContext: context, viewport: viewport});
                // Rendering is asynchronous. Newer pdf.js versions return a
                // RenderTask exposing `.promise`; older ones return a
                // promise-like object directly, so accept both.
                var rendered = renderTask.promise || renderTask;
                rendered.then(function pageRendered(){
                    // only now does the canvas actually contain the page
                    pages[pageNumber-1] = context.getImageData(0, 0, canvas.width, canvas.height);
                    // calculate the width of the final display canvas
                    if(canvas.width>canvasWidth){
                      canvasWidth=canvas.width;
                    }
                    // calculate the accumulated height of the final display canvas
                    canvasHeight+=canvas.height;
                    // save the "Y" starting position of the following page
                    pageStarts[pageNumber]=pageStarts[pageNumber-1]+canvas.height;
                    p.Out("pre-rendered page " + pageNumber);
                    renderPage(pageNumber+1);
                }, reportFailure);
            }, reportFailure);
        }

        renderPage(1);
    }, reportFailure);
    

    Alternatively, here’s a more traditional way of accomplishing your task:

    Use a single “display” canvas and allow the user to “page through” each desired page.

    Since you already start by drawing each page into a canvas, why not keep a separate, hidden canvas for each page? Then when the user wants to see page #6, you just copy hidden canvas #6 onto your display canvas.

    The Mozilla devs use this approach in their pdfJS demo here: http://mozilla.github.com/pdf.js/web/viewer.html

    You can check out the code for the viewer here: http://mozilla.github.com/pdf.js/web/viewer.js