javascriptjquerypdf.js

pdf.js with text selection


How to make the text in a PDF selectable?

I have tried the following. The PDF renders fine, but the text cannot be selected.

https://github.com/mozilla/pdf.js

https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js

'use strict';

/*
 * Loads 'file.pdf', draws page 1 onto #the-canvas at 1.5x scale, and builds a
 * text layer inside #text-layer so the page text can be selected.
 *
 * Expects these globals to be loaded by the page: PDFJS (pdf.js), $ (jQuery)
 * and TextLayerBuilder.
 */
PDFJS.getDocument('file.pdf').then(function(pdf){
    var page_num = 1;
    // Return the inner promise so a failure below reaches the handler at the
    // end of the chain instead of being dropped.
    return pdf.getPage(page_num).then(function(page){
        var scale = 1.5;
        var viewport = page.getViewport(scale);
        var canvas = document.getElementById('the-canvas');
        var context = canvas.getContext('2d');
        // Size the canvas to the scaled page before drawing into it.
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        // Give the text layer the same size and page offset as the canvas.
        // NOTE(review): top/left only take effect if the .textLayer CSS
        // positions the element (e.g. absolutely) - confirm the stylesheet is loaded.
        var canvasOffset = $(canvas).offset();
        var $textLayerDiv = $('#text-layer').css({
            height : viewport.height+'px',
            width : viewport.width+'px',
            top : canvasOffset.top,
            left : canvasOffset.left
        });

        // Start drawing the page; the value returned by render() is not
        // waited on, so the text layer is built independently of the drawing.
        page.render({
            canvasContext : context,
            viewport : viewport
        });

        return page.getTextContent().then(function(textContent){
            var textLayer = new TextLayerBuilder({
                textLayerDiv : $textLayerDiv.get(0),
                // pdf.getPage() is called with a 1-based number; pageIndex is 0-based.
                pageIndex : page_num - 1,
                viewport : viewport
            });

            textLayer.setTextContent(textContent);
            textLayer.render();
        });
    });
}).then(null, function(reason){
    // Single rejection handler for the whole chain: loading the document,
    // fetching the page, extracting the text, or building the text layer.
    console.error('Unable to render PDF page with text layer:', reason);
});

<body>
  <!-- Wrapper holding the rendered page and its text overlay. -->
  <div>
    <!-- Drawing surface; the script looks it up by id "the-canvas" and sizes it to the page viewport. -->
    <canvas id="the-canvas" style="border:1px solid black;"></canvas>
    <!-- Container passed to TextLayerBuilder; the script sets its width, height, top and left from the canvas. -->
    <div id="text-layer" class="textLayer"></div>
  </div>
</body>

Solution

  • Your JavaScript code is perfect. You just need to include the UI utilities that the Text Layer Builder depends on:

    https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js

    Or in HTML:

    <script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
    

    If you run your code without ui_utils.js and check the browser's debug console, you will see "ReferenceError: CustomStyle is not defined". A quick search in the PDF.js repository will show you that CustomStyle is defined in ui_utils.js.

    Here is my minimal but complete code for your reference. I am using PDF.js's demo PDF here. Note that in production you should not link to raw.githubusercontent.com; host copies of these files yourself instead.

    <!DOCTYPE html><meta charset="utf-8">
    <!-- Stylesheet for elements with the "textLayer" class used below. -->
    <link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
    <!-- jQuery: the inline script uses $() for element lookup, .offset() and .css(). -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
    <!-- NOTE(review): raw.githubusercontent.com is not intended as a CDN; confirm that browsers will
         actually execute/apply files served from it, or host local copies of these three files. -->
    <!-- ui_utils.js defines CustomStyle, which the text layer builder needs (it is loaded first for that reason). -->
    <script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
    <!-- Presumably the source of the TextLayerBuilder constructor used by the inline script. -->
    <script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
    <!-- pdf.js itself; presumably the source of the PDFJS global used by the inline script. -->
    <script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
    <body>
      <div>
        <!-- Drawing surface; the inline script sizes it to the page viewport. -->
        <canvas id="the-canvas" style="border:1px solid black;"></canvas>
        <!-- Container passed to TextLayerBuilder; sized and offset to match the canvas by the inline script. -->
        <div id="text-layer" class="textLayer"></div>
      </div>
    <script>
    'use strict';

    /*
     * Loads 'file.pdf', draws page 1 onto #the-canvas at 1.5x scale, and builds
     * a text layer inside #text-layer so the page text can be selected.
     *
     * Expects these globals to be loaded by the page: PDFJS (pdf.js),
     * $ (jQuery) and TextLayerBuilder.
     */
    PDFJS.getDocument('file.pdf').then(function(pdf){
        var page_num = 1;
        // Return the inner promise so a failure below reaches the handler at
        // the end of the chain instead of being dropped.
        return pdf.getPage(page_num).then(function(page){
            var scale = 1.5;
            var viewport = page.getViewport(scale);
            var canvas = $('#the-canvas')[0];
            var context = canvas.getContext('2d');
            // Size the canvas to the scaled page before drawing into it.
            canvas.height = viewport.height;
            canvas.width = viewport.width;

            // Give the text layer the same size and page offset as the canvas.
            // NOTE(review): top/left only take effect if the .textLayer CSS
            // positions the element (e.g. absolutely) - confirm the stylesheet is loaded.
            var canvasOffset = $(canvas).offset();
            var $textLayerDiv = $('#text-layer').css({
                height : viewport.height+'px',
                width : viewport.width+'px',
                top : canvasOffset.top,
                left : canvasOffset.left
            });

            // Start drawing the page; the value returned by render() is not
            // waited on, so the text layer is built independently of the drawing.
            page.render({
                canvasContext : context,
                viewport : viewport
            });

            return page.getTextContent().then(function(textContent){
                // Dump the extracted text content to the console for inspection.
                console.log( textContent );
                var textLayer = new TextLayerBuilder({
                    textLayerDiv : $textLayerDiv.get(0),
                    // pdf.getPage() is called with a 1-based number; pageIndex is 0-based.
                    pageIndex : page_num - 1,
                    viewport : viewport
                });

                textLayer.setTextContent(textContent);
                textLayer.render();
            });
        });
    }).then(null, function(reason){
        // Single rejection handler for the whole chain: loading the document,
        // fetching the page, extracting the text, or building the text layer.
        console.error('Unable to render PDF page with text layer:', reason);
    });
    </script>