javascript html pdf.js

How to make text on pdf.js render selectable?


So, I have a PDF in a modal, which I rendered with PDF.js. My main goal is to select text from the rendered PDF and extract it into the console; however, my first problem is making the PDF.js render selectable.

I tried using viewer.html in the pdf.js directory. I just need to figure out how to make the text layer selectable, and whether I'm doing everything right.

Here is my code:

{% extends 'base.html' %}

{% block title %}Get Started{% endblock %}

{% block content %}
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-C6RzsynM9kWDrMNeT87bh95OGNyZPhcTNXj1NW7RuBCsyN/o0jlpcV8Qyq46cDfL" crossorigin="anonymous"></script>

<style>
    /* Landing section: centred, with space above it. */
    #get-started-page {
        text-align: center;
        margin-top: 50px;
    }
    /* Upload card: horizontally centred, left-aligned content. */
    .get-started-card {
        margin: auto;
        margin-bottom: 50px;
        padding: 20px;
        text-align: left;
    }
    /* Space between the file input and the upload button. */
    .get-started-btn {
        margin-top: 10px;
    }
    /* Viewer area of the modal: fixed height, scrolls when the page is taller. */
    .modal-body {
        display: flex;
        align-items: center;
        justify-content: center;
        height: 80vh;
        overflow-y: auto;
    }

    /* Keep the rendered page from exceeding its container. */
    #the-canvas {
        max-width: 100%;
        max-height: 100%;
    }

    /*
     * Text layer overlay. PDF.js fills this element with one <span> per text
     * run; the spans carry the selectable text that sits over the canvas.
     */
    .textLayer {
        position: absolute;
        top: 0;
        left: 0;
        height: 100%;
        width: 100%;
        overflow: hidden;
        line-height: 1;
        text-align: initial;
        /* Default only: should equal the scale used for the page viewport. */
        --scale-factor: 1;
    }

    /*
     * Without these rules the generated spans render as ordinary visible,
     * statically positioned text instead of an invisible selectable overlay.
     * They mirror the span rules in PDF.js's own pdf_viewer.css.
     */
    .textLayer span,
    .textLayer br {
        position: absolute;
        color: transparent;
        white-space: pre;
        cursor: text;
        transform-origin: 0% 0%;
    }

    /* Make the otherwise invisible selection visible over the canvas. */
    .textLayer ::selection {
        background: rgba(0, 0, 255, 0.25);
    }
</style>

<!-- Landing card: the user picks a PDF file and opens it with the upload button. -->
<section id="get-started-page">
    <div class="container">
        <div class="row justify-content-center">
            <div class="col-md-8">
                <div class="card bg-dark text-white transparent-card get-started-card">
                    <div class="card-body">
                        <h2 class="text-center mb-4">Get Started with IntelliQuest</h2>
                        <div class="mb-3 centerrrrr">
                            <!-- "for" ties the label to the file input so it is announced and clickable. -->
                            <label class="form-label" for="pdfInput">Upload PDF</label>
                            <div>
                                <input type="file" accept=".pdf" id="pdfInput">
                            </div>
                            <button type="button" class="btn btn-primary get-started-btn" id="uploadButton">Upload PDF</button>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
</section>

<!-- Bootstrap Modal: shows one rendered PDF page with previous/next navigation. -->
<div class="modal fade" id="pdfModal" tabindex="-1" aria-labelledby="pdfModalLabel" aria-hidden="true">
    <div class="modal-dialog modal-lg">
        <div class="modal-content">
            <div class="modal-header">
                <h5 class="modal-title" id="pdfModalLabel">Full-size PDF (Page 1 of 1)</h5>
                <button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
            </div>
            <div class="modal-body">
                <!--
                    Positioned wrapper sized by the canvas. The absolutely
                    positioned text layer must share a positioned parent with
                    the canvas, otherwise it anchors to the modal body's
                    top-left corner and the selectable text does not line up
                    with the rendered page. m-auto keeps the page centred but
                    lets an oversized page scroll from its top edge.
                -->
                <div class="position-relative flex-shrink-0 m-auto">
                    <canvas class="d-block" id="the-canvas"></canvas>
                    <div class="textLayer"></div>
                </div>
            </div>
            <!-- Add navigation buttons inside the modal -->
            <div class="modal-footer">
                <button type="button" class="btn btn-secondary" id="prevPageBtn">Previous Page</button>
                <button type="button" class="btn btn-secondary" id="nextPageBtn">Next Page</button>
            </div>
        </div>
    </div>
</div>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.js" integrity="sha512-dfMpvQclalfL7nRtHdy4+U2GLYb2XJJOgGLgKibrbcbarI/ZLgCAaBCS6+AuWN0OtLn/zFpu+Cggd8TCBYx9Ag==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>

<script>
    // Point PDF.js at its worker script. The worker URL uses the same
    // release (3.11.174) as the pdf.js library script tag included above.
    pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.js';

    /**
     * Wire up the "Upload PDF" button. On click, the file chosen in
     * #pdfInput is handed to loadPdf as an object URL; if no file has been
     * chosen, the user is asked to pick one.
     */
    function handleUpload() {
        var pdfInput = document.getElementById('pdfInput');
        var trigger = document.getElementById('uploadButton');

        function onUploadClick() {
            var selected = pdfInput.files[0];

            // Guard: nothing has been chosen yet.
            if (!selected) {
                alert('Please select a PDF file for upload.');
                return;
            }

            loadPdf(URL.createObjectURL(selected));
        }

        trigger.addEventListener('click', onUploadClick);
    }

    /**
     * Load a PDF with PDF.js, render its first page into #the-canvas with a
     * selectable text layer in .textLayer, hook up the previous/next buttons
     * and open the viewer modal.
     *
     * @param {string} url - Location of the PDF (for example an object URL).
     */
    function loadPdf(url) {
        var canvas = document.getElementById('the-canvas');
        var textLayer = document.querySelector('.textLayer');
        var modalTitle = document.getElementById('pdfModalLabel');
        var prevButton = document.getElementById('prevPageBtn');
        var nextButton = document.getElementById('nextPageBtn');
        var ctx = canvas.getContext('2d');
        var scale = 1;

        pdfjsLib.getDocument(url).promise.then(function (pdfDoc) {
            var numPages = pdfDoc.numPages;
            var pageNum = 1;
            // PDF.js refuses overlapping render() calls on one canvas, so only
            // one page renders at a time and the latest request is remembered.
            var isRendering = false;
            var pendingPageNum = null;

            function renderPage(num) {
                isRendering = true;
                modalTitle.textContent = 'Full-size PDF (Page ' + num + ' of ' + numPages + ')';

                return pdfDoc.getPage(num).then(function (page) {
                    var viewport = page.getViewport({ scale: scale });
                    canvas.height = viewport.height;
                    canvas.width = viewport.width;

                    // Drop the previous page's spans; otherwise they pile up.
                    textLayer.textContent = '';
                    // PDF.js sizes the layer and its fonts from this variable;
                    // it has to equal the scale the viewport was created with.
                    textLayer.style.setProperty('--scale-factor', viewport.scale);

                    return page.render({
                        canvasContext: ctx,
                        viewport: viewport
                    }).promise.then(function () {
                        // getTextContent() resolves asynchronously; the text
                        // layer needs the resolved content, not the Promise.
                        return page.getTextContent();
                    }).then(function (textContent) {
                        return pdfjsLib.renderTextLayer({
                            textContentSource: textContent,
                            container: textLayer,
                            viewport: viewport,
                            textDivs: []
                        }).promise;
                    });
                }).catch(function (err) {
                    console.error('Failed to render page ' + num, err);
                }).then(function () {
                    isRendering = false;
                    if (pendingPageNum !== null) {
                        var queued = pendingPageNum;
                        pendingPageNum = null;
                        renderPage(queued);
                    }
                });
            }

            function queueRenderPage(num) {
                if (isRendering) {
                    pendingPageNum = num;
                } else {
                    renderPage(num);
                }
            }

            renderPage(pageNum);

            // Assign onclick rather than addEventListener: loadPdf runs once
            // per upload, and stacked listeners would keep paging through
            // previously loaded documents as well.
            prevButton.onclick = function () {
                if (pageNum > 1) {
                    pageNum--;
                    queueRenderPage(pageNum);
                }
            };

            nextButton.onclick = function () {
                if (pageNum < numPages) {
                    pageNum++;
                    queueRenderPage(pageNum);
                }
            };

            // Reuse the modal instance across uploads.
            bootstrap.Modal.getOrCreateInstance(document.getElementById('pdfModal')).show();
        }).catch(function (err) {
            console.error('Failed to load PDF', err);
        });
    }

    handleUpload();
</script>
{% endblock %}

Any clue on how I can make the PDF selectable?


Solution

  • The problem is that the modal does not contain a PDF; it is simply a canvas image of one.

    enter image description here

    Basically, PDF.js is a PDF image renderer which, when used as designed, can also place text extracted from the PDF over the image.

    enter image description here

    In addition, when used in a frame, the PDF can also be drawn in another canvas, and the worker then appends the annotations onto the end of the unseen PDF.

    If you don't use a framed PDF, then any added layer is outside of the PDF itself: in effect, just another floating canvas overlay with font-based content (independent of the PDF).

    enter image description here

    However, as commented by Daniel Cruz, there are other options discussed in "pdf.js with text selection".