javascript, html, html5-canvas, pdf.js, pdfjs-dist

Render a pdf.js page as real HTML elements instead of canvas or SVG?


I am trying to build a simple mobile UI for reading PDFs. I plan to add many features using my own PDF reader rather than the viewer made by the pdf.js team, so I would like to know whether there is any way to render a PDF as plain HTML elements, as they do in their viewer. I am not comfortable with canvas. Any help is appreciated; thanks in advance.


Solution

  • Okay, I finally found the way: pdf.js has a method called getTextContent(), which you call on each page as you render it.

    First, get every page of your document and render it:

    // Load the PDF at `url` and render every page onto its own canvas,
    // each wrapped in a div#page-N appended to div#container.
    PDFJS.getDocument(url)
      .then(function(pdf) {

        // Get div#container and cache it for later use
        var container = document.getElementById("container");

        // Render one page (1-based pageNumber) into its own wrapper div.
        function renderPage(pageNumber) {
          var div = document.createElement("div");

          // Set id attribute with page-#{pdf_page_number} format
          div.setAttribute("id", "page-" + pageNumber);

          // This will keep positions of child elements as per our needs
          div.setAttribute("style", "position: relative");

          // Append the wrapper now, before the page has loaded, so wrappers
          // stay in page order even if getPage() promises settle out of order
          container.appendChild(div);

          // Get desired page
          pdf.getPage(pageNumber)
            .then(function(page) {

              var scale = 1.5;
              var viewport = page.getViewport(scale);

              // Create a new Canvas element
              var canvas = document.createElement("canvas");

              // Append Canvas within div#page-#{pdf_page_number}
              div.appendChild(canvas);

              // Size the canvas to the scaled page before drawing
              var context = canvas.getContext('2d');
              canvas.height = viewport.height;
              canvas.width = viewport.width;

              var renderContext = {
                canvasContext: context,
                viewport: viewport
              };

              // Render PDF page
              page.render(renderContext);
            })
            .catch(function(error) {
              // Report the failing page; its wrapper div is left empty
              console.error("Failed to render page " + pageNumber, error);
            });
        }

        // Loop from 1 to total_number_of_pages in PDF document
        for (var i = 1; i <= pdf.numPages; i++) {
          renderPage(i);
        }
      })
      .catch(function(error) {
        // Report document loading/setup failures instead of dropping them
        console.error("Failed to load PDF document", error);
      });
    

    Next, get every page's text content. This code continues from the previous snippet: replace the page.render() call there with the following:

    // Render PDF page, then overlay its text as real DOM elements.
    // Expects `page`, `div`, `viewport` and `renderContext` from the
    // enclosing getPage() callback.
    page.render(renderContext)
      .then(function() {
        // Get text-fragments
        return page.getTextContent();
      })
      .then(function(textContent) {
        // Create div which will hold text-fragments
        var textLayerDiv = document.createElement("div");

        // Set its class to textLayer, which has the required CSS styles
        textLayerDiv.setAttribute("class", "textLayer");

        // Append newly created div in `div#page-#{pdf_page_number}`
        div.appendChild(textLayerDiv);

        // Create new instance of TextLayerBuilder class
        var textLayer = new TextLayerBuilder({
          textLayerDiv: textLayerDiv,
          pageIndex: page.pageIndex,
          viewport: viewport
        });

        // Set text-fragments
        textLayer.setTextContent(textContent);

        // Render text-fragments
        textLayer.render();
      })
      .catch(function(error) {
        // Report render/text-extraction failures instead of dropping them
        console.error("Failed to build text layer for page " + (page.pageIndex + 1), error);
      });
    

    For a full tutorial on how to do this, go here.