google-apps-scriptgoogle-sheetspdfdocx

Error while merging docx files into single pdf using google apps script


I used a Google Apps script code proposed by Tanaike to merge multiple PDF files into one. My requirements are:

  1. Get file id
  2. Check if it is pdf or docx
  3. If it is docx, convert it into pdf and then merge it

Here is the modified snippet:

async function newMain() {
  // Retrieve PDF data.
 
  var destinationFolder = DriveApp.getFolderById("your-folder-id")
  var urls = sheet.getRange(2,2).getValue().toString().split(",");  //split the urls joined by commas 
  const ids = urls.map((url) => {
  const matches = url.match(/\/file\/d\/([^\/]+)\/edit/); //get file id from urls
  return matches ? [matches[1]] : [];
});

  const data = ids.map(([id]) => {
  const file = DriveApp.getFileById(id);
  const mimeType = file.getMimeType();

  // Check if the file is a DOCX
if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
  
  
   var blob = file.getBlob();
   var tempfile = Drive.Files.insert({}, blob, {convert:true});
   var id = tempfile["id"];
  
   var doc = DocumentApp.openById(id);
   var body = doc.getBody();
   var text = body.getText();
   var pdfFile = destinationFolder.createFile(`temp.pdf`, text, "application/pdf"); 

    // Get the PDF content
   const pdfData = pdfFile.getBlob().getBytes();
   Drive.Files.remove(id);
    // Remove the temporary PDF file
   DriveApp.getFileById(pdfFile.getId()).setTrashed(true);
   return new Uint8Array(pdfData);

  } else if (mimeType === 'application/pdf') {
    // File is already a PDF, fetch its content
    return new Uint8Array(file.getBlob().getBytes());
  } 
});



  // Load pdf-lib
  const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
  eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));

  // Merge PDFs.
  const pdfDoc = await PDFLib.PDFDocument.create();
  for (let i = 0; i < data.length; i++) {
    const pdfData = await PDFLib.PDFDocument.load(data[i]);
    for (let j = 0; j < pdfData.getPageCount(); j++) {
      const [page] = await pdfDoc.copyPages(pdfData, [j]);
      pdfDoc.addPage(page);
    }
  }
  const bytes = await pdfDoc.save();

  // Create a PDF file.
  var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));
  file.moveTo(destinationFolder);
 
 }

when I run it, I receive the following error:

Error: Failed to parse PDF document (line:368 col:0 offset=18311): No PDF header found e
e
e.parseHeader
eval
eval
eval
eval
i
e.parseDocument

Due to my beginner skills, I am unable to resolve or improve it, any guidance in this regard would be much appreciated.


Solution

  • Modification points:

    In this case, how about the following modification?

    Modified script:

    async function newMain() {
      // Retrieve PDF data.
      var destinationFolder = DriveApp.getFolderById("your-folder-id")
      var urls = sheet.getRange(2, 2).getValue().toString().split(",");  //split the urls joined by commas 
      const ids = urls.map((url) => {
        const matches = url.match(/\/file\/d\/([^\/]+)\/edit/); //get file id from urls
        return matches ? [matches[1]] : [];
      });
    
    
      // --- I modifieid the below script.
      const data = ids.flatMap(([id]) => {
        const file = DriveApp.getFileById(id);
        const mimeType = file.getMimeType();
        if (mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
          var blob = file.getBlob();
          var tempfile = Drive.Files.insert({}, blob, { convert: true });
          const pdfData = DriveApp.getFileById(tempfile.id).getBlob().getBytes();
          Drive.Files.remove(tempfile.id);
          return new Uint8Array(pdfData);
        } else if (mimeType === 'application/pdf') {
          return new Uint8Array(file.getBlob().getBytes());
        }
        return [];
      });
      // ---
    
    
      // Load pdf-lib
      const cdnjs = "https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js";
      eval(UrlFetchApp.fetch(cdnjs).getContentText().replace(/setTimeout\(.*?,.*?(\d*?)\)/g, "Utilities.sleep($1);return t();"));
    
      // Merge PDFs.
      const pdfDoc = await PDFLib.PDFDocument.create();
      for (let i = 0; i < data.length; i++) {
        const pdfData = await PDFLib.PDFDocument.load(data[i]);
        for (let j = 0; j < pdfData.getPageCount(); j++) {
          const [page] = await pdfDoc.copyPages(pdfData, [j]);
          pdfDoc.addPage(page);
        }
      }
      const bytes = await pdfDoc.save();
    
      // Create a PDF file.
      var file = DriveApp.createFile(Utilities.newBlob([...new Int8Array(bytes)], MimeType.PDF, "sample2.pdf"));
      file.moveTo(destinationFolder);
    }
    

    Note: