excelgoogle-sheetsgoogle-apps-scriptimport-from-excelsheetjs

Error while converting data from Excel to CSV using SheetJS Library and Google Apps Script


I have 6 files (5 xlsx, 1 csv) in Google Drive folder which I want to import into 6 different tabs of Google Sheet. Tanaike proposed a solution here that works well if the file size is not big. Unfortunately, my files have thousands of entries (approximately 50000 entries) with file size of 4mb-5mb. Here is the script:

function importExcel1(file, sheet) {
  // Library Key: 1B0eoHz03BVtSZhJAocaGNq94RjoXocz8xGMaLzwVdmAvYW5k8s5Yd360
  // Retrieve values from XLSX file.
  const MD = MicrosoftDocsApp.setFileId(file.getId());
  const srcSS = MD.getSpreadsheet();
  const values = srcSS.getSheets()[0].getDataRange().getValues();
  // console.log(values); // Confirm the retrieved values in the log.

  MD.end(); // When this line is run, the Spreadsheet created as a temporal file from the XLSX file is removed.
  if (values.length > 0) {
    sheet.getRange(1, 1, values.length, values[0].length).setValues(values);
  }
}

It gets the data quickly from Excel files, but I get the following error when setting the values in tabs:

Function (importExcel1): Error: Exceeded maximum execution time

I have tried another approach with SheetJS Library - Code adapted from Tanaike's solution:

 function importFiles() {
  var folderId = 'folder ID'; // ID of the folder where files are stored
  var folder = DriveApp.getFolderById(folderId);
  var files = folder.getFiles();
  var ss = SpreadsheetApp.getActiveSpreadsheet();

  while (files.hasNext()) {
    var file = files.next();
    var fileName = file.getName();

    // Skip files that already have "Done" in the name
    if (fileName.includes('Done')) {
      console.log(`Skipping already processed file: ${fileName}`);
      continue;
    }

    var fileType = fileName.slice(0, 6).toLowerCase();
    var sheet;
    var range;

    switch (fileType) {
      case 'parcel':
        sheet = ss.getSheetByName("import - shipping");
        range = 'A:T';
        break;
      case 'kit-re':
        sheet = ss.getSheetByName("import - kitting");
        range = 'A:G';
        break;
      case 'req-re':
        sheet = ss.getSheetByName("import - orders");
        range = 'A:U';
        break;
      case 'billin':
        sheet = ss.getSheetByName("import - billing codes");
        range = 'A:G';
        break;
      case 'req_li':
        sheet = ss.getSheetByName("import - order line");
        range = 'A:G';
        break;
      case 'packag':
        if (fileName.endsWith('.csv')) {
          sheet = ss.getSheetByName("Manual Extract - Package Grid");
          range = 'A:AO';
        }
        break;
      default:
        continue; // Skip files that do not match the criteria
    }

    if (sheet && range) {
      sheet.getRange(range).clearContent();
      SpreadsheetApp.flush();
      console.log("Sheet: " + sheet.getName());
      console.log("File: " + file.getName());
      if (file.getMimeType() === MimeType.CSV) {
        importCSV(file, sheet);
      } else {
        convertExcelToCSV(file, sheet);
      }

      // Rename the file by adding "Done" to the end of its name
      // file.setName(fileName + ' Done');
      // console.log(`Processed and renamed file: ${fileName} to ${file.getName()}`);
    }
  }
}

function importCSV(dataOrFile, sheet) {
  var csvData;

  if (typeof dataOrFile === 'string') {
    // If dataOrFile is a string, treat it as CSV data
    csvData = dataOrFile;
  } else {
    // If dataOrFile is a file, extract the data as a string
    csvData = dataOrFile.getBlob().getDataAsString();
  }

  var sheetId = sheet.getSheetId();
  console.log(sheetId);

  var resource = {
    requests: [
      {
        pasteData: {
          data: csvData,
          coordinate: { sheetId: sheetId },
          delimiter: ","
        }
      }
    ]
  };

  Sheets.Spreadsheets.batchUpdate(resource, 'Spreadsheet ID');
}

function convertExcelToCSV(file, sheet) {
  // Get the Excel file as a blob
  var excelBlob = DriveApp.getFileById(file.getId()).getBlob();

  // Convert the Blob into a byte array and read it using the XLSX library
  var data = new Uint8Array(excelBlob.getBytes());
  var workbook = XLSX.read(data, { type: 'array' });

  // Get the first sheet from the workbook
  var firstSheetName = workbook.SheetNames[0];
  var firstSheet = workbook.Sheets[firstSheetName];

  var csvData = XLSX.utils.sheet_to_csv(firstSheet);

  // Import the CSV data into the Google Sheet
  importCSV(csvData, sheet);
}

I am not sure whether it gets data more efficiently because I keep getting this error:

Error: invalid column -1

encode_col @ SheetJS.gs:4454

sheet_to_csv @ SheetJS.gs:27507

Is there any other way that we can make a batch request more reliably, any help to resolve this error is much appreciated?


Solution

  • In your situation, how about the following modification?

    In this modified script, the following steps are run.

    1. Convert XLSX or CSV data to Google Spreadsheet.
    2. Retrieve values from the converted Spreadsheet using Sheets API.
    3. Put values to each sheet in the destination Spreadsheet.
    4. Delete the converted Spreadsheet.

    Before you use this script, please enable Drive API v3 and Sheets API at Advanced Google services. And, please set your folder ID.

    I guessed that in your situation, you want to retrieve the values from the 1st sheet in each XLSX.

    Modified script 1:

    In this script, after all values are retrieved, the values are put into each sheet.

    function importFiles() {
      var folderId = '###'; // ID of the folder where files are stored
      var folder = DriveApp.getFolderById(folderId);
      var files = folder.getFiles(); // or const files = DriveApp.getFolderById(folderId).searchFiles(`mimeType='${MimeType.MICROSOFT_EXCEL}' or mimeType='${MimeType.CSV}' and trashed=false`);
      var ss = SpreadsheetApp.getActiveSpreadsheet();
    
      var data = []; // Added
      while (files.hasNext()) {
        var file = files.next();
        var fileName = file.getName();
    
        // Skip files that already have "Done" in the name
        if (fileName.includes('Done')) {
          console.log(`Skipping already processed file: ${fileName}`);
          continue;
        }
    
        var fileType = fileName.slice(0, 6).toLowerCase();
        var sheet;
        var range;
    
        switch (fileType) {
          case 'parcel':
            sheet = ss.getSheetByName("import - shipping");
            range = 'A:T';
            break;
          case 'kit-re':
            sheet = ss.getSheetByName("import - kitting");
            range = 'A:G';
            break;
          case 'req-re':
            sheet = ss.getSheetByName("import - orders");
            range = 'A:U';
            break;
          case 'billin':
            sheet = ss.getSheetByName("import - billing codes");
            range = 'A:G';
            break;
          case 'req_li':
            sheet = ss.getSheetByName("import - order line");
            range = 'A:G';
            break;
          case 'packag':
            if (fileName.endsWith('.csv')) {
              sheet = ss.getSheetByName("Manual Extract - Package Grid");
              range = 'A:AO';
            }
            break;
          default:
            continue; // Skip files that do not match the criteria
        }
    
        if (sheet && range) {
          sheet.getRange(range).clearContent();
          SpreadsheetApp.flush();
          console.log("Sheet: " + sheet.getName());
          console.log("File: " + file.getName());
    
    // I modified the below script.
          const id = Drive.Files.copy({ mimeType: MimeType.GOOGLE_SHEETS }, file.getId()).id;
          const { values } = Sheets.Spreadsheets.Values.get(id, "A:ZZZ");
          data.push({ values, range: sheet.getName() });
          Drive.Files.remove(id);
        }
      }
      if (data.length == 0) return;
      Sheets.Spreadsheets.Values.batchUpdate({ data, valueInputOption: "USER_ENTERED" }, ss.getId());
    }
    

    Modified script 2:

    In this script, each value of each file is put into each sheet in the while loop.

    function importFiles() {
      var folderId = '###'; // ID of the folder where files are stored
      var folder = DriveApp.getFolderById(folderId);
      var files = folder.getFiles();
      var ss = SpreadsheetApp.getActiveSpreadsheet();
    
      while (files.hasNext()) {
        var file = files.next();
        var fileName = file.getName();
    
        // Skip files that already have "Done" in the name
        if (fileName.includes('Done')) {
          console.log(`Skipping already processed file: ${fileName}`);
          continue;
        }
    
        var fileType = fileName.slice(0, 6).toLowerCase();
        var sheet;
        var range;
    
        switch (fileType) {
          case 'parcel':
            sheet = ss.getSheetByName("import - shipping");
            range = 'A:T';
            break;
          case 'kit-re':
            sheet = ss.getSheetByName("import - kitting");
            range = 'A:G';
            break;
          case 'req-re':
            sheet = ss.getSheetByName("import - orders");
            range = 'A:U';
            break;
          case 'billin':
            sheet = ss.getSheetByName("import - billing codes");
            range = 'A:G';
            break;
          case 'req_li':
            sheet = ss.getSheetByName("import - order line");
            range = 'A:G';
            break;
          case 'packag':
            if (fileName.endsWith('.csv')) {
              sheet = ss.getSheetByName("Manual Extract - Package Grid");
              range = 'A:AO';
            }
            break;
          default:
            continue; // Skip files that do not match the criteria
        }
    
        if (sheet && range) {
          sheet.getRange(range).clearContent();
          SpreadsheetApp.flush();
          console.log("Sheet: " + sheet.getName());
          console.log("File: " + file.getName());
    
          const id = Drive.Files.copy({ mimeType: MimeType.GOOGLE_SHEETS }, file.getId()).id;
          const { values } = Sheets.Spreadsheets.Values.get(id, "A:ZZZ");
          if (values.length == 0) continue;
          Sheets.Spreadsheets.Values.update({ values }, ss.getId(), sheet.getName(), { valueInputOption: "USER_ENTERED" });
          Drive.Files.remove(id);
          // Utilities.sleep(3000); // This line might be required to be used.
        }
      }
    }
    

    References: