google-apps-script, google-bigquery, line-breaks

Push Data from Google Sheet to BigQuery with Apps Script - Manage Line Breaks


I am using the script below to read data from a Google Sheet and push it to a BigQuery table.

Everything works as expected, except when there are line breaks in any of the fields. In those cases the insert operation fails on the BigQuery side.

Question:
How can I handle line breaks correctly so that the data inserted into the BigQuery table actually contains the line breaks?

Code:

function loadCogsPlayupHistory() {
  // Enter BigQuery Details as variable.
  var projectId = 'myproject';
  // Dataset
  var datasetId = 'my_Dataset';
  // Table
  var tableId = 'my table';
    
  // WRITE_APPEND: If the table already exists, BigQuery appends the data to the table.
  var writeDispositionSetting = 'WRITE_APPEND';
  
  // The name of the sheet in the Google Spreadsheet to export to BigQuery:
  var sheetName = 'data';
  Logger.log(sheetName)
  
  var file = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
  Logger.log(file)
  // This represents ALL the data
  var rows = file.getDataRange().getValues();
  var rowsCSV = rows.join("\n");
  var blob = Utilities.newBlob(rowsCSV, "text/csv");
  var data = blob.setContentType('application/octet-stream');
  Logger.log(rowsCSV)
  
  // Create the data upload job. 
  var job = {
    configuration: {
      load: {
        destinationTable: {
          projectId: projectId,
          datasetId: datasetId,
          tableId: tableId
        },
        skipLeadingRows: 1,
        writeDisposition: writeDispositionSetting
      }
    }
  };
  Logger.log(job)

  
  // send the load job to BigQuery to import the CSV data
  var runJob = BigQuery.Jobs.insert(job, projectId, data);
  //Logger.log('row 61  '+ runJob.status);
  var jobId = runJob.jobReference.jobId
  Logger.log('jobId: ' + jobId);
  Logger.log('status: '+ runJob.status);
  Logger.log('FINISHED!');
 // }
} 
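
For example, a cell that contains a line break ends up split across two CSV lines, which is what BigQuery then chokes on. A minimal example (demoBrokenCsv is just for illustration, not part of my script):

// Hypothetical illustration of the problem with join()-built CSV.
function demoBrokenCsv() {
  var rows = [
    ['id', 'note'],
    [1, 'line1\nline2'] // cell with an embedded line break
  ];
  Logger.log(rows.join("\n"));
  // Logs three physical lines instead of two CSV records:
  // id,note
  // 1,line1
  // line2
}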

Solution

  • Although I'm not sure about your actual situation, how about the following modification? Your script builds the CSV with rows.join("\n"), which never quotes the fields, so a cell that contains a line break is split across two CSV records and the load job fails. In this modification, the CSV data is instead exported via the endpoint https://docs.google.com/spreadsheets/d/${spreadsheetId}/export?format=csv&gid=${sheetId}, which returns properly quoted CSV. Please modify your script as follows.

    From:

    var file = SpreadsheetApp.getActiveSpreadsheet().getSheetByName(sheetName);
    Logger.log(file)
    // This represents ALL the data
    var rows = file.getDataRange().getValues();
    var rowsCSV = rows.join("\n");
    var blob = Utilities.newBlob(rowsCSV, "text/csv");
    

    To:

    var ss = SpreadsheetApp.getActiveSpreadsheet();
    var sheet = ss.getSheetByName(sheetName);
    var url = `https://docs.google.com/spreadsheets/d/${ss.getId()}/export?format=csv&gid=${sheet.getSheetId()}`;
    var res = UrlFetchApp.fetch(url, { headers: { authorization: "Bearer " + ScriptApp.getOAuthToken() } });
    var rowsCSV = res.getContentText();
    var blob = res.getBlob();
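
    Note that UrlFetchApp.fetch() with ScriptApp.getOAuthToken() only works when the script's token carries scopes that can read the Spreadsheet and call the other services. A sketch of an appsscript.json manifest that should cover this flow (the exact scope set is an assumption, so adjust it for your project):

    {
      "oauthScopes": [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive.readonly",
        "https://www.googleapis.com/auth/script.external_request",
        "https://www.googleapis.com/auth/bigquery"
      ]
    }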
    

    The whole modified script is as follows.

    function loadCogsPlayupHistory() {
      // Enter BigQuery Details as variable.
      var projectId = 'myproject';
      // Dataset
      var datasetId = 'my_Dataset';
      // Table
      var tableId = 'my table';
    
      // WRITE_APPEND: If the table already exists, BigQuery appends the data to the table.
      var writeDispositionSetting = 'WRITE_APPEND';
    
      // The name of the sheet in the Google Spreadsheet to export to BigQuery:
      var sheetName = 'data';
      Logger.log(sheetName)
    
    
      // --- I modified the below script.
      var ss = SpreadsheetApp.getActiveSpreadsheet();
      var sheet = ss.getSheetByName(sheetName);
      var url = `https://docs.google.com/spreadsheets/d/${ss.getId()}/export?format=csv&gid=${sheet.getSheetId()}`;
      var res = UrlFetchApp.fetch(url, { headers: { authorization: "Bearer " + ScriptApp.getOAuthToken() } });
      var rowsCSV = res.getContentText();
      var blob = res.getBlob();
      // ---
    
    
      var data = blob.setContentType('application/octet-stream');
      Logger.log(rowsCSV)
    
      // Create the data upload job. 
      var job = {
        configuration: {
          load: {
            destinationTable: {
              projectId: projectId,
              datasetId: datasetId,
              tableId: tableId
            },
            skipLeadingRows: 1,
            writeDisposition: writeDispositionSetting
          }
        }
      };
      Logger.log(job)
    
    
      // send the load job to BigQuery to import the CSV data
      var runJob = BigQuery.Jobs.insert(job, projectId, data);
      //Logger.log('row 61  '+ runJob.status);
      var jobId = runJob.jobReference.jobId
      Logger.log('jobId: ' + jobId);
      Logger.log('status: ' + runJob.status);
      Logger.log('FINISHED!');
      // }
    } 
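
    As another approach, if you would rather keep building the CSV inside the script, you can quote the fields yourself. This is an untested sketch with hypothetical helpers (quoteCsvField and rowsToCsv are my names, not part of your script); replace var rowsCSV = rows.join("\n") with var rowsCSV = rowsToCsv(rows). Depending on your data, the load configuration may also need allowQuotedNewlines: true so BigQuery accepts quoted data sections that contain newline characters.

    // Hypothetical helpers: build RFC 4180-style CSV so embedded
    // line breaks stay inside a single quoted field.
    function quoteCsvField(value) {
      var s = String(value);
      // Quote fields containing commas, quotes, or line breaks,
      // doubling any embedded double quotes.
      if (/[",\r\n]/.test(s)) {
        s = '"' + s.replace(/"/g, '""') + '"';
      }
      return s;
    }

    function rowsToCsv(rows) {
      return rows
        .map(function (row) { return row.map(quoteCsvField).join(","); })
        .join("\n");
    }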

    Note: