google-apps-scriptgoogle-sheetsgmailhtml-emailcell-formatting

Is it possible to export table from Gmail body while preserving the format, to Google sheets using GAS?


I am trying to get data from Gmail body using GAS. To be specific, I get an email with a table content; I am trying to copy the table from gmail and write it to google sheet for my further analysis. Below is a sample email I get: enter image description here

The output I am expecting in Google sheets: enter image description here

UPDATE: I was able to make some modifications to the code I had by referring to Insert table from gmail to google spreadsheet by google script

Here's how the email body and output looks like now. Email: enter image description here

GSheet Output: enter image description here

The issue occurs with merged cells in table. The code does not generate output as how it appears in the gmail body. Is there any workaround for this?

Final code:

var SEARCH_QUERY = "SearchKey";

function getEmailss_(q) {
    var emails = [];
    var threads = GmailApp.search(q);
    if (threads.length == 0) {
      console.log("No threads found that match the search query: " + q);
    }
    for (var i in threads) {
        var msgs = threads[i].getMessages();
        for (var j in msgs) {
            var arrStr = msgs[j].getBody()
              .replace(/<\/tr>/gm, '[/tr]')
              .replace(/<\/td>/gm, '[/td]')
              .replace(/<.*?>/g, '\n')
              .replace(/^\s*\n/gm, '')
              .replace(/^\s*/gm, '')
              .replace(/\s*\n/gm, '\n')
              .split("[/tr]");
            if (arrStr.length == 1) {
              console.log("No data found in thread: " + threads[i].getFirstMessageSubject());
            }
            var line = [];

            for (var i = 0; i < arrStr.length - 1; i++) {

              line = arrStr[i].split("[/td]");
              line.length -= 1;
              emails.push(line);
            }
        }
    }
    if (emails.length == 0) {
      console.log("No emails found that match the search query: " + q);
    }
    return convert2ArrayToRectangular_(emails);
}

function convert2ArrayToRectangular_(array2d)
{
  // get max width
  var res = [];
  var w = 0;
  for (var i = 0; i < array2d.length; i++)
  {
    if (array2d[i].length > w) {w = array2d[i].length;}    
  }

  var row = [];
  for (var i = 0; i < array2d.length; i++)
  {
    row = array2d[i];
    if(array2d[i].length < w)
    {
      for (var ii = array2d[i].length; ii < w; ii++)
      {
        row.push('');        
      }  
    }
    res.push(row);
  }
  return res;
}


function appendData_(sheet, array2d) {
    var h = array2d.length;
    var l = array2d[0].length;
    sheet.getRange(1, 1, h, l).setValues(array2d);
 } 

function saveEmailsss() {
    var array2d = getEmailss_(SEARCH_QUERY);
    if (array2d) {
        appendData_(SpreadsheetApp.getActive().getSheetByName('Sheet1'), convert2ArrayToRectangular_(array2d));
    }
    markArchivedAsRead();
}

function markArchivedAsRead() {
    var threads = GmailApp.search('label:inbox is:unread to:me subject:importnumberlist');
    GmailApp.markThreadsRead(threads);
};

Solution

  • As another approach, how about using Sheets API? When Sheets API is used, the HTML table can be parsed by including the merged cells. The sample script is as follows.

    Sample script:

    This script uses Sheets API. Please enable Sheets API at Advanced Google services.

    var SEARCH_QUERY = "SearchKey";
    
    function getEmailss_(q, sheetName) {
      var emails = [];
      var threads = GmailApp.search(q);
      if (threads.length == 0) {
        console.log("No threads found that match the search query: " + q);
      }
      var tables = [];
      for (var i in threads) {
        var msgs = threads[i].getMessages();
        for (var j in msgs) {
          var arrStr = msgs[j].getBody();
          var table = arrStr.match(/<table[\s\S\w]+?<\/table>/);
          if (table) {
            tables.push(table[0]);
          }
        }
      }
      if (emails.length == 0) {
        console.log("No emails found that match the search query: " + q);
      }
      if (tables.length == 0) {
        console.log("No tables.");
        return
      };
      var ss = SpreadsheetApp.getActiveSpreadsheet();
      var sheet = ss.getSheetByName(sheetName);
      var requests = [{ pasteData: { html: true, data: tables.join(""), coordinate: { sheetId: sheet.getSheetId() } } }];
      Sheets.Spreadsheets.batchUpdate({ requests }, ss.getId());
    }
    
    function saveEmailsss() {
      var sheetName = "Sheet1"; // Please set your sheet name.
      getEmailss_(SEARCH_QUERY, sheetName);
      markArchivedAsRead();
    }
    

    References: