javascript, google-apps-script, html-table, cheerio

How to pull multiple sub-table data as a single table data in JavaScript with Cheerio


I want to download the whole table from this webpage. It consists of five sub-tables, each with its own thead and tbody, under 'table[class="calendar"]'. My code below can pull all the thead texts and all the tbody texts as a whole, but it doesn't group them by sub-table.

I want to pull the thead texts and tbody texts from each sub-table and then combine them into one table, so that I end up with the whole table data organized in the same arrangement as the webpage shows. How can I do that?

/**
 * Fetches the Finviz calendar page, collects every <th> text under <thead>
 * and every <td> text under `body > div.content`, reshapes each flat list
 * into rows of 9 cells, and logs both 2D arrays.
 *
 * Note: header cells and body cells are gathered page-wide, so the result
 * is not grouped per sub-table.
 */
function test() {
  const url = "https://finviz.com/calendar.ashx";
  const res = UrlFetchApp.fetch(url, {
    muteHttpExceptions: true,
  }).getContentText();
  const $ = Cheerio.load(res);
  const column = 9;

  // Convert a 1D array of cell texts into a 2D array of `column`-wide rows.
  const toRows = (cells) => {
    const rows = [];
    for (let start = 0; start < cells.length; start += column) {
      rows.push(cells.slice(start, start + column));
    }
    return rows;
  };

  const thead = $("thead")
    .find("th")
    .toArray()
    .map((el) => $(el).text());
  const tableHead = toRows(thead);
  console.log(tableHead);

  const tcontents = $("body > div.content")
    .find("td")
    .toArray()
    .map((el) => $(el).text());
  const idx = tcontents.indexOf("No economic releases");
  // indexOf returns -1 when the text is absent (which is truthy) and 0 when
  // it is the first cell (which is falsy), so the result must be compared
  // explicitly instead of being used as a boolean.
  if (idx !== -1) {
    // Add empty elements to match the number of table columns.
    tcontents.splice(idx + 1, 0, "", "", "", "", "", "");
  }
  const tableContents = toRows(tcontents);
  // Drop the final chunk; the original author describes it as a trailing
  // empty array on this page.
  tableContents.pop();
  console.log(tableContents);
}

My expected output is: enter image description here


Solution

  • I figured out a solution as below. I'm using Google Apps Script.

    /**
     * Fetches the Finviz calendar page and builds one combined 2D array:
     * for each sub-table, its header row followed by its body rows
     * (9 cells per row). The combined array is logged.
     */
    function test() {

      const url = 'https://finviz.com/calendar.ashx';
      const res = UrlFetchApp.fetch(url, { muteHttpExceptions: true }).getContentText();
      const $ = Cheerio.load(res);
      const column = 9;
      const table = [];

      // The sub-tables are addressed at nth-child positions 2, 5, 8, 11 and 14
      // of their container.
      for (let i = 2; i < 15; i += 3) {
        const tableData = $(`body > div.content > div > div > table:nth-child(${i})`);
        const thead = tableData.find('th').toArray().map((el) => $(el).text());
        const tcontents = tableData.find('td').toArray().map((el) => $(el).text());
        const idx = tcontents.indexOf('No economic releases');
        if (idx > -1) {
          // Add empty elements to match the number of table columns.
          tcontents.splice(idx + 1, 0, '', '', '', '', '');
        }
        // The former `else tcontents.slice(5);` was removed: slice() does not
        // mutate the array and its result was discarded, so it had no effect.

        table.push(thead);
        // Convert the 1D cell list to rows of `column` cells and append them.
        for (let start = 0; start < tcontents.length; start += column) {
          table.push(tcontents.slice(start, start + column));
        }
      }

      console.log(table);
    }