google-apps-scriptweb-scrapinggoogle-sheetsurlfetch

scrape table using google app scripts


I would love to scape data from this website: https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume

I want to scrape the whole table. I tried using this :

function myFunction(start) {
    var url = "https://finviz.com/screener.ashx?
v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume&r="+ 
start;
    var fromText = '<tbody>';
    var toText = '</tbody>';
    var content = UrlFetchApp.fetch(url).getContentText();
    var scraped = Parser
              .data(content)
              .from(fromText)
              .to(toText)
              .iterate();
}

I could scrape every element using xpath, but I think it would be quite slow.

Here is the html and the table: enter image description here enter image description here

Can I get the whole table ? Thanks


Solution

  • How about a following modification? The retrieved data is imported to Spreadsheet.

    NOTE: Parser is a GAS library. You can see the detail at https://www.kutil.org/2016/01/easy-data-scrapping-with-google-apps.html

    Modified script :

    function myFunction(start) {
      var url = "https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume&r="+ start;
      var content = UrlFetchApp.fetch(url).getContentText();
      var scraped = Parser.data(content).from('class=\"screener-body-table-nw\"').to('</td>').iterate();
      var res = [];
    
      // If you don't want column titles, please remove this part.
      var temp = [];
      var titles = Parser.data(content).from("style=\"cursor:pointer;\">").to("</td>").iterate();
      titles.forEach(function(e){
        if (!~e.indexOf('\">')) {
          temp.push(e);
        } else if (~e.indexOf('img')) {
          temp.push(e.replace(/<img.+>/g, ''));
        }
      });
      res.push(temp);
      // -----
    
      var temp = [];
      var oticker = "";
      scraped.forEach(function(e){
        var ticker = Parser.data(e).from("<a href=\"quote.ashx?t=").to("&").build();
        var data1 = Parser.data(e).from("screener-link\">").to("</a>").build();
        var data2 = Parser.data(data1).from(">").to("<").build();
        if (oticker == "") oticker = ticker;
        if (ticker != oticker) {
          temp.splice(1, 0, oticker);
          res.push(temp);
          temp = [];
          oticker = ticker;
          temp.push(data1);
        } else {
          if (!~(data2 || data1).indexOf('<')) temp.push(data2 || data1);
        }
      });
      var ss = SpreadsheetApp.getActiveSheet();
      ss.getRange(ss.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
    }
    

    Result :

    enter image description here