google-apps-script cookies urlfetch http-status-code-302 stackexchange-api

Failed to log in to a website to scrape my profile name using apps script


I've been trying to log in to this website with my credentials in order to scrape my profile name using Google Apps Script. The status code is 200 and I can see that the script is able to get cookies. However, I get undefined as the result instead of my profile name.

This is how I'm trying:

/**
 * Loads the Stack Overflow login page, reads the `fkey` form value from it,
 * posts the login form, and logs the response status code, the `Set-Cookie`
 * header and the `title` attribute selected from the returned HTML.
 */
function loginAndParseProfile() {
  // The same URL serves the login form (GET) and receives its submission (POST).
  const loginURL = "https://stackoverflow.com/users/login?ssrc=head&returnurl=https%3a%2f%2fstackoverflow.com%2f";
  const userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36";

  // Step 1: fetch the login form and pull the fkey value out of it.
  const formPage = UrlFetchApp.fetch(loginURL, {
    method: "get",
    headers: { "User-Agent": userAgent },
  });
  const $form = Cheerio.load(formPage.getContentText());
  const fkey = $form("input[name='fkey']").first().attr('value');

  // Step 2: submit the credentials together with the fkey read above.
  const options = {
    method: "post",
    payload: {
      fkey: fkey,
      ssrc: 'head',
      email: 'emailaddress',
      password: 'password',
      oauth_version: '',
      oauth_server: '',
    },
    muteHttpExceptions: true,
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
      "User-Agent": userAgent,
    },
  };
  const resp = UrlFetchApp.fetch(loginURL, options);

  // Step 3: report what came back and try to read the profile name from it.
  console.log(resp.getResponseCode());
  console.log(resp.getAllHeaders()['Set-Cookie']);
  const $page = Cheerio.load(resp.getContentText());
  const item = $page('a.my-profile > [class^="gravatar-wrapper"]').first().attr('title');
  console.log(item);
}

How can I make the script work?


Solution

    1. Disable redirects by setting followRedirects to false:

      // With followRedirects set to false, fetch() hands back the response to the
      // POST itself rather than the page it redirects to.
      var options = {
        method: 'post',
        payload: payload,
        muteHttpExceptions: true,
        headers: {
          'Content-Type': 'application/x-www-form-urlencoded',
          'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
        },
        followRedirects: false,
      };
      
    2. Grab the acct cookie from the response to the POST /users/login request:

      // getAllHeaders() yields a plain string when a header occurs once and an
      // array when it occurs several times, so normalise to an array first.
      const setCookieHeaders = [].concat(resp.getAllHeaders()['Set-Cookie'] || []);

      // Keep only the name=value pair: attributes such as expires or path are
      // not part of a Cookie request header.
      const acctMatch = setCookieHeaders
        .map(cookie => cookie.match(/acct=t=[^;\s]*/))
        .find(match => match !== null);

      if (!acctMatch) {
        // Fail with a readable message instead of a TypeError on undefined/null.
        throw new Error('No acct cookie in the response to POST /users/login; the login was probably rejected.');
      }
      const acct = acctMatch[0];
      
    3. Make a GET / request supplying the acct cookie and grab your profile name:

      // Request the home page again, this time sending the acct cookie.
      const homeUrl = 'https://stackoverflow.com';
      const requestOptions = { method: 'get', headers: { Cookie: acct } };
      const profileRequest = UrlFetchApp.fetch(homeUrl, requestOptions);

      // The name is the text of the <span> directly inside the element that
      // carries both the s-topbar--item and s-user-card classes.
      const profileHtml = profileRequest.getContentText();
      const $main = Cheerio.load(profileHtml);
      const myName = $main('.s-topbar--item.s-user-card > span').text();
      console.log(myName);
      

    If your credentials are correct, this should output your profile name (here, robots.txt).