javascript node.js web-scraping puppeteer

Closure of Puppeteer Browser After Clicking Cookie Accept Button - Unexpected Behavior


I am trying to scrape a site for live football (soccer) information using Node.js and Puppeteer, but my browser closes immediately after the console.log("trying to select team element") call.

const puppeteer = require("puppeteer");

/**
 * Launches a headless browser, opens the LiveScore home page and accepts the
 * cookie banner.
 *
 * On success the caller owns the returned page and is responsible for closing
 * its browser (`page.browser().close()`) when finished.
 *
 * @returns {Promise<import("puppeteer").Page>} The page after the cookie
 *   button has been clicked.
 * @throws Rethrows any navigation, selector-timeout or click error after
 *   closing the browser that was launched here.
 */
async function openPage() {
  const browser = await puppeteer.launch({ headless: true });

  try {
    const page = await browser.newPage();

    await page.setViewport({ width: 1000, height: 926 });
    await page.goto("https://www.livescore.com/en/");

    // accepting cookies
    const button = await page.waitForSelector("#onetrust-accept-btn-handler");
    if (button) {
      await button.click();
      console.log("clicked cookie button");
    }

    return page;
  } catch (err) {
    // On failure the caller never receives the page, so it has no way to
    // close the browser; close it here so the browser process is not leaked.
    await browser.close();
    throw err;
  }
}

/**
 * Waits for the ".Ip" selector on the given page and collects the text
 * content of the ".Ip" descendants found under each matched element.
 *
 * @param {object} page - Page object providing `waitForSelector`; in this
 *   file it is the value returned by `openPage()`.
 * @returns {Promise<Array>} The text values pushed into `content`; empty when
 *   the loop body never runs.
 */
async function scrapeData(page) {
  let content = [];

  // Getting match elements
  // NOTE(review): waitForSelector resolves to a single element handle, not an
  // array, so `elements.length` is presumably undefined and the loop below is
  // skipped — confirm against the Puppeteer docs.
  let elements = await page.waitForSelector(".Ip")
  console.log("trying to select team element")

  // Index-based iteration: only does work if `elements` is array-like.
  for (let i=0; i < elements.length; i++) {
    // Look up a nested ".Ip" element inside the current match.
    let homeTeamElement = await elements[i].$(".Ip")
    if (homeTeamElement) {
      // Read the node's text and append it to the result list.
      const homeTeamText = await homeTeamElement.evaluate(node => node.textContent);
      content.push(homeTeamText);
    }
  };

  return content;
};

// Entry point: open the page, scrape it, print the result, and always close
// the browser afterwards.
(async () => {
  const page = await openPage();

  try {
    const dataScraped = await scrapeData(page);
    console.log(dataScraped);
  } finally {
    // Close the browser even when scraping throws, so the browser process is
    // not left running.
    await page.browser().close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection, and signal
  // it through a non-zero exit status.
  console.error(err);
  process.exitCode = 1;
});

Any idea why this happens? Further critique of my code would also be greatly appreciated!


Solution

  • await page.waitForSelector(".Ip") resolves to a single element handle, not an array, so there's no way to loop over it: elements.length is undefined, the for loop body never runs, no error is thrown, and the script goes straight on to print an empty array and close the browser. Instead, use page.$$eval (or the new locators API if you're feeling cutting edge) to extract the data.

    // Scrapes every ".Ip" match row on the page into a plain object and prints
    // the resulting list.
    const puppeteer = require("puppeteer"); // ^21.0.2

    const url = "<Your URL>";

    // Declared outside the async function so the final cleanup step can reach
    // it even when launch or scraping fails part-way through.
    let browser;
    (async () => {
      browser = await puppeteer.launch();
      // Take the first page reported by the browser rather than opening a new one.
      const [page] = await browser.pages();
      await page.setViewport({width: 1000, height: 926});
      await page.goto(url, {waitUntil: "domcontentloaded"});

      // not really necessary
      const button = await page.waitForSelector("#onetrust-accept-btn-handler");
      await button.click();

      // Wait until at least one match row exists, then extract all rows in a
      // single in-page evaluation.
      await page.waitForSelector(".Ip");
      const content = await page.$$eval(".Ip", els =>
        els.map(e => {
          // Trimmed text of the descendant whose id contains the given fragment.
          // The attribute value is quoted so hyphenated fragments are matched
          // literally.
          const text = id =>
            e.querySelector(`[id*="${id}"]`).textContent.trim();
          return {
            time: text("status-or-time"),
            home: text("home-team-name"),
            away: text("away-team-name"),
            // Unary plus converts the score text to a number.
            homeTeamScore: +text("home-team-score"),
            awayTeamScore: +text("away-team-score"),
          };
        })
      );
      console.log(content);
    })()
      .catch(err => console.error(err))
      // Optional chaining: `browser` is still undefined if launch() itself failed.
      .finally(() => browser?.close());
    

    Notes: