I am trying to scrape a site to get live football (soccer) information, using node.js and puppeteer but my browser is closing immediately after the console.log("trying to select team element").
const puppeteer = require("puppeteer");
async function openPage() {
const browser = await puppeteer.launch( {headless: true} );
const page = await browser.newPage();
await page.setViewport({ width: 1000, height: 926 });
await page.goto("https://www.livescore.com/en/");
// accepting cookies
const button = await page.waitForSelector('#onetrust-accept-btn-handler');
if (button) {
await button.click();
console.log("clicked cookie button");
};
return page;
}
async function scrapeData(page) {
let content = [];
// Getting match elements
let elements = await page.waitForSelector(".Ip")
console.log("trying to select team element")
for (let i=0; i < elements.length; i++) {
let homeTeamElement = await elements[i].$(".Ip")
if (homeTeamElement) {
const homeTeamText = await homeTeamElement.evaluate(node => node.textContent);
content.push(homeTeamText);
}
};
return content;
};
(async () => {
const page = await openPage();
const dataScraped = await scrapeData(page);
console.log(dataScraped)
await page.browser().close();
})();
Any idea why this may be and further critique on my code would be greatly appreciated!
await page.waitForSelector(".Ip") only returns one element, not an array, so there's no way to loop over it. There should be a clear error message to this effect. Instead, use page.$$eval (or the new locators API if you're feeling cutting edge) to extract the data.
const puppeteer = require("puppeteer"); // ^21.0.2
const url = "<Your URL>";
let browser;
(async () => {
browser = await puppeteer.launch();
const [page] = await browser.pages();
await page.setViewport({width: 1000, height: 926});
await page.goto(url, {waitUntil: "domcontentloaded"});
// not really necessary
const button = await page.waitForSelector("#onetrust-accept-btn-handler");
await button.click();
await page.waitForSelector(".Ip");
const content = await page.$$eval(".Ip", els =>
els.map(e => {
const text = id =>
e.querySelector(`[id*=${id}]`).textContent.trim();
return {
time: text("status-or-time"),
home: text("home-team-name"),
away: text("away-team-name"),
homeTeamScore: +text("home-team-score"),
awaitTeamScore: +text("away-team-score"),
};
})
);
console.log(content);
})()
.catch(err => console.error(err))
.finally(() => browser?.close());
Notes:
waitForSelector. It's guaranteed to be the element, otherwise it'll throw if it can't find it in time.await elements[i].$(".Ip") isn't going to help you access anything, since there's no .Ip within the .Ip element you're already holding a handle to (as far as I can tell).