javascript node.js puppeteer

Puppeteer - scroll down until you can't anymore


I am in a situation where new content is created when I scroll down. The new content has a specific class name.

How can I keep scrolling down until all the elements have loaded?

In other words, I want to reach the stage where if I keep scrolling down, nothing new will load.

I was using code to scroll down, coupled with a call to:

await page.waitForSelector('.class_name');

The problem with this approach is that after all the elements have loaded, the code keeps scrolling down; no new elements are created, and eventually I get a timeout error.

This is the code:

// Inside the page context, scroll down by one viewport height.
await page.evaluate( () => {
  window.scrollBy(0, window.innerHeight);
});
// Then wait for the '.class_name' selector. As described in the question,
// this is the step that eventually times out once scrolling no longer
// produces new elements.
await page.waitForSelector('.class_name');

Solution

  • Give this a shot:

    const puppeteer = require('puppeteer');
    
    /**
     * Example driver: opens the page in a visible browser, scrolls it to the
     * bottom with autoScroll(), then saves a full-page screenshot.
     *
     * Any failure is logged and turned into a non-zero exit code instead of
     * being left as an unhandled promise rejection.
     */
    (async () => {
        const browser = await puppeteer.launch({
            headless: false
        });
        try {
            const page = await browser.newPage();
            // Set the viewport before navigating so the page is laid out at
            // its final size from the first load instead of being resized.
            await page.setViewport({
                width: 1200,
                height: 800
            });
            await page.goto('https://www.yoursite.com');

            await autoScroll(page);

            await page.screenshot({
                path: 'yoursite.png',
                fullPage: true
            });
        } finally {
            // Always shut the browser down, even if a step above threw;
            // otherwise the launched browser process is left running.
            await browser.close();
        }
    })().catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
    
    /**
     * Scrolls the page down in 100px steps, one step every 100ms, until the
     * accumulated scroll distance covers the scrollable height of the body.
     *
     * @param {object} page - Object exposing `evaluate(fn)`; the callback is
     *   expected to run where `window` and `document` are available
     *   (a Puppeteer Page in the surrounding example).
     * @returns {Promise<void>} Resolves once the stop condition is met.
     */
    async function autoScroll(page){
        await page.evaluate(async () => {
            await new Promise((resolve) => {
                const distance = 100;
                let totalHeight = 0;
                const timer = setInterval(() => {
                    // Re-read the height on every tick so that growth of the
                    // body between ticks moves the stop threshold further down.
                    const scrollHeight = document.body.scrollHeight;
                    window.scrollBy(0, distance);
                    totalHeight += distance;

                    // The scrollable distance is the body height minus the
                    // viewport height, not the whole body height.
                    if (totalHeight >= scrollHeight - window.innerHeight) {
                        clearInterval(timer);
                        resolve();
                    }
                }, 100);
            });
        });
    }
    

    Source: https://github.com/chenxiaochun/blog/issues/38

    EDIT

    added window.innerHeight to the calculation because the available scrolling distance is body height minus viewport height, not the entire body height.

    EDIT 2

    Sure, Dan (from the comments). To add a counter that stops the scrolling, introduce a variable that is incremented on each iteration. When it reaches a certain value (say, 50 scrolls), clear the interval and resolve the promise.

    Here's the modified code with a scrolling limit set to 50:

    const puppeteer = require('puppeteer');
    
    /**
     * Example driver: opens the page in a visible browser, scrolls it down
     * with autoScroll() (capped at 50 scroll steps), then saves a full-page
     * screenshot.
     *
     * Any failure is logged and turned into a non-zero exit code instead of
     * being left as an unhandled promise rejection.
     */
    (async () => {
        const browser = await puppeteer.launch({
            headless: false
        });
        try {
            const page = await browser.newPage();
            // Set the viewport before navigating so the page is laid out at
            // its final size from the first load instead of being resized.
            await page.setViewport({
                width: 1200,
                height: 800
            });
            await page.goto('https://www.yoursite.com');

            await autoScroll(page, 50);  // set limit to 50 scrolls

            await page.screenshot({
                path: 'yoursite.png',
                fullPage: true
            });
        } finally {
            // Always shut the browser down, even if a step above threw;
            // otherwise the launched browser process is left running.
            await browser.close();
        }
    })().catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
    
    /**
     * Scrolls the page down in 100px steps, one step every 100ms, until either
     * the accumulated scroll distance covers the scrollable height of the body
     * or `maxScrolls` steps have been performed, whichever comes first.
     *
     * @param {object} page - Object exposing `evaluate(fn, ...args)`; the
     *   callback is expected to run where `window` and `document` are
     *   available (a Puppeteer Page in the surrounding example).
     * @param {number} [maxScrolls] - Upper bound on the number of scroll steps.
     *   When omitted, only the end-of-page condition stops the scrolling.
     *   A numeric value of 0 or less performs no scrolling at all.
     * @returns {Promise<void>} Resolves once a stop condition is met.
     */
    async function autoScroll(page, maxScrolls){
        // The limit is checked only after each scroll step, so without this
        // guard a limit of 0 (or a negative one) would still scroll once.
        if (typeof maxScrolls === 'number' && maxScrolls <= 0) {
            return;
        }

        await page.evaluate(async (maxScrolls) => {
            await new Promise((resolve) => {
                const distance = 100;
                let totalHeight = 0;
                let scrolls = 0;  // number of scroll steps performed so far
                const timer = setInterval(() => {
                    // Re-read the height on every tick so that growth of the
                    // body between ticks moves the stop threshold further down.
                    const scrollHeight = document.body.scrollHeight;
                    window.scrollBy(0, distance);
                    totalHeight += distance;
                    scrolls++;

                    // Stop when the end is reached or the step limit is hit.
                    const reachedEnd = totalHeight >= scrollHeight - window.innerHeight;
                    if (reachedEnd || scrolls >= maxScrolls) {
                        clearInterval(timer);
                        resolve();
                    }
                }, 100);
            });
        }, maxScrolls);  // forward the limit to the callback run by evaluate
    }