pythonseleniumselenium-webdriverselenium-chromedriverstaleelementreferenceexception

selenium: stale element reference: element is not attached to the page document


from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import chromedriver_autoinstaller

chromedriver_autoinstaller.install()


TYPES = ['user', 'verified_audience', 'top_critics']
TYPE = TYPES[2]

URL = 'https://www.rottentomatoes.com/m/dunkirk_2017/reviews'
PAGES = 2

driver = Chrome()
driver.get(URL)

data_reviews = []
while PAGES != 0:
    wait = WebDriverWait(driver, 30)
    reviews = wait.until(lambda _driver: _driver.find_elements(
        By.CSS_SELECTOR, '.review_table_row'))

    # Extracting review data
    for review in reviews:
        if TYPE == 'top_critics':
            critic_name_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-critic-link]')
            critic_review_text_el = review.find_element(
                By.CSS_SELECTOR, '[data-qa=review-text]')

            data_reviews.append(critic_name_el.text)

    try:
        next_button_el = driver.find_element(
            By.CSS_SELECTOR, '[data-qa=next-btn]:not([disabled=disabled])'
        )

        if not next_button_el:
            PAGES = 0

        next_button_el.click() # refresh new reviews
        PAGES -= 1
    except Exception as e:
        driver.quit()

Here, a rotten tomatoes review page is being opened and the reviews are being scraped, but when the next button is clicked and the new reviews are going to be scraped, this error pops up... I am guessing that the new reviews have not been loaded and trying to access them is causing the problem, I tried driver.implicitly_wait but that doesn't work too.

The error originates from line 33, data_reviews.append(critic_name_el.text)


Solution

  • By clicking a next page button next_button_el the new page is being loaded but this process takes some time while your Selenium code continues instantly after that click so probably on this line reviews = wait.until(lambda _driver: _driver.find_elements(By.CSS_SELECTOR, '.review_table_row')) it collects the elements on the old page but then the page is being refreshed so some of these elements critic_name_el collected after that (still on the old page) is no more there since the old page is refreshed.
    To make your code working you need to introduce a short delay after clicking the next page button, as following:

    data_reviews = []
    while PAGES != 0:
        wait = WebDriverWait(driver, 30)
        reviews = wait.until(lambda _driver: _driver.find_elements(
            By.CSS_SELECTOR, '.review_table_row'))
    
        # Extracting review data
        for review in reviews:
            if TYPE == 'top_critics':
                critic_name_el = review.find_element(
                    By.CSS_SELECTOR, '[data-qa=review-critic-link]')
                critic_review_text_el = review.find_element(
                    By.CSS_SELECTOR, '[data-qa=review-text]')
    
                data_reviews.append(critic_name_el.text)
    
        try:
            next_button_el = driver.find_element(
                By.CSS_SELECTOR, '[data-qa=next-btn]:not([disabled=disabled])'
            )
    
            if not next_button_el:
                PAGES = 0
    
            next_button_el.click() # refresh new reviews
            PAGES -= 1
            time.sleep(2)
        except Exception as e:
            driver.quit()
    

    Also I'd suggest to wait for elements visibility, not just presence here:

    reviews = wait.until(lambda _driver: _driver.find_elements(By.CSS_SELECTOR, '.review_table_row'))
    

    Also you need to understand that driver.implicitly_wait do not introduce any actual pause. This just sets the timeout for find_element and find_elements methods.