pythonseleniumweb-scrapingexceptionstaleelementreferenceexception

selenium.common.exceptions.StaleElementReferenceException | python


I am doing web scraping using selenium in python with the following code:

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common import exceptions

def get_all_search_details(URL):
    SEARCH_RESULTS = {}

    options = Options()
    options.headless = True

    options.add_argument("--remote-debugging-port=9222") #
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-gpu")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-extensions")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    
    driver.get(URL)
    print(f"Scraping {driver.current_url}")
    try:
        medias = WebDriverWait(driver,timeout=5,).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'result-row')))
    except exceptions.StaleElementReferenceException as e:
        print(f">> {type(e).__name__}: {e.args}")
        return
    except exceptions.NoSuchElementException as e:
        print(f">> {type(e).__name__}: {e.args}")
        return
    except exceptions.TimeoutException as e:
        print(f">> {type(e).__name__}: {e.args}")
        return
    except exceptions.WebDriverException as e:
        print(f">> {type(e).__name__}: {e.args}")
        return
    except exceptions.SessionNotCreatedException as e:
        print(f">> {type(e).__name__}: {e.args}")
        return
    except Exception as e:
        print(f">> {type(e).__name__} line {e.__traceback__.tb_lineno} of {__file__}: {e.args}")
        return
    except:
        print(f">> General Exception: {URL}")
        return

    for media_idx, media_elem in enumerate(medias):
        outer_html = media_elem.get_attribute('outerHTML')      
        result = scrap_newspaper(outer_html) # some external functions
        SEARCH_RESULTS[f"result_{media_idx}"] = result
    return SEARCH_RESULTS

if __name__ == '__main__':
    in_url = "https://digi.kansalliskirjasto.fi/clippings?query=isokyr%C3%B6&categoryId=12&orderBy=RELEVANCE&page=3&resultMode=THUMB"
    my_res = get_all_search_details(in_url)

I applied several try except mentioned in documentation to ensure I would not get trapped in selenium exceptions, however, here is the error I obtained:

Traceback (most recent call last):
  File "nationalbiblioteket_logs.py", line 277, in <module>
    run()
  File "nationalbiblioteket_logs.py", line 264, in run
    all_queries(file_=get_query_log(QUERY=args.query),
  File "nationalbiblioteket_logs.py", line 219, in all_queries
    df = pd.DataFrame( df.apply( check_urls, axis=1, ) )    
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/pandas/core/frame.py", line 8740, in apply
    return op.apply()
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/pandas/core/apply.py", line 688, in apply
    return self.apply_standard()
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/pandas/core/apply.py", line 812, in apply_standard
    results, res_index = self.apply_series_generator()
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/pandas/core/apply.py", line 828, in apply_series_generator
    results[i] = self.f(v)
  File "nationalbiblioteket_logs.py", line 218, in <lambda>
    check_urls = lambda INPUT_DF: analyze_(INPUT_DF)
  File "nationalbiblioteket_logs.py", line 201, in analyze_
    df["search_results"] = get_all_search_details(in_url)
  File "/home/xenial/WS_Farid/DARIAH-FI/url_scraping.py", line 68, in get_all_search_details
    outer_html = media_elem.get_attribute('outerHTML')
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 174, in get_attribute
    self, name)
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 494, in execute_script
    'args': converted_args})['value']
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 429, in execute
    self.error_handler.check_response(response)
  File "/home/xenial/anaconda3/envs/py37/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 243, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
  (Session info: headless chrome=110.0.5481.30)

What am I doing wrong in my python script which causes such exception? I want to return None and get out of function in case such exception occurs.

Here are some more details regarding libraries I use:

>>> selenium.__version__
'4.5.0'
>>> webdriver_manager.__version__
'3.8.4'

Solution

  • Put the For loop inside of the try block to catch the StaleElementReferenceException and return none, since the Exception occurs at the line outer_html = media_elem.get_attribute('outerHTML').

    A StaleElementReferenceException occurs when you try to do something with an element that isn't attached to the page you're currently on because for example the page was refreshed, and due to that the reference to that element is "stale".