I’m new to web scraping with Selenium, and I’m trying to scrape property listings from Booking.com. My code (included below) successfully scrapes 25 results, but I suspect more results would be available if I scrolled down and clicked the "Load more results" button.
I've tried using execute_script to scroll and find_element to locate the button, but I’m not sure how to implement a loop that continues loading results until the button disappears (or no more results are available).
Here's my code so far:
# Relevant imports
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# WebDriver setup
# NOTE(review): Service() with no executable path relies on Selenium Manager
# (Selenium 4.6+) to locate/download a matching chromedriver automatically.
driver = webdriver.Chrome(service=Service())
# Booking.com search-results page for the query (URL truncated in the post).
driver.get("https://www.booking.com/searchresults.en-gb.html?ss=cornwall...")
def handle_no_such_element_exception(data_extraction_task):
    """Run *data_extraction_task* and return its result.

    Returns None instead of raising when the element the task looks up
    is not present on the page (NoSuchElementException).
    """
    try:
        result = data_extraction_task()
    except NoSuchElementException:
        return None
    return result
items = []
# Load-more logic: the "Load more results" button is only rendered once the
# page has been scrolled to the bottom, so the original wait timed out
# immediately. Scroll first, then wait for the button; stop once the wait
# times out (no button left => no more results).
while True:
    try:
        # Bring the lazy-loaded bottom of the results list (and the button)
        # into the DOM before waiting for it.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        load_more_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "[data-testid='load-more-button']"))
        )
        load_more_button.click()
        print("Clicked load more button...")
    except (TimeoutException, NoSuchElementException):
        print("No more results to load.")
        break
# Scraping logic: collect one dict per property card currently on the page.
property_cards = driver.find_elements(By.CSS_SELECTOR, '[data-testid="property-card"]')
for card in property_cards:
    # Each field lookup is wrapped so a card missing a sub-element yields
    # None for that field instead of aborting the whole scrape.
    record = {
        "title": handle_no_such_element_exception(
            lambda: card.find_element(By.CSS_SELECTOR, '[data-testid="title"]').text
        ),
        "address": handle_no_such_element_exception(
            lambda: card.find_element(By.CSS_SELECTOR, '[data-testid="address"]').text
        ),
        "review_score": handle_no_such_element_exception(
            lambda: card.find_element(By.CSS_SELECTOR, '[data-testid="review-score"]').text
        ),
        "link": handle_no_such_element_exception(
            lambda: card.find_element(By.CSS_SELECTOR, '[data-testid="title-link"]').get_attribute("href")
        ),
    }
    items.append(record)
print(items)
driver.quit()
What I’m asking: how can I implement a loop that keeps scrolling and clicking "Load more results" until all available results are loaded, and only then run the scraping logic?
I made a few changes to your code to get it working for your case:
Here is the relevant part:
# Dismiss the OneTrust cookie-consent banner if it shows up; its overlay
# can intercept clicks on the "load more" button. If the banner never
# appears (e.g. consent already stored), the wait times out — that is not
# an error, so swallow the TimeoutException instead of crashing.
try:
    cookie_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
    )
    cookie_button.click()
except TimeoutException:
    pass  # no banner to dismiss
# Scroll to load more results using JavaScript on the client.
# The page lazy-loads results as you scroll, so keep jumping to the bottom
# until the document height stops growing (requires `import time` at the top).
prev_height = -1
max_scrolls = 100  # safety cap so a page that keeps growing can't loop forever
scroll_count = 0
while scroll_count < max_scrolls:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1.5) # give some time for new results to load
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == prev_height:
        # no more elements were loaded
        break
    prev_height = new_height
    scroll_count += 1
# After infinite scroll is exhausted, keep clicking the "load more" button
# until it no longer appears on the page.
while True:
    try:
        # NOTE: there is no stable data-testid for this button, so we fall
        # back to the results container plus the button's generated class
        # names — brittle, but it works at the moment.
        button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, '[data-results-container="1"] button.af7297d90d.c0e0affd09')
            )
        )
        button.click()
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        print("Clicked load more button...")
    except (TimeoutException, NoSuchElementException):
        # Wait timed out: the button is gone, so every result is loaded.
        print("No more results to load.")
        break
Using the code above I was able to extract 981 items for your search query.
The code can certainly be polished further, but it works and demonstrates the idea — you can refine it as needed.
Hope this helps!