I'm working on scraping Wallapop to build a listing of second-hand cars, but Wallapop limits the results to 26 items. If you click "Ver más productos" (See more products), you can see more products on the website.
I'm trying to find the button with this Python code:
# Repeatedly click the button inside the first <walla-button> shadow root.
# The loop ends on the first exception of any kind (element missing, null
# shadow root, click failure, ...), after printing the message below.
while True:
    try:
        host = driver.find_element(By.CSS_SELECTOR, "walla-button")
        # Reach into the custom element's shadow DOM through JavaScript.
        root = driver.execute_script('return arguments[0].shadowRoot', host)
        inner_button = root.find_element(
            By.CSS_SELECTOR,
            "button.walla-button__button.walla-button__button--medium.walla-button__button--tertiary.walla-button__button--neutral",
        )
        inner_button.click()
    except Exception:
        print("No se encontró el botón 'Ver más productos' o ya se han cargado todos los productos.")
        break
Thanks to Yaroslavm's suggestion on Stack Overflow dated Apr. 7, 2024 ("Click on element in shadow-root Selenium Python"), this problem has been solved. The key point is to wait long enough before clicking the button inside the shadow DOM.
By the way, using lxml.html may help collect the attributes of the listings faster.
from time import sleep

import lxml.html
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Start a Chrome session with a 5-second implicit wait and an ActionChains
# helper bound to it.
# NOTE(review): the script also uses explicit WebDriverWait calls; Selenium's
# documentation advises against mixing implicit and explicit waits - confirm
# the implicit wait is really wanted.
driver = webdriver.Chrome()
driver.implicitly_wait(5)
actions = ActionChains(driver)

# Search page to open; all filters are carried in the query string.
url = 'https://es.wallapop.com/app/search?engine=gasoline&min_horse_power=1&max_horse_power=1000&min_year=2006&max_year=2024&min_km=40000&max_km=200000&min_sale_price=1000&max_sale_price=90000&gearbox=automatic,manual&brand=Honda&model=Civic&filters_source=default_filters&keywords=&category_ids=100&longitude=-3.69196&latitude=40.41956&order_by=price_high_to_low'
driver.get(url)

# Accept the cookie banner as soon as its button is clickable (40 s limit).
cookie_locator = (By.XPATH, "//button[@id='onetrust-accept-btn-handler']")
cookie_wait = WebDriverWait(driver, 40)
accept_button = cookie_wait.until(EC.element_to_be_clickable(cookie_locator))
accept_button.click()

# Fixed 20-second pause before touching the page again.
sleep(20)
# Click the "load more" button up to two times. The clickable <button> lives
# inside the shadow root of the <walla-button id="btn-load-more"> element,
# so it is located in two steps: shadow host first, inner button second.
for i in range(2):
    try:
        # Step 1: wait (max 20 s) for the shadow host to be present.
        button = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located(
                (By.XPATH, "//walla-button[@id='btn-load-more']")
            )
        )
    except TimeoutException:
        # The host did not show up in time: stop trying to load more.
        # Only the wait's timeout is handled here, so Ctrl-C and genuine
        # WebDriver failures are no longer silently swallowed.
        break
    else:
        # Bring the host to the middle of the viewport before clicking.
        driver.execute_script(
            'arguments[0].scrollIntoView({behavior: "auto", block: "center"});',
            button,
        )
        shadow = button.shadow_root  # only currently works in Chromium based browsers
        # Step 2: wait (max 20 s) for the inner button to become clickable,
        # searching from the shadow root instead of the document.
        button2 = WebDriverWait(shadow, 20).until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "button.walla-button__button--medium")
            )
        )
        button2.click()
        print('\nload more button clicked-- {}'.format(i+1))
# Load further listings by scrolling: run at most 40 rounds, and stop early
# once the number of listing links has been unchanged on three consecutive
# checks.
pre_count = 0
same_count_times = 0
for _round in range(40):
    sleep(2)
    # Each round sends 20 PAGE_DOWN key presses.
    for _ in range(20):
        actions.send_keys(Keys.PAGE_DOWN).perform()
    WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located((By.XPATH, "//body"))
    )

    # Take the HTML Selenium currently holds and parse it with lxml.
    page = driver.page_source  # type : text
    p_root = lxml.html.fromstring(page)
    list_elements = p_root.xpath("//a[@class='ItemCardList__item']")
    new_count = len(list_elements)
    print('len(list_elements): ',new_count)

    # Count consecutive rounds without change; any change resets the streak.
    same_count_times = same_count_times + 1 if new_count == pre_count else 0
    pre_count = new_count
    if same_count_times > 2:
        break
# Block here, re-asking the question, until the user answers exactly 'y'.
while input('Do you close this session?') != 'y':
    pass