I am trying to scrape a web page to get the Pet shops names and corresponding addresses through looping the country states and cities. Then export it to a Excel file. I found out that when there is a city that does not contain any Pet Shop I get a TimeoutException message. I tried to scape it but I had no success. I need some help, please.
Here is my code: (I scaped the state loop to make it shorter)
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
driver_path_location = "C:\\Users\\aclav\\CHROMEDRIVER\\chromedriver.exe"
driver = webdriver.Chrome(driver_path_location)
driver.get("https://granplus.com.br/onde-comprar-lojas-fisicas/")
# Getting inside the iFrame
driver.switch_to.frame(driver.find_element_by_xpath("//iframe[@class='cz-map-frame']"))
# Click on Button "Por Cidade"
driver.find_element_by_xpath("//input[@id='searchType_2']//following::span[@class=\
'radio-checkmark'][1]").click()
# Click on Menu "Estado"
driver.find_element_by_css_selector('[name="estado"]').click()
# Select a State
driver.find_element_by_xpath('//option[@value="RN"]').click()
Petshops = []
Addresses = []
# Getting the cities names
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,\
'//select[@name="cidade"]'))).click()
selcidade = driver.find_element_by_xpath('//select[@name="cidade"]')
cidades = selcidade.find_elements_by_tag_name('option')
cidadeList = []
# Recording the cities in cidadeList
for cidade in cidades:
cidadeList.append(cidade.get_attribute("value"))
cidadeList.remove('0') # Remove option 0 "Escolha uma cidade"
# Select a city
for cidadeValue in cidadeList:
selcidade = Select(driver.find_element_by_xpath('//select[@name="cidade"]'))
cidadeSelecionada = selcidade.select_by_value(cidadeValue)
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Petshop in List_Petshops:
Petshops.append(Petshop.text) # Records the Petshops names in a list
# Getting the Petshops addresses
List_Addresses = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="p-t-5"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
# Transforming in a Data Frame and exporting to Excel
df1 = pd.DataFrame(Petshops, columns=['Petshops'])
df1.to_excel("Petshops.xlsx",sheet_name="Petshops")
df2 = pd.DataFrame(Addresses, columns=['Addresses'])
df2.to_excel("Enderecos.xlsx",sheet_name="Enderecos")
df = df1.merge(df2, left_index=True, right_index=True)
driver.quit()
As you mentioning yourself in the code, here:
# Getting the Petshops names by city
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
You are getting pet shops per selected city.
You are waiting for presence of pet shops with 10 seconds timeout.
So, what will happen if no pet shop found in that city?
Selenium will fail finding such elements in the defined timeout of 10 seconds and TimeoutException
will be thrown.
To overcome this issue you can use try-except
, something like this:
try:
List_Petshops = WebDriverWait(driver,\
10).until(EC.presence_of_all_elements_located((By.XPATH,'//span[@class="pdv-item__title"]')))
for Address in List_Addresses:
Addresses.append(Address.text) # Armazenar os Enderecos em uma lista
except:
#Do what you think will be logically correct here or at least print
print("No pet shops found in this city")