python · selenium-webdriver · web-scraping

Not able to extract any element from Google maps using Selenium


I'm trying to iterate through a list to extract information about businesses in Google Maps (name, address, and URL). My script selects the first result correctly, but I cannot extract anything from that result. I've tried explicit waits and various selectors (which I copied from inspecting the element). There has to be something that I just don't understand; I am pretty new to Selenium, so any help is appreciated.

Here is my current code:

import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
#from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

# Launch a local Chrome browser session.
driver = webdriver.Chrome() 


# Open the Google Maps search results page for "Ace Hardware".
url = 'https://www.google.com/maps/search/Ace+Hardware'
driver.get(url)
#wait = WebDriverWait(driver, 10)
# Locate and click on the link to the first search result
# NOTE(review): this absolute XPath was copied from DevTools and depends on
# Google Maps' generated DOM (the "QA0Szd" id) — it is brittle and may stop
# matching whenever the page markup changes.
result = driver.find_element(By.XPATH,'//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div[1]/div[3]/div/a')
result.click()

# Snapshot of the page HTML after the click (captured but never used below).
page_content = driver.page_source
# Wait up to 30 s for the details-panel element, then read it.
# NOTE(review): both XPaths below are absolute paths into the live DOM;
# presumably they target the website button and the address button on the
# place-details panel — verify against the current page structure, since a
# stale path here is the likely reason nothing is extracted.
wait(driver, 30).until(EC.presence_of_element_located((By.XPATH,'/html/body/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[9]/div[3]/button/div/div[2]/div[1]')))
url_element = driver.find_element(By.XPATH,'/html/body/div[3]/div[8]/div[9]/div/div/div[1]/div[2]/div/div[1]/div/div/div[9]/div[3]/button/div/div[2]/div[1]')
url = url_element.text
address_element = driver.find_element(By.XPATH,'//*[@id="QA0Szd"]/div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[9]/div[3]/button/div/div[2]/div[1]')
address = address_element.text
# Print the scraped fields and shut the browser down.
print(url)
print(address)
driver.quit()

Solution

  • This is one of the ways you can scrape the name, address, and website from the search results. The script first grabs all the links from the results, scrolling the results panel so that more entries are lazily loaded. Once all the links to the properties are collected, the script navigates to the individual links and grabs the required information.

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager
    
    url = 'https://www.google.com/maps/search/Ace+Hardware'
    
    # Anchors inside the results feed that link to individual place pages.
    selector = "//*[starts-with(@aria-label,'Results for')]//a[@aria-label][starts-with(@href,'https://www.google.com/maps/')]"
    
    
    def _load_all_results(driver, max_scrolls=100):
        """Scroll the results feed until Google Maps reports the end of the list.
    
        Sending END to a result link forces the lazily-loaded feed to fetch
        more entries.  ``max_scrolls`` bounds the loop so the script cannot
        hang forever if the end-of-list marker never appears.
        """
        for _ in range(max_scrolls):
            driver.find_element(By.XPATH, selector).send_keys(Keys.END)
            try:
                WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//span[contains(.,'reached the end of the list')]")
                    )
                )
            except Exception:
                continue  # marker not visible yet; keep scrolling
            return
    
    
    def _collect_links(driver):
        """Return the unique place URLs currently present in the results feed."""
        links = [a.get_attribute("href") for a in driver.find_elements(By.XPATH, selector)]
        # dict.fromkeys dedupes in O(n) while preserving first-seen order
        # (the original list-membership check was O(n^2)).
        return list(dict.fromkeys(links))
    
    
    def _first_or_default(driver, by, locator, attribute=None, timeout=10):
        """Wait for one element; return its text (or *attribute*), or '' on timeout."""
        try:
            element = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((by, locator))
            )
        except Exception:
            return ""
        return element.get_attribute(attribute) if attribute else element.text
    
    
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(url)
        _load_all_results(driver)
        for property_link in _collect_links(driver):
            driver.get(property_link)
            name = _first_or_default(driver, By.CSS_SELECTOR, "div[role='main'] h1")
            website = _first_or_default(driver, By.CSS_SELECTOR, 'a[data-tooltip="Open website"]', attribute="href")
            address = _first_or_default(driver, By.CSS_SELECTOR, 'button[data-tooltip="Copy address"]', attribute="aria-label")
            print(name, website, address)
    finally:
        driver.quit()  # original never quit the driver, leaking the browser on any error