python selenium-webdriver web-scraping beautifulsoup google-trends

How to scrape Google Hot Trends


I am trying to scrape Google Hot Trends. I opened Chrome developer tools to capture all requests, but there seem to be no requests going in or out. So I tried to use Selenium, but I could not get the data for several reasons, chiefly that the data is variable and changes constantly. Here is the code I tried:

from selenium import webdriver
from selenium.webdriver.chrome import options
import os
from bs4 import BeautifulSoup

# NOTE(review): this rebinds `options` from the imported
# `selenium.webdriver.chrome.options` module to an Options instance, so the
# module is no longer reachable under this name afterwards.
options = options.Options()
# Headless mode is requested twice: via the attribute and via the CLI flag.
options.headless = True
options.add_argument("--headless")
# Page to load in the browser (the Hot Trends visualizer URL, used verbatim).
url = "https://trends.google.com/trends/hottrends/visualize?nrow=5&ncol=5&pn=p36"


def HeadlessBrowserHttpRequest(target: str) -> str:
    """Load ``target`` in Chrome and print "tile" nodes found in the page.

    Starts a Chrome driver using the module-level ``options`` and a
    ``chromedriver`` binary resolved relative to the current directory, then
    loops forever: each pass reloads the page, parses the rendered HTML and
    prints the collected nodes when the lookups succeed.

    NOTE(review): annotated ``-> str`` but the body has no ``return`` and the
    ``while True`` loop has no exit; the driver is never quit.
    """
    driver = webdriver.Chrome(
        options=options, executable_path=os.path.abspath("chromedriver")
    )
    while True:
        # The page is requested again on every iteration; presumably this
        # restarts the page's client-side rendering each time -- verify.
        driver.get(target)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        # NOTE(review): find("tile") searches for a tag *named* <tile>, not
        # for an element whose class is "tile", so this guard is unlikely to
        # ever be true for ordinary HTML.
        if soup.find("tile"):
            # find() returns a single element (or None); iterating it walks
            # the children of the first <div class="tile">, not every tile.
            titles = [title for title in soup.find("div", class_="tile")]
            if len(titles) > 0:

                print(titles)


# Runs at module level as soon as the script is executed.
HeadlessBrowserHttpRequest(url)

Solution

  • I managed to solve the problem with the following code:

    from selenium import webdriver
    from selenium.webdriver.chrome import options
    import os
    from bs4 import BeautifulSoup
    
    # NOTE(review): this rebinds `options` from the imported
    # `selenium.webdriver.chrome.options` module to an Options instance.
    # No headless setting is applied in this version.
    options = options.Options()
    # Page to load in the browser (the Hot Trends visualizer URL, used verbatim).
    url = "https://trends.google.com/trends/hottrends/visualize?nrow=5&ncol=5&pn=p36"
    
    
    def HeadlessBrowserHttpRequest(target: str) -> None:
        """Open ``target`` in Chrome and print each finished trend card.

        The page is loaded once; the rendered DOM is then polled for the
        element whose class attribute is exactly ``card current done-typing``.
        Whenever that card's contents differ from the last ones printed, the
        text of each of its child nodes is printed on its own line.

        The loop has no natural end: it runs until the process is interrupted
        (for example with Ctrl-C) or the driver raises. In either case the
        exception propagates to the caller after the browser has been closed.

        Args:
            target: URL of the page to open.
        """
        import time  # local import: only the polling delay needs it

        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
        # of passing a Service object -- confirm against the installed version.
        driver = webdriver.Chrome(
            options=options, executable_path=os.path.abspath("chromedriver")
        )
        poll_interval = 0.5  # seconds between DOM snapshots; avoids a busy loop
        last_printed = None
        try:
            driver.get(target)
            while True:
                soup = BeautifulSoup(driver.page_source, "html.parser")
                # Look the card up once per pass and reuse the result.
                card = soup.find("div", class_="card current done-typing")
                if card is not None:
                    texts = [child.text for child in card]
                    # Skip snapshots identical to the previous one so the same
                    # trend is not printed on every poll.
                    if texts and texts != last_printed:
                        for text in texts:
                            print(text)
                        last_printed = texts
                time.sleep(poll_interval)
        finally:
            # Always release the browser process, even on Ctrl-C or errors.
            driver.quit()


    HeadlessBrowserHttpRequest(url)