I am trying to extract number of youtube comments and tried several methods.
My Code:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
DRIVER_PATH = <your chromedriver path>
wd = webdriver.Chrome(executable_path=DRIVER_PATH)
url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
wd.get(url)
wait = WebDriverWait(wd, 100)
time.sleep(40)
v_title = wd.find_element_by_xpath('//*[@id="container"]/h1/yt-formatted-string').text
print("title Is ")
print(v_title)
comments_xpath = '//h2[@id="count"]/yt-formatted-string/span[1]'
v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
#wd.find_element_by_xpath(comments_xpath)
print(len(v_comm_cnt))
I get the following error:
selenium.common.exceptions.TimeoutException: Message:
I get correct value for title but not for comment_cnt. Can any one please guide me what is wrong with my code?
Please note that comments count path - //h2[@id="count"]/yt-formatted-string/span[1] point to correct place if I search the value in inspect element.
Updated answer
Well, it was tricky!
There are several issues here:
driver.get()
method to wait until the timeout for the page loaded while it looks like the page is loaded. To overcome that I used Eager
page load strategy.from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
options = Options()
options.add_argument("--start-maximized")
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "eager"
s = Service('C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, desired_capabilities=caps, service=s)
url = 'https://www.youtube.com/watch?v=5qzKTbnhyhc'
driver.get(url)
driver.maximize_window()
wait = WebDriverWait(driver, 10)
title_xpath = "//div[@class='style-scope ytd-video-primary-info-renderer']/h1"
alternative_title = "//*[@id='title']/h1"
v_title = ""
try:
v_title = wait.until(EC.visibility_of_element_located((By.XPATH, title_xpath))).text
except:
v_title = wait.until(EC.visibility_of_element_located((By.XPATH, alternative_title))).text
print("Title is " + v_title)
comments_xpath = "//div[@id='title']//*[@id='count']//span[1]"
driver.execute_script("window.scrollBy(0, arguments[0]);", 600)
try:
v_comm_cnt = wait.until(EC.visibility_of_element_located((By.XPATH, comments_xpath)))
except:
pass
v_comm_cnt = driver.find_element(By.XPATH, comments_xpath).text
print("Video has " + v_comm_cnt + " comments")
The output is:
Title is Music for when you are stressed 🍀 Chil lofi | Music to Relax, Drive, Study, Chill
Video has 834 comments
Process finished with exit code 0