python, selenium-webdriver, web-scraping

Web scraping problem: no such element


I am trying to do web scraping using Selenium, and I am getting the error message below. The issue is a "no such element" error — what does it mean?

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"a[1]/div[1]/img"}
  (Session info: chrome=117.0.5938.149); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception

I have searched extensively but could not solve it. Can anyone help me? I have made several modifications to my code, but the same issue still exists. Here is my code:

from selenium import webdriver
import time as t
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


import urllib
import cv2
import os

# Create the download root directory. exist_ok=True makes this a no-op when
# the directory already exists, replacing the bare try/except that silently
# swallowed every error (including real ones such as a missing drive).
os.makedirs("G:/Smokking_Project", exist_ok=True)




# Sub-directory name for this image category.
name = "smoked"

# Hide the "Chrome is being controlled by automated software" banner.
opts = webdriver.ChromeOptions()
opts.add_experimental_option("excludeSwitches", ['enable-automation'])

# Selenium Manager locates a matching chromedriver automatically; no
# executable_path is needed.
driver = webdriver.Chrome(options=opts)

# Google Images results page for the target query.
search_url = "https://www.google.com/search?q=smokinng&tbm=isch&ved=2ahUKEwi8k9zn9eOBAxVtlycCHTa_DnUQ2-cCegQIABAA&oq=smokinng&gs_lcp=CgNpbWcQAzIJCAAQGBCABBAKMgkIABAYEIAEEAoyCQgAEBgQgAQQCjoECCMQJzoFCAAQgAQ6BggAEAUQHjoECAAQHjoICAAQgAQQsQM6BAgAEAM6BwgAEBgQgARQjwdY8xJg-RloAHAAeACAAb0BiAHsCZIBAzAuOZgBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=uUwhZfzSFO2unsEPtv66qAc&bih=723&biw=1517&hl=en"
driver.get(search_url)
t.sleep(3)

# Container element that holds all of the result thumbnails.
pics = driver.find_element(By.XPATH, '//*[@id="islrg"]/div[1]')

links = []        # src values already seen, to avoid duplicate downloads
x = 1             # running index used to name the saved image files
last_height = 0   # page height recorded on the previous scroll pass

def download_image(url, filename):
    """Fetch *url* and write the response body to *filename*.

    Uses context managers so both the URL handle and the output file are
    closed even if the transfer fails partway through (the original leaked
    the urlopen handle and left the file open on error).
    """
    # "import urllib" alone does not guarantee the request submodule is
    # loaded; import it explicitly where it is used.
    import urllib.request
    with urllib.request.urlopen(url) as resource, open(filename, "wb") as output:
        output.write(resource.read())
    
from selenium.common.exceptions import NoSuchElementException

# Scroll to the bottom repeatedly, harvesting thumbnails on each pass; stop
# once the page height no longer grows (nothing more is lazy-loaded).
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    t.sleep(4)
    pics_ = pics.find_elements(By.XPATH, './*')

    for pic in pics_:
        try:
            # BUG FIX: search relative to each result tile with a leading
            # "./". The original called driver.find_element with a relative
            # XPath, which searches from the document root and raises
            # NoSuchElementException — the exact error in the question.
            img_link = pic.find_element(By.XPATH, './a[1]/div[1]/img').get_attribute('src')
        except NoSuchElementException:
            # Not every child of the results grid is an image tile; skip it.
            continue

        if img_link and img_link not in links:
            links.append(img_link)

            # Create the category directory once per image (idempotent); the
            # original duplicated this try/except block verbatim.
            target_dir = os.path.join('G:/Smokking_Project', name)
            os.makedirs(target_dir, exist_ok=True)

            # BUG FIX: save under the same G:/ root that was created above;
            # the original wrote to a relative 'Smokking_Project/...' path.
            file_name = os.path.join(target_dir, str(x) + '.jpg')
            download_image(img_link, file_name)
            x += 1

    new_height = driver.execute_script("return document.body.scrollHeight")
    print(new_height)
    if new_height == last_height:
        break
    last_height = new_height


driver.close()

thanks


Solution

  • The following code should locate the images:

    [..]
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    import time as t 
    [..]
    
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 5)
    
    url = 'https://www.google.com/search?q=smokinng&tbm=isch&ved=2ahUKEwi8k9zn9eOBAxVtlycCHTa_DnUQ2-cCegQIABAA&oq=smokinng&gs_lcp=CgNpbWcQAzIJCAAQGBCABBAKMgkIABAYEIAEEAoyCQgAEBgQgAQQCjoECCMQJzoFCAAQgAQ6BggAEAUQHjoECAAQHjoICAAQgAQQsQM6BAgAEAM6BwgAEBgQgARQjwdY8xJg-RloAHAAeACAAb0BiAHsCZIBAzAuOZgBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=uUwhZfzSFO2unsEPtv66qAc&bih=723&biw=1517&hl=en'
    driver.get(url)
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        img_links = wait.until(EC.presence_of_all_elements_located((By.XPATH,'//a[1]/div[1]/img')))
        t.sleep(1)
        for img in img_links:
          print(img.get_attribute('src'))  
    ### do whatever you want with this data, I'm printing it for demo purposes
    ### also make sure you have some condition to break out of this infinite loop
    

    This will return the `src` attribute for every image on the page: some will be base64-encoded strings, and some will be URLs.

    The Selenium documentation can be found at https://www.selenium.dev/documentation/.