I'm making an image scraper for Bing because it has a well-developed search-by-license function. In any case, I can't seem to get more than one image, because I can't get back to the main page after switching to the iframe to get the src of the first image. Any tips?
import selenium
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Full-size image URLs collected so far.
image_url = []
wd = webdriver.Firefox()
# Shared explicit wait: up to 10 seconds per condition.
wait = WebDriverWait(wd, 10)

## search bing for images
# The query string filters results by license (license-L1) and large image size.
search_url = "https://www.bing.com/images/search?q=cat%20clipart&qs=n&form=QBIR&qft=%20filterui%3Alicense-L1%20filterui%3Aimagesize-large&sp=-1&pq=good%20clipart&sc=8-12&cvid=493F746CDC7B4E70BF3BEDDA3CF674E1&first=1&scenario=ImageBasicHover"
# load the page
wd.get(search_url)

# Click the first thumbnail on the results page.
# find_element(By..., ...) replaces find_element_by_css_selector, which was
# removed in Selenium 4.3.
thumbnail_result = wd.find_element(By.CSS_SELECTOR, "img.mimg")
thumbnail_result.click()

# The full-size image is looked up inside the "OverlayIFrame" iframe, so the
# driver has to switch into that frame before searching for it.
wait.until(
    EC.frame_to_be_available_and_switch_to_it((By.ID, "OverlayIFrame"))
)
# presence_of_element_located returns the located element, so the wait result
# is used directly instead of performing a second lookup.
actual_image = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "img.nofocus"))
)
image_url.append(actual_image.get_attribute("src"))

## This is where it doesn't switch back to the original webpage
# default_content() only moves the driver's focus out of the iframe.
wd.switch_to.default_content()
Here is the code with the changes you suggested:
import selenium
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Full-size image URLs collected so far.
image_url = []
wd = webdriver.Firefox()
# Shared explicit wait: up to 10 seconds per condition.
wait = WebDriverWait(wd, 10)

# The query string filters results by license (license-L1) and large image size.
search_url = "https://www.bing.com/images/search?q=cat%20clipart&qs=n&form=QBIR&qft=%20filterui%3Alicense-L1%20filterui%3Aimagesize-large&sp=-1&pq=good%20clipart&sc=8-12&cvid=493F746CDC7B4E70BF3BEDDA3CF674E1&first=1&scenario=ImageBasicHover"
# load the page
wd.get(search_url)

# Wait until the first thumbnail is clickable, then click it.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "img.mimg"))).click()

# Switch into the "OverlayIFrame" iframe, where the full-size image is looked up.
wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, "OverlayIFrame")))

# presence_of_element_located returns the located element, so the wait result
# is used directly; this also avoids find_element_by_css_selector, which was
# removed in Selenium 4.3.
actual_image = wait.until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "img.nofocus"))
)
image_url.append(actual_image.get_attribute("src"))
print(image_url)

# Click the close button while the driver is still inside the iframe ...
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div#close"))).click()
# ... then move the driver's focus back to the top-level document.
wd.switch_to.default_content()

# Back on the results page: wait for a thumbnail to be clickable and click it.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "img.mimg"))).click()
`wd.switch_to.default_content()` does not return you to the main page; it is only used to exit the iframe.
You need to click the close
button to close the image page. Once you click it, you will see the main page again.
# Record the image source while the driver is still inside the iframe.
image_src = actual_image.get_attribute('src')
image_url.append(image_src)
# Wait for the close button to become clickable, then click it to go back to
# the main page.
close_button = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, "div#close"))
)
close_button.click()
# Finally, leave the iframe so later lookups run against the top-level document.
wd.switch_to.default_content()