Tags: python, excel, pandas, selenium-webdriver, openpyxl

Scrape images into an excel file - Selenium/Pandas/Python


I'm looking to scrape an image from a website into an Excel file.

Below is a snippet of my code. It gets the image's SRC URL and puts it into the Excel sheet. Is it possible to insert the actual image instead of the URL?

if len(driver.find_elements(By.CSS_SELECTOR, '#FMP-target'))>0:
    image = driver.find_element(By.CSS_SELECTOR, '#FMP-target').get_attribute('src')
    print(image)
    images.append(image)
else:
    photo = "No photo"
    print(photo)
    images.append(photo)
driver.quit()

df = pd.DataFrame(zip(images,house_name,description_details,house_price,specs,ratings,cancellation_policy,urls),columns=['Photo','Property Name','Description','Price','Specifications','Ratings','Cancellation Policy','Link'])
df.to_excel(r{file},index=False)

This is the URL I'm testing with.


Solution

  • Example that captures each image found on the page and inserts it into the sheet down column A, using each image's height to calculate the anchor cell for the next image.

    import io
    
    from openpyxl import Workbook
    from openpyxl.drawing.image import Image as opImage
    from PIL import Image
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    
    url = "https://www.airbnb.com.au/rooms/50961691?adults=9&children=2&pets=1&search_mode=regular_search&check_in=2024-12-22&check_out=2024-12-28&source_impression_id=p3_1720759519_P3FseNFXiyBEU7hU&previous_page_section_name=1000&federated_search_id=7f29c222-115b-444d-bcdd-895c3e3151b4"
    
    # Divisor used to turn an image height in pixels into a number of sheet rows.
    # NOTE(review): presumably the approximate pixel height of a default-height
    # row — adjust if the sheet uses custom row heights.
    ROW_HEIGHT_PX = 19
    # Extra rows added after each image so consecutive images do not touch.
    ROW_PADDING = 2
    
    sheet_row = 1  # Row of the anchor cell for the next image
    
    wb = Workbook()
    ws = wb.active
    
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        ### Get images from Web Site
        image_elements = driver.find_elements(By.TAG_NAME, "img")
    
        ### Add each image found to the Sheet
        for img in image_elements:
            # Skip elements that render with no area: the driver cannot take a
            # screenshot of an element whose width or height is zero, and one
            # such element would otherwise abort the whole run.
            size = img.size
            if not (size["width"] and size["height"]):
                continue
    
            # Capture the element as rendered and drop any alpha channel.
            img_png = Image.open(io.BytesIO(img.screenshot_as_png)).convert("RGB")
    
            ### Format Image for Openpyxl
            img_stream = io.BytesIO()
            img_png.save(img_stream, format='PNG')
            img_stream.seek(0)  # Hand openpyxl a stream positioned at the start
            img_openpyxl = opImage(img_stream)
            print(f"Image Height: {img_openpyxl.height}")
    
            ### Insert image at Sheet anchor cell
            location = f"A{sheet_row}"
            print(f"Sheet Location: {location}")
            ws.add_image(img_openpyxl, location)
    
            ### Set next row for image based on last image size
            sheet_row += img_openpyxl.height // ROW_HEIGHT_PX + ROW_PADDING
    
        wb.save("image.xlsx")
    finally:
        # Always close the browser, even if scraping or saving fails.
        driver.quit()