python selenium-webdriver urllib python-3.11 bytestream

How can I get a file-like object from Selenium without downloading the file to a local path?


I'm working on a parser platform. I need to download files and save them directly to an FTP server. For this I need a file-like object, because I don't want to leave junk temporary files on disk.

I need to use Selenium specifically.

For example: I need to download this document, but before I can do that I have to enter some data into a form and accept the agreement.

This code dismisses the notification, fills in and submits the form, and saves the cookies:

import os
import pickle
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

def get_file(driver: webdriver.Chrome, url: str) -> None:
    """Open *url*, dismiss the notification, submit the agreement form and save cookies.

    The session cookies are pickled to ``cookies.pkl`` in the current working
    directory after the form has been submitted.

    Args:
        driver: A running Chrome WebDriver instance.
        url: Address of the page that contains the agreement form.

    Raises:
        selenium.common.exceptions.TimeoutException: If one of the awaited
            elements does not reach the expected state within its timeout.
        selenium.common.exceptions.NoSuchElementException: If one of the form
            input fields is missing from the page.
    """
    submit_locator = (By.XPATH, '//*[@id="doc_agreement"]/div[4]/input[1]')

    driver.set_page_load_timeout(40)
    driver.get(url=url)
    time.sleep(2)

    # accept notify
    # Waiting on the locator (rather than on an element found up front) lets
    # the wait poll until the button exists; `until` returns the element, so
    # it can be clicked directly without a second lookup.
    WebDriverWait(driver, 5).until(
        ec.element_to_be_clickable((By.ID, 'ccc-notify-accept'))).click()

    # Enter some data
    WebDriverWait(driver, 2).until(ec.presence_of_element_located((By.ID, 'agreement_form')))
    form_values = (
        ('contact_name', 'Company'),
        ('contact_title', 'People'),
        ('company', 'cb'),
        ('country', 'some'),
    )
    for field_id, value in form_values:
        driver.find_element(By.ID, field_id).send_keys(value)

    # accept form
    # `until` raises TimeoutException instead of returning a falsy value, so
    # no `if` guard is needed around the click.
    WebDriverWait(driver, 5).until(ec.presence_of_element_located(submit_locator))
    WebDriverWait(driver, 5).until(ec.element_to_be_clickable(submit_locator)).click()

    time.sleep(2)

    # Save cookie
    # Fetch the cookies before opening the file so a WebDriver failure does
    # not leave a truncated cookies.pkl behind; `with` closes the handle.
    cookies = driver.get_cookies()
    with open('cookies.pkl', 'wb') as cookie_file:
        pickle.dump(cookies, cookie_file)

    # NOTE(review): presumably keeps the session alive long enough for the
    # browser to finish whatever the form submission triggers — confirm.
    time.sleep(10)

On the web I only found a way to download a document via Selenium to a local directory. This method can only save the file to a local directory.


import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def downloadDriver():
    """Create a Chrome WebDriver whose download directory is ``<cwd>/temp``.

    The driver binary is obtained through ``ChromeDriverManager().install()``.
    Chrome is started with the ``window-size`` and ``disable-gpu`` arguments
    and with preferences that point downloads at the ``temp`` directory and
    turn off the download prompt.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    # Downloads land in a "temp" folder under the current working directory.
    download_dir = os.path.join(os.getcwd(), "temp")

    browser_options = webdriver.ChromeOptions()
    for argument in ('window-size=1920x1080', "disable-gpu"):
        browser_options.add_argument(argument)

    # Preference keys are passed to Chrome verbatim via the "prefs" option.
    preferences = dict([
        ("download.prompt_for_download", False),
        ("plugins.always_open_pdf_externally", True),
        ("download.open_pdf_in_system_reader", False),
        ("profile.default_content_settings.popups", 0),
        ("download.default_directory", download_dir),
    ])
    browser_options.add_experimental_option("prefs", preferences)

    chromedriver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chromedriver_service, options=browser_options)

I tried to get the file object via urllib.request.urlopen(), but it throws a 403 error. I also tried passing the cookies from Selenium to urllib, but this didn't solve the problem.

How can I get a stream, a file-like object, or raw bytes — anything that avoids writing the file to disk?


Solution