import time
from selenium import webdriver
def main():
    """Download the Yahoo Finance TSLA quote page as file number 0."""
    # Plain string literal: the URL has no placeholders, so no f-prefix is needed.
    download("https://finance.yahoo.com/quote/TSLA/", 0)
def download(url, n):
    """Load ``url`` in headless Firefox and trigger a silent ``window.print()``.

    The Firefox preferences below select the "Mozilla Save to PDF" printer
    and ask it to print to ``./Downloads/Download {n}.pdf``.

    Args:
        url: Address of the page to load.
        n: Number used in the output file name and in the progress message.
    """
    # Set the path to the Firefox WebDriver executable with the exe in the path
    driver_path = "D:/PROGRAMMING/Website-to-PDF/FirefoxDriver/geckodriver.exe"
    download_path = "./Downloads"
    output_pdf = f"Download {n}.pdf"

    firefox_options = webdriver.FirefoxOptions()
    firefox_options.add_argument('--headless')
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Start printing the page without needing to press "print"
    firefox_options.set_preference("print.always_print_silent", True)
    firefox_options.set_preference("print.show_print_progress", False)
    firefox_options.set_preference('print.save_as_pdf.links.enabled', True)

    # Get rid of the headers
    firefox_options.set_preference("print.print_headerleft", "")
    firefox_options.set_preference("print.print_headerright", "")
    firefox_options.set_preference("print.print_footerleft", "")
    firefox_options.set_preference("print.print_footerright", "")

    # Using print_printer
    firefox_options.set_preference("print_printer", "Mozilla Save to PDF")
    firefox_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
    firefox_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename', f"{download_path}/{output_pdf}")

    gecko_service = webdriver.FirefoxService(executable_path=driver_path, port=4444)
    driver = webdriver.Firefox(options=firefox_options, service=gecko_service)
    try:
        print(f"Downloading File {n}")
        driver.get(url)
        driver.implicitly_wait(10)
        driver.execute_script('window.print();')
        # Fixed pause after requesting the print; presumably this gives the
        # print job time to finish before the browser is closed.
        time.sleep(15)
    finally:
        # Always close the browser, even when loading or printing raised, so
        # no Firefox/geckodriver process is left running.
        driver.quit()
    print("Finished Download.\n")
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
No file is downloaded anywhere on my computer, and I am not given the option of where to save the file at all.
import time
from selenium import webdriver
def main():
    """Download the Yahoo Finance TSLA quote page as file number 0."""
    # Plain string literal: the URL has no placeholders, so no f-prefix is needed.
    download("https://finance.yahoo.com/quote/TSLA/", 0)
def download(url, n):
    """Load ``url`` in headless Firefox and trigger a silent ``window.print()``.

    In this variant the ``print_printer`` preferences are commented out, so
    no printer or output file name is configured here.

    Args:
        url: Address of the page to load.
        n: Number used in the progress message and in ``output_pdf``.
    """
    # Set the path to the Firefox WebDriver executable with the exe in the path
    driver_path = "D:/PROGRAMMING/Website-to-PDF/FirefoxDriver/geckodriver.exe"
    # download_path and output_pdf are only referenced by the commented-out
    # printer preferences below; they are kept as the intended save target.
    download_path = "./Downloads"
    output_pdf = f"Download {n}.pdf"

    firefox_options = webdriver.FirefoxOptions()
    firefox_options.add_argument('--headless')
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Start printing the page without needing to press "print"
    firefox_options.set_preference("print.always_print_silent", True)
    firefox_options.set_preference("print.show_print_progress", False)
    firefox_options.set_preference('print.save_as_pdf.links.enabled', True)

    # Get rid of the headers
    firefox_options.set_preference("print.print_headerleft", "")
    firefox_options.set_preference("print.print_headerright", "")
    firefox_options.set_preference("print.print_footerleft", "")
    firefox_options.set_preference("print.print_footerright", "")

    # Using print_printer
    # firefox_options.set_preference("print_printer", "Mozilla Save to PDF")
    # firefox_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
    # firefox_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename', f"{download_path}/{output_pdf}")

    gecko_service = webdriver.FirefoxService(executable_path=driver_path, port=4444)
    driver = webdriver.Firefox(options=firefox_options, service=gecko_service)
    try:
        print(f"Downloading File {n}")
        driver.get(url)
        driver.implicitly_wait(10)
        driver.execute_script('window.print();')
        # Fixed pause after requesting the print; presumably this gives the
        # print job time to finish before the browser is closed.
        time.sleep(15)
    finally:
        # Always close the browser, even when loading or printing raised, so
        # no Firefox/geckodriver process is left running.
        driver.quit()
    print("Finished Download.\n")
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
As you can see here, with the `print_printer` lines commented out, my code works, but it prompts me for where I want to save the file and what the filename should be.
I want it to save automatically in my `download_path` with the filename `output_pdf`.
I have tried changing `/` to `\` and to `\\`, but it still won't work.
I have tried a method similar to the one in this post, where I added a `user_agent` of my own and set the preference for it. However, `profile_options = FirefoxProfile()` does not work: `webdriver.Firefox(options=profile_options)` fails because the `options` parameter must be an instance of `options.Options`.
I also tried other libraries: `pyautogui` worked but gave inconsistent results, and `pdfkit` and `wkhtmltox` gave me errors that I couldn't find a solution to either.
I enabled logging for geckodriver by changing the service line to the following:
gecko_service = webdriver.FirefoxService(executable_path=driver_path, port=4444, log_output='gecko.log')
In the resulting log, I found this line:
JavaScript error: , line 0: NS_ERROR_GFX_PRINTER_NAME_NOT_FOUND
Based on that error, I changed my `print_printer` lines to:
# Select the "Microsoft Print to PDF" printer. The per-printer preference
# names embed the printer name with its spaces replaced by underscores.
profile.set_preference("print_printer", "Microsoft Print to PDF")
profile.set_preference("print.printer_Microsoft_Print_to_PDF.print_to_file", True)
# NOTE(review): output_path is not defined in the snippets shown; presumably it
# is the download path joined with the output file name - confirm.
profile.set_preference('print.printer_Microsoft_Print_to_PDF.print_to_filename', output_path)
Also, as you may have noticed, I switched from `firefox_options` to `profile`, because `selenium.webdriver.firefox.options.Options` lets you specify your own `.profile` attribute.
Here are the changes:
# Preferences are now set on a FirefoxProfile instead of on the options object.
profile = webdriver.FirefoxProfile()
profile.set_preference(...)
...
# Command-line arguments still go on an Options instance.
from selenium.webdriver.firefox.options import Options
firefox_options = Options()
firefox_options.add_argument(...)
...
# Attach the profile so its preferences are passed along with the options.
firefox_options.profile = profile
# This line remained the same
driver = webdriver.Firefox(options=firefox_options, service=gecko_service)
While making these changes, I also found that the only argument that did not give me any errors was `--headless`.