I'm trying to download a protected PDF from the New York State Courts NYSCEF website using Python. The URL looks like this:
https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=cdHe_PLUS_DaUdFKcTLzBtSo6zw==
When I try to use `requests.get()` or even navigate to the page with Selenium, I either get:
- a `403 Forbidden` response (via `requests`), or
- a missing `<embed>` tag (via Selenium).

Here’s what I’ve tried:
Using requests:
import requests

url = "https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=..."

# Browser-like headers; the Referer points at the NYSCEF landing page.
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://iapps.courts.state.ny.us/nyscef/",
}

# requests applies no timeout by default, so bound the wait explicitly
# instead of risking an indefinite hang on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
print(response.status_code)  # Always 403
And using SeleniumBase:
from seleniumbase import SB

# Open the document page in a visible (non-headless) browser session.
# `url` is the ViewDocument link assigned earlier.
with SB(headless=False) as sb:
    sb.open(url)
    sb.wait(5)  # fixed 5-second pause before querying the page
    try:
        # Both the lookup and the attribute read stay inside the try so that
        # any failure is reported by the handler below.
        embed_node = sb.find_element("embed")
        embed_src = embed_node.get_attribute("src")
        print(embed_src)
    except Exception as err:
        print("❌ No embed tag found", err)
Nothing works.
Full code for reference:
from seleniumbase import SB
import requests
import os
import time
def download_pdf_with_selenium_and_requests():
    """Fetch the NYSCEF document PDF via a browser session plus ``requests``.

    Opens the document page with SeleniumBase, reads the ``src`` of the
    page's ``<embed>`` element, copies the browser's cookies into a
    ``requests`` session, and saves the response to
    ``./downloads/NYSCEF_Document.pdf`` when it is served as a PDF.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        requests.RequestException: If the HTTP download itself fails
            (connection error, timeout, ...); it is not caught here.
    """
    # Target document URL
    doc_url = "https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=cdHe_PLUS_DaUdFKcTLzBtSo6zw=="

    # Setup download directory
    download_dir = os.path.join(os.getcwd(), "downloads")
    os.makedirs(download_dir, exist_ok=True)
    filename = os.path.join(download_dir, "NYSCEF_Document.pdf")

    # The requests session is managed alongside the browser so its
    # connection pool is released on every exit path, including early returns.
    with SB(headless=True) as sb, requests.Session() as session:
        # Step 1: Navigate to the document page (using browser session)
        sb.open(doc_url)
        time.sleep(5)  # Wait for any redirects/cookies to be set

        # Step 2: Grab the actual PDF <embed src>
        try:
            embed = sb.find_element("embed")
            pdf_url = embed.get_attribute("src")
        except Exception as e:
            print(f"No <embed> tag found: {e}")
            return
        if not pdf_url:
            # An <embed> without a src would otherwise be handed to
            # session.get() and fail there with an unrelated URL error.
            print("No <embed> src attribute found")
            return
        print(f"Found PDF URL: {pdf_url}")

        # Step 3: Copy cookies from the Selenium session, keeping each
        # cookie's domain and path so it is only sent where the browser
        # would send it. Empty-string/"/" defaults mirror an unscoped cookie.
        for cookie in sb.driver.get_cookies():
            session.cookies.set(
                cookie["name"],
                cookie["value"],
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
            )

        # Step 4: Download PDF using requests with cookies
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": doc_url,
        }
        # requests has no default timeout; bound the wait explicitly.
        response = session.get(pdf_url, headers=headers, timeout=30)

        content_type = response.headers.get("Content-Type", "")
        if response.status_code == 200 and "application/pdf" in content_type:
            with open(filename, "wb") as f:
                f.write(response.content)
            print(f"PDF saved as: {filename}")
        else:
            print(f"PDF download failed. Status: {response.status_code}")
            print(f"Content-Type: {response.headers.get('Content-Type')}")
            print(f"Final URL: {response.url}")
if __name__ == "__main__":
    # Run the download only when this file is executed as a script.
    download_pdf_with_selenium_and_requests()
Response:
No <embed> tag found: Message:
Element {embed} was not present after 10 seconds!
With SeleniumBase, you can do the following to download that file to the `./downloaded_files/` folder:
from seleniumbase import SB

url = "https://iapps.courts.state.ny.us/nyscef/ViewDocument?docIndex=cdHe_PLUS_DaUdFKcTLzBtSo6zw=="

# NOTE(review): uc=True and activate_cdp_mode() are SeleniumBase's UC Mode /
# CDP Mode entry points; external_pdf=True presumably makes the browser
# download the PDF instead of rendering it inline — confirm against the
# SeleniumBase docs.
with SB(uc=True, test=True, external_pdf=True) as sb:
    sb.activate_cdp_mode(url)
    # Keep the session open for ten seconds; presumably this gives the
    # download time to finish before the browser is torn down.
    sb.sleep(10)