I'm working on a function that returns an HTTP response from https://pypi.org/simple/ when Python's pip installer requests it for a package. When pushing my code to GitHub, the CodeQL checks warn of the risk of server-side request forgery (SSRF) and ask me to create validation checks for the "user-defined input" (which is pip, in this case).
I have already made many attempts at validating the URL to satisfy this SSRF warning, but GitHub CodeQL has not accepted any of them so far. How can I rewrite the following to satisfy GitHub CodeQL's requirements for guarding against SSRF?
The relevant block of code:
import requests
from fastapi import APIRouter, Response
pypi = APIRouter(prefix="/pypi", tags=["bootstrap"])
@pypi.get("/{package}/", response_class=Response)
def get_pypi_package_downloads_list(package: str) -> Response:
"""
Obtain list of all package downloads from PyPI via the simple API (PEP 503).
"""
url = f"https://pypi.org/simple/{package}"
full_path_response = requests.get(url)
The following is a non-exhaustive overview of attempts I've tried in order to satisfy that SSRF warning. However, none of them have worked for me.
# Attempt 1
# Check that it's a PyPI URL
url = f"https://pypi.org/simple/{package}"
if "pypi" in url:
full_path_response = requests.get(url)
else:
raise ValueError("This is not a valid package")
# Attempt 2
# Validate that package name is alphanumeric (allow _ and -)
if package.replace("_", "").replace("-", "").isalnum():
url = f"https://pypi.org/simple/{package}"
full_path_response = requests.get(url)
else:
raise ValueError("This is not a valid package")
# Attempt 3
# Check that it's a valid connection
# The URL must be an f-string; without the prefix the literal text
# "{package}" is requested instead of the package name.
with requests.get(f"https://pypi.org/simple/{package}") as http_response:
    if http_response.status_code == 200:
        full_path_response = http_response
    else:
        raise ValueError("This is not a valid package")
# Attempt 4
# Tried using RegEx matching to validate package name
if re.match(r"^[a-z0-9\_\-]+$", package):
full_path_response = requests.get(f"https://pypi.org/simple/{package}")
else:
raise ValueError("This is not a valid package")
# Attempt 5
# Use urllib.parse.urlparse to parse and validate the url
def validate_url(url: str) -> bool:
    """Return True only when *url* uses HTTPS and its host is ``pypi.org``.

    The check looks at the parsed scheme and hostname only; the path and
    query portions of the URL are not inspected.
    """
    parsed_url = urlparse(url)
    # Both comparisons already yield a bool, so no if/else is needed.
    return parsed_url.scheme == "https" and parsed_url.hostname == "pypi.org"
def validate_package(package: str) -> bool:
    """Return True when *package* is alphanumeric apart from ``_`` and ``-``.

    Underscores and hyphens are stripped before the ``str.isalnum()`` test,
    so an empty name, or one made up solely of ``_``/``-``, is rejected
    (``"".isalnum()`` is False).

    NOTE: ``str.isalnum()`` also accepts non-ASCII letters and digits.
    """
    return package.replace("_", "").replace("-", "").isalnum()
# Validate package and URL
if validate_package(package) and validate_url(f"https://pypi.org/simple/{package}"):
full_path_response = requests.get(
f"https://pypi.org/simple/{package}"
) # Get response from PyPI
else:
raise ValueError("This is not a valid package")
# Attempt 6
# Using a Pydantic model
from pydantic import BaseModel, HttpUrl, ValidationError
class UrlValidator(BaseModel):
    """Pydantic model used solely to check that a value parses as ``HttpUrl``."""

    # The URL under test; pydantic raises ValidationError if it does not parse.
    url: HttpUrl
def validate(url: str) -> bool:
    """Report whether *url* is accepted by ``UrlValidator``.

    Logs the outcome either way. Returns False when constructing the model
    raises ``ValidationError`` and True otherwise. Nothing here restricts
    which host the URL points at.
    """
    try:
        UrlValidator(url=url)
    except ValidationError:
        log.error(f"{url} was not a valid URL")
        return False
    else:
        log.info(f"{url} was a valid URL")
        return True
# Attempt at URL validation to satisfy GitHub CodeQL requirements
url = f"https://pypi.org/simple/{package}"
if validate(url):
full_path_response = requests.get(url)
# Attempt 7
# Encoding string before injection
from urllib.parse import quote_plus
def _validate_package_name(package: str) -> bool:
# Check that it only contains alphanumerics, "_", or "-", and isn't excessively long
if re.match(r"^[a-z0-9\-\_]+$", package):
return True
else:
return False
def _get_full_path_response(package: str) -> requests.Response:
    """Fetch the PyPI simple-API page for *package* after sanitising the name.

    The name is percent-encoded with ``quote_plus`` and the encoded form is
    then checked by ``_validate_package_name``.

    Raises:
        ValueError: if the encoded name fails validation.
    """
    # Sanitise string
    package_clean = quote_plus(package)
    print(f"Cleaned package: {package_clean}")

    # Validation checks: reject before any URL is built
    if not _validate_package_name(package_clean):
        raise ValueError(f"{package_clean} is not a valid package name")

    url = f"https://pypi.org/simple/{package_clean}"
    print(f"URL: {url}")
    # Without a timeout, requests can wait indefinitely on a stalled server.
    return requests.get(url, timeout=10)
full_path_response = _get_full_path_response(package)
# Attempt 8
# The nuclear option of maintaining a list of approved packages
approved_packages: list = [pkg.lower() for pkg in approved_packages] # List of package names from running `conda env list`
# Validate package and URL
if package.lower() in approved_packages:
url = f"https://pypi.org/simple/{package}"
full_path_response = requests.get(url)
else:
raise ValueError(f"{package} is not a valid package name")
Thanks!
It turns out that this was a false positive due to the py/partial-ssrf check being incorrectly written. This issue has recently been reported as resolved, and the fix will be implemented in GitHub CodeQL v2.18.0.
From the dialogue, partial-SSRF can be guarded against as follows:
At that point, validating your input using mechanisms like isalnum() (as stated in the query help) or regex matches should help you avoid this alert.