I'm struggling to remove an SSRF vulnerability detected by Checkmarx as part of our CI/CD pipeline at work, and I have little knowledge about fixing this kind of problem (this vulnerability prevents me from continuing with the pipeline).
I'm calling an internal REST service that stores files, in order to download a single file identified by the file_id (UUID4) parameter. I've added multiple redundant validations to make sure that the file_id is valid before adding it to the URL. The hostname is passed in from a ConfigMap.
The vulnerability is about file_id being a variable: a user can pass a different value, which, once it has been added to the URL, would allow them to access an unintended file.
Is there a better way to fix this and prevent checkmarx detecting it?
import os
import re
import uuid
import validators
import requests
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id):  # <-- Attack vector starts here
    """Request a single file from ``https://{HOSTNAME}/v1/files/{file_id}``.

    Args:
        file_id: Lowercase, hyphenated UUID4 string identifying the file.

    Raises:
        ValueError: If ``file_id`` is not a string, is not a lowercase UUID4,
            or if the ``HOSTNAME`` environment variable is unset or empty.
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    # Non-strings would make ``re`` raise TypeError; report them like any
    # other bad identifier instead.
    if not isinstance(file_id, str):
        raise ValueError('invalid file_id')
    # fullmatch, not match: with match() the trailing ``$`` still accepts a
    # value that ends in a newline.
    if not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')
    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')
    try:
        # Re-serialise through uuid.UUID so only its canonical text form is
        # ever placed in the URL.
        file_id = str(uuid.UUID(file_id))
    except ValueError:
        raise ValueError('invalid file_id') from None

    # NOTE(review): HOSTNAME is often pre-set by the OS or container runtime
    # to the machine's own name; confirm the configured value overrides it.
    hostname = os.getenv('HOSTNAME')
    if not hostname:
        # An empty value would otherwise produce 'https:///v1/files/...'.
        raise ValueError('invalid hostname')

    url = f'https://{hostname}/v1/files/{file_id}'
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=(3.05, 30))  # <-- Attack vector ends here
    if response.status_code == 200:
        # Handling of the successful response is elided in this snippet.
        pass
You can either use URL building with parameters instead of string interpolation (1) or use requests.Session
with URL Template (2)
Something like this: (1)
import os
import re
import uuid
import validators
import requests
from urllib.parse import urljoin, quote
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id):
    """Request a single file, building the URL with ``urljoin`` and ``quote``.

    Args:
        file_id: Lowercase, hyphenated UUID4 string identifying the file.

    Returns:
        The ``requests.Response`` when the service answers with status 200,
        otherwise ``None``.

    Raises:
        ValueError: If ``file_id`` is not a string, is not a lowercase UUID4,
            or if the ``HOSTNAME`` environment variable is unset or empty.
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    # Validate file_id
    # Non-strings would make ``re`` raise TypeError; report them like any
    # other bad identifier instead.
    if not isinstance(file_id, str):
        raise ValueError('invalid file_id')
    # fullmatch, not match: with match() the trailing ``$`` still accepts a
    # value that ends in a newline.
    if not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')
    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')
    try:
        validated_file_id = str(uuid.UUID(file_id))
    except ValueError:
        raise ValueError('invalid file_id') from None

    hostname = os.getenv('HOSTNAME')
    if not hostname:
        # An empty value would otherwise produce 'https:///v1/files/...'.
        raise ValueError('invalid hostname')

    # Build the URL from a fixed base plus one percent-encoded path segment.
    # safe='' makes quote() escape '/' too, so the identifier cannot add
    # extra path components. The trailing slash on the base matters:
    # urljoin() would otherwise replace the final 'files' segment.
    base_url = f'https://{hostname}/v1/files/'
    safe_file_id = quote(validated_file_id, safe='')
    url = urljoin(base_url, safe_file_id)
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=(3.05, 30))
    if response.status_code == 200:
        return response
    return None
(2)
import os
import re
import uuid
import validators
import requests
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id):
    """Request a single file through a short-lived ``requests.Session``.

    Args:
        file_id: Lowercase, hyphenated UUID4 string identifying the file.

    Returns:
        The ``requests.Response`` when the service answers with status 200,
        otherwise ``None``.

    Raises:
        ValueError: If ``file_id`` is not a string, is not a lowercase UUID4,
            or if the ``HOSTNAME`` environment variable is unset or empty.
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
    """
    # Validate file_id
    # Non-strings would make ``re`` raise TypeError; report them like any
    # other bad identifier instead.
    if not isinstance(file_id, str):
        raise ValueError('invalid file_id')
    # fullmatch, not match: with match() the trailing ``$`` still accepts a
    # value that ends in a newline.
    if not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')
    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')
    try:
        validated_file_id = str(uuid.UUID(file_id))
    except ValueError:
        raise ValueError('invalid file_id') from None

    hostname = os.getenv('HOSTNAME')
    if not hostname:
        # An empty value would otherwise produce 'https:///v1/files/...'.
        raise ValueError('invalid hostname')

    base_url = f'https://{hostname}/v1/files'
    # The context manager closes the session's connection pool on exit; the
    # body is already downloaded (no stream=True), so the returned response
    # stays usable afterwards.
    with requests.Session() as session:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = session.get(
            f"{base_url}/{validated_file_id}",
            timeout=(3.05, 30),
        )
    if response.status_code == 200:
        return response
    return None