Problems handling an SSRF vulnerability on a REST API request

I'm struggling to remove an SSRF vulnerability detected by Checkmarx as part of our CI/CD pipeline at work, and I have little knowledge about fixing this kind of problem (the finding blocks the pipeline, so I cannot continue until it is resolved).

I'm calling an internal REST service that stores files, in order to download a single file identified by the file_id (UUID v4) parameter. I've added multiple redundant validations to make sure that the file_id is valid before adding it to the URL. The hostname is passed in from a ConfigMap.

The vulnerability is about file_id being a user-controlled variable: a user could pass a different value and, once it has been added to the URL, access an unintended file.

Is there a better way to fix this so that Checkmarx no longer flags it?

import os
import re
import uuid
import validators
import requests

# Canonical lowercase, hyphenated UUID version 4 (variant nibble 8, 9, a or b).
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id, *, timeout=10):  # <-- Attack vector starts here
    """Validate ``file_id`` and request that file from the internal service.

    The id passes three independent checks (regex, ``validators.uuid`` and
    ``uuid.UUID``) before it is interpolated into the request URL. The host
    part of the URL is read from the ``HOSTNAME`` environment variable.

    Args:
        file_id: Lowercase, hyphenated UUID v4 string naming the file.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled service cannot block the caller forever.

    Returns:
        None. Handling of a successful response is left as a placeholder
        in this snippet.

    Raises:
        ValueError: ``'invalid file_id'`` when ``file_id`` fails any check,
            ``'invalid hostname'`` when ``HOSTNAME`` is unset or empty.
    """
    # A non-string would make the regex call raise TypeError; report it the
    # same way as any other malformed id. ``fullmatch`` is used because ``$``
    # with ``match`` also accepts a trailing newline.
    if not isinstance(file_id, str) or not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')

    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')

    try:
        # Round-trip through uuid.UUID so the value placed in the URL is the
        # library's own rendering rather than the caller's string.
        file_id = str(uuid.UUID(file_id))
    except ValueError as err:
        raise ValueError('invalid file_id') from err

    hostname = os.getenv('HOSTNAME')

    # An empty value would produce a URL without a host, so it is rejected
    # together with the unset case.
    if not hostname:
        raise ValueError('invalid hostname')

    url = f'https://{hostname}/v1/files/{file_id}'

    response = requests.get(url, timeout=timeout)  # <-- Attack vector ends here

    if response.status_code == 200:
        # Placeholder: processing of the downloaded file is not shown here.
        pass

Solution

You can either build the URL with the `urllib.parse` helpers (`quote` and `urljoin`) instead of plain string interpolation (1), or send the request through a `requests.Session` (2).

Something like this: (1)

import os
import re
import uuid
import validators
import requests
from urllib.parse import urljoin, quote

# Canonical lowercase, hyphenated UUID version 4 (variant nibble 8, 9, a or b).
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id, *, timeout=10):
    """Validate ``file_id``, build the file URL with ``urllib.parse`` and fetch it.

    Args:
        file_id: Lowercase, hyphenated UUID v4 string naming the file.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled service cannot block the caller forever.

    Returns:
        The ``requests`` response when the status code is 200, otherwise
        ``None``.

    Raises:
        ValueError: ``'invalid file_id'`` when ``file_id`` fails any check,
            ``'invalid hostname'`` when ``HOSTNAME`` is unset or empty.
    """
    # Validate file_id. A non-string would make the regex call raise
    # TypeError, so it is rejected up front; ``fullmatch`` is used because
    # ``$`` with ``match`` also accepts a trailing newline.
    if not isinstance(file_id, str) or not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')

    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')

    try:
        validated_file_id = str(uuid.UUID(file_id))
    except ValueError as err:
        raise ValueError('invalid file_id') from err

    hostname = os.getenv('HOSTNAME')
    # An empty value would produce a URL without a host, so it is rejected
    # together with the unset case.
    if not hostname:
        raise ValueError('invalid hostname')

    # Build the URL from a fixed base plus a percent-encoded path segment
    # instead of interpolating the id directly. This is meant to break the
    # taint flow the scanner reports; whether a given scanner accepts it
    # should be confirmed against that scanner.
    base_url = f'https://{hostname}/v1/files/'
    safe_file_id = quote(validated_file_id, safe='')
    url = urljoin(base_url, safe_file_id)

    response = requests.get(url, timeout=timeout)

    if response.status_code == 200:
        return response
    return None

(2)

import os
import re
import uuid
import validators
import requests

# Canonical lowercase, hyphenated UUID version 4 (variant nibble 8, 9, a or b).
UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id, *, timeout=10):
    """Validate ``file_id`` and fetch the file through a ``requests.Session``.

    Args:
        file_id: Lowercase, hyphenated UUID v4 string naming the file.
        timeout: Seconds passed to ``Session.get`` as its ``timeout``
            argument, so a stalled service cannot block the caller forever.

    Returns:
        The ``requests`` response when the status code is 200, otherwise
        ``None``.

    Raises:
        ValueError: ``'invalid file_id'`` when ``file_id`` fails any check,
            ``'invalid hostname'`` when ``HOSTNAME`` is unset or empty.
    """
    # Validate file_id. A non-string would make the regex call raise
    # TypeError, so it is rejected up front; ``fullmatch`` is used because
    # ``$`` with ``match`` also accepts a trailing newline.
    if not isinstance(file_id, str) or not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')

    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')

    try:
        validated_file_id = str(uuid.UUID(file_id))
    except ValueError as err:
        raise ValueError('invalid file_id') from err

    hostname = os.getenv('HOSTNAME')
    # An empty value would produce a URL without a host, so it is rejected
    # together with the unset case.
    if not hostname:
        raise ValueError('invalid hostname')

    base_url = f'https://{hostname}/v1/files'

    # The context manager closes the session (and its pooled connections)
    # once the request has completed; the original never closed it.
    with requests.Session() as session:
        response = session.get(f"{base_url}/{validated_file_id}", timeout=timeout)

    if response.status_code == 200:
        return response
    return None