python url request checkmarx ssrf

Problems handling an SSRF vulnerability in a REST API request


I'm struggling to remove an SSRF vulnerability detected by Checkmarx as part of our CI/CD pipeline at work, and I have little knowledge about fixing this kind of problem (this vulnerability prevents me from continuing with the pipeline).

I'm calling an internal REST service that stores files, in order to download a single file based on the file_id (uuid4) param. I've added multiple redundant validations to make sure that the file_id is valid before adding it to the URL. The hostname is passed in from a ConfigMap.

The vulnerability is about file_id being a variable through which a user can pass another value, allowing them to access an unintended file once it has been added to the URL.

Is there a better way to fix this and prevent checkmarx detecting it?

import os
import re
import uuid
import validators
import requests

UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def download_file(file_id, timeout=10): # <-- Attack vector starts here
    """Fetch one file from the internal file service by its UUID4 identifier.

    Args:
        file_id: Identifier of the file; must be a lowercase, hyphenated
            UUID4 string.
        timeout: Seconds to wait for the service before ``requests`` gives
            up. Without a timeout the call can block indefinitely.

    Returns:
        The ``requests`` response when the service answers 200, otherwise
        ``None``.

    Raises:
        ValueError: If ``file_id`` is not a valid UUID4, or if the
            ``HOSTNAME`` environment variable is unset.
    """
    # fullmatch is used because `$` under re.match also matches just before
    # a trailing newline, which would let "…<uuid>\n" past this first check.
    if not re.fullmatch(UUID_REGEX, file_id):
        raise ValueError('invalid file_id')

    if not validators.uuid(file_id):
        raise ValueError('invalid file_id')

    try:
        # Round-trip through uuid.UUID so the value placed in the URL is the
        # canonical string form produced by the standard library.
        file_id = str(uuid.UUID(file_id))
    except ValueError as exc:
        raise ValueError('invalid file_id') from exc

    hostname = os.getenv('HOSTNAME')

    if hostname is None:
        # Report the real cause; the identifier has already been accepted.
        raise ValueError('invalid hostname')

    url = f'https://{hostname}/v1/files/{file_id}'

    response = requests.get(url, timeout=timeout) # <-- Attack vector ends here

    if response.status_code == 200:
        return response
    return None

Solution

  • You can either build the URL with urljoin and quote instead of direct string interpolation (1), or use a requests.Session with the validated ID appended to a fixed base URL (2)

    Something like this: (1)

    import os
    import re
    import uuid
    import validators
    import requests
    from urllib.parse import urljoin, quote
    
    UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
    
    def download_file(file_id, timeout=10):
        """Download a file from the internal file service by UUID4 identifier.

        The URL is assembled with ``quote`` and ``urljoin`` rather than by
        interpolating the identifier directly into the final string.

        Args:
            file_id: Identifier of the file; must be a lowercase, hyphenated
                UUID4 string.
            timeout: Seconds to wait for the service before ``requests``
                gives up. Without a timeout the call can block indefinitely.

        Returns:
            The ``requests`` response when the service answers 200,
            otherwise ``None``.

        Raises:
            ValueError: If ``file_id`` is not a valid UUID4, or if the
                ``HOSTNAME`` environment variable is unset or empty.
        """
        # Validate file_id. fullmatch is used because `$` under re.match
        # also matches just before a trailing newline.
        if not re.fullmatch(UUID_REGEX, file_id):
            raise ValueError('invalid file_id')

        if not validators.uuid(file_id):
            raise ValueError('invalid file_id')

        try:
            validated_file_id = str(uuid.UUID(file_id))
        except ValueError as exc:
            raise ValueError('invalid file_id') from exc

        hostname = os.getenv('HOSTNAME')
        if not hostname:
            # An empty value would yield a malformed "https:///..." URL.
            raise ValueError('invalid hostname')

        # Percent-encode the identifier and join it onto a fixed base URL.
        # The trailing slash matters: urljoin would otherwise replace the
        # last path segment instead of appending to it.
        base_url = f'https://{hostname}/v1/files/'
        safe_file_id = quote(validated_file_id, safe='')
        url = urljoin(base_url, safe_file_id)

        response = requests.get(url, timeout=timeout)

        if response.status_code == 200:
            return response
        return None
    

    (2)

    import os
    import re
    import uuid
    import validators
    import requests
    
    UUID_REGEX = r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'
    
    def download_file(file_id, timeout=10):
        """Download a file from the internal file service through a Session.

        Args:
            file_id: Identifier of the file; must be a lowercase, hyphenated
                UUID4 string.
            timeout: Seconds to wait for the service before ``requests``
                gives up. Without a timeout the call can block indefinitely.

        Returns:
            The ``requests`` response when the service answers 200,
            otherwise ``None``.

        Raises:
            ValueError: If ``file_id`` is not a valid UUID4, or if the
                ``HOSTNAME`` environment variable is unset or empty.
        """
        # Validate file_id. fullmatch is used because `$` under re.match
        # also matches just before a trailing newline.
        if not re.fullmatch(UUID_REGEX, file_id):
            raise ValueError('invalid file_id')

        if not validators.uuid(file_id):
            raise ValueError('invalid file_id')

        try:
            validated_file_id = str(uuid.UUID(file_id))
        except ValueError as exc:
            raise ValueError('invalid file_id') from exc

        hostname = os.getenv('HOSTNAME')
        if not hostname:
            # An empty value would yield a malformed "https:///..." URL.
            raise ValueError('invalid hostname')

        base_url = f'https://{hostname}/v1/files'

        # Use the session as a context manager so its connection pool is
        # released when the request is done. The body is already read
        # (no stream=True), so the response stays usable after the close.
        with requests.Session() as session:
            response = session.get(
                f"{base_url}/{validated_file_id}", timeout=timeout
            )

        if response.status_code == 200:
            return response
        return None