I have a YAML configuration and environment variables for my application, and I'm using pydantic-settings to load both YAML and environment variables into my Pydantic models. Pydantic should prefer the environment variables over the YAML values.
However, I'm facing an issue with nested models where the environment variable structure doesn't align with my nested Pydantic model fields.
Here's my config.yaml:
mongodb:
  host: "mongo"
  port: 27017
  username: "USER"
  password: "123456"
  name: "db"
  collection: "tasks"
s3:
  bucket_name: "ai"
My code looks like this:
import os
from pydantic_settings import YamlConfigSettingsSource, BaseSettings, PydanticBaseSettingsSource, EnvSettingsSource
from typing import Tuple, Type
from pydantic import BaseModel
class DatabaseSettings(BaseModel):
    """MongoDB connection settings (the ``mongodb`` section of the configuration)."""

    # Where the server lives.
    host: str
    port: int

    # Credentials.
    username: str
    password: str

    # Database name and the collection used inside it.
    name: str
    collection: str
class S3Settings(BaseModel):
    """S3 settings (the ``s3`` section of the configuration)."""

    # Note: the field name itself contains an underscore.
    bucket_name: str
class Settings(BaseSettings):
    """Application settings built from environment variables and ``config.yaml``."""

    mongodb: DatabaseSettings
    s3: S3Settings

    @classmethod
    def settings_customise_sources(
        cls, settings_cls: Type[BaseSettings], **kwargs
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """Return the settings sources: environment variables first, then the YAML file.

        The default sources handed in through ``**kwargs`` are not used.
        """
        # Environment variables look like PREFIX_<SECTION>_<FIELD>, matched
        # case-insensitively, with "_" also acting as the nesting delimiter.
        env_source = EnvSettingsSource(
            settings_cls,
            env_nested_delimiter="_",
            case_sensitive=False,
            env_prefix="PREFIX_"
        )
        yaml_source = YamlConfigSettingsSource(settings_cls, yaml_file="config.yaml")
        return (env_source, yaml_source)
# Show whether each override variable is visible to this process.
db_host = os.environ.get('PREFIX_MONGODB_HOST')
print(
    f"PREFIX_MONGODB_HOST is set to: {db_host}"
    if db_host is not None
    else "PREFIX_MONGODB_HOST is not set"
)

s3_bucket_name = os.environ.get('PREFIX_S3_BUCKET_NAME')
print(
    f"PREFIX_S3_BUCKET_NAME is set to: {s3_bucket_name}"
    if s3_bucket_name is not None
    else "PREFIX_S3_BUCKET_NAME is not set"
)

# Load the settings and dump the resulting values.
settings = Settings()
print(settings.model_dump())
I defined the following environment variables:
PREFIX_MONGODB_HOST=localhost
PREFIX_S3_BUCKET_NAME=some-bucket
When I run this code, the output is:
```
PREFIX_MONGODB_HOST is set to: localhost
PREFIX_S3_BUCKET_NAME is set to: some-bucket
{'mongodb': {'host': 'localhost', 'port': 27017, 'username': 'USER', 'password': '123456', 'name': 'db', 'collection': 'tasks'}, 's3': {'bucket_name': 'ai'}}
```
The Problem:
PREFIX_MONGODB_HOST correctly overwrites the mongodb.host key in the YAML. However, PREFIX_S3_BUCKET_NAME doesn't overwrite s3.bucket_name. The issue seems to be that pydantic-settings parses the environment variable PREFIX_S3_BUCKET_NAME into the following JSON:
{
"s3": {
"bucket": {
"name": "some-bucket"
}
}
}
which doesn't correspond to my Pydantic model.
I would like it to parse the env var to:
{
"s3": {
"bucket_name": "some-bucket"
}
}
which corresponds to the Pydantic model. I cannot change the environment variable name (PREFIX_S3_BUCKET_NAME), so I'm looking for a pydantic-settings-oriented solution to map this environment variable to the bucket_name field in the S3Settings model.
I've tried using aliases and different configurations, but nothing has worked so far.
Would appreciate any help :)
Pydantic can't guess when `_` is a hierarchy separator and when `_` is part of an attribute name. The correct solution is to pick an unambiguous hierarchy separator, for example `__`, but this requires you to change the environment variable names that you're using:
PREFIX_S3__BUCKET_NAME=foo
PREFIX_MONGODB__HOST=localhost
If you are unable to change your variable names, then we need to implement our own mechanism for setting the `bucket_name` field rather than relying on pydantic's automatic mechanism. One option for doing so is to use a `field_validator`, like this:
import os
from pydantic import BaseModel
from pydantic import field_validator
from pydantic_settings import BaseSettings
from pydantic_settings import EnvSettingsSource
from pydantic_settings import PydanticBaseSettingsSource
from pydantic_settings import YamlConfigSettingsSource
from typing import override
class DatabaseSettings(BaseModel):
    """MongoDB connection settings (the ``mongodb`` section of the configuration)."""

    # Server location.
    host: str
    port: int

    # Credentials.
    username: str
    password: str

    # Database name and the collection used inside it.
    name: str
    collection: str
class S3Settings(BaseModel):
    """S3 settings (the ``s3`` section of the configuration)."""

    # The underscore in this name is what clashes with the "_" nesting delimiter.
    bucket_name: str
class Settings(BaseSettings):
    """Application settings from init arguments, environment variables and ``config.yaml``.

    The environment source uses ``_`` as its nesting delimiter, so it reads
    ``PREFIX_S3_BUCKET_NAME`` as ``s3 -> bucket -> name`` and never fills
    ``s3.bucket_name``. The ``populate_s3_bucket_name`` validator applies that
    one variable by hand.
    """

    mongodb: DatabaseSettings
    s3: S3Settings

    @field_validator("s3", mode="before")
    @classmethod
    def populate_s3_bucket_name(cls, v):
        """Override ``s3.bucket_name`` with ``PREFIX_S3_BUCKET_NAME`` when it is set.

        Args:
            v: Raw input for the ``s3`` field, before validation. Normally the
                dict merged from the settings sources.

        Returns:
            A new dict with ``bucket_name`` replaced when the variable is set
            (and non-empty) and ``v`` is a dict; otherwise ``v`` unchanged.
        """
        bucket_name = os.getenv("PREFIX_S3_BUCKET_NAME")
        if not bucket_name:
            # Unset or empty: keep whatever the other sources provided.
            return v
        if isinstance(v, dict):
            # Copy rather than mutate the caller's / merged source data in place.
            return {**v, "bucket_name": bucket_name}
        # Non-dict input (e.g. an ``S3Settings`` instance passed to
        # ``Settings(s3=...)``) does not support item assignment; hand it to
        # normal validation untouched instead of raising ``TypeError``.
        return v

    @classmethod
    @override
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Return the sources: init arguments, then environment, then the YAML file.

        The default ``env_settings``, ``dotenv_settings`` and
        ``file_secret_settings`` sources are not used; a custom
        ``EnvSettingsSource`` replaces the default environment source.
        """
        return (
            init_settings,
            EnvSettingsSource(
                settings_cls,
                env_nested_delimiter="_",
                case_sensitive=False,
                env_prefix="PREFIX_",
            ),
            YamlConfigSettingsSource(settings_cls, yaml_file="config.yaml"),
        )
# Load the settings and print them as indented JSON.
settings = Settings()
print(settings.model_dump_json(indent=2))
With this code and your example config file, running it with no environment variables set produces:
$ python example.py
{
"mongodb": {
"host": "mongo",
"port": 27017,
"username": "USER",
"password": "123456",
"name": "db",
"collection": "tasks"
},
"s3": {
"bucket_name": "ai"
}
}
Whereas if we set your target environment variables, we get:
$ PREFIX_MONGODB_HOST=localhost PREFIX_S3_BUCKET_NAME=some-bucket \
python example.py
{
"mongodb": {
"host": "localhost",
"port": 27017,
"username": "USER",
"password": "123456",
"name": "db",
"collection": "tasks"
},
"s3": {
"bucket_name": "some-bucket"
}
}
And I think that's the behavior you want.
If I could do something like this: `class S3Settings(BaseModel): bucket_name: str = Field(..., env="PREFIX_S3_BUCKET_NAME")`, it would be ideal, if a similar option exists.
You could do something like this using Annotated validators:
from typing import Annotated
from pydantic import BeforeValidator
def setFromEnvVar(envvar: str):
    """Build a ``BeforeValidator`` that swaps a field's input for an environment variable.

    Args:
        envvar: Name of the environment variable to read.

    Returns:
        A ``BeforeValidator`` whose function returns the variable's value when
        it is set, and the incoming value otherwise.
    """

    def _validator(value: str):
        # The incoming value is the fallback when the variable is absent.
        return os.environ.get(envvar, value)

    return BeforeValidator(_validator)
class S3Settings(BaseModel):
    """S3 settings whose ``bucket_name`` is tied to one specific environment variable."""

    # The validator substitutes PREFIX_S3_BUCKET_NAME for the input when that variable is set.
    bucket_name: Annotated[
        str,
        setFromEnvVar("PREFIX_S3_BUCKET_NAME"),
    ]
This lets you tag individual fields with a specific environment variable. Using the above solution, the final code would look like:
import os
from pydantic import AfterValidator, BaseModel
from pydantic import field_validator
from pydantic import BeforeValidator
from pydantic_settings import BaseSettings
from pydantic_settings import EnvSettingsSource
from pydantic_settings import PydanticBaseSettingsSource
from pydantic_settings import YamlConfigSettingsSource
from typing import override
from typing import Annotated
def setFromEnvVar(envvar: str):
    """Create a ``BeforeValidator`` that prefers an environment variable over the input.

    Args:
        envvar: Name of the environment variable to consult.

    Returns:
        A ``BeforeValidator`` wrapping a function that yields the variable's
        value if it is set, or the original input value if it is not.
    """

    def _validator(value: str):
        # ``value`` doubles as the default for the lookup.
        return os.getenv(envvar, value)

    return BeforeValidator(_validator)
class S3Settings(BaseModel):
    """S3 settings whose ``bucket_name`` is tied to one specific environment variable."""

    # The validator substitutes PREFIX_S3_BUCKET_NAME for the input when that variable is set.
    bucket_name: Annotated[
        str,
        setFromEnvVar("PREFIX_S3_BUCKET_NAME"),
    ]
class DatabaseSettings(BaseModel):
    """MongoDB connection settings (the ``mongodb`` section of the configuration)."""

    # Server location.
    host: str
    port: int

    # Credentials.
    username: str
    password: str

    # Database name and the collection used inside it.
    name: str
    collection: str
class Settings(BaseSettings):
    """Application settings from init arguments, environment variables and ``config.yaml``."""

    mongodb: DatabaseSettings
    s3: S3Settings

    @classmethod
    @override
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Return the sources: init arguments, then environment, then the YAML file.

        The default ``env_settings``, ``dotenv_settings`` and
        ``file_secret_settings`` sources are not used.
        """
        # Custom environment source: PREFIX_-prefixed names, case-insensitive,
        # with "_" as the nesting delimiter.
        prefixed_env = EnvSettingsSource(
            settings_cls,
            env_nested_delimiter="_",
            case_sensitive=False,
            env_prefix="PREFIX_",
        )
        yaml_file_source = YamlConfigSettingsSource(settings_cls, yaml_file="config.yaml")
        return (init_settings, prefixed_env, yaml_file_source)
# Load the settings and print them as indented JSON.
settings = Settings()
print(settings.model_dump_json(indent=2))