I have Input and Output pandera SchemaModels, and the Output schema inherits from the Input schema, which accurately reflects that every attribute of the Input schema is also in scope for the Output schema.
What I want to avoid is having all inherited attributes be required (non-Optional) in the Output schema just because they are, rightly, required in the Input schema. Instead, I want to keep them required in the Input schema but choose which of them remain required in the Output schema, while the other inherited attributes become optional.
A similar pydantic question has a solution that defines an `__init_subclass__` method in the parent class. However, this doesn't work out of the box for pandera classes, and I'm not sure whether it is even implementable or the right approach.
import pandera as pa
from typing import Optional
from pandera.typing import Index, DataFrame, Series, Category
class InputSchema(pa.SchemaModel):
    """Input schema whose subclasses may name inherited fields to relax.

    Subclasses pass ``optional_fields=[...]`` in their class header; the hook
    below then tries to mark each named field as not required.
    """

    reporting_date: Series[pa.DateTime] = pa.Field(coerce=True)

    def __init_subclass__(cls, optional_fields=None, **kwargs):
        """Attempt to make the fields listed in *optional_fields* optional.

        Args:
            optional_fields: Names of inherited fields to relax, or ``None``
                to leave the subclass untouched.
            **kwargs: Remaining class keywords, forwarded to the parent hook.
        """
        super().__init_subclass__(**kwargs)
        # This follows the pydantic recipe (``outer_type_`` / ``required``).
        # The ``cls.__fields__[name]`` lookup is the only subscript keyed by a
        # field name, so it is where the reported ``KeyError`` originates.
        # NOTE(review): presumably pandera has not populated ``__fields__`` at
        # subclass-creation time -- confirm against pandera's model internals.
        for name in optional_fields or ():
            field_info = cls.__fields__[name]
            field_info.outer_type_ = Optional
            field_info.required = False
class OutputSchema(InputSchema, optional_fields=["reporting_date"]):
    """Output schema: the inherited input fields plus a ``test`` column."""

    # The ``optional_fields`` class keyword is handed to the parent's
    # ``__init_subclass__`` hook when this class statement executes.
    test: Series[str] = pa.Field()
@pa.check_types
def func(inputs: DataFrame[InputSchema]) -> DataFrame[OutputSchema]:
    """Return *inputs* without ``reporting_date`` and with a constant ``test`` column."""
    # ``drop`` returns a new frame, so the caller's frame is left untouched.
    without_date = inputs.drop(columns=["reporting_date"])
    without_date["test"] = "a"
    return without_date
# This snippet's import list never brings in pandas, so ``pd`` would be an
# undefined name here; import it locally to keep the example self-contained.
import pandas as pd

# Minimal input frame: date strings only.
data = pd.DataFrame({'reporting_date': ['2023-01-11', '2023-01-12']})
# Exercise the decorated function on the sample frame.
func(data)
Error:
---> 18 class OutputSchema(InputSchema, optional_fields=['reporting_date']):
KeyError: 'reporting_date'
Edit:
Desired outcome: to be able to specify which fields from the inherited schema remain required, while the remaining ones become optional:
class InputSchema(pa.SchemaModel):
    """Input schema declaring two columns, neither marked ``Optional``."""

    reporting_date: Series[pa.DateTime] = pa.Field(coerce=True)
    other_field: Series[str] = pa.Field()
class OutputSchema(InputSchema, required=['reporting_date']):
    """Output schema sketching the desired ``required=[...]`` class keyword.

    NOTE: ``required=`` illustrates the API being asked for; nothing in this
    snippet implements it.
    """

    test: Series[str] = pa.Field()
The resulting `OutputSchema` has `reporting_date` and `test` as required, while `other_field` is optional.
Here is a solution that reuses the existing type annotations from the input schema:
import pandera as pa
import pandas as pd
from typing import Optional
from pandera.typing import Index, DataFrame, Series, Category
from pydantic import Field, BaseModel
from typing import Annotated, Type
def copy_field(from_model: type, fname: str, annotations: dict[str, object]) -> None:
    """Copy the type annotation of one field from *from_model* into *annotations*.

    Only the annotation is copied. Any value assigned to the field on the
    source class (for example a ``pa.Field(...)``) is not carried over.

    Args:
        from_model: Class that declares (or inherits) the annotated field.
        fname: Name of the field whose annotation should be reused.
        annotations: Mapping to update in place, typically the
            ``__annotations__`` dict of the class body being defined.

    Raises:
        KeyError: If no class in ``from_model``'s MRO annotates *fname*.
    """
    # Walk the MRO so that fields declared on a base class of ``from_model``
    # are found too; a class's own ``__annotations__`` omits inherited fields.
    for klass in from_model.__mro__:
        declared = getattr(klass, "__annotations__", {})
        if fname in declared:
            annotations[fname] = declared[fname]
            return
    raise KeyError(fname)
class InputSchema(pa.SchemaModel):
    """Input schema; ``not_inherit`` is the column the output should not require."""

    reporting_date: Series[pa.DateTime] = pa.Field(coerce=True)
    # No explicit ``pa.Field`` is assigned to this column.
    not_inherit: Series[str]
class OutputSchema(pa.SchemaModel):
    """Standalone output schema that borrows selected annotations from ``InputSchema``.

    It does not inherit from ``InputSchema``; each reused field is copied in
    explicitly, so anything not copied is simply absent from this schema.
    """

    test: Series[str] = pa.Field()
    # Reuse the input schema's annotation rather than retyping
    #     reporting_date: Series[pa.DateTime] = pa.Field(coerce=True)
    # The call writes into this class body's ``__annotations__`` mapping, so it
    # is placed after the annotated ``test`` attribute.
    # NOTE(review): only the annotation is copied, not ``pa.Field(coerce=True)``
    # -- confirm whether coercion is needed on the output side.
    copy_field(InputSchema, "reporting_date", __annotations__)
    # ``not_inherit`` is intentionally left out. The alternative would be to
    # declare it by hand as ``not_inherit: Optional[Series[str]]``.
# Sample input with both columns declared on InputSchema.
data = pd.DataFrame(
    {
        "reporting_date": ["2023-01-11", "2023-01-12"],
        "not_inherit": ["a", "a"],
    }
)
@pa.check_types
def func(inputs: DataFrame[InputSchema]) -> DataFrame[OutputSchema]:
    """Return *inputs* without ``not_inherit`` and with a constant ``test`` column."""
    # Both steps return new frames, so the caller's frame is not modified.
    return inputs.drop(columns=["not_inherit"]).assign(test="a")
# Run the example on the sample frame; the return value is discarded.
func(data)