I am trying to use a profiler to create expectations on certain data batches.
import great_expectations as gx
from great_expectations.core.batch import BatchRequest
from great_expectations.profile.user_configurable_profiler import UserConfigurableProfiler

context = gx.get_context()

# Grab the first configured datasource, its S3 data connector, and a data asset
datasource = context.get_datasource(context.list_datasources()[0]["name"])
data_connector = datasource.data_connectors["sirene_update_s3_connector"].name
data_asset = datasource.get_available_data_asset_names()["sirene_update_s3_connector"][0]
expectation_suite_name = context.list_expectation_suite_names()[0]

# Restrict the batch request to the December 2022 batch
batch_request_sirene_update_december_2022 = {
    "datasource_name": datasource.name,
    "data_connector_name": data_connector,
    "data_asset_name": data_asset,
    "data_connector_query": {
        "batch_filter_parameters": {
            "year": "2022",
            "month": "12",
        }
    },
}

validator = context.get_validator(
    batch_request=BatchRequest(**batch_request_sirene_update_december_2022),
    expectation_suite_name=expectation_suite_name,
)

profiler = UserConfigurableProfiler(profile_dataset=validator)
But I get an error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File ~/.pyenv/versions/3.9.6/envs/data_pipelines/lib/python3.9/site-packages/great_expectations/execution_engine/execution_engine.py:595, in ExecutionEngine._process_direct_and_bundled_metric_computation_configurations(self, metric_fn_direct_configurations, metric_fn_bundle_configurations)
592 try:
593 resolved_metrics[
594 metric_computation_configuration.metric_configuration.id
--> 595 ] = metric_computation_configuration.metric_fn(
596 **metric_computation_configuration.metric_provider_kwargs
597 )
598 except Exception as e:
File ~/.pyenv/versions/3.9.6/envs/data_pipelines/lib/python3.9/site-packages/great_expectations/expectations/metrics/metric_provider.py:34, in metric_value.<locals>.wrapper.<locals>.inner_func(*args, **kwargs)
32 @wraps(metric_fn)
33 def inner_func(*args, **kwargs):
---> 34 return metric_fn(*args, **kwargs)
File ~/.pyenv/versions/3.9.6/envs/data_pipelines/lib/python3.9/site-packages/great_expectations/expectations/metrics/column_aggregate_metric_provider.py:88, in column_aggregate_value.<locals>.wrapper.<locals>.inner_func(cls, execution_engine, metric_domain_kwargs, metric_value_kwargs, metrics, runtime_configuration)
86 df = df[df[column_name].notnull()]
---> 88 return metric_fn(
89 cls,
90 column=df[column_name],
91 **metric_value_kwargs,
92 _metrics=metrics,
93 )
...
610 ] = self.resolve_metric_bundle(
611 metric_fn_bundle=metric_fn_bundle_configurations
612 )
MetricResolutionError: unhashable type: 'dict'
Is this a bug? Am I doing something wrong?
EDIT: I should mention that I'm trying to create expectations on a JSON file... probably not supported by GX yet, or I'm using the wrong kind of profiler.
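My best guess is that the nested JSON ends up as a pandas column of dict values, and the profiler's aggregate metrics (distinct-value counts, for example) rely on hashing, which dicts don't support. A minimal sketch outside GX, assuming the batch is loaded through the pandas execution engine, reproduces the same error:

import pandas as pd

# A column holding nested JSON objects becomes a column of dicts
df = pd.DataFrame({"payload": [{"a": 1}, {"a": 2}]})

# Hash-based aggregates fail on unhashable dict values, matching
# the "unhashable type: 'dict'" in the traceback above
df["payload"].nunique()  # TypeError: unhashable type: 'dict'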
As per the documentation, "How to create a new Expectation Suite from a jsonschema file", GX does not support profiling nested JSON or building expectations from such data:
"This implementation does not traverse any levels of nesting."
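The workaround I'm considering is to flatten the JSON before handing it to GX, e.g. with pandas.json_normalize, and profile the flattened frame through a runtime batch. A minimal sketch, assuming an in-memory pandas runtime datasource named "runtime_pandas" with a data connector "runtime_connector" (both names are hypothetical, as is the file name; note json_normalize flattens nested objects but leaves arrays in place, so arrays would still need handling):

import json
import pandas as pd
from great_expectations.core.batch import RuntimeBatchRequest

# Flatten nested JSON records into dotted top-level columns
with open("sirene_update.json") as f:  # hypothetical file name
    records = json.load(f)
df = pd.json_normalize(records)

# Hand the flattened DataFrame to GX as an in-memory runtime batch
batch_request = RuntimeBatchRequest(
    datasource_name="runtime_pandas",         # hypothetical runtime datasource
    data_connector_name="runtime_connector",  # hypothetical runtime data connector
    data_asset_name="sirene_update_flattened",
    runtime_parameters={"batch_data": df},
    batch_identifiers={"default_identifier_name": "december_2022"},
)

validator = context.get_validator(
    batch_request=batch_request,
    expectation_suite_name=expectation_suite_name,
)
profiler = UserConfigurableProfiler(profile_dataset=validator)

This reuses context and expectation_suite_name from above and sidesteps the unhashable dict columns, since every value in the flattened frame is a scalar.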