Here is what I am trying from one of the official tutorials
from typing import NamedTuple

import kfp
import kfp.dsl as dsl
from kfp.components import create_component_from_func
@create_component_from_func
def confusion_visualization(
    matrix_uri: str = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/samples/core/visualization/confusion_matrix.csv',
) -> NamedTuple('VisualizationOutput', [('mlpipeline_ui_metadata', 'UI_metadata')]):
    """Provide confusion matrix csv file to visualize as metrics.

    Args:
        matrix_uri: Location of the confusion-matrix CSV; it is stored as the
            ``source`` entry of the viewer metadata.

    Returns:
        A ``VisualizationOutput`` named tuple whose single field,
        ``mlpipeline_ui_metadata``, holds the viewer metadata serialized as a
        JSON string.
    """
    # Imports are kept inside the body: per the KFP SDK docs, a function-based
    # component may only rely on names it imports itself.
    import json
    from collections import namedtuple

    # Viewer description: one confusion-matrix output read from a CSV with
    # target / predicted / count columns.
    metadata = {
        'outputs': [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            'source': matrix_uri,
            'labels': ['rose', 'lily', 'iris'],
        }]
    }
    print('Printing the metadata')
    print(metadata)

    # The field name must match the output declared in the return annotation.
    visualization_output = namedtuple('VisualizationOutput', [
        'mlpipeline_ui_metadata'])
    print()
    return visualization_output(json.dumps(metadata))
@dsl.pipeline(
    name='confusion-matrix-pipeline',
    description='A sample pipeline to generate Confusion Matrix for UI visualization.',
)
def confusion_matrix_pipeline():
    # Single-step pipeline: instantiate the confusion_visualization component
    # with 'results.json' as its matrix_uri argument.
    # NOTE(review): the component declares its source as CSV and defaults to an
    # https URL; confirm 'results.json' is a CSV at a location the UI can read.
    viz_task = confusion_visualization('results.json')
# Build a client with the default connection settings (no host is passed),
# then submit a run of the pipeline function with no pipeline arguments.
client = kfp.Client()
client.create_run_from_pipeline_func(confusion_matrix_pipeline, arguments={})
I am not able to see a visualization in either the Run output or the Visualizations tab. It says there are no visualizations in this step. What am I missing here?
I think an issue with your code is that you don't provide the output as a file (try using an `OutputPath`).
From Kubeflow docs:
The component must also export a file output artifact with an artifact name of mlpipeline-ui-metadata, or else the Kubeflow Pipelines UI will not render the visualization.
... If the component writes such a file to its container filesystem, the Kubeflow Pipelines system extracts the file, and the Kubeflow Pipelines UI uses the file to generate the specified viewer(s). The metadata specifies where to load the artifact data from. The Kubeflow Pipelines UI loads the data into memory and renders it.
The Kubeflow docs also provide an example, which is working for me:
def confusion_matrix_viz(mlpipeline_ui_metadata_path: kfp.components.OutputPath()):
    """Write confusion-matrix viewer metadata as JSON to the given output path.

    This is a template: ``<CONFUSION_MATRIX_CSV_FILE>`` and ``vocab`` are not
    defined in this excerpt and must be supplied before the code can run.
    """
    import json
    metadata = {
        'outputs' : [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            # Placeholder: replace with the location of the confusion-matrix CSV.
            'source': <CONFUSION_MATRIX_CSV_FILE>,
            # Convert vocab to string because for boolean values we want "True|False" to match csv data.
            'labels': list(map(str, vocab)),
        }]
    }
    # The metadata is written to the output path rather than returned.
    with open(mlpipeline_ui_metadata_path, 'w') as metadata_file:
        json.dump(metadata, metadata_file)
Accordingly, you might try modifying your code like so:
import kfp
import kfp.dsl as dsl
from kfp.components import create_component_from_func
@create_component_from_func
def confusion_visualization(
    # The output path comes first: it has no default, and Python rejects a
    # parameter without a default placed after one that has a default.
    mlpipeline_ui_metadata_path: kfp.components.OutputPath(),
    matrix_uri: str = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/samples/core/visualization/confusion_matrix.csv',
):
    """Provide confusion matrix csv file to visualize as metrics.

    Args:
        mlpipeline_ui_metadata_path: File path the viewer metadata is written
            to as JSON. Per the KFP SDK docs the ``_path`` suffix is dropped
            from the output name, so keep this parameter name unchanged.
        matrix_uri: Location of the confusion-matrix CSV; it is stored as the
            ``source`` entry of the viewer metadata.
    """
    # Imported inside the body: per the KFP SDK docs, a function-based
    # component may only rely on names it imports itself.
    import json

    # Viewer description: one confusion-matrix output read from a CSV with
    # target / predicted / count columns.
    metadata = {
        'outputs': [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'},
            ],
            'source': matrix_uri,
            'labels': ['rose', 'lily', 'iris'],
        }]
    }
    print('Printing the metadata')
    print(metadata)

    # Write the metadata to the output file instead of returning it.
    with open(mlpipeline_ui_metadata_path, 'w') as metadata_file:
        json.dump(metadata, metadata_file)
@dsl.pipeline(
    name='confusion-matrix-pipeline',
    description='A sample pipeline to generate Confusion Matrix for UI visualization.',
)
def confusion_matrix_pipeline():
    # Single-step pipeline: instantiate the confusion_visualization component
    # with 'results.json' as its one positional argument.
    # NOTE(review): the component declares its source as CSV and defaults to an
    # https URL; confirm 'results.json' is a CSV at a location the UI can read.
    viz_task = confusion_visualization('results.json')
# Build a client with the default connection settings (no host is passed),
# then submit a run of the pipeline function with no pipeline arguments.
client = kfp.Client()
client.create_run_from_pipeline_func(confusion_matrix_pipeline, arguments={})