I'm trying to return several parameters from a Kubeflow component. The documentation says you should use OutputPath<T>, where T is one of the Python types (str, float, int, ...), so I have this:
@component
def basic_data_drifting(current_csv: Input[Dataset],
reference_csv: Input[Dataset],
report_html: Output[HTML],
data_drift_score: OutputPath(float),
value: OutputPath(float)):
I know from the documentation that if my parameter is a string, I should use:
# Write the string value to the file path received for the output parameter.
with open(myoutputpathstr, 'w') as output_file:
    output_file.write("my string")
But I can't find any example of returning floats, ints, bools, etc. Should I cast them to str? Should I always store them in a file? What's the point of having <T>
instead of only str?
Thanks in advance!
Here are two ways to get data out of a component:
- `return` from the component function
- `from kfp.components import OutputPath`
Below I provide examples of `return`, `OutputPath`, and `InputPath` with code.
Case 1: return a single value of type <T>
from kfp.components import func_to_container_op
# I make component using `kfp.components.func_to_container_op`
def case_1(value_1: int, value_2: int) -> int:  # to use return, write '-> {type}'
    """Return the sum of ``value_1`` and ``value_2``.

    The ``-> int`` return annotation is what allows the returned value to be
    used as this component's single output.
    """
    result = value_1 + value_2
    return result  # the returned type can be int, float, str, bool, ...
# Turn `case_1` into a component op that runs on the given base image;
# `output_component_file` names the YAML file for the component definition.
case_1_op = func_to_container_op(
    func=case_1,
    base_image="case_1/tag:0.1",
    output_component_file="case_1.component.yaml",
)
Case 2: return multiple values
from kfp.components import func_to_container_op
from typing import NamedTuple
# If there are multiple return values, wrap them in a tuple.
# And use `NamedTuple`
def case_2(value_1: int) -> NamedTuple(
    "Output",
    [
        ("key_of_foo", int),
        ("key_of_bar", str),
        ("key_of_baz", bool),
    ],
):
    """Return three values of different types as one multi-value result.

    The ``NamedTuple`` return annotation declares one named output per field
    (``key_of_foo``, ``key_of_bar``, ``key_of_baz``).
    """
    foo = value_1  # int
    bar = "2"  # str
    baz = True  # bool
    # Wrap the values in a tuple, in the same order as the annotation's fields.
    return (foo, bar, baz)
# Turn `case_2` into a component op that runs on the given base image;
# `output_component_file` names the YAML file for the component definition.
case_2_op = func_to_container_op(
    func=case_2,
    base_image="case_2/tag:0.1",
    output_component_file="case_2.component.yaml",
)
Case 3: save data to a file using kfp.components.OutputPath (import it with `from kfp.components import OutputPath`).
def case_3(value_1: str, value_2: int, value_3: bool,
           file_path: OutputPath("dict")):
    """Write the three inputs as a JSON document to ``file_path``.

    The file holds ``{"one": value_1, "two": [value_2, value_3]}``. Nothing is
    returned; the written file is the component's output.
    """
    # Naming convention: the OutputPath parameter ends with `_path`, and the
    # output is then addressed by the name without that suffix (here: "file").
    import json

    exam_dict = {"one": value_1, "two": [value_2, value_3]}
    # Use a context manager so the file is flushed and closed before the
    # function returns, rather than relying on garbage collection.
    with open(file_path, "w", encoding="utf-8") as output_file:
        json.dump(exam_dict, output_file, indent=4)
# Turn `case_3` into a component op that runs on the given base image;
# `output_component_file` names the YAML file for the component definition.
case_3_op = func_to_container_op(
    func=case_3,
    base_image="case_3/tag:0.1",
    output_component_file="case_3.component.yaml",
)
Case 4: load data from a file using kfp.components.InputPath (import it with `from kfp.components import InputPath`).
def case_4(data_input: InputPath("dict")):
    """Load the JSON file at ``data_input`` and print the parsed object."""
    import json

    with open(data_input, "r", encoding='utf-8') as f:
        data = json.load(f)
    print(data)  # e.g. {'one': '2', 'two': [1, True]}
# Turn `case_4` into a component op that runs on the given base image;
# `output_component_file` names the YAML file for the component definition.
case_4_op = func_to_container_op(
    func=case_4,
    base_image="case_4/tag:0.1",
    output_component_file="case_4.component.yaml",
)
pipeline
import kfp.dsl as dsl
@dsl.pipeline(name="example")
def data_example(value_1: int, value_2: int):
    """Chain the four example ops, feeding each one from the previous op's output."""
    _case_1_op = case_1_op(value_1, value_2)

    # Input from a single return value: use `.output`.
    _case_2_op = case_2_op(_case_1_op.output)

    # Input from a multi-value return: select each value by key.
    # The key name must match the corresponding key in the NamedTuple.
    _case_3_op = case_3_op(
        _case_2_op.outputs['key_of_bar'],  # -> value_1 (str)
        _case_2_op.outputs['key_of_foo'],  # -> value_2 (int)
        _case_2_op.outputs['key_of_baz'],  # -> value_3 (bool)
    )

    # Input from a path output: select it by key.
    # The key name must match the OutputPath parameter name without '_path'.
    _case_4_op = case_4_op(_case_3_op.outputs["file"])
I'm not sure if this answer is what you asked for, but I hope this helps.