I am trying to federate a keras model which has multiple inputs. These some of these inputs are categorical and some of them are numerical, so I have some DenseFeature layers to embed the values.
The problem is that using tff.learning.from_keras_model()
to expect as input_spec a dictionary with just 2 elements (x,y) but I have multiple inputs which then I have to distinguish in the model to perform the Embedding correctly with the feature_columns functions and the DenseFeature layers.
How can I handle the single feature columns if the model accepts just an 'x' as input without proper columns names?
Here is the code and the error:
def create_keras_model():
l = tf.keras.layers
# handling numerical columns
for header in numerical_column_names:
feature_columns.append(feature_column.numeric_column(header))
# handling the categorical feature
pickup = feature_column.categorical_column_with_vocabulary_list(
'pickup_location_id', [i for i in range(number_of_locations)])
#pickup_one_hot = feature_column.indicator_column(pickup)
#feature_columns.append(pickup_one_hot)
pickup_embedding = feature_column.embedding_column(pickup, dimension=64)
#feature_columns.append(pickup_embedding)
feature_inputs = {
'pickup_week_day_sin': tf.keras.Input((1,), name='pickup_week_day_sin'),
'pickup_week_day_cos': tf.keras.Input((1,), name='pickup_week_day_cos'),
'pickup_hour_sin': tf.keras.Input((1,), name='pickup_hour_sin'),
'pickup_hour_cos': tf.keras.Input((1,), name='pickup_hour_cos'),
'pickup_month_sin': tf.keras.Input((1,), name='pickup_month_sin'),
'pickup_month_cos': tf.keras.Input((1,), name='pickup_month_cos'),
}
numerical_features = l.DenseFeatures(feature_columns)(feature_inputs)#{'x': a}
location_input = {
'pickup_location_id': tf.keras.Input((1,), dtype=tf.dtypes.int32, name='pickup_location_id'),
}
categorical_features = l.DenseFeatures(pickup_embedding)(location_input)#{'x': a}
#i = l.Input(shape=(64+6,))
#embedded_lookup_feature = tf.feature_column.numeric_column('x', shape=(784))
conca = l.Concatenate()([categorical_features, numerical_features])
dense = l.Dense(128, activation='relu')(conca)
dense_1 = l.Dense(128, activation='relu')(dense)
dense_2 = layers.Dense(number_of_locations, kernel_initializer='zeros')(dense_1)
output = l.Softmax()(dense_2)
inputs = list(feature_inputs.values()) + list(location_input.values())
return tf.keras.Model(inputs=inputs, outputs=output)
input_spec = preprocessed_example_dataset.element_spec
def model_fn():
# We _must_ create a new model here, and _not_ capture it from an external
# scope. TFF will call this within different graph contexts.
keras_model = create_keras_model()
return tff.learning.from_keras_model(
keras_model,
input_spec=input_spec,
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
)
error when called:
ValueError: The top-level structure in `dummy_batch` or `input_spec` must contain exactly two elements, as it must contain type information for both inputs to and predictions from the model.
preprocessed_example_dataset.element_spec:
OrderedDict([('pickup_location_id',
TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('pickup_hour_sin',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('pickup_hour_cos',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('pickup_week_day_sin',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('pickup_week_day_cos',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('pickup_month_sin',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('pickup_month_cos',
TensorSpec(shape=(None,), dtype=tf.float32, name=None)),
('y', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])
I was able to find the answer looking at the Federate Learning repository on GitHub:
The way to do it is to make the 'x' value of the orderedDict an orderedDict itself using as keys the name of the columns we want as input.
A concrete example is given here: https://github.com/tensorflow/federated/blob/3b5a551c46e7eab61e40c943390868fca6422e21/tensorflow_federated/python/learning/keras_utils_test.py#L283
Where it define the input spec:
input_spec = collections.OrderedDict(
x=collections.OrderedDict(
a=tf.TensorSpec(shape=[None, 1], dtype=tf.float32),
b=tf.TensorSpec(shape=[1, 1], dtype=tf.float32)),
y=tf.TensorSpec(shape=[None, 1], dtype=tf.float32))
model = model_examples.build_multiple_inputs_keras_model()
To be used in the model defined as:
def build_multiple_inputs_keras_model():
"""Builds a test model with two inputs."""
l = tf.keras.layers
a = l.Input((1,), name='a')
b = l.Input((1,), name='b')
# Each input has a single, independent dense layer, which are combined into
# a final dense layer.
output = l.Dense(1)(
l.concatenate([
l.Dense(1)(a),
l.Dense(1)(b),
]))
return tf.keras.Model(inputs={'a': a, 'b': b}, outputs=[output])