Tags: python, tensorflow, machine-learning, keras, deep-learning

How do I format a TensorFlow dataset for a multi-output model?


I have an image dataset where each image has multiple categorical features that I want to predict. I am getting this error when trying to train:

ValueError: y_true and y_pred have different structures.
y_true: *
y_pred: ['*', '*', '*', '*']

Here is my code:

import os
import tensorflow as tf

def getLabel(path):
    # Look up the label array by the first 9 characters of the file name.
    path = path.numpy().decode("utf-8")
    key = os.path.basename(path)[:9]

    if key not in allLabelDict:
        print("Missing key:", key)
        raise ValueError("Missing label key.")
    return tf.convert_to_tensor(allLabelDict[key], dtype=tf.uint8)

def getImage(path):
    # Decode the JPEG and pad/crop it to a fixed 360x360 size.
    img = tf.io.read_file(path)
    img = tf.io.decode_jpeg(img, channels=3)
    return tf.image.resize_with_crop_or_pad(img, 360, 360)

def processData(file_path):
    label = tf.py_function(func=getLabel, inp=[file_path], Tout=tf.uint8)
    label.set_shape([4])

    img = tf.py_function(func=getImage, inp=[file_path], Tout=tf.uint8)
    img.set_shape([360, 360, 3])

    return img, label

allLabelDict is a dict keyed by each image's file name (its first 9 characters), with an array of the sparse, integer-encoded categories as the value.
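
For illustration, an entry might look like this (the keys and values here are made up):

# Hypothetical example: 9-character file-name prefix -> one integer label
# per categorical feature, in a fixed order.
allLabelDict = {
    "IMG_00001": [0, 2, 1, 1],
    "IMG_00002": [3, 0, 2, 0],
}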

valSize = int(imageFileNames.cardinality().numpy() * 0.2)

trainData = imageFileNames \
  .skip(valSize) \
  .map(processData, num_parallel_calls=tf.data.AUTOTUNE) \
  .cache() \
  .batch(100) \
  .prefetch(buffer_size=tf.data.AUTOTUNE)
valData = imageFileNames \
  .take(valSize) \
  .map(processData, num_parallel_calls=tf.data.AUTOTUNE) \
  .cache() \
  .batch(100) \
  .prefetch(buffer_size=tf.data.AUTOTUNE)
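
For reference, printing the dataset's element_spec shows the label side is a single batched tensor (output shown is approximate):

# The label half of each element is one (None, 4) tensor, while the model
# below produces a list of four output tensors.
print(trainData.element_spec)
# (TensorSpec(shape=(None, 360, 360, 3), dtype=tf.uint8, name=None),
#  TensorSpec(shape=(None, 4), dtype=tf.uint8, name=None))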

inputLayer = tf.keras.layers.Input(shape=(360, 360, 3))

baseLayers = tf.keras.layers.Rescaling(1./255)(inputLayer)
baseLayers = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(baseLayers)
baseLayers = tf.keras.layers.MaxPooling2D((2, 2))(baseLayers)
baseLayers = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(baseLayers)
baseLayers = tf.keras.layers.MaxPooling2D((2, 2))(baseLayers)
baseLayers = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(baseLayers)
baseLayers = tf.keras.layers.Flatten()(baseLayers)
baseLayers = tf.keras.layers.Dense(128, activation='relu')(baseLayers)

# Name each output layer so the loss/metric dicts in compile() can
# reference them by key.
labelOutput = tf.keras.layers.Dense(len(labelTuple), name="labelOutput")(baseLayers)
cellShapeOutput = tf.keras.layers.Dense(len(cellShapeTuple), name="cellShapeOutput")(baseLayers)
nucleusShapeOutput = tf.keras.layers.Dense(len(nucleusShapeTuple), name="nucleusShapeOutput")(baseLayers)
cytoplasmVacuoleOutput = tf.keras.layers.Dense(len(cytoplasmVacuoleTuple), name="cytoplasmVacuoleOutput")(baseLayers)

model = tf.keras.Model(inputs=inputLayer, outputs=[labelOutput, cellShapeOutput, nucleusShapeOutput, cytoplasmVacuoleOutput])

model.compile(
  optimizer=tf.keras.optimizers.Adam(),
  # from_logits=True because the output layers have no softmax activation.
  loss={
    "labelOutput": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    "cellShapeOutput": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    "nucleusShapeOutput": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    "cytoplasmVacuoleOutput": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  },
  metrics={
    "labelOutput": ["sparse_categorical_accuracy"],
    "cellShapeOutput": ["sparse_categorical_accuracy"],
    "nucleusShapeOutput": ["sparse_categorical_accuracy"],
    "cytoplasmVacuoleOutput": ["sparse_categorical_accuracy"]
  }
)

model.summary()
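
Since the loss and metric dicts in compile() are keyed by output name, checking model.output_names may help confirm the names Keras will match against (assuming the name= arguments on the output layers above):

# The dict keys passed to compile() must match these names exactly.
print(model.output_names)
# ['labelOutput', 'cellShapeOutput', 'nucleusShapeOutput', 'cytoplasmVacuoleOutput']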

Solution

  • I think your problem is that your labels also need to be dicts like this:

    example_label = {
        "labelOutput": tf.constant(0),
        "cellShapeOutput": tf.constant(1),
        "nucleusShapeOutput": tf.constant(0),
        "cytoplasmVacuoleOutput": tf.constant(1),
    }
    

    So that your model knows which label is intended for which of the four loss functions. Naturally, these would be extended to arrays/tensors with a batch size of 5, to something like:

    example_batch = {
        "labelOutput": tf.constant([0, 2, 3, 1, 0]),
        "cellShapeOutput": tf.constant([3, 2, 3, 2, 0]),
        "nucleusShapeOutput": tf.constant([2, 2, 3, 4, 1]),
        "cytoplasmVacuoleOutput": tf.constant([1, 2, 3, 1, 1]),
    }
    

    Using tensors (e.g. via tf.constant) instead of plain Python scalars for the non-batched labels is likely more accurate.
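
    A minimal sketch of that change, assuming the four entries of the (4,) label tensor line up with the four outputs in declaration order, and that the Dense output layers carry matching name= arguments:

    def processData(file_path):
        label = tf.py_function(func=getLabel, inp=[file_path], Tout=tf.uint8)
        label.set_shape([4])

        img = tf.py_function(func=getImage, inp=[file_path], Tout=tf.uint8)
        img.set_shape([360, 360, 3])

        # Split the single (4,) label tensor into a dict keyed by
        # output-layer name, so Keras can route each entry to the
        # matching loss and metric.
        return img, {
            "labelOutput": label[0],
            "cellShapeOutput": label[1],
            "nucleusShapeOutput": label[2],
            "cytoplasmVacuoleOutput": label[3],
        }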