tensorflow, machine-learning, keras, deep-learning, siamese-network

Implementing TensorFlow Triplet Loss


I would like to implement the built-in TensorFlow Addons version of triplet loss (following a tutorial here) for a siamese network, but I can't seem to get it quite right. No matter how I wrangle the code, another error pops up; currently it is:

TypeError: Could not build a TypeSpec for <KerasTensor: shape=(3, None, 256) dtype=float32 (created by layer 'tf.math.l2_normalize_4')> with type KerasTensor.  

Note: this is just a token implementation, kept simple in order to understand how to implement triplet loss. I don't expect the model to actually learn anything.
Code:

!pip install -U tensorflow-addons
import io
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.datasets import fashion_mnist

# Dummy data to pass to the model
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

train_data = [x_train[:20000],x_train[20000:40000],x_train[40000:]]
train_labels = [y_train[:20000],y_train[20000:40000],y_train[40000:]] 

train_data = tf.convert_to_tensor(train_data)
train_labels = tf.convert_to_tensor(train_labels)
#train_data = np.asarray(train_data)
#train_labels = np.asarray(train_labels)

def create_model(input_shape):
  inp = tf.keras.layers.Input(shape=input_shape)
  x = tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))(inp)
  x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
  x = tf.keras.layers.Dropout(0.3)(x)
  x = tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')(x)
  x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
  x = tf.keras.layers.Dropout(0.3)(x)
  x = tf.keras.layers.Flatten()(x)
  x = tf.keras.layers.Dense(256, activation=None)(x) # No activation on final dense layer
  #x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
  model = tf.keras.Model(inp,x)
  return model

def get_siamese_model(input_shape):
    """
        Model architecture
    """
    
    # Define the tensors for the triplet of input images
    anchor_input = tf.keras.layers.Input(input_shape, name="anchor_input")
    positive_input = tf.keras.layers.Input(input_shape, name="positive_input")
    negative_input = tf.keras.layers.Input(input_shape, name="negative_input")
    
    # Convolutional Neural Network (same from earlier)
    embedding_model = create_model(input_shape)
    
    # Generate the embedding outputs 
    encoded_anchor = embedding_model(anchor_input)
    encoded_positive = embedding_model(positive_input)
    encoded_negative = embedding_model(negative_input)
    
    inputs = [anchor_input, positive_input, negative_input]
    outputs = [encoded_anchor, encoded_positive, encoded_negative]
    #x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
    
    # Connect the inputs with the outputs
    siamese_triplet = tf.keras.Model(inputs=inputs,outputs=outputs)
    
    # return the model
    return embedding_model, siamese_triplet

emb_mod, model = get_siamese_model([28,28,1])

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tfa.losses.TripletSemiHardLoss())

# Train the network
#train_dataset = tf.convert_to_tensor(train_dataset)
history = model.fit(
    train_data,
    epochs=5)

Solution

  • I am not sure what exactly you are trying to do, but when using tfa.losses.TripletSemiHardLoss() you also have to incorporate your labels into your training dataset, since the loss mines triplets from the class labels (see the note after the example). Here is a working example:

    import io
    import numpy as np
    import tensorflow as tf
    import tensorflow_addons as tfa
    from tensorflow.keras.datasets import fashion_mnist
    
    # Dummy data to pass to the model
    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    
    # Split the training images into three parallel streams for the
    # anchor/positive/negative inputs of the siamese model
    train_data = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(x_train[:20000]),
                         tf.data.Dataset.from_tensor_slices(x_train[20000:40000]),
                         tf.data.Dataset.from_tensor_slices(x_train[40000:])))
    # ... and pair each stream with its class labels, which TripletSemiHardLoss needs
    train_labels = tf.data.Dataset.zip((tf.data.Dataset.from_tensor_slices(y_train[:20000]),
                         tf.data.Dataset.from_tensor_slices(y_train[20000:40000]),
                         tf.data.Dataset.from_tensor_slices(y_train[40000:])))

    dataset = tf.data.Dataset.zip((train_data, train_labels)).batch(32)

    def create_model(input_shape):
      inp = tf.keras.layers.Input(shape=input_shape)
      x = tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))(inp)
      x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
      x = tf.keras.layers.Dropout(0.3)(x)
      x = tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu')(x)
      x = tf.keras.layers.MaxPooling2D(pool_size=2)(x)
      x = tf.keras.layers.Dropout(0.3)(x)
      x = tf.keras.layers.Flatten()(x)
      x = tf.keras.layers.Dense(256, activation=None)(x) # No activation on final dense layer
      #x = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(x, axis=1))(x)
      model = tf.keras.Model(inp,x)
      return model
    
    def get_siamese_model(input_shape):
        """
            Model architecture
        """
        
        # Define the tensors for the triplet of input images
        anchor_input = tf.keras.layers.Input(input_shape, name="anchor_input")
        positive_input = tf.keras.layers.Input(input_shape, name="positive_input")
        negative_input = tf.keras.layers.Input(input_shape, name="negative_input")
        
        # Convolutional Neural Network (same from earlier)
        embedding_model = create_model(input_shape)
        
        # Generate the embedding outputs 
        encoded_anchor = embedding_model(anchor_input)
        encoded_positive = embedding_model(positive_input)
        encoded_negative = embedding_model(negative_input)
        
        inputs = [anchor_input, positive_input, negative_input]
        outputs = [encoded_anchor, encoded_positive, encoded_negative]
        #x = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(outputs, axis=1))(outputs)
        
        # Connect the inputs with the outputs
        siamese_triplet = tf.keras.Model(inputs=inputs,outputs=outputs)
        
        # return the model
        return embedding_model, siamese_triplet
    
    emb_mod, model = get_siamese_model([28,28,1])
    
    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tfa.losses.TripletSemiHardLoss())
    
    # Train the network
    history = model.fit(
        dataset,
        epochs=1)
    
    625/625 [==============================] - 76s 120ms/step - loss: 0.1354 - model_79_loss: 0.0572 - model_79_1_loss: 0.0453 - model_79_2_loss: 0.0330
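
  • As a side note on how the loss consumes those labels: tfa.losses.TripletSemiHardLoss takes a batch of integer class labels as y_true and the corresponding embedding vectors as y_pred, and mines the semi-hard triplets internally, which is why the model only has to output embeddings (ideally L2-normalized). A minimal sketch of the expected call signature outside of model.fit, using made-up labels and random embeddings purely for illustration:

    import tensorflow as tf
    import tensorflow_addons as tfa

    loss_fn = tfa.losses.TripletSemiHardLoss(margin=1.0)

    # y_true: integer class labels for the batch; y_pred: one embedding per sample
    labels = tf.constant([0, 0, 1, 1])
    embeddings = tf.math.l2_normalize(tf.random.normal([4, 256]), axis=1)

    print(loss_fn(labels, embeddings).numpy())  # scalar triplet loss for this batch

After training, the shared embedding network (returned above as emb_mod) should be usable on its own to embed new images for retrieval or clustering, e.g. emb_mod.predict(...) on a batch shaped (n, 28, 28, 1).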