I'm trying to set up an autoencoder with tied weights. I'm using Python 3.6.10, TensorFlow 1.15.0, and Keras 2.2.4-tf.
There is a very nice solution here that uses Sequential() to build the model:
import random

import numpy as np
from tensorflow import keras
from tensorflow.keras import activations, constraints, initializers, regularizers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Input, InputSpec
from tensorflow.keras.models import Model, Sequential

random.seed(1)


class DenseTied(keras.layers.Layer):
    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 tied_to=None,
                 **kwargs):
        self.tied_to = tied_to
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super().__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]
        if self.tied_to is not None:
            # Reuse the tied layer's kernel, transposed; register it as
            # non-trainable so this layer adds no parameters of its own.
            self.kernel = K.transpose(self.tied_to.kernel)
            self._non_trainable_weights.append(self.kernel)
        else:
            self.kernel = self.add_weight(shape=(input_dim, self.units),
                                          initializer=self.kernel_initializer,
                                          name='kernel',
                                          regularizer=self.kernel_regularizer,
                                          constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None
        self.built = True

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        # The output size is self.units regardless of the input dimension.
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def call(self, inputs):
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output
It defines the model as follows:
# generate data
x = np.random.rand(100, 4)
# model architecture
original_dim = 4
latent_dim = 2
input_layer = Input(shape=(original_dim,))
encoded1 = Dense(latent_dim, activation="sigmoid", use_bias=True)
decoded1 = DenseTied(original_dim, activation="sigmoid", tied_to=encoded1, use_bias=False)
autoencoder = Sequential()
autoencoder.add(input_layer)
autoencoder.add(encoded1)
autoencoder.add(decoded1)
autoencoder.compile(optimizer="adam", loss="binary_crossentropy")
autoencoder.summary()  # summary() prints itself and returns None
autoencoder.fit(x, x, epochs=3)
print(autoencoder.layers[0].get_weights()[0])    # encoder kernel, shape (4, 2)
print(autoencoder.layers[-1].get_weights()[-1])  # tied decoder kernel, shape (2, 4)
This works great for simple models that can be defined with the Sequential() class. Unfortunately, my model is much more complicated, so I need to use Model() instead of Sequential(). I need to define my model like this while preserving the tied weights:
# generate data
x = np.random.rand(100, 4)
original_dim = 4
latent_dim = 2
input_layer = Input(shape=(original_dim,))
encoded1 = Dense(latent_dim, activation="sigmoid", use_bias=True)(input_layer)
decoded1 = DenseTied(original_dim, activation="sigmoid", tied_to=encoded1, use_bias=False)(encoded1)
autoencoder2 = Model(input_layer, decoded1)
autoencoder2.compile(optimizer="adam", loss="binary_crossentropy")
autoencoder2.summary()
autoencoder2.fit(x, x, epochs=3)
print(autoencoder2.layers[1].get_weights()[0])   # layers[0] is the InputLayer in Model()
print(autoencoder2.layers[-1].get_weights()[-1])
The code above breaks at the DenseTied call (before compile() is even reached):
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-56-0851be462afd> in <module>
6 input_layer = Input(shape=(original_dim,))
7 encoded1 = Dense(latent_dim, activation="sigmoid", use_bias=True)(input_layer)
----> 8 decoded1 = DenseTied(original_dim, activation="sigmoid", tied_to=encoded1, use_bias=False)(encoded1)
9
10 autoencoder2 = Model(input_layer, decoded1)
/miniconda3/envs/nnet_entropy/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
822 # Build layer if applicable (if the `build` method has been
823 # overridden).
--> 824 self._maybe_build(inputs)
825 cast_inputs = self._maybe_cast_inputs(inputs)
826
/miniconda3/envs/nnet_entropy/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
2144 # operations.
2145 with tf_utils.maybe_init_scope(self):
-> 2146 self.build(input_shapes)
2147 # We must set self.built since user defined build functions are not
2148 # constrained to set self.built.
<ipython-input-15-925349097567> in build(self, input_shape)
36
37 if self.tied_to is not None:
---> 38 self.kernel = K.transpose(self.tied_to.kernel)
39 self._non_trainable_weights.append(self.kernel)
40 else:
AttributeError: 'Tensor' object has no attribute 'kernel'
I've tried defining part of my model with Model() and passing it to the rest of the model defined with Sequential() (as was done here), but it didn't seem like a good way to go. Is there any way to make this work? I would appreciate any suggestions.
You are not using your custom layer correctly. You need to pass the layer instance to the tied_to parameter of DenseTied, not the tensor obtained by calling that layer on the input. In the functional API, Dense(latent_dim, ...)(input_layer) returns an output tensor, while the kernel attribute that DenseTied.build() reads lives on the layer object itself; that is why you get 'Tensor' object has no attribute 'kernel'. Keep a reference to the layer and tie to it, like this:
input_layer = Input(shape=(original_dim,))
encoded1_layer = Dense(latent_dim, activation="sigmoid", use_bias=True)
encoded1 = encoded1_layer(input_layer)
decoded1 = DenseTied(original_dim, activation="sigmoid", tied_to=encoded1_layer, use_bias=False)(encoded1)
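For completeness, here is a minimal end-to-end sketch of the fixed functional-API version, reusing the question's toy data, dimensions, and training setup (the variable names mirror the question and are otherwise arbitrary). Note that in a functional Model() the InputLayer appears in model.layers, so the encoder sits at layers[1] rather than layers[0]:
# generate data
x = np.random.rand(100, 4)

original_dim = 4
latent_dim = 2

input_layer = Input(shape=(original_dim,))
encoded1_layer = Dense(latent_dim, activation="sigmoid", use_bias=True)
encoded1 = encoded1_layer(input_layer)
decoded1 = DenseTied(original_dim, activation="sigmoid",
                     tied_to=encoded1_layer, use_bias=False)(encoded1)

autoencoder2 = Model(input_layer, decoded1)
autoencoder2.compile(optimizer="adam", loss="binary_crossentropy")
autoencoder2.summary()
autoencoder2.fit(x, x, epochs=3)

# layers[0] is the InputLayer, so the encoder is layers[1]
print(autoencoder2.layers[1].get_weights()[0])    # encoder kernel, shape (4, 2)
print(autoencoder2.layers[-1].get_weights()[-1])  # tied decoder kernel, shape (2, 4)
The two printed kernels should be transposes of each other after training, since the decoder's kernel is literally K.transpose(encoder.kernel) rather than an independent variable; the decoder's parameters should accordingly be reported as non-trainable in the summary.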