Tags: keras, nlp, lstm, quora

How to fix AttributeError: 'NoneType' object has no attribute '_inbound_nodes' when creating an LSTM model using Manhattan distance?


I am trying to create a neural net model that returns the similarity score of two sentences using a Manhattan LSTM (e.g. https://medium.com/mlreview/implementing-malstm-on-kaggles-quora-question-pairs-competition-8b31b0b16a07). I have used the Quora question-pairs dataset and generated sentence embeddings using google-bert. Now I want to create an LSTM model like the one in the example above and use it, but I am getting the following error:

Using TensorFlow backend.
(100000, 1, 768)
(100000, 1, 768)
(100000,)
(100000, 100)
Traceback (most recent call last):
  File "train_model_manhattan.py", line 151, in <module>
    model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 93, in __init__
    self._init_graph_network(*args, **kwargs)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 231, in _init_graph_network
    self.inputs, self.outputs)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1366, in _map_graph_network
    tensor_index=tensor_index)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
    node_index, tensor_index)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
    node_index, tensor_index)
  File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1325, in build_map
    node = layer._inbound_nodes[node_index]
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'

Here is what I have already tried. Note that each returned embedding has shape (768,), i.e. it is a vector of size 768, like this: [1.2e+05 2.7e-01 7.8 .... 8.9]


print(np.shape(train_vec1))   # => (100000, 1, 768)
print(np.shape(train_vec2))   # => (100000, 1, 768)
print(np.shape(train_label))  # => (100000,)
#################################################
def exponent_neg_manhattan_distance(left, right):
    ''' Similarity score: exp(-Manhattan distance), squashed into (0, 1] '''
    return np.exp(-np.sum(np.abs(left - right), axis=1, keepdims=True))

def manhattan_distance(left, right):
    ''' Helper function for the similarity estimate of the LSTMs' outputs '''
    print(np.shape(left))
    return K.sum(K.abs(left - right), axis=1, keepdims=True)
#################################################

import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model


inp1 = Input(shape=(768,))
inp2 = Input(shape=(768,))


x = keras.layers.concatenate([inp1, inp2], axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
out = Dense(1)(x)


# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)

left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)

# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),output_shape=lambda x: (x[0][0], 1))([left_output, right_output])

#######################
# Getting the error when execution reaches the following line
#######################
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])


This is my entire code:


import os
data_file='quora_duplicate_questions.tsv'
# 0 means don't load, 1 means load from the file
LOAD_ENCODING_FROM_FILE = 1
encoding_data_file_quest1='encoding_quest1'
encoding_data_file_quest2='encoding_quest2'
encoding_data_file_label='quest_label'

#################################################
import numpy as np
import pandas as pd
import tensorflow as tf
import re
from bert_serving.client import BertClient
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pickle
from keras import models
from keras import layers
from keras import optimizers
from keras.layers import Dropout
from keras import backend as K
from keras.layers import Lambda
#################################################
maxlen = 125  # We will cut questions after 125 words

# The next step is to transform all sentences to fixed-length encodings using BERT embeddings
# [0.1 0.4 0.4] [0.9 0.6 0.1] 2.4
# [0.4 0.1 0.3] [0.5 0.6 0.1] 1.0

# Load the previously saved encodings from file
if LOAD_ENCODING_FROM_FILE == 1:
    with open(encoding_data_file_quest1, "rb") as fp:
        vec1=pickle.load(fp)
    with open(encoding_data_file_quest2, "rb") as fp:   
        vec2=pickle.load(fp)
    with open(encoding_data_file_label, "rb") as fp: 
        label=pickle.load(fp)


train_vec1 = np.asarray(vec1, np.float32)
train_vec2 = np.asarray(vec2, np.float32)

train_vec1 = train_vec1.reshape((100000,1,768))
train_vec2 = train_vec2.reshape((100000,1,768))


train_vec1_tensor = K.cast(train_vec1,dtype='float32')
train_vec2_tensor = K.cast(train_vec2,dtype='float32')

train_label = np.asarray(label,np.float32)
print(np.shape(train_vec1))
print(np.shape(train_vec2))
print(np.shape(train_label))
#################################################
def exponent_neg_manhattan_distance(left, right):
    ''' Similarity score: exp(-Manhattan distance), squashed into (0, 1] '''
    return np.exp(-np.sum(np.abs(left - right), axis=1, keepdims=True))

def manhattan_distance(left, right):
    ''' Helper function for the similarity estimate of the LSTMs' outputs '''
    print(np.shape(left))
    return K.sum(K.abs(left - right), axis=1, keepdims=True)
#################################################

import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model


inp1 = Input(shape=(768,))
inp2 = Input(shape=(768,))


x = keras.layers.concatenate([inp1, inp2], axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
out = Dense(1)(x)


# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)

left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)

# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),output_shape=lambda x: (x[0][0], 1))([left_output, right_output])

#######################
# Getting the error when execution reaches the following line
#######################
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])

model.summary()
gradient_clipping_norm = 1.25  # not defined in the original code; value taken from the referenced MaLSTM tutorial
optimizer = optimizers.Adadelta(clipnorm=gradient_clipping_norm)
model.compile(optimizer,
              loss='mean_squared_error',
              metrics=['accuracy'])

history=model.fit([train_vec1, train_vec2], train_label, 
    epochs=30,batch_size=200,
    validation_split=0.2)

I want the model to take the two embeddings, calculate the Manhattan distance between them, and return the distance.
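
To make the target concrete, the quantity the Lambda layer is supposed to produce can be checked with plain numpy on toy vectors (a minimal sketch, independent of Keras):

import numpy as np

# Two toy "sentence encodings" standing in for the LSTM outputs
left = np.array([[0.1, 0.4, 0.4]])
right = np.array([[0.9, 0.6, 0.1]])

# Manhattan distance: sum of absolute differences per row
dist = np.sum(np.abs(left - right), axis=1, keepdims=True)  # [[1.3]]

# MaLSTM similarity: exp(-distance), squashed into (0, 1]
sim = np.exp(-dist)  # [[0.2725...]]
print(dist, sim)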


Solution

  • left_output and right_output are obtained from shared_lstm, but the LSTM is called on train_vec1_tensor and train_vec2_tensor, which are plain backend tensors produced by K.cast, not the output of any Keras layer. The inputs inp1 and inp2, on the other hand, are fed through a series of Dense layers that are never connected to the LSTM. So when Model(inputs=[inp1, inp2], outputs=[malstm_distance]) traces malstm_distance back towards the inputs, it reaches tensors that carry no layer history, and the graph walk fails with 'NoneType' object has no attribute '_inbound_nodes'. The whole graph must be built from the Input layers: keras.layers.concatenate (and the Lambda) should consume the outputs of shared_lstm rather than the outputs of the input layers directly. Like this:

    keras.layers.concatenate([left_output, right_output], axis=-1)
    

    Only then can this be a Siamese network.
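
    For completeness, here is a minimal sketch of the corrected wiring under the question's setup (embeddings of shape (100000, 1, 768); the unused Dense stack is dropped, since the MaLSTM output is the distance itself):

    import keras
    from keras import backend as K
    from keras.layers import Input, LSTM, Lambda
    from keras.models import Model

    def manhattan_distance(left, right):
        ''' Sum of absolute differences between the two LSTM outputs '''
        return K.sum(K.abs(left - right), axis=1, keepdims=True)

    # Inputs match the embedding shape: 1 timestep of 768 features
    inp1 = Input(shape=(1, 768))
    inp2 = Input(shape=(1, 768))

    # Both sides share the same LSTM (Siamese network)
    shared_lstm = LSTM(100)
    left_output = shared_lstm(inp1)   # connected to an Input layer, not to a K.cast tensor
    right_output = shared_lstm(inp2)

    # The distance layer consumes the LSTM outputs, so every tensor
    # traces back to the Input layers
    malstm_distance = Lambda(
        lambda x: manhattan_distance(x[0], x[1]),
        output_shape=lambda shapes: (shapes[0][0], 1)
    )([left_output, right_output])

    model = Model(inputs=[inp1, inp2], outputs=[malstm_distance])
    model.summary()

    # The numpy arrays are passed to fit() directly; no K.cast is needed,
    # because Keras feeds them to the Input placeholders at training time:
    # model.fit([train_vec1, train_vec2], train_label,
    #           epochs=30, batch_size=200, validation_split=0.2)

    With this wiring, train_vec1 and train_vec2 stay plain numpy arrays and the disconnected Dense branch disappears, which is exactly what removes the _inbound_nodes error.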