I am new to TensorFlow and keras_tuner. I am working with PyCharm, Anaconda3, and Python 3.9.12. Below you can find a minimal working example. In the first part, the code trains a simple reference ANN with fixed hyperparameters, using one input and one output vector computed from an artificial function. In the second part, the hyperparameters of that reference model are supposed to be found with keras-tuner.
My question is: why does keras_tuner (using the RandomSearch tuner) perform so badly on this example? Below you can find my code:
import pandas as pd
import numpy as np
import pip
# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf
from tensorflow.keras import layers
######################################################
from matplotlib import pyplot
np.set_printoptions(suppress=True)
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
########################################################################################################################
#######################################################################################################################
#Load training data into a pandas dataframe
#training_data = pd.read_csv('Encoder_Error.csv',names=["Position_Data", "Error_Data"])
#x_data = training_data['Position_Data']
#y_data = training_data['Error_Data']
#print(x_data)
#print(y_data)
#######################################################################################################################
from numpy import asarray
x_data = asarray([i/500 for i in range(-500,500)])
y_data = asarray([np.power(i,3)/np.exp(i/(abs(i)+1)) for i in x_data])
x_data = x_data.reshape((len(x_data), 1))
y_data = y_data.reshape((len(y_data), 1))
########################################################################################################################
#make a regression model
Encoder_error_model = tf.keras.Sequential([
    layers.Dense(3, activation="tanh", name="layer2"),
    layers.Dense(2, activation="tanh", name="layer3"),
    layers.Dense(1, name="layer4"),
])
Encoder_error_model.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam())
######################################################
#To train that model
Encoder_error_model.fit(x_data, y_data, batch_size=10, epochs=100)#batch_size=10, epochs=100
yhat = Encoder_error_model.predict(x_data)
########################################################################################################################
# Save the entire model as a SavedModel.
#Encoder_error_model.save('Tensorflow_Encoder_error_model_1')
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
import tensorflow as tf
from tensorflow import keras
import keras_tuner
#-----------------------------------------------------------------
def build_model(hp):
    model = keras.Sequential()
    model.add(
        layers.Dense(
            # Tune number of units.
            units=hp.Int("units1", min_value=1, max_value=5, step=1),
            # Tune the activation function to use.
            #activation=hp.Choice("activation1", ["relu", "softmax", "tanh", "elu", "gelu", "selu", "softsign"]),
            activation=hp.Choice("activation1", values=["relu", "tanh"]),
            #activation="tanh",
        )
    )
    model.add(
        layers.Dense(
            # Tune number of units.
            units=hp.Int("units2", min_value=1, max_value=5, step=1),
            # Tune the activation function to use.
            #activation=hp.Choice("activation2", ["relu", "softmax", "tanh", "elu", "gelu", "selu", "softsign"]),
            activation=hp.Choice("activation2", values=["relu", "tanh"]),
            #activation="tanh",
        )
    )
    model.add(
        layers.Dense(
            1,
            #activation=hp.Choice("output_activation", ["relu", "softmax", "tanh", "elu", "gelu", "hard_sigmoid", "selu", "softsign", "sigmoid"]),
        )
    )
    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-8, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        #optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-8])),
        loss="mse",  # MeanSquaredError
        metrics=["mae"],  # MeanAbsoluteError; does not work with MeanSquaredError
    )
    return model
build_model(keras_tuner.HyperParameters())
#-----------------------------------------------------------------
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_mae",  # "val_accuracy"
    max_trials=50,
    #executions_per_trial=2,
    directory="my_dir",
    project_name="test optimize hyperparameter",
)
tuner.search_space_summary()
#-----------------------------------------------------------------
#stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
#----------------------------------------------------------------
tuner.search(x_data, y_data, batch_size=10, epochs=100, validation_split=0.2)
#----------------------------------------------------------------
best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hyperparameters.values)
#-----------------------------------------------------------------
yhat2 = best_model.predict(x_data)
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
#Plot Reference Data to Model
pyplot.scatter(x_data, y_data, label='Actual')
pyplot.scatter(x_data, yhat, label='Predicted-model-reference')
pyplot.scatter(x_data, yhat2, label='Predicted-model-tuning')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
print(y_data)
Since the search space is very limited, I would expect results similar to my reference model. If I only optimise the number of neurons and keep the reference model's activation function "tanh" fixed, the fitting works (a sketch of that variant is shown below). But if I let the tuner choose between "tanh" and "relu", it prefers "relu", which results in a bad approximation.
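For reference, this is a minimal sketch of what I mean by fixing the activation: hp.Fixed registers a non-tunable value in the search space while the unit counts and learning rate are still tuned; the name build_model_fixed is just an illustrative variant of build_model above.

def build_model_fixed(hp):
    # Same architecture as build_model, but the activations are fixed to "tanh".
    model = keras.Sequential()
    model.add(layers.Dense(
        units=hp.Int("units1", min_value=1, max_value=5, step=1),
        activation=hp.Fixed("activation1", "tanh"),
    ))
    model.add(layers.Dense(
        units=hp.Int("units2", min_value=1, max_value=5, step=1),
        activation=hp.Fixed("activation2", "tanh"),
    ))
    model.add(layers.Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Float("lr", min_value=1e-8, max_value=1e-2, sampling="log")
        ),
        loss="mse",
        metrics=["mae"],
    )
    return model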
The solution was the following: I added overwrite=True within the keras_tuner.RandomSearch() arguments. Before that, old optimisation results (saved in "my_dir") were reused in new runs of the code, which obviously led to wrong, stale results. After adding this argument, everything works fine; the changed call is sketched below.
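Concretely, the only change is in the tuner construction; everything else stays as posted above.

tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_mae",
    max_trials=50,
    directory="my_dir",
    project_name="test optimize hyperparameter",
    overwrite=True,  # discard any results previously stored in my_dir instead of reloading them
)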