I am using the scikit-optimize package to tune the hyperparameters of my model. For performance and readability reasons (I am training several models with the same process), I want to structure the whole hyperparameter tuning in a class:
...
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import InputLayer, Input, Dense, Embedding, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import skopt
from skopt import gp_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.utils import use_named_args
class hptuning:
    def __init__(self, input_df):
        self.inp_df = input_df
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(...)
        self.param_space = self.dim_hptuning()
        self.best_loss = 10000

    def dim_hptuning(self):
        dim_layers = Integer(low=0, high=7, name='layers')
        dim_nodes = Integer(low=2, high=90, name='nodes')
        dimensions = [dim_layers, dim_nodes]
        return dimensions

    def create_model(self, layers, nodes):
        model = Sequential()
        for layer in range(layers):
            model.add(Dense(nodes))
        model.add(Dense(1, activation='sigmoid'))
        optimizer = Adam()
        model.compile(loss='mean_absolute_error',
                      optimizer=optimizer,
                      metrics=['mae', 'mse'])
        return model

    @use_named_args(dimensions=self.param_space)
    def fitness(self, nodes, layers):
        model = self.create_model(layers=layers, nodes=nodes)
        history = model.fit(x=self.X_train.values, y=self.y_train.values, epochs=200, batch_size=200, verbose=0)
        loss = history.history['val_loss'][-1]
        if loss < self.best_loss:
            model.save('model.h5')
            self.best_loss = loss
        del model
        K.clear_session()
        return loss

    def find_best_model(self):
        search_result = gp_minimize(func=self.fitness, dimensions=self.param_space, acq_func='EI', n_calls=10)
        return search_result

hptun = hptuning(input_df=df)
search_result = hptun.find_best_model()
print(search_result.fun)
Now I get the problem that the decorator @use_named_args is not working within a class as it should (see the example code in the scikit-optimize documentation). I get the error message
Traceback (most recent call last):
File "main.py", line 138, in <module>
class hptuning:
File "main.py", line 220, in hptuning
@use_named_args(dimensions=self.param_space)
NameError: name 'self' is not defined
which is obviously about the misuse of the decorator in this scenario.
Probably due to my limited understanding of how such decorators work, I am not able to get this running. Could someone help me with this one?
Thank you all in advance for the support!
The problem of self not being defined is unrelated to scikit-optimize. You cannot use self to define a decorator, because it is only defined inside the method you are decorating. But even if you sidestep this issue (e.g. by providing param_space as a global variable), I expect the next problem will be that self will be passed to the use_named_args decorator, which expects only the arguments to be optimized.
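To make the timing concrete, here is a stripped-down sketch (the Demo class and its attribute are invented purely for illustration): the class body is executed once, at class-creation time, where ordinary class-body names exist but self does not.

class Demo:
    x = 10

    print(x)          # fine: `x` is a name in the class-body namespace
    # print(self.x)   # NameError: `self` is not defined while the class body runs
    # A decorator line like `@use_named_args(dimensions=self.param_space)` fails
    # for the same reason: its argument is evaluated during class creation.

    def method(self):
        print(self.x)  # fine: `self` exists only inside a method call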
The most obvious solution would be not to use the decorator on the fitness method, but instead to define a decorated function that calls the fitness method inside the find_best_model method, like this:
def find_best_model(self):
    # Decorate a local wrapper instead of the bound method; `self` is
    # available here through the closure.
    @use_named_args(dimensions=self.param_space)
    def fitness_wrapper(*args, **kwargs):
        return self.fitness(*args, **kwargs)

    search_result = gp_minimize(func=fitness_wrapper, dimensions=self.param_space, acq_func='EI', n_calls=10)
    return search_result
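For completeness, here is a minimal, self-contained sketch of the same pattern with a toy objective standing in for the Keras model (the Tuner class and its quadratic objective are invented for illustration; only the skopt calls mirror your code), which you can run to check that the wrapper approach works:

from skopt import gp_minimize
from skopt.space import Integer
from skopt.utils import use_named_args

class Tuner:
    def __init__(self):
        self.param_space = [Integer(low=0, high=7, name='layers'),
                            Integer(low=2, high=90, name='nodes')]

    def fitness(self, nodes, layers):
        # Toy objective standing in for the real model training.
        return (layers - 3) ** 2 + (nodes - 40) ** 2

    def find_best_model(self):
        @use_named_args(dimensions=self.param_space)
        def fitness_wrapper(*args, **kwargs):
            return self.fitness(*args, **kwargs)

        return gp_minimize(func=fitness_wrapper,
                           dimensions=self.param_space,
                           acq_func='EI',
                           n_calls=10)

search_result = Tuner().find_best_model()
print(search_result.x, search_result.fun)

Note that the dimension names ('layers' and 'nodes') must match the keyword arguments that fitness accepts, because use_named_args passes each sampled point to the wrapper as keyword arguments.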