I am trying to tune the hyperparameters of a random forest regressor with Bayesian optimization (hyperopt) over the hyperparameter space defined below, but I get an error that says
TypeError: __init__() got an unexpected keyword argument 'min_samples'
This is the code that produces the error:
# Import packages
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
# Create datasets
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
# Create hyperparameter space
space = {'n_estimators': hp.choice('n_estimators', range(2, 150, 1)),
         'min_samples': hp.choice('min_samples', range(2, 100, 1)),
         'max_features': hp.choice('max_features', range(2, 100, 1)),
         'max_samples': hp.choice('max_samples', range(2, 100, 1)),
         }
# Define objective function
def objective(space):
    rf = RandomForestRegressor(**space)
    # Fit training model
    rf.fit(x_train, y_train)
    # Make predictions and compute RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
# Surrogate fn
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)
print(best)
print(trials.results)
I have also tried listing the hyperparameters as separate arguments of the objective function, as in the code below, but then I get the following error:
TypeError: objective() missing 3 required positional arguments: 'min_samples', 'max_features', and 'max_samples'
# Define objective function
def objective(n_estimators, min_samples, max_features, max_samples):
    rf = RandomForestRegressor(n_estimators, min_samples, max_features, max_samples)
    # Fit training model
    rf.fit(x_train, y_train)
    # Make predictions and compute RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
Can you please advise on what I can do to fix my code?
I was able to tune a single hyperparameter using the code below:
# Import packages
import numpy as np
import time
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
from collections import OrderedDict
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
space = hp.choice('num_leaves', range(2, 100, 1))
def objective(num_leaves):
    rf = RandomForestRegressor(num_leaves)
    rf.fit(x_train, y_train)
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)
print(best)
print(trials.results)
The problem is that there is no parameter called min_samples in RandomForestRegressor (see the scikit-learn documentation). You probably meant min_samples_leaf. Just keep the upper bound of min_samples_leaf within the number of samples in your dataset. Other than that, there is no problem with your code.
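As a quick sanity check, you can list the keyword arguments the estimator actually accepts; this is a small sketch using scikit-learn's standard get_params() method (not part of your original code), and the output includes min_samples_leaf and min_samples_split but no min_samples:
from sklearn.ensemble import RandomForestRegressor
# 'min_samples' is not among the valid constructor parameters,
# while 'min_samples_leaf' and 'min_samples_split' are.
print(sorted(RandomForestRegressor().get_params()))
As for your second attempt: fmin passes the sampled point to the objective as a single argument (a dict when the space is a dict), so the objective cannot declare the hyperparameters as separate positional parameters; keeping def objective(space) and unpacking the dict with **space, as in the code below, handles that.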
import matplotlib.pyplot as plt
# Import packages
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn import datasets
from sklearn.ensemble import RandomForestRegressor
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
# Create datasets
reg_prob = datasets.make_friedman1(n_samples=100, n_features=10, noise=1.0, random_state=None)
x_train = reg_prob[0][0:50]
y_train = reg_prob[1][0:50]
x_test = reg_prob[0][50:100]
y_test = reg_prob[1][50:100]
# Create hyperparameter space
space = {'n_estimators': hp.choice('n_estimators', range(2, 150, 1)),
         'min_samples_leaf': hp.choice('min_samples_leaf', range(2, 50, 1)),
         'max_features': hp.choice('max_features', range(2, 10, 1)),
         'max_samples': hp.choice('max_samples', range(2, 50, 1)),
         }
# Define objective function
def objective(space):
    rf = RandomForestRegressor(**space)
    # Fit training model
    rf.fit(x_train, y_train)
    # Make predictions and compute RMSE
    y_pred = rf.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Return RMSE
    return rmse
# Surrogate fn
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=2,
            trials=trials)
print(best)
print(trials.results)
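One thing to keep in mind when reading the output: because the space is built with hp.choice, best contains the indices of the chosen options rather than the values themselves. hyperopt's space_eval maps those indices back to actual hyperparameter values; a minimal sketch, assuming the space and best objects from the code above:
from hyperopt import space_eval
# Translate the hp.choice indices stored in 'best' back into the
# actual hyperparameter values that were selected.
print(space_eval(space, best))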