I am trying to use a random forest (RF) classifier, but every time I call `fit` on a `BayesSearchCV` object I get an error. Below are my specific example and a second, self-contained example that you can run to reproduce the problem. I suspect that this may be due to the `train_test_split` function, but I'm not entirely sure how to triage that. Please let me know if there is anything in my code that is obviously wrong.
I am currently using the most up-to-date versions of sklearn, skopt, numpy, etc.
import numpy as np
import pandas as pd
from sklearn import preprocessing
from matplotlib import pyplot as plt
import xgboost as xgb
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from skopt import BayesSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
# Bayesian hyper-parameter search over a random forest classifier, scored by
# ROC AUC using 3-fold cross-validation and 32 search iterations.
# NOTE(review): full_train, full_y_train, X_test_red and y_test are not defined
# in this snippet -- presumably they come from an earlier train_test_split.
opt = BayesSearchCV(
    RandomForestClassifier(random_state=42),
    {
        # Two-integer tuples are searched as inclusive integer ranges.
        'n_estimators': (5, 5000),
        # 'auto' and 'sqrt' select the same behaviour for
        # RandomForestClassifier (and 'auto' was removed in later scikit-learn
        # releases), so the original list had only one distinct choice.
        'max_features': ['sqrt', 'log2'],
        'max_depth': (2, 90),
        'min_samples_split': (2, 10),
        'min_samples_leaf': (1, 7),
        # Must be real booleans: the strings "True" and "False" are both
        # truthy, so bootstrap=False would never actually be tried.
        'bootstrap': [True, False],
    },
    n_iter=32,
    cv=3,
    scoring='roc_auc',
)
opt.fit(full_train, full_y_train)
print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(X_test_red, y_test))
ERROR
/Users/user/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/deprecation.py:67: FutureWarning: Class MaskedArray is deprecated; MaskedArray is deprecated in version 0.23 and will be removed in version 0.25. Use numpy.ma.MaskedArray instead.
warnings.warn(msg, category=FutureWarning)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-20-8b1596e90c35> in <module>
----> 1 opt.fit(full_train, full_y_train)
2
3 print("val. score: %s" % opt.best_score_)
4 print("test score: %s" % opt.score(X_test_red, y_test))
~/opt/anaconda3/lib/python3.8/site-packages/skopt/searchcv.py in fit(self, X, y, groups, callback)
~/opt/anaconda3/lib/python3.8/site-packages/skopt/searchcv.py in _step(self, X, y, search_space, optimizer, groups, n_points)
~/opt/anaconda3/lib/python3.8/site-packages/skopt/searchcv.py in _fit(self, X, y, groups, parameter_iterable)
~/opt/anaconda3/lib/python3.8/site-packages/sklearn/utils/deprecation.py in wrapped(*args, **kwargs)
66 def wrapped(*args, **kwargs):
67 warnings.warn(msg, category=FutureWarning)
---> 68 return init(*args, **kwargs)
69 cls.__init__ = wrapped
70
TypeError: object.__init__() takes exactly one argument (the instance to initialize)
Here is a self-contained example you can run to reproduce the error:
from skopt import BayesSearchCV
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
# Self-contained reproduction: Bayesian hyper-parameter search over an SVC on
# the digits dataset, with a 75/25 train/test split.
# Options are passed to load_digits by keyword: positional use is deprecated
# from scikit-learn 0.23 and rejected once the arguments became keyword-only.
X, y = load_digits(n_class=10, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=.25, random_state=0
)
# log-uniform: understand as search over p = exp(x) by varying x
opt = BayesSearchCV(
    SVC(),
    {
        'C': (1e-6, 1e+6, 'log-uniform'),
        'gamma': (1e-6, 1e+1, 'log-uniform'),
        'degree': (1, 8),  # integer valued parameter
        'kernel': ['linear', 'poly', 'rbf'],  # categorical parameter
    },
    n_iter=32,
    cv=3,
)
opt.fit(X_train, y_train)
print("val. score: %s" % opt.best_score_)
print("test score: %s" % opt.score(X_test, y_test))
That gives the same error as the first example on my machine.
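The error is not caused by your code or by `train_test_split`. As the traceback shows, it is raised inside skopt's `searchcv.py` when it goes through the `MaskedArray` class that sklearn deprecated in version 0.23. The problems with sklearn >= 0.23.0 have been fixed in skopt version 0.8.1, so upgrading scikit-optimize (for example with `pip install -U scikit-optimize`) resolves it. https://pypi.org/project/scikit-optimize/0.8.1/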