optimization, xgboost, hyperparameters, hyperopt

Hyperparameter Optimization with Hyperopt (Bayesian Hyperparameter Optimization) yields hyperparameters outside the defined search space


I implemented hyperparameter optimization with hyperopt for an XGBClassifier. For this, I defined a search space, e.g. "n_estimators": hp.choice("n_estimators", np.arange(32, 264, 8, dtype=int))

However, hyperopt recommends an "n_estimators" of 18, which is outside the defined search space.

Is this normal, expected behavior? If so, why? Otherwise, I assume I defined the search space incorrectly.

I appreciate any help or explanations.

Edit 1: Reproducible example:

import numpy as np
from sklearn.metrics import precision_score, f1_score, accuracy_score, recall_score, average_precision_score, roc_auc_score
import xgboost as xgb
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

# Search space
space={
        "n_estimators": hp.choice("n_estimators",np.arange(32, 264, 8, dtype=int)),                   # tune 32 - 256
        "eta":hp.uniform("eta",0.01,0.9),                                   # learning rate # tune 0.01 - 0.9
        "gamma":hp.uniform("gamma",0.01,0.9),                               # tune 0 - 0.9
        "max_depth":hp.choice("max_depth", np.arange(6, 18, 1, dtype=int)),                       # tune 6 - 18
        "min_child_weight":hp.quniform('min_child_weight', 0, 10, 1),       # tune 0 - 10
        "subsample":hp.uniform("subsample",0.5,1),                          # tune 0.5 - 1
        "colsample_bytree":hp.uniform("colsample_bytree",0,1),              # tune 0- 1
        "colsample_bylevel":hp.uniform("colsample_bylevel",0,1),             # tune 0- 1
        "colsample_bynode":hp.uniform("colsample_bynode",0,1),              # tune 0- 1
        "scale_pos_weight": 1,                  # tune by class imbalance: (sum(negative instances) / sum(positive instances)                  
    }

def objective(space):
        params={     
                # General parameters
                "booster":'gbtree',
                "nthread":16,
                
                # Booster parameters
                "n_estimators":space["n_estimators"],           # tune 32 - 256
                "eta":space["eta"],                               # learning rate # tune 0.01 - 0.9
                "gamma":space["gamma"],                           # tune 0 - 0.9
                "max_depth":space["max_depth"],                   # tune 6 - 18
                "min_child_weight":space["min_child_weight"],     # tune 0 - 10
                "subsample":space["subsample"],                   # tune 0.5 - 1
                "colsample_bytree":space["colsample_bytree"],     # tune 0- 1
                "colsample_bylevel":space["colsample_bylevel"],   # tune 0- 1
                "colsample_bynode":space["colsample_bynode"],     # tune 0- 1
                "scale_pos_weight":space["scale_pos_weight"],     # tune by class imbalance: (sum(negative instances) / sum(positive instances))
                
                # Learning task parameters
                "objective":"multi:softmax", # multi:softprob
                "num_class":2,
                #eval_metric="auc", # default metric will be assigned according to objective, logloss for classification
                "seed":42,
                }
        
        clf=xgb.XGBClassifier(**params)                      
                        
        evaluation = [( X_valid, y_valid)]
        
        clf.fit(X_train, y_train, eval_set=evaluation,
                verbose=False)
        
        preds = clf.predict_proba(X_valid)

        predicted_classes = preds.argmax(axis=1) # extract the class with the highest probability

        f1 = f1_score(y_valid, predicted_classes)
        acc = accuracy_score(y_valid, predicted_classes)
        recall = recall_score(y_valid, predicted_classes)
        precision = precision_score(y_valid, predicted_classes)
        average_precision = average_precision_score(y_valid, predicted_classes)
        roc_auc = roc_auc_score(y_valid, predicted_classes)

        return {'loss': -f1, 'status': STATUS_OK, 'f1': f1, 'acc': acc, 'recall': recall, 'precision': precision, 'average_precision': average_precision, 'roc_auc': roc_auc}

trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 10,
                        trials = trials,
                        rstate = np.random.default_rng(42))

print("The best hyperparameters are : ")
print(best_hyperparams)
print(trials.best_trial["result"])

Solution

  • Apparently, for hp.choice hyperparameters, the output of fmin is the index of the chosen value in the list of choices, not the value itself: https://github.com/hyperopt/hyperopt/issues/284

    If you have a look at trials.__dict__ at the end of your code, you'll find that all the values of n_estimators are integers between 0 and 28, i.e. indices into your 29-element list of options, and none of the actual values from that list.
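
  • To recover the actual values, hyperopt provides space_eval, which maps the indices returned by fmin back onto the original search space. A minimal sketch, assuming the space, best_hyperparams, and trials objects from the code above:

    from hyperopt import space_eval

    # fmin returns indices for hp.choice parameters, e.g. {"n_estimators": 18, ...}
    print(best_hyperparams)

    # space_eval maps those indices back to the actual values in the search space,
    # e.g. n_estimators = np.arange(32, 264, 8)[18] = 176
    print(space_eval(space, best_hyperparams))

    # the per-trial raw values (again indices for hp.choice) can be inspected via
    print(trials.trials[0]["misc"]["vals"])

    Depending on your hyperopt version, fmin also accepts return_argmin=False, in which case it returns the actual parameter values instead of the indices.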