4. Optuna

[1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
import optuna
import tempfile
import pickle
import os
import scipy.stats as stats
import lightgbm as lgb
import xgboost as xgb
import catboost as catb

Let’s start by getting some data from https://archive.ics.uci.edu/ml/datasets/Dry+Bean+Dataset

[2]:
# Fetch the Dry Bean archive and unpack it next to the notebook.
# `wget -c` resumes a partial download and does nothing when the file is
# already complete; `unzip -u` only writes files that are missing or older
# than the copy inside the archive, so re-running this cell is cheap.
!wget -c https://archive.ics.uci.edu/ml/machine-learning-databases/00602/DryBeanDataset.zip
!unzip -u DryBeanDataset.zip
--2021-05-22 15:27:02--  https://archive.ics.uci.edu/ml/machine-learning-databases/00602/DryBeanDataset.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable

    The file is already fully retrieved; nothing to do.

Archive:  DryBeanDataset.zip
[3]:
# Read the spreadsheet, then separate the feature matrix from the labels.
df = pd.read_excel('DryBeanDataset/Dry_Bean_Dataset.xlsx')

# Every column except "Class" is treated as a feature.
is_feature = df.columns != "Class"
x_full = df.loc[:, is_feature].to_numpy()

# Encode the class names as integer category codes.
y_full = df["Class"].astype("category").cat.codes.to_numpy()
[4]:
# Hold out 10% of all samples as the final test set ...
x_train_val, x_test, y_train_val, y_test = train_test_split(
    x_full,
    y_full,
    test_size=0.1,
    random_state=0,
)
# ... then hold out 10% of what remains as the validation set used to score trials.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val,
    y_train_val,
    test_size=0.1,
    random_state=0,
)

Now, we train a classifier with fixed hyperparameters and evaluate the score

[5]:
# Baseline: an extra-trees forest with a fixed size and seed, fitted on the
# training split and scored on the validation split.
clf = ExtraTreesClassifier(n_estimators=300, random_state=0).fit(x_train, y_train)
clf.score(x_val, y_val)
[5]:
0.9240816326530612

Let’s create an Optuna study and a directory to store our results

[6]:
# Create the study only once: re-running this cell must not discard the
# trials that later cells have already added to it.
try:
    study
except NameError:
    study = optuna.create_study(direction="maximize")

# Same idea for the results directory that holds one pickled model per trial.
# mkdtemp() creates the directory itself and leaves it in place until it is
# removed explicitly. (A throw-away TemporaryDirectory object would delete
# its directory as soon as it is garbage collected, forcing a manual mkdir
# on the same path afterwards.)
try:
    tempdir
except NameError:
    tempdir = tempfile.mkdtemp()

print(tempdir)
[I 2021-05-22 15:27:08,976] A new study created in memory with name: no-name-5e3b9e91-f0c8-45cc-b581-350d2e07025c
/tmp/pytmpfiles/tmp84frhkyo
[7]:
def objective(trial: optuna.trial.Trial) -> float:
    """Fit an extra-trees classifier for one trial and return its validation score.

    The forest size is sampled between 10 and 1000 trees. The fitted model is
    pickled to ``<tempdir>/<trial number>.pkl`` so that it can be reloaded
    later without retraining.
    """
    model = ExtraTreesClassifier(
        n_estimators=trial.suggest_int("et_n_estimators", 10, 1000)
    )
    model.fit(x_train, y_train)

    model_path = f"{os.path.join(tempdir, str(trial.number))}.pkl"
    with open(model_path, "wb") as fh:
        pickle.dump(model, fh)

    return model.score(x_val, y_val)


# Run at most 10 trials; no new trial is started once 60 seconds have elapsed.
study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:10,162] Trial 0 finished with value: 0.9257142857142857 and parameters: {'et_n_estimators': 163}. Best is trial 0 with value: 0.9257142857142857.
[I 2021-05-22 15:27:11,424] Trial 1 finished with value: 0.9273469387755102 and parameters: {'et_n_estimators': 186}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:14,834] Trial 2 finished with value: 0.926530612244898 and parameters: {'et_n_estimators': 493}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:15,354] Trial 3 finished with value: 0.9224489795918367 and parameters: {'et_n_estimators': 77}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:19,555] Trial 4 finished with value: 0.9240816326530612 and parameters: {'et_n_estimators': 593}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:26,643] Trial 5 finished with value: 0.9248979591836735 and parameters: {'et_n_estimators': 952}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:27,449] Trial 6 finished with value: 0.923265306122449 and parameters: {'et_n_estimators': 111}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:28,235] Trial 7 finished with value: 0.9240816326530612 and parameters: {'et_n_estimators': 107}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:34,293] Trial 8 finished with value: 0.926530612244898 and parameters: {'et_n_estimators': 861}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:35,330] Trial 9 finished with value: 0.9257142857142857 and parameters: {'et_n_estimators': 158}. Best is trial 1 with value: 0.9273469387755102.
[8]:
def objective(trial: optuna.trial.Trial) -> float:
    """Fit a k-nearest-neighbours classifier for one trial and return its validation score.

    The neighbourhood size is sampled between 1 and 30. The fitted model is
    pickled to ``<tempdir>/<trial number>.pkl``.
    """
    model = KNeighborsClassifier(
        n_neighbors=trial.suggest_int("knn_n_neighbors", 1, 30)
    )
    model.fit(x_train, y_train)

    model_path = f"{os.path.join(tempdir, str(trial.number))}.pkl"
    with open(model_path, "wb") as fh:
        pickle.dump(model, fh)

    return model.score(x_val, y_val)


# Added to the same study, so these trials compete with the earlier ones.
study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:35,663] Trial 10 finished with value: 0.6963265306122449 and parameters: {'knn_n_neighbors': 2}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:35,971] Trial 11 finished with value: 0.6914285714285714 and parameters: {'knn_n_neighbors': 28}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,244] Trial 12 finished with value: 0.713469387755102 and parameters: {'knn_n_neighbors': 10}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,515] Trial 13 finished with value: 0.6914285714285714 and parameters: {'knn_n_neighbors': 28}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,785] Trial 14 finished with value: 0.710204081632653 and parameters: {'knn_n_neighbors': 18}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,955] Trial 15 finished with value: 0.7379591836734694 and parameters: {'knn_n_neighbors': 1}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,227] Trial 16 finished with value: 0.710204081632653 and parameters: {'knn_n_neighbors': 18}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,490] Trial 17 finished with value: 0.7093877551020408 and parameters: {'knn_n_neighbors': 9}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,751] Trial 18 finished with value: 0.7004081632653061 and parameters: {'knn_n_neighbors': 24}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:38,015] Trial 19 finished with value: 0.713469387755102 and parameters: {'knn_n_neighbors': 10}. Best is trial 1 with value: 0.9273469387755102.
[9]:
%%capture cap_out --no-stderr

def objective(trial: optuna.trial.Trial) -> float:
    """Fit a LightGBM classifier for one trial and return its validation score.

    Regularisation strengths are sampled on a log scale, the sampling
    fractions uniformly, and the remaining parameters as integers. The fitted
    model is pickled to ``<tempdir>/<trial number>.pkl``.
    """
    # Search space based on the default suggestion from the Optuna website.
    # suggest_float (with log=True where needed) replaces the deprecated
    # suggest_loguniform / suggest_uniform and samples the same distributions.
    param = {
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_float('lgb_lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lgb_lambda_l2', 1e-8, 10.0, log=True),
        'num_leaves': trial.suggest_int('lgb_num_leaves', 2, 256),
        'feature_fraction': trial.suggest_float('lgb_feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('lgb_bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('lgb_bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('lgb_min_child_samples', 5, 100),
    }

    clf = lgb.LGBMClassifier(**param)
    clf.fit(x_train, y_train)

    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)

    return clf.score(x_val, y_val)


# Run at most 10 trials; no new trial is started once 60 seconds have elapsed.
study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:39,711] Trial 20 finished with value: 0.9208163265306123 and parameters: {'lgb_lambda_l1': 1.8482981826823892, 'lgb_lambda_l2': 0.011973352986995828, 'lgb_num_leaves': 124, 'lgb_feature_fraction': 0.7888222469562156, 'lgb_bagging_fraction': 0.4677224216839525, 'lgb_bagging_freq': 4, 'lgb_min_child_samples': 89}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:42,148] Trial 21 finished with value: 0.929795918367347 and parameters: {'lgb_lambda_l1': 1.3056821804999452e-08, 'lgb_lambda_l2': 2.030713915929927e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.4563432407923397, 'lgb_bagging_fraction': 0.9734380103761134, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 7}. Best is trial 21 with value: 0.929795918367347.
[I 2021-05-22 15:27:44,574] Trial 22 finished with value: 0.9306122448979591 and parameters: {'lgb_lambda_l1': 1.4376070248840012e-08, 'lgb_lambda_l2': 2.2523492976261175e-08, 'lgb_num_leaves': 245, 'lgb_feature_fraction': 0.4323450149823892, 'lgb_bagging_fraction': 0.9938783874780044, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 21}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:46,953] Trial 23 finished with value: 0.923265306122449 and parameters: {'lgb_lambda_l1': 3.12812522832872e-08, 'lgb_lambda_l2': 1.8536183493240282e-08, 'lgb_num_leaves': 256, 'lgb_feature_fraction': 0.4361710200785238, 'lgb_bagging_fraction': 0.9990831209074797, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:49,543] Trial 24 finished with value: 0.929795918367347 and parameters: {'lgb_lambda_l1': 1.1485602913794501e-08, 'lgb_lambda_l2': 1.2072818375646901e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.4119630276339381, 'lgb_bagging_fraction': 0.9910257498295888, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 7}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:51,973] Trial 25 finished with value: 0.9314285714285714 and parameters: {'lgb_lambda_l1': 1.8433765454349008e-08, 'lgb_lambda_l2': 1.181356431909535e-08, 'lgb_num_leaves': 253, 'lgb_feature_fraction': 0.40623865437711265, 'lgb_bagging_fraction': 0.9955797155626703, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:54,578] Trial 26 finished with value: 0.9306122448979591 and parameters: {'lgb_lambda_l1': 1.0375640705273421e-08, 'lgb_lambda_l2': 2.572029864598687e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.42645064979623726, 'lgb_bagging_fraction': 0.993917684160062, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 11}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:56,004] Trial 27 finished with value: 0.926530612244898 and parameters: {'lgb_lambda_l1': 4.462235247025858e-06, 'lgb_lambda_l2': 5.464291275342302e-06, 'lgb_num_leaves': 203, 'lgb_feature_fraction': 0.5296234708856598, 'lgb_bagging_fraction': 0.8371004662109016, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 32}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:56,469] Trial 28 finished with value: 0.926530612244898 and parameters: {'lgb_lambda_l1': 1.1136459099267102e-06, 'lgb_lambda_l2': 1.0428500111438135e-06, 'lgb_num_leaves': 16, 'lgb_feature_fraction': 0.6157372989773611, 'lgb_bagging_fraction': 0.8552510327710622, 'lgb_bagging_freq': 2, 'lgb_min_child_samples': 32}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:58,900] Trial 29 finished with value: 0.9257142857142857 and parameters: {'lgb_lambda_l1': 1.114655804055448e-08, 'lgb_lambda_l2': 1.1545154342742978e-08, 'lgb_num_leaves': 185, 'lgb_feature_fraction': 0.9922189907585702, 'lgb_bagging_fraction': 0.8769821898599875, 'lgb_bagging_freq': 6, 'lgb_min_child_samples': 27}. Best is trial 25 with value: 0.9314285714285714.
[10]:
%%capture cap_out --no-stderr

def objective(trial: optuna.trial.Trial) -> float:
    """Fit an XGBoost classifier for one trial and return its validation score.

    The booster type is sampled from gbtree / gblinear / dart and the L2
    (``lambda``) and L1 (``alpha``) regularisation strengths on a log scale.
    The fitted model is pickled to ``<tempdir>/<trial number>.pkl``.
    """
    # Search space based on the default suggestions from the Optuna website.
    param = {
        # 'mlogloss' is an evaluation metric, not a learning objective, so it
        # is passed as eval_metric; XGBClassifier selects the multiclass
        # objective itself from the labels it is fitted on.
        'eval_metric': 'mlogloss',
        'booster': trial.suggest_categorical('xgb_booster', ['gbtree', 'gblinear', 'dart']),
        # suggest_float(log=True) replaces the deprecated suggest_loguniform.
        'lambda': trial.suggest_float('xgb_lambda', 1e-8, 1.0, log=True),
        'alpha': trial.suggest_float('xgb_alpha', 1e-8, 1.0, log=True),
        'use_label_encoder': False,
    }

    clf = xgb.XGBClassifier(**param)
    clf.fit(x_train, y_train)

    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)

    return clf.score(x_val, y_val)


# Run at most 10 trials; no new trial is started once 60 seconds have elapsed.
study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:59,179] Trial 30 finished with value: 0.76 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 0.08454736059803832, 'xgb_alpha': 0.13432174853140677}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:03,679] Trial 31 finished with value: 0.926530612244898 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 1.1204126614196895e-08, 'xgb_alpha': 4.073316114838999e-08}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:15,055] Trial 32 finished with value: 0.9224489795918367 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 1.617480580724709e-08, 'xgb_alpha': 1.6704898072863906e-07}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:15,304] Trial 33 finished with value: 0.7689795918367347 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 0.00028821012077711477, 'xgb_alpha': 0.5018281270041018}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:29,440] Trial 34 finished with value: 0.9281632653061225 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 0.5015577583215429, 'xgb_alpha': 0.0001847020678563732}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:34,560] Trial 35 finished with value: 0.9240816326530612 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 9.313644847261667e-06, 'xgb_alpha': 0.00018415473651269253}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:40,254] Trial 36 finished with value: 0.9240816326530612 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 4.775326333176848e-05, 'xgb_alpha': 2.57504955969162e-06}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:52,447] Trial 37 finished with value: 0.9257142857142857 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 0.007297608984814747, 'xgb_alpha': 0.015000524508202724}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:52,696] Trial 38 finished with value: 0.2587755102040816 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 1.2331468616902276e-06, 'xgb_alpha': 0.003169435403760046}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:57,905] Trial 39 finished with value: 0.9248979591836735 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 6.205635732109192e-07, 'xgb_alpha': 2.33581609601957e-06}. Best is trial 25 with value: 0.9314285714285714.
[11]:
#%%capture cap_out --no-stderr

def objective(trial: optuna.trial.Trial) -> float:
    """Fit a CatBoost classifier for one trial and return its validation score.

    Only the boosting scheme ('Plain' or 'Ordered') is searched. The fitted
    model is pickled to ``<tempdir>/<trial number>.pkl``.
    """
    boosting_type = trial.suggest_categorical('catb_boosting_type', ['Plain', 'Ordered'])

    # verbose=50 makes CatBoost print a progress line every 50 iterations.
    model = catb.CatBoostClassifier(boosting_type=boosting_type, verbose=50)
    model.fit(x_train, y_train)

    model_path = f"{os.path.join(tempdir, str(trial.number))}.pkl"
    with open(model_path, "wb") as fh:
        pickle.dump(model, fh)

    return model.score(x_val, y_val)


# The 60 second budget is only checked between trials, so a slow trial that
# has already started still runs to completion.
study.optimize(objective, n_trials=10, timeout=60)
Learning rate set to 0.08943
0:      learn: 1.6169293        total: 109ms    remaining: 1m 48s
50:     learn: 0.2286390        total: 630ms    remaining: 11.7s
100:    learn: 0.1834779        total: 1.17s    remaining: 10.4s
150:    learn: 0.1659866        total: 1.69s    remaining: 9.48s
200:    learn: 0.1531031        total: 2.21s    remaining: 8.78s
250:    learn: 0.1427671        total: 2.74s    remaining: 8.17s
300:    learn: 0.1339345        total: 3.27s    remaining: 7.58s
350:    learn: 0.1267693        total: 3.79s    remaining: 7.01s
400:    learn: 0.1199584        total: 4.32s    remaining: 6.46s
450:    learn: 0.1140554        total: 4.84s    remaining: 5.9s
500:    learn: 0.1084516        total: 5.37s    remaining: 5.35s
550:    learn: 0.1032207        total: 5.89s    remaining: 4.8s
600:    learn: 0.0981966        total: 6.43s    remaining: 4.27s
650:    learn: 0.0939444        total: 6.98s    remaining: 3.74s
700:    learn: 0.0901262        total: 7.52s    remaining: 3.21s
750:    learn: 0.0865019        total: 8.06s    remaining: 2.67s
800:    learn: 0.0832780        total: 8.6s     remaining: 2.14s
850:    learn: 0.0798762        total: 9.14s    remaining: 1.6s
900:    learn: 0.0767640        total: 9.69s    remaining: 1.06s
950:    learn: 0.0737176        total: 10.2s    remaining: 528ms
[I 2021-05-22 15:29:08,914] Trial 40 finished with value: 0.9248979591836735 and parameters: {'catb_boosting_type': 'Plain'}. Best is trial 25 with value: 0.9314285714285714.
999:    learn: 0.0711576        total: 10.8s    remaining: 0us
Learning rate set to 0.08943
0:      learn: 1.6068053        total: 61.1ms   remaining: 1m 1s
50:     learn: 0.2306703        total: 2.96s    remaining: 55s
100:    learn: 0.1849559        total: 5.66s    remaining: 50.4s
150:    learn: 0.1663552        total: 8.47s    remaining: 47.6s
200:    learn: 0.1546436        total: 11.3s    remaining: 45.1s
250:    learn: 0.1452628        total: 14.2s    remaining: 42.3s
300:    learn: 0.1377216        total: 17.1s    remaining: 39.7s
350:    learn: 0.1309304        total: 20.1s    remaining: 37.1s
400:    learn: 0.1256575        total: 23.1s    remaining: 34.5s
450:    learn: 0.1203487        total: 26s      remaining: 31.7s
500:    learn: 0.1154523        total: 29s      remaining: 28.8s
550:    learn: 0.1108900        total: 31.9s    remaining: 26s
600:    learn: 0.1063524        total: 34.8s    remaining: 23.1s
650:    learn: 0.1020377        total: 37.8s    remaining: 20.3s
700:    learn: 0.0984189        total: 40.7s    remaining: 17.4s
750:    learn: 0.0945249        total: 43.6s    remaining: 14.4s
800:    learn: 0.0910075        total: 46.4s    remaining: 11.5s
850:    learn: 0.0878672        total: 49.2s    remaining: 8.62s
900:    learn: 0.0851764        total: 52.2s    remaining: 5.73s
950:    learn: 0.0825062        total: 55.8s    remaining: 2.87s
[I 2021-05-22 15:30:07,883] Trial 41 finished with value: 0.9216326530612244 and parameters: {'catb_boosting_type': 'Ordered'}. Best is trial 25 with value: 0.9314285714285714.
999:    learn: 0.0796181        total: 58.7s    remaining: 0us
[12]:
# Report how many trials the study holds and the winning hyperparameters.
n_trials_recorded = len(study.trials)
print("Number of finished trials: {}".format(n_trials_recorded))
print("Best trial:", study.best_params)

# Reload the model that the best trial pickled into the results directory.
# pickle.load is acceptable here only because this notebook wrote the file itself.
best_model_path = f"{os.path.join(tempdir, str(study.best_trial.number))}.pkl"
with open(best_model_path, "rb") as fh:
    best_model = pickle.load(fh)
Number of finished trials: 42
Best trial: {'lgb_lambda_l1': 1.8433765454349008e-08, 'lgb_lambda_l2': 1.181356431909535e-08, 'lgb_num_leaves': 253, 'lgb_feature_fraction': 0.40623865437711265, 'lgb_bagging_fraction': 0.9955797155626703, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}

Let’s summarize the results:

[13]:
# Keep only the trials that ran to completion.
trials = [t for t in study.trials if t.state.name == 'COMPLETE']


def _sort_score(trial):
    """Return the trial's score, treating a missing value as the worst possible."""
    return -np.inf if trial.value is None else trial.value


# Best score first; the sort is stable, so equal scores stay in trial order.
ranked_trials = sorted(trials, key=_sort_score, reverse=True)

# One record per trial. The records are built in ranked order because the
# order in which parameter names first appear fixes the column order; a
# parameter that a trial did not use becomes NaN.
trials_summary = pd.DataFrame(
    [
        dict(trial_number=t.number, score=t.value, **t.params)
        for t in ranked_trials
    ]
)
trials_summary.head(7)
[13]:
trial_number score lgb_lambda_l1 lgb_lambda_l2 lgb_num_leaves lgb_feature_fraction lgb_bagging_fraction lgb_bagging_freq lgb_min_child_samples xgb_booster xgb_lambda xgb_alpha et_n_estimators catb_boosting_type knn_n_neighbors
0 25 0.931429 1.843377e-08 1.181356e-08 253.0 0.406239 0.995580 1.0 6.0 NaN NaN NaN NaN NaN NaN
1 22 0.930612 1.437607e-08 2.252349e-08 245.0 0.432345 0.993878 1.0 21.0 NaN NaN NaN NaN NaN NaN
2 26 0.930612 1.037564e-08 2.572030e-08 252.0 0.426451 0.993918 1.0 11.0 NaN NaN NaN NaN NaN NaN
3 21 0.929796 1.305682e-08 2.030714e-08 252.0 0.456343 0.973438 1.0 7.0 NaN NaN NaN NaN NaN NaN
4 24 0.929796 1.148560e-08 1.207282e-08 252.0 0.411963 0.991026 1.0 7.0 NaN NaN NaN NaN NaN NaN
5 34 0.928163 NaN NaN NaN NaN NaN NaN NaN dart 0.501558 0.000185 NaN NaN NaN
6 1 0.927347 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 186.0 NaN NaN
[14]:
# One representative column per estimator type: parameter names carry the
# estimator as a prefix ("lgb_...", "xgb_...", ...), so take the first column
# that appears for each distinct prefix.
param_prefixes = [name.split('_')[0] for name in trials_summary.columns[2:]]
_, first_positions = np.unique(param_prefixes, return_index=True)

# Restore left-to-right column order, shift past the two leading columns
# (trial_number, score), and keep those two in front.
ridx = [0, 1] + list(np.sort(first_positions) + 2)
trials_summary.iloc[:7, ridx]
[14]:
trial_number score lgb_lambda_l1 xgb_booster et_n_estimators catb_boosting_type knn_n_neighbors
0 25 0.931429 1.843377e-08 NaN NaN NaN NaN
1 22 0.930612 1.437607e-08 NaN NaN NaN NaN
2 26 0.930612 1.037564e-08 NaN NaN NaN NaN
3 21 0.929796 1.305682e-08 NaN NaN NaN NaN
4 24 0.929796 1.148560e-08 NaN NaN NaN NaN
5 34 0.928163 NaN dart NaN NaN NaN
6 1 0.927347 NaN NaN 186.0 NaN NaN
[15]:
# best estimator for estimator type
# Start from the representative parameter columns only (ridx without its two
# leading entries, which are the trial_number and score columns).
bidx = trials_summary.iloc[:,ridx].iloc[:,2:]
# Keep the rows that have a value in at least one of those columns.
# NOTE(review): the positions computed below are relative to this filtered
# frame but are applied to trials_summary at the end, so they only line up
# if this filter drops no rows - confirm that holds for every estimator.
bidx = bidx[bidx.apply(lambda x: any(np.logical_not(x.isna())), 1)]
# For every row, the position of its non-NaN column, i.e. which estimator the
# trial belongs to. `.item()` raises unless exactly one column is non-NaN.
bidx = bidx.apply(lambda x: np.where(np.logical_not(pd.isna(x)))[0].item(), 1).to_numpy()
# For every estimator, the first row in which it appears. This is its best
# trial provided trials_summary is still sorted by descending score.
bidx = [np.where(bidx==i)[0][0] for i in np.unique(bidx)]
trials_summary.iloc[bidx, ridx]
[15]:
trial_number score lgb_lambda_l1 xgb_booster et_n_estimators catb_boosting_type knn_n_neighbors
0 25 0.931429 1.843377e-08 NaN NaN NaN NaN
5 34 0.928163 NaN dart NaN NaN NaN
6 1 0.927347 NaN NaN 186.0 NaN NaN
18 40 0.924898 NaN NaN NaN Plain NaN
31 15 0.737959 NaN NaN NaN NaN 1.0

Note: you can “go back” in this notebook and run more models; Optuna will store them as additional trials (as long as the study object is not recreated) and still give you the best model. If you are happy with your results, let’s evaluate the final model on the test set.

[16]:
# Display the reloaded best model together with its hyperparameters.
best_model
[16]:
LGBMClassifier(bagging_fraction=0.9955797155626703, bagging_freq=1,
               feature_fraction=0.40623865437711265,
               lambda_l1=1.8433765454349008e-08,
               lambda_l2=1.181356431909535e-08, min_child_samples=6,
               num_leaves=253, verbosity=-1)
[17]:
# Sanity check: the reloaded model should reproduce the validation score
# that the study recorded for the best trial.
best_model.score(x_val, y_val)
[17]:
0.9314285714285714
[18]:
# Final evaluation on the held-out test split, which was used neither for
# fitting nor for choosing between trials.
best_model.score(x_test, y_test)
[18]:
0.9243759177679882