4. Optuna
[1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
import optuna
import tempfile
import pickle
import os
import lightgbm as lgb
import xgboost as xgb
import catboost as catb
Let’s start by getting some data from https://archive.ics.uci.edu/ml/datasets/Dry+Bean+Dataset
[2]:
!wget -c https://archive.ics.uci.edu/ml/machine-learning-databases/00602/DryBeanDataset.zip
!unzip -u DryBeanDataset.zip
--2021-05-22 15:27:02-- https://archive.ics.uci.edu/ml/machine-learning-databases/00602/DryBeanDataset.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable
The file is already fully retrieved; nothing to do.
Archive: DryBeanDataset.zip
[3]:
df = pd.read_excel('DryBeanDataset/Dry_Bean_Dataset.xlsx')
x_full = df.loc[:,df.columns!="Class"].to_numpy()
y_full = df.Class.astype("category").cat.codes.to_numpy()
[4]:
x_train_val, x_test, y_train_val, y_test = train_test_split(x_full, y_full, test_size=0.1, random_state=0)
x_train, x_val, y_train, y_val = train_test_split(x_train_val, y_train_val, test_size=0.1, random_state=0)
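The dry-bean classes are not equally frequent, so if you want the class proportions preserved in each split, train_test_split also accepts a stratify argument. A minimal variant of the cell above (same seeds assumed, not what the rest of this notebook uses):

x_train_val, x_test, y_train_val, y_test = train_test_split(
    x_full, y_full, test_size=0.1, random_state=0, stratify=y_full)
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.1, random_state=0, stratify=y_train_val)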
Now we train a baseline classifier with fixed hyperparameters and evaluate its score on the validation set
[5]:
clf = ExtraTreesClassifier(n_estimators=300, random_state=0)
clf.fit(x_train, y_train)
clf.score(x_val, y_val)
[5]:
0.9240816326530612
Let’s create an Optuna study and a directory to store our results
[6]:
# Create the study and a scratch directory only once, so re-running this
# cell does not discard earlier trials or saved models
try:
    study
except NameError:
    study = optuna.create_study(direction="maximize")
try:
    tempdir
except NameError:
    tempdir = tempfile.mkdtemp()  # creates the directory; it is not auto-deleted
print(tempdir)
[I 2021-05-22 15:27:08,976] A new study created in memory with name: no-name-5e3b9e91-f0c8-45cc-b581-350d2e07025c
/tmp/pytmpfiles/tmp84frhkyo
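The study above lives only in this Python process. If you want the trials to survive a kernel restart, Optuna can back a study with a database instead. A minimal sketch using SQLite (the study name and file path are illustrative):

study = optuna.create_study(
    study_name="dry-bean-tuning",            # illustrative name
    storage="sqlite:///dry_bean_study.db",   # persisted on disk
    direction="maximize",
    load_if_exists=True,                     # resume instead of failing if it already exists
)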
[7]:
def objective(trial: optuna.trial.Trial) -> float:
    n_estimators = trial.suggest_int("et_n_estimators", 10, 1000)
    clf = ExtraTreesClassifier(n_estimators=n_estimators)
    clf.fit(x_train, y_train)
    # persist each fitted model so the best one can be reloaded later
    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)
    score = clf.score(x_val, y_val)
    return score

# run at most 10 trials, stopping early if they exceed 60 seconds in total
study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:10,162] Trial 0 finished with value: 0.9257142857142857 and parameters: {'et_n_estimators': 163}. Best is trial 0 with value: 0.9257142857142857.
[I 2021-05-22 15:27:11,424] Trial 1 finished with value: 0.9273469387755102 and parameters: {'et_n_estimators': 186}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:14,834] Trial 2 finished with value: 0.926530612244898 and parameters: {'et_n_estimators': 493}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:15,354] Trial 3 finished with value: 0.9224489795918367 and parameters: {'et_n_estimators': 77}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:19,555] Trial 4 finished with value: 0.9240816326530612 and parameters: {'et_n_estimators': 593}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:26,643] Trial 5 finished with value: 0.9248979591836735 and parameters: {'et_n_estimators': 952}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:27,449] Trial 6 finished with value: 0.923265306122449 and parameters: {'et_n_estimators': 111}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:28,235] Trial 7 finished with value: 0.9240816326530612 and parameters: {'et_n_estimators': 107}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:34,293] Trial 8 finished with value: 0.926530612244898 and parameters: {'et_n_estimators': 861}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:35,330] Trial 9 finished with value: 0.9257142857142857 and parameters: {'et_n_estimators': 158}. Best is trial 1 with value: 0.9273469387755102.
[8]:
def objective(trial: optuna.trial.Trial) -> float:
    n_neighbors = trial.suggest_int("knn_n_neighbors", 1, 30)
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(x_train, y_train)
    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)
    score = clf.score(x_val, y_val)
    return score

study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:35,663] Trial 10 finished with value: 0.6963265306122449 and parameters: {'knn_n_neighbors': 2}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:35,971] Trial 11 finished with value: 0.6914285714285714 and parameters: {'knn_n_neighbors': 28}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,244] Trial 12 finished with value: 0.713469387755102 and parameters: {'knn_n_neighbors': 10}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,515] Trial 13 finished with value: 0.6914285714285714 and parameters: {'knn_n_neighbors': 28}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,785] Trial 14 finished with value: 0.710204081632653 and parameters: {'knn_n_neighbors': 18}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:36,955] Trial 15 finished with value: 0.7379591836734694 and parameters: {'knn_n_neighbors': 1}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,227] Trial 16 finished with value: 0.710204081632653 and parameters: {'knn_n_neighbors': 18}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,490] Trial 17 finished with value: 0.7093877551020408 and parameters: {'knn_n_neighbors': 9}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:37,751] Trial 18 finished with value: 0.7004081632653061 and parameters: {'knn_n_neighbors': 24}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:38,015] Trial 19 finished with value: 0.713469387755102 and parameters: {'knn_n_neighbors': 10}. Best is trial 1 with value: 0.9273469387755102.
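The KNN scores sit around 0.70-0.74, far below the tree ensembles. That is expected: the bean features mix large-magnitude areas with small ratios, and KNN distances are dominated by the largest scales. A hedged sketch that standardizes the features inside the objective (StandardScaler and Pipeline come from scikit-learn; the parameter name knn_scaled_n_neighbors is new, chosen to avoid clashing with the earlier knn_n_neighbors):

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

def objective(trial: optuna.trial.Trial) -> float:
    n_neighbors = trial.suggest_int("knn_scaled_n_neighbors", 1, 30)
    clf = Pipeline([
        ("scale", StandardScaler()),  # zero mean, unit variance per feature
        ("knn", KNeighborsClassifier(n_neighbors=n_neighbors)),
    ])
    clf.fit(x_train, y_train)
    return clf.score(x_val, y_val)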
[9]:
%%capture cap_out --no-stderr
def objective(trial: optuna.trial.Trial) -> float:
    param = {  # based on the default suggestions from the Optuna website
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_loguniform('lgb_lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lgb_lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('lgb_num_leaves', 2, 256),
        'feature_fraction': trial.suggest_uniform('lgb_feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('lgb_bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('lgb_bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('lgb_min_child_samples', 5, 100),
    }
    clf = lgb.LGBMClassifier(**param)
    clf.fit(x_train, y_train)
    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)
    score = clf.score(x_val, y_val)
    return score

study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:39,711] Trial 20 finished with value: 0.9208163265306123 and parameters: {'lgb_lambda_l1': 1.8482981826823892, 'lgb_lambda_l2': 0.011973352986995828, 'lgb_num_leaves': 124, 'lgb_feature_fraction': 0.7888222469562156, 'lgb_bagging_fraction': 0.4677224216839525, 'lgb_bagging_freq': 4, 'lgb_min_child_samples': 89}. Best is trial 1 with value: 0.9273469387755102.
[I 2021-05-22 15:27:42,148] Trial 21 finished with value: 0.929795918367347 and parameters: {'lgb_lambda_l1': 1.3056821804999452e-08, 'lgb_lambda_l2': 2.030713915929927e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.4563432407923397, 'lgb_bagging_fraction': 0.9734380103761134, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 7}. Best is trial 21 with value: 0.929795918367347.
[I 2021-05-22 15:27:44,574] Trial 22 finished with value: 0.9306122448979591 and parameters: {'lgb_lambda_l1': 1.4376070248840012e-08, 'lgb_lambda_l2': 2.2523492976261175e-08, 'lgb_num_leaves': 245, 'lgb_feature_fraction': 0.4323450149823892, 'lgb_bagging_fraction': 0.9938783874780044, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 21}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:46,953] Trial 23 finished with value: 0.923265306122449 and parameters: {'lgb_lambda_l1': 3.12812522832872e-08, 'lgb_lambda_l2': 1.8536183493240282e-08, 'lgb_num_leaves': 256, 'lgb_feature_fraction': 0.4361710200785238, 'lgb_bagging_fraction': 0.9990831209074797, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:49,543] Trial 24 finished with value: 0.929795918367347 and parameters: {'lgb_lambda_l1': 1.1485602913794501e-08, 'lgb_lambda_l2': 1.2072818375646901e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.4119630276339381, 'lgb_bagging_fraction': 0.9910257498295888, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 7}. Best is trial 22 with value: 0.9306122448979591.
[I 2021-05-22 15:27:51,973] Trial 25 finished with value: 0.9314285714285714 and parameters: {'lgb_lambda_l1': 1.8433765454349008e-08, 'lgb_lambda_l2': 1.181356431909535e-08, 'lgb_num_leaves': 253, 'lgb_feature_fraction': 0.40623865437711265, 'lgb_bagging_fraction': 0.9955797155626703, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:54,578] Trial 26 finished with value: 0.9306122448979591 and parameters: {'lgb_lambda_l1': 1.0375640705273421e-08, 'lgb_lambda_l2': 2.572029864598687e-08, 'lgb_num_leaves': 252, 'lgb_feature_fraction': 0.42645064979623726, 'lgb_bagging_fraction': 0.993917684160062, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 11}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:56,004] Trial 27 finished with value: 0.926530612244898 and parameters: {'lgb_lambda_l1': 4.462235247025858e-06, 'lgb_lambda_l2': 5.464291275342302e-06, 'lgb_num_leaves': 203, 'lgb_feature_fraction': 0.5296234708856598, 'lgb_bagging_fraction': 0.8371004662109016, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 32}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:56,469] Trial 28 finished with value: 0.926530612244898 and parameters: {'lgb_lambda_l1': 1.1136459099267102e-06, 'lgb_lambda_l2': 1.0428500111438135e-06, 'lgb_num_leaves': 16, 'lgb_feature_fraction': 0.6157372989773611, 'lgb_bagging_fraction': 0.8552510327710622, 'lgb_bagging_freq': 2, 'lgb_min_child_samples': 32}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:27:58,900] Trial 29 finished with value: 0.9257142857142857 and parameters: {'lgb_lambda_l1': 1.114655804055448e-08, 'lgb_lambda_l2': 1.1545154342742978e-08, 'lgb_num_leaves': 185, 'lgb_feature_fraction': 0.9922189907585702, 'lgb_bagging_fraction': 0.8769821898599875, 'lgb_bagging_freq': 6, 'lgb_min_child_samples': 27}. Best is trial 25 with value: 0.9314285714285714.
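The larger LightGBM fits take a few seconds each, which makes them candidates for pruning: Optuna can stop a trial early when its intermediate metric looks worse than earlier trials. A sketch using Optuna's LightGBM integration callback, assuming the study's default MedianPruner (the parameter name lgb_pruned_num_leaves is illustrative):

from optuna.integration import LightGBMPruningCallback

def objective(trial: optuna.trial.Trial) -> float:
    num_leaves = trial.suggest_int("lgb_pruned_num_leaves", 2, 256)
    clf = lgb.LGBMClassifier(num_leaves=num_leaves, verbosity=-1)
    clf.fit(
        x_train, y_train,
        eval_set=[(x_val, y_val)],
        eval_metric="multi_logloss",
        # reports the validation logloss after each boosting round and raises
        # optuna.TrialPruned if the trial is not promising
        callbacks=[LightGBMPruningCallback(trial, "multi_logloss")],
    )
    return clf.score(x_val, y_val)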
[10]:
%%capture cap_out --no-stderr
def objective(trial: optuna.trial.Trial) -> float:
    param = {  # based on the default suggestions from the Optuna website
        'eval_metric': 'mlogloss',  # multiclass logloss; this is an evaluation metric, not an objective
        'booster': trial.suggest_categorical('xgb_booster', ['gbtree', 'gblinear', 'dart']),
        'lambda': trial.suggest_loguniform('xgb_lambda', 1e-8, 1.0),
        'alpha': trial.suggest_loguniform('xgb_alpha', 1e-8, 1.0),
        'use_label_encoder': False,
    }
    clf = xgb.XGBClassifier(**param)
    clf.fit(x_train, y_train)
    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)
    score = clf.score(x_val, y_val)
    return score

study.optimize(objective, n_trials=10, timeout=60)
[I 2021-05-22 15:27:59,179] Trial 30 finished with value: 0.76 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 0.08454736059803832, 'xgb_alpha': 0.13432174853140677}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:03,679] Trial 31 finished with value: 0.926530612244898 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 1.1204126614196895e-08, 'xgb_alpha': 4.073316114838999e-08}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:15,055] Trial 32 finished with value: 0.9224489795918367 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 1.617480580724709e-08, 'xgb_alpha': 1.6704898072863906e-07}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:15,304] Trial 33 finished with value: 0.7689795918367347 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 0.00028821012077711477, 'xgb_alpha': 0.5018281270041018}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:29,440] Trial 34 finished with value: 0.9281632653061225 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 0.5015577583215429, 'xgb_alpha': 0.0001847020678563732}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:34,560] Trial 35 finished with value: 0.9240816326530612 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 9.313644847261667e-06, 'xgb_alpha': 0.00018415473651269253}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:40,254] Trial 36 finished with value: 0.9240816326530612 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 4.775326333176848e-05, 'xgb_alpha': 2.57504955969162e-06}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:52,447] Trial 37 finished with value: 0.9257142857142857 and parameters: {'xgb_booster': 'dart', 'xgb_lambda': 0.007297608984814747, 'xgb_alpha': 0.015000524508202724}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:52,696] Trial 38 finished with value: 0.2587755102040816 and parameters: {'xgb_booster': 'gblinear', 'xgb_lambda': 1.2331468616902276e-06, 'xgb_alpha': 0.003169435403760046}. Best is trial 25 with value: 0.9314285714285714.
[I 2021-05-22 15:28:57,905] Trial 39 finished with value: 0.9248979591836735 and parameters: {'xgb_booster': 'gbtree', 'xgb_lambda': 6.205635732109192e-07, 'xgb_alpha': 2.33581609601957e-06}. Best is trial 25 with value: 0.9314285714285714.
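Instead of redefining objective once per library, the model family itself can be made a hyperparameter: Optuna supports conditional search spaces, where parameters are only suggested on the branch that needs them. A compressed sketch with two of the models above (names and ranges illustrative, not what this notebook runs):

def objective(trial: optuna.trial.Trial) -> float:
    classifier = trial.suggest_categorical("classifier", ["extratrees", "knn"])
    if classifier == "extratrees":
        # this parameter only exists in trials that chose ExtraTrees
        clf = ExtraTreesClassifier(
            n_estimators=trial.suggest_int("cond_et_n_estimators", 10, 1000))
    else:
        clf = KNeighborsClassifier(
            n_neighbors=trial.suggest_int("cond_knn_n_neighbors", 1, 30))
    clf.fit(x_train, y_train)
    return clf.score(x_val, y_val)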
[11]:
#%%capture cap_out --no-stderr  # left disabled here, so CatBoost's training log is shown below
def objective(trial: optuna.trial.Trial) -> float:
    param = {
        'boosting_type': trial.suggest_categorical('catb_boosting_type', ['Plain', 'Ordered']),
        'verbose': 50,  # log every 50 boosting iterations
    }
    clf = catb.CatBoostClassifier(**param)
    clf.fit(x_train, y_train)
    with open(f"{os.path.join(tempdir, str(trial.number))}.pkl", "wb") as f:
        pickle.dump(clf, f)
    score = clf.score(x_val, y_val)
    return score

study.optimize(objective, n_trials=10, timeout=60)
Learning rate set to 0.08943
0: learn: 1.6169293 total: 109ms remaining: 1m 48s
50: learn: 0.2286390 total: 630ms remaining: 11.7s
100: learn: 0.1834779 total: 1.17s remaining: 10.4s
150: learn: 0.1659866 total: 1.69s remaining: 9.48s
200: learn: 0.1531031 total: 2.21s remaining: 8.78s
250: learn: 0.1427671 total: 2.74s remaining: 8.17s
300: learn: 0.1339345 total: 3.27s remaining: 7.58s
350: learn: 0.1267693 total: 3.79s remaining: 7.01s
400: learn: 0.1199584 total: 4.32s remaining: 6.46s
450: learn: 0.1140554 total: 4.84s remaining: 5.9s
500: learn: 0.1084516 total: 5.37s remaining: 5.35s
550: learn: 0.1032207 total: 5.89s remaining: 4.8s
600: learn: 0.0981966 total: 6.43s remaining: 4.27s
650: learn: 0.0939444 total: 6.98s remaining: 3.74s
700: learn: 0.0901262 total: 7.52s remaining: 3.21s
750: learn: 0.0865019 total: 8.06s remaining: 2.67s
800: learn: 0.0832780 total: 8.6s remaining: 2.14s
850: learn: 0.0798762 total: 9.14s remaining: 1.6s
900: learn: 0.0767640 total: 9.69s remaining: 1.06s
950: learn: 0.0737176 total: 10.2s remaining: 528ms
[I 2021-05-22 15:29:08,914] Trial 40 finished with value: 0.9248979591836735 and parameters: {'catb_boosting_type': 'Plain'}. Best is trial 25 with value: 0.9314285714285714.
999: learn: 0.0711576 total: 10.8s remaining: 0us
Learning rate set to 0.08943
0: learn: 1.6068053 total: 61.1ms remaining: 1m 1s
50: learn: 0.2306703 total: 2.96s remaining: 55s
100: learn: 0.1849559 total: 5.66s remaining: 50.4s
150: learn: 0.1663552 total: 8.47s remaining: 47.6s
200: learn: 0.1546436 total: 11.3s remaining: 45.1s
250: learn: 0.1452628 total: 14.2s remaining: 42.3s
300: learn: 0.1377216 total: 17.1s remaining: 39.7s
350: learn: 0.1309304 total: 20.1s remaining: 37.1s
400: learn: 0.1256575 total: 23.1s remaining: 34.5s
450: learn: 0.1203487 total: 26s remaining: 31.7s
500: learn: 0.1154523 total: 29s remaining: 28.8s
550: learn: 0.1108900 total: 31.9s remaining: 26s
600: learn: 0.1063524 total: 34.8s remaining: 23.1s
650: learn: 0.1020377 total: 37.8s remaining: 20.3s
700: learn: 0.0984189 total: 40.7s remaining: 17.4s
750: learn: 0.0945249 total: 43.6s remaining: 14.4s
800: learn: 0.0910075 total: 46.4s remaining: 11.5s
850: learn: 0.0878672 total: 49.2s remaining: 8.62s
900: learn: 0.0851764 total: 52.2s remaining: 5.73s
950: learn: 0.0825062 total: 55.8s remaining: 2.87s
[I 2021-05-22 15:30:07,883] Trial 41 finished with value: 0.9216326530612244 and parameters: {'catb_boosting_type': 'Ordered'}. Best is trial 25 with value: 0.9314285714285714.
999: learn: 0.0796181 total: 58.7s remaining: 0us
[12]:
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:", study.best_params)
with open(f"{os.path.join(tempdir, str(study.best_trial.number))}.pkl", "rb") as f:
best_model = pickle.load(f)
Number of finished trials: 42
Best trial: {'lgb_lambda_l1': 1.8433765454349008e-08, 'lgb_lambda_l2': 1.181356431909535e-08, 'lgb_num_leaves': 253, 'lgb_feature_fraction': 0.40623865437711265, 'lgb_bagging_fraction': 0.9955797155626703, 'lgb_bagging_freq': 1, 'lgb_min_child_samples': 6}
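Optuna also ships plotting helpers that operate on the study directly (they require plotly), for example the optimization history:

optuna.visualization.plot_optimization_history(study)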
Let’s summarize the results:
[13]:
trials = [t for t in study.trials if t.state.name == 'COMPLETE']
trials_summary = sorted(trials, key=lambda x: -np.inf if x.value is None else x.value, reverse=True)
trials_summary = [dict(trial_number=trial.number, score=trial.value, **trial.params) for trial in trials_summary]
trials_summary = pd.DataFrame(trials_summary)
trials_summary.iloc[:7]
[13]:
|   | trial_number | score | lgb_lambda_l1 | lgb_lambda_l2 | lgb_num_leaves | lgb_feature_fraction | lgb_bagging_fraction | lgb_bagging_freq | lgb_min_child_samples | xgb_booster | xgb_lambda | xgb_alpha | et_n_estimators | catb_boosting_type | knn_n_neighbors |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25 | 0.931429 | 1.843377e-08 | 1.181356e-08 | 253.0 | 0.406239 | 0.995580 | 1.0 | 6.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 22 | 0.930612 | 1.437607e-08 | 2.252349e-08 | 245.0 | 0.432345 | 0.993878 | 1.0 | 21.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 26 | 0.930612 | 1.037564e-08 | 2.572030e-08 | 252.0 | 0.426451 | 0.993918 | 1.0 | 11.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 21 | 0.929796 | 1.305682e-08 | 2.030714e-08 | 252.0 | 0.456343 | 0.973438 | 1.0 | 7.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 24 | 0.929796 | 1.148560e-08 | 1.207282e-08 | 252.0 | 0.411963 | 0.991026 | 1.0 | 7.0 | NaN | NaN | NaN | NaN | NaN | NaN |
| 5 | 34 | 0.928163 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | dart | 0.501558 | 0.000185 | NaN | NaN | NaN |
| 6 | 1 | 0.927347 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 186.0 | NaN | NaN |
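As a cross-check of this manual summary, Optuna can export the trial history itself as a DataFrame:

study.trials_dataframe().sort_values("value", ascending=False).head(7)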
[14]:
# keep one representative hyperparameter column per estimator type
ridx = [x.split('_')[0] for x in trials_summary.iloc[:, 2:].columns]  # estimator prefix of each column
ridx = np.unique(ridx, True)[1]  # position of the first column for each prefix
ridx = list(np.sort(ridx) + 2)   # shift by 2 to skip trial_number and score
ridx = [0, 1] + ridx             # prepend trial_number and score
trials_summary.iloc[:7, ridx]
[14]:
|   | trial_number | score | lgb_lambda_l1 | xgb_booster | et_n_estimators | catb_boosting_type | knn_n_neighbors |
|---|---|---|---|---|---|---|---|
| 0 | 25 | 0.931429 | 1.843377e-08 | NaN | NaN | NaN | NaN |
| 1 | 22 | 0.930612 | 1.437607e-08 | NaN | NaN | NaN | NaN |
| 2 | 26 | 0.930612 | 1.037564e-08 | NaN | NaN | NaN | NaN |
| 3 | 21 | 0.929796 | 1.305682e-08 | NaN | NaN | NaN | NaN |
| 4 | 24 | 0.929796 | 1.148560e-08 | NaN | NaN | NaN | NaN |
| 5 | 34 | 0.928163 | NaN | dart | NaN | NaN | NaN |
| 6 | 1 | 0.927347 | NaN | NaN | 186.0 | NaN | NaN |
[15]:
# best trial for each estimator type
bidx = trials_summary.iloc[:, ridx].iloc[:, 2:]  # representative hyperparameter columns only
bidx = bidx[bidx.apply(lambda x: any(np.logical_not(x.isna())), 1)]  # keep rows with at least one parameter set
bidx = bidx.apply(lambda x: np.where(np.logical_not(pd.isna(x)))[0].item(), 1).to_numpy()  # estimator id per row
bidx = [np.where(bidx == i)[0][0] for i in np.unique(bidx)]  # first (= highest-scoring) row per estimator
trials_summary.iloc[bidx, ridx]
[15]:
|    | trial_number | score | lgb_lambda_l1 | xgb_booster | et_n_estimators | catb_boosting_type | knn_n_neighbors |
|----|---|---|---|---|---|---|---|
| 0  | 25 | 0.931429 | 1.843377e-08 | NaN | NaN | NaN | NaN |
| 5  | 34 | 0.928163 | NaN | dart | NaN | NaN | NaN |
| 6  | 1  | 0.927347 | NaN | NaN | 186.0 | NaN | NaN |
| 18 | 40 | 0.924898 | NaN | NaN | NaN | Plain | NaN |
| 31 | 15 | 0.737959 | NaN | NaN | NaN | NaN | 1.0 |
Note: you can "go back" in this notebook and run more models; Optuna will store them as additional trials (as long as the study object is not recreated) and report the best one. Once you are happy with the results, let's evaluate the final model on the test set.
[16]:
best_model
[16]:
LGBMClassifier(bagging_fraction=0.9955797155626703, bagging_freq=1,
feature_fraction=0.40623865437711265,
lambda_l1=1.8433765454349008e-08,
lambda_l2=1.181356431909535e-08, min_child_samples=6,
num_leaves=253, verbosity=-1)
[17]:
best_model.score(x_val, y_val)
[17]:
0.9314285714285714
[18]:
best_model.score(x_test, y_test)
[18]:
0.9243759177679882
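The test score is slightly below the validation score, which is expected since the hyperparameters were selected against the validation set. A common final step (not performed above) is to refit the chosen configuration on train+validation before reporting or deploying; a hedged sketch:

# rebuild the winning configuration and refit on the combined data
final_model = lgb.LGBMClassifier(**best_model.get_params())
final_model.fit(np.vstack([x_train, x_val]), np.concatenate([y_train, y_val]))
final_model.score(x_test, y_test)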