Tuning with Optuna (Experimental)#

To run this code, you need to have optuna installed.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To get authentication, use the following command: (For full access, please replace the token with your own token)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMClassifier
from modeva.models import ModelTuneOptuna
from scipy.stats import uniform, randint

Load Dataset

ds = DataSet()
ds.load(name="SimuCredit")
ds.set_random_split()

Run HPO#

param_distributions = {"max_depth": [1, 2, 3],
                       "learning_rate": uniform(0.01, 0.3),
                       "n_estimators": randint(1, 100),
                      }

model = MoLGBMClassifier(verbose=-1)
hpo = ModelTuneOptuna(dataset=ds, model=model)
result = hpo.run(param_distributions=param_distributions,
                 sampler="tpe", # "grid", "random", "tpe", "gs", "cma-es", "qmc"
                 metric=("AUC", "ACC", "LogLoss"),
                 cv=5)
result.table
max_depth learning_rate n_estimators AUC ACC LogLoss AUC_rank ACC_rank LogLoss_rank mean_fit_time
7 3 0.276 52 0.8390 0.7587 0.4891 1 1 1 0.1466
2 1 0.1655 94 0.8250 0.7514 0.5139 2 2 2 0.0943
5 1 0.2241 57 0.8239 0.7492 0.5161 3 3 3 0.0570
3 2 0.1537 16 0.8205 0.7427 0.5304 4 5 4 0.0386
6 3 0.1363 8 0.8192 0.7449 0.5433 5 4 6 0.5374
9 1 0.1338 37 0.8134 0.7423 0.5402 6 6 5 0.0696
0 2 0.0433 38 0.8124 0.7392 0.5483 7 7 7 0.0770
4 1 0.0641 62 0.8090 0.7384 0.5499 8 8 8 0.0568
1 1 0.0356 92 0.8058 0.7316 0.5580 9 9 9 0.0785
8 1 0.0208 92 0.7989 0.7174 0.5802 10 10 10 0.1478


result.plot("parallel", figsize=(8, 6))


result.plot(("max_depth", "AUC"))


result.plot(("learning_rate", "AUC"))


result.plot(("n_estimators", "AUC"))


Retrain model with best hyperparameter#

model_tuned = MoLGBMClassifier(**result.value["params"][0],
                               name="LGBM-Tuned",
                               verbose=-1)
model_tuned.fit(ds.train_x, ds.train_y)
model_tuned
MoLGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                 importance_type='split', learning_rate=0.043311734930306274,
                 max_depth=2, min_child_samples=20, min_child_weight=0.001,
                 min_split_gain=0.0, n_estimators=38, n_jobs=None,
                 num_leaves=31, objective=None, random_state=None,
                 reg_alpha=0.0, reg_lambda=0.0, subsample=1.0,
                 subsample_for_bin=200000, subsample_freq=0, verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Diagnose the tuned model#

ts = TestSuite(ds, model_tuned)
result = ts.diagnose_accuracy_table()
result.table
AUC ACC F1 LogLoss Brier
train 0.8149 0.7398 0.7781 0.5461 0.1815
test 0.8139 0.7425 0.7773 0.5476 0.1821
GAP -0.0010 0.0027 -0.0008 0.0015 0.0005


Total running time of the script: (0 minutes 8.761 seconds)

Gallery generated by Sphinx-Gallery