Tuning with Optuna (Experimental)#

To run this code, you need to have optuna installed.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To get authentication, use the following command: (For full access, please replace the token with your own token)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMClassifier
from modeva.models import ModelTuneOptuna
from scipy.stats import uniform, randint

Load Dataset

ds = DataSet()
ds.load(name="SimuCredit")
ds.set_random_split()

Run HPO#

param_distributions = {"max_depth": [1, 2, 3],
                       "learning_rate": uniform(0.01, 0.3),
                       "n_estimators": randint(1, 100),
                      }

model = MoLGBMClassifier(verbose=-1)
hpo = ModelTuneOptuna(dataset=ds, model=model)
result = hpo.run(param_distributions=param_distributions,
                 sampler="tpe", # "grid", "random", "tpe", "gs", "cma-es", "qmc"
                 metric=("AUC", "ACC", "LogLoss"),
                 cv=5)
result.table
max_depth learning_rate n_estimators AUC ACC LogLoss AUC_rank ACC_rank LogLoss_rank mean_fit_time
7 3 0.276 52 0.8390 0.7587 0.4891 1 1 1 0.1466
2 1 0.1655 94 0.8250 0.7514 0.5139 2 2 2 0.0943
5 1 0.2241 57 0.8239 0.7492 0.5161 3 3 3 0.0570
3 2 0.1537 16 0.8205 0.7427 0.5304 4 5 4 0.0386
6 3 0.1363 8 0.8192 0.7449 0.5433 5 4 6 0.5374
9 1 0.1338 37 0.8134 0.7423 0.5402 6 6 5 0.0696
0 2 0.0433 38 0.8124 0.7392 0.5483 7 7 7 0.0770
4 1 0.0641 62 0.8090 0.7384 0.5499 8 8 8 0.0568
1 1 0.0356 92 0.8058 0.7316 0.5580 9 9 9 0.0785
8 1 0.0208 92 0.7989 0.7174 0.5802 10 10 10 0.1478


result.plot("parallel", figsize=(8, 6))


result.plot(("max_depth", "AUC"))


result.plot(("learning_rate", "AUC"))


result.plot(("n_estimators", "AUC"))


Retrain model with best hyperparameter#

model_tuned = MoLGBMClassifier(**result.value["params"][0],
                               name="LGBM-Tuned",
                               verbose=-1)
model_tuned.fit(ds.train_x, ds.train_y)
model_tuned
MoLGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                 importance_type='split', learning_rate=0.043311734930306274,
                 max_depth=2, min_child_samples=20, min_child_weight=0.001,
                 min_split_gain=0.0, n_estimators=38, n_jobs=None,
                 num_leaves=31, objective=None, random_state=None,
                 reg_alpha=0.0, reg_lambda=0.0, subsample=1.0,
                 subsample_for_bin=200000, subsample_freq=0, verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Diagnose the tuned model#

ts = TestSuite(ds, model_tuned)
result = ts.diagnose_accuracy_table()
result.table
AUC ACC F1 LogLoss Brier
train 0.8149 0.7398 0.7781 0.5461 0.1815
test 0.8139 0.7425 0.7773 0.5476 0.1821
GAP -0.0010 0.0027 -0.0008 0.0015 0.0005


Total running time of the script: (0 minutes 8.761 seconds)

Gallery generated by Sphinx-Gallery