Calibrating Binary Classifier#

This example requires a full licence; it will fail if run with a trial licence.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To get authentication, use the following command (to get full access, replace the token with your own token):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from copy import deepcopy
from IPython.display import HTML
from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoXGBClassifier
import mocharts as mc

Build a model#

# Load the built-in "TaiwanCredit" dataset and create a random
# train/test split (default split ratio supplied by modeva).
ds = DataSet()
ds.load(name="TaiwanCredit")
ds.set_random_split()

# Fit a deliberately shallow XGBoost classifier (max_depth=2);
# shallow trees keep the example fast and leave room to show the
# effect of probability calibration later on.
model = MoXGBClassifier(name="Raw XGB", max_depth=2)
model.fit(ds.train_x, ds.train_y)
MoXGBClassifier(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bynode=None,
                colsample_bytree=None, device=None, early_stopping_rounds=None,
                enable_categorical=False, eval_metric=None, feature_types=None,
                gamma=None, grow_policy=None, importance_type=None,
                interaction_constraints=None, learning_rate=None, max_bin=None,
                max_cat_threshold=None, max_cat_to_onehot=None,
                max_delta_step=None, max_depth=2, max_leaves=None,
                min_child_weight=None, missing=nan, monotone_constraints=None,
                multi_strategy=None, n_estimators=None, n_jobs=None,
                num_parallel_tree=None, objective='binary:logistic', ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Calibrate the model#

# Deep-copy the fitted model so the raw (uncalibrated) version stays
# intact for the side-by-side comparison below.
model_calibrated = deepcopy(model)
model_calibrated.name = "Calibrated XGB"
# Fit an isotonic-regression calibrator on top of the model's scores.
# NOTE(review): the calibrator is fitted on the same test split that is
# later used for evaluation — fine for a demo, but in practice use a
# separate validation split to avoid leakage; confirm this is intentional.
model_calibrated.calibrate_proba(X=ds.test_x, y=ds.test_y, method='isotonic')
MoXGBClassifier(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bynode=None,
                colsample_bytree=None, device=None, early_stopping_rounds=None,
                enable_categorical=False, eval_metric=None, feature_types=None,
                gamma=None, grow_policy=None, importance_type=None,
                interaction_constraints=None, learning_rate=None, max_bin=None,
                max_cat_threshold=None, max_cat_to_onehot=None,
                max_delta_step=None, max_depth=2, max_leaves=None,
                min_child_weight=None, missing=nan, monotone_constraints=None,
                multi_strategy=None, n_estimators=None, n_jobs=None,
                num_parallel_tree=None, objective='binary:logistic', ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Check proba before and after calibration#

# Scatter the positive-class probability before calibration (x-axis)
# against the probability after calibration (y-axis) on the test split,
# to visualize how the calibrator remaps the raw scores.
proba_raw = model_calibrated.predict_proba(ds.test_x, calibration=False)[:, 1]
proba_cal = model_calibrated.predict_proba(ds.test_x, calibration=True)[:, 1]

chart = mc.scatterplot(proba_raw, proba_cal)
chart.set_xaxis(axis_name="proba before calibration")
chart.set_yaxis(axis_name="proba after calibration")
chart.figsize = {'width': 500, 'height': 400}

# Render to an HTML string and display it inline in the notebook.
html_str = mc.mocharts_plot(chart.render(), return_html=True, silent=True)
HTML(html_str)


Compare the raw and calibrated XGBoost models#

# Compare LogLoss of the raw vs. calibrated model on the train and
# test splits; the resulting table also reports the train/test GAP.
suite = TestSuite(ds, models=[model, model_calibrated])
comparison = suite.compare_accuracy_table(
    train_dataset="train",
    test_dataset="test",
    metric="LogLoss",
)
comparison.table
Raw XGB Calibrated XGB
LogLoss LogLoss
train 0.4165 0.4216
test 0.4204 0.4158
GAP 0.0039 -0.0057


Reset calibration when needed#

# Remove the calibration fitted above (presumably restoring the raw,
# uncalibrated probabilities — confirm against the modeva docs).
model_calibrated.reset_calibrate_proba()

Total running time of the script: (0 minutes 1.845 seconds)

Gallery generated by Sphinx-Gallery