Performance Metrics (Classification)#

Evaluate model performance and residuals.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, use the following command. (For full access, replace the auth code below with your own token.)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import modeva modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMClassifier
from modeva.models import MoXGBClassifier

Load TaiwanCredit Dataset

ds = DataSet()
ds.load(name="TaiwanCredit")
ds.set_random_split()

Fit an XGBoost model

model1 = MoXGBClassifier()
model1.fit(ds.train_x, ds.train_y)
MoXGBClassifier(base_score=None, booster=None, callbacks=None,
                colsample_bylevel=None, colsample_bynode=None,
                colsample_bytree=None, device=None, early_stopping_rounds=None,
                enable_categorical=False, eval_metric=None, feature_types=None,
                gamma=None, grow_policy=None, importance_type=None,
                interaction_constraints=None, learning_rate=None, max_bin=None,
                max_cat_threshold=None, max_cat_to_onehot=None,
                max_delta_step=None, max_depth=None, max_leaves=None,
                min_child_weight=None, missing=nan, monotone_constraints=None,
                multi_strategy=None, n_estimators=None, n_jobs=None,
                num_parallel_tree=None, objective='binary:logistic', ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Fit an LGBM model

model2 = MoLGBMClassifier(max_depth=2, verbose=-1, random_state=0)
model2.fit(ds.train_x, ds.train_y.ravel())
MoLGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                 importance_type='split', learning_rate=0.1, max_depth=2,
                 min_child_samples=20, min_child_weight=0.001,
                 min_split_gain=0.0, n_estimators=100, n_jobs=None,
                 num_leaves=31, objective=None, random_state=0, reg_alpha=0.0,
                 reg_lambda=0.0, subsample=1.0, subsample_for_bin=200000,
                 subsample_freq=0, verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Basic accuracy analysis#

ts = TestSuite(ds, model1)
results = ts.diagnose_accuracy_table(train_dataset="train", test_dataset="test",
                                     metric=("ACC", "AUC", "LogLoss"))
results.table
ACC AUC LogLoss
train 0.8885 0.9457 0.2739
test 0.8202 0.7662 0.4397
GAP -0.0684 -0.1795 0.1658


Generate confusion matrix (train)

results.plot(name=("confusion_matrix", "train"))


Generate confusion matrix (test)

results.plot(name=("confusion_matrix", "test"))


Generate ROC AUC curve (train)

results.plot(name=("roc_auc", "train"))


Generate ROC AUC curve (test)

results.plot(name=("roc_auc", "test"))


Generate precision recall curve (train)

results.plot(name=("precision_recall", "train"))


Generate precision recall curve (test)

results.plot(name=("precision_recall", "test"))


Compare the XGBoost model with LGBM model#

tsc = TestSuite(ds, models=[model1, model2])
results = tsc.compare_accuracy_table(train_dataset="train", test_dataset="test",
                                     metric=("ACC", "AUC", "LogLoss"))
results.plot("AUC")


Total running time of the script: (0 minutes 13.056 seconds)

Gallery generated by Sphinx-Gallery