Global Explainability

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, use the following commands. (For full access, replace the token below with your own token.)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import modeva modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMRegressor

Load Dataset

# Load the "BikeSharing" dataset by name and create a random split
# (the train/test partitions are used later via ds.train_x / dataset='test').
ds = DataSet()
ds.load(name="BikeSharing")
ds.set_random_split()

# Request a log1p scaling of the "cnt" column, then run preprocessing.
# NOTE(review): presumably scale_numerical only registers the step and
# preprocess() is what applies it — confirm against the Modeva docs.
ds.scale_numerical(features=("cnt",), method="log1p")
ds.preprocess()

Train a LGBM model

# Fit a LightGBM-based regressor on the training split with otherwise default
# hyperparameters. verbose=-1 presumably silences LightGBM's training log
# (LightGBM convention) — confirm.
model = MoLGBMRegressor(verbose=-1)
model.fit(ds.train_x, ds.train_y)
MoLGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                importance_type='split', learning_rate=0.1, max_depth=-1,
                min_child_samples=20, min_child_weight=0.001,
                min_split_gain=0.0, n_estimators=100, n_jobs=None,
                num_leaves=31, objective=None, random_state=None, reg_alpha=0.0,
                reg_lambda=0.0, subsample=1.0, subsample_for_bin=200000,
                subsample_freq=0, verbose=-1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.


Permutation feature importance

# Bind the dataset and the fitted model into a TestSuite; the explanation
# calls in the rest of this script all go through `ts`.
ts = TestSuite(ds, model)
# Permutation feature importance on the test split with 5 repeats and a fixed
# seed. sample_size=2000 presumably caps the number of rows used — confirm.
results = ts.explain_pfi(dataset='test', sample_size=2000, n_repeats=5, random_state=0)
# Plot the result, limited to 10 bars.
results.plot(n_bars=10)


H-statistic

# Features whose pairwise H-statistic is computed; the resulting table has one
# row and one column per feature listed here.
interaction_features = ('hr', 'atemp', 'season', 'holiday', 'hum')

# Compute the H-statistic on the training split with a fixed seed.
results = ts.explain_hstatistic(
    features=interaction_features,
    dataset='train',
    sample_size=2000,
    percentiles=(0, 1),
    grid_resolution=10,
    response_method='auto',
    random_state=0,
)
# Bare expression: displays the pairwise table in a notebook / gallery cell.
results.table
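|         | hr     | atemp  | season | holiday | hum    |
|---------|--------|--------|--------|---------|--------|
| hr      | NaN    | 0.0003 | 0.0005 | 0.0010  | 0.0031 |
| atemp   | 0.0003 | NaN    | 0.0052 | 0.0448  | 0.0083 |
| season  | 0.0005 | 0.0052 | NaN    | 0.0029  | 0.0116 |
| holiday | 0.0010 | 0.0448 | 0.0029 | NaN     | 0.0036 |
| hum     | 0.0031 | 0.0083 | 0.0116 | 0.0036  | NaN    |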


1D Partial dependence plots

# One-feature partial dependence for "hr" on the training split, with a fixed
# seed. NOTE(review): percentiles=(0, 1) presumably spans the feature's full
# observed range and grid_resolution=10 the number of grid points — confirm.
results = ts.explain_pdp(features="hr", dataset='train', sample_size=2000, percentiles=(0, 1),
                         grid_resolution=10, response_method='auto', random_state=0)
results.plot()


2D Partial dependence plots

# Passing a pair of features requests the two-feature partial dependence for
# "hum" and "hr" on the training split; all other arguments are left at the
# library defaults.
results = ts.explain_pdp(features=("hum", "hr"), dataset="train")
results.plot()


1D ALE

# One-feature ALE (presumably "accumulated local effects" — confirm) for "hr"
# on the training split, with a fixed seed.
results = ts.explain_ale(features="hr", dataset='train', sample_size=2000,
                         grid_resolution=10, response_method='auto', random_state=0)
results.plot()


2D ALE

# Passing a pair of features requests the two-feature ALE for "hum" and "hr"
# on the training split; all other arguments are left at the library defaults.
results = ts.explain_ale(features=("hum", "hr"), dataset="train")
results.plot()


Total running time of the script: (0 minutes 14.142 seconds)

Gallery generated by Sphinx-Gallery