Note

Go to the end to download the full example code.

Robustness Analysis (Regression)

This example demonstrates how to analyze model robustness for regression problems using various methods and metrics.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, use the following commands (for full access, replace the token with your own token):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMRegressor
from modeva.models import MoXGBRegressor
from modeva.testsuite.utils.slicing_utils import get_data_info

Load and prepare dataset

# Create a DataSet, load the "BikeSharing" data by name, and apply a random
# split using the library's default split settings (no arguments are passed).
ds = DataSet()
ds.load(name="BikeSharing")
ds.set_random_split()

# Request log1p scaling for the "cnt" column, then run preprocessing.
# NOTE(review): "cnt" is presumably the regression target — confirm against
# the dataset definition.
ds.scale_numerical(features=("cnt",), method="log1p")
ds.preprocess()

Train models

# First model: MoXGBRegressor with its default settings, fitted on the
# training features and targets exposed by the dataset.
model1 = MoXGBRegressor()
model1.fit(ds.train_x, ds.train_y)

# Second model: MoLGBMRegressor restricted to max_depth=2 with a fixed
# random_state so the run is repeatable.
# NOTE(review): the target is flattened with .ravel() here but passed as-is
# to model1 above — confirm both estimators accept the shape they are given.
model2 = MoLGBMRegressor(max_depth=2, verbose=-1, random_state=0)
model2.fit(ds.train_x, ds.train_y.ravel())

MoLGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                importance_type='split', learning_rate=0.1, max_depth=2,
                min_child_samples=20, min_child_weight=0.001,
                min_split_gain=0.0, n_estimators=100, n_jobs=None,
                num_leaves=31, objective=None, random_state=0, reg_alpha=0.0,
                reg_lambda=0.0, subsample=1.0, subsample_for_bin=200000,
                subsample_freq=0, verbose=-1)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Basic robustness analysis

# Wrap the dataset and the first model in a TestSuite, then evaluate MSE at
# four noise levels (0.1 through 0.4).
# perturb_features=None presumably means "perturb all features" — confirm in
# the Modeva API reference.
ts = TestSuite(ds, model1)
results = ts.diagnose_robustness(
    perturb_features=None,
    noise_levels=(0.1, 0.2, 0.3, 0.4),
    metric="MSE"
)
# Render the result on a 6x5 figure.
results.plot(figsize=(6, 5))

Analyze data drift between the groups with small and large prediction changes

# Reuse the "data_info" entry recorded for noise level 0.2 and run the drift
# test on it with the PSI distance ("uniform" PSI method, 10 bins).
drift_groups = results.value[0.2]["data_info"]
data_results = ds.data_drift_test(
    **drift_groups,
    distance_metric="PSI",
    psi_method="uniform",
    psi_bins=10,
)
data_results.plot("summary")

Analyze data drift for single variable

# Density view of the drift result for the single feature "hr".
data_results.plot(("density", "hr"))

Slicing robustness analysis

Single feature slicing

# Slice on "hr" while perturbing "hum" and "atemp" at noise level 0.1, and
# score each segment with MAE.
# method="auto-xgb1" presumably controls how the "hr" segments are formed —
# confirm in the Modeva API reference.
# In the table shown below, segments whose MAE exceeds threshold=0.2 are
# reported with Weak == True.
results = ts.diagnose_slicing_robustness(
    features="hr",
    perturb_features=("hum", "atemp"),
    noise_levels=0.1,
    metric="MAE",
    method="auto-xgb1",
    threshold=0.2
)
results.table

	Feature	Segment	Size	MAE	Threshold	Weak
1	hr	[3.00, 5.00)	284	0.3304	0.2	True
2	hr	[5.00, 7.00)	267	0.2734	0.2	True
0	hr	[0.00, 3.00)	428	0.2702	0.2	True
3	hr	[7.00, 9.00)	283	0.2401	0.2	True
9	hr	[20.00, 23.00]	584	0.1981	0.2	False
6	hr	[14.00, 16.00)	294	0.1772	0.2	False
4	hr	[9.00, 12.00)	456	0.1640	0.2	False
5	hr	[12.00, 14.00)	291	0.1551	0.2	False
8	hr	[18.00, 20.00)	295	0.1436	0.2	False
7	hr	[16.00, 18.00)	294	0.1287	0.2	False

Bivariate feature slicing

# Slice jointly on two features ("hr" and "atemp"); the resulting table has
# one row per (hr segment, atemp segment) pair.
# No threshold is passed here; the Threshold column of the output table shows
# the value that was applied.
results = ts.diagnose_slicing_robustness(
    features=("hr", "atemp"),
    perturb_features=("hum", "temp"),
    noise_levels=0.1,
    metric="MSE"
)
results.table

	Feature1	Segment1	Feature2	Segment2	Size	MSE	Threshold	Weak
90	hr	[20.70, 23.00]	atemp	[0.00, 0.10)	3	0.3732	0.0931	True
10	hr	[2.30, 4.60)	atemp	[0.00, 0.10)	5	0.3456	0.0931	True
20	hr	[4.60, 6.90)	atemp	[0.00, 0.10)	3	0.2362	0.0931	True
91	hr	[20.70, 23.00]	atemp	[0.10, 0.20)	14	0.2325	0.0931	True
12	hr	[2.30, 4.60)	atemp	[0.20, 0.30)	45	0.2268	0.0931	True
...	...	...	...	...	...	...	...	...
50	hr	[11.50, 13.80)	atemp	[0.00, 0.10)	0	NaN	0.0931	False
59	hr	[11.50, 13.80)	atemp	[0.89, 0.98]	0	NaN	0.0931	False
60	hr	[13.80, 16.10)	atemp	[0.00, 0.10)	0	NaN	0.0931	False
80	hr	[18.40, 20.70)	atemp	[0.00, 0.10)	0	NaN	0.0931	False
99	hr	[20.70, 23.00]	atemp	[0.89, 0.98]	0	NaN	0.0931	False

100 rows × 8 columns

Batch mode single feature slicing

# Batch mode: each inner 1-tuple names one slicing feature, and the segments
# of all three features are reported together in a single table.
# perturb_method="quantile" is set explicitly here; the earlier slicing calls
# leave it at the library default.
results = ts.diagnose_slicing_robustness(
    features=(("hr",), ("atemp",), ("season",)),
    perturb_features=("temp", "hum"),
    noise_levels=0.1,
    perturb_method="quantile",
    metric="MSE",
    threshold=0.15
)
results.table

	Feature	Segment	Size	MSE	Threshold	Weak
0	atemp	[0.00, 0.10)	20	0.2062	0.15	True
1	hr	[2.30, 4.60)	284	0.1768	0.15	True
2	hr	[4.60, 6.90)	267	0.1511	0.15	True
3	hr	[0.00, 2.30)	428	0.1391	0.15	False
4	atemp	[0.20, 0.30)	491	0.1322	0.15	False
5	season	1.0	823	0.1301	0.15	False
6	atemp	[0.49, 0.59)	577	0.1213	0.15	False
7	atemp	[0.39, 0.49)	599	0.1209	0.15	False
8	atemp	[0.10, 0.20)	129	0.1087	0.15	False
9	atemp	[0.30, 0.39)	559	0.1051	0.15	False
10	hr	[6.90, 9.20)	445	0.1004	0.15	False
11	season	2.0	867	0.0962	0.15	False
12	season	4.0	874	0.0944	0.15	False
13	hr	[20.70, 23.00]	447	0.0820	0.15	False
14	hr	[13.80, 16.10)	445	0.0748	0.15	False
15	season	3.0	912	0.0667	0.15	False
16	hr	[11.50, 13.80)	291	0.0659	0.15	False
17	hr	[9.20, 11.50)	294	0.0644	0.15	False
18	hr	[18.40, 20.70)	285	0.0613	0.15	False
19	atemp	[0.59, 0.69)	759	0.0538	0.15	False
20	hr	[16.10, 18.40)	290	0.0467	0.15	False
21	atemp	[0.79, 0.89)	68	0.0264	0.15	False
22	atemp	[0.69, 0.79)	267	0.0257	0.15	False
23	atemp	[0.89, 0.98]	7	0.0080	0.15	False

Batch mode 1D Slicing (all features by setting features=None)

# features=None runs the 1D slicing over all features in one batch.
results = ts.diagnose_slicing_robustness(
    features=None,
    perturb_features=("temp", "hum"),
    noise_levels=0.1,
    perturb_method="quantile",
    metric="MSE",
    threshold=0.15,
)
results.table

	Feature	Segment	Size	MSE	Threshold	Weak
0	windspeed	[0.70, 0.78]	2	0.5426	0.15	True
1	hum	[0.00, 0.10)	4	0.4705	0.15	True
2	weathersit	3.0	296	0.2878	0.15	True
3	hum	[0.90, 1.00]	245	0.2068	0.15	True
4	atemp	[0.00, 0.10)	20	0.2062	0.15	True
...	...	...	...	...	...	...
78	temp	[0.77, 0.87)	187	0.0198	0.15	False
79	atemp	[0.89, 0.98]	7	0.0080	0.15	False
80	weekday	[1.20, 1.80)	0	NaN	0.15	False
81	weekday	[2.40, 3.00)	0	NaN	0.15	False
82	weekday	[4.20, 4.80)	0	NaN	0.15	False

83 rows × 6 columns

Analyze data drift

# Extract the data_info mapping from the batch slicing result; it is indexed
# by feature name below.
data_info = get_data_info(res_value=results.value)
# Run the PSI drift test ("uniform" PSI method, 10 bins) on the "hr" entry.
data_results = ds.data_drift_test(
    **data_info["hr"],
    distance_metric="PSI",
    psi_method="uniform",
    psi_bins=10
)
data_results.plot("summary")

Single feature density plot

# Density view of the drift result for the single feature "hr".
data_results.plot(("density", "hr"))

Model robustness comparison

# TestSuite over both fitted models, used by the compare_* calls below.
tsc = TestSuite(ds, models=[model1, model2])

Compare robustness performance of multiple models

# Compare the two models' MSE when "hr" and "atemp" are perturbed at four
# noise levels (0.1 through 0.4) with the "quantile" perturbation method.
results = tsc.compare_robustness(
    perturb_features=("hr", "atemp"),
    noise_levels=(0.1, 0.2, 0.3, 0.4),
    perturb_method="quantile",
    metric="MSE"
)
# Render the comparison on a 6x5 figure.
results.plot(figsize=(6, 5))

Compare robustness performance of multiple models under a single slicing feature

# Compare the two models' MSE per "hr" segment at noise level 0.1.
# NOTE(review): this call passes method="quantile", whereas the other calls in
# this example pass perturb_method="quantile" and use `method` for
# "auto-xgb1" — confirm that `method` (not `perturb_method`) is intended here.
results = tsc.compare_slicing_robustness(
    features="hr",
    noise_levels=0.1,
    method="quantile",
    metric="MSE"
)
results.plot()

Total running time of the script: (0 minutes 13.383 seconds)

Gallery generated by Sphinx-Gallery