Model Fairness Analysis (Classification)#

This example requires a full licence; the program will not run with a trial licence.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, use the following commands (replace the auth code with your own token to get full access):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoLGBMClassifier
from modeva.models import MoXGBClassifier
from modeva.data.utils.loading import load_builtin_data
from modeva.testsuite.utils.slicing_utils import get_data_info

Load and prepare dataset

data = load_builtin_data("TaiwanCredit").drop(['SEX', 'MARRIAGE', 'AGE'], axis=1)

ds = DataSet()
ds.load_dataframe(data.iloc[:5000])
ds.set_target("FlagDefault")
ds.set_random_split()

protected_data = load_builtin_data("TaiwanCredit")[['SEX', 'MARRIAGE', 'AGE']]
ds.set_protected_data(protected_data.iloc[:5000])
ds.set_raw_extra_data(name="oot", data=data.iloc[5000:])
ds.set_protected_extra_data(name="oot", data=protected_data.iloc[5000:])

Train models

model1 = MoXGBClassifier()
model1.fit(ds.train_x, ds.train_y)

model2 = MoLGBMClassifier(max_depth=2, verbose=-1, random_state=0)
model2.fit(ds.train_x.astype(float), ds.train_y.ravel().astype(float))
MoLGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                 importance_type='split', learning_rate=0.1, max_depth=2,
                 min_child_samples=20, min_child_weight=0.001,
                 min_split_gain=0.0, n_estimators=100, n_jobs=None,
                 num_leaves=31, objective=None, random_state=0, reg_alpha=0.0,
                 reg_lambda=0.0, subsample=1.0, subsample_for_bin=200000,
                 subsample_freq=0, verbose=-1)


Basic fairness analysis#

ts = TestSuite(ds, model1)

Configure protected and reference groups

group_config = {
    "Gender-Male": {"feature": "SEX", "protected": 2.0, "reference": 1.0},
    "Gender-Female": {"feature": "SEX", "protected": 1.0, "reference": 2.0},
    "MARRIAGE": {"feature": "MARRIAGE", "protected": 2.0, "reference": 1.0},
    "AGE": {"feature": "AGE", "protected": {"lower": 60, "lower_inclusive": True},
            "reference": {"upper": 60, "upper_inclusive": False}}
}
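Each entry names the protected-attribute column and the values (or bounds, for AGE) that define the protected and reference groups. As an illustration of what the AGE entry implies, here is a sketch of the resulting masks on the protected data (an assumption about the semantics, not Modeva internals):

# Illustrative masks for the "AGE" entry above (assumption, not Modeva internals)
age = protected_data["AGE"]
age_protected_mask = age >= 60   # lower bound, inclusive
age_reference_mask = age < 60    # upper bound, exclusive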

Calculate adverse impact ratio (AIR)

results = ts.diagnose_fairness(group_config=group_config,
                               favorable_label=1,
                               metric="AIR",
                               threshold=0.8)
results.plot()
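For reference, AIR is the rate of favorable predictions in the protected group divided by the same rate in the reference group; values below the 0.8 threshold (the common four-fifths rule) flag potential disparate impact. A minimal hand computation of the metric, assuming predicted labels and boolean group masks over the same rows (illustrative only, not Modeva's implementation):

import numpy as np

# Illustrative AIR computation; favorable_label=1 as in the call above.
def adverse_impact_ratio(y_pred, protected_mask, reference_mask, favorable_label=1):
    protected_rate = np.mean(y_pred[protected_mask] == favorable_label)
    reference_rate = np.mean(y_pred[reference_mask] == favorable_label)
    return protected_rate / reference_rate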


Check distribution drift of protected and reference groups (example for the “Gender-Male” group)

data_results = ds.data_drift_test(
    **results.value["Gender-Male"]["data_info"],
    distance_metric="PSI",
    psi_method="uniform",
    psi_bins=10
)
data_results.plot(name="summary")
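PSI with uniform binning compares, bin by bin, how each feature is distributed in the protected group versus the reference group; larger values indicate stronger drift. A minimal sketch of the metric under those settings (illustrative only, not Modeva's implementation):

import numpy as np

# Illustrative PSI with uniform bins; eps avoids log(0) for empty bins.
def psi_uniform(expected, actual, bins=10, eps=1e-6):
    edges = np.linspace(min(expected.min(), actual.min()),
                        max(expected.max(), actual.max()), bins + 1)
    p = np.histogram(expected, bins=edges)[0] / len(expected) + eps
    q = np.histogram(actual, bins=edges)[0] / len(actual) + eps
    return np.sum((p - q) * np.log(p / q))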


Analyze data drift for a single variable

data_results.plot(name=("density", "PAY_1"))


Slicing fairness analysis#

Single feature slicing

results = ts.diagnose_slicing_fairness(features="PAY_1",
                                       group_config=group_config,
                                       dataset="test",
                                       metric="AIR")
results.plot()
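Besides the plot, the per-segment values can be inspected as a table, assuming the same results.table layout used in the batch-mode examples below:

results.table["Gender-Male"]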


Bivariate feature slicing

results = ts.diagnose_slicing_fairness(features=("PAY_1", "BILL_AMT1"),
                                       group_config=group_config,
                                       dataset="test",
                                       metric="AIR",
                                       threshold=0.9)
results.plot(name="Gender-Male")


Batch mode single feature slicing

results = ts.diagnose_slicing_fairness(features=(("BILL_AMT1",), ("BILL_AMT2",), ("BILL_AMT3",)),
                                       group_config=group_config,
                                       dataset="test",
                                       metric="AIR",
                                       method="auto-xgb1", bins=5)
results.table["Gender-Male"]
Feature Segment Size AIR Threshold Weak
0 BILL_AMT1 [3.30, 4.14) 202 0.4292 0.8677 True
1 BILL_AMT3 [-3.30, 3.19) 206 0.5846 0.8677 True
2 BILL_AMT3 [4.01, 4.45) 202 0.6529 0.8677 True
3 BILL_AMT1 [4.51, 4.91) 201 0.6618 0.8677 True
4 BILL_AMT2 [-4.39, 4.48) 588 0.6641 0.8677 True
5 BILL_AMT1 [-3.30, 3.30) 204 0.6927 0.8677 True
6 BILL_AMT3 [3.19, 4.01) 180 0.7882 0.8677 True
7 BILL_AMT2 [4.48, 4.88) 204 0.8716 0.8677 False
8 BILL_AMT3 [4.45, 4.85) 207 1.0312 0.8677 False
9 BILL_AMT1 [4.14, 4.51) 186 1.4322 0.8677 False
10 BILL_AMT3 [4.85, 5.76] 205 1.7355 0.8677 False
11 BILL_AMT2 [4.88, 5.71] 208 2.0543 0.8677 False
12 BILL_AMT1 [4.91, 5.73] 207 2.2414 0.8677 False
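The returned table behaves like a pandas DataFrame, so it can be post-processed directly, for example to keep only the segments flagged as weak (a usage sketch based on the layout printed above, assuming the Weak column is boolean):

# Keep only the weak segments (AIR below the threshold) and sort by AIR.
gm_table = results.table["Gender-Male"]
weak_segments = gm_table[gm_table["Weak"]].sort_values("AIR")
print(weak_segments[["Feature", "Segment", "Size", "AIR"]])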


Batch mode 1D slicing of all features (by setting features=None)

results = ts.diagnose_slicing_fairness(features=None,
                                       group_config=group_config,
                                       dataset="test",
                                       metric="AIR",
                                       method="auto-xgb1", bins=5)
results.table["Gender-Male"]
Feature Segment Size AIR Threshold Weak
0 EDUCATION 0.0 9 0.0000 0.8677 True
1 PAY_4 [-1.00, -0.00) 195 0.3058 0.8677 True
2 BILL_AMT4 [-3.46, 2.99) 207 0.4011 0.8677 True
3 BILL_AMT1 [3.30, 4.14) 202 0.4292 0.8677 True
4 PAY_1 [-0.00, 1.00) 556 0.4738 0.8677 True
... ... ... ... ... ... ...
73 BILL_AMT1 [4.91, 5.73] 207 2.2414 0.8677 False
74 PAY_AMT1 [3.60, 3.85) 168 2.4358 0.8677 False
75 PAY_1 [8.00, 8.00] 1 NaN 0.8677 False
76 PAY_4 [7.00, 7.00] 1 NaN 0.8677 False
77 PAY_6 [8.00, 8.00] 1 NaN 0.8677 False

78 rows × 6 columns



Analyze data drift

data_info = get_data_info(res_value=results.value["PAY_1"]["Gender-Male"])
data_results = ds.data_drift_test(
    **data_info["PAY_1"],
    distance_metric="PSI",
    psi_method="uniform",
    psi_bins=10
)
data_results.plot("summary")


Single feature density plot

data_results.plot(("density", "PAY_1"))


Fairness comparison#

tsc = TestSuite(ds, models=[model1, model2])
results = tsc.compare_fairness(group_config=group_config,
                               metric="AIR",
                               threshold=0.8)
results.plot()


Compare fairness of multiple models under single-feature slicing

result = tsc.compare_slicing_fairness(features="BILL_AMT1",
                                      group_config=group_config,
                                      favorable_label=1,
                                      dataset="test",
                                      metric="AIR")
result.table["Gender-Male"]
MoXGBClassifier MoLGBMClassifier
Feature Segment Size AIR Feature Segment Size AIR
0 BILL_AMT1 [-1.50, -0.59) 11 0.0000 BILL_AMT1 [3.02, 3.92) 168 0.6044
1 BILL_AMT1 [2.12, 3.02) 75 0.4728 BILL_AMT1 [2.12, 3.02) 75 0.6304
2 BILL_AMT1 [-0.59, 0.31) 64 0.5238 BILL_AMT1 [3.92, 4.82) 420 0.7284
3 BILL_AMT1 [3.02, 3.92) 168 0.7556 BILL_AMT1 [4.82, 5.73] 248 1.1571
4 BILL_AMT1 [3.92, 4.82) 420 0.8540 BILL_AMT1 [-3.30, -2.40) 4 NaN
5 BILL_AMT1 [4.82, 5.73] 248 1.5429 BILL_AMT1 [-2.40, -1.50) 8 NaN
6 BILL_AMT1 [-3.30, -2.40) 4 NaN BILL_AMT1 [-1.50, -0.59) 11 NaN
7 BILL_AMT1 [-2.40, -1.50) 8 NaN BILL_AMT1 [-0.59, 0.31) 64 NaN
8 BILL_AMT1 [0.31, 1.21) 0 NaN BILL_AMT1 [0.31, 1.21) 0 NaN
9 BILL_AMT1 [1.21, 2.12) 2 NaN BILL_AMT1 [1.21, 2.12) 2 NaN


Unfairness mitigation#

By adjusting the prediction probability threshold

result = ts.diagnose_mitigate_unfair_thresholding(group_config=group_config,
                                                  favorable_label=1,
                                                  dataset="test",
                                                  metric="AIR",
                                                  performance_metric="AUC",
                                                  proba_cutoff=30)
result.plot("Gender-Male", figsize=(8, 5))
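Conceptually, this mitigation scans a grid of probability cutoffs for converting scores into decisions and reports the fairness/performance trade-off at each cutoff. A minimal sketch of the idea (illustrative only; scores, y_true and the group masks are placeholders, and this is not Modeva's implementation):

import numpy as np
from sklearn.metrics import roc_auc_score

# Scan candidate cutoffs: at each one, compute the AIR of the resulting
# decisions and a performance score of the same decisions.
def cutoff_tradeoff(scores, y_true, protected_mask, reference_mask, n_cutoffs=30):
    rows = []
    for cutoff in np.linspace(0.05, 0.95, n_cutoffs):
        pred = (scores >= cutoff).astype(int)
        air = pred[protected_mask].mean() / pred[reference_mask].mean()
        perf = roc_auc_score(y_true, pred)
        rows.append((cutoff, air, perf))
    return rows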


By binning features

result = ts.diagnose_mitigate_unfair_binning(group_config=group_config,
                                             favorable_label=1,
                                             dataset="test",
                                             metric="AIR",
                                             performance_metric="AUC",
                                             binning_method='uniform',
                                             bins=10)
result.plot("Gender-Male")
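The binning strategy coarsens continuous features (here into 10 uniform bins) before evaluating fairness, which removes fine-grained structure that can drive group differences, usually at some cost in performance. A minimal sketch of uniform binning for one raw feature with pandas (illustrative only, not what Modeva does internally):

import pandas as pd

# Replace BILL_AMT1 by the index of its equal-width bin (10 uniform bins).
bill_amt1_binned = pd.cut(data["BILL_AMT1"], bins=10).cat.codes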


Total running time of the script: (0 minutes 13.040 seconds)

Gallery generated by Sphinx-Gallery