ModelZoo#

This example demonstrates how to use ModelZoo to manage multiple models, train them, and perform various analyses using TestSuite.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, run the following commands (replace the token with your own for full access):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from lightgbm import LGBMClassifier

from modeva import DataSet
from modeva import ModelZoo
from modeva import TestSuite
from modeva.utils.mlflow import set_mlflow_home, get_mlflow_home
from modeva.models.local_model_zoo import LocalModelZoo
from modeva.models.wrappers.api import modeva_sklearn_classifier

# Import model classes
from modeva.models import (
    MoLogisticRegression, MoDecisionTreeClassifier,
    MoLGBMClassifier, MoXGBClassifier, MoCatBoostClassifier,
    MoRandomForestClassifier, MoGradientBoostingClassifier,
    MoGAMINetClassifier, MoReLUDNNClassifier,
    MoGLMTreeBoostClassifier, MoNeuralTreeClassifier
)

Configure MLflow settings#

set_mlflow_home(mlflow_home="~/modeva_mlflow")
mlflow_home = get_mlflow_home()
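
To confirm where experiment runs will be tracked, you can print the resolved MLflow home directory (the layout under this path is managed by MLflow itself):

# Verify the tracking location that was just configured
print(f"MLflow home: {mlflow_home}")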

Load and prepare dataset#

ds = DataSet()
ds.load(name="TaiwanCredit")
ds.set_random_split()
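
Before adding models, it can help to sanity-check what the prepared dataset exposes. The sketch below only uses attributes already relied on elsewhere in this example (feature_names and feature_types are passed to the logistic regression model later, and train_x is used when verifying loaded models):

# Quick look at the prepared dataset
print(ds.feature_names)
print(ds.feature_types)
print(ds.train_x.shape)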

Initialize ModelZoo#

mz = LocalModelZoo(name="TaiwanCredit-Exp", dataset=ds)
print(f"Experiment name: {mz.experiment_name}")
print(f"Experiment ID: {mz.experiment_id}")
Experiment name: TaiwanCredit-Exp
Experiment ID: 3

Add traditional ML models#

mz.add_model(model=MoLGBMClassifier(name="LGBM2", max_depth=2, verbose=-1))
mz.add_model(model=MoXGBClassifier(name="XGB2", max_depth=2))
mz.add_model(model=MoCatBoostClassifier(name="CatBoost2", max_depth=2, silent=True))
mz.add_model(model=MoRandomForestClassifier(name="RF2", max_depth=2))
mz.add_model(model=MoGradientBoostingClassifier(name="GBDT2", max_depth=2))
mz.add_model(model=MoLogisticRegression(
    name="LR",
    feature_names=ds.feature_names,
    feature_types=ds.feature_types
))
mz.add_model(model=MoDecisionTreeClassifier(name="DT", max_depth=8))
mz.add_model(model=MoReLUDNNClassifier(name="ReLUDNN"))

Add advanced ML models#

mz.add_model(model=MoNeuralTreeClassifier(
    name="NeuralTree",
    nn_temperature=0.001,
    nn_max_epochs=100,
    verbose=False,
    random_state=0
))

Add wrapped scikit-learn model#

wrap_estimator = modeva_sklearn_classifier(
    name="LGBM-wrapped",
    estimator=LGBMClassifier(verbose=-1)
)
mz.add_model(model=wrap_estimator)
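
In principle the same wrapper can be applied to other scikit-learn classifiers. The commented sketch below wraps a plain scikit-learn RandomForestClassifier under an illustrative name "RF-wrapped"; it is left commented out so the leaderboard below still contains exactly the ten models added above.

# Illustrative only: wrap another scikit-learn estimator the same way
# from sklearn.ensemble import RandomForestClassifier
# rf_wrapped = modeva_sklearn_classifier(
#     name="RF-wrapped",
#     estimator=RandomForestClassifier(max_depth=2),
# )
# mz.add_model(model=rf_wrapped)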

Train all models and show leaderboard#

mz.train_all()
mz.leaderboard(order_by="test AUC")
100%|██████████| 10/10 [02:07<00:00, 12.76s/it]
Model        | start_time          | end_time            | Duration (s) | train AUC | test AUC | train ACC | test ACC | train F1 | test F1  | train LogLoss | test LogLoss | train Brier | test Brier
LGBM-wrapped | 2025-04-26 23:12:05 | 2025-04-26 23:12:05 | 0.535239     | 0.880594  | 0.786645 | 0.843542  | 0.827667 | 0.550138 | 0.486594 | 0.358546      | 0.417972     | 0.110928    | 0.130323
CatBoost2    | 2025-04-26 23:09:59 | 2025-04-26 23:10:17 | 18.066218    | 0.799742  | 0.784044 | 0.82275   | 0.827833 | 0.483111 | 0.480644 | 0.417998      | 0.420104     | 0.130939    | 0.130937
XGB2         | 2025-04-26 23:09:59 | 2025-04-26 23:09:59 | 0.224497     | 0.802168  | 0.783313 | 0.823417  | 0.829167 | 0.487297 | 0.487756 | 0.416518      | 0.420393     | 0.130687    | 0.131044
GBDT2        | 2025-04-26 23:10:23 | 2025-04-26 23:10:57 | 33.98626     | 0.791414  | 0.78185  | 0.821208  | 0.831    | 0.476388 | 0.486322 | 0.424638      | 0.421141     | 0.133016    | 0.130998
LGBM2        | 2025-04-26 23:09:58 | 2025-04-26 23:09:58 | 0.133382     | 0.789608  | 0.780283 | 0.820542  | 0.83     | 0.472634 | 0.48118  | 0.425528      | 0.421604     | 0.133381    | 0.131115
RF2          | 2025-04-26 23:10:19 | 2025-04-26 23:10:22 | 2.645222     | 0.768472  | 0.773772 | 0.8045    | 0.812167 | 0.307556 | 0.312386 | 0.454125      | 0.445729     | 0.143034    | 0.139443
DT           | 2025-04-26 23:10:58 | 2025-04-26 23:10:59 | 0.728562     | 0.796663  | 0.759269 | 0.832875  | 0.823167 | 0.511033 | 0.4606   | 0.408438      | 0.619822     | 0.126673    | 0.137084
NeuralTree   | 2025-04-26 23:11:33 | 2025-04-26 23:12:04 | 31.71789     | 0.70426   | 0.709229 | 0.796958  | 0.801333 | 0.456563 | 0.451197 | 3.219618      | 3.072233     | 0.194163    | 0.190408
LR           | 2025-04-26 23:10:57 | 2025-04-26 23:10:57 | 0.116774     | 0.61372   | 0.632926 | 0.777542  | 0.783833 | 0.0      | 0.0      | 0.537809      | 0.523983     | 0.175762    | 0.170817
ReLUDNN      | 2025-04-26 23:10:59 | 2025-04-26 23:11:33 | 33.915026    | 0.432403  | 0.416251 | 0.222458  | 0.216167 | 0.363952 | 0.355489 | 18.749674     | 18.836042    | 0.775229    | 0.781533
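
The leaderboard can be re-sorted by any of its metric columns; for example, ranking the same results by test accuracy instead of test AUC:

# Re-rank the trained models by a different metric column
mz.leaderboard(order_by="test ACC")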


Model interpretation examples#

Feature importance analysis

model = mz.get_model("ReLUDNN")
ts = TestSuite(ds, model)
results = ts.interpret_fi()
results.plot()
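
The same feature importance test can be run for any other model in the zoo (assuming the test supports that model type); for example, for the depth-2 XGBoost model:

# Feature importance for another model from the zoo
model = mz.get_model("XGB2")
ts = TestSuite(ds, model)
results = ts.interpret_fi()
results.plot()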


Feature effects analysis for different models

model = mz.get_model("LGBM2")
ts = TestSuite(ds, model)
results = ts.interpret_effects(features="PAY_1")
results.plot()
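
Repeating the call for a second model makes it easy to compare how differently the models fit the PAY_1 effect, e.g. for the depth-2 CatBoost model:

# Same effect plot for a second model, for side-by-side comparison
model = mz.get_model("CatBoost2")
ts = TestSuite(ds, model)
results = ts.interpret_effects(features="PAY_1")
results.plot()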


Model registration and loading#

Register all models

for name in mz.models.keys():
    mz.register(name)

# List registered models
registered_models = mz.list_registered_models()
print("Registered models:", registered_models)
Registered models:            Name  Latest Version
0     CatBoost2               1
1            DT               1
2         GBDT2               1
3  LGBM-wrapped               1
4         LGBM2               1
5            LR               1
6    NeuralTree               1
7           RF2               1
8       ReLUDNN               1
9          XGB2               1

Load and verify registered models#

ds_new = DataSet()
ds_new.load(name="TaiwanCredit")
ds_new.set_random_split()
mz_new = ModelZoo(name="TaiwanCredit-Exp", dataset=ds_new)

# Verify predictions from loaded models
for name in mz.models.keys():
    loaded_model = mz_new.load_registered_model(name)
    predictions = loaded_model.predict_proba(ds_new.train_x)
    print(f"Model {name} predictions shape: {predictions.shape}")
Model LGBM2 predictions shape: (24000, 2)
Model XGB2 predictions shape: (24000, 2)
Model CatBoost2 predictions shape: (24000, 2)
Model RF2 predictions shape: (24000, 2)
Model GBDT2 predictions shape: (24000, 2)
Model LR predictions shape: (24000, 2)
Model DT predictions shape: (24000, 2)
Model ReLUDNN predictions shape: (24000, 2)
Model NeuralTree predictions shape: (24000, 2)
Model LGBM-wrapped predictions shape: (24000, 2)
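
Models restored from the registry can also be passed straight back into TestSuite, so the interpretation workflow above works against registered versions as well; a minimal sketch reusing the calls shown earlier:

# Reuse a restored model in a TestSuite analysis
loaded_lgbm = mz_new.load_registered_model("LGBM2")
ts = TestSuite(ds_new, loaded_lgbm)
results = ts.interpret_effects(features="PAY_1")
results.plot()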

Total running time of the script: (2 minutes 12.060 seconds)

Gallery generated by Sphinx-Gallery