ModelZoo#

This example demonstrates how to use ModelZoo to manage multiple models, train them, and perform various analyses using TestSuite.

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, run the following commands (replace the token with your own for full access):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import required modules

from lightgbm import LGBMClassifier

from modeva import DataSet
from modeva import ModelZoo
from modeva import TestSuite
from modeva.utils.mlflow import set_mlflow_home, get_mlflow_home
from modeva.models.local_model_zoo import LocalModelZoo
from modeva.models.wrappers.api import modeva_sklearn_classifier

# Import model classes
from modeva.models import (
    MoLogisticRegression, MoDecisionTreeClassifier,
    MoLGBMClassifier, MoXGBClassifier, MoCatBoostClassifier,
    MoRandomForestClassifier, MoGradientBoostingClassifier,
    MoGAMINetClassifier, MoReLUDNNClassifier,
    MoGLMTreeBoostClassifier, MoNeuralTreeClassifier
)

Configure MLflow settings#

set_mlflow_home(mlflow_home="~/modeva_mlflow")
mlflow_home = get_mlflow_home()
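
To confirm where experiment runs will be tracked, you can print the resolved MLflow home directory (the layout under this path is managed by MLflow itself):

# Verify the tracking location that was just configured
print(f"MLflow home: {mlflow_home}")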

Load and prepare dataset#

ds = DataSet()
ds.load(name="TaiwanCredit")
ds.set_random_split()
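
Before adding models, it can help to sanity-check what the prepared dataset exposes. The sketch below only uses attributes already relied on elsewhere in this example (feature_names and feature_types are passed to the logistic regression model later, and train_x is used when verifying loaded models):

# Quick look at the prepared dataset
print(ds.feature_names)
print(ds.feature_types)
print(ds.train_x.shape)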

Initialize ModelZoo#

mz = LocalModelZoo(name="TaiwanCredit-Exp", dataset=ds)
print(f"Experiment name: {mz.experiment_name}")
print(f"Experiment ID: {mz.experiment_id}")
Experiment name: TaiwanCredit-Exp
Experiment ID: 3

Add traditional ML models#

mz.add_model(model=MoLGBMClassifier(name="LGBM2", max_depth=2, verbose=-1))
mz.add_model(model=MoXGBClassifier(name="XGB2", max_depth=2))
mz.add_model(model=MoCatBoostClassifier(name="CatBoost2", max_depth=2, silent=True))
mz.add_model(model=MoRandomForestClassifier(name="RF2", max_depth=2))
mz.add_model(model=MoGradientBoostingClassifier(name="GBDT2", max_depth=2))
mz.add_model(model=MoLogisticRegression(
    name="LR",
    feature_names=ds.feature_names,
    feature_types=ds.feature_types
))
mz.add_model(model=MoDecisionTreeClassifier(name="DT", max_depth=8))
mz.add_model(model=MoReLUDNNClassifier(name="ReLUDNN"))

Add advanced ML models#

mz.add_model(model=MoNeuralTreeClassifier(
    name="NeuralTree",
    nn_temperature=0.001,
    nn_max_epochs=100,
    verbose=False,
    random_state=0
))

Add wrapped scikit-learn model#

wrap_estimator = modeva_sklearn_classifier(
    name="LGBM-wrapped",
    estimator=LGBMClassifier(verbose=-1)
)
mz.add_model(model=wrap_estimator)
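
In principle the same wrapper can be applied to other scikit-learn classifiers. The commented sketch below wraps a plain scikit-learn RandomForestClassifier under an illustrative name "RF-wrapped"; it is left commented out so the leaderboard below still contains exactly the ten models added above.

# Illustrative only: wrap another scikit-learn estimator the same way
# from sklearn.ensemble import RandomForestClassifier
# rf_wrapped = modeva_sklearn_classifier(
#     name="RF-wrapped",
#     estimator=RandomForestClassifier(max_depth=2),
# )
# mz.add_model(model=rf_wrapped)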

Train all models and show leaderboard#

mz.train_all()
mz.leaderboard(order_by="test AUC")
100%|██████████| 10/10 [02:07<00:00, 12.76s/it]
Model        | start_time          | end_time            | Duration (s) | train AUC | test AUC | train ACC | test ACC | train F1 | test F1  | train LogLoss | test LogLoss | train Brier | test Brier
LGBM-wrapped | 2025-04-26 23:12:05 | 2025-04-26 23:12:05 | 0.535239     | 0.880594  | 0.786645 | 0.843542  | 0.827667 | 0.550138 | 0.486594 | 0.358546      | 0.417972     | 0.110928    | 0.130323
CatBoost2    | 2025-04-26 23:09:59 | 2025-04-26 23:10:17 | 18.066218    | 0.799742  | 0.784044 | 0.82275   | 0.827833 | 0.483111 | 0.480644 | 0.417998      | 0.420104     | 0.130939    | 0.130937
XGB2         | 2025-04-26 23:09:59 | 2025-04-26 23:09:59 | 0.224497     | 0.802168  | 0.783313 | 0.823417  | 0.829167 | 0.487297 | 0.487756 | 0.416518      | 0.420393     | 0.130687    | 0.131044
GBDT2        | 2025-04-26 23:10:23 | 2025-04-26 23:10:57 | 33.98626     | 0.791414  | 0.78185  | 0.821208  | 0.831    | 0.476388 | 0.486322 | 0.424638      | 0.421141     | 0.133016    | 0.130998
LGBM2        | 2025-04-26 23:09:58 | 2025-04-26 23:09:58 | 0.133382     | 0.789608  | 0.780283 | 0.820542  | 0.83     | 0.472634 | 0.48118  | 0.425528      | 0.421604     | 0.133381    | 0.131115
RF2          | 2025-04-26 23:10:19 | 2025-04-26 23:10:22 | 2.645222     | 0.768472  | 0.773772 | 0.8045    | 0.812167 | 0.307556 | 0.312386 | 0.454125      | 0.445729     | 0.143034    | 0.139443
DT           | 2025-04-26 23:10:58 | 2025-04-26 23:10:59 | 0.728562     | 0.796663  | 0.759269 | 0.832875  | 0.823167 | 0.511033 | 0.4606   | 0.408438      | 0.619822     | 0.126673    | 0.137084
NeuralTree   | 2025-04-26 23:11:33 | 2025-04-26 23:12:04 | 31.71789     | 0.70426   | 0.709229 | 0.796958  | 0.801333 | 0.456563 | 0.451197 | 3.219618      | 3.072233     | 0.194163    | 0.190408
LR           | 2025-04-26 23:10:57 | 2025-04-26 23:10:57 | 0.116774     | 0.61372   | 0.632926 | 0.777542  | 0.783833 | 0.0      | 0.0      | 0.537809      | 0.523983     | 0.175762    | 0.170817
ReLUDNN      | 2025-04-26 23:10:59 | 2025-04-26 23:11:33 | 33.915026    | 0.432403  | 0.416251 | 0.222458  | 0.216167 | 0.363952 | 0.355489 | 18.749674     | 18.836042    | 0.775229    | 0.781533
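
The leaderboard can be re-sorted by any of its metric columns; for example, ranking the same results by test accuracy instead of test AUC:

# Re-rank the trained models by a different metric column
mz.leaderboard(order_by="test ACC")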


Model interpretation examples#

Feature importance analysis

model = mz.get_model("ReLUDNN")
ts = TestSuite(ds, model)
results = ts.interpret_fi()
results.plot()
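
The same feature importance test can be run for any other model in the zoo (assuming the test supports that model type); for example, for the depth-2 XGBoost model:

# Feature importance for another model from the zoo
model = mz.get_model("XGB2")
ts = TestSuite(ds, model)
results = ts.interpret_fi()
results.plot()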


Feature effects analysis for different models

model = mz.get_model("LGBM2")
ts = TestSuite(ds, model)
results = ts.interpret_effects(features="PAY_1")
results.plot()
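
Repeating the call for a second model makes it easy to compare how differently the models fit the PAY_1 effect, e.g. for the depth-2 CatBoost model:

# Same effect plot for a second model, for side-by-side comparison
model = mz.get_model("CatBoost2")
ts = TestSuite(ds, model)
results = ts.interpret_effects(features="PAY_1")
results.plot()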


Model registration and loading#

Register all models

for name in mz.models.keys():
    mz.register(name)

# List registered models
registered_models = mz.list_registered_models()
print("Registered models:", registered_models)
Registered models:            Name  Latest Version
0     CatBoost2               1
1            DT               1
2         GBDT2               1
3  LGBM-wrapped               1
4         LGBM2               1
5            LR               1
6    NeuralTree               1
7           RF2               1
8       ReLUDNN               1
9          XGB2               1

Load and verify registered models#

ds_new = DataSet()
ds_new.load(name="TaiwanCredit")
ds_new.set_random_split()
mz_new = ModelZoo(name="TaiwanCredit-Exp", dataset=ds_new)

# Verify predictions from loaded models
for name in mz.models.keys():
    loaded_model = mz_new.load_registered_model(name)
    predictions = loaded_model.predict_proba(ds_new.train_x)
    print(f"Model {name} predictions shape: {predictions.shape}")
Model LGBM2 predictions shape: (24000, 2)
Model XGB2 predictions shape: (24000, 2)
Model CatBoost2 predictions shape: (24000, 2)
Model RF2 predictions shape: (24000, 2)
Model GBDT2 predictions shape: (24000, 2)
Model LR predictions shape: (24000, 2)
Model DT predictions shape: (24000, 2)
Model ReLUDNN predictions shape: (24000, 2)
Model NeuralTree predictions shape: (24000, 2)
Model LGBM-wrapped predictions shape: (24000, 2)
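
Models restored from the registry can also be passed straight back into TestSuite, so the interpretation workflow above works against registered versions as well; a minimal sketch reusing the calls shown earlier:

# Reuse a restored model in a TestSuite analysis
loaded_lgbm = mz_new.load_registered_model("LGBM2")
ts = TestSuite(ds_new, loaded_lgbm)
results = ts.interpret_effects(features="PAY_1")
results.plot()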

Total running time of the script: (2 minutes 12.060 seconds)

Gallery generated by Sphinx-Gallery