Note
Go to the end to download the full example code.
ModelZoo#
This example demonstrates how to use ModelZoo to manage multiple models, train them, and perform various analyses using TestSuite.
Installation
# To install the required package, use the following command:
# !pip install modeva
Authentication
# To authenticate, run the following commands (for full access, replace the token with your own):
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')
Import required modules
from lightgbm import LGBMClassifier
from modeva import DataSet
from modeva import ModelZoo
from modeva import TestSuite
from modeva.utils.mlflow import set_mlflow_home, get_mlflow_home
from modeva.models.local_model_zoo import LocalModelZoo
from modeva.models.wrappers.api import modeva_sklearn_classifier
# Import model classes
from modeva.models import (
MoLogisticRegression, MoDecisionTreeClassifier,
MoLGBMClassifier, MoXGBClassifier, MoCatBoostClassifier,
MoRandomForestClassifier, MoGradientBoostingClassifier,
MoGAMINetClassifier, MoReLUDNNClassifier,
MoGLMTreeBoostClassifier, MoNeuralTreeClassifier
)
Configure MLflow settings#
# Set the MLflow home to "~/modeva_mlflow" before any ModelZoo is created.
set_mlflow_home(mlflow_home="~/modeva_mlflow")
# Keep whatever get_mlflow_home() reports; `mlflow_home` is not used again in
# this example, so it is only useful for inspection.
mlflow_home = get_mlflow_home()
Load and prepare dataset#
# Load the "TaiwanCredit" dataset by name and apply a random split with the
# default settings; `ds` is shared by the ModelZoo and every TestSuite below.
ds = DataSet()
ds.load(name="TaiwanCredit")
ds.set_random_split()
Initialize ModelZoo#
# Create the zoo for experiment "TaiwanCredit-Exp" on top of `ds`.
# NOTE(review): this step uses LocalModelZoo while the reload step at the end
# of the example uses ModelZoo with the same experiment name - confirm the
# two classes are meant to be mixed.
mz = LocalModelZoo(name="TaiwanCredit-Exp", dataset=ds)
# Print the experiment name and ID exposed by the zoo.
print(f"Experiment name: {mz.experiment_name}")
print(f"Experiment ID: {mz.experiment_id}")
Experiment name: TaiwanCredit-Exp
Experiment ID: 3
Add traditional ML models#
# Declare the baseline candidates in one place, then add them to the zoo in
# the order listed. Each model's `name` is the key used later with
# mz.get_model(...) and mz.register(...).
baseline_models = (
    MoLGBMClassifier(name="LGBM2", max_depth=2, verbose=-1),
    MoXGBClassifier(name="XGB2", max_depth=2),
    MoCatBoostClassifier(name="CatBoost2", max_depth=2, silent=True),
    MoRandomForestClassifier(name="RF2", max_depth=2),
    MoGradientBoostingClassifier(name="GBDT2", max_depth=2),
    # Logistic regression is the only candidate here that is given the
    # dataset's feature names and types explicitly.
    MoLogisticRegression(
        name="LR",
        feature_names=ds.feature_names,
        feature_types=ds.feature_types,
    ),
    MoDecisionTreeClassifier(name="DT", max_depth=8),
    MoReLUDNNClassifier(name="ReLUDNN"),
)
for candidate in baseline_models:
    mz.add_model(model=candidate)
Add advanced ML models#
# Build the NeuralTree classifier on its own so its hyperparameters are easy
# to read, then add it to the zoo under the name "NeuralTree".
neural_tree = MoNeuralTreeClassifier(
    name="NeuralTree",
    nn_temperature=0.001,
    nn_max_epochs=100,
    verbose=False,
    random_state=0,
)
mz.add_model(model=neural_tree)
Add wrapped scikit-learn model#
# Start from a plain LightGBM estimator (not one of the Mo* classes) ...
lgbm_estimator = LGBMClassifier(verbose=-1)
# ... wrap it with modeva_sklearn_classifier under the name "LGBM-wrapped" ...
wrap_estimator = modeva_sklearn_classifier(
    name="LGBM-wrapped",
    estimator=lgbm_estimator,
)
# ... and add the wrapper to the zoo like any other model.
mz.add_model(model=wrap_estimator)
Train all models and show leaderboard#
# Train every model added to the zoo so far (the captured progress bar below
# counts 10 of them).
mz.train_all()
# Request the leaderboard ordered by the "test AUC" column. The return value
# is not assigned here, so a plain script run will not show it unless the
# call itself prints - presumably it is rendered as the cell's last
# expression in a notebook; TODO confirm.
mz.leaderboard(order_by="test AUC")
0%| | 0/10 [00:00<?, ?it/s]
10%|█████████▌ | 1/10 [00:00<00:03, 2.61it/s]
20%|███████████████████▏ | 2/10 [00:00<00:03, 2.44it/s]
30%|████████████████████████████▊ | 3/10 [00:20<01:05, 9.40s/it]
40%|██████████████████████████████████████▍ | 4/10 [00:24<00:43, 7.21s/it]
50%|████████████████████████████████████████████████ | 5/10 [00:59<01:25, 17.03s/it]
60%|█████████████████████████████████████████████████████████▌ | 6/10 [00:59<00:45, 11.41s/it]
70%|███████████████████████████████████████████████████████████████████▏ | 7/10 [01:00<00:23, 7.96s/it]
80%|████████████████████████████████████████████████████████████████████████████▊ | 8/10 [01:34<00:32, 16.27s/it]
90%|██████████████████████████████████████████████████████████████████████████████████████▍ | 9/10 [02:06<00:21, 21.17s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:07<00:00, 14.97s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:07<00:00, 12.76s/it]
Model interpretation examples#
Feature importance analysis
# Fetch the model added under the name "ReLUDNN" and pair it with the dataset
# in a TestSuite.
model = mz.get_model("ReLUDNN")
ts = TestSuite(ds, model)
# Run interpret_fi() and plot the returned result object.
results = ts.interpret_fi()
results.plot()
Feature effects analysis for a different model (LGBM2)
# Switch to the model added under the name "LGBM2"; `model`, `ts` and
# `results` are rebound here, replacing any earlier values.
model = mz.get_model("LGBM2")
ts = TestSuite(ds, model)
# Run interpret_effects() for the single feature "PAY_1" and plot the result.
results = ts.interpret_effects(features="PAY_1")
results.plot()
Model registration and loading#
Register all models
# Register every model held by the zoo, one call per model name.
for name in mz.models.keys():
    mz.register(name)

# List registered models
registered_models = mz.list_registered_models()
print("Registered models:", registered_models)
Registered models: Name Latest Version
0 CatBoost2 1
1 DT 1
2 GBDT2 1
3 LGBM-wrapped 1
4 LGBM2 1
5 LR 1
6 NeuralTree 1
7 RF2 1
8 ReLUDNN 1
9 XGB2 1
Load and verify registered models#
# Build a fresh dataset and a second zoo bound to the same experiment name,
# so the models below are obtained through load_registered_model() rather
# than taken from the in-memory `mz`.
ds_new = DataSet()
ds_new.load(name="TaiwanCredit")
ds_new.set_random_split()
mz_new = ModelZoo(name="TaiwanCredit-Exp", dataset=ds_new)

# Verify predictions from loaded models
# The names come from the original zoo (`mz`); each model is loaded through
# `mz_new` and scored on the new dataset's training features.
for name in mz.models.keys():
    loaded_model = mz_new.load_registered_model(name)
    predictions = loaded_model.predict_proba(ds_new.train_x)
    print(f"Model {name} predictions shape: {predictions.shape}")
Model LGBM2 predictions shape: (24000, 2)
Model XGB2 predictions shape: (24000, 2)
Model CatBoost2 predictions shape: (24000, 2)
Model RF2 predictions shape: (24000, 2)
Model GBDT2 predictions shape: (24000, 2)
Model LR predictions shape: (24000, 2)
Model DT predictions shape: (24000, 2)
Model ReLUDNN predictions shape: (24000, 2)
Model NeuralTree predictions shape: (24000, 2)
Model LGBM-wrapped predictions shape: (24000, 2)
Total running time of the script: (2 minutes 12.060 seconds)