Basic Dataset Operations

Installation

# To install the required package, use the following command:
# !pip install modeva

Authentication

# To authenticate, use the following command. (For full access, replace the token below with your own.)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')

Import modeva modules

from modeva import DataSet

Load the built-in data

ds = DataSet()
ds.load("TaiwanCredit")
ds
LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 FlagDefault
0 20000.0 2.0 2.0 1.0 24.0 2.0 2.0 -1.0 -1.0 0.0 0.0 3.592621 3.491782 2.838849 0.000000 0.000000 0.000000 0.000000 2.838849 0.000000 0.000000 0.000000 0.000000 1.0
1 120000.0 2.0 2.0 2.0 26.0 -1.0 2.0 0.0 0.0 0.0 2.0 3.428621 3.237041 3.428621 3.514946 3.538574 3.513484 0.000000 3.000434 3.000434 3.000434 0.000000 3.301247 1.0
2 90000.0 2.0 2.0 2.0 34.0 0.0 0.0 0.0 0.0 0.0 0.0 4.465977 4.146996 4.132260 4.156307 4.174612 4.191731 3.181558 3.176381 3.000434 3.000434 3.000434 3.699057 0.0
3 50000.0 2.0 2.0 1.0 37.0 0.0 0.0 0.0 0.0 0.0 0.0 4.672015 4.683353 4.692776 4.452016 4.461799 4.470528 3.301247 3.305351 3.079543 3.041787 3.029384 3.000434 0.0
4 50000.0 1.0 2.0 1.0 57.0 -1.0 0.0 -1.0 0.0 0.0 0.0 3.935406 3.753660 4.554319 4.320997 4.282101 4.281760 3.301247 4.564453 4.000043 3.954291 2.838849 2.832509 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
29995 220000.0 1.0 3.0 1.0 39.0 0.0 0.0 0.0 0.0 0.0 0.0 5.276345 5.285143 5.318827 4.944507 4.494683 4.203604 3.929470 4.301052 3.699317 3.484015 3.699057 3.000434 0.0
29996 150000.0 1.0 3.0 2.0 43.0 -1.0 -1.0 -1.0 -1.0 0.0 0.0 3.226342 3.262214 3.544440 3.953276 3.715251 0.000000 3.264345 3.547405 3.954194 2.113943 0.000000 0.000000 0.0
29997 30000.0 1.0 2.0 2.0 37.0 4.0 3.0 2.0 -1.0 0.0 0.0 3.552181 3.525951 3.440752 4.319710 4.313509 4.286861 0.000000 0.000000 4.342442 3.623353 3.301247 3.491502 1.0
29998 80000.0 1.0 3.0 1.0 41.0 1.0 -1.0 0.0 0.0 0.0 -1.0 -3.216430 4.894205 4.882553 4.722428 4.073938 4.689708 4.933998 3.532754 3.071514 3.284882 4.723989 3.256477 1.0
29999 50000.0 1.0 2.0 1.0 46.0 0.0 0.0 0.0 0.0 0.0 0.0 4.680607 4.689362 4.696924 4.562721 4.510933 4.185089 3.317854 3.255514 3.155640 3.000434 3.000434 3.000434 1.0

30000 rows × 24 columns



Basic data operations

Split data

ds.set_random_split()

Set target

ds.set_target("FlagDefault")

Set sample weight

ds.set_sample_weight("LIMIT_BAL")

Disable features that will not be used for modeling

ds.set_inactive_features(features=('SEX_2.0',
                                   'MARRIAGE_1.0',
                                   'MARRIAGE_2.0'))

View feature names

ds.feature_names
['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']

View feature types

ds.feature_types
['categorical', 'categorical', 'categorical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical']

View training data

ds.train_x, ds.train_y, ds.train_sample_weight
(array([[2.       , 2.       , 1.       , ..., 0.       , 0.       ,
        0.       ],
       [2.       , 2.       , 2.       , ..., 3.0004342, 0.       ,
        3.3012471],
       [2.       , 2.       , 2.       , ..., 3.0004342, 3.0004342,
        3.6990569],
       ...,
       [1.       , 2.       , 2.       , ..., 3.84516  , 0.       ,
        3.6021686],
       [1.       , 3.       , 2.       , ..., 2.1139433, 0.       ,
        0.       ],
       [1.       , 3.       , 1.       , ..., 3.2848818, 4.723989 ,
        3.256477 ]]), array([[1.],
       [1.],
       [0.],
       ...,
       [1.],
       [0.],
       [1.]]), array([ 20000., 120000.,  90000., ...,  80000., 150000.,  80000.]))

View testing data

ds.test_x, ds.test_y, ds.test_sample_weight
(array([[1.       , 2.       , 1.       , ..., 3.9542909, 2.838849 ,
        2.8325088],
       [1.       , 1.       , 2.       , ..., 4.3062105, 4.1383343,
        4.1389656],
       [2.       , 2.       , 2.       , ..., 2.764923 , 3.2273724,
        3.188366 ],
       ...,
       [1.       , 3.       , 1.       , ..., 3.484015 , 3.6990569,
        3.0004342],
       [1.       , 2.       , 2.       , ..., 3.6233528, 3.3012471,
        3.4915018],
       [1.       , 2.       , 1.       , ..., 3.0004342, 3.0004342,
        3.0004342]]), array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [1.]]), array([ 50000., 500000., 100000., ..., 220000.,  30000.,  50000.]))

Register data into MLflow

ds.register(override=True)
ds.list_registered_data()
Name Latest Version Updated Time
0 TaiwanCredit 2 2025-04-26 23:08:10


Load data from MLflow

dsload = DataSet()
dsload.load_registered_data(name="TaiwanCredit")
dsload
LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 FlagDefault
0 20000.0 2.0 2.0 1.0 24.0 2.0 2.0 -1.0 -1.0 0.0 0.0 3.592621 3.491782 2.838849 0.000000 0.000000 0.000000 0.000000 2.838849 0.000000 0.000000 0.000000 0.000000 1.0
1 120000.0 2.0 2.0 2.0 26.0 -1.0 2.0 0.0 0.0 0.0 2.0 3.428621 3.237041 3.428621 3.514946 3.538574 3.513484 0.000000 3.000434 3.000434 3.000434 0.000000 3.301247 1.0
2 90000.0 2.0 2.0 2.0 34.0 0.0 0.0 0.0 0.0 0.0 0.0 4.465977 4.146996 4.132260 4.156307 4.174612 4.191731 3.181558 3.176381 3.000434 3.000434 3.000434 3.699057 0.0
3 50000.0 2.0 2.0 1.0 37.0 0.0 0.0 0.0 0.0 0.0 0.0 4.672015 4.683353 4.692776 4.452016 4.461799 4.470528 3.301247 3.305351 3.079543 3.041787 3.029384 3.000434 0.0
4 50000.0 1.0 2.0 1.0 57.0 -1.0 0.0 -1.0 0.0 0.0 0.0 3.935406 3.753660 4.554319 4.320997 4.282101 4.281760 3.301247 4.564453 4.000043 3.954291 2.838849 2.832509 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
29995 220000.0 1.0 3.0 1.0 39.0 0.0 0.0 0.0 0.0 0.0 0.0 5.276345 5.285143 5.318827 4.944507 4.494683 4.203604 3.929470 4.301052 3.699317 3.484015 3.699057 3.000434 0.0
29996 150000.0 1.0 3.0 2.0 43.0 -1.0 -1.0 -1.0 -1.0 0.0 0.0 3.226342 3.262214 3.544440 3.953276 3.715251 0.000000 3.264345 3.547405 3.954194 2.113943 0.000000 0.000000 0.0
29997 30000.0 1.0 2.0 2.0 37.0 4.0 3.0 2.0 -1.0 0.0 0.0 3.552181 3.525951 3.440752 4.319710 4.313509 4.286861 0.000000 0.000000 4.342442 3.623353 3.301247 3.491502 1.0
29998 80000.0 1.0 3.0 1.0 41.0 1.0 -1.0 0.0 0.0 0.0 -1.0 -3.216430 4.894205 4.882553 4.722428 4.073938 4.689708 4.933998 3.532754 3.071514 3.284882 4.723989 3.256477 1.0
29999 50000.0 1.0 2.0 1.0 46.0 0.0 0.0 0.0 0.0 0.0 0.0 4.680607 4.689362 4.696924 4.562721 4.510933 4.185089 3.317854 3.255514 3.155640 3.000434 3.000434 3.000434 1.0

30000 rows × 24 columns



Total running time of the script: (0 minutes 1.987 seconds)

Gallery generated by Sphinx-Gallery