Note
Go to the end to download the full example code.
Basic Dataset Operations
Installation
# To install the required package, use the following command:
# !pip install modeva
Authentication
# To authenticate, use the following commands. (For full access, replace the token below with your own token.)
# from modeva.utils.authenticate import authenticate
# authenticate(auth_code='eaaa4301-b140-484c-8e93-f9f633c8bacb')
Import modeva modules
from modeva import DataSet
Load the built-in data
# Create a DataSet object and load the built-in dataset by its name.
ds = DataSet()
ds.load("TaiwanCredit")
# Bare expression: displays the dataset object when the cell is run interactively.
ds
Basic data operations
Split data
# Split the data using the method's default arguments; the train/test
# partitions are read back further down via ds.train_x and ds.test_x.
ds.set_random_split()
Set target
# Mark the "FlagDefault" column as the prediction target; it is exposed
# later as ds.train_y / ds.test_y (0./1. values in the output shown below).
ds.set_target("FlagDefault")
Set sample weight
# Use the "LIMIT_BAL" column as the sample weights. Note that LIMIT_BAL
# does not appear among ds.feature_names in the output shown below.
ds.set_sample_weight("LIMIT_BAL")
Disable features that will not be used for modeling
# Exclude the listed features from modeling.
# NOTE(review): these names look like one-hot-encoded column names, but the
# feature list printed below shows plain 'SEX' and 'MARRIAGE' and none of
# these names. Confirm they match existing features; otherwise this call
# may have no effect.
ds.set_inactive_features(features=('SEX_2.0',
'MARRIAGE_1.0',
'MARRIAGE_2.0'))
View feature names
# List of feature column names (strings); see the output below.
ds.feature_names
['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6']
View feature types
# List of type labels ('categorical' or 'numerical' in the output below);
# presumably one entry per feature, in the same order as ds.feature_names.
ds.feature_types
['categorical', 'categorical', 'categorical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical', 'numerical']
View training data
# Training partition as a tuple (features, target, sample weights). In the
# output below the target is a single-column 2-D array and the weights are
# a 1-D array.
ds.train_x, ds.train_y, ds.train_sample_weight
(array([[2. , 2. , 1. , ..., 0. , 0. ,
0. ],
[2. , 2. , 2. , ..., 3.0004342, 0. ,
3.3012471],
[2. , 2. , 2. , ..., 3.0004342, 3.0004342,
3.6990569],
...,
[1. , 2. , 2. , ..., 3.84516 , 0. ,
3.6021686],
[1. , 3. , 2. , ..., 2.1139433, 0. ,
0. ],
[1. , 3. , 1. , ..., 3.2848818, 4.723989 ,
3.256477 ]]), array([[1.],
[1.],
[0.],
...,
[1.],
[0.],
[1.]]), array([ 20000., 120000., 90000., ..., 80000., 150000., 80000.]))
View testing data
# Testing partition as a tuple (features, target, sample weights), with the
# same layout as the training tuple: 2-D features, single-column target,
# 1-D weights (see the output below).
ds.test_x, ds.test_y, ds.test_sample_weight
(array([[1. , 2. , 1. , ..., 3.9542909, 2.838849 ,
2.8325088],
[1. , 1. , 2. , ..., 4.3062105, 4.1383343,
4.1389656],
[2. , 2. , 2. , ..., 2.764923 , 3.2273724,
3.188366 ],
...,
[1. , 3. , 1. , ..., 3.484015 , 3.6990569,
3.0004342],
[1. , 2. , 2. , ..., 3.6233528, 3.3012471,
3.4915018],
[1. , 2. , 1. , ..., 3.0004342, 3.0004342,
3.0004342]]), array([[0.],
[0.],
[0.],
...,
[0.],
[1.],
[1.]]), array([ 50000., 500000., 100000., ..., 220000., 30000., 50000.]))
Register data into MLFlow
# Register the dataset. override=True presumably replaces an existing
# registration with the same name; confirm against the DataSet API.
ds.register(override=True)
# List the datasets that are currently registered.
ds.list_registered_data()
Load data from MLFlow
# Load the registered data into a separate DataSet object, looked up by the
# same name that was used when loading the built-in data above.
dsload = DataSet()
dsload.load_registered_data(name="TaiwanCredit")
# Bare expression: displays the reloaded dataset when run interactively.
dsload
Total running time of the script: (0 minutes 1.987 seconds)