Adds code for NGBoost Algorithm #72

Status: Open · wants to merge 1 commit into base: main

7 changes: 6 additions & 1 deletion TabSurvey/models/__init__.py
@@ -1,6 +1,6 @@
 all_models = ["LinearModel", "KNN", "DecisionTree", "RandomForest", "XGBoost", "CatBoost", "LightGBM", "ModelTree",
               "MLP", "TabNet", "VIME", "TabTransformer", "NODE", "DeepGBM", "RLN", "DNFNet", "STG", "NAM", "DeepFM",
-              "SAINT", "DANet"]
+              "SAINT", "DANet", "NGBoost"]


def str2model(model):
@@ -93,5 +93,10 @@ def str2model(model):
from models.danet import DANet
return DANet

elif model == "NGBoost":
from models.tree_models import NGBoost
return NGBoost


else:
raise NotImplementedError("Model \"" + model + "\" not yet implemented")
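
For context, a minimal sketch (not part of the PR) of how the new registry entry is exercised; str2model and default_parameters both appear in this diff, while the driver lines below are illustrative only:

    # Sketch: resolve the new model name through the registry extended above.
    from models import str2model

    NGBoost = str2model("NGBoost")       # lazily imports models.tree_models.NGBoost
    print(NGBoost.default_parameters())  # {'learning_rate': 0.08, 'n_estimators': 100, ...}
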
70 changes: 69 additions & 1 deletion TabSurvey/models/tree_models.py
@@ -4,7 +4,8 @@
import lightgbm as lgb
import numpy as np
import xgboost as xgb

from ngboost import NGBClassifier, NGBRegressor
from ngboost.distns import k_categorical  # used below for classification; missing from the original imports
from models.basemodel import BaseModel

"""
@@ -103,6 +104,71 @@ def default_parameters(cls):
}
return params

class NGBoost(BaseModel):

    # TabZilla: add default number of boosting rounds
    # default_epochs = 500

    def __init__(self, params, args):
        super().__init__(params, args)
        # NGBoost has no objective/metric/num_class constructor arguments
        # (those keys were copied over from the LightGBM wrapper and would make
        # NGBRegressor/NGBClassifier raise a TypeError). The task type is read
        # from self.args in fit(); self.params carries only the tuned
        # hyperparameters: learning_rate, n_estimators, minibatch_frac, col_sample.
        self.params["verbose"] = True

    def fit(self, X, y, X_val=None, y_val=None):
        # Forward the tuned hyperparameters to the constructor; NGBoost's own
        # fit() accepts the validation set as X_val / Y_val.
        if self.args.objective == "regression":
            self.model = NGBRegressor(**self.params).fit(X, y, X_val=X_val, Y_val=y_val)
        elif self.args.objective == "classification":
            self.model = NGBClassifier(Dist=k_categorical(self.args.num_classes), **self.params).fit(
                X, y, X_val=X_val, Y_val=y_val
            )
        else:  # binary
            self.model = NGBClassifier(Dist=k_categorical(2), **self.params).fit(X, y, X_val=X_val, Y_val=y_val)
        return [], []
    def predict(self, X):
        return super().predict(X)

    def predict_proba(self, X):
        # NGBClassifier.predict() returns class labels; class probabilities
        # come from predict_proba().
        probabilities = self.model.predict_proba(X)
        self.prediction_probabilities = probabilities
        return self.prediction_probabilities

    @classmethod
    def define_trial_parameters(cls, trial, args):
        params = {
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
            "n_estimators": trial.suggest_int("n_estimators", 100, 250, log=True),
            "minibatch_frac": trial.suggest_float("minibatch_frac", 0.4, 0.8, log=True),
            "col_sample": trial.suggest_float("col_sample", 0.3, 0.7, log=True),
        }
        return params

    @classmethod
    def get_random_parameters(cls, seed):
        rs = np.random.RandomState(seed)
        params = {
            # 3.0 * 10**uniform(-2, -1): log-uniform learning rate in [0.03, 0.3]
            "learning_rate": 3.0 * np.power(10, rs.uniform(-2, -1)),
            # 50 * uniform(1, 5): 50-250 estimators (wider at the low end than
            # the 100-250 Optuna search range above)
            "n_estimators": int(np.round(50 * rs.uniform(1, 5))),
            "minibatch_frac": rs.uniform(0.4, 0.8),
            "col_sample": rs.uniform(0.3, 0.7),
        }
        return params

@classmethod
def default_parameters(cls):
params = {
"learning_rate": 0.08,
"n_estimators": 100,
"minibatch_frac": 0.5,
"col_sample": 0.5,
}
return params


"""
CatBoost (https://catboost.ai/)
@@ -274,3 +340,5 @@ def default_parameters(cls):
"learning_rate": 0.08,
}
return params
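
As a sanity check on the NGBoost wrapper added above, a small self-contained sketch (not part of the PR) exercising NGBClassifier with the same constructor arguments; the sklearn dataset is illustrative only:

    # Sketch: NGBClassifier with the default hyperparameters defined above.
    from ngboost import NGBClassifier
    from ngboost.distns import k_categorical
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

    ngb = NGBClassifier(
        Dist=k_categorical(2),  # binary target, as in the wrapper's "binary" branch
        learning_rate=0.08,
        n_estimators=100,
        minibatch_frac=0.5,
        col_sample=0.5,
        verbose=False,
    )
    ngb.fit(X_train, y_train, X_val=X_val, Y_val=y_val)
    print(ngb.predict_proba(X_val)[:3])  # class probabilities, one row per sample

NGBoost's distinguishing feature, the full predictive distribution (ngb.pred_dist(X_val)), is not surfaced by the wrapper; only point predictions and class probabilities are.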


10 changes: 5 additions & 5 deletions TabSurvey/tabzilla_alg_handler.py
@@ -83,14 +83,18 @@ def get_CatBoost():

return model

@register_model("gbdt")
def get_NGBoost():
from models.tree_models import NGBoost as model

return model

@register_model("gbdt")
def get_LightGBM():
from models.tree_models import LightGBM as model

return model


@register_model("gbdt")
def get_ModelTree():
from models.modeltree import ModelTree as model
@@ -178,10 +182,6 @@ def get_DANet():

return model

-@register_model("torch")
-def get_Hopular():
-    from models.hopular_model import HopularModel as model


##############################################################
# tensorflow models
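
A brief note on the handler change above: register_model("gbdt") files NGBoost in the same environment group as XGBoost, CatBoost, and LightGBM. A minimal sketch (not part of the PR; the getter and classmethod names are taken from this diff):

    # Sketch: the registered getter resolves the class lazily, and
    # get_random_parameters is deterministic for a fixed seed.
    from tabzilla_alg_handler import get_NGBoost

    NGBoost = get_NGBoost()
    assert NGBoost.get_random_parameters(seed=0) == NGBoost.get_random_parameters(seed=0)
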
5 changes: 3 additions & 2 deletions TabSurvey/testall.sh
@@ -9,9 +9,9 @@ TORCH_ENV="torch"
KERAS_ENV="tensorflow"

# "LinearModel" "KNN" "DecisionTree" "RandomForest"
-# "XGBoost" "CatBoost" "LightGBM"
+# "XGBoost" "CatBoost" "NGBoost" "LightGBM"
# "MLP" "TabNet" "VIME"
-# MODELS=( "LinearModel" "KNN" "DecisionTree" "RandomForest" "XGBoost" "CatBoost" "LightGBM" "MLP" "TabNet" "VIME")
+# MODELS=( "LinearModel" "KNN" "DecisionTree" "RandomForest" "XGBoost" "NGBoost" "CatBoost" "LightGBM" "MLP" "TabNet" "VIME")

declare -A MODELS
MODELS=( ["LinearModel"]=$SKLEARN_ENV
@@ -20,6 +20,7 @@ MODELS=( ["LinearModel"]=$SKLEARN_ENV
["DecisionTree"]=$SKLEARN_ENV
["RandomForest"]=$SKLEARN_ENV
["XGBoost"]=$GBDT_ENV
["NGBoost"]=$GBDT_ENV
["CatBoost"]=$GBDT_ENV
["LightGBM"]=$GBDT_ENV
["MLP"]=$TORCH_ENV
5 changes: 3 additions & 2 deletions conda_envs/gbdt.yml
@@ -99,15 +99,16 @@ dependencies:
- kiwisolver==1.4.3
- lightgbm==3.3.1
- matplotlib==3.5.2
-  - modeltrees==0.1.1
+  #- modeltrees==0.1.1
- numpy==1.23.0
- pandas==1.4.3
- pillow==9.1.1
- plotly==5.9.0
-  - python-graphviz==0.20
+  #- python-graphviz==0.20
- scikit-learn==1.1.1
- scipy==1.8.1
- tenacity==8.0.1
- threadpoolctl==3.1.0
- xgboost==1.5.0
- ngboost==0.3.13
prefix: /opt/conda/envs/gbdt
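
After updating the environment, a quick check (a sketch, not part of the PR) that the pinned ngboost wheel resolved:

    # Run inside the gbdt env; importlib.metadata is stdlib on Python 3.8+.
    from importlib.metadata import version
    print(version("ngboost"))  # expected: 0.3.13, per conda_envs/gbdt.yml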
1 change: 1 addition & 0 deletions scripts/experiments/cpu_experiments.sh
@@ -17,6 +17,7 @@ MODELS_ENVS=(
RandomForest:$SKLEARN_ENV
# XGBoost:$GBDT_ENV
# CatBoost:$GBDT_ENV
# NGBoost:$GBDT_ENV
LightGBM:$GBDT_ENV
# MLP:$TORCH_ENV
# ModelTree:$GBDT_ENV <- bug
1 change: 1 addition & 0 deletions scripts/experiments/gpu_experiments_a.sh
@@ -18,6 +18,7 @@ MODELS_ENVS=(
# DecisionTree:$SKLEARN_ENV
# RandomForest:$SKLEARN_ENV
XGBoost:$GBDT_ENV
NGBoost:$GBDT_ENV
CatBoost:$GBDT_ENV
# LightGBM:$GBDT_ENV
MLP:$TORCH_ENV
1 change: 1 addition & 0 deletions scripts/experiments/gpu_experiments_b.sh
@@ -19,6 +19,7 @@ MODELS_ENVS=(
# DecisionTree:$SKLEARN_ENV
# RandomForest:$SKLEARN_ENV
# XGBoost:$GBDT_ENV
# NGBoost:$GBDT_ENV
# CatBoost:$GBDT_ENV
# LightGBM:$GBDT_ENV
# MLP:$TORCH_ENV
1 change: 1 addition & 0 deletions scripts/tests/all_algs_two_datasets.sh
@@ -16,6 +16,7 @@ MODELS_ENVS=(
DecisionTree:$SKLEARN_ENV
RandomForest:$SKLEARN_ENV
XGBoost:$GBDT_ENV
NGBoost:$GBDT_ENV
CatBoost:$GBDT_ENV
LightGBM:$GBDT_ENV
MLP:$TORCH_ENV