Merge pull request #23 from uclamii/xgbearly_gridsearch_bug
Xgbearly gridsearch bug
lshpaner authored Jul 16, 2024
2 parents 9026ccf + ba151b7 commit 51b39ac
Showing 6 changed files with 33 additions and 24 deletions.
README.md (8 additions, 8 deletions)
@@ -1,6 +1,6 @@
![Model Tuner Logo](https://github.com/uclamii/model_tuner/blob/main/assets/modeltunertiny.png?raw=true)

- [![Downloads](https://pepy.tech/badge/model_tuner)](https://pepy.tech/project/model_tuner) [![PyPI](https://img.shields.io/pypi/v/model_tuner.svg)](https://pypi.org/project/model_tuner/) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12727323.svg)](https://doi.org/10.5281/zenodo.12727323)
+ [![Downloads](https://pepy.tech/badge/model_tuner)](https://pepy.tech/project/model_tuner) [![PyPI](https://img.shields.io/pypi/v/model_tuner.svg)](https://pypi.org/project/model_tuner/) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12727322.svg)](https://doi.org/10.5281/zenodo.12727322)


The model_tuner class is a versatile and powerful tool designed to facilitate the training, evaluation, and tuning of machine learning models. It supports various functionalities such as handling imbalanced data, applying different scaling and imputation techniques, calibrating models, and conducting cross-validation. This class is particularly useful for model selection and hyperparameter tuning, ensuring optimal performance across different metrics.
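For orientation, here is a minimal usage sketch assembled from the parameters that appear in the `notebooks/xgb_early_test.py` diff further down. The `Model` import path, the dataset, the `name`/`estimator_name`/`estimator` values, and the exact `grid_search_param_tuning` call signature are illustrative assumptions, not part of this commit; the real class may require additional arguments in this version.

```python
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from model_tuner import Model  # import path assumed for illustration

# Toy data; the real notebook loads its own dataset.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

estimator_name = "xgb"
tuned_parameters = {
    f"{estimator_name}__max_depth": [3, 10, 20],
    f"{estimator_name}__learning_rate": [1e-4],
    f"{estimator_name}__n_estimators": [30],
    f"{estimator_name}__early_stopping_rounds": [10],
    f"{estimator_name}__eval_metric": ["logloss"],
}

model = Model(
    name="xgb_early_demo",          # name assumed for illustration
    estimator_name=estimator_name,
    estimator=XGBClassifier(),
    kfold=False,                    # the notebook passes a variable here
    stratify_y=True,
    grid=tuned_parameters,
    randomized_grid=False,
    n_iter=4,
    xgboost_early=True,
    scoring=["roc_auc"],
    n_splits=10,
)

# Method name taken from the model_tuner_utils.py diff below; the exact
# call signature is an assumption.
model.grid_search_param_tuning(X, y)
```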
@@ -36,7 +36,7 @@ pip install model_tuner

## 📄 Official Documentation

- https://uclamii.github.io/model_tuner/getting_started.html
+ https://uclamii.github.io/model_tuner

## 🌐 Author Website

@@ -51,17 +51,17 @@ https://www.mii.ucla.edu/
If you use `model_tuner` in your research or projects, please consider citing it.

```bibtex
- @software{arthur_funnell_2024_12727323,
- author = {Arthur Funnell and
+ @software{arthur_funnell_2024_12727322,
+ author = {Arthur Funnell,
Leonid Shpaner and
Panayiotis Petousis},
- title = {uclamii/model\_tuner: model tuner 0.0.11a},
+ title = {uclamii/model\_tuner: model tuner 0.0.12a},
month = jul,
year = 2024,
publisher = {Zenodo},
- version = {0.0.11a},
- doi = {10.5281/zenodo.12727323},
- url = {https://doi.org/10.5281/zenodo.12727323}
+ version = {0.0.12a},
+ doi = {10.5281/zenodo.12727322},
+ url = {https://doi.org/10.5281/zenodo.12727322}
}
```

notebooks/xgb_early_test.py (3 additions, 3 deletions)
@@ -29,7 +29,7 @@
tuned_parameters = {
f"{estimator_name}__max_depth": [3, 10, 20, 200, 500],
f"{estimator_name}__learning_rate": [1e-4],
f"{estimator_name}__n_estimators": [100000],
f"{estimator_name}__n_estimators": [30],
f"{estimator_name}__early_stopping_rounds": [10],
f"{estimator_name}__verbose": [True],
f"{estimator_name}__eval_metric": ["logloss"],
@@ -48,8 +48,8 @@
kfold=kfold,
stratify_y=True,
grid=tuned_parameters,
- randomized_grid=True,
- n_iter=1,
+ randomized_grid=False,
+ n_iter=4,
xgboost_early=True,
scoring=["roc_auc"],
n_splits=10,
setup.py (1 addition, 1 deletion)
@@ -2,7 +2,7 @@

setup(
name="model_tuner",
version="0.0.11a",
version="0.0.12a",
author="UCLA CTSI ML Team: Leonid Shpaner, Arthur Funnell, Panayiotis Petousis",
author_email="[email protected]",
description="A Python library for tuning Machine Learning models.",
src/model_tuner/__init__.py (1 addition, 1 deletion)
@@ -1,3 +1,3 @@
__version__ = "0.0.11a"
__version__ = "0.0.12a"

from .main import *
src/model_tuner/model_tuner_utils.py (19 additions, 10 deletions)
@@ -306,8 +306,8 @@ def calibrateModel(

if self.imbalance_sampler:
    self.process_imbalance_sampler(X_train, y_train)
- else:
-     self.fit(X_train, y_train)
+
+ self.fit(X_train, y_train)
# calibrate model, and save output
self.estimator = CalibratedClassifierCV(
    self.estimator,
@@ -355,14 +355,14 @@ def calibrateModel(
# fit estimator
if self.imbalance_sampler:
    self.process_imbalance_sampler(X_train, y_train)
- else:
-     # fit model
-     self.fit(
-         X_train,
-         y_train,
-         score=score,
-         validation_data=(X_valid, y_valid),
-     )
+
+ # fit model
+ self.fit(
+     X_train,
+     y_train,
+     score=score,
+     validation_data=(X_valid, y_valid),
+ )
# calibrate model, and save output

self.estimator = CalibratedClassifierCV(
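The two hunks above make `calibrateModel` fit the base estimator unconditionally before wrapping it in `CalibratedClassifierCV`; previously the fit call was reachable only in the `else` branch, i.e. when no imbalance sampler was used. A minimal sketch of that fit-then-calibrate pattern, using a stand-in scikit-learn estimator and toy data rather than the class's own attributes (`cv="prefit"` is available in scikit-learn releases contemporary with this commit):

```python
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

# Always fit the base estimator first...
base = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# ...then calibrate the already-fitted model on held-out data.
calibrated = CalibratedClassifierCV(base, method="sigmoid", cv="prefit")
calibrated.fit(X_valid, y_valid)

print(calibrated.predict_proba(X_valid[:3]))
```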
@@ -746,6 +746,14 @@ def grid_search_param_tuning(

if self.imbalance_sampler:
    self.process_imbalance_sampler(X_train, y_train)
+
+
+ ## casting the ParameterGrid Object to a list so that we can update
+ ## the hyperparameters in both random grid and non random grid
+ ## scenarios
+ if not self.randomized_grid:
+     self.grid = list(self.grid)
+
for score in self.scoring:
    scores = []
    for index, params in enumerate(tqdm(self.grid)):
@@ -820,6 +828,7 @@

# Update the parameters in the grid
self.grid[index] = params


else:
clf = self.estimator.set_params(**params).fit(X_train, y_train)
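The cast to `list` above is the core of this fix: scikit-learn's `ParameterGrid` is iterable and indexable, but it does not support item assignment, so the later `self.grid[index] = params` write-back (shown in the following hunk, used to update the grid after early-stopping tuning) fails unless the grid has been converted to a list first. A small standalone sketch of that behavior:

```python
from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({"max_depth": [3, 10], "n_estimators": [30]})

try:
    grid[0] = {"max_depth": 3, "n_estimators": 30}
except TypeError as exc:
    # ParameterGrid rejects item assignment.
    print(f"TypeError: {exc}")

# Casting to a list (as the hunk above does for the non-randomized case)
# makes in-place updates possible, e.g. recording a tuned n_estimators.
grid = list(grid)
grid[0] = {**grid[0], "n_estimators": 25}
print(grid[0])
```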
src/model_tuner/pickleObjects.py (1 addition, 1 deletion)
@@ -1,6 +1,6 @@
import pickle
import joblib
- # import numpy as np
+ import numpy as np


def dumpObjects(file, filename, use_pickle=True):
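The only change here is re-enabling the numpy import. For context, a hedged sketch of how the module's `dumpObjects` helper (whose signature appears above) might be called; the saved object, the file path, and the pickle-versus-joblib behavior are assumptions inferred from the imports and the `use_pickle` parameter name:

```python
from sklearn.linear_model import LogisticRegression
from model_tuner.pickleObjects import dumpObjects  # module path as in the diff

model = LogisticRegression()
# Persist an object to disk; use_pickle=True presumably selects pickle
# over joblib (assumption based on the signature shown above).
dumpObjects(model, "logistic_model.pkl", use_pickle=True)
```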
