Deep gp with MFEIBO

automl · Sep 5, 2023 · 8b15558 · 8b15558
1 parent b682926
commit 8b15558
Show file tree

Hide file tree

Showing 6 changed files with 163 additions and 63 deletions.
diff --git a/neps_examples/efficiency/multi_fidelity_dyhpo.py b/neps_examples/efficiency/multi_fidelity_dyhpo.py
@@ -82,5 +82,4 @@ def run_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate,
     # field above.
     max_cost_total=50,
     surrogate_model="gp",
-    hp_kernels=["m52"],
 )
diff --git a/src/neps/optimizers/bayesian_optimization/models/__init__.py b/src/neps/optimizers/bayesian_optimization/models/__init__.py
@@ -1,7 +1,9 @@
+from .deepGP import DeepGP
 from .gp import ComprehensiveGP
 from .gp_hierarchy import ComprehensiveGPHierarchy
 
 SurrogateModelMapping = {
+    "deep_gp": DeepGP,
     "gp": ComprehensiveGP,
-    "gp_hierarchy": ComprehensiveGPHierarchy
+    "gp_hierarchy": ComprehensiveGPHierarchy,
 }
diff --git a/src/neps/optimizers/bayesian_optimization/models/deepGP.py b/src/neps/optimizers/bayesian_optimization/models/deepGP.py
@@ -15,7 +15,7 @@
 )
 
 
-class DeepKernel(nn.Module):
+class NeuralFeatureExtractor(nn.Module):
     """
     Neural network to be used in the DeepGP
     """
@@ -133,6 +133,7 @@ def __init__(
         pipeline_space: SearchSpace,
         neural_network_args: dict | None = None,
         logger=None,
+        **kwargs,  # pylint: disable=unused-argument
     ):
         super().__init__()
         self.__preprocess_search_space(pipeline_space)
@@ -154,7 +155,7 @@ def __init__(
         )
 
         # build the neural network
-        self.nn = DeepKernel(self.input_size, **neural_network_args)
+        self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args)
 
         self.logger = logger or logging.getLogger("neps")
 
@@ -302,10 +303,10 @@ def _preprocess_input(
 
     def _preprocess_y(self, y_train: list[float], normalize_y: bool = False):
         y_train_array = np.array(y_train, dtype=np.single)
+        self.min_y = y_train_array.min()  # pylint: disable=attribute-defined-outside-init
+        self.max_y = y_train_array.max()  # pylint: disable=attribute-defined-outside-init
         if normalize_y:
-            y_train_array = (y_train_array - y_train_array.min()) / (
-                y_train_array.max() - y_train_array.min()
-            )
+            y_train_array = (y_train_array - self.min_y) / (self.max_y - self.min_y)
         y_train_array = torch.tensor(y_train_array).to(device=self.device)
         return y_train_array
 
@@ -373,6 +374,7 @@ def __train_model(
 
         for epoch_nr in range(0, n_epochs):
             if count_down == 0:
+                # stop training if performance doesn't increase after `patience` epochs
                 break
 
             nr_examples_batch = x_train.size(dim=0)
@@ -402,7 +404,6 @@ def __train_model(
                     f"for the past {patience - count_down} epochs "
                     f"the training will stop in {count_down} epochs"
                 )
-
                 count_down -= 1
 
             mse = gpytorch.metrics.mean_squared_error(output, self.model.train_targets)
@@ -423,8 +424,17 @@ def __train_model(
             #     training_errored = True
             #     break
 
-    def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]):
+    def set_prediction_learning_curves(self, learning_curves: list[list[float]]):
+        # pylint: disable=attribute-defined-outside-init
+        self.prediction_learning_curves = learning_curves
+        # pylint: enable=attribute-defined-outside-init
+
+    def predict(
+        self, x: list[SearchSpace], learning_curves: list[list[float]] | None = None
+    ):
         # Preprocess input
+        if learning_curves is None:
+            learning_curves = self.prediction_learning_curves
         x_test, test_budgets, learning_curves = self._preprocess_input(
             x, learning_curves, self.normalize_budget
         )
@@ -445,22 +455,12 @@ def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]):
 
             preds = self.likelihood(self.model(projected_test_x))
 
-        means = (
-            preds.mean.detach()
-            .to("cpu")
-            .numpy()
-            .reshape(
-                -1,
-            )
-        )
-        cov = (
-            preds.variance.detach()
-            .to("cpu")
-            .numpy()
-            .reshape(
-                -1,
-            )
-        )
+        means = preds.mean.detach()
+
+        if self.normalize_y:
+            means = (means + self.min_y) * (self.max_y - self.min_y)
+
+        cov = torch.diag(torch.pow(preds.stddev.detach(), 2))
 
         return means, cov
 
@@ -489,3 +489,4 @@ def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]):
     means, stds = deep_gp.predict(configs, lcs)
 
     print(list(zip(means, y)))
+    print(stds)
diff --git a/src/neps/optimizers/multi_fidelity/dyhpo.py b/src/neps/optimizers/multi_fidelity/dyhpo.py
@@ -13,15 +13,12 @@
 from ..base_optimizer import BaseOptimizer
 from ..bayesian_optimization.acquisition_functions import AcquisitionMapping
 from ..bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition
-from ..bayesian_optimization.acquisition_functions.prior_weighted import (
-    DecayingPriorWeightedAcquisition,
-)
 from ..bayesian_optimization.acquisition_samplers import AcquisitionSamplerMapping
 from ..bayesian_optimization.acquisition_samplers.base_acq_sampler import (
     AcquisitionSampler,
 )
 from ..bayesian_optimization.kernels.get_kernels import get_kernels
-from .mf_bo import MFEIModel
+from .mf_bo import MFEIDeepModel, MFEIModel
 from .utils import MFObservedData
 
 
@@ -45,14 +42,14 @@ def __init__(
         ignore_errors: bool = False,
         logger=None,
         # arguments for model
-        surrogate_model: str | Any = "gp",
+        surrogate_model: str | Any = "deep_gp",
         surrogate_model_args: dict = None,
         domain_se_kernel: str = None,
         graph_kernels: list = None,
         hp_kernels: list = None,
         acquisition: str | BaseAcquisition = acquisition,
         acquisition_sampler: str | AcquisitionSampler = "freeze-thaw",
-        model_policy: Any = MFEIModel,
+        model_policy: Any = MFEIDeepModel,
         initial_design_fraction: float = 0.75,
         initial_design_size: int = 10,
         initial_design_budget: int = None,
@@ -130,6 +127,14 @@ def __init__(
             self.surrogate_model_args[
                 "vectorial_features"
             ] = pipeline_space.get_vectorial_dim()
+
+        # Temporary fix due to different data
+        # preprocessing pipelines of `deep_gp` and `gp`
+        # TODO: Remove this in a future iteration (karibbov)
+        if surrogate_model == "deep_gp":
+            model_policy = MFEIDeepModel
+        elif surrogate_model == "gp":
+            model_policy = MFEIModel
         # The surrogate model is initialized here
         self.model_policy = model_policy(
             pipeline_space=pipeline_space,
@@ -151,26 +156,26 @@ def __init__(
         self.count = 0
 
     def _set_initial_design(
-            self, 
-            initial_design_size: int=None, 
-            initial_design_budget: int=None, 
-            initial_design_fraction: float=0.75
-        ) -> tuple[int|float, int|float]:
-        """ Sets the initial design size and budget."""
-        
+        self,
+        initial_design_size: int = None,
+        initial_design_budget: int = None,
+        initial_design_fraction: float = 0.75,
+    ) -> tuple[int | float, int | float]:
+        """Sets the initial design size and budget."""
+
         # user specified initial_design_budget takes precedence
         if initial_design_budget is not None:
             _initial_design_budget = initial_design_budget
         else:
             _initial_design_budget = self.max_budget
-        
+
         # user specified initial_design_size takes precedence
         _initial_design_size = np.inf
         if initial_design_size is not None:
             _initial_design_size = initial_design_size
         if (
-            initial_design_size is None or 
-            _initial_design_size * self.min_budget > _initial_design_budget
+            initial_design_size is None
+            or _initial_design_size * self.min_budget > _initial_design_budget
         ):
             # if the initial design budget is less than the budget spent on sampling
             # the initial design at the minimum budget (fidelity)
@@ -181,7 +186,7 @@ def _set_initial_design(
             _init_budget = initial_design_fraction * self.max_budget
             # number of min budget evaluations fitting within initial design budget
             _initial_design_size = _init_budget // self.min_budget
-        
+
         self.logger.info(
             f"\n\ninitial_design_size: {_initial_design_size}\n"
             f"initial_design_budget: {_initial_design_budget}\n"
@@ -210,27 +215,27 @@ def get_budget_value(self, budget_level: int | float) -> int | float:
             )
         self._budget_list.append(budget_val)
         return budget_val
-    
+
     def total_budget_spent(self) -> int | float:
-        """ Calculates the toal budget spent so far.
+        """Calculates the total budget spent so far.
 
-        This is calculated as a function of the fidelity range provided, that takes into 
+        This is calculated as a function of the fidelity range provided, that takes into
         account the minimum budget and the step size.
         """
         if len(self.observed_configs.df) == 0:
             return 0
-        _df = self.observed_configs.get_learning_curves()  
+        _df = self.observed_configs.get_learning_curves()
         # budgets are columns now in _df
         budget_used = 0
 
         for idx in _df.index:
             # finds the budget steps taken per config excluding first min_budget step
-            _n = (~_df.loc[idx].isna()).sum() - 1   # budget_id starts from 0
+            _n = (~_df.loc[idx].isna()).sum() - 1  # budget_id starts from 0
             budget_used += self.get_budget_value(_n)
-        
+
         return budget_used
 
-    def is_init_phase(self, budget_based: bool=True) -> bool:
+    def is_init_phase(self, budget_based: bool = True) -> bool:
         if budget_based:
             if self.total_budget_spent() < self._initial_design_budget:
                 return True
@@ -322,15 +327,15 @@ def _fit_models(self):
         )
 
     def _randomly_promote(self) -> tuple[SearchSpace, int]:
-        """ Samples the initial design.
-        
-        With an unbiased coin toss (p=0.5) it decides whether to sample a new 
-        configuration or continue a partial configuration, until initial_design_size 
+        """Samples the initial design.
+
+        With an unbiased coin toss (p=0.5) it decides whether to sample a new
+        configuration or continue a partial configuration, until initial_design_size
         configurations have been sampled.
         """
         # sampling a configuration ID from the observed ones
         _config_ids = np.unique(
-            self.observed_configs.df.index.get_level_values('config_id').values
+            self.observed_configs.df.index.get_level_values("config_id").values
         )
         _config_id = np.random.choice(_config_ids)
         # extracting the config
@@ -363,11 +368,8 @@ def get_config_and_ids(  # pylint: disable=no-self-use
             )
             config.fidelity.value = self.min_budget
             _config_id = self.observed_configs.next_config_id()
-        elif (
-            self.is_init_phase(budget_based=True)
-            or self._model_update_failed
-        ):
-            # promote a config randomly if initial design size is satisfied but the 
+        elif self.is_init_phase(budget_based=True) or self._model_update_failed:
+            # promote a config randomly if initial design size is satisfied but the
             # initial design budget has not been exhausted
             self.logger.info("promoting...")
             config, _config_id = self._randomly_promote()
@@ -379,12 +381,31 @@ def get_config_and_ids(  # pylint: disable=no-self-use
             # main acquisition call here after initial design is turned off
             self.logger.info("acquiring...")
             samples = self.acquisition_sampler.sample()
+
+            # Get the learning curves if the surrogate model requires it
+            sample_lcs = []
+            if hasattr(
+                self.acquisition.surrogate_model, "set_prediction_learning_curves"
+            ):
+                for idx in samples.index:
+                    if idx in self.observed_configs.df.index.levels[0]:
+                        budget_level = self.get_budget_level(samples[idx]) - 1
+                        lc = self.observed_configs.extract_learning_curve(
+                            idx, budget_level
+                        )
+                    else:
+                        lc = [0.0]
+                    sample_lcs.append(lc)
+                self.acquisition.surrogate_model.set_prediction_learning_curves(
+                    sample_lcs
+                )
+
             eis = self.acquisition.eval(  # type: ignore[attr-defined]
                 x=samples.to_list(), asscalar=True
             )
             # maximizing EI
             _ids = np.argsort(eis)[-1]
-            # samples should have new configs with fidelities set to as required by 
+            # samples should have new configs with fidelities set as required by
             # the acquisition sampler
             config = samples.iloc[_ids]
             _config_id = samples.index[_ids]