From 8b15558d1034cc813dc79e00915c58e6ae11939a Mon Sep 17 00:00:00 2001 From: karibbov Date: Tue, 5 Sep 2023 21:44:39 +0200 Subject: [PATCH] Deep gp with MFEIBO --- .../efficiency/multi_fidelity_dyhpo.py | 1 - .../bayesian_optimization/models/__init__.py | 4 +- .../bayesian_optimization/models/deepGP.py | 47 +++++----- src/neps/optimizers/multi_fidelity/dyhpo.py | 91 ++++++++++++------- src/neps/optimizers/multi_fidelity/mf_bo.py | 65 ++++++++++++- src/neps/optimizers/multi_fidelity/utils.py | 18 ++++ 6 files changed, 163 insertions(+), 63 deletions(-) diff --git a/neps_examples/efficiency/multi_fidelity_dyhpo.py b/neps_examples/efficiency/multi_fidelity_dyhpo.py index f105ed45..e6ba5557 100644 --- a/neps_examples/efficiency/multi_fidelity_dyhpo.py +++ b/neps_examples/efficiency/multi_fidelity_dyhpo.py @@ -82,5 +82,4 @@ def run_pipeline(pipeline_directory, previous_pipeline_directory, learning_rate, # field above. max_cost_total=50, surrogate_model="gp", - hp_kernels=["m52"], ) diff --git a/src/neps/optimizers/bayesian_optimization/models/__init__.py b/src/neps/optimizers/bayesian_optimization/models/__init__.py index 4fc6c982..ca9d2247 100644 --- a/src/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/src/neps/optimizers/bayesian_optimization/models/__init__.py @@ -1,7 +1,9 @@ +from .deepGP import DeepGP from .gp import ComprehensiveGP from .gp_hierarchy import ComprehensiveGPHierarchy SurrogateModelMapping = { + "deep_gp": DeepGP, "gp": ComprehensiveGP, - "gp_hierarchy": ComprehensiveGPHierarchy + "gp_hierarchy": ComprehensiveGPHierarchy, } diff --git a/src/neps/optimizers/bayesian_optimization/models/deepGP.py b/src/neps/optimizers/bayesian_optimization/models/deepGP.py index 22ceadeb..e2f4278f 100644 --- a/src/neps/optimizers/bayesian_optimization/models/deepGP.py +++ b/src/neps/optimizers/bayesian_optimization/models/deepGP.py @@ -15,7 +15,7 @@ ) -class DeepKernel(nn.Module): +class NeuralFeatureExtractor(nn.Module): """ Neural network to be used 
in the DeepGP """ @@ -133,6 +133,7 @@ def __init__( pipeline_space: SearchSpace, neural_network_args: dict | None = None, logger=None, + **kwargs, # pylint: disable=unused-argument ): super().__init__() self.__preprocess_search_space(pipeline_space) @@ -154,7 +155,7 @@ def __init__( ) # build the neural network - self.nn = DeepKernel(self.input_size, **neural_network_args) + self.nn = NeuralFeatureExtractor(self.input_size, **neural_network_args) self.logger = logger or logging.getLogger("neps") @@ -302,10 +303,10 @@ def _preprocess_input( def _preprocess_y(self, y_train: list[float], normalize_y: bool = False): y_train_array = np.array(y_train, dtype=np.single) + self.min_y = y_train_array.min() # pylint: disable=attribute-defined-outside-init + self.max_y = y_train_array.max() # pylint: disable=attribute-defined-outside-init if normalize_y: - y_train_array = (y_train_array - y_train_array.min()) / ( - y_train_array.max() - y_train_array.min() - ) + y_train_array = (y_train_array - self.min_y) / (self.max_y - self.min_y) y_train_array = torch.tensor(y_train_array).to(device=self.device) return y_train_array @@ -373,6 +374,7 @@ def __train_model( for epoch_nr in range(0, n_epochs): if count_down == 0: + # stop training if performance doesn't increase after `patience` epochs break nr_examples_batch = x_train.size(dim=0) @@ -402,7 +404,6 @@ def __train_model( f"for the past {patience - count_down} epochs " f"the training will stop in {count_down} epochs" ) - count_down -= 1 mse = gpytorch.metrics.mean_squared_error(output, self.model.train_targets) @@ -423,8 +424,17 @@ def __train_model( # training_errored = True # break - def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]): + def set_prediction_learning_curves(self, learning_curves: list[list[float]]): + # pylint: disable=attribute-defined-outside-init + self.prediction_learning_curves = learning_curves + # pylint: enable=attribute-defined-outside-init + + def predict( + self, x: 
list[SearchSpace], learning_curves: list[list[float]] | None = None + ): # Preprocess input + if learning_curves is None: + learning_curves = self.prediction_learning_curves x_test, test_budgets, learning_curves = self._preprocess_input( x, learning_curves, self.normalize_budget ) @@ -445,22 +455,12 @@ def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]): preds = self.likelihood(self.model(projected_test_x)) - means = ( - preds.mean.detach() - .to("cpu") - .numpy() - .reshape( - -1, - ) - ) - cov = ( - preds.variance.detach() - .to("cpu") - .numpy() - .reshape( - -1, - ) - ) + means = preds.mean.detach() + + if self.normalize_y: + means = means * (self.max_y - self.min_y) + self.min_y # invert the min-max scaling applied in _preprocess_y + + cov = torch.diag(torch.pow(preds.stddev.detach(), 2)) return means, cov @@ -489,3 +489,4 @@ def predict(self, x: list[SearchSpace], learning_curves: list[list[float]]): means, stds = deep_gp.predict(configs, lcs) print(list(zip(means, y))) + print(stds) diff --git a/src/neps/optimizers/multi_fidelity/dyhpo.py b/src/neps/optimizers/multi_fidelity/dyhpo.py index d90db389..6ea0613a 100644 --- a/src/neps/optimizers/multi_fidelity/dyhpo.py +++ b/src/neps/optimizers/multi_fidelity/dyhpo.py @@ -13,15 +13,12 @@ from ..base_optimizer import BaseOptimizer from ..bayesian_optimization.acquisition_functions import AcquisitionMapping from ..bayesian_optimization.acquisition_functions.base_acquisition import BaseAcquisition -from ..bayesian_optimization.acquisition_functions.prior_weighted import ( - DecayingPriorWeightedAcquisition, -) from ..bayesian_optimization.acquisition_samplers import AcquisitionSamplerMapping from ..bayesian_optimization.acquisition_samplers.base_acq_sampler import ( AcquisitionSampler, ) from ..bayesian_optimization.kernels.get_kernels import get_kernels -from .mf_bo import MFEIModel +from .mf_bo import MFEIDeepModel, MFEIModel from .utils import MFObservedData @@ -45,14 +42,14 @@ def __init__( ignore_errors: bool = False, logger=None, # 
arguments for model - surrogate_model: str | Any = "gp", + surrogate_model: str | Any = "deep_gp", surrogate_model_args: dict = None, domain_se_kernel: str = None, graph_kernels: list = None, hp_kernels: list = None, acquisition: str | BaseAcquisition = acquisition, acquisition_sampler: str | AcquisitionSampler = "freeze-thaw", - model_policy: Any = MFEIModel, + model_policy: Any = MFEIDeepModel, initial_design_fraction: float = 0.75, initial_design_size: int = 10, initial_design_budget: int = None, @@ -130,6 +127,14 @@ def __init__( self.surrogate_model_args[ "vectorial_features" ] = pipeline_space.get_vectorial_dim() + + # Temporary fix due to different data + # preprocessing pipelines of `deep_gp` and `gp` + # TODO: Remove this in a future iteration (karibbov) + if surrogate_model == "deep_gp": + model_policy = MFEIDeepModel + elif surrogate_model == "gp": + model_policy = MFEIModel # The surrogate model is initalized here self.model_policy = model_policy( pipeline_space=pipeline_space, @@ -151,26 +156,26 @@ def __init__( self.count = 0 def _set_initial_design( - self, - initial_design_size: int=None, - initial_design_budget: int=None, - initial_design_fraction: float=0.75 - ) -> tuple[int|float, int|float]: - """ Sets the initial design size and budget.""" - + self, + initial_design_size: int = None, + initial_design_budget: int = None, + initial_design_fraction: float = 0.75, + ) -> tuple[int | float, int | float]: + """Sets the initial design size and budget.""" + # user specified initial_design_size takes precedence if initial_design_budget is not None: _initial_design_budget = initial_design_budget else: _initial_design_budget = self.max_budget - + # user specified initial_design_size takes precedence _initial_design_size = np.inf if initial_design_size is not None: _initial_design_size = initial_design_size if ( - initial_design_size is None or - _initial_design_size * self.min_budget > _initial_design_budget + initial_design_size is None + or 
_initial_design_size * self.min_budget > _initial_design_budget ): # if the initial design budget is less than the budget spend on sampling # the initial design at the minimum budget (fidelity) @@ -181,7 +186,7 @@ def _set_initial_design( _init_budget = initial_design_fraction * self.max_budget # number of min budget evaluations fitting within initial design budget _initial_design_size = _init_budget // self.min_budget - + self.logger.info( f"\n\ninitial_design_size: {_initial_design_size}\n" f"initial_design_budget: {_initial_design_budget}\n" @@ -210,27 +215,27 @@ def get_budget_value(self, budget_level: int | float) -> int | float: ) self._budget_list.append(budget_val) return budget_val - + def total_budget_spent(self) -> int | float: - """ Calculates the toal budget spent so far. + """Calculates the total budget spent so far. - This is calculated as a function of the fidelity range provided, that takes into + This is calculated as a function of the fidelity range provided, that takes into account the minimum budget and the step size. """ if len(self.observed_configs.df) == 0: return 0 - _df = self.observed_configs.get_learning_curves() + _df = self.observed_configs.get_learning_curves() # budgets are columns now in _df budget_used = 0 for idx in _df.index: # finds the budget steps taken per config excluding first min_budget step - _n = (~_df.loc[idx].isna()).sum() - 1 # budget_id starts from 0 + _n = (~_df.loc[idx].isna()).sum() - 1 # budget_id starts from 0 budget_used += self.get_budget_value(_n) - + return budget_used - def is_init_phase(self, budget_based: bool=True) -> bool: + def is_init_phase(self, budget_based: bool = True) -> bool: if budget_based: if self.total_budget_spent() < self._initial_design_budget: return True @@ -322,15 +327,15 @@ def _fit_models(self): ) def _randomly_promote(self) -> tuple[SearchSpace, int]: - """ Samples the initial design. 
- - With an unbiased coin toss (p=0.5) it decides whether to sample a new - configuration or continue a partial configuration, until initial_design_size + """Samples the initial design. + + With an unbiased coin toss (p=0.5) it decides whether to sample a new + configuration or continue a partial configuration, until initial_design_size configurations have been sampled. """ # sampling a configuration ID from the observed ones _config_ids = np.unique( - self.observed_configs.df.index.get_level_values('config_id').values + self.observed_configs.df.index.get_level_values("config_id").values ) _config_id = np.random.choice(_config_ids) # extracting the config @@ -363,11 +368,8 @@ def get_config_and_ids( # pylint: disable=no-self-use ) config.fidelity.value = self.min_budget _config_id = self.observed_configs.next_config_id() - elif ( - self.is_init_phase(budget_based=True) - or self._model_update_failed - ): - # promote a config randomly if initial design size is satisfied but the + elif self.is_init_phase(budget_based=True) or self._model_update_failed: + # promote a config randomly if initial design size is satisfied but the # initial design budget has not been exhausted self.logger.info("promoting...") config, _config_id = self._randomly_promote() @@ -379,12 +381,31 @@ def get_config_and_ids( # pylint: disable=no-self-use # main acquisition call here after initial design is turned off self.logger.info("acquiring...") samples = self.acquisition_sampler.sample() + + # Get the learning curves if the surrogate model requires it + sample_lcs = [] + if hasattr( + self.acquisition.surrogate_model, "set_prediction_learning_curves" + ): + for idx in samples.index: + if idx in self.observed_configs.df.index.levels[0]: + budget_level = self.get_budget_level(samples[idx]) - 1 + lc = self.observed_configs.extract_learning_curve( + idx, budget_level + ) + else: + lc = [0.0] + sample_lcs.append(lc) + self.acquisition.surrogate_model.set_prediction_learning_curves( + sample_lcs + ) 
+ eis = self.acquisition.eval( # type: ignore[attr-defined] x=samples.to_list(), asscalar=True ) # maximizing EI _ids = np.argsort(eis)[-1] - # samples should have new configs with fidelities set to as required by + # samples should have new configs with fidelities set to as required by # the acquisition sampler config = samples.iloc[_ids] _config_id = samples.index[_ids] diff --git a/src/neps/optimizers/multi_fidelity/mf_bo.py b/src/neps/optimizers/multi_fidelity/mf_bo.py index cabf2455..ae0e92c0 100644 --- a/src/neps/optimizers/multi_fidelity/mf_bo.py +++ b/src/neps/optimizers/multi_fidelity/mf_bo.py @@ -8,10 +8,10 @@ from ..bayesian_optimization.models import SurrogateModelMapping from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity - """Base class for multi-fidelity Bayesian optimization for SH-based algorithms.""" -class MFBOBase: + +class MFBOBase: def _fit_models(self): """Performs necessary procedures to build and use models.""" @@ -220,7 +220,6 @@ class MFEIModel(ModelBase): def __init__(self, *args, **kwargs): self.num_train_configs = 0 self.observed_configs = kwargs.get("observed_configs", None) - super().__init__(*args, **kwargs) def _fantasize_pending(self, *args, **kwargs): # pylint: disable=unused-argument @@ -238,6 +237,7 @@ def _fantasize_pending(self, *args, **kwargs): # pylint: disable=unused-argumen pending_condition = self.observed_configs.pending_condition if pending_condition.any(): + # TODO: Unique might not work here replace this (karibbov) pending_configs = ( self.observed_configs.df[pending_condition] .loc[(), self.observed_configs.config_col] @@ -262,3 +262,62 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None) self.surrogate_model.fit(train_x, train_y) return self.surrogate_model, decay_t + + +class MFEIDeepModel(ModelBase): + def __init__( + self, + pipeline_space, + surrogate_model: str = "deep_gp", + surrogate_model_args: dict = None, + ): + self.pipeline_space = 
pipeline_space + + surrogate_model_args = ( + surrogate_model_args if surrogate_model_args is not None else {} + ) + + if surrogate_model == "deep_gp": + surrogate_model_args.update({"pipeline_space": pipeline_space}) + + super().__init__(pipeline_space, surrogate_model, surrogate_model_args) + + def _fantasize_pending(self, train_x, train_y, pending_x): + # Select configs that are neither pending nor resulted in error + completed_configs = self.observed_configs.completed_runs.copy(deep=True) + train_x, train_lcs, train_y = self.observed_configs.get_training_data_4DyHPO( + completed_configs + ) + + pending_condition = self.observed_configs.pending_condition + + if pending_condition.any(): + pending_configs = self.observed_configs.df.loc[pending_condition] + pending_x, pending_lcs, _ = self.observed_configs.get_training_data_4DyHPO( + pending_configs + ) + self.surrogate_model.fit(train_x, train_y, train_lcs) + _y, _ = self.surrogate_model.predict(pending_x, pending_lcs) + _y = _y.tolist() + + train_x.extend(pending_x) + train_y.extend(_y) + train_lcs.extend(pending_lcs) + return train_x, train_y, train_lcs + + def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): + if train_x is None: + train_x = [] + if train_y is None: + train_y = [] + if pending_x is None: + pending_x = [] + + if decay_t is None: + decay_t = len(train_x) + + train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) + # print(train_x, train_y, train_lcs) + self.surrogate_model.fit(train_x, train_y, train_lcs) + + return self.surrogate_model, decay_t diff --git a/src/neps/optimizers/multi_fidelity/utils.py b/src/neps/optimizers/multi_fidelity/utils.py index 1a306324..6f715323 100644 --- a/src/neps/optimizers/multi_fidelity/utils.py +++ b/src/neps/optimizers/multi_fidelity/utils.py @@ -189,6 +189,23 @@ def reduce_to_max_seen_budgets(self): def get_partial_configs_at_max_seen(self): return self.reduce_to_max_seen_budgets()[self.config_col] + def 
extract_learning_curve(self, config_id: int, budget_id: int) -> list[float]: + lcs = self.get_learning_curves() + lc = lcs.loc[config_id, :budget_id].values.flatten().tolist() + return lc + + def get_training_data_4DyHPO(self, df: pd.DataFrame): + configs = [] + learning_curves = [] + performance = [] + for idx, row in df.iterrows(): + config_id = idx[0] + budget_id = idx[1] + configs.append(row[self.config_col]) + performance.append(row[self.perf_col]) + learning_curves.append(self.extract_learning_curve(config_id, budget_id)) + return configs, learning_curves, performance + if __name__ == "__main__": # TODO: Either delete these or convert them to tests (karibbov) @@ -221,6 +238,7 @@ def get_partial_configs_at_max_seen(self): "Configuration ID of the best observed performance so far: ", data.get_best_learning_curve_id(), ) + print(data.extract_learning_curve(0, 2)) # data.df.sort_index(inplace=True) print(data.get_partial_configs_at_max_seen())