From bb7abf2c198744ef81cb52262ec22b608a2a9f2e Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Thu, 29 Aug 2024 19:58:48 +0200 Subject: [PATCH] First ifBO successful run push --- .../acquisition_functions/mf_ei.py | 3 - .../acquisition_functions/mf_pi.py | 15 +--- .../bayesian_optimization/models/__init__.py | 8 +- .../bayesian_optimization/models/pfn.py | 77 +++++++++++++++++++ neps/optimizers/default_searchers/ifbo.yaml | 6 +- neps/optimizers/multi_fidelity/dyhpo.py | 2 +- neps/optimizers/multi_fidelity/mf_bo.py | 72 +++++++++++------ neps/optimizers/multi_fidelity/utils.py | 24 +----- pyproject.toml | 2 +- 9 files changed, 141 insertions(+), 68 deletions(-) create mode 100644 neps/optimizers/bayesian_optimization/models/pfn.py diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py index 5139d4b4..c025578e 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py @@ -404,18 +404,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain inc_value = self.sample_threshold(inc_value) - print(f"Threshold for EI: {inc_value}") # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for EI: {horizon}") for i, config in x.items(): if i <= max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value diff --git 
a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py index e64ea2e3..e41e0528 100644 --- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py +++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py @@ -87,10 +87,10 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]: # deepcopy - _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index) - if self.surrogate_model_name == "pfn": + _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index) + if self.surrogate_model_name == "ftpfn": _x, _x_tok, inc_list = self.preprocess_pfn( - x.copy() + deepcopy(x.copy()) ) # IMPORTANT change from vanilla-EI pi = self.eval_pfn_pi(_x_tok, inc_list) elif self.surrogate_model_name in ["deep_gp", "dpl"]: @@ -122,7 +122,6 @@ def eval_pfn_pi( pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list) if len(pi.shape) == 2: pi = pi.flatten() - print(f"Maximum PI: {pi.max()}") return pi def eval_gp_pi( @@ -311,19 +310,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain t_value = self.sample_threshold(inc_value) - print(f"Threshold for PI: {inc_value - t_value}") inc_value = t_value # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for PI: {horizon}") for i, config in x.items(): if i <= 
max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value @@ -344,7 +340,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) - #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) @@ -399,19 +394,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: inc_list = [] steps_passed = len(self.observations.completed_runs) - print(f"Steps acquired: {steps_passed}") # Like EI-AtMax, use the global incumbent as a basis for the EI threshold inc_value = min(self.observations.get_best_performance_for_each_budget()) # Extension: Add a random min improvement threshold to encourage high risk high gain t_value = self.sample_threshold(inc_value) - print(f"Threshold for EI: {inc_value - t_value}") inc_value = t_value # Like MFEI: set fidelities to query using horizon as self.b_step # Extension: Unlike DyHPO, we sample the horizon randomly over the full range horizon = self.sample_horizon(steps_passed) - print(f"Horizon for EI: {horizon}") for i, config in x.items(): if i <= max(self.observations.seen_config_ids): current_fidelity = config.fidelity.value @@ -431,7 +423,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]: current_fidelity = 0 config.update_hp_values({config.fidelity_name: horizon}) inc_list.append(inc_value) - #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}") # Drop unused configs x.drop(labels=indices_to_drop, inplace=True) diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py index c76bedfd..3bebbffb 100755 --- a/neps/optimizers/bayesian_optimization/models/__init__.py +++ b/neps/optimizers/bayesian_optimization/models/__init__.py @@ -8,14 +8,12 @@ except ImportError as e: 
DeepGP = MissingDependencyError("gpytorch", e) -try: - from .pfn import PFN_SURROGATE # only if available locally -except Exception as e: - PFN_SURROGATE = MissingDependencyError("pfn", e) +from .pfn import IFBOSurrogate + SurrogateModelMapping = { "deep_gp": DeepGP, "gp": ComprehensiveGP, "gp_hierarchy": ComprehensiveGPHierarchy, - "pfn": PFN_SURROGATE, + "ftpfn": IFBOSurrogate, } diff --git a/neps/optimizers/bayesian_optimization/models/pfn.py b/neps/optimizers/bayesian_optimization/models/pfn.py new file mode 100644 index 00000000..fcfd542b --- /dev/null +++ b/neps/optimizers/bayesian_optimization/models/pfn.py @@ -0,0 +1,77 @@ +from typing import Any +import numpy as np +import pandas as pd +from pathlib import Path +import torch + +from ifbo import FTPFN + + +class IFBOSurrogate: + """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" + + def __init__(self, target_path: Path = None, version: str = "0.0.1", *args, **kwargs): + super().__init__(*args, **kwargs) + self.ftpfn = FTPFN(target_path=target_path, version=version) + self.target_path = self.ftpfn.target_path + self.version = self.ftpfn.version + self.train_x = None + self.train_y = None + + @property + def device(self): + return self.ftpfn.device + + def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor: + return self.ftpfn.model( + self._cast_tensor_shapes(self.train_x), + self._cast_tensor_shapes(self.train_y), + self._cast_tensor_shapes(test_x) + ) + + def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: + if len(x.shape) == 3 and x.shape[1] == 1: + return x + if len(x.shape) == 2: + return x.reshape(x.shape[0], 1, x.shape[1]) + if len(x.shape) == 1: + return x.reshape(x.shape[0], 1) + raise ValueError(f"Shape not recognized: {x.shape}") + + @torch.no_grad() + def get_pi(self, test_x, y_best): + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.pi( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def 
get_ei(self, test_x, y_best): + logits = self._get_logits(test_x) + return self.ftpfn.model.criterion.ei( + logits.squeeze(), best_f=(1 - y_best).unsqueeze(1) + ) + + @torch.no_grad() + def get_lcb(self, test_x, beta: float=(1-.682)/2): + logits = self._get_logits(test_x) + # y values are always transformed for maximizing + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=False # IMPORTANT to be False, should calculate the LCB using the lower-bound ICDF as per beta + ) + return lcb + + @torch.no_grad() + def get_ucb(self, test_x, beta: float=(1-.682)/2): + logits = self._get_logits(test_x) + # y values are always transformed for maximizing + lcb = self.ftpfn.model.criterion.ucb( + logits=logits, + best_f=None, + rest_prob=beta, + maximize=True # IMPORTANT to be True, should calculate the UCB using the upper-bound ICDF as per beta + ) + return lcb diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml index 1eecea6a..38442175 100644 --- a/neps/optimizers/default_searchers/ifbo.yaml +++ b/neps/optimizers/default_searchers/ifbo.yaml @@ -1,2 +1,6 @@ strategy: ifbo -acquisition: MFPI-random \ No newline at end of file +surrogate_model: ftpfn +surrogate_model_args: + version: "0.0.1" +acquisition: MFPI-random +model_policy: PFNSurrogate \ No newline at end of file diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py index db8de242..31735ed3 100755 --- a/neps/optimizers/multi_fidelity/dyhpo.py +++ b/neps/optimizers/multi_fidelity/dyhpo.py @@ -131,7 +131,7 @@ def __init__( raise NotImplementedError elif surrogate_model == "gp": model_policy = FreezeThawModel - elif surrogate_model == "pfn": + elif surrogate_model == "ftpfn": model_policy = PFNSurrogate else: raise ValueError("Invalid model option selected!") diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py index 4ab15e4b..e2522083 100755 
--- a/neps/optimizers/multi_fidelity/mf_bo.py +++ b/neps/optimizers/multi_fidelity/mf_bo.py @@ -2,19 +2,16 @@ from __future__ import annotations from copy import deepcopy - import numpy as np import pandas as pd import torch from neps.utils.common import instance_from_map -# from ..bayesian_optimization.models import SurrogateModelMapping from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping -# from ..multi_fidelity.utils import normalize_vectorize_config from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config -# from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity -from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids +from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity + class MFBOBase: @@ -199,15 +196,13 @@ def __init__( self.surrogate_model_args = ( surrogate_model_args if surrogate_model_args is not None else {} ) - if self.surrogate_model_name in ["deep_gp", "pfn"]: + if self.surrogate_model_name in ["deep_gp"]: self.surrogate_model_args.update({"pipeline_space": pipeline_space}) elif self.surrogate_model_name == "dpl": - self.surrogate_model_args.update( - {"pipeline_space": self.pipeline_space, - "observed_data": self.observed_configs} - ) - - # instantiate the surrogate model + self.surrogate_model_args.update({ + "pipeline_space": self.pipeline_space, + "observed_data": self.observed_configs + }) self.surrogate_model = instance_from_map( SurrogateModelMapping, self.surrogate_model_name, @@ -241,8 +236,11 @@ def _fantasize_pending(self, train_x, train_y, pending_x): def _fit(self, train_x, train_y, train_lcs): if self.surrogate_model_name in ["gp", "gp_hierarchy"]: self.surrogate_model.fit(train_x, train_y) - elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]: + elif self.surrogate_model_name in ["deep_gp", 
"pfn", "dpl",]: self.surrogate_model.fit(train_x, train_y, train_lcs) + elif self.surrogate_model_name == "ftpfn": + # do nothing - no training required + pass else: # check neps/optimizers/bayesian_optimization/models/__init__.py for options raise ValueError( @@ -284,7 +282,7 @@ def set_state( # only to handle tabular spaces if self.pipeline_space.has_tabular: - if self.surrogate_model_name in ["deep_gp", "pfn"]: + if self.surrogate_model_name in ["deep_gp"]: self.surrogate_model_args.update( {"pipeline_space": self.pipeline_space.raw_tabular_space} ) @@ -323,10 +321,10 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None) if decay_t is None: decay_t = len(train_x) train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) - self._fit(train_x, train_y, train_lcs) + self.surrogate_model._fit(train_x, train_y, train_lcs) return self.surrogate_model, decay_t - + class PFNSurrogate(FreezeThawModel): """Special class to deal with PFN surrogate model and freeze-thaw acquisition.""" @@ -336,10 +334,32 @@ def __init__(self, *args, **kwargs): self.train_x = None self.train_y = None + def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None): + if train_x is None: + train_x = [] + if train_y is None: + train_y = [] + if pending_x is None: + pending_x = [] + + if decay_t is None: + decay_t = len(train_x) + train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x) + self._fit(train_x, train_y, train_lcs) + + return self.surrogate_model, decay_t + def _fit(self, *args): # pylint: disable=unused-argument - assert self.surrogate_model_name == "pfn" + # no training required, only preprocessing the training data as context during inference self.preprocess_training_set() - self.surrogate_model.fit(self.train_x, self.train_y) + + def _predict(self, test_x, test_lcs): + assert "pfn" in self.surrogate_model_name + test_x = self.preprocess_test_set(test_x) + return 
self.surrogate_model(self.train_x, self.train_y, test_x) + + def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor: + return x def preprocess_training_set(self): _configs = self.observed_configs.df.config.values.copy() @@ -361,8 +381,12 @@ def preprocess_training_set(self): idxs = idxs.astype(float) idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper # TODO: account for fantasization - self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device) - self.train_y = torch.Tensor(performances).to(device) + self.surrogate_model.train_x = self._cast_tensor_shapes( + torch.Tensor(np.hstack([idxs, configs])).to(device) + ) + self.surrogate_model.train_y = self._cast_tensor_shapes( + torch.Tensor(performances).to(device) + ) def preprocess_test_set(self, test_x): _len = len(self.observed_configs.all_configs_list()) @@ -379,10 +403,12 @@ def preprocess_test_set(self, test_x): token_ids = np.vstack((existing_token_ids, new_token_ids)) configs = np.array([normalize_vectorize_config(c) for c in test_x]) - test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device) - return test_x + self.surrogate_model.test_x = self._cast_tensor_shapes( + torch.Tensor(np.hstack([token_ids, configs])).to(device) + ) + return self.surrogate_model.test_x def _predict(self, test_x, test_lcs): assert self.surrogate_model_name == "pfn" test_x = self.preprocess_test_set(test_x) - return self.surrogate_model.predict(self.train_x, self.train_y, test_x) + return self.surrogate_model(self.train_x, self.train_y, test_x) diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py index aa6c579c..efa5621e 100644 --- a/neps/optimizers/multi_fidelity/utils.py +++ b/neps/optimizers/multi_fidelity/utils.py @@ -3,6 +3,7 @@ from typing import Any, Sequence +from copy import deepcopy import numpy as np import pandas as pd import torch @@ -34,7 +35,7 @@ def normalize_vectorize_config( config: SearchSpace, ignore_fidelity: bool = True ) -> np.ndarray: 
_new_vector = [] - for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity).items(): + for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity=ignore_fidelity).items(): _new_vector.extend(hp_list) return np.array(_new_vector) @@ -361,33 +362,12 @@ def token_ids(self) -> np.ndarray: index=[(0, 2), (1, 2), (0, 1)], ) - print(data.df) - print(data.get_learning_curves()) - print( - "Mapping of budget IDs into best performing configurations at each fidelity:\n", - data.get_incumbents_for_budgets(), - ) - print( - "Best Performance at each budget level:\n", - data.get_best_performance_for_each_budget(), - ) - print( - "Configuration ID of the best observed performance so far: ", - data.get_best_learning_curve_id(), - ) - print(data.extract_learning_curve(0, 2)) - # data.df.sort_index(inplace=True) - print(data.get_partial_configs_at_max_seen()) - # When updating multiple indices at a time both the values in the data dictionary and the indices should be lists data.update_data({"perf": [1.8, 1.5]}, index=[(1, 1), (0, 0)]) - print(data.df) data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"]) # when adding a single row second level list is not necessary data.add_data(["conf1", 0.5], index=(0, 0)) - print(data.df) data.update_data({"perf": [1.8], "budget_col": [5]}, index=(0, 0)) - print(data.df) diff --git a/pyproject.toml b/pyproject.toml index b68f5e5a..4e3da049 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" -ifbo = ">=0.3.5" +ifbo = ">=0.3.8" [tool.poetry.group.dev.dependencies] ruff = "^0.4"