First ifBO successful run push
Neeratyoy committed Aug 29, 2024
1 parent f87f39e commit bb7abf2
Showing 9 changed files with 141 additions and 68 deletions.
@@ -404,18 +404,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
inc_list = []

steps_passed = len(self.observations.completed_runs)
print(f"Steps acquired: {steps_passed}")

# Like EI-AtMax, use the global incumbent as a basis for the EI threshold
inc_value = min(self.observations.get_best_performance_for_each_budget())
# Extension: Add a random min improvement threshold to encourage high risk high gain
inc_value = self.sample_threshold(inc_value)
print(f"Threshold for EI: {inc_value}")

# Like MFEI: set fidelities to query using horizon as self.b_step
# Extension: Unlike DyHPO, we sample the horizon randomly over the full range
horizon = self.sample_horizon(steps_passed)
print(f"Horizon for EI: {horizon}")
for i, config in x.items():
if i <= max(self.observations.seen_config_ids):
current_fidelity = config.fidelity.value
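The comments in the hunk above describe the two randomizations that distinguish the MFPI/MFEI-random acquisitions: an extrapolation horizon drawn over the full remaining fidelity range, and an incumbent threshold relaxed by a random margin. The helpers themselves are not part of this diff; the sketch below only illustrates the idea, and the attribute names (b_step, pipeline_space.fidelity.upper) and sampling ranges are assumptions, not NePS's actual implementation.

    import numpy as np

    def sample_horizon(self, steps_passed: int) -> int:
        # Illustration: draw the lookahead horizon uniformly over the whole
        # fidelity range instead of DyHPO's fixed one-step extrapolation.
        return int(np.random.randint(self.b_step, self.pipeline_space.fidelity.upper + 1))

    def sample_threshold(self, inc_value: float) -> float:
        # Illustration: tighten the incumbent by a log-uniform margin so some
        # iterations only reward large ("high risk, high gain") improvements.
        return inc_value - 10 ** np.random.uniform(-4, -1)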
@@ -87,10 +87,10 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:

def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]:
# deepcopy
_x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index)
if self.surrogate_model_name == "pfn":
_x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index)
if self.surrogate_model_name == "ftpfn":
_x, _x_tok, inc_list = self.preprocess_pfn(
x.copy()
deepcopy(x.copy())
) # IMPORTANT change from vanilla-EI
pi = self.eval_pfn_pi(_x_tok, inc_list)
elif self.surrogate_model_name in ["deep_gp", "dpl"]:
@@ -122,7 +122,6 @@ def eval_pfn_pi(
pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list)
if len(pi.shape) == 2:
pi = pi.flatten()
print(f"Maximum PI: {pi.max()}")
return pi

def eval_gp_pi(
@@ -311,19 +310,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
inc_list = []

steps_passed = len(self.observations.completed_runs)
print(f"Steps acquired: {steps_passed}")

# Like EI-AtMax, use the global incumbent as a basis for the EI threshold
inc_value = min(self.observations.get_best_performance_for_each_budget())
# Extension: Add a random min improvement threshold to encourage high risk high gain
t_value = self.sample_threshold(inc_value)
print(f"Threshold for PI: {inc_value - t_value}")
inc_value = t_value

# Like MFEI: set fidelities to query using horizon as self.b_step
# Extension: Unlike DyHPO, we sample the horizon randomly over the full range
horizon = self.sample_horizon(steps_passed)
print(f"Horizon for PI: {horizon}")
for i, config in x.items():
if i <= max(self.observations.seen_config_ids):
current_fidelity = config.fidelity.value
@@ -344,7 +340,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
current_fidelity = 0
config.update_hp_values({config.fidelity_name: horizon})
inc_list.append(inc_value)
#print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")

# Drop unused configs
x.drop(labels=indices_to_drop, inplace=True)
@@ -399,19 +394,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
inc_list = []

steps_passed = len(self.observations.completed_runs)
print(f"Steps acquired: {steps_passed}")

# Like EI-AtMax, use the global incumbent as a basis for the EI threshold
inc_value = min(self.observations.get_best_performance_for_each_budget())
# Extension: Add a random min improvement threshold to encourage high risk high gain
t_value = self.sample_threshold(inc_value)
print(f"Threshold for EI: {inc_value - t_value}")
inc_value = t_value

# Like MFEI: set fidelities to query using horizon as self.b_step
# Extension: Unlike DyHPO, we sample the horizon randomly over the full range
horizon = self.sample_horizon(steps_passed)
print(f"Horizon for EI: {horizon}")
for i, config in x.items():
if i <= max(self.observations.seen_config_ids):
current_fidelity = config.fidelity.value
@@ -431,7 +423,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
current_fidelity = 0
config.update_hp_values({config.fidelity_name: horizon})
inc_list.append(inc_value)
#print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")

# Drop unused configs
x.drop(labels=indices_to_drop, inplace=True)
8 changes: 3 additions & 5 deletions neps/optimizers/bayesian_optimization/models/__init__.py
@@ -8,14 +8,12 @@
except ImportError as e:
DeepGP = MissingDependencyError("gpytorch", e)

try:
from .pfn import PFN_SURROGATE # only if available locally
except Exception as e:
PFN_SURROGATE = MissingDependencyError("pfn", e)
from .pfn import IFBOSurrogate


SurrogateModelMapping = {
"deep_gp": DeepGP,
"gp": ComprehensiveGP,
"gp_hierarchy": ComprehensiveGPHierarchy,
"pfn": PFN_SURROGATE,
"ftpfn": IFBOSurrogate,
}
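For orientation, this registry is consumed by name through instance_from_map in mf_bo.py further down; conceptually it is just a dictionary lookup plus instantiation, as in this hand-rolled sketch (build_surrogate is a hypothetical helper, not NePS code):

    from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping

    def build_surrogate(name: str, **kwargs):
        entry = SurrogateModelMapping[name]  # KeyError for unknown names
        if isinstance(entry, Exception):     # a stored MissingDependencyError
            raise entry
        return entry(**kwargs)

    surrogate = build_surrogate("ftpfn", version="0.0.1")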
77 changes: 77 additions & 0 deletions neps/optimizers/bayesian_optimization/models/pfn.py
@@ -0,0 +1,77 @@
from typing import Any, Optional
import numpy as np
import pandas as pd
from pathlib import Path
import torch

from ifbo import FTPFN


class IFBOSurrogate:
"""Special class to deal with PFN surrogate model and freeze-thaw acquisition."""

    def __init__(self, target_path: Optional[Path] = None, version: str = "0.0.1", *args, **kwargs):
super().__init__(*args, **kwargs)
self.ftpfn = FTPFN(target_path=target_path, version=version)
self.target_path = self.ftpfn.target_path
self.version = self.ftpfn.version
self.train_x = None
self.train_y = None

@property
def device(self):
return self.ftpfn.device

def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor:
return self.ftpfn.model(
self._cast_tensor_shapes(self.train_x),
self._cast_tensor_shapes(self.train_y),
self._cast_tensor_shapes(test_x)
)

def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor:
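        # Normalize tensors to the (sequence, batch=1, features) layout that the
        # FTPFN transformer consumes (inferred from the reshape cases below).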
if len(x.shape) == 3 and x.shape[1] == 1:
return x
if len(x.shape) == 2:
return x.reshape(x.shape[0], 1, x.shape[1])
if len(x.shape) == 1:
return x.reshape(x.shape[0], 1)
raise ValueError(f"Shape not recognized: {x.shape}")

@torch.no_grad()
def get_pi(self, test_x, y_best):
logits = self._get_logits(test_x)
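        # Assumed convention: NePS minimizes loss while FTPFN scores on a
        # maximization scale, hence the incumbent threshold is mapped through 1 - y.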
return self.ftpfn.model.criterion.pi(
logits.squeeze(), best_f=(1 - y_best).unsqueeze(1)
)

@torch.no_grad()
def get_ei(self, test_x, y_best):
logits = self._get_logits(test_x)
return self.ftpfn.model.criterion.ei(
logits.squeeze(), best_f=(1 - y_best).unsqueeze(1)
)

@torch.no_grad()
    def get_lcb(self, test_x, beta: float = (1 - 0.682) / 2):
        logits = self._get_logits(test_x)
        # y values are always transformed for maximizing
        lcb = self.ftpfn.model.criterion.ucb(
            logits=logits,
            best_f=None,
            rest_prob=beta,
            maximize=False,  # IMPORTANT: False reads the lower-tail ICDF at beta, i.e. the LCB
        )
        return lcb

@torch.no_grad()
    def get_ucb(self, test_x, beta: float = (1 - 0.682) / 2):
        logits = self._get_logits(test_x)
        # y values are always transformed for maximizing
        ucb = self.ftpfn.model.criterion.ucb(
            logits=logits,
            best_f=None,
            rest_prob=beta,
            maximize=True,  # IMPORTANT: True reads the upper-tail ICDF at beta, i.e. the UCB
        )
        return ucb
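A rough usage sketch for the class above. The column layout of the inputs ([config id, normalized fidelity, hyperparameters]) follows the preprocessing code in mf_bo.py further down; the shapes and toy values here are stand-ins, not real data:

    import torch

    surrogate = IFBOSurrogate(version="0.0.1")  # loads the pretrained FTPFN weights
    # 10 observed rows of [config_id, fidelity in (0, 1], 3 hyperparameters]
    surrogate.train_x = torch.rand(10, 5).to(surrogate.device)
    surrogate.train_y = torch.rand(10).to(surrogate.device)  # maximization scale

    test_x = torch.rand(4, 5).to(surrogate.device)
    inc = torch.rand(4).to(surrogate.device)  # per-candidate incumbent threshold
    pi = surrogate.get_pi(test_x, inc)        # probability of improvement per row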
6 changes: 5 additions & 1 deletion neps/optimizers/default_searchers/ifbo.yaml
@@ -1,2 +1,6 @@
strategy: ifbo
acquisition: MFPI-random
surrogate_model: ftpfn
surrogate_model_args:
version: "0.0.1"
acquisition: MFPI-random
model_policy: PFNSurrogate
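For context, a user would select this default searcher by name; a minimal sketch, assuming the neps.run keyword API of this period, with train_and_evaluate and pipeline_space as user-supplied placeholders:

    import neps

    neps.run(
        run_pipeline=train_and_evaluate,  # user function reporting loss per fidelity step
        pipeline_space=pipeline_space,    # must contain a fidelity parameter
        root_directory="results/ifbo",
        max_evaluations_total=50,
        searcher="ifbo",                  # resolves to the YAML above
    )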
2 changes: 1 addition & 1 deletion neps/optimizers/multi_fidelity/dyhpo.py
@@ -131,7 +131,7 @@ def __init__(
raise NotImplementedError
elif surrogate_model == "gp":
model_policy = FreezeThawModel
elif surrogate_model == "pfn":
elif surrogate_model == "ftpfn":
model_policy = PFNSurrogate
else:
raise ValueError("Invalid model option selected!")
72 changes: 49 additions & 23 deletions neps/optimizers/multi_fidelity/mf_bo.py
@@ -2,19 +2,16 @@
from __future__ import annotations

from copy import deepcopy

import numpy as np
import pandas as pd
import torch

from neps.utils.common import instance_from_map
# from ..bayesian_optimization.models import SurrogateModelMapping
from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping
# from ..multi_fidelity.utils import normalize_vectorize_config
from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config
# from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids
from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity



class MFBOBase:
@@ -199,15 +196,13 @@ def __init__(
self.surrogate_model_args = (
surrogate_model_args if surrogate_model_args is not None else {}
)
if self.surrogate_model_name in ["deep_gp", "pfn"]:
if self.surrogate_model_name in ["deep_gp"]:
self.surrogate_model_args.update({"pipeline_space": pipeline_space})
elif self.surrogate_model_name == "dpl":
self.surrogate_model_args.update(
{"pipeline_space": self.pipeline_space,
"observed_data": self.observed_configs}
)

# instantiate the surrogate model
self.surrogate_model_args.update({
"pipeline_space": self.pipeline_space,
"observed_data": self.observed_configs
})
self.surrogate_model = instance_from_map(
SurrogateModelMapping,
self.surrogate_model_name,
@@ -241,8 +236,11 @@ def _fantasize_pending(self, train_x, train_y, pending_x):
def _fit(self, train_x, train_y, train_lcs):
if self.surrogate_model_name in ["gp", "gp_hierarchy"]:
self.surrogate_model.fit(train_x, train_y)
elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]:
elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl",]:
self.surrogate_model.fit(train_x, train_y, train_lcs)
elif self.surrogate_model_name == "ftpfn":
# do nothing - no training required
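            # (FTPFN is a prior-fitted network that learns in context: "fitting"
            # just stores the observations as context for a single forward pass
            # at prediction time - see IFBOSurrogate in pfn.py.)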
pass
else:
# check neps/optimizers/bayesian_optimization/models/__init__.py for options
raise ValueError(
@@ -284,7 +282,7 @@ def set_state(

# only to handle tabular spaces
if self.pipeline_space.has_tabular:
if self.surrogate_model_name in ["deep_gp", "pfn"]:
if self.surrogate_model_name in ["deep_gp"]:
self.surrogate_model_args.update(
{"pipeline_space": self.pipeline_space.raw_tabular_space}
)
@@ -323,10 +321,10 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None)
if decay_t is None:
decay_t = len(train_x)
train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x)
self._fit(train_x, train_y, train_lcs)
self.surrogate_model._fit(train_x, train_y, train_lcs)

return self.surrogate_model, decay_t


class PFNSurrogate(FreezeThawModel):
"""Special class to deal with PFN surrogate model and freeze-thaw acquisition."""
@@ -336,10 +334,32 @@ def __init__(self, *args, **kwargs):
self.train_x = None
self.train_y = None

def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None):
if train_x is None:
train_x = []
if train_y is None:
train_y = []
if pending_x is None:
pending_x = []

if decay_t is None:
decay_t = len(train_x)
train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x)
self._fit(train_x, train_y, train_lcs)

return self.surrogate_model, decay_t

def _fit(self, *args): # pylint: disable=unused-argument
assert self.surrogate_model_name == "pfn"
        # no training required; only preprocess the observations into the in-context training set
self.preprocess_training_set()
self.surrogate_model.fit(self.train_x, self.train_y)

def _predict(self, test_x, test_lcs):
assert "pfn" in self.surrogate_model_name
test_x = self.preprocess_test_set(test_x)
return self.surrogate_model(self.train_x, self.train_y, test_x)

def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor:
return x

def preprocess_training_set(self):
_configs = self.observed_configs.df.config.values.copy()
@@ -361,8 +381,12 @@ def preprocess_training_set(self):
idxs = idxs.astype(float)
idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper
# TODO: account for fantasization
self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device)
self.train_y = torch.Tensor(performances).to(device)
self.surrogate_model.train_x = self._cast_tensor_shapes(
torch.Tensor(np.hstack([idxs, configs])).to(device)
)
self.surrogate_model.train_y = self._cast_tensor_shapes(
torch.Tensor(performances).to(device)
)
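        # Assumed row layout, per the hstack above:
        #   [config_id, fidelity / fidelity_upper, hp_1, ..., hp_D]
        # e.g. config 3 at epoch 5 of 100 with HPs (0.2, 0.7) becomes
        # the row [3.0, 0.05, 0.2, 0.7].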

def preprocess_test_set(self, test_x):
_len = len(self.observed_configs.all_configs_list())
@@ -379,10 +403,12 @@ def preprocess_test_set(self, test_x):
token_ids = np.vstack((existing_token_ids, new_token_ids))

configs = np.array([normalize_vectorize_config(c) for c in test_x])
test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device)
return test_x
self.surrogate_model.test_x = self._cast_tensor_shapes(
torch.Tensor(np.hstack([token_ids, configs])).to(device)
)
return self.surrogate_model.test_x

def _predict(self, test_x, test_lcs):
assert self.surrogate_model_name == "pfn"
test_x = self.preprocess_test_set(test_x)
return self.surrogate_model.predict(self.train_x, self.train_y, test_x)
return self.surrogate_model(self.train_x, self.train_y, test_x)