From bb7abf2c198744ef81cb52262ec22b608a2a9f2e Mon Sep 17 00:00:00 2001
From: Neeratyoy Mallik <neeratyoy@gmail.com>
Date: Thu, 29 Aug 2024 19:58:48 +0200
Subject: [PATCH] First ifBO successful run push

---
 .../acquisition_functions/mf_ei.py            |  3 -
 .../acquisition_functions/mf_pi.py            | 15 +---
 .../bayesian_optimization/models/__init__.py  |  8 +-
 .../bayesian_optimization/models/pfn.py       | 77 +++++++++++++++++++
 neps/optimizers/default_searchers/ifbo.yaml   |  6 +-
 neps/optimizers/multi_fidelity/dyhpo.py       |  2 +-
 neps/optimizers/multi_fidelity/mf_bo.py       | 72 +++++++++++------
 neps/optimizers/multi_fidelity/utils.py       | 24 +-----
 pyproject.toml                                |  2 +-
 9 files changed, 141 insertions(+), 68 deletions(-)
 create mode 100644 neps/optimizers/bayesian_optimization/models/pfn.py

diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
index 5139d4b4..c025578e 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
@@ -404,18 +404,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
         inc_list = []
 
         steps_passed = len(self.observations.completed_runs)
-        print(f"Steps acquired: {steps_passed}")
 
         # Like EI-AtMax, use the global incumbent as a basis for the EI threshold
         inc_value = min(self.observations.get_best_performance_for_each_budget())
         # Extension: Add a random min improvement threshold to encourage high risk high gain
         inc_value = self.sample_threshold(inc_value)
-        print(f"Threshold for EI: {inc_value}")
 
         # Like MFEI: set fidelities to query using horizon as self.b_step
         # Extension: Unlike DyHPO, we sample the horizon randomly over the full range
         horizon = self.sample_horizon(steps_passed)
-        print(f"Horizon for EI: {horizon}")
         for i, config in x.items():
             if i <= max(self.observations.seen_config_ids):
                 current_fidelity = config.fidelity.value
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
index e64ea2e3..e41e0528 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
@@ -87,10 +87,10 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
 
     def eval(self, x: pd.Series, asscalar: bool = False) -> Tuple[np.ndarray, pd.Series]:
         # deepcopy
-        _x = pd.Series([x.loc[idx].copy() for idx in x.index.values], index=x.index)
-        if self.surrogate_model_name == "pfn":
+        _x = pd.Series([deepcopy(x.loc[idx]) for idx in x.index.values], index=x.index)
+        if self.surrogate_model_name == "ftpfn":
             _x, _x_tok, inc_list = self.preprocess_pfn(
-                x.copy()
+                deepcopy(x.copy())
             )  # IMPORTANT change from vanilla-EI
             pi = self.eval_pfn_pi(_x_tok, inc_list)
         elif self.surrogate_model_name in ["deep_gp", "dpl"]:
@@ -122,7 +122,6 @@ def eval_pfn_pi(
         pi = self.surrogate_model.get_pi(x.to(self.surrogate_model.device), inc_list)
         if len(pi.shape) == 2:
             pi = pi.flatten()
-        print(f"Maximum PI: {pi.max()}")
         return pi
 
     def eval_gp_pi(
@@ -311,19 +310,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
         inc_list = []
 
         steps_passed = len(self.observations.completed_runs)
-        print(f"Steps acquired: {steps_passed}")
 
         # Like EI-AtMax, use the global incumbent as a basis for the EI threshold
         inc_value = min(self.observations.get_best_performance_for_each_budget())
         # Extension: Add a random min improvement threshold to encourage high risk high gain
         t_value = self.sample_threshold(inc_value)
-        print(f"Threshold for PI: {inc_value - t_value}")
         inc_value = t_value
 
         # Like MFEI: set fidelities to query using horizon as self.b_step
         # Extension: Unlike DyHPO, we sample the horizon randomly over the full range
         horizon = self.sample_horizon(steps_passed)
-        print(f"Horizon for PI: {horizon}")
         for i, config in x.items():
             if i <= max(self.observations.seen_config_ids):
                 current_fidelity = config.fidelity.value
@@ -344,7 +340,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 current_fidelity = 0
                 config.update_hp_values({config.fidelity_name: horizon})
                 inc_list.append(inc_value)
-            #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")
 
         # Drop unused configs
         x.drop(labels=indices_to_drop, inplace=True)
@@ -399,19 +394,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
         inc_list = []
 
         steps_passed = len(self.observations.completed_runs)
-        print(f"Steps acquired: {steps_passed}")
 
         # Like EI-AtMax, use the global incumbent as a basis for the EI threshold
         inc_value = min(self.observations.get_best_performance_for_each_budget())
         # Extension: Add a random min improvement threshold to encourage high risk high gain
         t_value = self.sample_threshold(inc_value)
-        print(f"Threshold for EI: {inc_value - t_value}")
         inc_value = t_value
 
         # Like MFEI: set fidelities to query using horizon as self.b_step
         # Extension: Unlike DyHPO, we sample the horizon randomly over the full range
         horizon = self.sample_horizon(steps_passed)
-        print(f"Horizon for EI: {horizon}")
         for i, config in x.items():
             if i <= max(self.observations.seen_config_ids):
                 current_fidelity = config.fidelity.value
@@ -431,7 +423,6 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 current_fidelity = 0
                 config.update_hp_values({config.fidelity_name: horizon})
                 inc_list.append(inc_value)
-            #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")
 
         # Drop unused configs
         x.drop(labels=indices_to_drop, inplace=True)
diff --git a/neps/optimizers/bayesian_optimization/models/__init__.py b/neps/optimizers/bayesian_optimization/models/__init__.py
index c76bedfd..3bebbffb 100755
--- a/neps/optimizers/bayesian_optimization/models/__init__.py
+++ b/neps/optimizers/bayesian_optimization/models/__init__.py
@@ -8,14 +8,12 @@
 except ImportError as e:
     DeepGP = MissingDependencyError("gpytorch", e)
 
-try:
-    from .pfn import PFN_SURROGATE  # only if available locally
-except Exception as e:
-    PFN_SURROGATE = MissingDependencyError("pfn", e)
+from .pfn import IFBOSurrogate
+
 
 SurrogateModelMapping = {
     "deep_gp": DeepGP,
     "gp": ComprehensiveGP,
     "gp_hierarchy": ComprehensiveGPHierarchy,
-    "pfn": PFN_SURROGATE,
+    "ftpfn": IFBOSurrogate,
 }
diff --git a/neps/optimizers/bayesian_optimization/models/pfn.py b/neps/optimizers/bayesian_optimization/models/pfn.py
new file mode 100644
index 00000000..fcfd542b
--- /dev/null
+++ b/neps/optimizers/bayesian_optimization/models/pfn.py
@@ -0,0 +1,77 @@
+from typing import Any
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import torch
+
+from ifbo import FTPFN
+
+
+class IFBOSurrogate:
+    """Thin wrapper around `ifbo.FTPFN` exposing acquisition helpers.
+
+    Nothing is trained here: `train_x`/`train_y` are assigned from outside and
+    are passed to the model as context together with the test points.
+    """
+
+    def __init__(self, target_path: Path = None, version: str = "0.0.1", *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.ftpfn = FTPFN(target_path=target_path, version=version)
+        # keep the values the FTPFN instance reports, not the raw arguments
+        self.target_path = self.ftpfn.target_path
+        self.version = self.ftpfn.version
+        self.train_x = None
+        self.train_y = None
+
+    @property
+    def device(self):
+        """Device reported by the wrapped FTPFN instance."""
+        return self.ftpfn.device
+
+    def _get_logits(self, test_x: torch.Tensor) -> torch.Tensor:
+        """Call the model on the stored context and `test_x`."""
+        if self.train_x is None or self.train_y is None:
+            raise ValueError("train_x and train_y must be set before inference")
+        shaped = [self._cast_tensor_shapes(t) for t in (self.train_x, self.train_y, test_x)]
+        return self.ftpfn.model(*shaped)
+
+    def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor:
+        """Insert a singleton axis at position 1: (n, d) -> (n, 1, d), (n,) -> (n, 1)."""
+        if x.ndim == 3 and x.shape[1] == 1:
+            return x
+        if x.ndim in (1, 2):
+            return x.reshape(x.shape[0], 1, *x.shape[1:])
+        raise ValueError(f"Shape not recognized: {x.shape}")
+
+    @torch.no_grad()
+    def get_pi(self, test_x, y_best):
+        """Evaluate the criterion's `pi` at `test_x` against incumbent `y_best`."""
+        logits = self._get_logits(test_x)
+        # the criterion receives `1 - y_best` as its `best_f` threshold
+        best_f = (1 - y_best).unsqueeze(1)
+        return self.ftpfn.model.criterion.pi(logits.squeeze(), best_f=best_f)
+
+    @torch.no_grad()
+    def get_ei(self, test_x, y_best):
+        """Evaluate the criterion's `ei` at `test_x` against incumbent `y_best`."""
+        logits = self._get_logits(test_x)
+        best_f = (1 - y_best).unsqueeze(1)
+        return self.ftpfn.model.criterion.ei(logits.squeeze(), best_f=best_f)
+
+    @torch.no_grad()
+    def get_lcb(self, test_x, beta: float = (1 - 0.682) / 2):
+        """Lower confidence bound at `test_x`; `beta` is passed as `rest_prob`."""
+        logits = self._get_logits(test_x)
+        # maximize=False is essential: it selects the lower-bound ICDF for `beta`
+        return self.ftpfn.model.criterion.ucb(
+            logits=logits, best_f=None, rest_prob=beta, maximize=False
+        )
+
+    @torch.no_grad()
+    def get_ucb(self, test_x, beta: float = (1 - 0.682) / 2):
+        """Upper confidence bound at `test_x`; `beta` is passed as `rest_prob`."""
+        logits = self._get_logits(test_x)
+        # maximize=True is essential: it selects the upper-bound ICDF for `beta`
+        return self.ftpfn.model.criterion.ucb(
+            logits=logits, best_f=None, rest_prob=beta, maximize=True
+        )
diff --git a/neps/optimizers/default_searchers/ifbo.yaml b/neps/optimizers/default_searchers/ifbo.yaml
index 1eecea6a..38442175 100644
--- a/neps/optimizers/default_searchers/ifbo.yaml
+++ b/neps/optimizers/default_searchers/ifbo.yaml
@@ -1,2 +1,6 @@
 strategy: ifbo
-acquisition: MFPI-random
\ No newline at end of file
+surrogate_model: ftpfn
+surrogate_model_args:
+  version: "0.0.1"
+acquisition: MFPI-random
+model_policy: PFNSurrogate
\ No newline at end of file
diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py
index db8de242..31735ed3 100755
--- a/neps/optimizers/multi_fidelity/dyhpo.py
+++ b/neps/optimizers/multi_fidelity/dyhpo.py
@@ -131,7 +131,7 @@ def __init__(
             raise NotImplementedError
         elif surrogate_model == "gp":
             model_policy = FreezeThawModel
-        elif surrogate_model == "pfn":
+        elif surrogate_model == "ftpfn":
             model_policy = PFNSurrogate
         else:
             raise ValueError("Invalid model option selected!")
diff --git a/neps/optimizers/multi_fidelity/mf_bo.py b/neps/optimizers/multi_fidelity/mf_bo.py
index 4ab15e4b..e2522083 100755
--- a/neps/optimizers/multi_fidelity/mf_bo.py
+++ b/neps/optimizers/multi_fidelity/mf_bo.py
@@ -2,19 +2,16 @@
 from __future__ import annotations
 
 from copy import deepcopy
-
 import numpy as np
 import pandas as pd
 import torch
 
 from neps.utils.common import instance_from_map
-# from ..bayesian_optimization.models import SurrogateModelMapping
 from neps.optimizers.bayesian_optimization.models import SurrogateModelMapping
-# from ..multi_fidelity.utils import normalize_vectorize_config
 from neps.optimizers.multi_fidelity.utils import normalize_vectorize_config
-# from ..multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
-from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
 from neps.optimizers.utils import map_real_hyperparameters_from_tabular_ids
+from neps.optimizers.multi_fidelity_prior.utils import calc_total_resources_spent, update_fidelity
+
 
 
 class MFBOBase:
@@ -199,15 +196,13 @@ def __init__(
         self.surrogate_model_args = (
             surrogate_model_args if surrogate_model_args is not None else {}
         )
-        if self.surrogate_model_name in ["deep_gp", "pfn"]:
+        if self.surrogate_model_name in ["deep_gp"]:
             self.surrogate_model_args.update({"pipeline_space": pipeline_space})
         elif self.surrogate_model_name == "dpl":
-            self.surrogate_model_args.update(
-                {"pipeline_space": self.pipeline_space,
-                 "observed_data": self.observed_configs}
-            )
-
-        # instantiate the surrogate model
+            self.surrogate_model_args.update({
+                "pipeline_space": self.pipeline_space,
+                 "observed_data": self.observed_configs
+            })
         self.surrogate_model = instance_from_map(
             SurrogateModelMapping,
             self.surrogate_model_name,
@@ -241,8 +236,11 @@ def _fantasize_pending(self, train_x, train_y, pending_x):
     def _fit(self, train_x, train_y, train_lcs):
         if self.surrogate_model_name in ["gp", "gp_hierarchy"]:
             self.surrogate_model.fit(train_x, train_y)
-        elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl"]:
+        elif self.surrogate_model_name in ["deep_gp", "pfn", "dpl",]:
             self.surrogate_model.fit(train_x, train_y, train_lcs)
+        elif self.surrogate_model_name == "ftpfn":
+            # do nothing - no training required
+            pass
         else:
             # check neps/optimizers/bayesian_optimization/models/__init__.py for options
             raise ValueError(
@@ -284,7 +282,7 @@ def set_state(
 
         # only to handle tabular spaces
         if self.pipeline_space.has_tabular:
-            if self.surrogate_model_name in ["deep_gp", "pfn"]:
+            if self.surrogate_model_name in ["deep_gp"]:
                 self.surrogate_model_args.update(
                     {"pipeline_space": self.pipeline_space.raw_tabular_space}
                 )
@@ -323,10 +321,10 @@ def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None)
         if decay_t is None:
             decay_t = len(train_x)
         train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x)
-        self._fit(train_x, train_y, train_lcs)
+        self._fit(train_x, train_y, train_lcs)
 
         return self.surrogate_model, decay_t
-
+    
 
 class PFNSurrogate(FreezeThawModel):
     """Special class to deal with PFN surrogate model and freeze-thaw acquisition."""
@@ -336,10 +334,32 @@ def __init__(self, *args, **kwargs):
         self.train_x = None
         self.train_y = None
 
+    def update_model(self, train_x=None, train_y=None, pending_x=None, decay_t=None):
+        """Rebuild the PFN context and return `(surrogate_model, decay_t)`.
+
+        Arguments left as `None` become empty lists, and `decay_t` defaults to
+        `len(train_x)`. Pending configs are handed to `_fantasize_pending`
+        before the context is rebuilt by `_fit`.
+        """
+        train_x = [] if train_x is None else train_x
+        train_y = [] if train_y is None else train_y
+        pending_x = [] if pending_x is None else pending_x
+        if decay_t is None:
+            decay_t = len(train_x)
+
+        train_x, train_y, train_lcs = self._fantasize_pending(train_x, train_y, pending_x)
+        # goes through this class's `_fit`, which only preprocesses the context
+        self._fit(train_x, train_y, train_lcs)
+
+        return self.surrogate_model, decay_t
+
     def _fit(self, *args):  # pylint: disable=unused-argument
-        assert self.surrogate_model_name == "pfn"
+        """Prepare the training data as inference context; no training is needed."""
         self.preprocess_training_set()
-        self.surrogate_model.fit(self.train_x, self.train_y)
+
+    def _cast_tensor_shapes(self, x: torch.Tensor) -> torch.Tensor:
+        """Return `x` unchanged (identity hook used while building train/test tensors)."""
+        return x
 
     def preprocess_training_set(self):
         _configs = self.observed_configs.df.config.values.copy()
@@ -361,8 +381,12 @@ def preprocess_training_set(self):
         idxs = idxs.astype(float)
         idxs[:, 1] = idxs[:, 1] / _configs[0].fidelity.upper
         # TODO: account for fantasization
-        self.train_x = torch.Tensor(np.hstack([idxs, configs])).to(device)
-        self.train_y = torch.Tensor(performances).to(device)
+        self.surrogate_model.train_x = self._cast_tensor_shapes(
+            torch.Tensor(np.hstack([idxs, configs])).to(device)
+        )
+        self.surrogate_model.train_y = self._cast_tensor_shapes(
+            torch.Tensor(performances).to(device)
+        )
 
     def preprocess_test_set(self, test_x):
         _len = len(self.observed_configs.all_configs_list())
@@ -379,10 +403,12 @@ def preprocess_test_set(self, test_x):
         token_ids = np.vstack((existing_token_ids, new_token_ids))
 
         configs = np.array([normalize_vectorize_config(c) for c in test_x])
-        test_x = torch.Tensor(np.hstack([token_ids, configs])).to(device)
-        return test_x
+        self.surrogate_model.test_x = self._cast_tensor_shapes(
+            torch.Tensor(np.hstack([token_ids, configs])).to(device)
+        )
+        return self.surrogate_model.test_x
 
     def _predict(self, test_x, test_lcs):
         assert self.surrogate_model_name == "pfn"
         test_x = self.preprocess_test_set(test_x)
-        return self.surrogate_model.predict(self.train_x, self.train_y, test_x)
+        return self.surrogate_model(self.train_x, self.train_y, test_x)
diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py
index aa6c579c..efa5621e 100644
--- a/neps/optimizers/multi_fidelity/utils.py
+++ b/neps/optimizers/multi_fidelity/utils.py
@@ -3,6 +3,7 @@
 
 from typing import Any, Sequence
 
+from copy import deepcopy
 import numpy as np
 import pandas as pd
 import torch
@@ -34,7 +35,7 @@ def normalize_vectorize_config(
     config: SearchSpace, ignore_fidelity: bool = True
 ) -> np.ndarray:
     _new_vector = []
-    for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity).items():
+    for _, hp_list in config.get_normalized_hp_categories(ignore_fidelity=ignore_fidelity).items():
         _new_vector.extend(hp_list)
     return np.array(_new_vector)
 
@@ -361,33 +362,12 @@ def token_ids(self) -> np.ndarray:
         index=[(0, 2), (1, 2), (0, 1)],
     )
 
-    print(data.df)
-    print(data.get_learning_curves())
-    print(
-        "Mapping of budget IDs into best performing configurations at each fidelity:\n",
-        data.get_incumbents_for_budgets(),
-    )
-    print(
-        "Best Performance at each budget level:\n",
-        data.get_best_performance_for_each_budget(),
-    )
-    print(
-        "Configuration ID of the best observed performance so far: ",
-        data.get_best_learning_curve_id(),
-    )
-    print(data.extract_learning_curve(0, 2))
-    # data.df.sort_index(inplace=True)
-    print(data.get_partial_configs_at_max_seen())
-
     # When updating multiple indices at a time both the values in the data dictionary and the indices should be lists
     data.update_data({"perf": [1.8, 1.5]}, index=[(1, 1), (0, 0)])
-    print(data.df)
 
     data = MFObservedData(["config", "perf"], index_names=["config_id", "budget_id"])
 
     # when adding a single row second level list is not necessary
     data.add_data(["conf1", 0.5], index=(0, 0))
-    print(data.df)
 
     data.update_data({"perf": [1.8], "budget_col": [5]}, index=(0, 0))
-    print(data.df)
diff --git a/pyproject.toml b/pyproject.toml
index b68f5e5a..4e3da049 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,7 +65,7 @@ pyyaml = "^6"
 tensorboard = "^2"
 typing-extensions = "*"
 torchvision = ">=0.8.0"
-ifbo = ">=0.3.5"
+ifbo = ">=0.3.8"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.4"