Cleaned up std and variance estimates and added BeBOP RF modification

luinardi · Oct 10, 2023 · e3d059e · e3d059e
1 parent f7432d0
commit e3d059e
Show file tree

Hide file tree

Showing 7 changed files with 38 additions and 42 deletions.
diff --git a/hypermapper/bo/acquisition_functions.py b/hypermapper/bo/acquisition_functions.py
@@ -39,11 +39,10 @@ def ucb(
     beta = np.sqrt(0.125 * np.log(2 * iteration_number + 1))
     number_of_predictions = X.shape[0]
 
-    prediction_means, prediction_variances = models.compute_model_mean_and_uncertainty(
+    prediction_means, prediction_stds = models.compute_model_mean_and_uncertainty(
         X,
         regression_models,
         param_space,
-        var=True,
         predict_noiseless=settings["predict_noiseless"],
     )
 
@@ -53,7 +52,7 @@ def ucb(
         feasibility_indicator = torch.ones(number_of_predictions)
 
     acq_val = (
-        (prediction_means + torch.sqrt(beta * prediction_variances))
+        (prediction_means + beta * prediction_stds)
         @ objective_weights
         * feasibility_indicator
         * (feasibility_indicator >= feasibility_threshold)
@@ -96,11 +95,10 @@ def ei(
         - a tensor of scalarized values for each point in X.
     """
     number_of_predictions = X.shape[0]
-    prediction_means, prediction_variances = models.compute_model_mean_and_uncertainty(
+    prediction_means, prediction_stds = models.compute_model_mean_and_uncertainty(
         X,
         regression_models,
         param_space,
-        var=True,
         predict_noiseless=settings["predict_noiseless"],
     )
 
@@ -118,7 +116,7 @@ def ei(
         best_values = min(best_values, settings["objective_value_target"])
 
     normalized_best_values = (best_values - objective_means) / objective_stds
-    f_stds = torch.sqrt(prediction_variances)
+    f_stds = prediction_stds
     f_means = prediction_means
     v = (normalized_best_values - f_means - xi) / f_stds
     normal = torch.distributions.Normal(torch.zeros_like(v), torch.ones_like(v))

diff --git a/hypermapper/bo/models/gpbotorch.py b/hypermapper/bo/models/gpbotorch.py
@@ -198,7 +198,7 @@ def _backup_fit(self, mll):
             loss.backward()
             optimizer.step()
 
-    def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
+    def get_mean_and_std(self, normalized_data, predict_noiseless):
         """
         Compute the predicted mean and uncertainty (either standard deviation or variance) for a number of points with a GP model.
 
@@ -216,12 +216,9 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
             raise Exception(f"GP prediction resulted in negative variance {var}")
         var += 1e-12
 
-        if use_var:
-            uncertainty = var
-        else:
-            uncertainty = torch.sqrt(var)
+        std = torch.sqrt(var)
 
-        return mean, uncertainty
+        return mean, std
 
 
 class GpBotorchHeteroskedastic(botorch.models.HeteroskedasticSingleTaskGP, Model):
@@ -307,7 +304,7 @@ def _backup_fit(self, mll):
 
         self.eval()
 
-    def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
+    def get_mean_and_std(self, normalized_data, predict_noiseless):
         """
         Compute the predicted mean and uncertainty (either standard deviation or variance) for a number of points with a GP model.
 
@@ -325,12 +322,9 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
             raise Exception(f"GP prediction resulted in negative variance {var}")
         var += 1e-12
 
-        if use_var:
-            uncertainty = var
-        else:
-            uncertainty = torch.sqrt(var)
+        std = torch.sqrt(var)
 
-        return mean, uncertainty
+        return mean, std
 
 
 class GpBotorchFixed(botorch.models.FixedNoiseGP, Model):

diff --git a/hypermapper/bo/models/gpgpy.py b/hypermapper/bo/models/gpgpy.py
@@ -191,8 +191,7 @@ def fit(
     def get_mean_and_std(
         self,
         normalized_data,
-        predict_noiseless,
-        use_var=False,
+        predict_noiseless
     ):
         """
         Compute the predicted mean and uncertainty (either standard deviation or variance) for a number of points with a GP model.
@@ -211,9 +210,6 @@ def get_mean_and_std(
         mean = mean.flatten()
         var = var.flatten()
         var[var < 10**-11] = 10**-11
-        if use_var:
-            uncertainty = var
-        else:
-            uncertainty = np.sqrt(var)
+        std = np.sqrt(var)
 
-        return torch.tensor(mean), torch.tensor(uncertainty)
+        return torch.tensor(mean), torch.tensor(std)
diff --git a/hypermapper/bo/models/gpgpytorch.py b/hypermapper/bo/models/gpgpytorch.py
@@ -286,7 +286,7 @@ def _backup_fit(self, mll):
             loss.backward()
             optimizer.step()
 
-    def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
+    def get_mean_and_std(self, normalized_data, predict_noiseless):
         """
         Compute the predicted mean and uncertainty (either standard deviation or variance) for a number of points with a GP model.
 
@@ -304,10 +304,6 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
         if any(var < -1e-12):
             raise Exception(f"GP prediction resulted in negative variance {var}")
         var += 1e-12
+        std = torch.sqrt(var)
 
-        if use_var:
-            uncertainty = var
-        else:
-            uncertainty = torch.sqrt(var)
-
-        return mean, uncertainty
+        return mean, std
diff --git a/hypermapper/bo/models/models.py b/hypermapper/bo/models/models.py
@@ -29,8 +29,7 @@ def fit(
     def get_mean_and_std(
         self,
         normalized_data,
-        predict_noiseless,
-        use_var=False,
+        predict_noiseless
     ):
         raise NotImplementedError
 
@@ -121,6 +120,7 @@ def generate_mono_output_regression_models(
                 use_all_data_to_fit_variance=settings["models"][
                     "use_all_data_to_fit_variance"
                 ],
+                add_linear_std=settings["models"]["add_linear_std"],
             )
             model.fit_rf(X, y)
         else:
@@ -168,7 +168,6 @@ def compute_model_mean_and_uncertainty(
     data: torch.Tensor,
     models: list,
     param_space: Space,
-    var: bool = False,
     predict_noiseless: bool = True,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
     """
@@ -188,7 +187,7 @@ def compute_model_mean_and_uncertainty(
     means = torch.Tensor()
     uncertainties = torch.Tensor()
     for model in models:
-        mean, uncertainty = model.get_mean_and_std(X, predict_noiseless, var)
+        mean, uncertainty = model.get_mean_and_std(X, predict_noiseless)
         means = torch.cat((means, mean.unsqueeze(1)), 1)
         uncertainties = torch.cat((uncertainties, uncertainty.unsqueeze(1)), 1)
 

diff --git a/hypermapper/bo/models/rf.py b/hypermapper/bo/models/rf.py
@@ -26,6 +26,7 @@ def __init__(
         self,
         use_all_data_to_fit_mean=False,
         use_all_data_to_fit_variance=False,
+        add_linear_std=False,
         **kwargs
     ):
         """
@@ -39,6 +40,7 @@ def __init__(
         self.leaf_variances = []
         self.use_all_data_to_fit_mean = use_all_data_to_fit_mean
         self.use_all_data_to_fit_variance = use_all_data_to_fit_variance
+        self.add_linear_std = add_linear_std
 
         # This is just to make the code faster. If the min_samples_split is 2, we can just set the internal mean to 0.
         if self.min_samples_split == 2 and not self.use_all_data_to_fit_mean:
@@ -148,7 +150,12 @@ def fit_rf(
         if self.use_all_data_to_fit_mean:
             self.update_leaf_values(X, y, self.use_all_data_to_fit_variance)
 
-    def get_mean_and_std(self, X: torch.Tensor, _, use_var=False):
+        if self.add_linear_std:
+            from sklearn.neighbors import NearestNeighbors
+            self.neigh = NearestNeighbors(n_neighbors=1)
+            self.neigh.fit(X)
+
+    def get_mean_and_std(self, X: torch.Tensor, _):
         """
         Compute the predicted mean and uncertainty (either standard deviation or variance) for a number of points with an RF model.
 
@@ -179,12 +186,13 @@ def get_mean_and_std(self, X: torch.Tensor, _, use_var=False):
         var -= mean**2.0
         var[var < 0.0] = 0.0
 
-        if use_var:
-            uncertainty = var
-        else:
-            uncertainty = np.sqrt(var)
+        if self.add_linear_std:
+            # From the BeBOP paper (https://arxiv.org/abs/2310.00971): add (2 * distance to the nearest fitted point)^2 to the variance.
+            var += np.square(2 * self.neigh.kneighbors(X.numpy())[0].squeeze())
+
+        std = np.sqrt(var)
 
-        return torch.tensor(mean), torch.tensor(uncertainty)
+        return torch.tensor(mean), torch.tensor(std)
 
 
 class RFClassificationModel(RandomForestClassifier):

diff --git a/hypermapper/schema.json b/hypermapper/schema.json
@@ -156,6 +156,11 @@
           "default": false,
           "description": "Whether to still use all data to fit the variance of the leaves when bootstrapping in Random Forests.",
           "type": "boolean"
+        },
+        "add_linear_std": {
+          "default": true,
+          "description": "Whether to inflate the Random Forest std estimate with the distance to the closest data point to promote exploration.",
+          "type": "boolean"
         }
       },
       "required": [