Skip to content

Commit

Permalink
Cleaned up std and variance estimates and added BeBOP RF modification
Browse files Browse the repository at this point in the history
  • Loading branch information
ErikOrm committed Oct 10, 2023
1 parent f7432d0 commit e3d059e
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 42 deletions.
10 changes: 4 additions & 6 deletions hypermapper/bo/acquisition_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@ def ucb(
beta = np.sqrt(0.125 * np.log(2 * iteration_number + 1))
number_of_predictions = X.shape[0]

prediction_means, prediction_variances = models.compute_model_mean_and_uncertainty(
prediction_means, prediction_stds = models.compute_model_mean_and_uncertainty(
X,
regression_models,
param_space,
var=True,
predict_noiseless=settings["predict_noiseless"],
)

Expand All @@ -53,7 +52,7 @@ def ucb(
feasibility_indicator = torch.ones(number_of_predictions)

acq_val = (
(prediction_means + torch.sqrt(beta * prediction_variances))
(prediction_means + beta * prediction_stds)
@ objective_weights
* feasibility_indicator
* (feasibility_indicator >= feasibility_threshold)
Expand Down Expand Up @@ -96,11 +95,10 @@ def ei(
- a tensor of scalarized values for each point in X.
"""
number_of_predictions = X.shape[0]
prediction_means, prediction_variances = models.compute_model_mean_and_uncertainty(
prediction_means, prediction_stds = models.compute_model_mean_and_uncertainty(
X,
regression_models,
param_space,
var=True,
predict_noiseless=settings["predict_noiseless"],
)

Expand All @@ -118,7 +116,7 @@ def ei(
best_values = min(best_values, settings["objective_value_target"])

normalized_best_values = (best_values - objective_means) / objective_stds
f_stds = torch.sqrt(prediction_variances)
f_stds = prediction_stds
f_means = prediction_means
v = (normalized_best_values - f_means - xi) / f_stds
normal = torch.distributions.Normal(torch.zeros_like(v), torch.ones_like(v))
Expand Down
18 changes: 6 additions & 12 deletions hypermapper/bo/models/gpbotorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def _backup_fit(self, mll):
loss.backward()
optimizer.step()

def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
def get_mean_and_std(self, normalized_data, predict_noiseless):
"""
Compute the predicted mean and standard deviation for a number of points with a GP model.
Expand All @@ -216,12 +216,9 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
raise Exception(f"GP prediction resulted in negative variance {var}")
var += 1e-12

if use_var:
uncertainty = var
else:
uncertainty = torch.sqrt(var)
std = torch.sqrt(var)

return mean, uncertainty
return mean, std


class GpBotorchHeteroskedastic(botorch.models.HeteroskedasticSingleTaskGP, Model):
Expand Down Expand Up @@ -307,7 +304,7 @@ def _backup_fit(self, mll):

self.eval()

def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
def get_mean_and_std(self, normalized_data, predict_noiseless):
"""
Compute the predicted mean and standard deviation for a number of points with a GP model.
Expand All @@ -325,12 +322,9 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
raise Exception(f"GP prediction resulted in negative variance {var}")
var += 1e-12

if use_var:
uncertainty = var
else:
uncertainty = torch.sqrt(var)
std = torch.sqrt(var)

return mean, uncertainty
return mean, std


class GpBotorchFixed(botorch.models.FixedNoiseGP, Model):
Expand Down
10 changes: 3 additions & 7 deletions hypermapper/bo/models/gpgpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ def fit(
def get_mean_and_std(
self,
normalized_data,
predict_noiseless,
use_var=False,
predict_noiseless
):
"""
Compute the predicted mean and standard deviation for a number of points with a GP model.
Expand All @@ -211,9 +210,6 @@ def get_mean_and_std(
mean = mean.flatten()
var = var.flatten()
var[var < 10**-11] = 10**-11
if use_var:
uncertainty = var
else:
uncertainty = np.sqrt(var)
std = np.sqrt(var)

return torch.tensor(mean), torch.tensor(uncertainty)
return torch.tensor(mean), torch.tensor(std)
10 changes: 3 additions & 7 deletions hypermapper/bo/models/gpgpytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def _backup_fit(self, mll):
loss.backward()
optimizer.step()

def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
def get_mean_and_std(self, normalized_data, predict_noiseless):
"""
Compute the predicted mean and standard deviation for a number of points with a GP model.
Expand All @@ -304,10 +304,6 @@ def get_mean_and_std(self, normalized_data, predict_noiseless, use_var=False):
if any(var < -1e-12):
raise Exception(f"GP prediction resulted in negative variance {var}")
var += 1e-12
std = torch.sqrt(var)

if use_var:
uncertainty = var
else:
uncertainty = torch.sqrt(var)

return mean, uncertainty
return mean, std
7 changes: 3 additions & 4 deletions hypermapper/bo/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ def fit(
def get_mean_and_std(
self,
normalized_data,
predict_noiseless,
use_var=False,
predict_noiseless
):
raise NotImplementedError

Expand Down Expand Up @@ -121,6 +120,7 @@ def generate_mono_output_regression_models(
use_all_data_to_fit_variance=settings["models"][
"use_all_data_to_fit_variance"
],
add_linear_std=settings["models"]["add_linear_std"],
)
model.fit_rf(X, y)
else:
Expand Down Expand Up @@ -168,7 +168,6 @@ def compute_model_mean_and_uncertainty(
data: torch.Tensor,
models: list,
param_space: Space,
var: bool = False,
predict_noiseless: bool = True,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Expand All @@ -188,7 +187,7 @@ def compute_model_mean_and_uncertainty(
means = torch.Tensor()
uncertainties = torch.Tensor()
for model in models:
mean, uncertainty = model.get_mean_and_std(X, predict_noiseless, var)
mean, uncertainty = model.get_mean_and_std(X, predict_noiseless)
means = torch.cat((means, mean.unsqueeze(1)), 1)
uncertainties = torch.cat((uncertainties, uncertainty.unsqueeze(1)), 1)

Expand Down
20 changes: 14 additions & 6 deletions hypermapper/bo/models/rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(
self,
use_all_data_to_fit_mean=False,
use_all_data_to_fit_variance=False,
add_linear_std=False,
**kwargs
):
"""
Expand All @@ -39,6 +40,7 @@ def __init__(
self.leaf_variances = []
self.use_all_data_to_fit_mean = use_all_data_to_fit_mean
self.use_all_data_to_fit_variance = use_all_data_to_fit_variance
self.add_linear_std = add_linear_std

# This is just to make the code faster. If the min_samples_split is 2, we can just set the internal mean to 0.
if self.min_samples_split == 2 and not self.use_all_data_to_fit_mean:
Expand Down Expand Up @@ -148,7 +150,12 @@ def fit_rf(
if self.use_all_data_to_fit_mean:
self.update_leaf_values(X, y, self.use_all_data_to_fit_variance)

def get_mean_and_std(self, X: torch.Tensor, _, use_var=False):
if self.add_linear_std:
from sklearn.neighbors import NearestNeighbors
self.neigh = NearestNeighbors(n_neighbors=1)
self.neigh.fit(X)

def get_mean_and_std(self, X: torch.Tensor, _):
"""
Compute the predicted mean and standard deviation for a number of points with an RF model.
Expand Down Expand Up @@ -179,12 +186,13 @@ def get_mean_and_std(self, X: torch.Tensor, _, use_var=False):
var -= mean**2.0
var[var < 0.0] = 0.0

if use_var:
uncertainty = var
else:
uncertainty = np.sqrt(var)
if self.add_linear_std:
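# From the BeBOP paper (https://arxiv.org/abs/2310.00971): inflate the variance by the square of twice the distance to the nearest point the neighbor index was fitted on.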
var += np.square(2 * self.neigh.kneighbors(X.numpy())[0].squeeze())

std = np.sqrt(var)

return torch.tensor(mean), torch.tensor(uncertainty)
return torch.tensor(mean), torch.tensor(std)


class RFClassificationModel(RandomForestClassifier):
Expand Down
5 changes: 5 additions & 0 deletions hypermapper/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@
"default": false,
"description": "Whether to still use all data to fit the variance of the leaves when bootstrapping in Random Forests.",
"type": "boolean"
},
"add_linear_std": {
"default": true,
"description": "Inflate the Random Forest uncertainty estimate with a term that grows with the distance to the closest data point, to promote exploration.",
"type": "boolean"
}
},
"required": [
Expand Down

0 comments on commit e3d059e

Please sign in to comment.