Skip to content

Commit

Permalink
Merge fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Neeratyoy committed Aug 30, 2023
2 parents d3b9ca1 + a1fd303 commit a7f9776
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,15 @@ def preprocess(self, x: Iterable) -> Tuple[Iterable, Iterable]:
required by the multi-fidelity Expected Improvement acquisition function.
"""
budget_list = []
config_id_series = self.observations.get_incumbents_for_budgets()
performances = self.observations.get_best_performance_for_each_budget()

new_configs_from = len(self.observations.get_partial_configs_at_max_seen())
for idx, _x in enumerate(x):
if idx < new_configs_from:
_x.fidelity.value = _x.fidelity.value + self.b_step # +1 step in budget
for _x in x:
budget_list.append(self.get_budget_level(_x))

inc_list = []
for budget in budget_list:
if budget in config_id_series.index:
inc = self.observations.df.loc[
(config_id_series[budget], budget), self.observations.perf_col
]
for budget_level in budget_list:
if budget_level in performances.index:
inc = performances[budget_level]
else:
inc = self.observations.get_best_seen_performance()
inc_list.append(inc)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# type: ignore
from __future__ import annotations

from copy import deepcopy

import numpy as np
import pandas as pd

from ....search_spaces.search_space import SearchSpace
Expand All @@ -24,18 +27,34 @@ def _sample_new(self, index_from: int, n: int = None) -> pd.Series:
)
for _ in range(n)
]
for _config in configs:
_config.fidelity.value = (
self.pipeline_space.fidelity.lower
) # assigns min budget

return pd.Series(configs, index=range(index_from, index_from + len(configs)))

def sample(self, acquisition_function=None, n: int = None) -> pd.Series:
    """Assemble the candidate pool for the acquisition step.

    Combines (a) copies of the observed partial configs promoted by one
    fidelity step and (b) freshly sampled configs at the minimum fidelity.

    Args:
        acquisition_function: unused; kept for interface compatibility.
        n: number of new configurations to draw (delegated to `_sample_new`).

    Returns:
        pd.Series of SearchSpace configs, indexed by config ID.
    """
    partial_configs = self.observations.get_partial_configs_at_max_seen()

    new_configs = self._sample_new(index_from=self.observations.next_config_id(), n=n)

    # Set fidelity for observed configs: promote each by one budget step,
    # keeping only configs whose promoted fidelity stays within the budget.
    partial_configs_list = []
    index_list = []
    for idx, config in partial_configs.items():
        next_fidelity = config.fidelity.value + self.b_step
        # Select only the configs not exceeding the max budget
        if np.less_equal(next_fidelity, config.fidelity.upper):
            # deepcopy so the stored observation keeps its original fidelity;
            # mutating in place would increment fidelities on every call
            # due to pass-by-reference
            _config = deepcopy(config)
            _config.fidelity.value = next_fidelity
            partial_configs_list.append(_config)
            index_list.append(idx)

    # Rebuild the series from the promoted copies only
    partial_configs = pd.Series(partial_configs_list, index=index_list)

    # Set fidelity for new configs: always start at the minimum budget
    for _, config in new_configs.items():
        config.fidelity.value = config.fidelity.lower

    configs = pd.concat([partial_configs, new_configs])

    return configs

Expand Down
46 changes: 19 additions & 27 deletions src/neps/optimizers/multi_fidelity/dyhpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from copy import deepcopy
from typing import Any, List, Union
from typing import Any

import numpy as np

Expand Down Expand Up @@ -34,7 +34,7 @@ def __init__(
self,
pipeline_space: SearchSpace,
budget: int,
step_size: Union[int, float] = 1,
step_size: int | float = 1,
optimal_assignment: bool = False, # pylint: disable=unused-argument
use_priors: bool = False,
sample_default_first: bool = False,
Expand All @@ -46,19 +46,19 @@ def __init__(
# promotion_type: str = "model",
# sample_type: str = "model",
# sampling_args: Union[dict, None] = None,
loss_value_on_error: Union[None, float] = None,
cost_value_on_error: Union[None, float] = None,
loss_value_on_error: None | float = None,
cost_value_on_error: None | float = None,
patience: int = 100,
ignore_errors: bool = False,
logger=None,
# arguments for model
surrogate_model: Union[str, Any] = "gp",
surrogate_model: str | Any = "gp",
surrogate_model_args: dict = None,
domain_se_kernel: str = None,
graph_kernels: list = None,
hp_kernels: list = None,
acquisition: Union[str, BaseAcquisition] = acquisition,
acquisition_sampler: Union[str, AcquisitionSampler] = "freeze-thaw",
acquisition: str | BaseAcquisition = acquisition,
acquisition_sampler: str | AcquisitionSampler = "freeze-thaw",
model_policy: Any = MFEIModel,
log_prior_weighted: bool = False,
initial_design_size: int = 10,
Expand Down Expand Up @@ -90,8 +90,8 @@ def __init__(
ignore_errors=ignore_errors,
logger=logger,
)
self._budget_list: List[Union[int, float]] = []
self.step_size: Union[int, float] = step_size
self._budget_list: list[int | float] = []
self.step_size: int | float = step_size
self.min_budget = self.pipeline_space.fidelity.lower
# TODO: generalize this to work with real data (not benchmarks)
self.max_budget = self.pipeline_space.fidelity.upper
Expand Down Expand Up @@ -155,7 +155,7 @@ def __init__(
def get_budget_level(self, config: SearchSpace) -> int:
    """Translate a config's current fidelity value into its discrete budget level.

    The level is the number of `step_size` increments between the fidelity
    lower bound and the config's current fidelity value, truncated to int.
    """
    offset_from_min = config.fidelity.value - config.fidelity.lower
    return int(offset_from_min / self.step_size)

def get_budget_value(self, budget_level: Union[int, float]) -> Union[int, float]:
def get_budget_value(self, budget_level: int | float) -> int | float:
if isinstance(self.pipeline_space.fidelity, IntegerParameter):
budget_val = int(
self.step_size * budget_level + self.pipeline_space.fidelity.lower
Expand Down Expand Up @@ -264,7 +264,7 @@ def _fit_models(self):

def get_config_and_ids( # pylint: disable=no-self-use
self,
) -> tuple[SearchSpace, str, Union[str, None]]:
) -> tuple[SearchSpace, str, str | None]:
"""...and this is the method that decides which point to query.
Returns:
Expand All @@ -286,27 +286,19 @@ def get_config_and_ids( # pylint: disable=no-self-use
# main call here

samples = self.acquisition_sampler.sample()
eis = self.acquisition.eval(x=deepcopy(samples.to_list()), asscalar=True)
eis = self.acquisition.eval( # type: ignore[attr-defined]
x=samples.to_list(), asscalar=True
)
# TODO: verify
_ids = np.argsort(eis)[0]
# samples should have new configs with fidelities set to minimum
# due to this line, otherwise we have to set them in here
config = samples.iloc[_ids]
_config_id = _ids
_config_id = samples.index[_ids]

if _config_id in self.observed_configs.seen_config_ids:
next_budget_level = self.get_budget_level(config) + 1

if np.less_equal(
self.get_budget_value(next_budget_level), config.fidelity.upper
):
config.fidelity.value = self.get_budget_value(next_budget_level)
config_id = f"{_config_id}_{next_budget_level}"
previous_config_id = f"{_config_id}_{self.get_budget_level(config) - 1}"
else:
config = self.pipeline_space.sample(
patience=self.patience, user_priors=True, ignore_fidelity=False
)
config.fidelity.value = config.fidelity.lower
config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}"
config_id = f"{_config_id}_{self.get_budget_level(config)}"
previous_config_id = f"{_config_id}_{self.get_budget_level(config) - 1}"
else:
config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}"

Expand Down
35 changes: 29 additions & 6 deletions src/neps/optimizers/multi_fidelity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,35 @@ def get_learning_curves(self):

def get_incumbents_for_budgets(self, maximize: bool = False):
    """
    Returns a series object with the best partial configuration for each budget id.

    Note: this will always map the lowest config ID if two configurations
    have the same performance at the same fidelity.

    Args:
        maximize: if True, "best" means the maximum observed value;
            otherwise the minimum.
    """
    learning_curves = self.get_learning_curves()
    if maximize:
        config_ids = learning_curves.idxmax(axis=0)
    else:
        config_ids = learning_curves.idxmin(axis=0)

    # (config_id, budget_id) pairs locate each incumbent row in the
    # MultiIndexed observations frame
    indices = list(zip(config_ids.values.tolist(), config_ids.index.to_list()))
    partial_configs = self.df.loc[indices, self.config_col].to_list()
    return pd.Series(partial_configs, index=config_ids.index, name=self.config_col)

def get_best_performance_for_each_budget(self, maximize: bool = False):
    """
    Returns a series object with the best performance seen at each budget id.

    Args:
        maximize: if True, "best" means the maximum observed value;
            otherwise the minimum.
    """
    learning_curves = self.get_learning_curves()
    # Reduce across configurations (rows), leaving one value per budget (column)
    if maximize:
        performance = learning_curves.max(axis=0)
    else:
        performance = learning_curves.min(axis=0)

    return performance

def get_best_learning_curve_id(self, maximize: bool = False):
"""
Expand All @@ -155,7 +174,7 @@ def get_best_seen_performance(self, maximize: bool = False):
else:
return learning_curves.min(axis=1).min()

def get_single_index(self):
def add_budget_column(self):
combined_df = self.df.reset_index(level=1)
combined_df.set_index(
keys=[self.budget_idx], drop=False, append=True, inplace=True
Expand All @@ -164,7 +183,7 @@ def get_single_index(self):

def reduce_to_max_seen_budgets(self):
    """Collapse the observations to one row per config: the highest budget seen.

    Relies on `add_budget_column` returning a frame indexed so that
    `groupby(level=0).last()` (after the in-place sort) yields, for each
    config ID, its row at the maximum observed budget.
    """
    self.df.sort_index(inplace=True)
    combined_df = self.add_budget_column()
    return combined_df.groupby(level=0).last()

def get_partial_configs_at_max_seen(self):
Expand All @@ -191,9 +210,13 @@ def get_partial_configs_at_max_seen(self):
print(data.df)
print(data.get_learning_curves())
print(
"Mapping of budget IDs into best performing configuration IDs at each fidelity:\n",
"Mapping of budget IDs into best performing configurations at each fidelity:\n",
data.get_incumbents_for_budgets(),
)
print(
"Best Performance at each budget level:\n",
data.get_best_performance_for_each_budget(),
)
print(
"Configuration ID of the best observed performance so far: ",
data.get_best_learning_curve_id(),
Expand Down

0 comments on commit a7f9776

Please sign in to comment.