refactor(SearchSpace): Switch to clone() for search space. (#94)
eddiebergman authored May 11, 2024
1 parent 16ba157 commit 529f334
Showing 44 changed files with 2,691 additions and 1,370 deletions.
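The central change in this commit is replacing copy.deepcopy(...) on SearchSpace objects with an explicit SearchSpace.clone() method. The clone() implementation itself lives in neps/search_spaces/search_space.py, which is not among the hunks shown below; the sketch here only illustrates the pattern, with hypothetical internals rather than the actual NePS code:

    from copy import deepcopy

    class SearchSpace:
        """Illustrative stand-in for the real SearchSpace."""

        def __init__(self, hyperparameters: dict):
            self.hyperparameters = hyperparameters

        def clone(self) -> "SearchSpace":
            # An explicit clone() lets the class control how it is copied,
            # instead of deepcopy recursing blindly over arbitrary state.
            return self.__class__(
                {name: deepcopy(hp) for name, hp in self.hyperparameters.items()}
            )

    space = SearchSpace({"lr": 0.1})
    config = space.clone()  # call sites below switch to this from deepcopy(space)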
1 change: 1 addition & 0 deletions .gitignore
@@ -7,6 +7,7 @@ dist
 *.err
 *.log
 *.json
+*.speedscope

 # slurm scripts
 slurm_scripts/*
13 changes: 6 additions & 7 deletions neps/optimizers/base_optimizer.py
@@ -2,15 +2,14 @@

 import logging
 from abc import abstractmethod
-from copy import deepcopy
 from typing import Any, Iterator, Mapping
 from typing_extensions import Self
 from contextlib import contextmanager
 from pathlib import Path

-from neps.utils.types import ConfigResult
+from neps.utils.types import ConfigResult, RawConfig, ERROR, ResultDict
 from neps.utils.files import serialize, deserialize
-from ..search_spaces.search_space import SearchSpace
+from neps.search_spaces.search_space import SearchSpace
 from neps.utils.data_loading import _get_cost, _get_learning_curve, _get_loss
@@ -50,7 +49,7 @@ def load_results(
         raise NotImplementedError

     @abstractmethod
-    def get_config_and_ids(self) -> tuple[SearchSpace, str, str | None]:
+    def get_config_and_ids(self) -> tuple[RawConfig, str, str | None]:
         """Sample a new configuration

         Returns:
@@ -75,11 +74,11 @@ def load_state(self, state: Any) -> None:
         self.used_budget = state["used_budget"]

     def load_config(self, config_dict: Mapping[str, Any]) -> SearchSpace:
-        config = deepcopy(self.pipeline_space)
+        config = self.pipeline_space.clone()
         config.load_from(config_dict)
         return config

-    def get_loss(self, result: str | dict | float) -> float | Any:
+    def get_loss(self, result: ERROR | ResultDict | float) -> float | Any:
         """Calls result.utils.get_loss() and passes the error handling through.
         Please use self.get_loss() instead of get_loss() in all optimizer classes."""
         return _get_loss(
@@ -88,7 +87,7 @@ def get_loss(self, result: str | dict | float) -> float | Any:
             ignore_errors=self.ignore_errors,
         )

-    def get_cost(self, result: str | dict | float) -> float | Any:
+    def get_cost(self, result: ERROR | ResultDict | float) -> float | Any:
         """Calls result.utils.get_cost() and passes the error handling through.
         Please use self.get_cost() instead of get_cost() in all optimizer classes."""
         return _get_cost(
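The get_loss/get_cost signatures above swap the loose str | dict | float union for named aliases imported from neps.utils.types. Their definitions are not part of this diff; a plausible reading, stated as an assumption rather than the actual NePS source, is:

    from typing import Any, Dict, Literal

    ERROR = Literal["error"]      # sentinel marking a failed evaluation (assumed)
    ResultDict = Dict[str, Any]   # structured result, e.g. {"loss": 0.3, "cost": 12.0}

    def describe(result: "ERROR | ResultDict | float") -> str:
        # Named aliases make it obvious which strings are actually legal here.
        if result == "error":
            return "evaluation failed"
        if isinstance(result, dict):
            return f"loss={result.get('loss')}"
        return f"loss={float(result)}"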
neps/optimizers/bayesian_optimization/acquisition_functions/__init__.py
@@ -3,9 +3,17 @@
 from functools import partial
 from typing import Callable

-from .ei import ComprehensiveExpectedImprovement
-from .mf_ei import MFEI
-from .ucb import UpperConfidenceBound, MF_UCB
+from neps.optimizers.bayesian_optimization.acquisition_functions.ei import (
+    ComprehensiveExpectedImprovement,
+)
+from neps.optimizers.bayesian_optimization.acquisition_functions.mf_ei import MFEI
+from neps.optimizers.bayesian_optimization.acquisition_functions.ucb import (
+    UpperConfidenceBound,
+    MF_UCB,
+)
+from neps.optimizers.bayesian_optimization.acquisition_functions.prior_weighted import (
+    DecayingPriorWeightedAcquisition,
+)


 AcquisitionMapping: dict[str, Callable] = {
@@ -41,3 +49,12 @@
         maximize=False,
     ),
 }
+
+__all__ = [
+    "AcquisitionMapping",
+    "ComprehensiveExpectedImprovement",
+    "MFEI",
+    "UpperConfidenceBound",
+    "MF_UCB",
+    "DecayingPriorWeightedAcquisition",
+]
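With the re-exports in place, AcquisitionMapping works as a plain string-keyed registry: look a constructor up by name, then call it. A minimal self-contained sketch of the pattern (the stand-in function and the "ucb" key are hypothetical; the real keys sit in the elided middle of this file):

    from functools import partial
    from typing import Callable

    def make_ucb(beta: float) -> str:
        # Stand-in for an acquisition constructor such as UpperConfidenceBound.
        return f"UCB(beta={beta})"

    AcquisitionMapping: dict[str, Callable] = {
        "ucb": partial(make_ucb, beta=1.0),
    }

    acquisition = AcquisitionMapping["ucb"]()  # look up and instantiate by key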
16 changes: 10 additions & 6 deletions neps/optimizers/bayesian_optimization/acquisition_functions/ei.py
@@ -1,12 +1,14 @@
-from copy import deepcopy
-from typing import Iterable, Union
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Sequence, Union
 import numpy as np
 import torch
 from torch.distributions import Normal

 from .base_acquisition import BaseAcquisition

+if TYPE_CHECKING:
+    from neps.search_spaces import SearchSpace
+
 class ComprehensiveExpectedImprovement(BaseAcquisition):
     def __init__(
@@ -49,18 +51,20 @@ def __init__(
         self.optimize_on_max_fidelity = optimize_on_max_fidelity

     def eval(
-        self, x: Iterable, asscalar: bool = False
+        self, x: Sequence[SearchSpace], asscalar: bool = False,
     ) -> Union[np.ndarray, torch.Tensor, float]:
         """
         Return the negative expected improvement at the query point x2
         """
         assert self.incumbent is not None, "EI function not fitted on model"
-        if x[0].has_fidelity and self.optimize_on_max_fidelity:
-            _x = deepcopy(x)
-            [elem.set_to_max_fidelity() for elem in _x]
+        if x[0].has_fidelity and self.optimize_on_max_fidelity:
+            _x = [e.clone() for e in x]
+            for e in _x:
+                e.set_to_max_fidelity()
         else:
             _x = x

         try:
             mu, cov = self.surrogate_model.predict(_x)
         except ValueError as e:
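For reference, eval returns the negative expected improvement computed from the surrogate's posterior; the computation itself is elided above. This is the textbook closed form under a Gaussian posterior with mean mu and standard deviation std, not necessarily line-for-line what the class does:

    import torch
    from torch.distributions import Normal

    def negative_expected_improvement(
        mu: torch.Tensor, std: torch.Tensor, incumbent: float
    ) -> torch.Tensor:
        # For minimization, improvement over the incumbent f* is f* - f(x), so
        # EI(x) = std * (z * Phi(z) + phi(z)) with z = (f* - mu) / std.
        gauss = Normal(torch.zeros_like(mu), torch.ones_like(mu))
        z = (incumbent - mu) / std
        ei = std * (z * gauss.cdf(z) + gauss.log_prob(z).exp())
        return -ei  # negated, matching the docstring above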
neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
@@ -55,13 +55,13 @@ def preprocess(self, x: pd.Series) -> Tuple[Iterable, Iterable]:

             if np.less_equal(target_fidelity, config.fidelity.upper):
                 # only consider the configs with fidelity lower than the max fidelity
-                config.fidelity.value = target_fidelity
+                config.fidelity.set_value(target_fidelity)
                 budget_list.append(self.get_budget_level(config))
             else:
                 # if the target_fidelity higher than the max drop the configuration
                 indices_to_drop.append(i)
         else:
-            config.fidelity.value = target_fidelity
+            config.fidelity.set_value(target_fidelity)
             budget_list.append(self.get_budget_level(config))

         # Drop unused configs
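The repeated change from config.fidelity.value = ... to config.fidelity.set_value(...) trades a bare attribute write for a method call that can validate its input. A hypothetical sketch of why that matters; the real parameter classes in neps.search_spaces may differ:

    from __future__ import annotations

    class FloatParameter:
        """Hypothetical fidelity-style parameter with a validating setter."""

        def __init__(self, lower: float, upper: float):
            self.lower, self.upper = lower, upper
            self.value: float | None = None

        def set_value(self, value: float | None) -> None:
            # A setter can reject out-of-range fidelities at the call site,
            # which a plain `param.value = ...` assignment silently accepts.
            if value is not None and not (self.lower <= value <= self.upper):
                raise ValueError(f"{value} outside [{self.lower}, {self.upper}]")
            self.value = value

    fidelity = FloatParameter(lower=1, upper=100)
    fidelity.set_value(50)  # fine; set_value(200) would raise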
neps/optimizers/bayesian_optimization/acquisition_functions/prior_weighted.py
@@ -13,6 +13,7 @@ def __init__(
         pibo_beta=10,
         log: bool = False,
     ):
+        super().__init__()
         self.pibo_beta = pibo_beta
         self.base_acquisition = base_acquisition
         self.log = log
@@ -23,11 +24,11 @@ def eval(
         x: Iterable,
         **base_acquisition_kwargs,
     ) -> Union[np.ndarray, torch.Tensor, float]:
-        super().__init__()
         acquisition = self.base_acquisition(x, **base_acquisition_kwargs)

         if self.log:
             min_acq_val = abs(min(acquisition)) if min(acquisition) < 0 else 0

         for i, candidate in enumerate(x):
             prior_weight = candidate.compute_prior(log=self.log)
             if prior_weight != 1.0:
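The fix above moves super().__init__() out of eval, where it re-ran on every call, into the constructor where it belongs. The weighting logic itself is elided from this hunk; in pi-BO style it amounts to scaling the base acquisition by each candidate's prior raised to a decaying exponent, roughly as follows (an assumption about the elided code, with t the iteration count):

    import numpy as np

    def prior_weighted(acq: np.ndarray, prior: np.ndarray,
                       pibo_beta: float, t: int) -> np.ndarray:
        # The prior's influence decays as observations accumulate: the exponent
        # beta / t shrinks toward 0, so prior ** (beta / t) tends to 1.
        return acq * prior ** (pibo_beta / max(t, 1))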
neps/optimizers/bayesian_optimization/acquisition_samplers/base_acq_sampler.py
@@ -1,10 +1,12 @@
 from __future__ import annotations

 from abc import abstractmethod
+from typing import TYPE_CHECKING, Sequence, Callable

 import torch
+from neps.utils.types import Array

-from ....search_spaces.search_space import SearchSpace
+if TYPE_CHECKING:
+    from neps.search_spaces.search_space import SearchSpace


 class AcquisitionSampler:
@@ -14,17 +16,17 @@ def __init__(self, pipeline_space: SearchSpace, patience: int = 50):

         self.pipeline_space = pipeline_space
         self.acquisition_function = None
-        self.x: list = []
-        self.y: list = []
+        self.x: list[SearchSpace] = []
+        self.y: Sequence[float] | Array = []
         self.patience = patience

     @abstractmethod
-    def sample(self, acquisition_function) -> SearchSpace:
+    def sample(self, acquisition_function: Callable) -> SearchSpace:
         raise NotImplementedError

-    def sample_batch(self, acquisition_function, batch) -> list[SearchSpace]:
+    def sample_batch(self, acquisition_function: Callable, batch: int) -> list[SearchSpace]:
         return [self.sample(acquisition_function) for _ in range(batch)]

-    def set_state(self, x: list, y: list | torch.Tensor) -> None:
+    def set_state(self, x: list[SearchSpace], y: Sequence[float] | Array) -> None:
         self.x = x
         self.y = y
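A minimal concrete subclass makes the contract above easier to see. This sketch assumes pipeline_space.sample() can be called without arguments, which may not hold for the real SearchSpace:

    from typing import Callable

    class BestOfRandomSampler(AcquisitionSampler):
        """Illustrative only: draw random configs, keep the acquisition maximizer."""

        def sample(self, acquisition_function: Callable) -> "SearchSpace":
            candidates = [self.pipeline_space.sample() for _ in range(self.patience)]
            return max(candidates, key=acquisition_function)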
neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py
@@ -13,7 +13,7 @@

 class FreezeThawSampler(AcquisitionSampler):

     SAMPLES_TO_DRAW = 100  # number of random samples to draw at lowest fidelity

     def __init__(self, **kwargs):
@@ -119,8 +119,8 @@ def __sample_single_new_tabular(index: int):
             config = self.pipeline_space.sample(
                 patience=self.patience, user_priors=False, ignore_fidelity=False
             )
-            config["id"].value = _new_configs[index]
-            config.fidelity.value = set_new_sample_fidelity
+            config["id"].set_value(_new_configs[index])
+            config.fidelity.set_value(set_new_sample_fidelity)
             return config

         if self.is_tabular:
@@ -131,7 +131,7 @@ def __sample_single_new_tabular(index: int):
             # accounting for unseen configs only, samples remaining table if flag is set
             max_n = len(_all_ids) + 1 if self.sample_full_table else _n
             _n = min(max_n, len(_all_ids - _partial_ids))

             _new_configs = np.random.choice(
                 list(_all_ids - _partial_ids), size=_n, replace=False
             )
@@ -145,13 +145,13 @@ def __sample_single_new_tabular(index: int):

         elif set_new_sample_fidelity is not None:
             for config in new_configs:
-                config.fidelity.value = set_new_sample_fidelity
+                config.fidelity.set_value(set_new_sample_fidelity)

         # Deep copy configs for fidelity updates
         partial_configs_list = []
         index_list = []
         for idx, config in partial_configs.items():
-            _config = deepcopy(config)
+            _config = config.clone()
             partial_configs_list.append(_config)
             index_list.append(idx)
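As an aside, the clone loop at the end of this hunk could be written as two comprehensions with identical behavior, assuming only the .items() interface it already uses:

    partial_configs_list = [config.clone() for _, config in partial_configs.items()]
    index_list = [idx for idx, _ in partial_configs.items()]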
neps/optimizers/bayesian_optimization/acquisition_samplers/mutation_sampler.py
@@ -1,21 +1,26 @@
 from __future__ import annotations

-from typing import Iterable
+from typing import TYPE_CHECKING, Callable, Sequence

 import numpy as np
 import torch
 from more_itertools import first
+from typing_extensions import override

-from .base_acq_sampler import AcquisitionSampler
-from .random_sampler import RandomSampler
+from neps.optimizers.bayesian_optimization.acquisition_samplers.base_acq_sampler import AcquisitionSampler
+from neps.optimizers.bayesian_optimization.acquisition_samplers.random_sampler import RandomSampler
+
+if TYPE_CHECKING:
+    from neps.utils.types import Array
+    from neps.search_spaces.search_space import SearchSpace


 def _propose_location(
-    acquisition_function,
-    candidates: list,
+    acquisition_function: Callable,
+    candidates: list[SearchSpace],
     top_n: int = 5,
     return_distinct: bool = True,
-) -> tuple[Iterable, np.ndarray, np.ndarray]:
+) -> tuple[list[SearchSpace], np.ndarray | torch.Tensor, np.ndarray]:
     """top_n: return the top n candidates wrt the acquisition function."""
     if return_distinct:
         eis = acquisition_function(candidates, asscalar=True)  # faster
@@ -29,6 +34,7 @@ def _propose_location(
     else:
         eis = torch.tensor([acquisition_function(c) for c in candidates])
     _, indices = eis.topk(top_n)
+
     xs = [candidates[int(i)] for i in indices]
     return xs, eis, indices
@@ -39,7 +45,7 @@ def __init__(
         pipeline_space,
         pool_size: int = 250,
         n_best: int = 10,
-        mutate_size: int = None,
+        mutate_size: int | None = None,
         allow_isomorphism: bool = False,
         check_isomorphism_history: bool = True,
         patience: int = 50,
@@ -57,14 +63,21 @@ def __init__(
             pipeline_space=pipeline_space, patience=patience
         )

-    def set_state(self, x, y) -> None:
+    @override
+    def set_state(self, x: list[SearchSpace], y: Sequence[float] | Array) -> None:
         super().set_state(x, y)
         self.random_sampling.set_state(x, y)

-    def sample(self, acquisition_function) -> tuple[list, list, np.ndarray]:
-        return first(self.sample_batch(acquisition_function, 1))
+    @override
+    def sample(self, acquisition_function: Callable) -> SearchSpace:
+        return first(self.sample_batch(acquisition_function, batch=1))

-    def sample_batch(self, acquisition_function, batch):
+    @override
+    def sample_batch(
+        self,
+        acquisition_function: Callable,
+        batch: int,
+    ) -> list[SearchSpace]:
         pool = self.create_pool(acquisition_function, self.pool_size)

         samples, _, _ = _propose_location(
@@ -74,7 +87,11 @@ def sample_batch(self, acquisition_function, batch):
         )
         return samples

-    def create_pool(self, acquisition_function, pool_size: int) -> list:
+    def create_pool(
+        self,
+        acquisition_function: Callable,
+        pool_size: int,
+    ) -> list[SearchSpace]:
         if len(self.x) == 0:
             return self.random_sampling.sample_batch(acquisition_function, pool_size)
@@ -89,8 +106,14 @@ def create_pool(self, acquisition_function, pool_size: int) -> list:
         best_configs = [
             x for (_, x) in sorted(zip(self.y, self.x), key=lambda pair: pair[0])
         ][:n_best]
+
+        seen: set[int] = set()
+        def _hash(_config: SearchSpace) -> int:
+            return hash(_config.hp_values().values())
+
         evaluation_pool = []
         per_arch = mutate_size // n_best
+
         for config in best_configs:
             remaining_patience = self.patience
             for _ in range(per_arch):

@@ -101,15 +124,17 @@ def create_pool(self, acquisition_function, pool_size: int) -> list:
                 except Exception:
                     remaining_patience -= 1
                     continue
+                hash_child = _hash(child)

                 if not self.allow_isomorphism:
                     # if disallow isomorphism, we enforce that each time, we mutate n distinct graphs.
                     # For now we do not check the isomorphism in all of the previous graphs though
-                    if child == config or child in evaluation_pool:
+                    if child == config or hash_child in seen:
                         remaining_patience -= 1
                         continue

                 evaluation_pool.append(child)
+                seen.add(hash_child)
                 break

         # Fill missing pool with random samples
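One caveat on the new _hash helper above: dict.values() returns a view object whose hash falls back to object identity, so two configs with identical hyperparameter values produce different hashes and never collide in seen, making the deduplication weaker than it looks. Hashing a tuple of the values would key on the values themselves; a sketch of that variant, assuming all hyperparameter values are hashable:

    def _hash(_config: SearchSpace) -> int:
        # tuple(...) hashes the hyperparameter values, not the identity of a
        # freshly created dict_values view object.
        return hash(tuple(_config.hp_values().values()))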