110 add ablation importance to deepcave #164

Merged
merged 21 commits on Aug 2, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -48,7 +48,7 @@ repos:
'absl-py>=1.0.0',
'jsonlines>=3.0.0',
'pandas>=1.3.4',
'numpy>=1.22.2',
'numpy==1.26.4',
'matplotlib>=3.5.1',
'pyyaml>=6.0.1',
'kaleido>=0.2.1',
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,12 @@
## Converters
- Add support for AMLTK.

## Ablation Paths
- Added ablation paths as a plugin.
- Added ablation as an evaluator to be used by the plugin.
- Added tests as well as documentation.
- Modified the RandomForest surrogate model so that the number of trees can be passed to it.

# Version 1.2.1

## Quality of Life
2 changes: 2 additions & 0 deletions deepcave/config.py
@@ -94,6 +94,7 @@ def SERVER_NAME(self) -> str:
def PLUGINS(self) -> Dict[str, List[Any]]:
"""A list of available plugins per category."""
from deepcave.plugins.budget.budget_correlation import BudgetCorrelation
from deepcave.plugins.hyperparameter.ablation_paths import AblationPaths
from deepcave.plugins.hyperparameter.importances import Importances
from deepcave.plugins.hyperparameter.pdp import PartialDependencies
from deepcave.plugins.hyperparameter.symbolic_explanations import (
@@ -125,6 +126,7 @@ def PLUGINS(self) -> Dict[str, List[Any]]:
],
"Hyperparameter Analysis": [
Importances(),
AblationPaths(),
PartialDependencies(),
SymbolicExplanations(),
],
2 changes: 2 additions & 0 deletions deepcave/evaluators/__init__.py
@@ -10,4 +10,6 @@
This module provides utilities for creating a footprint of a run.
lpi
This module provides utilities to calculate the local parameter importance (LPI).
ablation
This module evaluates the ablation paths.
"""
265 changes: 265 additions & 0 deletions deepcave/evaluators/ablation.py
@@ -0,0 +1,265 @@
# noqa: D400
"""
# Ablation Paths

This module evaluates the ablation paths.

Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
Starting from the default configuration, the configuration is iteratively changed towards the
incumbent configuration by switching one hyperparameter at a time, at each step choosing the
hyperparameter that leads to the largest improvement in the objective function.

## Classes:
- Ablation: Provide an evaluator of the ablation paths.
"""

from typing import Any, Dict, List, Optional, Tuple, Union

import copy
from collections import OrderedDict

import numpy as np

from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate
from deepcave.runs import AbstractRun
from deepcave.runs.objective import Objective
from deepcave.utils.logs import get_logger


class Ablation:
"""
Provide an evaluator of the ablation paths.

Properties
----------
run : AbstractRun
The run to analyze.
cs : ConfigurationSpace
The configuration space of the run.
hp_names : List[str]
A list of the hyperparameter names.
performances : Optional[Dict[Any, Any]]
A dictionary containing the performances for each HP.
improvements : Optional[Dict[Any, Any]]
A dictionary containing the improvements over the respective previous step for each HP.
objectives : Optional[Union[Objective, List[Objective]]]
The objective(s) of the run.
default_config : Configuration
The default configuration of this configuration space.
Gets changed step by step towards the incumbent configuration.
"""

def __init__(self, run: AbstractRun):
self.run = run
self.cs = run.configspace
self.hp_names = self.cs.get_hyperparameter_names()
self.performances: Optional[Dict[Any, Any]] = None
self.improvements: Optional[Dict[Any, Any]] = None
self.logger = get_logger(self.__class__.__name__)

def calculate(
self,
objectives: Optional[Union[Objective, List[Objective]]], # noqa
budget: Optional[Union[int, float]] = None, # noqa
n_trees: int = 50, # noqa
seed: int = 0, # noqa
) -> None:
"""
Calculate the ablation path performances and improvements.

Parameters
----------
objectives : Optional[Union[Objective, List[Objective]]]
The objective(s) to be considered.
budget : Optional[Union[int, float]]
The budget to be considered. If None, all budgets of the run are considered.
Default is None.
n_trees : int
The number of trees for the surrogate model.
Default is 50.
seed : int
The seed for the surrogate model.
Default is 0.
"""
if isinstance(objectives, list) and len(objectives) > 1:
raise ValueError("Only one objective is supported for ablation paths.")
objective = objectives[0] if isinstance(objectives, list) else objectives
assert isinstance(objective, Objective)

performances: OrderedDict = OrderedDict()
improvements: OrderedDict = OrderedDict()

df = self.run.get_encoded_data(objective, budget, specific=True)

# Obtain all configurations with their costs
df = df.dropna(subset=[objective.name])
X = df[self.run.configspace.get_hyperparameter_names()].to_numpy()
Y = df[objective.name].to_numpy()

# A Random Forest Regressor is used as the surrogate model
self._model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
self._model._fit(X, Y)

# Get the incumbent configuration
incumbent_config, _ = self.run.get_incumbent(budget=budget, objectives=objective)
incumbent_encode = self.run.encode_config(incumbent_config)

# Get the default configuration
self.default_config = self.cs.get_default_configuration()
default_encode = self.run.encode_config(self.default_config)

# Obtain the predicted cost of the default and incumbent configuration
def_cost, def_std = self._model.predict(np.array([default_encode]))
def_cost, def_std = def_cost[0], def_std[0]
inc_cost, _ = self._model.predict(np.array([incumbent_encode]))

# For further calculations, assume that the objective is to be minimized
if objective.optimize == "upper":
def_cost = -def_cost
inc_cost = -inc_cost

if inc_cost > def_cost:
self.logger.warning(
"The predicted incumbent cost is smaller than the predicted default "
f"cost for budget: {budget}. This could mean that the configuration space "
"with which the surrogate model was trained contained too few examples."
)
performances = OrderedDict({hp_name: (0, 0) for hp_name in self.hp_names})
improvements = OrderedDict({hp_name: (0, 0) for hp_name in self.hp_names})
else:
# Copy the HP names so as not to remove objects from the original list
hp_it = self.hp_names.copy()
for i in range(len(hp_it)):
# Get the results of the current ablation iteration
continue_ablation, max_hp, max_hp_cost, max_hp_std = self._ablation(
objective, budget, incumbent_config, def_cost, hp_it
)

if not continue_ablation:
break

if objective.optimize == "upper":
# For returning the importance, flip back the objective if it was flipped before
performances[max_hp] = (-max_hp_cost, max_hp_std)
else:
performances[max_hp] = (max_hp_cost, max_hp_std)
impr_std = np.sqrt(def_std**2 + max_hp_std**2)
improvements[max_hp] = ((def_cost - max_hp_cost), impr_std)
# New 'default' cost and std
def_cost = max_hp_cost
def_std = max_hp_std
# Remove the current best HP to keep the order right
hp_it.remove(max_hp)

self.performances = performances
self.improvements = improvements

def get_ablation_performances(self) -> Optional[Dict[Any, Any]]:
"""
Get the ablation performances.

Returns
-------
Optional[Dict[Any, Any]]
A dictionary containing the ablation performances.

Raises
------
RuntimeError
If the ablation performances have not been calculated.
"""
if self.performances is None:
raise RuntimeError("Ablation performances scores must be calculated first.")
return self.performances

def get_ablation_improvements(self) -> Optional[Dict[Any, Any]]:
"""
Get the ablation improvements.

Returns
-------
Optional[Dict[Any, Any]]
A dictionary containing the ablation improvements.

Raises
------
RuntimeError
If the ablation improvements have not been calculated.
"""
if self.improvements is None:
raise RuntimeError("Ablation improvements must be calculated first.")

return self.improvements

def _ablation(
self,
objective: Objective,
budget: Optional[Union[int, float]],
incumbent_config: Any,
def_cost: Any,
hp_it: List[str],
) -> Tuple[Any, Any, Any, Any]:
"""
Calculate the ablation importance for each hyperparameter.

Parameters
----------
objective: Objective
The objective to be considered.
budget: Optional[Union[int, float]]
The budget of the run.
incumbent_config: Any
The incumbent configuration.
def_cost: Any
The default cost.
hp_it: List[str]
A list of the HPs that still have to be looked at.

Returns
-------
Tuple[Any, Any, Any, Any]
continue_ablation, max_hp, max_hp_performance, max_hp_std
"""
max_hp = ""
max_hp_difference = -np.inf

for hp in hp_it:
if incumbent_config[hp] is not None and hp in self.default_config.keys():
config_copy = copy.copy(self.default_config)
config_copy[hp] = incumbent_config[hp]

new_cost, _ = self._model.predict(np.array([self.run.encode_config(config_copy)]))
if objective.optimize == "upper":
new_cost = -new_cost

difference = def_cost - new_cost

# Check for the maximum difference hyperparameter in this round
if difference >= max_hp_difference:
max_hp = hp
max_hp_difference = difference
else:
continue
hp_count = len(self.cs.get_hyperparameter_names())
if max_hp != "":
if max_hp_difference <= 0:
self.logger.info(
"No improvement found in ablation step "
f"{hp_count - len(hp_it) + 1}/{hp_count} for budget {budget}, "
"choose hyperparameter with smallest increase in cost."
)
# For the maximum impact hyperparameter, switch the default with the incumbent value
self.default_config[max_hp] = incumbent_config[max_hp]
max_hp_cost, max_hp_std = self._model.predict(
np.array([self.run.encode_config(self.default_config)])
)
if objective.optimize == "upper":
max_hp_cost = -max_hp_cost
return True, max_hp, max_hp_cost[0], max_hp_std[0]
else:
self.logger.info(
f"End ablation at step {hp_count - len(hp_it) + 1}/{hp_count} "
f"for budget {budget} (remaining hyperparameters not activate in incumbent or "
"default configuration)."
)
return False, None, None, None
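
Not part of the diff: a minimal usage sketch of the new evaluator, based on the API added in this file. The run loader (DeepCAVERun.from_path), the log path, and the get_objectives accessor are assumptions for illustration; only the Ablation calls themselves come from this PR.

from pathlib import Path

from deepcave.evaluators.ablation import Ablation
from deepcave.runs.converters.deepcave import DeepCAVERun  # assumed loader; any AbstractRun should work

# Load a finished run (path and loader are assumptions, not part of this PR).
run = DeepCAVERun.from_path(Path("logs/DeepCAVE/example_run"))
objective = run.get_objectives()[0]  # assumed accessor for the run's objectives

# Fit the surrogate on the run data and walk the ablation path.
evaluator = Ablation(run)
evaluator.calculate(objectives=objective, budget=None, n_trees=50, seed=0)

# Both results are ordered dicts keyed by hyperparameter name with (value, std) tuples,
# in the order in which the hyperparameters were switched to their incumbent values.
performances = evaluator.get_ablation_performances()
improvements = evaluator.get_ablation_improvements()
for hp, (impr, std) in improvements.items():
    print(f"{hp}: improvement={impr:.4f} (std={std:.4f})")
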
3 changes: 2 additions & 1 deletion deepcave/evaluators/epm/random_forest_surrogate.py
@@ -30,9 +30,10 @@ def __init__(
self,
configspace: CS.ConfigurationSpace,
seed: Optional[int] = None,
n_trees: int = 16,
):
super().__init__(configspace, seed=seed)
self._model = RandomForest(configspace=configspace, seed=seed)
self._model = RandomForest(configspace=configspace, seed=seed, n_trees=n_trees)

def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
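
Not part of the diff: a quick sketch of what the new n_trees argument enables, namely sizing the forest behind the surrogate. The toy configuration space and synthetic data are assumptions (using the pre-1.0 ConfigSpace API seen elsewhere in this PR); the RandomForestSurrogate constructor, _fit, and predict are taken from this file and its use in ablation.py.

import ConfigSpace as CS
import numpy as np

from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate

# Toy 1D configuration space and synthetic (encoded config, cost) data for illustration.
cs = CS.ConfigurationSpace(seed=0)
cs.add_hyperparameter(CS.UniformFloatHyperparameter("x", lower=0.0, upper=1.0))
X = np.random.rand(30, 1)
Y = (X[:, 0] - 0.3) ** 2

# n_trees now reaches the underlying RandomForest; the default remains 16.
surrogate = RandomForestSurrogate(cs, seed=0, n_trees=100)
surrogate._fit(X, Y)
mean, std = surrogate.predict(np.array([[0.5]]))
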
2 changes: 2 additions & 0 deletions deepcave/plugins/hyperparameter/__init__.py
@@ -6,6 +6,8 @@
----------
importances
This module provides a plugin for the visualization of the importances.
ablation_paths
This module provides a plugin for the visualization of the ablation paths.
pdp
This module provides utilities for generating Partial Dependency Plots (PDP).
symbolic_explanations