diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
index 2eb93db1..ed323cbb 100644
--- a/.github/workflows/pre-commit.yaml
+++ b/.github/workflows/pre-commit.yaml
@@ -27,7 +27,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: "3.10"
       - run: pip install pre-commit
       - run: pre-commit install
       - run: pre-commit run --all-files
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index dd60a230..0ec1b3ed 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11']
         os: [ubuntu-latest, macos-latest, windows-latest]
     defaults:
       run:
diff --git a/neps/api.py b/neps/api.py
index 0c6e2f0c..d717a7cb 100644
--- a/neps/api.py
+++ b/neps/api.py
@@ -80,7 +80,7 @@ def run(
         root_directory: The directory to save progress to. This is also used to
             synchronize multiple calls to run(.) for parallelization.
         run_args: An option for providing the optimization settings e.g.
-            max_evaluation_total in a YAML file.
+            max_evaluations_total in a YAML file.
         overwrite_working_directory: If true, delete the working directory at the start of
             the run. This is, e.g., useful when debugging a run_pipeline function.
         post_run_summary: If True, creates a csv file after each worker is done,
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
index 8e74b338..5139d4b4 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_ei.py
@@ -132,13 +132,13 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 if np.less_equal(target_fidelity, config.fidelity.upper):
                     # only consider the configs with fidelity lower than the max fidelity
-                    config.fidelity.value = target_fidelity
+                    config.update_hp_values({config.fidelity_name: target_fidelity})
                     budget_list.append(self.get_budget_level(config))
                 else:
                     # if the target_fidelity higher than the max drop the configuration
                     indices_to_drop.append(i)
             else:
-                config.fidelity.value = target_fidelity
+                config.update_hp_values({config.fidelity_name: target_fidelity})
                 budget_list.append(self.get_budget_level(config))

         # Drop unused configs
@@ -267,7 +267,7 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 # if the target_fidelity already reached, drop the configuration
                 indices_to_drop.append(i)
             else:
-                config.fidelity.value = target_fidelity
+                config.update_hp_values({config.fidelity_name: target_fidelity})
                 budget_list.append(self.get_budget_level(config))

         # drop unused configs
@@ -308,23 +308,11 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
         ## marker 2: the maximum fidelity value recorded in observation history
         pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower

-        # TODO: compare with this first draft logic
-        # def update_fidelity(config):
-        #     ### DO NOT DELETE THIS FUNCTION YET
-        #     # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max)
-        #     ## that is, choose the next highest marker from 1 and 2
-        #     z_extrapolate = min(
-        #         max(config.fidelity.value + self.b_step, z_inc),
-        #         pseudo_z_max
-        #     )
-        #     config.fidelity.value = z_extrapolate
-        #     return config
-
         def update_fidelity(config):
             # for all configs, set to pseudo_z_max
            ## that is, choose the highest seen fidelity in observation history
             z_extrapolate = pseudo_z_max
-            config.fidelity.value = z_extrapolate
+            config.update_hp_values({config.fidelity_name: z_extrapolate})
             return config

         # collect IDs for partial configurations
@@ -437,12 +425,15 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
             else:
                 # a candidate partial training run to continue
                 target_fidelity = config.fidelity.value + horizon
-                config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper)  # if horizon exceeds max, query at max
+                # if horizon exceeds max, query at max
+                config.update_hp_values({
+                    config.fidelity_name: min(target_fidelity, config.fidelity.upper)
+                })
                 inc_list.append(inc_value)
         else:
             # a candidate new training run that we would need to start
             current_fidelity = 0
-            config.fidelity.value = horizon
+            config.update_hp_values({config.fidelity_name: horizon})
             inc_list.append(inc_value)
         #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")
diff --git a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
index cc3804f5..e64ea2e3 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_functions/mf_pi.py
@@ -68,13 +68,13 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 if np.less_equal(target_fidelity, config.fidelity.upper):
                     # only consider the configs with fidelity lower than the max fidelity
-                    config.fidelity.value = target_fidelity
+                    config.update_hp_values({config.fidelity_name: target_fidelity})
                     budget_list.append(self.get_budget_level(config))
                 else:
                     # if the target_fidelity higher than the max drop the configuration
                     indices_to_drop.append(i)
             else:
-                config.fidelity.value = target_fidelity
+                config.update_hp_values({config.fidelity_name: target_fidelity})
                 budget_list.append(self.get_budget_level(config))

         # Drop unused configs
@@ -173,7 +173,7 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
                 # if the target_fidelity already reached, drop the configuration
                 indices_to_drop.append(i)
             else:
-                config.fidelity.value = target_fidelity
+                config.update_hp_values({config.fidelity_name: target_fidelity})
                 budget_list.append(self.get_budget_level(config))

         # drop unused configs
@@ -214,23 +214,11 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
         ## marker 2: the maximum fidelity value recorded in observation history
         pseudo_z_max = self.b_step * pseudo_z_level_max + self.pipeline_space.fidelity.lower

-        # TODO: compare with this first draft logic
-        # def update_fidelity(config):
-        #     ### DO NOT DELETE THIS FUNCTION YET
-        #     # for all configs, set the min(max(current fidelity + step, z_inc), pseudo_z_max)
-        #     ## that is, choose the next highest marker from 1 and 2
-        #     z_extrapolate = min(
-        #         max(config.fidelity.value + self.b_step, z_inc),
-        #         pseudo_z_max
-        #     )
-        #     config.fidelity.value = z_extrapolate
-        #     return config
-
         def update_fidelity(config):
             # for all configs, set to pseudo_z_max
             ## that is, choose the highest seen fidelity in observation history
             z_extrapolate = pseudo_z_max
-            config.fidelity.value = z_extrapolate
+            config.update_hp_values({config.fidelity_name: z_extrapolate})
             return config

         # collect IDs for partial configurations
@@ -345,12 +333,16 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
             else:
                 # a candidate partial training run to continue
                target_fidelity = config.fidelity.value + horizon
-                config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper)  # if horizon exceeds max, query at max
+                config.update_hp_values({
+                    config.fidelity_name: min(
+                        config.fidelity.value + horizon, config.fidelity.upper
+                    )  # if horizon exceeds max, query at max
+                })
                 inc_list.append(inc_value)
         else:
             # a candidate new training run that we would need to start
             current_fidelity = 0
-            config.fidelity.value = horizon
+            config.update_hp_values({config.fidelity_name: horizon})
             inc_list.append(inc_value)
         #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")
@@ -430,12 +422,14 @@ def preprocess(self, x: pd.Series) -> Tuple[pd.Series, torch.Tensor]:
             # a candidate partial training run to continue
             target_fidelity = config.fidelity.value + horizon
             # if horizon exceeds max, query at max
-            config.fidelity.value = min(config.fidelity.value + horizon, config.fidelity.upper)
+            config.update_hp_values({config.fidelity_name: min(
+                config.fidelity.value + horizon, config.fidelity.upper
+            )})
             inc_list.append(inc_value)
         else:
             # a candidate new training run that we would need to start
             current_fidelity = 0
-            config.fidelity.value = horizon
+            config.update_hp_values({config.fidelity_name: horizon})
             inc_list.append(inc_value)
         #print(f"- {x.index.values[i]}: {current_fidelity} --> {config.fidelity.value}")
diff --git a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py
index ceb34d5e..70c7a245 100644
--- a/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py
+++ b/neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py
@@ -106,11 +106,7 @@ def sample(
         set_new_sample_fidelity: int | float = None,
     ) -> list():
         """Samples a new set and returns the total set of observed + new configs."""
-        start = time.time()
         partial_configs = self.observations.get_partial_configs_at_max_seen()
-        # print("-" * 50)
-        # print(f"| freeze-thaw:get_partial_at_max_seen(): {time.time()-start:.2f}s")
-        # print("-" * 50)

         _n = n if n is not None else self.SAMPLES_TO_DRAW
         if self.is_tabular:
@@ -123,7 +119,6 @@
             max_n = len(_all_ids) + 1 if self.sample_full_table else _n
             _n = min(max_n, len(_all_ids - _partial_ids))

-            start = time.time()
             _new_configs = np.random.choice(
                 list(_all_ids - _partial_ids), size=_n, replace=False
             )
@@ -134,9 +129,6 @@
             for _i, val in enumerate(_new_configs):
                 _configs[_i]["id"].value = val
-            # print("-" * 50)
-            # print(f"| freeze-thaw:sample:new_configs_extraction: {time.time()-start:.2f}s")
-            # print("-" * 50)
             new_configs = pd.Series(
                 _configs,
                 index=np.arange(
@@ -157,19 +149,11 @@
             )

         # Updating fidelity values
-        start = time.time()
         if set_new_sample_fidelity is not None:
             for config in new_configs:
-                config.fidelity.value = set_new_sample_fidelity
-            # print("-" * 50)
-            # print(f"| freeze-thaw:sample:new_configs_set_fidelity: {time.time()-start:.2f}s")
-            # print("-" * 50)
+                config.update_hp_values({config.fidelity_name: set_new_sample_fidelity})

-        start = time.time()
         configs = pd.concat([deepcopy(partial_configs), new_configs])
-        # print("-" * 50)
-        # print(f"| freeze-thaw:sample:concat_configs: {time.time()-start:.2f}s")
-        # print("-" * 50)

         return configs
diff --git a/neps/optimizers/multi_fidelity/dyhpo.py b/neps/optimizers/multi_fidelity/dyhpo.py
index adda097f..db8de242 100755
--- a/neps/optimizers/multi_fidelity/dyhpo.py
+++ b/neps/optimizers/multi_fidelity/dyhpo.py
@@ -290,13 +290,9 @@ def num_train_configs(self):

     def load_optimization_state(
         self,
         previous_results: dict[str, ConfigResult],
-<<<<<<< HEAD
         pending_evaluations: dict[str, ConfigResult],
-=======
-        pending_evaluations: dict[str, SearchSpace],
         budget_info: BudgetInfo | None,
         optimizer_state: dict[str, Any],
->>>>>>> master
     ) -> None:
         """This is basically the fit method.
@@ -411,7 +407,7 @@ def _randomly_promote(self) -> tuple[SearchSpace, int]:
         # calculating fidelity value
         new_fidelity = self.get_budget_value(budget + 1)
         # setting the config fidelity
-        config.fidelity.value = new_fidelity
+        config.update_hp_values({config.fidelity_name: new_fidelity})
         return config, _config_id

     def get_config_and_ids(  # pylint: disable=no-self-use
@@ -433,7 +429,6 @@ def get_config_and_ids(  # pylint: disable=no-self-use
             _config_dict = config.hp_values()
             _config_dict.update({config.fidelity_name: self.min_budget})
             config.set_hyperparameters_from_dict(_config_dict)
-            # config.fidelity.value = self.min_budget
             _config_id = self.observed_configs.next_config_id()
         elif self.is_init_phase() or self._model_update_failed:
             # promote a config randomly if initial design size is satisfied but the
@@ -498,7 +493,7 @@ def get_config_and_ids(  # pylint: disable=no-self-use

             config = samples.loc[_config_id]
             # IMPORTANT: setting the fidelity value appropriately
-            config.fidelity.value = (
+            _fid_value = (
                 config.fidelity.lower
                 if best_idx > max(self.observed_configs.seen_config_ids)
                 else (
@@ -507,6 +502,7 @@ def get_config_and_ids(  # pylint: disable=no-self-use
                     )
                     + self.step_size  # ONE-STEP FIDELITY QUERY
                 )
             )
+            config.update_hp_values({config.fidelity_name: _fid_value})
             # generating correct IDs
             if _config_id in self.observed_configs.seen_config_ids:
                 config_id = f"{_config_id}_{self.get_budget_level(config)}"
diff --git a/neps/optimizers/multi_fidelity/utils.py b/neps/optimizers/multi_fidelity/utils.py
index 9568b63b..aa6c579c 100644
--- a/neps/optimizers/multi_fidelity/utils.py
+++ b/neps/optimizers/multi_fidelity/utils.py
@@ -277,7 +277,6 @@ def extract_learning_curve(
     def get_training_data_4DyHPO(
         self, df: pd.DataFrame, pipeline_space: SearchSpace | None = None
     ):
-        start = time.time()
         configs = []
         learning_curves = []
         performance = []
@@ -292,9 +291,6 @@ def get_training_data_4DyHPO(
             configs.append(row[self.config_col])
             performance.append(row[self.perf_col])
             learning_curves.append(self.extract_learning_curve(config_id, budget_id))
-        # print("-" * 50)
-        # print(f"| Time for `get_training_data_4DyHPO()`: {time.time()-start:.2f}s")
-        # print("-" * 50)
         return configs, learning_curves, performance

     def get_best_performance_per_config(self, maximize: bool = False) -> pd.Series:
diff --git a/neps/search_spaces/search_space.py b/neps/search_spaces/search_space.py
index 7a6f776e..a819c704 100644
--- a/neps/search_spaces/search_space.py
+++ b/neps/search_spaces/search_space.py
@@ -885,3 +885,17 @@ def is_equal_value(
                 return False

         return True
+
+    def update_hp_values(self, new_values: dict[str, Any]) -> None:
+        """Update the hyperparameter values with new values.
+
+        Args:
+            new_values: The new values to set for the hyperparameters.
+        """
+        _hp_dict = self.hp_values()
+        _intersect = set(_hp_dict.keys()) & set(new_values.keys())
+        assert len(_intersect) == len(new_values), \
"\ + f"{set(_hp_dict.keys()) - set(new_values.keys())} are missing" + _hp_dict.update(new_values) + self.set_hyperparameters_from_dict(_hp_dict) diff --git a/neps/utils/run_args.py b/neps/utils/run_args.py index 9d7f6445..2f8f224b 100644 --- a/neps/utils/run_args.py +++ b/neps/utils/run_args.py @@ -450,13 +450,13 @@ def check_essential_arguments( root_directory: str | None, pipeline_space: dict | None, max_cost_total: int | None, - max_evaluation_total: int | None, + max_evaluations_total: int | None, searcher: BaseOptimizer | dict | str | None, ) -> None: """Validates essential NePS configuration arguments. Ensures 'run_pipeline', 'root_directory', 'pipeline_space', and either - 'max_cost_total' or 'max_evaluation_total' are provided for NePS execution. + 'max_cost_total' or 'max_evaluations_total' are provided for NePS execution. Raises ValueError with missing argument details. Additionally, checks 'searcher' is a BaseOptimizer if 'pipeline_space' is absent. @@ -465,7 +465,7 @@ def check_essential_arguments( root_directory (str): Directory path for data storage. pipeline_space: search space for this run. max_cost_total: Max allowed total cost for experiments. - max_evaluation_total: Max allowed evaluations. + max_evaluations_total: Max allowed evaluations. searcher: Optimizer for the configuration space. Raises: @@ -480,9 +480,9 @@ def check_essential_arguments( # provide the search_space because it's the argument of the searcher. raise ValueError("'pipeline_space' is required but was not provided.") - if not max_evaluation_total and not max_cost_total: + if not max_evaluations_total and not max_cost_total: raise ValueError( - "'max_evaluation_total' or 'max_cost_total' is required but " + "'max_evaluations_total' or 'max_cost_total' is required but " "both were not provided." ) diff --git a/pyproject.toml b/pyproject.toml index 28b6d992..b68f5e5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,14 +65,7 @@ pyyaml = "^6" tensorboard = "^2" typing-extensions = "*" torchvision = ">=0.8.0" - -[tool.poetry.dependencies.ifbo] -version = ">=0.3,<0.4" -python = ">=3.10,<3.12" -optional = true - -[tool.poetry.extras] -ifbo = ["ifbo"] +ifbo = ">=0.3.5" [tool.poetry.group.dev.dependencies] ruff = "^0.4" @@ -282,7 +275,7 @@ markers = [ filterwarnings = "ignore::DeprecationWarning:torch.utils.tensorboard.*:" [tool.mypy] -python_version = "3.8" # Match minimum supported version +python_version = "3.10" # Match minimum supported version packages = ["neps"] show_error_codes = true