[refactor] Refactor __init__ of abstract evaluator
[refactor] Collect shared variables in NamedTuples
[fix] Copy the budget passed to the evaluator params
[refactor] Add cross validation result manager for separate management
[refactor] Separate pipeline classes from abstract evaluator
[refactor] Refactor tae.py
[refactor] Increase the safety level of pipeline config
[test] Fix test_evaluation.py
[test] Fix test_abstract_evaluator.py 1 -- 3
[test] Add default pipeline config
[test] Modify queue.empty in a safer way
[test] Fix test_api.py
[test] Fix test_train_evaluator.py
[refactor] Refactor test_api.py before adding new tests
[refactor] Refactor test_tabular_xxx

[fix] Find the error in test_tabular_xxx

Since the pipeline is updated after the evaluations and the previous code
updated self.pipeline inside the predict method, the dummy class only
needed to override predict. The new code, however, performs the pipeline
update separately, so I override the get_pipeline method instead to
reproduce the same results.
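
A minimal sketch of this override pattern (all class names here are
illustrative assumptions; only get_pipeline comes from the actual change):

class RealPipeline:
    def predict(self, X):
        return [0.0 for _ in X]

class DummyPipeline(RealPipeline):
    def predict(self, X):
        return [1.0 for _ in X]

class BaseEvaluator:
    def get_pipeline(self):
        # The pipeline is now fetched here instead of being swapped inside predict.
        return RealPipeline()

    def predict(self, X):
        return self.get_pipeline().predict(X)

class DummyEvaluator(BaseEvaluator):
    def get_pipeline(self):
        # Overriding only this hook reproduces the old behaviour, where the
        # dummy class replaced self.pipeline inside predict.
        return DummyPipeline()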

[fix] Fix the shape issue in regression and add bug comment in a test
[refactor] Use keyword args to avoid unexpected bugs
[fix] Fix the ground truth of test_cv

We changed the weighting strategy for cross validation in the validation
phase so that the performance of each model is weighted proportionally
to the size of each VALIDATION split, so the expected value in the test
had to change.
Note that the previous code weighted the performance proportionally
to the TRAINING splits in both the training and validation phases.
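
As a minimal sketch of the new weighting (all names assumed for illustration):

import numpy as np

def weighted_cv_score(split_scores, val_split_sizes):
    # Weight each fold's score by the size of its VALIDATION split;
    # np.average normalizes the weights, so raw sizes can be passed.
    weights = np.asarray(val_split_sizes, dtype=float)
    return float(np.average(split_scores, weights=weights))

# Three folds with uneven validation splits:
# 0.9 * 0.5 + 0.8 * 0.25 + 0.85 * 0.25 = 0.8625
print(weighted_cv_score([0.9, 0.8, 0.85], [100, 50, 50]))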

[fix] Change qsize --> Empty since qsize might not be reliable (see the sketch after this list)
[refactor] Add cost for crash in autoPyTorchMetrics
[test] Remove self.pipeline since this is a duplication of self.pipelines
[fix] Fix attribute errors caused by the last change in curve extraction
[fix] Fix the issue when taking num_classes from regression task
[fix] Deactivate the save of cv model in the case of holdout
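
The qsize --> Empty fix follows the standard pattern sketched below;
the drain helper is a hypothetical name, not code from this commit:

import multiprocessing
import queue

def drain(q: multiprocessing.Queue) -> list:
    # Queue.qsize() can raise NotImplementedError (e.g. on macOS) and is
    # only approximate under concurrency, so rely on the Empty exception.
    items = []
    while True:
        try:
            items.append(q.get(timeout=1.0))
        except queue.Empty:
            break
    return items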
nabenabe0928 committed Jan 28, 2022
1 parent 1431980 commit 379033b
Showing 14 changed files with 1,615 additions and 2,019 deletions.
31 changes: 16 additions & 15 deletions autoPyTorch/api/base_task.py
@@ -48,8 +48,9 @@
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
-from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings
-from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
+from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline
+from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config
+from autoPyTorch.evaluation.tae import TargetAlgorithmQuery
 from autoPyTorch.evaluation.utils import DisableFileOutputParameters
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
@@ -685,22 +686,23 @@ def _do_dummy_prediction(self) -> None:
             # already be generated here!
             stats = Stats(scenario_mock)
             stats.start_timing()
-            ta = ExecuteTaFuncWithQueue(
+            taq = TargetAlgorithmQuery(
                 pynisher_context=self._multiprocessing_context,
                 backend=self._backend,
                 seed=self.seed,
                 metric=self._metric,
                 logger_port=self._logger_port,
-                cost_for_crash=get_cost_of_crash(self._metric),
+                cost_for_crash=self._metric._cost_of_crash,
                 abort_on_first_run_crash=False,
                 initial_num_run=num_run,
+                pipeline_config=get_default_pipeline_config(choice='dummy'),
                 stats=stats,
                 memory_limit=memory_limit,
                 disable_file_output=self._disable_file_output,
                 all_supported_metrics=self._all_supported_metrics
             )

-            status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task)
+            status, _, _, additional_info = taq.run(num_run, cutoff=self._time_for_task)
             if status == StatusType.SUCCESS:
                 self._logger.info("Finished creating dummy predictions.")
             else:
@@ -769,13 +771,13 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
             # already be generated here!
             stats = Stats(scenario_mock)
             stats.start_timing()
-            ta = ExecuteTaFuncWithQueue(
+            taq = TargetAlgorithmQuery(
                 pynisher_context=self._multiprocessing_context,
                 backend=self._backend,
                 seed=self.seed,
                 metric=self._metric,
                 logger_port=self._logger_port,
-                cost_for_crash=get_cost_of_crash(self._metric),
+                cost_for_crash=self._metric._cost_of_crash,
                 abort_on_first_run_crash=False,
                 initial_num_run=self._backend.get_next_num_run(),
                 stats=stats,
@@ -786,7 +788,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
             dask_futures.append([
                 classifier,
                 self._dask_client.submit(
-                    ta.run, config=classifier,
+                    taq.run, config=classifier,
                     cutoff=func_eval_time_limit_secs,
                 )
             ])
@@ -1076,7 +1078,7 @@ def _search(

         # Here the budget is set to max because the SMAC intensifier can be:
         # Hyperband: in this case the budget is determined on the fly and overwritten
-        # by the ExecuteTaFuncWithQueue
+        # by the TargetAlgorithmQuery
         # SimpleIntensifier (and others): in this case, we use max_budget as a target
         # budget, and hence the below line is honored
         self.pipeline_options[budget_type] = max_budget
@@ -1360,7 +1362,7 @@ def refit(
             dataset_properties=dataset_properties,
             dataset=dataset,
             split_id=split_id)
-        fit_and_suppress_warnings(self._logger, model, X, y=None)
+        fit_pipeline(self._logger, model, X, y=None)

         self._clean_logger()

@@ -1571,27 +1573,26 @@ def fit_pipeline(

         stats.start_timing()

-        tae = ExecuteTaFuncWithQueue(
+        taq = TargetAlgorithmQuery(
             backend=self._backend,
             seed=self.seed,
             metric=metric,
             logger_port=self._logger_port,
-            cost_for_crash=get_cost_of_crash(metric),
+            cost_for_crash=metric._cost_of_crash,
             abort_on_first_run_crash=False,
             initial_num_run=self._backend.get_next_num_run(),
             stats=stats,
             memory_limit=memory_limit,
             disable_file_output=disable_file_output,
             all_supported_metrics=all_supported_metrics,
-            budget_type=budget_type,
             include=include_components,
             exclude=exclude_components,
             search_space_updates=search_space_updates,
             pipeline_config=pipeline_options,
             pynisher_context=self._multiprocessing_context
         )

-        run_info, run_value = tae.run_wrapper(
+        run_info, run_value = taq.run_wrapper(
             RunInfo(config=configuration,
                     budget=budget,
                     seed=self.seed,
@@ -1603,7 +1604,7 @@ def fit_pipeline(

         fitted_pipeline = self._get_fitted_pipeline(
             dataset_name=dataset.dataset_name,
-            pipeline_idx=run_info.config.config_id + tae.initial_num_run,
+            pipeline_idx=run_info.config.config_id + taq.initial_num_run,
             run_info=run_info,
             run_value=run_value,
             disable_file_output=disable_file_output
5 changes: 5 additions & 0 deletions autoPyTorch/configs/dummy_pipeline_options.json
@@ -0,0 +1,5 @@
+{
+    "budget_type": "epochs",
+    "epochs": 1,
+    "runtime": 1
+}
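
For reference, a hypothetical sketch of how get_default_pipeline_config(choice='dummy')
could resolve this file; the function name and config path come from the diff above,
but this body is an assumption, not the actual implementation:

import json
import os

def get_default_pipeline_config(choice: str = "default") -> dict:
    # Map a choice such as 'dummy' to its bundled JSON options file.
    fname = os.path.join("autoPyTorch", "configs", f"{choice}_pipeline_options.json")
    with open(fname) as f:
        return json.load(f)

# get_default_pipeline_config(choice='dummy')
# -> {'budget_type': 'epochs', 'epochs': 1, 'runtime': 1}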
