[refactor] Refactor __init__ of abstract evaluator
[refactor] Collect shared variables in NamedTuples
[fix] Copy the budget passed to the evaluator params
[refactor] Add cross validation result manager for separate management
[refactor] Separate pipeline classes from abstract evaluator
[refactor] Refactor tae.py
[refactor] Increase the safety level of pipeline config
[test] Fix test_evaluation.py
[test] Fix test_abstract_evaluator.py 1 -- 3
[test] Add default pipeline config
[test] Modify queue.empty in a safer way
[test] Fix test_api.py
[test] Fix test_train_evaluator.py
[refactor] Refactor test_api.py before adding new tests
[refactor] Refactor test_tabular_xxx

[fix] Find the error in test_tabular_xxx

Since the pipeline is updated after the evaluations and the previous code
updated self.pipeline inside the predict method, the dummy class only
needed to override predict. The new code, however, performs the pipeline
update separately, so I override the get_pipeline method instead to
reproduce the same results.
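
A minimal sketch of this override pattern (all class names here are
illustrative assumptions; only get_pipeline comes from the actual change):

class RealPipeline:
    def predict(self, X):
        return [0.0 for _ in X]

class DummyPipeline(RealPipeline):
    def predict(self, X):
        return [1.0 for _ in X]

class BaseEvaluator:
    def get_pipeline(self):
        # The pipeline is now fetched here instead of being swapped inside predict.
        return RealPipeline()

    def predict(self, X):
        return self.get_pipeline().predict(X)

class DummyEvaluator(BaseEvaluator):
    def get_pipeline(self):
        # Overriding only this hook reproduces the old behaviour, where the
        # dummy class replaced self.pipeline inside predict.
        return DummyPipeline()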

[fix] Fix the shape issue in regression and add bug comment in a test
[refactor] Use keyword args to avoid unexpected bugs
[fix] Fix the ground truth of test_cv

We changed the weighting strategy for cross validation in the validation
phase so that the performance of each model is weighted proportionally
to the size of each VALIDATION split, so the expected value in the test
had to change.
Note that the previous code weighted the performance proportionally
to the TRAINING splits in both the training and validation phases.
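
As a minimal sketch of the new weighting (all names assumed for illustration):

import numpy as np

def weighted_cv_score(split_scores, val_split_sizes):
    # Weight each fold's score by the size of its VALIDATION split;
    # np.average normalizes the weights, so raw sizes can be passed.
    weights = np.asarray(val_split_sizes, dtype=float)
    return float(np.average(split_scores, weights=weights))

# Three folds with uneven validation splits:
# 0.9 * 0.5 + 0.8 * 0.25 + 0.85 * 0.25 = 0.8625
print(weighted_cv_score([0.9, 0.8, 0.85], [100, 50, 50]))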

[fix] Change qsize --> Empty since qsize might not be reliable (see the sketch after this list)
[refactor] Add cost for crash in autoPyTorchMetrics
[test] Remove self.pipeline since this is a duplication of self.pipelines
[fix] Fix attribute errors caused by the last change in curve extraction
[fix] Fix the issue when taking num_classes from regression task
[fix] Deactivate the save of cv model in the case of holdout
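
The qsize --> Empty fix follows the standard pattern sketched below;
the drain helper is a hypothetical name, not code from this commit:

import multiprocessing
import queue

def drain(q: multiprocessing.Queue) -> list:
    # Queue.qsize() can raise NotImplementedError (e.g. on macOS) and is
    # only approximate under concurrency, so rely on the Empty exception.
    items = []
    while True:
        try:
            items.append(q.get(timeout=1.0))
        except queue.Empty:
            break
    return items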
nabenabe0928 committed Jan 28, 2022
1 parent 1431980 commit 379033b
Showing 14 changed files with 1,615 additions and 2,019 deletions.
31 changes: 16 additions & 15 deletions autoPyTorch/api/base_task.py
@@ -48,8 +48,9 @@
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
 from autoPyTorch.ensemble.singlebest_ensemble import SingleBest
-from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings
-from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
+from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline
+from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config
+from autoPyTorch.evaluation.tae import TargetAlgorithmQuery
 from autoPyTorch.evaluation.utils import DisableFileOutputParameters
 from autoPyTorch.optimizer.smbo import AutoMLSMBO
 from autoPyTorch.pipeline.base_pipeline import BasePipeline
@@ -685,22 +686,23 @@ def _do_dummy_prediction(self) -> None:
             # already be generated here!
             stats = Stats(scenario_mock)
             stats.start_timing()
-            ta = ExecuteTaFuncWithQueue(
+            taq = TargetAlgorithmQuery(
                 pynisher_context=self._multiprocessing_context,
                 backend=self._backend,
                 seed=self.seed,
                 metric=self._metric,
                 logger_port=self._logger_port,
-                cost_for_crash=get_cost_of_crash(self._metric),
+                cost_for_crash=self._metric._cost_of_crash,
                 abort_on_first_run_crash=False,
                 initial_num_run=num_run,
+                pipeline_config=get_default_pipeline_config(choice='dummy'),
                 stats=stats,
                 memory_limit=memory_limit,
                 disable_file_output=self._disable_file_output,
                 all_supported_metrics=self._all_supported_metrics
             )

-            status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task)
+            status, _, _, additional_info = taq.run(num_run, cutoff=self._time_for_task)
             if status == StatusType.SUCCESS:
                 self._logger.info("Finished creating dummy predictions.")
             else:
@@ -769,13 +771,13 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
             # already be generated here!
             stats = Stats(scenario_mock)
             stats.start_timing()
-            ta = ExecuteTaFuncWithQueue(
+            taq = TargetAlgorithmQuery(
                 pynisher_context=self._multiprocessing_context,
                 backend=self._backend,
                 seed=self.seed,
                 metric=self._metric,
                 logger_port=self._logger_port,
-                cost_for_crash=get_cost_of_crash(self._metric),
+                cost_for_crash=self._metric._cost_of_crash,
                 abort_on_first_run_crash=False,
                 initial_num_run=self._backend.get_next_num_run(),
                 stats=stats,
@@ -786,7 +788,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
             dask_futures.append([
                 classifier,
                 self._dask_client.submit(
-                    ta.run, config=classifier,
+                    taq.run, config=classifier,
                     cutoff=func_eval_time_limit_secs,
                 )
             ])
@@ -1076,7 +1078,7 @@ def _search(

         # Here the budget is set to max because the SMAC intensifier can be:
         # Hyperband: in this case the budget is determined on the fly and overwritten
-        # by the ExecuteTaFuncWithQueue
+        # by the TargetAlgorithmQuery
         # SimpleIntensifier (and others): in this case, we use max_budget as a target
         # budget, and hence the below line is honored
         self.pipeline_options[budget_type] = max_budget
@@ -1360,7 +1362,7 @@ def refit(
             dataset_properties=dataset_properties,
             dataset=dataset,
             split_id=split_id)
-        fit_and_suppress_warnings(self._logger, model, X, y=None)
+        fit_pipeline(self._logger, model, X, y=None)

         self._clean_logger()

@@ -1571,27 +1573,26 @@ def fit_pipeline(

         stats.start_timing()

-        tae = ExecuteTaFuncWithQueue(
+        taq = TargetAlgorithmQuery(
             backend=self._backend,
             seed=self.seed,
             metric=metric,
             logger_port=self._logger_port,
-            cost_for_crash=get_cost_of_crash(metric),
+            cost_for_crash=metric._cost_of_crash,
             abort_on_first_run_crash=False,
             initial_num_run=self._backend.get_next_num_run(),
             stats=stats,
             memory_limit=memory_limit,
             disable_file_output=disable_file_output,
             all_supported_metrics=all_supported_metrics,
-            budget_type=budget_type,
             include=include_components,
             exclude=exclude_components,
             search_space_updates=search_space_updates,
             pipeline_config=pipeline_options,
             pynisher_context=self._multiprocessing_context
         )

-        run_info, run_value = tae.run_wrapper(
+        run_info, run_value = taq.run_wrapper(
             RunInfo(config=configuration,
                     budget=budget,
                     seed=self.seed,
@@ -1603,7 +1604,7 @@ def fit_pipeline(

         fitted_pipeline = self._get_fitted_pipeline(
             dataset_name=dataset.dataset_name,
-            pipeline_idx=run_info.config.config_id + tae.initial_num_run,
+            pipeline_idx=run_info.config.config_id + taq.initial_num_run,
             run_info=run_info,
             run_value=run_value,
             disable_file_output=disable_file_output
5 changes: 5 additions & 0 deletions autoPyTorch/configs/dummy_pipeline_options.json
@@ -0,0 +1,5 @@
+{
+    "budget_type": "epochs",
+    "epochs": 1,
+    "runtime": 1
+}
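
For reference, a hypothetical sketch of how get_default_pipeline_config(choice='dummy')
could resolve this file; the function name and config path come from the diff above,
but this body is an assumption, not the actual implementation:

import json
import os

def get_default_pipeline_config(choice: str = "default") -> dict:
    # Map a choice such as 'dummy' to its bundled JSON options file.
    fname = os.path.join("autoPyTorch", "configs", f"{choice}_pipeline_options.json")
    with open(fname) as f:
        return json.load(f)

# get_default_pipeline_config(choice='dummy')
# -> {'budget_type': 'epochs', 'epochs': 1, 'runtime': 1}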
