Commit
[rebase] Rebase to the latest version and merge test_evaluator to train_evaluator

Since test_evaluator can be merged into train_evaluator, I merged it.

* [rebase] Rebase and merge the changes in non-test files without issues
* [refactor] Merge test- and train-evaluator
* [fix] Fix the import error due to the change xxx_evaluator --> evaluator
* [test] Fix errors in tests
* [fix] Fix the handling of test pred in no resampling
* [refactor] Move save_y_opt=False for no resampling deeper for simplicity
* [test] Increase the budget size for no resample tests
nabenabe0928 committed Feb 23, 2022
1 parent b32e8be commit 2eea80f
Showing 13 changed files with 298 additions and 603 deletions.
10 changes: 5 additions & 5 deletions autoPyTorch/api/base_task.py
@@ -315,7 +315,7 @@ def _get_dataset_input_validator(
Testing feature set
y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
Testing target set
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. If None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
@@ -355,7 +355,7 @@ def get_dataset(
Testing feature set
y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
Testing target set
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. If None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
@@ -973,7 +973,7 @@ def _search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
@@ -1380,7 +1380,7 @@ def fit_pipeline(
X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
dataset_name: Optional[str] = None,
resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes]] = None,
resampling_strategy: Optional[ResamplingStrategies] = None,
resampling_strategy_args: Optional[Dict[str, Any]] = None,
run_time_limit_secs: int = 60,
memory_limit: Optional[int] = None,
@@ -1415,7 +1415,7 @@ def fit_pipeline(
be provided to track the generalization performance of each stage.
dataset_name (Optional[str]):
Name of the dataset, if None, random value is used.
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. If None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
2 changes: 1 addition & 1 deletion autoPyTorch/api/tabular_classification.py
@@ -330,7 +330,7 @@ def search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
2 changes: 1 addition & 1 deletion autoPyTorch/api/tabular_regression.py
@@ -331,7 +331,7 @@ def search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
8 changes: 8 additions & 0 deletions autoPyTorch/datasets/resampling_strategy.py
@@ -93,6 +93,14 @@ def is_stratified(self) -> bool:
# TODO: replace it with another way
ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]


def check_resampling_strategy(resampling_strategy: Optional[ResamplingStrategies]) -> None:
choices = (CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes)
if not isinstance(resampling_strategy, choices):
rs_names = [rs.__name__ for rs in choices]  # materialise the names so they render in the error message
raise ValueError(f'resampling_strategy must be in {rs_names}, but got {resampling_strategy}')


DEFAULT_RESAMPLING_PARAMETERS: Dict[
ResamplingStrategies,
Dict[str, Any]
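For orientation, a minimal usage sketch of the new helper (not part of the commit; the CrossValTypes and NoResamplingStrategyTypes member names below are assumptions about the enums defined in this module, only HoldoutValTypes.holdout_validation is confirmed by the surrounding docstrings):

    from autoPyTorch.datasets.resampling_strategy import (
        CrossValTypes,
        HoldoutValTypes,
        NoResamplingStrategyTypes,
        check_resampling_strategy,
    )

    # Members of any of the three strategy enums pass the check silently.
    check_resampling_strategy(HoldoutValTypes.holdout_validation)
    check_resampling_strategy(CrossValTypes.k_fold_cross_validation)      # assumed member name
    check_resampling_strategy(NoResamplingStrategyTypes.no_resampling)    # assumed member name

    # Anything else, including None, raises a ValueError naming the valid choices.
    try:
        check_resampling_strategy(None)
    except ValueError as err:
        print(err)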
2 changes: 1 addition & 1 deletion autoPyTorch/evaluation/abstract_evaluator.py
@@ -207,7 +207,7 @@ def __init__(self, backend: Backend,
An evaluator is an object that:
+ constructs a pipeline (i.e. a classification or regression estimator) for a given
pipeline_config and run settings (budget, seed)
+ Fits and trains this pipeline (TrainEvaluator) or tests a given
+ Fits and trains this pipeline (Evaluator) or tests a given
configuration (TestEvaluator)

The provided configuration determines the type of pipeline created. For more
autoPyTorch/evaluation/evaluator.py
@@ -7,12 +7,11 @@

from smac.tae import StatusType

from autoPyTorch.automl_common.common.utils.backend import Backend
from autoPyTorch.constants import (
CLASSIFICATION_TASKS,
MULTICLASSMULTIOUTPUT,
from autoPyTorch.datasets.resampling_strategy import (
CrossValTypes,
NoResamplingStrategyTypes,
check_resampling_strategy
)
from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
from autoPyTorch.evaluation.abstract_evaluator import (
AbstractEvaluator,
EvaluationResults,
@@ -21,7 +20,8 @@
from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams
from autoPyTorch.utils.common import dict_repr, subsampler

__all__ = ['TrainEvaluator', 'eval_train_function']
__all__ = ['Evaluator', 'eval_fn']


class _CrossValidationResultsManager:
def __init__(self, num_folds: int):
@@ -83,15 +83,13 @@ def get_result_dict(self) -> Dict[str, Any]:
)


class TrainEvaluator(AbstractEvaluator):
class Evaluator(AbstractEvaluator):
"""
This class builds a pipeline using the provided configuration.
A pipeline implementing the provided configuration is fitted
using the datamanager object retrieved from disc, via the backend.
After the pipeline is fitted, it is saved to disc and the performance estimate
is communicated to the main process via a Queue. It is only compatible
with `CrossValTypes`, `HoldoutValTypes`, i.e, when the training data
is split and the validation set is used for SMBO optimisation.
is communicated to the main process via a Queue.
Args:
queue (Queue):
@@ -101,43 +99,17 @@ class TrainEvaluator(AbstractEvaluator):
Fixed parameters for a pipeline
evaluator_params (EvaluatorParams):
The parameters for an evaluator.
Attributes:
train (bool):
Whether the training data is split and the validation set is used for SMBO optimisation.
cross_validation (bool):
Whether we use cross validation or not.
"""
def __init__(self, backend: Backend, queue: Queue,
metric: autoPyTorchMetric,
budget: float,
configuration: Union[int, str, Configuration],
budget_type: str = None,
pipeline_config: Optional[Dict[str, Any]] = None,
seed: int = 1,
output_y_hat_optimization: bool = True,
num_run: Optional[int] = None,
include: Optional[Dict[str, Any]] = None,
exclude: Optional[Dict[str, Any]] = None,
disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
init_params: Optional[Dict[str, Any]] = None,
logger_port: Optional[int] = None,
keep_models: Optional[bool] = None,
all_supported_metrics: bool = True,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None) -> None:
super().__init__(
backend=backend,
queue=queue,
configuration=configuration,
metric=metric,
seed=seed,
output_y_hat_optimization=output_y_hat_optimization,
num_run=num_run,
include=include,
exclude=exclude,
disable_file_output=disable_file_output,
init_params=init_params,
budget=budget,
budget_type=budget_type,
logger_port=logger_port,
all_supported_metrics=all_supported_metrics,
pipeline_config=pipeline_config,
search_space_updates=search_space_updates
)
def __init__(self, queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams):
resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy
self.train = not isinstance(resampling_strategy, NoResamplingStrategyTypes)
self.cross_validation = isinstance(resampling_strategy, CrossValTypes)

if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
raise ValueError(
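A hedged sketch of what the two new attributes evaluate to for each strategy family (the helper function is illustrative only; enum member names other than holdout_validation are assumptions):

    from autoPyTorch.datasets.resampling_strategy import (
        CrossValTypes,
        HoldoutValTypes,
        NoResamplingStrategyTypes,
    )

    def evaluator_flags(resampling_strategy):
        # Mirrors the two assignments in Evaluator.__init__ above.
        train = not isinstance(resampling_strategy, NoResamplingStrategyTypes)
        cross_validation = isinstance(resampling_strategy, CrossValTypes)
        return train, cross_validation

    print(evaluator_flags(HoldoutValTypes.holdout_validation))       # (True, False)
    print(evaluator_flags(CrossValTypes.k_fold_cross_validation))    # (True, True), assumed member name
    print(evaluator_flags(NoResamplingStrategyTypes.no_resampling))  # (False, False), assumed member name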
@@ -175,7 +147,7 @@ def _evaluate_on_split(self, split_id: int) -> EvaluationResults:

return EvaluationResults(
pipeline=pipeline,
opt_loss=self._loss(labels=self.y_train[opt_split], preds=opt_pred),
opt_loss=self._loss(labels=self.y_train[opt_split] if self.train else self.y_test, preds=opt_pred),
train_loss=self._loss(labels=self.y_train[train_split], preds=train_pred),
opt_pred=opt_pred,
valid_pred=valid_pred,
@@ -201,6 +173,7 @@ def _cross_validation(self) -> EvaluationResults:
results = self._evaluate_on_split(split_id)

self.pipelines[split_id] = results.pipeline
assert opt_split is not None # mypy redefinition
cv_results.update(split_id, results, len(train_split), len(opt_split))

self.y_opt = np.concatenate([y_opt for y_opt in Y_opt if y_opt is not None])
@@ -212,15 +185,16 @@ def evaluate_loss(self) -> None:
if self.splits is None:
raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None")

if self.num_folds == 1:
if self.cross_validation:
results = self._cross_validation()
else:
_, opt_split = self.splits[0]
results = self._evaluate_on_split(split_id=0)
self.y_opt, self.pipelines[0] = self.y_train[opt_split], results.pipeline
else:
results = self._cross_validation()
self.pipelines[0] = results.pipeline
self.y_opt = self.y_train[opt_split] if self.train else self.y_test

self.logger.debug(
f"In train evaluator.evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss},"
f"In evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss},"
f" status: {results.status},\nadditional run info:\n{dict_repr(results.additional_run_info)}"
)
self.record_evaluation(results=results)
@@ -240,41 +214,23 @@ def _fit_and_evaluate_loss(

kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]}
train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs)
opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs)
valid_pred = self.predict(self.X_valid, **kwargs)
test_pred = self.predict(self.X_test, **kwargs)
valid_pred = self.predict(self.X_valid, **kwargs)

# No resampling ===> evaluate on test dataset
opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) if self.train else test_pred

assert train_pred is not None and opt_pred is not None # mypy check
return train_pred, opt_pred, valid_pred, test_pred
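To make the no-resampling path concrete, here is a small self-contained sketch (illustrative names, not from the commit) of the routing that the `if self.train else` expressions in evaluate_loss and _fit_and_evaluate_loss implement: with holdout or cross-validation the optimisation targets come from the held-out part of the training data, while with no resampling they are simply the test targets.

    import numpy as np

    def route_opt_targets(train, y_train, opt_indices, y_test):
        # Mirrors `self.y_opt = self.y_train[opt_split] if self.train else self.y_test`.
        return y_train[opt_indices] if train else y_test

    y_train = np.arange(10)           # toy training targets
    y_test = np.arange(100, 103)      # toy test targets
    opt_indices = np.array([1, 3, 5])

    print(route_opt_targets(True, y_train, opt_indices, y_test))    # [1 3 5]       -> holdout / CV
    print(route_opt_targets(False, y_train, opt_indices, y_test))   # [100 101 102] -> no resampling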


# create closure for evaluating an algorithm
def eval_train_function(
backend: Backend,
queue: Queue,
metric: autoPyTorchMetric,
budget: float,
config: Optional[Configuration],
seed: int,
output_y_hat_optimization: bool,
num_run: int,
include: Optional[Dict[str, Any]],
exclude: Optional[Dict[str, Any]],
disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
pipeline_config: Optional[Dict[str, Any]] = None,
budget_type: str = None,
init_params: Optional[Dict[str, Any]] = None,
logger_port: Optional[int] = None,
all_supported_metrics: bool = True,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
instance: str = None,
) -> None:
def eval_fn(queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams) -> None:
"""
This closure allows the communication between the TargetAlgorithmQuery and the
pipeline trainer (TrainEvaluator).
pipeline trainer (Evaluator).
Fundamentally, smac calls the TargetAlgorithmQuery.run() method, which internally
builds a TrainEvaluator. The TrainEvaluator builds a pipeline, stores the output files
builds an Evaluator. The Evaluator builds a pipeline, stores the output files
to disc via the backend, and puts the performance result of the run in the queue.
Args:
@@ -286,7 +242,11 @@ def eval_train_function(
evaluator_params (EvaluatorParams):
The parameters for an evaluator.
"""
evaluator = TrainEvaluator(
resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy
check_resampling_strategy(resampling_strategy)

# NoResamplingStrategyTypes ==> test evaluator, otherwise ==> train evaluator
evaluator = Evaluator(
queue=queue,
evaluator_params=evaluator_params,
fixed_pipeline_params=fixed_pipeline_params
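For downstream code, the "xxx_evaluator --> evaluator" fix in the commit message implies imports change roughly as follows (a hedged sketch; the new module path and names are taken from the docstrings and __all__ above, the old ones from the removed lines):

    # Before this commit:
    # from autoPyTorch.evaluation.train_evaluator import TrainEvaluator, eval_train_function

    # After this commit:
    from autoPyTorch.evaluation.evaluator import Evaluator, eval_fn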
