[rebase] Rebase to the latest version and merge test_evaluator to train_evaluator

Since test_evaluator can be merged into train_evaluator, I merged them into a single evaluator.

* [rebase] Rebase and merge the changes in non-test files without issues
* [refactor] Merge test- and train-evaluator
* [fix] Fix the import error due to the change xxx_evaluator --> evaluator
* [test] Fix errors in tests
* [fix] Fix the handling of test predictions when no resampling is used (see the sketch after this list)
* [refactor] Move the save_y_opt=False handling for no resampling deeper into the evaluator for simplicity
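
In short, the merged evaluator decides at construction time whether it behaves like the old train evaluator or the old test evaluator, purely from the dataset's resampling strategy. Below is a minimal sketch of that decision, assuming autoPyTorch is installed; the helper name and printed strings are illustrative only, and the enum member names are assumed from the library:

from autoPyTorch.datasets.resampling_strategy import (
    CrossValTypes,
    HoldoutValTypes,
    NoResamplingStrategyTypes,
    ResamplingStrategies,
)


def evaluator_mode(resampling_strategy: ResamplingStrategies) -> str:
    # Mirrors the flags the merged Evaluator sets in __init__.
    train = not isinstance(resampling_strategy, NoResamplingStrategyTypes)
    cross_validation = isinstance(resampling_strategy, CrossValTypes)
    if not train:
        # Former TestEvaluator path: no validation split, and save_y_opt is forced to False.
        return "test mode (no resampling)"
    return "train mode (cross-validation)" if cross_validation else "train mode (holdout)"


print(evaluator_mode(HoldoutValTypes.holdout_validation))       # train mode (holdout)
print(evaluator_mode(CrossValTypes.k_fold_cross_validation))    # train mode (cross-validation)
print(evaluator_mode(NoResamplingStrategyTypes.no_resampling))  # test mode (no resampling)
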
nabenabe0928 committed Jan 30, 2022
1 parent 5379632 commit c41c87f
Showing 15 changed files with 323 additions and 872 deletions.
10 changes: 5 additions & 5 deletions autoPyTorch/api/base_task.py
@@ -315,7 +315,7 @@ def _get_dataset_input_validator(
Testing feature set
y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
Testing target set
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. if None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
@@ -355,7 +355,7 @@ def get_dataset(
Testing feature set
y_test (Optional[Union[List, pd.DataFrame, np.ndarray]]):
Testing target set
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. if None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
@@ -971,7 +971,7 @@ def _search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
@@ -1378,7 +1378,7 @@ def fit_pipeline(
X_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
y_test: Optional[Union[List, pd.DataFrame, np.ndarray]] = None,
dataset_name: Optional[str] = None,
resampling_strategy: Optional[Union[HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes]] = None,
resampling_strategy: Optional[ResamplingStrategies] = None,
resampling_strategy_args: Optional[Dict[str, Any]] = None,
run_time_limit_secs: int = 60,
memory_limit: Optional[int] = None,
@@ -1413,7 +1413,7 @@ def fit_pipeline(
be provided to track the generalization performance of each stage.
dataset_name (Optional[str]):
Name of the dataset, if None, random value is used.
resampling_strategy (Optional[RESAMPLING_STRATEGIES]):
resampling_strategy (Optional[ResamplingStrategies]):
Strategy to split the training data. if None, uses
HoldoutValTypes.holdout_validation.
resampling_strategy_args (Optional[Dict[str, Any]]):
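
The docstring updates above all point at the same user-facing change: the documented type of resampling_strategy is now the ResamplingStrategies alias, which also covers NoResamplingStrategyTypes. A hedged usage sketch follows; the keyword names come from the docstrings shown above, while X_train/y_train, the default task constructor, and the toy data are illustrative assumptions:

import numpy as np

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

api = TabularClassificationTask()
# With no resampling, the whole training set is used for fitting and the test split
# is used to report the optimisation loss (see the evaluator changes further down).
dataset = api.get_dataset(
    X_train=X,
    y_train=y,
    X_test=X,
    y_test=y,
    resampling_strategy=NoResamplingStrategyTypes.no_resampling,
)
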
2 changes: 1 addition & 1 deletion autoPyTorch/api/tabular_classification.py
@@ -330,7 +330,7 @@ def search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
2 changes: 1 addition & 1 deletion autoPyTorch/api/tabular_regression.py
@@ -331,7 +331,7 @@ def search(
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
tae_func (Optional[Callable]):
TargetAlgorithm to be optimised. If None, `eval_function`
available in autoPyTorch/evaluation/train_evaluator is used.
available in autoPyTorch/evaluation/evaluator is used.
Must be child class of AbstractEvaluator.
all_supported_metrics (bool: default=True):
If True, all metrics supporting current task will be calculated
8 changes: 8 additions & 0 deletions autoPyTorch/datasets/resampling_strategy.py
@@ -93,6 +93,14 @@ def is_stratified(self) -> bool:
# TODO: replace it with another way
ResamplingStrategies = Union[CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes]


def check_resampling_strategy(resampling_strategy: Optional[ResamplingStrategies]) -> None:
choices = (CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes)
if not isinstance(resampling_strategy, choices):
rs_names = (rs.__mro__[0].__name__ for rs in choices)
raise ValueError(f'resampling_strategy must be in {rs_names}, but got {resampling_strategy}')


DEFAULT_RESAMPLING_PARAMETERS: Dict[
ResamplingStrategies,
Dict[str, Any]
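
The new check_resampling_strategy helper fails fast when anything other than the three supported enum families reaches the evaluation machinery. A short usage sketch (assuming autoPyTorch is installed):

from autoPyTorch.datasets.resampling_strategy import (
    HoldoutValTypes,
    check_resampling_strategy,
)

# Members of CrossValTypes, HoldoutValTypes or NoResamplingStrategyTypes pass silently.
check_resampling_strategy(HoldoutValTypes.holdout_validation)

# Anything else, including None, raises a ValueError before an evaluator is built.
try:
    check_resampling_strategy(None)
except ValueError as err:
    print(err)
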
2 changes: 1 addition & 1 deletion autoPyTorch/evaluation/abstract_evaluator.py
@@ -247,7 +247,7 @@ class AbstractEvaluator(object):
An evaluator is an object that:
+ constructs a pipeline (i.e. a classification or regression estimator) for a given
pipeline_config and run settings (budget, seed)
+ Fits and trains this pipeline (TrainEvaluator) or tests a given
+ Fits and trains this pipeline (Evaluator) or tests a given
configuration (TestEvaluator)
The provided configuration determines the type of pipeline created. For more
@@ -7,12 +7,11 @@

from smac.tae import StatusType

from autoPyTorch.automl_common.common.utils.backend import Backend
from autoPyTorch.constants import (
CLASSIFICATION_TASKS,
MULTICLASSMULTIOUTPUT,
from autoPyTorch.datasets.resampling_strategy import (
CrossValTypes,
NoResamplingStrategyTypes,
check_resampling_strategy
)
from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
from autoPyTorch.evaluation.abstract_evaluator import (
AbstractEvaluator,
EvaluationResults,
@@ -21,7 +20,8 @@
from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams
from autoPyTorch.utils.common import dict_repr, subsampler

__all__ = ['TrainEvaluator', 'eval_train_function']
__all__ = ['Evaluator', 'eval_fn']


class _CrossValidationResultsManager:
def __init__(self, num_folds: int):
@@ -83,15 +83,13 @@ def get_result_dict(self) -> Dict[str, Any]:
)


class TrainEvaluator(AbstractEvaluator):
class Evaluator(AbstractEvaluator):
"""
This class builds a pipeline using the provided configuration.
A pipeline implementing the provided configuration is fitted
using the datamanager object retrieved from disc, via the backend.
After the pipeline is fitted, it is save to disc and the performance estimate
is communicated to the main process via a Queue. It is only compatible
with `CrossValTypes`, `HoldoutValTypes`, i.e, when the training data
is split and the validation set is used for SMBO optimisation.
is communicated to the main process via a Queue.
Args:
queue (Queue):
@@ -101,54 +99,27 @@ class TrainEvaluator(AbstractEvaluator):
Fixed parameters for a pipeline
evaluator_params (EvaluatorParams):
The parameters for an evaluator.
Attributes:
train (bool):
Whether the training data is split and the validation set is used for SMBO optimisation.
cross_validation (bool):
Whether we use cross validation or not.
"""
def __init__(self, backend: Backend, queue: Queue,
metric: autoPyTorchMetric,
budget: float,
configuration: Union[int, str, Configuration],
budget_type: str = None,
pipeline_config: Optional[Dict[str, Any]] = None,
seed: int = 1,
output_y_hat_optimization: bool = True,
num_run: Optional[int] = None,
include: Optional[Dict[str, Any]] = None,
exclude: Optional[Dict[str, Any]] = None,
disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
init_params: Optional[Dict[str, Any]] = None,
logger_port: Optional[int] = None,
keep_models: Optional[bool] = None,
all_supported_metrics: bool = True,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None) -> None:
super().__init__(
backend=backend,
queue=queue,
configuration=configuration,
metric=metric,
seed=seed,
output_y_hat_optimization=output_y_hat_optimization,
num_run=num_run,
include=include,
exclude=exclude,
disable_file_output=disable_file_output,
init_params=init_params,
budget=budget,
budget_type=budget_type,
logger_port=logger_port,
all_supported_metrics=all_supported_metrics,
pipeline_config=pipeline_config,
search_space_updates=search_space_updates
)
def __init__(self, queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams):
resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy
self.train = not isinstance(resampling_strategy, NoResamplingStrategyTypes)
self.cross_validation = isinstance(resampling_strategy, CrossValTypes)

if not isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
resampling_strategy = self.datamanager.resampling_strategy
raise ValueError(
f'resampling_strategy for TrainEvaluator must be in '
f'(CrossValTypes, HoldoutValTypes), but got {resampling_strategy}'
)
if not self.train and fixed_pipeline_params.save_y_opt:
# TODO: Add the test to cover here
# No resampling can not be used for building ensembles. save_y_opt=False ensures it
fixed_pipeline_params = fixed_pipeline_params._replace(save_y_opt=False)

super().__init__(queue=queue, fixed_pipeline_params=fixed_pipeline_params, evaluator_params=evaluator_params)

self.splits = self.datamanager.splits
self.num_folds: int = len(self.splits)
self.logger.debug("Search space updates :{}".format(self.search_space_updates))
if self.train:
self.logger.debug("Search space updates :{}".format(self.fixed_pipeline_params.search_space_updates))

def _evaluate_on_split(self, split_id: int) -> EvaluationResults:
"""
@@ -177,7 +148,7 @@ def _evaluate_on_split(self, split_id: int) -> EvaluationResults:

return EvaluationResults(
pipeline=pipeline,
opt_loss=self._loss(labels=self.y_train[opt_split], preds=opt_pred),
opt_loss=self._loss(labels=self.y_train[opt_split] if self.train else self.y_test, preds=opt_pred),
train_loss=self._loss(labels=self.y_train[train_split], preds=train_pred),
opt_pred=opt_pred,
valid_pred=valid_pred,
@@ -203,6 +174,7 @@ def _cross_validation(self) -> EvaluationResults:
results = self._evaluate_on_split(split_id)

self.pipelines[split_id] = results.pipeline
assert opt_split is not None # mypy redefinition
cv_results.update(split_id, results, len(train_split), len(opt_split))

self.y_opt = np.concatenate([y_opt for y_opt in Y_opt if y_opt is not None])
@@ -214,15 +186,16 @@ def evaluate_loss(self) -> None:
if self.splits is None:
raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None")

if self.num_folds == 1:
if self.cross_validation:
results = self._cross_validation()
else:
_, opt_split = self.splits[0]
results = self._evaluate_on_split(split_id=0)
self.y_opt, self.pipelines[0] = self.y_train[opt_split], results.pipeline
else:
results = self._cross_validation()
self.pipelines[0] = results.pipeline
self.y_opt = self.y_train[opt_split] if self.train else self.y_test

self.logger.debug(
f"In train evaluator.evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss},"
f"In evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss},"
f" status: {results.status},\nadditional run info:\n{dict_repr(results.additional_run_info)}"
)
self.record_evaluation(results=results)
@@ -242,41 +215,23 @@ def _fit_and_evaluate_loss(

kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]}
train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs)
opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs)
valid_pred = self.predict(self.X_valid, **kwargs)
test_pred = self.predict(self.X_test, **kwargs)
valid_pred = self.predict(self.X_valid, **kwargs)

# No resampling ===> evaluate on test dataset
opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) if self.train else test_pred

assert train_pred is not None and opt_pred is not None # mypy check
return train_pred, opt_pred, valid_pred, test_pred


# create closure for evaluating an algorithm
def eval_train_function(
backend: Backend,
queue: Queue,
metric: autoPyTorchMetric,
budget: float,
config: Optional[Configuration],
seed: int,
output_y_hat_optimization: bool,
num_run: int,
include: Optional[Dict[str, Any]],
exclude: Optional[Dict[str, Any]],
disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
pipeline_config: Optional[Dict[str, Any]] = None,
budget_type: str = None,
init_params: Optional[Dict[str, Any]] = None,
logger_port: Optional[int] = None,
all_supported_metrics: bool = True,
search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
instance: str = None,
) -> None:
def eval_fn(queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams) -> None:
"""
This closure allows the communication between the TargetAlgorithmQuery and the
pipeline trainer (TrainEvaluator).
pipeline trainer (Evaluator).
Fundamentally, smac calls the TargetAlgorithmQuery.run() method, which internally
builds a TrainEvaluator. The TrainEvaluator builds a pipeline, stores the output files
builds an Evaluator. The Evaluator builds a pipeline, stores the output files
to disc via the backend, and puts the performance result of the run in the queue.
Args:
@@ -288,7 +243,11 @@ def eval_train_function(
evaluator_params (EvaluatorParams):
The parameters for an evaluator.
"""
evaluator = TrainEvaluator(
resampling_strategy = fixed_pipeline_params.backend.load_datamanager().resampling_strategy
check_resampling_strategy(resampling_strategy)

# NoResamplingStrategyTypes ==> test evaluator, otherwise ==> train evaluator
evaluator = Evaluator(
queue=queue,
evaluator_params=evaluator_params,
fixed_pipeline_params=fixed_pipeline_params
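
The core behavioural change in the evaluator hunks above: with no resampling there is no validation split, so the "optimisation" predictions are simply the test predictions and the optimisation loss is scored against the test targets. A compact restatement of that branch with toy arrays; the helper below is hypothetical and not part of the codebase:

import numpy as np

from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes


def opt_targets(resampling_strategy, y_train, opt_split, y_test):
    # Which labels opt_loss is computed against in the merged Evaluator.
    if isinstance(resampling_strategy, NoResamplingStrategyTypes):
        # No resampling ==> evaluate on the test dataset (former TestEvaluator behaviour).
        return y_test
    # Holdout / cross-validation ==> evaluate on the held-out slice of the training data.
    return y_train[opt_split]


y_train = np.array([0, 1, 0, 1, 1])
y_test = np.array([1, 0])
print(opt_targets(NoResamplingStrategyTypes.no_resampling, y_train, opt_split=None, y_test=y_test))
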
