diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 80d8bd51e..0d0cadaf3 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -48,8 +48,9 @@ ) from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager from autoPyTorch.ensemble.singlebest_ensemble import SingleBest -from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings -from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline +from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config +from autoPyTorch.evaluation.tae import TargetAlgorithmQuery from autoPyTorch.evaluation.utils import DisableFileOutputParameters from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.base_pipeline import BasePipeline @@ -685,22 +686,23 @@ def _do_dummy_prediction(self) -> None: # already be generated here! stats = Stats(scenario_mock) stats.start_timing() - ta = ExecuteTaFuncWithQueue( + taq = TargetAlgorithmQuery( pynisher_context=self._multiprocessing_context, backend=self._backend, seed=self.seed, metric=self._metric, logger_port=self._logger_port, - cost_for_crash=get_cost_of_crash(self._metric), + cost_for_crash=self._metric._cost_of_crash, abort_on_first_run_crash=False, initial_num_run=num_run, + pipeline_config=get_default_pipeline_config(choice='dummy'), stats=stats, memory_limit=memory_limit, disable_file_output=self._disable_file_output, all_supported_metrics=self._all_supported_metrics ) - status, _, _, additional_info = ta.run(num_run, cutoff=self._time_for_task) + status, _, _, additional_info = taq.run(num_run, cutoff=self._time_for_task) if status == StatusType.SUCCESS: self._logger.info("Finished creating dummy predictions.") else: @@ -769,13 +771,13 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: # already be generated here! 
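The dummy run above now takes its pipeline options from `get_default_pipeline_config(choice='dummy')` rather than from `DummyClassificationPipeline.get_default_pipeline_options()`. A minimal sketch of what that lookup resolves to, assuming the loader and the new JSON file introduced later in this patch are importable as shown:

from autoPyTorch.evaluation.pipeline_class_collection import get_default_pipeline_config

# Reads autoPyTorch/configs/dummy_pipeline_options.json (added below in this patch),
# i.e. a one-epoch budget so the dummy model finishes almost immediately.
dummy_config = get_default_pipeline_config(choice='dummy')
print(dummy_config)  # {'budget_type': 'epochs', 'epochs': 1, 'runtime': 1}

# choice='default' falls back to autoPyTorch/configs/default_pipeline_options.json;
# any other value raises a ValueError.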
stats = Stats(scenario_mock) stats.start_timing() - ta = ExecuteTaFuncWithQueue( + taq = TargetAlgorithmQuery( pynisher_context=self._multiprocessing_context, backend=self._backend, seed=self.seed, metric=self._metric, logger_port=self._logger_port, - cost_for_crash=get_cost_of_crash(self._metric), + cost_for_crash=self._metric._cost_of_crash, abort_on_first_run_crash=False, initial_num_run=self._backend.get_next_num_run(), stats=stats, @@ -786,7 +788,7 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs: dask_futures.append([ classifier, self._dask_client.submit( - ta.run, config=classifier, + taq.run, config=classifier, cutoff=func_eval_time_limit_secs, ) ]) @@ -1076,7 +1078,7 @@ def _search( # Here the budget is set to max because the SMAC intensifier can be: # Hyperband: in this case the budget is determined on the fly and overwritten - # by the ExecuteTaFuncWithQueue + # by the TargetAlgorithmQuery # SimpleIntensifier (and others): in this case, we use max_budget as a target # budget, and hece the below line is honored self.pipeline_options[budget_type] = max_budget @@ -1360,7 +1362,7 @@ def refit( dataset_properties=dataset_properties, dataset=dataset, split_id=split_id) - fit_and_suppress_warnings(self._logger, model, X, y=None) + fit_pipeline(self._logger, model, X, y=None) self._clean_logger() @@ -1571,19 +1573,18 @@ def fit_pipeline( stats.start_timing() - tae = ExecuteTaFuncWithQueue( + taq = TargetAlgorithmQuery( backend=self._backend, seed=self.seed, metric=metric, logger_port=self._logger_port, - cost_for_crash=get_cost_of_crash(metric), + cost_for_crash=metric._cost_of_crash, abort_on_first_run_crash=False, initial_num_run=self._backend.get_next_num_run(), stats=stats, memory_limit=memory_limit, disable_file_output=disable_file_output, all_supported_metrics=all_supported_metrics, - budget_type=budget_type, include=include_components, exclude=exclude_components, search_space_updates=search_space_updates, @@ -1591,7 +1592,7 @@ def fit_pipeline( pynisher_context=self._multiprocessing_context ) - run_info, run_value = tae.run_wrapper( + run_info, run_value = taq.run_wrapper( RunInfo(config=configuration, budget=budget, seed=self.seed, @@ -1603,7 +1604,7 @@ def fit_pipeline( fitted_pipeline = self._get_fitted_pipeline( dataset_name=dataset.dataset_name, - pipeline_idx=run_info.config.config_id + tae.initial_num_run, + pipeline_idx=run_info.config.config_id + taq.initial_num_run, run_info=run_info, run_value=run_value, disable_file_output=disable_file_output diff --git a/autoPyTorch/configs/dummy_pipeline_options.json b/autoPyTorch/configs/dummy_pipeline_options.json new file mode 100644 index 000000000..809b1bfae --- /dev/null +++ b/autoPyTorch/configs/dummy_pipeline_options.json @@ -0,0 +1,5 @@ +{ + "budget_type": "epochs", + "epochs": 1, + "runtime": 1 +} diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index 8de483f8e..3d19b56de 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -2,386 +2,138 @@ import time import warnings from multiprocessing.queues import Queue -from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union, no_type_check +from typing import Any, Dict, List, NamedTuple, Optional, Union, no_type_check from ConfigSpace import Configuration import numpy as np -import pandas as pd - from sklearn.base import BaseEstimator -from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.ensemble import 
VotingClassifier from smac.tae import StatusType -import autoPyTorch.pipeline.image_classification -import autoPyTorch.pipeline.tabular_classification -import autoPyTorch.pipeline.tabular_regression -import autoPyTorch.pipeline.traditional_tabular_classification -import autoPyTorch.pipeline.traditional_tabular_regression from autoPyTorch.automl_common.common.utils.backend import Backend from autoPyTorch.constants import ( CLASSIFICATION_TASKS, - IMAGE_TASKS, - MULTICLASS, REGRESSION_TASKS, - STRING_TO_OUTPUT_TYPES, - STRING_TO_TASK_TYPES, - TABULAR_TASKS, + STRING_TO_TASK_TYPES +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.evaluation.pipeline_class_collection import ( + get_default_pipeline_config, + get_pipeline_class ) -from autoPyTorch.datasets.base_dataset import BaseDataset, BaseDatasetPropertiesType from autoPyTorch.evaluation.utils import ( DisableFileOutputParameters, VotingRegressorWrapper, - convert_multioutput_multiclass_to_multilabel, + ensure_prediction_array_sizes ) -from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric from autoPyTorch.pipeline.components.training.metrics.utils import ( calculate_loss, get_metrics, ) -from autoPyTorch.utils.common import dict_repr, subsampler +from autoPyTorch.utils.common import dict_repr from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger from autoPyTorch.utils.pipeline import get_dataset_requirements __all__ = [ 'AbstractEvaluator', - 'fit_and_suppress_warnings' + 'EvaluationResults', + 'fit_pipeline' ] -class EvaluationResults(NamedTuple): - opt_loss: Dict[str, float] - train_loss: Dict[str, float] - opt_pred: np.ndarray - status: StatusType - valid_pred: Optional[np.ndarray] = None - test_pred: Optional[np.ndarray] = None - additional_run_info: Optional[Dict] = None - - -class MyTraditionalTabularClassificationPipeline(BaseEstimator): - """ - A wrapper class that holds a pipeline for traditional classification. - Estimators like CatBoost, and Random Forest are considered traditional machine - learning models and are fitted before neural architecture search. - - This class is an interface to fit a pipeline containing a traditional machine - learning model, and is the final object that is stored for inference. - - Attributes: - dataset_properties (Dict[str, BaseDatasetPropertiesType]): - A dictionary containing dataset specific information - random_state (Optional[np.random.RandomState]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. It complies - a similar function as the kwargs - """ - - def __init__(self, config: str, - dataset_properties: Dict[str, BaseDatasetPropertiesType], - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None): - self.config = config - self.dataset_properties = dataset_properties - self.random_state = random_state - self.init_params = init_params - self.pipeline = autoPyTorch.pipeline.traditional_tabular_classification. 
\ - TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties, - random_state=self.random_state) - configuration_space = self.pipeline.get_hyperparameter_search_space() - default_configuration = configuration_space.get_default_configuration().get_dictionary() - default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config - self.configuration = Configuration(configuration_space, default_configuration) - self.pipeline.set_hyperparameters(self.configuration) - - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - return self.pipeline.fit(X, y) - - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - return self.pipeline.predict_proba(X, batch_size=batch_size) - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - return self.pipeline.predict(X, batch_size=batch_size) - - def get_additional_run_info(self) -> Dict[str, Any]: - """ - Can be used to return additional info for the run. - Returns: - Dict[str, Any]: - Currently contains - 1. pipeline_configuration: the configuration of the pipeline, i.e, the traditional model used - 2. trainer_configuration: the parameters for the traditional model used. - Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs - """ - return {'pipeline_configuration': self.configuration, - 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config(), - 'configuration_origin': 'traditional'} +def get_default_budget_type(choice: str = 'default') -> str: + pipeline_config = get_default_pipeline_config(choice=choice) + return str(pipeline_config['budget_type']) - def get_pipeline_representation(self) -> Dict[str, str]: - return self.pipeline.get_pipeline_representation() - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return autoPyTorch.pipeline.traditional_tabular_classification. \ - TraditionalTabularClassificationPipeline.get_default_pipeline_options() +def get_default_budget(choice: str = 'default') -> int: + pipeline_config = get_default_pipeline_config(choice=choice) + return int(pipeline_config[get_default_budget_type()]) -class MyTraditionalTabularRegressionPipeline(BaseEstimator): - """ - A wrapper class that holds a pipeline for traditional regression. - Estimators like CatBoost, and Random Forest are considered traditional machine - learning models and are fitted before neural architecture search. - - This class is an interface to fit a pipeline containing a traditional machine - learning model, and is the final object that is stored for inference. - - Attributes: - dataset_properties (Dict[str, Any]): - A dictionary containing dataset specific information - random_state (Optional[np.random.RandomState]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. It complies - a similar function as the kwargs - """ - def __init__(self, config: str, - dataset_properties: Dict[str, Any], - random_state: Optional[np.random.RandomState] = None, - init_params: Optional[Dict] = None): - self.config = config - self.dataset_properties = dataset_properties - self.random_state = random_state - self.init_params = init_params - self.pipeline = autoPyTorch.pipeline.traditional_tabular_regression. 
\ - TraditionalTabularRegressionPipeline(dataset_properties=dataset_properties, - random_state=self.random_state) - configuration_space = self.pipeline.get_hyperparameter_search_space() - default_configuration = configuration_space.get_default_configuration().get_dictionary() - default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config - self.configuration = Configuration(configuration_space, default_configuration) - self.pipeline.set_hyperparameters(self.configuration) - - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - return self.pipeline.fit(X, y) - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - return self.pipeline.predict(X, batch_size=batch_size) - - def get_additional_run_info(self) -> Dict[str, Any]: - """ - Can be used to return additional info for the run. - Returns: - Dict[str, Any]: - Currently contains - 1. pipeline_configuration: the configuration of the pipeline, i.e, the traditional model used - 2. trainer_configuration: the parameters for the traditional model used. - Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs - """ - return {'pipeline_configuration': self.configuration, - 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config()} - - def get_pipeline_representation(self) -> Dict[str, str]: - return self.pipeline.get_pipeline_representation() - - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return autoPyTorch.pipeline.traditional_tabular_regression.\ - TraditionalTabularRegressionPipeline.get_default_pipeline_options() - - -class DummyClassificationPipeline(DummyClassifier): - """ - A wrapper class that holds a pipeline for dummy classification. - - A wrapper over DummyClassifier of scikit learn. This estimator is considered the - worst performing model. In case of failure, at least this model will be fitted. - - Attributes: - random_state (Optional[Union[int, np.random.RandomState]]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. 
It complies - a similar function as the kwargs - """ - - def __init__(self, config: Configuration, - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None - ) -> None: - self.config = config - self.init_params = init_params - self.random_state = random_state - if config == 1: - super(DummyClassificationPipeline, self).__init__(strategy="uniform") - else: - super(DummyClassificationPipeline, self).__init__(strategy="most_frequent") - - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - X_train = subsampler(X['X_train'], X['train_indices']) - y_train = subsampler(X['y_train'], X['train_indices']) - return super(DummyClassificationPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, - sample_weight=sample_weight) - - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - new_X = np.ones((X.shape[0], 1)) - probas = super(DummyClassificationPipeline, self).predict_proba(new_X) - probas = convert_multioutput_multiclass_to_multilabel(probas).astype( - np.float32) - return probas - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - new_X = np.ones((X.shape[0], 1)) - return super(DummyClassificationPipeline, self).predict(new_X).astype(np.float32) - - def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 - return {'configuration_origin': 'DUMMY'} - - def get_pipeline_representation(self) -> Dict[str, str]: - return { - 'Preprocessing': 'None', - 'Estimator': 'Dummy', - } - - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return {'budget_type': 'epochs', - 'epochs': 1, - 'runtime': 1} - - -class DummyRegressionPipeline(DummyRegressor): - """ - A wrapper class that holds a pipeline for dummy regression. - - A wrapper over DummyRegressor of scikit learn. This estimator is considered the - worst performing model. In case of failure, at least this model will be fitted. - - Attributes: - random_state (Optional[Union[int, np.random.RandomState]]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. 
It complies - a similar function as the kwargs - """ - - def __init__(self, config: Configuration, - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None) -> None: - self.config = config - self.init_params = init_params - self.random_state = random_state - if config == 1: - super(DummyRegressionPipeline, self).__init__(strategy='mean') - else: - super(DummyRegressionPipeline, self).__init__(strategy='median') - - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - X_train = subsampler(X['X_train'], X['train_indices']) - y_train = subsampler(X['y_train'], X['train_indices']) - return super(DummyRegressionPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, - sample_weight=sample_weight) - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - new_X = np.ones((X.shape[0], 1)) - return super(DummyRegressionPipeline, self).predict(new_X).astype(np.float32) - - def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 - return {'configuration_origin': 'DUMMY'} - - def get_pipeline_representation(self) -> Dict[str, str]: - return { - 'Preprocessing': 'None', - 'Estimator': 'Dummy', - } - - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return {'budget_type': 'epochs', - 'epochs': 1, - 'runtime': 1} - - -def fit_and_suppress_warnings(logger: PicklableClientLogger, pipeline: BaseEstimator, - X: Dict[str, Any], y: Any - ) -> BaseEstimator: +def _get_send_warnings_to_log(logger: PicklableClientLogger) -> Any: @no_type_check def send_warnings_to_log(message, category, filename, lineno, file=None, line=None) -> None: - logger.debug('%s:%s: %s:%s', - filename, lineno, category.__name__, message) + logger.debug(f'{filename}:{lineno}: {category.__name__}:{message}') return + return send_warnings_to_log + + +def fit_pipeline(logger: PicklableClientLogger, pipeline: BaseEstimator, + X: Dict[str, Any], y: Any) -> BaseEstimator: + + send_warnings_to_log = _get_send_warnings_to_log(logger) with warnings.catch_warnings(): warnings.showwarning = send_warnings_to_log + # X is a fit dictionary and y is usually None for the compatibility pipeline.fit(X, y) return pipeline -class AbstractEvaluator(object): +class EvaluationResults(NamedTuple): """ - This method defines the interface that pipeline evaluators should follow, when - interacting with SMAC through ExecuteTaFuncWithQueue. - - An evaluator is an object that: - + constructs a pipeline (i.e. a classification or regression estimator) for a given - pipeline_config and run settings (budget, seed) - + Fits and trains this pipeline (TrainEvaluator) or tests a given - configuration (TestEvaluator) + Attributes: + opt_loss (Dict[str, float]): + The optimization loss, calculated on the validation set. This will + be the cost used in SMAC + train_loss (Dict[str, float]): + The train loss, calculated on the train set + opt_pred (np.ndarray): + The predictions on the validation set. This validation set is created + from the resampling strategy + valid_pred (Optional[np.ndarray]): + Predictions on a user provided validation set + test_pred (Optional[np.ndarray]): + Predictions on a user provided test set + additional_run_info (Optional[Dict]): + A dictionary with additional run information, like duration or + the crash error msg, if any. + status (StatusType): + The status of the run, following SMAC StatusType syntax. + pipeline (Optional[BaseEstimator]): + The fitted pipeline. 
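`fit_and_suppress_warnings` is renamed to `fit_pipeline`, with the warning redirection factored out into `_get_send_warnings_to_log`. A rough usage sketch together with `EvaluationResults`; the `logger`, `pipeline` and `fit_dictionary` objects are assumed to exist already (they are not created in this patch), and the numbers are purely illustrative:

import numpy as np
from smac.tae import StatusType

from autoPyTorch.evaluation.abstract_evaluator import EvaluationResults, fit_pipeline

# `logger` is assumed to be a PicklableClientLogger (e.g. AbstractEvaluator.logger);
# warnings raised during fit are rerouted to it instead of being printed.
pipeline = fit_pipeline(logger, pipeline, X=fit_dictionary, y=None)

# A concrete evaluator would then bundle its losses/predictions like this and pass
# them to record_evaluation(); the fields match the NamedTuple declared just below.
results = EvaluationResults(
    opt_loss={'accuracy': 0.12},
    train_loss={'accuracy': 0.05},
    opt_pred=np.zeros((100, 2)),
    status=StatusType.SUCCESS,
    pipeline=pipeline,
)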
+ """ + opt_loss: Dict[str, float] + train_loss: Dict[str, float] + opt_pred: np.ndarray + status: StatusType + pipeline: Optional[BaseEstimator] = None + valid_pred: Optional[np.ndarray] = None + test_pred: Optional[np.ndarray] = None + additional_run_info: Optional[Dict] = None - The provided configuration determines the type of pipeline created. For more - details, please read the get_pipeline() method. +class FixedPipelineParams(NamedTuple): + """ Attributes: backend (Backend): - An object that allows interaction with the disk storage. In particular, allows to + An object to interface with the disk storage. In particular, allows to access the train and test datasets - queue (Queue): - Each worker available will instantiate an evaluator, and after completion, - it will append the result to a multiprocessing queue metric (autoPyTorchMetric): A scorer object that is able to evaluate how good a pipeline was fit. It - is a wrapper on top of the actual score method (a wrapper on top of - scikit-learn accuracy for example) that formats the predictions accordingly. - budget: (float): - The amount of epochs/time a configuration is allowed to run. + is a wrapper on top of the actual score method (a wrapper on top of scikit + lean accuracy for example) that formats the predictions accordingly. budget_type (str): - The budget type. Currently, only epoch and time are allowed. + The budget type, which can be epochs or time pipeline_config (Optional[Dict[str, Any]]): Defines the content of the pipeline being evaluated. For example, it contains pipeline specific settings like logging name, or whether or not to use tensorboard. - configuration (Union[int, str, Configuration]): - Determines the pipeline to be constructed. A dummy estimator is created for - integer configurations, a traditional machine learning pipeline is created - for string based configuration, and NAS is performed when a configuration - object is passed. seed (int): A integer that allows for reproducibility of results - output_y_hat_optimization (bool): + save_y_opt (bool): Whether this worker should output the target predictions, so that they are stored on disk. Fundamentally, the resampling strategy might shuffle the Y_train targets, so we store the split in order to re-use them for ensemble selection. - num_run (Optional[int]): - An identifier of the current configuration being fit. This number is unique per - configuration. include (Optional[Dict[str, Any]]): An optional dictionary to include components of the pipeline steps. exclude (Optional[Dict[str, Any]]): @@ -405,159 +157,174 @@ class AbstractEvaluator(object): + `all`: do not save any of the above. For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. - init_params (Optional[Dict[str, Any]]): - Optional argument that is passed to each pipeline step. It is the equivalent of - kwargs for the pipeline steps. logger_port (Optional[int]): Logging is performed using a socket-server scheme to be robust against many parallel entities that want to write to the same file. This integer states the - socket port for the communication channel. - If None is provided, the logging.handlers.DEFAULT_TCP_LOGGING_PORT is used. - all_supported_metrics (bool): - Whether all supported metrics should be calculated for every configuration. + socket port for the communication channel. If None is provided, a traditional + logger is used. + all_supported_metrics (bool): + Whether all supported metric should be calculated for every configuration. 
search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): An object used to fine tune the hyperparameter search space of the pipeline """ - def __init__(self, backend: Backend, - queue: Queue, - metric: autoPyTorchMetric, - budget: float, - configuration: Union[int, str, Configuration], - budget_type: str = None, - pipeline_config: Optional[Dict[str, Any]] = None, - seed: int = 1, - output_y_hat_optimization: bool = True, - num_run: Optional[int] = None, - include: Optional[Dict[str, Any]] = None, - exclude: Optional[Dict[str, Any]] = None, - disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, - init_params: Optional[Dict[str, Any]] = None, - logger_port: Optional[int] = None, - all_supported_metrics: bool = True, - search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None - ) -> None: - - self.starttime = time.time() - - self.configuration = configuration - self.backend: Backend = backend - self.queue = queue - - self.datamanager: BaseDataset = self.backend.load_datamanager() - - assert self.datamanager.task_type is not None, \ - "Expected dataset {} to have task_type got None".format(self.datamanager.__class__.__name__) - self.task_type = STRING_TO_TASK_TYPES[self.datamanager.task_type] - self.output_type = STRING_TO_OUTPUT_TYPES[self.datamanager.output_type] - self.issparse = self.datamanager.issparse - - self.include = include - self.exclude = exclude - self.search_space_updates = search_space_updates + backend: Backend + seed: int + metric: autoPyTorchMetric + budget_type: str # Literal['epochs', 'runtime'] + pipeline_config: Dict[str, Any] + save_y_opt: bool = True + include: Optional[Dict[str, Any]] = None + exclude: Optional[Dict[str, Any]] = None + disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None + logger_port: Optional[int] = None + all_supported_metrics: bool = True + search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None + + @classmethod + def with_default_pipeline_config( + cls, + pipeline_config: Optional[Dict[str, Any]] = None, + choice: str = 'default', + **kwargs: Any + ) -> 'FixedPipelineParams': + + if 'budget_type' in kwargs: + raise TypeError( + f'{cls.__name__}.with_default_pipeline_config() got multiple values for argument `budget_type`' + ) - self.X_train, self.y_train = self.datamanager.train_tensors + if pipeline_config is None: + pipeline_config = get_default_pipeline_config(choice=choice) - if self.datamanager.val_tensors is not None: - self.X_valid, self.y_valid = self.datamanager.val_tensors - else: - self.X_valid, self.y_valid = None, None + budget_type = pipeline_config['budget_type'] + kwargs.update(pipeline_config=pipeline_config, budget_type=budget_type) + return cls(**kwargs) - if self.datamanager.test_tensors is not None: - self.X_test, self.y_test = self.datamanager.test_tensors - else: - self.X_test, self.y_test = None, None - self.metric = metric +class EvaluatorParams(NamedTuple): + """ + Attributes: + configuration (Union[int, str, Configuration]): + Determines the pipeline to be constructed. A dummy estimator is created for + integer configurations, a traditional machine learning pipeline is created + for string based configuration, and NAS is performed when a configuration + object is passed. + num_run (Optional[int]): + An identifier of the current configuration being fit. This number is unique per + configuration. + init_params (Optional[Dict[str, Any]]): + Optional argument that is passed to each pipeline step. 
It is the equivalent of + kwargs for the pipeline steps. + """ + budget: float + configuration: Union[int, str, Configuration] + num_run: Optional[int] = None + init_params: Optional[Dict[str, Any]] = None + + @classmethod + def with_default_budget( + cls, + budget: float = 0, + choice: str = 'default', + **kwargs: Any + ) -> 'EvaluatorParams': + budget = get_default_budget(choice=choice) if budget == 0 else budget + kwargs.update(budget=budget) + return cls(**kwargs) - self.seed = seed - # Flag to save target for ensemble - self.output_y_hat_optimization = output_y_hat_optimization +class AbstractEvaluator(object): + """ + This method defines the interface that pipeline evaluators should follow, when + interacting with SMAC through TargetAlgorithmQuery. - disable_file_output = disable_file_output if disable_file_output is not None else [] - # check compatibility of disable file output - DisableFileOutputParameters.check_compatibility(disable_file_output) + An evaluator is an object that: + + constructs a pipeline (i.e. a classification or regression estimator) for a given + pipeline_config and run settings (budget, seed) + + Fits and trains this pipeline (TrainEvaluator) or tests a given + configuration (TestEvaluator) - self.disable_file_output = disable_file_output + The provided configuration determines the type of pipeline created. For more + details, please read the get_pipeline() method. - self.pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None - if self.task_type in REGRESSION_TASKS: - if isinstance(self.configuration, int): - self.pipeline_class = DummyRegressionPipeline - elif isinstance(self.configuration, str): - self.pipeline_class = MyTraditionalTabularRegressionPipeline - elif isinstance(self.configuration, Configuration): - self.pipeline_class = autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline - else: - raise ValueError('task {} not available'.format(self.task_type)) - self.predict_function = self._predict_regression + Args: + queue (Queue): + Each worker available will instantiate an evaluator, and after completion, + it will append the result to a multiprocessing queue + fixed_pipeline_params (FixedPipelineParams): + Fixed parameters for a pipeline. + evaluator_params (EvaluatorParams): + The parameters for an evaluator. 
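A sketch of how the two parameter bundles are intended to be assembled by a caller such as `TargetAlgorithmQuery`; `backend`, `metric`, `configuration` and `queue` are assumed to exist and are not part of this patch:

from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams

# budget_type is derived from the chosen pipeline config, so passing it
# explicitly here raises a TypeError by design.
fixed_params = FixedPipelineParams.with_default_pipeline_config(
    choice='default',        # or 'dummy' for the one-epoch dummy config
    backend=backend,         # an autoPyTorch Backend
    seed=1,
    metric=metric,           # an autoPyTorchMetric, e.g. accuracy
    save_y_opt=True,
)

# budget=0 means "use the default budget of the chosen config" (e.g. epochs).
evaluator_params = EvaluatorParams.with_default_budget(
    budget=0,
    configuration=configuration,  # int -> dummy, str -> traditional, Configuration -> NAS
    num_run=42,
)

# A concrete evaluator then receives both bundles plus the result queue, roughly:
# evaluator = TrainEvaluator(queue=queue, fixed_pipeline_params=fixed_params,
#                            evaluator_params=evaluator_params)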
+ """ + def __init__(self, queue: Queue, fixed_pipeline_params: FixedPipelineParams, evaluator_params: EvaluatorParams): + self.y_opt: Optional[np.ndarray] = None + self.starttime = time.time() + self.queue = queue + self.fixed_pipeline_params = fixed_pipeline_params + self.evaluator_params = evaluator_params + self._init_miscellaneous() + self.logger.debug(f"Fit dictionary in Abstract evaluator: {dict_repr(self.fit_dictionary)}") + self.logger.debug(f"Search space updates : {self.fixed_pipeline_params.search_space_updates}") + + def _init_miscellaneous(self) -> None: + num_run = self.evaluator_params.num_run + self.num_run = 0 if num_run is None else num_run + self._init_dataset_properties() + self._init_additional_metrics() + self._init_fit_dictionary() + + disable_file_output = self.fixed_pipeline_params.disable_file_output + if disable_file_output is not None: + DisableFileOutputParameters.check_compatibility(disable_file_output) + self.disable_file_output = disable_file_output else: - if isinstance(self.configuration, int): - self.pipeline_class = DummyClassificationPipeline - elif isinstance(self.configuration, str): - if self.task_type in TABULAR_TASKS: - self.pipeline_class = MyTraditionalTabularClassificationPipeline - else: - raise ValueError("Only tabular tasks are currently supported with traditional methods") - elif isinstance(self.configuration, Configuration): - if self.task_type in TABULAR_TASKS: - self.pipeline_class = autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline - elif self.task_type in IMAGE_TASKS: - self.pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline - else: - raise ValueError('task {} not available'.format(self.task_type)) - self.predict_function = self._predict_proba - self.dataset_properties = self.datamanager.get_dataset_properties( - get_dataset_requirements(info=self.datamanager.get_required_dataset_info(), - include=self.include, - exclude=self.exclude, - search_space_updates=self.search_space_updates + self.disable_file_output = [] + if self.num_folds == 1: # not save cv model when we perform holdout + self.disable_file_output.append('pipelines') + + def _init_dataset_properties(self) -> None: + datamanager: BaseDataset = self.fixed_pipeline_params.backend.load_datamanager() + if datamanager.task_type is None: + raise ValueError(f"Expected dataset {datamanager.__class__.__name__} to have task_type got None") + if datamanager.splits is None: + raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None") + + self.splits = datamanager.splits + self.num_folds: int = len(self.splits) + # Since cv might not finish in time, we take self.pipelines as None by default + self.pipelines: List[Optional[BaseEstimator]] = [None] * self.num_folds + self.task_type = STRING_TO_TASK_TYPES[datamanager.task_type] + self.num_classes = getattr(datamanager, 'num_classes', 1) + self.output_type = datamanager.output_type + + search_space_updates = self.fixed_pipeline_params.search_space_updates + self.dataset_properties = datamanager.get_dataset_properties( + get_dataset_requirements(info=datamanager.get_required_dataset_info(), + include=self.fixed_pipeline_params.include, + exclude=self.fixed_pipeline_params.exclude, + search_space_updates=search_space_updates )) + self.X_train, self.y_train = datamanager.train_tensors + self.X_valid, self.y_valid, self.X_test, self.y_test = None, None, None, None + if datamanager.val_tensors is not None: + self.X_valid, self.y_valid = 
datamanager.val_tensors + + if datamanager.test_tensors is not None: + self.X_test, self.y_test = datamanager.test_tensors + + def _init_additional_metrics(self) -> None: + all_supported_metrics = self.fixed_pipeline_params.all_supported_metrics + metric = self.fixed_pipeline_params.metric self.additional_metrics: Optional[List[autoPyTorchMetric]] = None - metrics_dict: Optional[Dict[str, List[str]]] = None + self.metrics_dict: Optional[Dict[str, List[str]]] = None + if all_supported_metrics: self.additional_metrics = get_metrics(dataset_properties=self.dataset_properties, all_supported_metrics=all_supported_metrics) - # Update fit dictionary with metrics passed to the evaluator - metrics_dict = {'additional_metrics': []} - metrics_dict['additional_metrics'].append(self.metric.name) - for metric in self.additional_metrics: - metrics_dict['additional_metrics'].append(metric.name) - - self._init_params = init_params - - assert self.pipeline_class is not None, "Could not infer pipeline class" - pipeline_config = pipeline_config if pipeline_config is not None \ - else self.pipeline_class.get_default_pipeline_options() - self.budget_type = pipeline_config['budget_type'] if budget_type is None else budget_type - self.budget = pipeline_config[self.budget_type] if budget == 0 else budget - - self.num_run = 0 if num_run is None else num_run - - logger_name = '%s(%d)' % (self.__class__.__name__.split('.')[-1], - self.seed) - if logger_port is None: - logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT - self.logger = get_named_client_logger( - name=logger_name, - port=logger_port, - ) - - self._init_fit_dictionary(logger_port=logger_port, pipeline_config=pipeline_config, metrics_dict=metrics_dict) - self.Y_optimization: Optional[np.ndarray] = None - self.Y_actual_train: Optional[np.ndarray] = None - self.pipelines: Optional[List[BaseEstimator]] = None - self.pipeline: Optional[BaseEstimator] = None - self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(dict_repr(self.fit_dictionary))) - self.logger.debug("Search space updates :{}".format(self.search_space_updates)) + self.metrics_dict = {'additional_metrics': [m.name for m in [metric] + self.additional_metrics]} - def _init_fit_dictionary( - self, - logger_port: int, - pipeline_config: Dict[str, Any], - metrics_dict: Optional[Dict[str, List[str]]] = None, - ) -> None: + def _init_fit_dictionary(self) -> None: """ Initialises the fit dictionary @@ -576,32 +343,77 @@ def _init_fit_dictionary( Returns: None """ + logger_name = f"{self.__class__.__name__.split('.')[-1]}({self.fixed_pipeline_params.seed})" + logger_port = self.fixed_pipeline_params.logger_port + logger_port = logger_port if logger_port is not None else logging.handlers.DEFAULT_TCP_LOGGING_PORT + self.logger = get_named_client_logger(name=logger_name, port=logger_port) + + self.fit_dictionary: Dict[str, Any] = dict( + dataset_properties=self.dataset_properties, + X_train=self.X_train, + y_train=self.y_train, + X_test=self.X_test, + y_test=self.y_test, + backend=self.fixed_pipeline_params.backend, + logger_port=logger_port, + optimize_metric=self.fixed_pipeline_params.metric.name, + **((lambda: {} if self.metrics_dict is None else self.metrics_dict)()) + ) + self.fit_dictionary.update(**self.fixed_pipeline_params.pipeline_config) - self.fit_dictionary: Dict[str, Any] = {'dataset_properties': self.dataset_properties} - - if metrics_dict is not None: - self.fit_dictionary.update(metrics_dict) - - self.fit_dictionary.update({ - 'X_train': self.X_train, - 'y_train': 
self.y_train, - 'X_test': self.X_test, - 'y_test': self.y_test, - 'backend': self.backend, - 'logger_port': logger_port, - 'optimize_metric': self.metric.name - }) - - self.fit_dictionary.update(pipeline_config) + budget, budget_type = self.evaluator_params.budget, self.fixed_pipeline_params.budget_type # If the budget is epochs, we want to limit that in the fit dictionary - if self.budget_type == 'epochs': - self.fit_dictionary['epochs'] = self.budget + if budget_type == 'epochs': + self.fit_dictionary['epochs'] = budget self.fit_dictionary.pop('runtime', None) - elif self.budget_type == 'runtime': - self.fit_dictionary['runtime'] = self.budget + elif budget_type == 'runtime': + self.fit_dictionary['runtime'] = budget self.fit_dictionary.pop('epochs', None) else: - raise ValueError(f"budget type must be `epochs` or `runtime`, but got {self.budget_type}") + raise ValueError(f"budget type must be `epochs` or `runtime`, but got {budget_type}") + + def predict( + self, + X: Optional[np.ndarray], + pipeline: BaseEstimator, + label_examples: Optional[np.ndarray] = None + ) -> Optional[np.ndarray]: + """ + A wrapper function to handle the prediction of regression or classification tasks. + + Args: + X (np.ndarray): + A set of features to feed to the pipeline + pipeline (BaseEstimator): + A model that will take the features X return a prediction y + label_examples (Optional[np.ndarray]): + + Returns: + (np.ndarray): + The predictions of pipeline for the given features X + """ + + if X is None: + return None + + send_warnings_to_log = _get_send_warnings_to_log(self.logger) + with warnings.catch_warnings(): + warnings.showwarning = send_warnings_to_log + if self.task_type in REGRESSION_TASKS: + # To comply with scikit-learn VotingRegressor requirement, if the estimator + # predicts a (N,) shaped array, it is converted to (N, 1) + pred = pipeline.predict(X, batch_size=1000) + pred = pred.reshape((-1, 1)) if len(pred.shape) == 1 else pred + else: + pred = pipeline.predict_proba(X, batch_size=1000) + pred = ensure_prediction_array_sizes( + prediction=pred, + num_classes=self.num_classes, + output_type=self.output_type, + label_examples=label_examples + ) + + return pred def _get_pipeline(self) -> BaseEstimator: """ @@ -621,38 +433,38 @@ def _get_pipeline(self) -> BaseEstimator: pipeline (BaseEstimator): A scikit-learn compliant pipeline which is not yet fit to the data. 
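The dispatch that `_get_pipeline` relies on now lives in `get_pipeline_class` (see the new module at the end of this diff). A short usage sketch of the mapping for a tabular classification task; the traditional learner name is only an example:

from autoPyTorch.constants import STRING_TO_TASK_TYPES
from autoPyTorch.evaluation.pipeline_class_collection import get_pipeline_class

task_type = STRING_TO_TASK_TYPES['tabular_classification']

get_pipeline_class(config=1, task_type=task_type)                # DummyClassificationPipeline
get_pipeline_class(config='random_forest', task_type=task_type)  # MyTraditionalTabularClassificationPipeline
# A ConfigSpace Configuration selects the searched pipeline instead:
# get_pipeline_class(config=some_configuration, task_type=task_type)  # TabularClassificationPipeline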
""" - assert self.pipeline_class is not None, "Can't return pipeline, pipeline_class not initialised" - if isinstance(self.configuration, int): - pipeline = self.pipeline_class(config=self.configuration, - random_state=np.random.RandomState(self.seed), - init_params=self._init_params) - elif isinstance(self.configuration, Configuration): - pipeline = self.pipeline_class(config=self.configuration, - dataset_properties=self.dataset_properties, - random_state=np.random.RandomState(self.seed), - include=self.include, - exclude=self.exclude, - init_params=self._init_params, - search_space_updates=self.search_space_updates) - elif isinstance(self.configuration, str): - pipeline = self.pipeline_class(config=self.configuration, - dataset_properties=self.dataset_properties, - random_state=np.random.RandomState(self.seed), - init_params=self._init_params) + config = self.evaluator_params.configuration + kwargs = dict( + config=config, + random_state=np.random.RandomState(self.fixed_pipeline_params.seed), + init_params=self.evaluator_params.init_params + ) + pipeline_class = get_pipeline_class(config=config, task_type=self.task_type) + + if isinstance(config, int): + return pipeline_class(**kwargs) + elif isinstance(config, str): + return pipeline_class(dataset_properties=self.dataset_properties, **kwargs) + elif isinstance(config, Configuration): + return pipeline_class(dataset_properties=self.dataset_properties, + include=self.fixed_pipeline_params.include, + exclude=self.fixed_pipeline_params.exclude, + search_space_updates=self.fixed_pipeline_params.search_space_updates, + **kwargs) else: - raise ValueError("Invalid configuration entered") - return pipeline + raise ValueError("The type of configuration must be either (int, str, Configuration), " + f"but got type {type(config)}") - def _loss(self, y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]: + def _loss(self, labels: np.ndarray, preds: np.ndarray) -> Dict[str, float]: """SMAC follows a minimization goal, so the make_scorer sign is used as a guide to obtain the value to reduce. 
The calculate_loss internally translate a score function to a minimization problem Args: - y_true (np.ndarray): + labels (np.ndarray): The expect labels given by the original dataset - y_hat (np.ndarray): + preds (np.ndarray): The prediction of the current pipeline being fit Returns: (Dict[str, float]): @@ -660,361 +472,137 @@ def _loss(self, y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]: supported metric """ - if isinstance(self.configuration, int): + metric = self.fixed_pipeline_params.metric + if isinstance(self.evaluator_params.configuration, int): # We do not calculate performance of the dummy configurations - return {self.metric.name: self.metric._optimum - self.metric._sign * self.metric._worst_possible_result} - - if self.additional_metrics is not None: - metrics = self.additional_metrics - else: - metrics = [self.metric] + return {metric.name: metric._optimum - metric._sign * metric._worst_possible_result} - return calculate_loss( - y_true, y_hat, self.task_type, metrics) + metrics = self.additional_metrics if self.additional_metrics is not None else [metric] - def finish_up(self, results: EvaluationResults, file_output: bool) -> Optional[Tuple[float, float, int, Dict]]: - """This function does everything necessary after the fitting is done: + return calculate_loss(target=labels, prediction=preds, task_type=self.task_type, metrics=metrics) - * predicting - * saving the files for the ensembles_statistics - * generate output for SMAC + def record_evaluation(self, results: EvaluationResults) -> None: + """This function does everything necessary after the fitting: + 1. Evaluate of loss for each metric + 2. Save the files for the ensembles_statistics + 3. Add evaluations to queue for SMAC We use it as the signal handler so we can recycle the code for the normal usecase and when the runsolver kills us here :) Args: - loss (Dict[str, float]): - The optimization loss, calculated on the validation set. This will - be the cost used in SMAC - train_loss (Dict[str, float]): - The train loss, calculated on the train set - opt_pred (np.ndarray): - The predictions on the validation set. This validation set is created - from the resampling strategy - valid_pred (Optional[np.ndarray]): - Predictions on a user provided validation set - test_pred (Optional[np.ndarray]): - Predictions on a user provided test set - additional_run_info (Optional[Dict]): - A dictionary with additional run information, like duration or - the crash error msg, if any. - file_output (bool): - Whether or not this pipeline should output information to disk - status (StatusType) - The status of the run, following SMAC StatusType syntax. - - Returns: - duration (float): - The elapsed time of the training of this evaluator - loss (float): - The optimization loss of this run - seed (int): - The seed used while fitting the pipeline - additional_info (Dict): - Additional run information, like train/test loss + results (EvaluationResults): + The results from fitting a pipeline. 
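What ultimately lands on the multiprocessing queue is a small dictionary that `TargetAlgorithmQuery` converts into a SMAC run result. An illustrative payload; the values are made up, the keys mirror the `rval_dict` built below:

from smac.tae import StatusType

rval_dict = {
    'loss': 0.12,                        # opt_loss[metric.name], minimised by SMAC
    'additional_run_info': {
        'train_loss': {'accuracy': 0.05},
        'opt_loss': {'accuracy': 0.12},
        'duration': 42.0,                # seconds since the evaluator started
        'num_run': 7,
        # 'validation_loss'/'test_loss' only appear when the user supplied those splits
    },
    'status': StatusType.SUCCESS,
}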
""" - self.duration = time.time() - self.starttime - - if file_output: - loss_, additional_run_info_ = self.file_output( - results.opt_pred, results.valid_pred, results.test_pred, - ) - else: - loss_ = None - additional_run_info_ = {} - - validation_loss, test_loss = self.calculate_auxiliary_losses( - results.valid_pred, results.test_pred - ) - - if loss_ is not None: - return self.duration, loss_, self.seed, additional_run_info_ + opt_pred, valid_pred, test_pred = results.opt_pred, results.valid_pred, results.test_pred - cost = results.opt_loss[self.metric.name] + if not self._save_to_backend(opt_pred, valid_pred, test_pred): + # If we CANNOT save, nothing to pass to SMAC thus early-return + return - additional_run_info = ( - {} if results.additional_run_info is None else results.additional_run_info + cost = results.opt_loss[self.fixed_pipeline_params.metric.name] + additional_run_info = {} if results.additional_run_info is None else results.additional_run_info + update_dict = dict( + train_loss=results.train_loss, + validation_loss=self._get_transformed_metrics(pred=valid_pred, inference_name='valid'), + test_loss=self._get_transformed_metrics(pred=test_pred, inference_name='test'), + opt_loss=results.opt_loss, + duration=time.time() - self.starttime, + num_run=self.num_run ) - additional_run_info['opt_loss'] = results.opt_loss - additional_run_info['duration'] = self.duration - additional_run_info['num_run'] = self.num_run - if results.train_loss is not None: - additional_run_info['train_loss'] = results.train_loss - if validation_loss is not None: - additional_run_info['validation_loss'] = validation_loss - if test_loss is not None: - additional_run_info['test_loss'] = test_loss - - rval_dict = {'loss': cost, - 'additional_run_info': additional_run_info, - 'status': results.status} + additional_run_info.update({k: v for k, v in update_dict.items() if v is not None}) + rval_dict = {'loss': cost, 'additional_run_info': additional_run_info, 'status': results.status} self.queue.put(rval_dict) - return None - def calculate_auxiliary_losses( - self, - Y_valid_pred: np.ndarray, - Y_test_pred: np.ndarray, - ) -> Tuple[Optional[Dict[str, float]], Optional[Dict[str, float]]]: + def _get_transformed_metrics(self, pred: Optional[np.ndarray], inference_name: str) -> Optional[Dict[str, float]]: """ A helper function to calculate the performance estimate of the current pipeline in the user provided validation/test set. Args: - Y_valid_pred (np.ndarray): + pred (Optional[np.ndarray]): predictions on a validation set provided by the user, - matching self.y_valid - Y_test_pred (np.ndarray): - predictions on a test set provided by the user, - matching self.y_test + matching self.y_{valid or test} + inference_name (str): + Which inference duration either `valid` or `test` Returns: - validation_loss_dict (Optional[Dict[str, float]]): - Various validation losses available. - test_loss_dict (Optional[Dict[str, float]]): - Various test losses available. + loss_dict (Optional[Dict[str, float]]): + Various losses available on the dataset for the specified duration. 
""" + duration_choices = ('valid', 'test') + if inference_name not in duration_choices: + raise ValueError(f'inference_name must be in {duration_choices}, but got {inference_name}') - validation_loss_dict: Optional[Dict[str, float]] = None - - if Y_valid_pred is not None: - if self.y_valid is not None: - validation_loss_dict = self._loss(self.y_valid, Y_valid_pred) + labels = getattr(self, f'y_{inference_name}', None) + return None if pred is None or labels is None else self._loss(labels, pred) - test_loss_dict: Optional[Dict[str, float]] = None - if Y_test_pred is not None: - if self.y_test is not None: - test_loss_dict = self._loss(self.y_test, Y_test_pred) + def _get_prediction(self, pred: Optional[np.ndarray], name: str) -> Optional[np.ndarray]: + return pred if name not in self.disable_file_output else None - return validation_loss_dict, test_loss_dict + def _fetch_voting_pipeline(self) -> Optional[Union[VotingClassifier, VotingRegressorWrapper]]: + pipelines = [pl for pl in self.pipelines if pl is not None] + if len(pipelines) == 0: + return None - def file_output( - self, - Y_optimization_pred: np.ndarray, - Y_valid_pred: np.ndarray, - Y_test_pred: np.ndarray - ) -> Tuple[Optional[float], Dict]: - """ - This method decides what file outputs are written to disk. - - It is also the interface to the backed save_numrun_to_dir - which stores all the pipeline related information to a single - directory for easy identification of the current run. - - Args: - Y_optimization_pred (np.ndarray): - The pipeline predictions on the validation set internally created - from self.y_train - Y_valid_pred (np.ndarray): - The pipeline predictions on the user provided validation set, - which should match self.y_valid - Y_test_pred (np.ndarray): - The pipeline predictions on the user provided test set, - which should match self.y_test - Returns: - loss (Optional[float]): - A loss in case the run failed to store files to - disk - error_dict (Dict): - A dictionary with an error that explains why a run - was not successfully stored to disk. - """ - # Abort if self.Y_optimization is None - # self.Y_optimization can be None if we use partial-cv, then, - # obviously no output should be saved. - if self.Y_optimization is None: - return None, {} - - # Abort in case of shape misalignment - if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]: - return ( - 1.0, - { - 'error': - "Targets %s and prediction %s don't have " - "the same length. Probably training didn't " - "finish" % (self.Y_optimization.shape, Y_optimization_pred.shape) - }, - ) + if self.task_type in CLASSIFICATION_TASKS: + voting_pipeline = VotingClassifier(estimators=None, voting='soft') + else: + voting_pipeline = VotingRegressorWrapper(estimators=None) - # Abort if predictions contain NaNs - for y, s in [ - # Y_train_pred deleted here. Fix unittest accordingly. - [Y_optimization_pred, 'optimization'], - [Y_valid_pred, 'validation'], - [Y_test_pred, 'test'] - ]: - if y is not None and not np.all(np.isfinite(y)): - return ( - 1.0, - { - 'error': - 'Model predictions for %s set contains NaNs.' % s - }, - ) + voting_pipeline.estimators_ = self.pipelines - # Abort if we don't want to output anything. 
- if 'all' in self.disable_file_output: - return None, {} + return voting_pipeline + def _save_to_backend( + self, + opt_pred: np.ndarray, + valid_pred: Optional[np.ndarray], + test_pred: Optional[np.ndarray] + ) -> bool: + """ Return False if we CANNOT save due to some issues """ + if not self._is_output_possible(opt_pred, valid_pred, test_pred): + return False + if self.y_opt is None or 'all' in self.disable_file_output: + # self.y_opt can be None if we use partial-cv ==> no output to save + return True + + backend = self.fixed_pipeline_params.backend # This file can be written independently of the others down bellow - if 'y_optimization' not in self.disable_file_output: - if self.output_y_hat_optimization: - self.backend.save_targets_ensemble(self.Y_optimization) - - if getattr(self, 'pipelines', None) is not None: - if self.pipelines[0] is not None and len(self.pipelines) > 0: # type: ignore[index, arg-type] - if 'pipelines' not in self.disable_file_output: - if self.task_type in CLASSIFICATION_TASKS: - pipelines = VotingClassifier(estimators=None, voting='soft', ) - else: - pipelines = VotingRegressorWrapper(estimators=None) - pipelines.estimators_ = self.pipelines - else: - pipelines = None - else: - pipelines = None - else: - pipelines = None + if 'y_optimization' not in self.disable_file_output and self.fixed_pipeline_params.save_y_opt: + backend.save_targets_ensemble(self.y_opt) - if getattr(self, 'pipeline', None) is not None: - if 'pipeline' not in self.disable_file_output: - pipeline = self.pipeline - else: - pipeline = None - else: - pipeline = None - - self.logger.debug("Saving directory {}, {}, {}".format(self.seed, self.num_run, self.budget)) - self.backend.save_numrun_to_dir( - seed=int(self.seed), + seed, budget = self.fixed_pipeline_params.seed, self.evaluator_params.budget + self.logger.debug(f"Saving directory {seed}, {self.num_run}, {budget}") + backend.save_numrun_to_dir( + seed=int(seed), idx=int(self.num_run), - budget=float(self.budget), - model=pipeline, - cv_model=pipelines, - ensemble_predictions=( - Y_optimization_pred if 'y_optimization' not in - self.disable_file_output else None - ), - valid_predictions=( - Y_valid_pred if 'y_valid' not in - self.disable_file_output else None - ), - test_predictions=( - Y_test_pred if 'y_test' not in - self.disable_file_output else None - ), + budget=float(budget), + model=self.pipelines[0] if 'pipeline' not in self.disable_file_output else None, + cv_model=self._fetch_voting_pipeline() if 'pipelines' not in self.disable_file_output else None, + ensemble_predictions=self._get_prediction(opt_pred, 'y_optimization'), + valid_predictions=self._get_prediction(valid_pred, 'y_valid'), + test_predictions=self._get_prediction(test_pred, 'y_test') ) + return True - return None, {} - - def _predict_proba(self, X: np.ndarray, pipeline: BaseEstimator, - Y_train: Optional[np.ndarray] = None) -> np.ndarray: - """ - A wrapper function to handle the prediction of classification tasks. - It also makes sure that the predictions has the same dimensionality - as the expected labels - - Args: - X (np.ndarray): - A set of features to feed to the pipeline - pipeline (BaseEstimator): - A model that will take the features X return a prediction y - This pipeline must be a classification estimator that supports - the predict_proba method. 
- Y_train (Optional[np.ndarray]): - Returns: - (np.ndarray): - The predictions of pipeline for the given features X - """ - @no_type_check - def send_warnings_to_log(message, category, filename, lineno, - file=None, line=None): - self.logger.debug('%s:%s: %s:%s' % - (filename, lineno, category.__name__, message)) - return - - with warnings.catch_warnings(): - warnings.showwarning = send_warnings_to_log - Y_pred = pipeline.predict_proba(X, batch_size=1000) - - Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train) - return Y_pred - - def _predict_regression(self, X: np.ndarray, pipeline: BaseEstimator, - Y_train: Optional[np.ndarray] = None) -> np.ndarray: - """ - A wrapper function to handle the prediction of regression tasks. - It is a wrapper to provide the same interface to _predict_proba - - Regression predictions expects an unraveled dimensionality. - To comply with scikit-learn VotingRegressor requirement, if the estimator - predicts a (N,) shaped array, it is converted to (N, 1) - - Args: - X (np.ndarray): - A set of features to feed to the pipeline - pipeline (BaseEstimator): - A model that will take the features X return a prediction y - Y_train (Optional[np.ndarray]): - Returns: - (np.ndarray): - The predictions of pipeline for the given features X - """ - @no_type_check - def send_warnings_to_log(message, category, filename, lineno, - file=None, line=None): - self.logger.debug('%s:%s: %s:%s' % - (filename, lineno, category.__name__, message)) - return - - with warnings.catch_warnings(): - warnings.showwarning = send_warnings_to_log - Y_pred = pipeline.predict(X, batch_size=1000) + def _is_output_possible( + self, + opt_pred: np.ndarray, + valid_pred: Optional[np.ndarray], + test_pred: Optional[np.ndarray] + ) -> bool: - if len(Y_pred.shape) == 1: - Y_pred = Y_pred.reshape((-1, 1)) + if self.y_opt is None: # mypy check + return True - return Y_pred + if self.y_opt.shape[0] != opt_pred.shape[0]: + return False - def _ensure_prediction_array_sizes(self, prediction: np.ndarray, - Y_train: np.ndarray) -> np.ndarray: - """ - This method formats a prediction to match the dimensionality of the provided - labels (Y_train). 
This should be used exclusively for classification tasks + y_dict = {'optimization': opt_pred, 'validation': valid_pred, 'test': test_pred} + for inference_name, y in y_dict.items(): + if y is not None and not np.all(np.isfinite(y)): + return False # Model predictions contains NaNs - Args: - prediction (np.ndarray): - The un-formatted predictions of a pipeline - Y_train (np.ndarray): - The labels from the dataset to give an intuition of the expected - predictions dimensionality - Returns: - (np.ndarray): - The formatted prediction - """ - assert self.datamanager.num_classes is not None, "Called function on wrong task" - num_classes: int = self.datamanager.num_classes - - if self.output_type == MULTICLASS and \ - prediction.shape[1] < num_classes: - if Y_train is None: - raise ValueError('Y_train must not be None!') - classes = list(np.unique(Y_train)) - - mapping = dict() - for class_number in range(num_classes): - if class_number in classes: - index = classes.index(class_number) - mapping[index] = class_number - new_predictions = np.zeros((prediction.shape[0], num_classes), - dtype=np.float32) - - for index in mapping: - class_index = mapping[index] - new_predictions[:, class_index] = prediction[:, index] - - return new_predictions - - return prediction + return True diff --git a/autoPyTorch/evaluation/pipeline_class_collection.py b/autoPyTorch/evaluation/pipeline_class_collection.py new file mode 100644 index 000000000..bd4c1be6f --- /dev/null +++ b/autoPyTorch/evaluation/pipeline_class_collection.py @@ -0,0 +1,335 @@ +import json +import os +from typing import Any, Dict, Optional, Union + +from ConfigSpace import Configuration + +import numpy as np + +import pandas as pd + +from sklearn.base import BaseEstimator +from sklearn.dummy import DummyClassifier, DummyRegressor + +import autoPyTorch.pipeline.image_classification +import autoPyTorch.pipeline.tabular_classification +import autoPyTorch.pipeline.tabular_regression +import autoPyTorch.pipeline.traditional_tabular_classification +import autoPyTorch.pipeline.traditional_tabular_regression +from autoPyTorch.constants import ( + IMAGE_TASKS, + REGRESSION_TASKS, + TABULAR_TASKS, +) +from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType +from autoPyTorch.evaluation.utils import convert_multioutput_multiclass_to_multilabel +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.utils.common import replace_string_bool_to_bool, subsampler + + +def get_default_pipeline_config(choice: str) -> Dict[str, Any]: + choices = ('default', 'dummy') + if choice not in choices: + raise ValueError(f'choice must be in {choices}, but got {choice}') + + return _get_default_pipeline_config() if choice == 'default' else _get_dummy_pipeline_config() + + +def _get_default_pipeline_config() -> Dict[str, Any]: + file_path = os.path.join(os.path.dirname(__file__), '../configs/default_pipeline_options.json') + return replace_string_bool_to_bool(json.load(open(file_path))) + + +def _get_dummy_pipeline_config() -> Dict[str, Any]: + file_path = os.path.join(os.path.dirname(__file__), '../configs/dummy_pipeline_options.json') + return replace_string_bool_to_bool(json.load(open(file_path))) + + +def get_pipeline_class( + config: Union[int, str, Configuration], + task_type: int +) -> Union[BaseEstimator, BasePipeline]: + + pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None + if task_type in REGRESSION_TASKS: + if isinstance(config, int): + pipeline_class = DummyRegressionPipeline + elif isinstance(config, str): + 
pipeline_class = MyTraditionalTabularRegressionPipeline + elif isinstance(config, Configuration): + pipeline_class = autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline + else: + raise ValueError('task {} not available'.format(task_type)) + else: + if isinstance(config, int): + pipeline_class = DummyClassificationPipeline + elif isinstance(config, str): + if task_type in TABULAR_TASKS: + pipeline_class = MyTraditionalTabularClassificationPipeline + else: + raise ValueError("Only tabular tasks are currently supported with traditional methods") + elif isinstance(config, Configuration): + if task_type in TABULAR_TASKS: + pipeline_class = autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline + elif task_type in IMAGE_TASKS: + pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline + else: + raise ValueError('task {} not available'.format(task_type)) + + if pipeline_class is None: + raise RuntimeError("could not infer pipeline class") + + return pipeline_class + + +class MyTraditionalTabularClassificationPipeline(BaseEstimator): + """ + A wrapper class that holds a pipeline for traditional classification. + Estimators like CatBoost, and Random Forest are considered traditional machine + learning models and are fitted before neural architecture search. + + This class is an interface to fit a pipeline containing a traditional machine + learning model, and is the final object that is stored for inference. + + Attributes: + dataset_properties (Dict[str, BaseDatasetPropertiesType]): + A dictionary containing dataset specific information + random_state (Optional[np.random.RandomState]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ + + def __init__(self, config: str, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None): + self.config = config + self.dataset_properties = dataset_properties + self.random_state = random_state + self.init_params = init_params + self.pipeline = autoPyTorch.pipeline.traditional_tabular_classification. \ + TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties, + random_state=self.random_state) + configuration_space = self.pipeline.get_hyperparameter_search_space() + default_configuration = configuration_space.get_default_configuration().get_dictionary() + default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config + self.configuration = Configuration(configuration_space, default_configuration) + self.pipeline.set_hyperparameters(self.configuration) + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + return self.pipeline.fit(X, y) + + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + return self.pipeline.predict_proba(X, batch_size=batch_size) + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + return self.pipeline.predict(X, batch_size=batch_size) + + def get_additional_run_info(self) -> Dict[str, Any]: + """ + Can be used to return additional info for the run. + Returns: + Dict[str, Any]: + Currently contains + 1. pipeline_configuration: the configuration of the pipeline, i.e, the traditional model used + 2. 
trainer_configuration: the parameters for the traditional model used. + Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs + """ + return {'pipeline_configuration': self.configuration, + 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config(), + 'configuration_origin': 'traditional'} + + def get_pipeline_representation(self) -> Dict[str, str]: + return self.pipeline.get_pipeline_representation() + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return autoPyTorch.pipeline.traditional_tabular_classification. \ + TraditionalTabularClassificationPipeline.get_default_pipeline_options() + + +class MyTraditionalTabularRegressionPipeline(BaseEstimator): + """ + A wrapper class that holds a pipeline for traditional regression. + Estimators like CatBoost, and Random Forest are considered traditional machine + learning models and are fitted before neural architecture search. + + This class is an interface to fit a pipeline containing a traditional machine + learning model, and is the final object that is stored for inference. + + Attributes: + dataset_properties (Dict[str, Any]): + A dictionary containing dataset specific information + random_state (Optional[np.random.RandomState]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ + def __init__(self, config: str, + dataset_properties: Dict[str, Any], + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict] = None): + self.config = config + self.dataset_properties = dataset_properties + self.random_state = random_state + self.init_params = init_params + self.pipeline = autoPyTorch.pipeline.traditional_tabular_regression. \ + TraditionalTabularRegressionPipeline(dataset_properties=dataset_properties, + random_state=self.random_state) + configuration_space = self.pipeline.get_hyperparameter_search_space() + default_configuration = configuration_space.get_default_configuration().get_dictionary() + default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config + self.configuration = Configuration(configuration_space, default_configuration) + self.pipeline.set_hyperparameters(self.configuration) + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + return self.pipeline.fit(X, y) + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + return self.pipeline.predict(X, batch_size=batch_size) + + def get_additional_run_info(self) -> Dict[str, Any]: + """ + Can be used to return additional info for the run. + Returns: + Dict[str, Any]: + Currently contains + 1. pipeline_configuration: the configuration of the pipeline, i.e, the traditional model used + 2. trainer_configuration: the parameters for the traditional model used. 
+ Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs + """ + return {'pipeline_configuration': self.configuration, + 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config()} + + def get_pipeline_representation(self) -> Dict[str, str]: + return self.pipeline.get_pipeline_representation() + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return autoPyTorch.pipeline.traditional_tabular_regression.\ + TraditionalTabularRegressionPipeline.get_default_pipeline_options() + + +class DummyClassificationPipeline(DummyClassifier): + """ + A wrapper class that holds a pipeline for dummy classification. + + A wrapper over DummyClassifier of scikit learn. This estimator is considered the + worst performing model. In case of failure, at least this model will be fitted. + + Attributes: + random_state (Optional[Union[int, np.random.RandomState]]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. It complies + a similar function as the kwargs + """ + + def __init__(self, config: Configuration, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ) -> None: + self.config = config + self.init_params = init_params + self.random_state = random_state + if config == 1: + super(DummyClassificationPipeline, self).__init__(strategy="uniform") + else: + super(DummyClassificationPipeline, self).__init__(strategy="most_frequent") + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + X_train = subsampler(X['X_train'], X['train_indices']) + y_train = subsampler(X['y_train'], X['train_indices']) + return super(DummyClassificationPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, + sample_weight=sample_weight) + + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + new_X = np.ones((X.shape[0], 1)) + probas = super(DummyClassificationPipeline, self).predict_proba(new_X) + probas = convert_multioutput_multiclass_to_multilabel(probas).astype( + np.float32) + return probas + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + new_X = np.ones((X.shape[0], 1)) + return super(DummyClassificationPipeline, self).predict(new_X).astype(np.float32) + + def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 + return {'configuration_origin': 'DUMMY'} + + def get_pipeline_representation(self) -> Dict[str, str]: + return { + 'Preprocessing': 'None', + 'Estimator': 'Dummy', + } + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return {'budget_type': 'epochs', + 'epochs': 1, + 'runtime': 1} + + +class DummyRegressionPipeline(DummyRegressor): + """ + A wrapper class that holds a pipeline for dummy regression. + + A wrapper over DummyRegressor of scikit learn. This estimator is considered the + worst performing model. In case of failure, at least this model will be fitted. + + Attributes: + random_state (Optional[Union[int, np.random.RandomState]]): + Object that contains a seed and allows for reproducible results + init_params (Optional[Dict]): + An optional dictionary that is passed to the pipeline's steps. 
It complies + a similar function as the kwargs + """ + + def __init__(self, config: Configuration, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None) -> None: + self.config = config + self.init_params = init_params + self.random_state = random_state + if config == 1: + super(DummyRegressionPipeline, self).__init__(strategy='mean') + else: + super(DummyRegressionPipeline, self).__init__(strategy='median') + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + X_train = subsampler(X['X_train'], X['train_indices']) + y_train = subsampler(X['y_train'], X['train_indices']) + return super(DummyRegressionPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, + sample_weight=sample_weight) + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.ndarray: + new_X = np.ones((X.shape[0], 1)) + return super(DummyRegressionPipeline, self).predict(new_X).astype(np.float32) + + def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 + return {'configuration_origin': 'DUMMY'} + + def get_pipeline_representation(self) -> Dict[str, str]: + return { + 'Preprocessing': 'None', + 'Estimator': 'Dummy', + } + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return {'budget_type': 'epochs', + 'epochs': 1, + 'runtime': 1} diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 7ca895304..97b29b640 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -4,10 +4,11 @@ import logging import math import multiprocessing -import os import time import traceback import warnings +from multiprocessing.context import BaseContext +from multiprocessing.queues import Queue from queue import Empty from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -37,15 +38,42 @@ read_queue ) from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric -from autoPyTorch.utils.common import dict_repr, replace_string_bool_to_bool +from autoPyTorch.utils.common import dict_repr from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger from autoPyTorch.utils.parallel import preload_modules -def fit_predict_try_except_decorator( - ta: Callable, - queue: multiprocessing.Queue, cost_for_crash: float, **kwargs: Any) -> None: +# cost, status, info, additional_run_info +ProcessedResultsType = Tuple[float, StatusType, Optional[List[RunValue]], Dict[str, Any]] +# status, cost, runtime, additional_info +PynisherResultsType = Tuple[StatusType, float, float, Dict[str, Any]] + + +class PynisherFunctionWrapperLikeType: + def __init__(self, func: Callable): + self.func: Callable = func + self.exit_status: Any = None + self.exitcode: Optional[str] = None + self.wall_clock_time: Optional[float] = None + self.stdout: Optional[str] = None + self.stderr: Optional[str] = None + raise RuntimeError("Cannot instantiate `PynisherFuncWrapperType` instances.") + + def __call__(self, *args: Any, **kwargs: Any) -> PynisherResultsType: + # status, cost, runtime, additional_info + raise NotImplementedError + + +PynisherFunctionWrapperType = Union[Any, PynisherFunctionWrapperLikeType] + + +def run_target_algorithm_with_exception_handling( + ta: Callable, + queue: Queue, + cost_for_crash: float, + **kwargs: Any +) -> None: try: ta(queue=queue, **kwargs) except Exception as e: @@ -57,8 
+85,8 @@ def fit_predict_try_except_decorator(
         error_message = repr(e)

         # Print also to STDOUT in case of broken handlers
-        warnings.warn("Exception handling in `fit_predict_try_except_decorator`: "
-                      "traceback: %s \nerror message: %s" % (exception_traceback, error_message))
+        warnings.warn("Exception handling in `run_target_algorithm_with_exception_handling`: "
+                      f"traceback: {exception_traceback} \nerror message: {error_message}")

         queue.put({'loss': cost_for_crash,
                    'additional_run_info': {'traceback': exception_traceback,
@@ -68,26 +96,18 @@ def fit_predict_try_except_decorator(
     queue.close()


-def get_cost_of_crash(metric: autoPyTorchMetric) -> float:
-    # The metric must always be defined to extract optimum/worst
-    if not isinstance(metric, autoPyTorchMetric):
-        raise ValueError("The metric must be strictly be an instance of autoPyTorchMetric")
-
-    # Autopytorch optimizes the err. This function translates
-    # worst_possible_result to be a minimization problem.
-    # For metrics like accuracy that are bounded to [0,1]
-    # metric.optimum==1 is the worst cost.
-    # A simple guide is to use greater_is_better embedded as sign
-    if metric._sign < 0:
-        worst_possible_result = metric._worst_possible_result
+def _get_eval_fn(cost_for_crash: float, target_algorithm: Optional[Callable] = None) -> Callable:
+    if target_algorithm is not None:
+        return target_algorithm
     else:
-        worst_possible_result = metric._optimum - metric._worst_possible_result
-
-    return worst_possible_result
+        return functools.partial(
+            run_target_algorithm_with_exception_handling,
+            ta=autoPyTorch.evaluation.train_evaluator.eval_fn,
+            cost_for_crash=cost_for_crash,
+        )


-def _encode_exit_status(exit_status: multiprocessing.connection.Connection
-                        ) -> str:
+def _encode_exit_status(exit_status: multiprocessing.connection.Connection) -> str:
     try:
         encoded_exit_status: str = json.dumps(exit_status)
         return encoded_exit_status
@@ -95,7 +115,131 @@ def _encode_exit_status(exit_status: multiprocessing.connection.Connection
         return str(exit_status)


-class ExecuteTaFuncWithQueue(AbstractTAFunc):
+def _get_logger(logger_port: Optional[int], logger_name: str) -> Union[logging.Logger, PicklableClientLogger]:
+    if logger_port is None:
+        logger: Union[logging.Logger, PicklableClientLogger] = logging.getLogger(logger_name)
+    else:
+        logger = get_named_client_logger(name=logger_name, port=logger_port)
+
+    return logger
+
+
+def _get_origin(config: Union[int, str, Configuration]) -> str:
+    if isinstance(config, int):
+        origin = 'DUMMY'
+    elif isinstance(config, str):
+        origin = 'traditional'
+    else:
+        origin = getattr(config, 'origin', 'UNKNOWN')
+
+    return origin
+
+
+def _exception_handling(
+    obj: PynisherFunctionWrapperType,
+    queue: Queue,
+    info_msg: str,
+    info_for_empty: Dict[str, Any],
+    status: StatusType,
+    is_anything_exception: bool,
+    worst_possible_result: float
+) -> ProcessedResultsType:
+    """
+    Args:
+        obj (PynisherFunctionWrapperType):
+            The pynisher-wrapped target function, which carries the exit status
+            and the captured output of the finished run
+        queue (Queue):
+            The multiprocessing queue from which the evaluator results are read
+        info_msg (str):
+            a message for the `info` key in additional_run_info
+        info_for_empty (Dict[str, Any]):
+            the additional_run_info to use in the case of an empty queue
+        status (StatusType):
+            the status type inferred from the pynisher exit status
+        is_anything_exception (bool):
+            whether the run ended with an exception other than
+            TimeoutException or MemorylimitException
+        worst_possible_result (float):
+            the cost assigned to the run when no result can be read from the queue
+
+    Returns:
+        result (ProcessedResultsType):
+            cost, status, info, additional_run_info.
+ """ + cost, info = worst_possible_result, None + additional_run_info: Dict[str, Any] = {} + + try: + info = read_queue(queue) + except Empty: # alternative of queue.empty(), which is not reliable + return cost, status, info, info_for_empty + + result, status = info[-1]['loss'], info[-1]['status'] + additional_run_info = info[-1]['additional_run_info'] + + _success_in_anything_exc = (is_anything_exception and obj.exit_status == 0) + _success_in_to_or_mle = (status in [StatusType.SUCCESS, StatusType.DONOTADVANCE] + and not is_anything_exception) + + if _success_in_anything_exc or _success_in_to_or_mle: + cost = result + if not is_anything_exception or not _success_in_anything_exc: + additional_run_info.update( + subprocess_stdout=obj.stdout, + subprocess_stderr=obj.stderr, + info=info_msg) + if is_anything_exception and not _success_in_anything_exc: + status = StatusType.CRASHED + additional_run_info.update(exit_status=_encode_exit_status(obj.exit_status)) + + return cost, status, info, additional_run_info + + +def _process_exceptions( + obj: PynisherFunctionWrapperType, + queue: Queue, + budget: float, + worst_possible_result: float +) -> ProcessedResultsType: + if obj.exit_status is TAEAbortException: + info, status, cost = None, StatusType.ABORT, worst_possible_result + additional_run_info = dict( + error='Your configuration of autoPyTorch did not work', + exit_status=_encode_exit_status(obj.exit_status), + subprocess_stdout=obj.stdout, + subprocess_stderr=obj.stderr + ) + return cost, status, info, additional_run_info + + info_for_empty: Dict[str, Any] = {} + if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException): + is_timeout = obj.exit_status is pynisher.TimeoutException + status = StatusType.TIMEOUT if is_timeout else StatusType.MEMOUT + is_anything_exception = False + info_msg = f'Run stopped because of {"timeout" if is_timeout else "memout"}.' + info_for_empty = {'error': 'Timeout' if is_timeout else 'Memout'} + else: + status, is_anything_exception = StatusType.CRASHED, True + info_msg = 'Run treated as crashed because the pynisher exit ' \ + f'status {str(obj.exit_status)} is unknown.' + info_for_empty = dict( + error='Result queue is empty', + exit_status=_encode_exit_status(obj.exit_status), + subprocess_stdout=obj.stdout, + subprocess_stderr=obj.stderr, + exitcode=obj.exitcode + ) + + cost, status, info, additional_run_info = _exception_handling( + obj=obj, queue=queue, is_anything_exception=is_anything_exception, + info_msg=info_msg, info_for_empty=info_for_empty, + status=status, worst_possible_result=worst_possible_result + ) + + if budget == 0 and status == StatusType.DONOTADVANCE: + status = StatusType.SUCCESS + + if not isinstance(additional_run_info, dict): + additional_run_info = {'message': additional_run_info} + + return cost, status, info, additional_run_info + + +class TargetAlgorithmQuery(AbstractTAFunc): """ Wrapper class that executes the target algorithm with queues according to what SMAC expects. 
This allows us to @@ -116,15 +260,14 @@ def __init__( stats: Optional[Stats] = None, run_obj: str = 'quality', par_factor: int = 1, - output_y_hat_optimization: bool = True, + save_y_opt: bool = True, include: Optional[Dict[str, Any]] = None, exclude: Optional[Dict[str, Any]] = None, memory_limit: Optional[int] = None, disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None, init_params: Dict[str, Any] = None, - budget_type: str = None, ta: Optional[Callable] = None, - logger_port: int = None, + logger_port: Optional[int] = None, all_supported_metrics: bool = True, search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None ): @@ -153,14 +296,8 @@ def __init__( self.worst_possible_result = cost_for_crash - eval_function = functools.partial( - fit_predict_try_except_decorator, - ta=eval_function, - cost_for_crash=self.worst_possible_result, - ) - super().__init__( - ta=ta if ta is not None else eval_function, + ta=_get_eval_fn(self.worst_possible_result, target_algorithm=ta), stats=stats, run_obj=run_obj, par_factor=par_factor, @@ -169,35 +306,23 @@ def __init__( ) self.pynisher_context = pynisher_context - self.seed = seed self.initial_num_run = initial_num_run self.metric = metric self.include = include self.exclude = exclude self.disable_file_output = disable_file_output self.init_params = init_params + self.logger = _get_logger(logger_port, 'TAE') + self.memory_limit = int(math.ceil(memory_limit)) if memory_limit is not None else memory_limit - self.budget_type = pipeline_config['budget_type'] if pipeline_config is not None else budget_type - - self.pipeline_config: Dict[str, Union[int, str, float]] = dict() - if pipeline_config is None: - pipeline_config = replace_string_bool_to_bool(json.load(open( - os.path.join(os.path.dirname(__file__), '../configs/default_pipeline_options.json')))) - self.pipeline_config.update(pipeline_config) - - self.logger_port = logger_port - if self.logger_port is None: - self.logger: Union[logging.Logger, PicklableClientLogger] = logging.getLogger("TAE") - else: - self.logger = get_named_client_logger( - name="TAE", - port=self.logger_port, - ) - self.all_supported_metrics = all_supported_metrics + dm = backend.load_datamanager() + self._exist_val_tensor = (dm.val_tensors is not None) + self._exist_test_tensor = (dm.test_tensors is not None) - if memory_limit is not None: - memory_limit = int(math.ceil(memory_limit)) - self.memory_limit = memory_limit + @property + def eval_fn(self) -> Callable: + # this is a target algorithm defined in AbstractTAFunc during super().__init__(ta) + return self.ta # type: ignore self.search_space_updates = search_space_updates @@ -218,10 +343,7 @@ def _check_and_get_default_budget(self) -> float: else: return budget_choices[budget_type] - def run_wrapper( - self, - run_info: RunInfo, - ) -> Tuple[RunInfo, RunValue]: + def run_wrapper(self, run_info: RunInfo) -> Tuple[RunInfo, RunValue]: """ wrapper function for ExecuteTARun.run_wrapper() to cap the target algorithm runtime if it would run over the total allowed runtime. 
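# Illustrative sketch only (not part of the patch): the run_wrapper docstring above
# describes capping the per-run cutoff so that a configuration cannot exceed the
# remaining walltime, and the hunk below keeps a 5-second safety margin, stops runs
# whose cutoff drops below one second, and rounds fractional cutoffs up to whole
# seconds for pynisher. The helper name and arguments here are hypothetical.
import math
from typing import Optional

def capped_cutoff(remaining_time: float, requested_cutoff: float) -> Optional[int]:
    cutoff = requested_cutoff
    if remaining_time - 5 < cutoff:  # keep a 5-second margin for bookkeeping
        cutoff = int(remaining_time - 5)
    if cutoff < 1.0:  # nothing sensible can run; corresponds to returning StatusType.STOP
        return None
    if not isinstance(cutoff, int):  # round fractional cutoffs up to whole seconds
        cutoff = int(math.ceil(cutoff))
    return cutoff

assert capped_cutoff(remaining_time=100.0, requested_cutoff=30.5) == 31
assert capped_cutoff(remaining_time=20.0, requested_cutoff=30.5) == 15
assert capped_cutoff(remaining_time=4.0, requested_cutoff=30.0) is None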
@@ -254,7 +376,8 @@ def run_wrapper( if remaining_time - 5 < run_info.cutoff: run_info = run_info._replace(cutoff=int(remaining_time - 5)) - if run_info.cutoff < 1.0: + cutoff = run_info.cutoff + if cutoff < 1.0: return run_info, RunValue( status=StatusType.STOP, cost=self.worst_possible_result, @@ -263,13 +386,10 @@ def run_wrapper( starttime=time.time(), endtime=time.time(), ) - elif ( - run_info.cutoff != int(np.ceil(run_info.cutoff)) - and not isinstance(run_info.cutoff, int) - ): - run_info = run_info._replace(cutoff=int(np.ceil(run_info.cutoff))) + elif cutoff != int(np.ceil(cutoff)) and not isinstance(cutoff, int): + run_info = run_info._replace(cutoff=int(np.ceil(cutoff))) - self.logger.info("Starting to evaluate configuration %s" % run_info.config.config_id) + self.logger.info(f"Starting to evaluate configuration {run_info.config.config_id}") run_info, run_value = super().run_wrapper(run_info=run_info) if not is_intensified: # It is required for the SMAC compatibility @@ -277,36 +397,27 @@ def run_wrapper( return run_info, run_value - def run( + def _get_pynisher_func_wrapper_and_params( self, config: Configuration, + context: BaseContext, + num_run: int, instance: Optional[str] = None, cutoff: Optional[float] = None, - seed: int = 12345, budget: float = 0.0, instance_specific: Optional[str] = None, - ) -> Tuple[StatusType, float, float, Dict[str, Any]]: + ) -> Tuple[PynisherFunctionWrapperType, EvaluatorParams]: - context = multiprocessing.get_context(self.pynisher_context) preload_modules(context) - queue: multiprocessing.queues.Queue = context.Queue() - if not (instance_specific is None or instance_specific == '0'): raise ValueError(instance_specific) + init_params = {'instance': instance} if self.init_params is not None: init_params.update(self.init_params) - if self.logger_port is None: - logger: Union[logging.Logger, PicklableClientLogger] = logging.getLogger("pynisher") - else: - logger = get_named_client_logger( - name="pynisher", - port=self.logger_port, - ) - pynisher_arguments = dict( - logger=logger, + logger=_get_logger(self.fixed_pipeline_params.logger_port, 'pynisher'), # Pynisher expects seconds as a time indicator wall_time_in_s=int(cutoff) if cutoff is not None else None, mem_in_mb=self.memory_limit, @@ -314,39 +425,46 @@ def run( context=context, ) - if isinstance(config, (int, str)): - num_run = self.initial_num_run - else: - num_run = config.config_id + self.initial_num_run + search_space_updates = self.fixed_pipeline_params.search_space_updates + self.logger.debug(f"Search space updates for {num_run}: {search_space_updates}") + + evaluator_params = EvaluatorParams( + configuration=config, + num_run=num_run, + init_params=init_params, + budget=budget + ) + + return pynisher.enforce_limits(**pynisher_arguments)(self.eval_fn), evaluator_params + + def run( + self, + config: Configuration, + instance: Optional[str] = None, + cutoff: Optional[float] = None, + budget: float = 0.0, + seed: int = 12345, # required for the compatibility with smac + instance_specific: Optional[str] = None, + ) -> PynisherResultsType: + + context = multiprocessing.get_context(self.pynisher_context) + queue: multiprocessing.queues.Queue = context.Queue() + budget_type = self.fixed_pipeline_params.budget_type + budget = self.fixed_pipeline_params.pipeline_config[budget_type] if budget == 0 else budget + num_run = self.initial_num_run if isinstance(config, (int, str)) else config.config_id + self.initial_num_run - self.logger.debug("Search space updates for {}: {}".format(num_run, - 
self.search_space_updates)) - obj_kwargs = dict( - queue=queue, + obj, params = self._get_pynisher_func_wrapper_and_params( config=config, - backend=self.backend, - metric=self.metric, - seed=self.seed, + context=context, num_run=num_run, - output_y_hat_optimization=self.output_y_hat_optimization, - include=self.include, - exclude=self.exclude, - disable_file_output=self.disable_file_output, instance=instance, - init_params=init_params, + cutoff=cutoff, budget=budget, - budget_type=self.budget_type, - pipeline_config=self.pipeline_config, - logger_port=self.logger_port, - all_supported_metrics=self.all_supported_metrics, - search_space_updates=self.search_space_updates + instance_specific=instance_specific ) - info: Optional[List[RunValue]] - additional_run_info: Dict[str, Any] try: - obj = pynisher.enforce_limits(**pynisher_arguments)(self.ta) - obj(**obj_kwargs) + obj(queue=queue, evaluator_params=params, fixed_pipeline_params=self.fixed_pipeline_params) except Exception as e: exception_traceback = traceback.format_exc() error_message = repr(e) @@ -356,147 +474,48 @@ def run( } return StatusType.CRASHED, self.cost_for_crash, 0.0, additional_run_info - if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException): - # Even if the pynisher thinks that a timeout or memout occured, - # it can be that the target algorithm wrote something into the queue - # - then we treat it as a successful run - try: - info = read_queue(queue) # type: ignore - result = info[-1]['loss'] # type: ignore - status = info[-1]['status'] # type: ignore - additional_run_info = info[-1]['additional_run_info'] # type: ignore - - if obj.stdout: - additional_run_info['subprocess_stdout'] = obj.stdout - if obj.stderr: - additional_run_info['subprocess_stderr'] = obj.stderr - - if obj.exit_status is pynisher.TimeoutException: - additional_run_info['info'] = 'Run stopped because of timeout.' - elif obj.exit_status is pynisher.MemorylimitException: - additional_run_info['info'] = 'Run stopped because of memout.' - - if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]: - cost = result - else: - cost = self.worst_possible_result - - except Empty: - info = None - if obj.exit_status is pynisher.TimeoutException: - status = StatusType.TIMEOUT - additional_run_info = {'error': 'Timeout'} - elif obj.exit_status is pynisher.MemorylimitException: - status = StatusType.MEMOUT - additional_run_info = { - 'error': 'Memout (used more than {} MB).'.format(self.memory_limit) - } - else: - raise ValueError(obj.exit_status) - cost = self.worst_possible_result - - elif obj.exit_status is TAEAbortException: - info = None - status = StatusType.ABORT - cost = self.worst_possible_result - additional_run_info = {'error': 'Your configuration of ' - 'autoPyTorch does not work!', - 'exit_status': _encode_exit_status(obj.exit_status), - 'subprocess_stdout': obj.stdout, - 'subprocess_stderr': obj.stderr, - } + return self._process_results(obj, config, queue, num_run, budget) - else: - try: - info = read_queue(queue) # type: ignore - result = info[-1]['loss'] # type: ignore - status = info[-1]['status'] # type: ignore - additional_run_info = info[-1]['additional_run_info'] # type: ignore - - if obj.exit_status == 0: - cost = result - else: - status = StatusType.CRASHED - cost = self.worst_possible_result - additional_run_info['info'] = 'Run treated as crashed ' \ - 'because the pynisher exit ' \ - 'status %s is unknown.' 
% \ - str(obj.exit_status) - additional_run_info['exit_status'] = _encode_exit_status(obj.exit_status) - additional_run_info['subprocess_stdout'] = obj.stdout - additional_run_info['subprocess_stderr'] = obj.stderr - except Empty: - info = None - additional_run_info = { - 'error': 'Result queue is empty', - 'exit_status': _encode_exit_status(obj.exit_status), - 'subprocess_stdout': obj.stdout, - 'subprocess_stderr': obj.stderr, - 'exitcode': obj.exitcode - } - status = StatusType.CRASHED - cost = self.worst_possible_result - - if ( - (self.budget_type is None or budget == 0) - and status == StatusType.DONOTADVANCE - ): - status = StatusType.SUCCESS - - if not isinstance(additional_run_info, dict): - additional_run_info = {'message': additional_run_info} - - if ( - info is not None - and self.resampling_strategy in ['holdout-iterative-fit', 'cv-iterative-fit'] - and status != StatusType.CRASHED - ): - learning_curve = extract_learning_curve(info) - learning_curve_runtime = extract_learning_curve(info, 'duration') - if len(learning_curve) > 1: - additional_run_info['learning_curve'] = learning_curve - additional_run_info['learning_curve_runtime'] = learning_curve_runtime - - train_learning_curve = extract_learning_curve(info, 'train_loss') - if len(train_learning_curve) > 1: - additional_run_info['train_learning_curve'] = train_learning_curve - additional_run_info['learning_curve_runtime'] = learning_curve_runtime - - if self._get_validation_loss: - validation_learning_curve = extract_learning_curve(info, 'validation_loss') - if len(validation_learning_curve) > 1: - additional_run_info['validation_learning_curve'] = \ - validation_learning_curve - additional_run_info[ - 'learning_curve_runtime'] = learning_curve_runtime - - if self._get_test_loss: - test_learning_curve = extract_learning_curve(info, 'test_loss') - if len(test_learning_curve) > 1: - additional_run_info['test_learning_curve'] = test_learning_curve - additional_run_info[ - 'learning_curve_runtime'] = learning_curve_runtime - - if isinstance(config, int): - origin = 'DUMMY' - elif isinstance(config, str): - origin = 'traditional' - else: - origin = getattr(config, 'origin', 'UNKNOWN') - additional_run_info['configuration_origin'] = origin + def _add_learning_curve_info(self, additional_run_info: Dict[str, Any], info: List[RunValue]) -> None: + lc_runtime = extract_learning_curve(info, 'duration') + stored = False + targets = {'learning_curve': (True, None), + 'train_learning_curve': (True, 'train_loss'), + 'validation_learning_curve': (self._exist_val_tensor, 'validation_loss'), + 'test_learning_curve': (self._exist_test_tensor, 'test_loss')} + + for key, (collect, metric_name) in targets.items(): + if collect: + lc = extract_learning_curve(info, metric_name) + if len(lc) > 1: + stored = True + additional_run_info[key] = lc + + if stored: + additional_run_info['learning_curve_runtime'] = lc_runtime + def _process_results( + self, + obj: PynisherFunctionWrapperType, + config: Configuration, + queue: Queue, + num_run: int, + budget: float + ) -> PynisherResultsType: + + cost, status, info, additional_run_info = _process_exceptions(obj, queue, budget, self.worst_possible_result) + + if info is not None and status != StatusType.CRASHED: + self._add_learning_curve_info(additional_run_info, info) + + additional_run_info['configuration_origin'] = _get_origin(config) + assert obj.wall_clock_time is not None # mypy check runtime = float(obj.wall_clock_time) empty_queue(queue) self.logger.debug( - "Finish function evaluation {}.\n" - 
"Status: {}, Cost: {}, Runtime: {},\n" - "Additional information:\n{}".format( - str(num_run), - status, - cost, - runtime, - dict_repr(additional_run_info) - ) + f"Finish function evaluation {num_run}.\n" + f"Status: {status}, Cost: {cost}, Runtime: {runtime},\n" + f"Additional information:\n{dict_repr(additional_run_info)}" ) return status, cost, runtime, additional_run_info diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index e2165d330..fd99e1077 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -1,8 +1,6 @@ from multiprocessing.queues import Queue from typing import Any, Dict, List, Optional, Tuple, Union -from ConfigSpace.configuration_space import Configuration - import numpy as np from sklearn.base import BaseEstimator @@ -18,22 +16,71 @@ from autoPyTorch.evaluation.abstract_evaluator import ( AbstractEvaluator, EvaluationResults, - fit_and_suppress_warnings + fit_pipeline ) -from autoPyTorch.evaluation.utils import DisableFileOutputParameters -from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.evaluation.abstract_evaluator import EvaluatorParams, FixedPipelineParams from autoPyTorch.utils.common import dict_repr, subsampler -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates __all__ = ['TrainEvaluator', 'eval_train_function'] +class _CrossValidationResultsManager: + def __init__(self, num_folds: int): + self.additional_run_info: Dict = {} + self.opt_preds: List[Optional[np.ndarray]] = [None] * num_folds + self.valid_preds: List[Optional[np.ndarray]] = [None] * num_folds + self.test_preds: List[Optional[np.ndarray]] = [None] * num_folds + self.train_loss: Dict[str, float] = {} + self.opt_loss: Dict[str, float] = {} + self.n_train, self.n_opt = 0, 0 -def _get_y_array(y: np.ndarray, task_type: int) -> np.ndarray: - if task_type in CLASSIFICATION_TASKS and task_type != \ - MULTICLASSMULTIOUTPUT: - return y.ravel() - else: - return y + @staticmethod + def _update_loss_dict(loss_sum_dict: Dict[str, float], loss_dict: Dict[str, float], n_datapoints: int) -> None: + loss_sum_dict.update({ + metric_name: loss_sum_dict.get(metric_name, 0) + loss_dict[metric_name] * n_datapoints + for metric_name in loss_dict.keys() + }) + + def update(self, split_id: int, results: EvaluationResults, n_train: int, n_opt: int) -> None: + self.n_train += n_train + self.n_opt += n_opt + self.opt_preds[split_id] = results.opt_pred + self.valid_preds[split_id] = results.valid_pred + self.test_preds[split_id] = results.test_pred + + if results.additional_run_info is not None: + self.additional_run_info.update(results.additional_run_info) + + self._update_loss_dict(self.train_loss, loss_dict=results.train_loss, n_datapoints=n_train) + self._update_loss_dict(self.opt_loss, loss_dict=results.opt_loss, n_datapoints=n_opt) + + def get_average_loss(self) -> Tuple[Dict[str, float], Dict[str, float]]: + train_avg_loss = {metric_name: val / float(self.n_train) for metric_name, val in self.train_loss.items()} + opt_avg_loss = {metric_name: val / float(self.n_opt) for metric_name, val in self.opt_loss.items()} + return train_avg_loss, opt_avg_loss + + def _merge_predictions(self, preds: List[Optional[np.ndarray]]) -> Optional[np.ndarray]: + merged_pred = np.array([pred for pred in preds if pred is not None]) + if merged_pred.size == 0: + return None + + if len(merged_pred.shape) != 3: + # merged_pred.shape := (n_splits, 
n_datapoints, n_class or 1) + raise ValueError( + f'each pred must have the shape (n_datapoints, n_class or 1), but got {merged_pred.shape[1:]}' + ) + + return np.nanmean(merged_pred, axis=0) + + def get_result_dict(self) -> Dict[str, Any]: + train_loss, opt_loss = self.get_average_loss() + return dict( + opt_loss=opt_loss, + train_loss=train_loss, + opt_pred=np.concatenate([pred for pred in self.opt_preds if pred is not None]), + valid_pred=self._merge_predictions(self.valid_preds), + test_pred=self._merge_predictions(self.test_preds), + additional_run_info=self.additional_run_info + ) class TrainEvaluator(AbstractEvaluator): @@ -46,75 +93,14 @@ class TrainEvaluator(AbstractEvaluator): with `CrossValTypes`, `HoldoutValTypes`, i.e, when the training data is split and the validation set is used for SMBO optimisation. - Attributes: - backend (Backend): - An object to interface with the disk storage. In particular, allows to - access the train and test datasets + Args: queue (Queue): Each worker available will instantiate an evaluator, and after completion, - it will return the evaluation result via a multiprocessing queue - metric (autoPyTorchMetric): - A scorer object that is able to evaluate how good a pipeline was fit. It - is a wrapper on top of the actual score method (a wrapper on top of scikit - lean accuracy for example) that formats the predictions accordingly. - budget: (float): - The amount of epochs/time a configuration is allowed to run. - budget_type (str): - The budget type, which can be epochs or time - pipeline_config (Optional[Dict[str, Any]]): - Defines the content of the pipeline being evaluated. For example, it - contains pipeline specific settings like logging name, or whether or not - to use tensorboard. - configuration (Union[int, str, Configuration]): - Determines the pipeline to be constructed. A dummy estimator is created for - integer configurations, a traditional machine learning pipeline is created - for string based configuration, and NAS is performed when a configuration - object is passed. - seed (int): - A integer that allows for reproducibility of results - output_y_hat_optimization (bool): - Whether this worker should output the target predictions, so that they are - stored on disk. Fundamentally, the resampling strategy might shuffle the - Y_train targets, so we store the split in order to re-use them for ensemble - selection. - num_run (Optional[int]): - An identifier of the current configuration being fit. This number is unique per - configuration. - include (Optional[Dict[str, Any]]): - An optional dictionary to include components of the pipeline steps. - exclude (Optional[Dict[str, Any]]): - An optional dictionary to exclude components of the pipeline steps. - disable_file_output (Optional[List[Union[str, DisableFileOutputParameters]]]): - Used as a list to pass more fine-grained - information on what to save. Must be a member of `DisableFileOutputParameters`. - Allowed elements in the list are: - - + `y_optimization`: - do not save the predictions for the optimization set, - which would later on be used to build an ensemble. Note that SMAC - optimizes a metric evaluated on the optimization set. - + `pipeline`: - do not save any individual pipeline files - + `pipelines`: - In case of cross validation, disables saving the joint model of the - pipelines fit on each fold. - + `y_test`: - do not save the predictions for the test set. - + `all`: - do not save any of the above. 
- For more information check `autoPyTorch.evaluation.utils.DisableFileOutputParameters`. - init_params (Optional[Dict[str, Any]]): - Optional argument that is passed to each pipeline step. It is the equivalent of - kwargs for the pipeline steps. - logger_port (Optional[int]): - Logging is performed using a socket-server scheme to be robust against many - parallel entities that want to write to the same file. This integer states the - socket port for the communication channel. If None is provided, a traditional - logger is used. - all_supported_metrics (bool): - Whether all supported metric should be calculated for every configuration. - search_space_updates (Optional[HyperparameterSearchSpaceUpdates]): - An object used to fine tune the hyperparameter search space of the pipeline + it will append the result to a multiprocessing queue + fixed_pipeline_params (FixedPipelineParams): + Fixed parameters for a pipeline + evaluator_params (EvaluatorParams): + The parameters for an evaluator. """ def __init__(self, backend: Backend, queue: Queue, metric: autoPyTorchMetric, @@ -178,59 +164,28 @@ def _evaluate_on_split(self, split_id: int) -> EvaluationResults: The results from the training and validation. """ self.logger.info("Starting fit {}".format(split_id)) + # We create pipeline everytime to avoid non-fitted pipelines to be in self.pipelines pipeline = self._get_pipeline() train_split, opt_split = self.splits[split_id] - y_train, y_opt = self.y_train[train_split], self.y_train[opt_split] - train_pred, opt_pred, valid_pred, test_pred = self._fit_and_predict( + train_pred, opt_pred, valid_pred, test_pred = self._fit_and_evaluate_loss( pipeline, split_id, train_indices=train_split, opt_indices=opt_split ) - train_loss = self._loss(y_train, train_pred) - opt_loss = self._loss(y_opt, opt_pred) - - additional_run_info = getattr(pipeline, 'get_additional_run_info', lambda: {})() - - status = StatusType.SUCCESS return EvaluationResults( - opt_loss=opt_loss, - train_loss=train_loss, + pipeline=pipeline, + opt_loss=self._loss(labels=self.y_train[opt_split], preds=opt_pred), + train_loss=self._loss(labels=self.y_train[train_split], preds=train_pred), opt_pred=opt_pred, valid_pred=valid_pred, test_pred=test_pred, - status=status, - additional_run_info=additional_run_info + status=StatusType.SUCCESS, + additional_run_info=getattr(pipeline, 'get_additional_run_info', lambda: {})() ) - @staticmethod - def _update_loss_dict(loss_sum_dict: Dict[str, float], loss_dict: Dict[str, float], n_datapoints: int) -> None: - if len(loss_sum_dict) == 0: - loss_sum_dict = {metric_name: val * n_datapoints for metric_name, val in loss_dict} - else: - loss_sum_dict = { - metric_name: loss_sum_dict[metric_name] + loss_dict[metric_name] * n_datapoints - for metric_name in loss_dict.keys() - } - - def _merge_predictions(self, preds: List[Optional[np.ndarray]], inference_name: str) -> np.ndarray: - """ Merge predictions from each split """ - - inference_name_choices = ['valid', 'test'] - if inference_name not in inference_name_choices: - raise ValueError(f'inference_name must be in {inference_name_choices}, but got {inference_name}') - - if getattr(self, f'X_{inference_name}', None) is None: - return None - - pred = np.array([pred for pred in preds if pred is not None]) - if len(pred.shape) == 3: # Average the predictions of several pipelines - pred = np.nanmean(pred, axis=0) - - return pred - def _cross_validation(self) -> EvaluationResults: """ Perform cross validation and return the merged results. 
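# Illustrative sketch only (not part of the patch): _CrossValidationResultsManager
# above aggregates per-fold losses by weighting each fold's loss with the number of
# datapoints in that fold and dividing by the total count, rather than taking an
# unweighted mean over folds. The helper and the toy numbers below are hypothetical.
from typing import Dict, List, Tuple

def weighted_average_losses(fold_results: List[Tuple[int, Dict[str, float]]]) -> Dict[str, float]:
    # fold_results: one (n_datapoints_in_fold, {metric_name: loss}) pair per fold
    loss_sum: Dict[str, float] = {}
    n_total = 0
    for n_datapoints, loss_dict in fold_results:
        n_total += n_datapoints
        for metric_name, loss in loss_dict.items():
            loss_sum[metric_name] = loss_sum.get(metric_name, 0.0) + loss * n_datapoints
    return {metric_name: total / float(n_total) for metric_name, total in loss_sum.items()}

# Two folds of unequal size: the larger fold dominates the average.
avg = weighted_average_losses([(80, {'accuracy': 0.10}), (20, {'accuracy': 0.30})])
assert abs(avg['accuracy'] - 0.14) < 1e-9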
@@ -239,76 +194,57 @@ def _cross_validation(self) -> EvaluationResults: results (EvaluationResults): The results that merge every split. """ - additional_run_info: Dict = {} - opt_preds: List[Optional[np.ndarray]] = [None] * self.num_folds - valid_preds: List[Optional[np.ndarray]] = [None] * self.num_folds - test_preds: List[Optional[np.ndarray]] = [] * self.num_folds - train_loss: Dict[str, float] = {} - opt_loss: Dict[str, float] = {} - n_train, n_opt = 0, 0 + cv_results = _CrossValidationResultsManager(self.num_folds) + Y_opt: List[Optional[np.ndarray]] = [None] * self.num_folds for split_id in range(len(self.splits)): train_split, opt_split = self.splits[split_id] + Y_opt[split_id] = self.y_train[opt_split] results = self._evaluate_on_split(split_id) - n_train += len(train_split) - n_opt += len(opt_split) - opt_preds[split_id] = results.opt_pred - valid_preds[split_id] = results.valid_pred - test_preds[split_id] = results.test_pred - additional_run_info.update(results.additional_run_info) - self._update_loss_dict(train_loss, loss_dict=results.train_loss, n_datapoints=len(train_split)) - self._update_loss_dict(opt_loss, loss_dict=results.opt_loss, n_datapoints=len(opt_split)) + self.pipelines[split_id] = results.pipeline + cv_results.update(split_id, results, len(train_split), len(opt_split)) - # averaging over folds - train_loss = {metric_name: val / float(n_train) for metric_name, val in train_loss.items()} - opt_loss = {metric_name: val / float(n_opt) for metric_name, val in train_loss.items()} + self.y_opt = np.concatenate([y_opt for y_opt in Y_opt if y_opt is not None]) - return EvaluationResults( - opt_loss=opt_loss, - train_loss=train_loss, - opt_pred=np.concatenate([pred for pred in opt_preds if pred is not None]), - valid_pred=self._merge_predictions(valid_preds, 'valid'), - test_pred=self._merge_predictions(test_preds, 'test'), - status=StatusType.SUCCESS, - additional_run_info=additional_run_info - ) + return EvaluationResults(status=StatusType.SUCCESS, **cv_results.get_result_dict()) - def fit_predict_and_loss(self) -> None: + def evaluate_loss(self) -> None: """Fit, predict and compute the loss for cross-validation and holdout""" if self.splits is None: raise ValueError(f"cannot fit pipeline {self.__class__.__name__} with datamanager.splits None") - results = self._evaluate_on_split(split_id=0) if self.num_folds == 1 else self._cross_validation() + if self.num_folds == 1: + _, opt_split = self.splits[0] + results = self._evaluate_on_split(split_id=0) + self.y_opt, self.pipelines[0] = self.y_train[opt_split], results.pipeline + else: + results = self._cross_validation() self.logger.debug( - f"In train evaluator.fit_predict_and_loss, num_run: {self.num_run}, loss:{results.opt_loss}," + f"In train evaluator.evaluate_loss, num_run: {self.num_run}, loss:{results.opt_loss}," f" status: {results.status},\nadditional run info:\n{dict_repr(results.additional_run_info)}" ) - self.finish_up(results=results, file_output=True) + self.record_evaluation(results=results) - def _fit_and_predict( + def _fit_and_evaluate_loss( self, pipeline: BaseEstimator, - fold: int, + split_id: int, train_indices: Union[np.ndarray, List], opt_indices: Union[np.ndarray, List] ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]: - X = {'train_indices': train_indices, - 'val_indices': opt_indices, - 'split_id': fold, - 'num_run': self.num_run, - **self.fit_dictionary} # fit dictionary - y = None - fit_and_suppress_warnings(self.logger, pipeline, X, y) + X = 
dict(train_indices=train_indices, val_indices=opt_indices, split_id=split_id, num_run=self.num_run) + X.update(self.fit_dictionary) + fit_pipeline(self.logger, pipeline, X, y=None) self.logger.info("Model fitted, now predicting") - kwargs = {'pipeline': pipeline, 'train_label_examples': self.y_train[train_indices]} - train_pred = self.predict_function(subsampler(self.X_train, train_indices), **kwargs) - opt_pred = self.predict_function(subsampler(self.X_train, opt_indices), **kwargs) - valid_pred = self.predict_function(self.X_valid, **kwargs) - test_pred = self.predict_function(self.X_test, **kwargs) + kwargs = {'pipeline': pipeline, 'label_examples': self.y_train[train_indices]} + train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) + opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) + valid_pred = self.predict(self.X_valid, **kwargs) + test_pred = self.predict(self.X_test, **kwargs) assert train_pred is not None and opt_pred is not None # mypy check return train_pred, opt_pred, valid_pred, test_pred @@ -336,84 +272,25 @@ def eval_train_function( instance: str = None, ) -> None: """ - This closure allows the communication between the ExecuteTaFuncWithQueue and the + This closure allows the communication between the TargetAlgorithmQuery and the pipeline trainer (TrainEvaluator). - Fundamentally, smac calls the ExecuteTaFuncWithQueue.run() method, which internally + Fundamentally, smac calls the TargetAlgorithmQuery.run() method, which internally builds a TrainEvaluator. The TrainEvaluator builds a pipeline, stores the output files to disc via the backend, and puts the performance result of the run in the queue. - - Attributes: - backend (Backend): - An object to interface with the disk storage. In particular, allows to - access the train and test datasets + Args: queue (Queue): Each worker available will instantiate an evaluator, and after completion, - it will return the evaluation result via a multiprocessing queue - metric (autoPyTorchMetric): - A scorer object that is able to evaluate how good a pipeline was fit. It - is a wrapper on top of the actual score method (a wrapper on top of scikit - lean accuracy for example) that formats the predictions accordingly. - budget: (float): - The amount of epochs/time a configuration is allowed to run. - budget_type (str): - The budget type, which can be epochs or time - pipeline_config (Optional[Dict[str, Any]]): - Defines the content of the pipeline being evaluated. For example, it - contains pipeline specific settings like logging name, or whether or not - to use tensorboard. - config (Union[int, str, Configuration]): - Determines the pipeline to be constructed. - seed (int): - A integer that allows for reproducibility of results - output_y_hat_optimization (bool): - Whether this worker should output the target predictions, so that they are - stored on disk. Fundamentally, the resampling strategy might shuffle the - Y_train targets, so we store the split in order to re-use them for ensemble - selection. - num_run (Optional[int]): - An identifier of the current configuration being fit. This number is unique per - configuration. - include (Optional[Dict[str, Any]]): - An optional dictionary to include components of the pipeline steps. - exclude (Optional[Dict[str, Any]]): - An optional dictionary to exclude components of the pipeline steps. 
- disable_file_output (Union[bool, List[str]]): - By default, the model, it's predictions and other metadata is stored on disk - for each finished configuration. This argument allows the user to skip - saving certain file type, for example the model, from being written to disk. - init_params (Optional[Dict[str, Any]]): - Optional argument that is passed to each pipeline step. It is the equivalent of - kwargs for the pipeline steps. - logger_port (Optional[int]): - Logging is performed using a socket-server scheme to be robust against many - parallel entities that want to write to the same file. This integer states the - socket port for the communication channel. If None is provided, a traditional - logger is used. - instance (str): - An instance on which to evaluate the current pipeline. By default we work - with a single instance, being the provided X_train, y_train of a single dataset. - This instance is a compatibility argument for SMAC, that is capable of working - with multiple datasets at the same time. + it will append the result to a multiprocessing queue + fixed_pipeline_params (FixedPipelineParams): + Fixed parameters for a pipeline + evaluator_params (EvaluatorParams): + The parameters for an evaluator. """ evaluator = TrainEvaluator( - backend=backend, queue=queue, - metric=metric, - configuration=config, - seed=seed, - num_run=num_run, - output_y_hat_optimization=output_y_hat_optimization, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates + evaluator_params=evaluator_params, + fixed_pipeline_params=fixed_pipeline_params ) - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index 37e5fa36d..de8576418 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -8,12 +8,17 @@ from smac.runhistory.runhistory import RunValue +from autoPyTorch.constants import ( + MULTICLASS, + STRING_TO_OUTPUT_TYPES +) from autoPyTorch.utils.common import autoPyTorchEnum __all__ = [ 'read_queue', 'convert_multioutput_multiclass_to_multilabel', + 'ensure_prediction_array_sizes', 'extract_learning_curve', 'empty_queue', 'VotingRegressorWrapper' @@ -56,13 +61,58 @@ def empty_queue(queue_: Queue) -> None: queue_.close() -def extract_learning_curve(stack: List[RunValue], key: Optional[str] = None) -> List[List]: +def ensure_prediction_array_sizes( + prediction: np.ndarray, + output_type: str, + num_classes: Optional[int], + label_examples: Optional[np.ndarray] +) -> np.ndarray: + """ + This function formats a prediction to match the dimensionality of the provided + labels label_examples. This should be used exclusively for classification tasks + + Args: + prediction (np.ndarray): + The un-formatted predictions of a pipeline + output_type (str): + Output type specified in constants. 
(TODO: Fix it to enum)
+        num_classes (Optional[int]):
+            The number of classes in the dataset
+        label_examples (Optional[np.ndarray]):
+            The labels from the dataset to give an intuition of the expected
+            predictions dimensionality
+
+    Returns:
+        (np.ndarray):
+            The formatted prediction
+    """
+    if num_classes is None:
+        raise RuntimeError("ensure_prediction_array_sizes is only for classification tasks")
+    if label_examples is None:
+        raise ValueError('label_examples must be provided, but got None')
+
+    if STRING_TO_OUTPUT_TYPES[output_type] != MULTICLASS or prediction.shape[1] == num_classes:
+        return prediction
+
+    classes = list(np.unique(label_examples))
+    # Map each prediction column to its true class index; classes that are
+    # absent from label_examples keep an all-zero column.
+    mapping = {classes.index(class_idx): class_idx for class_idx in range(num_classes) if class_idx in classes}
+    modified_pred = np.zeros((prediction.shape[0], num_classes), dtype=np.float32)
+
+    for index, class_index in mapping.items():
+        modified_pred[:, class_index] = prediction[:, index]
+
+    return modified_pred
+
+
+def extract_learning_curve(stack: List[RunValue], key: Optional[str] = None) -> List[float]:
     learning_curve = []
     for entry in stack:
-        if key is not None:
-            learning_curve.append(entry['additional_run_info'][key])
-        else:
-            learning_curve.append(entry['loss'])
+        try:
+            val = entry['loss'] if key is None else entry['additional_run_info'][key]
+            learning_curve.append(val)
+        except TypeError:  # additional_run_info is not a dict
+            pass
+        except KeyError:  # key does not exist
+            pass
+    return list(learning_curve)
diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py
index d0bb4056c..a8915e06a 100644
--- a/autoPyTorch/optimizer/smbo.py
+++ b/autoPyTorch/optimizer/smbo.py
@@ -26,7 +26,7 @@ NoResamplingStrategyTypes
 )
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
-from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash
+from autoPyTorch.evaluation.tae import TargetAlgorithmQuery
 from autoPyTorch.optimizer.utils import read_return_initial_configurations
 from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
 from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
@@ -215,7 +215,7 @@ def __init__(self,
         self.resampling_strategy_args = resampling_strategy_args

         # and a bunch of useful limits
-        self.worst_possible_result = get_cost_of_crash(self.metric)
+        self.worst_possible_result = self.metric._cost_of_crash
         self.total_walltime_limit = int(total_walltime_limit)
         self.func_eval_time_limit_secs = int(func_eval_time_limit_secs)
         self.memory_limit = memory_limit
@@ -302,7 +302,7 @@ def run_smbo(self, func: Optional[Callable] = None
             search_space_updates=self.search_space_updates,
             pynisher_context=self.pynisher_context,
         )
-        ta = ExecuteTaFuncWithQueue
+        ta = TargetAlgorithmQuery
         self.logger.info("Finish creating Target Algorithm (TA) function")

         startup_time = self.watcher.wall_elapsed(self.dataset_name)
diff --git a/autoPyTorch/pipeline/components/training/metrics/base.py b/autoPyTorch/pipeline/components/training/metrics/base.py
index c3f247cd3..876a91fd1 100644
--- a/autoPyTorch/pipeline/components/training/metrics/base.py
+++ b/autoPyTorch/pipeline/components/training/metrics/base.py
@@ -23,6 +23,9 @@ def __init__(self,
         self._worst_possible_result = worst_possible_result
         self._sign = sign

+        # AutoPyTorch MINIMIZES a metric, so the cost of crash must be the largest possible value
+        self._cost_of_crash = worst_possible_result if sign < 0 else optimum - worst_possible_result
+
     def __call__(self,
                  y_true: np.ndarray,
                  y_pred: np.ndarray,
diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py
index 
e3603f668..63d9c7246 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -1,6 +1,5 @@ import json import os -import pathlib import pickle import tempfile import unittest @@ -21,7 +20,7 @@ from sklearn.base import BaseEstimator, clone from sklearn.ensemble import VotingClassifier, VotingRegressor -from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue +from smac.runhistory.runhistory import RunHistory, RunInfo, RunValue, StatusType from autoPyTorch.api.tabular_classification import TabularClassificationTask from autoPyTorch.api.tabular_regression import TabularRegressionTask @@ -80,17 +79,14 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl enable_traditional_pipeline=False, ) - # Internal dataset has expected settings - assert estimator.dataset.task_type == 'tabular_classification' - expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ - else CV_NUM_SPLITS - assert estimator.resampling_strategy == resampling_strategy - assert estimator.dataset.resampling_strategy == resampling_strategy - assert len(estimator.dataset.splits) == expected_num_splits + if split: + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=seed) + return X_train, X_test, y_train, y_test + else: + return X, y - # TODO: check for budget - # Check for the created files +def _check_created_files(estimator): tmp_dir = estimator._backend.temporary_directory loaded_datamanager = estimator._backend.load_datamanager() assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) @@ -110,23 +106,29 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl '.autoPyTorch/true_targets_ensemble.npy', ] for expected_file in expected_files: - assert os.path.exists(os.path.join(tmp_dir, expected_file)), "{}/{}/{}".format( - tmp_dir, - [data for data in pathlib.Path(tmp_dir).glob('*')], - expected_file, - ) + assert os.path.exists(os.path.join(tmp_dir, expected_file)) - # Check that smac was able to find proper models - succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( - ) if 'SUCCESS' in str(run_value.status)] - assert len(succesful_runs) > 1, [(k, v) for k, v in estimator.run_history.data.items()] + +def _check_internal_dataset_settings(estimator, resampling_strategy, task_type: str): + assert estimator.dataset.task_type == task_type + expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation \ + else CV_NUM_SPLITS + assert estimator.resampling_strategy == resampling_strategy + assert estimator.dataset.resampling_strategy == resampling_strategy + assert len(estimator.dataset.splits) == expected_num_splits + + +def _check_smac_success(estimator, n_successful_runs: int = 1): + data = estimator.run_history.data + succesful_runs = [rv.status for rv in data.values() if rv.status == StatusType.SUCCESS] + assert len(succesful_runs) >= n_successful_runs, [(k, v) for k, v in data.items()] # Search for an existing run key in disc. 
A individual model might have # a timeout and hence was not written to disc successful_num_run = None SUCCESS = False - for i, (run_key, value) in enumerate(estimator.run_history.data.items()): - if 'SUCCESS' in str(value.status): + for i, (run_key, value) in enumerate(data.items()): + if value.status == StatusType.SUCCESS: run_key_model_run_dir = estimator._backend.get_numrun_directory( estimator.seed, run_key.config_id + 1, run_key.budget) successful_num_run = run_key.config_id + 1 @@ -138,6 +140,10 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}" + return run_key_model_run_dir, run_key, successful_num_run + + +def _check_model_file(estimator, resampling_strategy, run_key, run_key_model_run_dir, successful_num_run): if resampling_strategy == HoldoutValTypes.holdout_validation: model_file = os.path.join(run_key_model_run_dir, f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") @@ -150,15 +156,23 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model" ) assert os.path.exists(model_file), model_file - model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model, VotingClassifier) + + if estimator.task_type.endswith('classification'): + assert isinstance(model, VotingClassifier) + elif estimator.task_type.endswith('regression'): + assert isinstance(model, VotingRegressor) + else: + raise RuntimeError(f'Got unknown model: {type(model)}') assert len(model.estimators_) == CV_NUM_SPLITS else: pytest.fail(resampling_strategy) - # Make sure that predictions on the test data are printed and make sense + return model + + +def _check_test_prediction(estimator, X_test, y_test, run_key, run_key_model_run_dir, successful_num_run): test_prediction = os.path.join(run_key_model_run_dir, estimator._backend.get_prediction_filename( 'test', estimator.seed, successful_num_run, @@ -166,6 +180,30 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl assert os.path.exists(test_prediction), test_prediction assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] + pred = estimator.predict(X_test) + score = estimator.score(pred, y_test) + assert np.shape(pred)[0] == np.shape(X_test)[0] + + if 'accuracy' in score: + # Make sure that predict proba has the expected shape + probabilites = estimator.predict_proba(X_test) + assert np.shape(probabilites) == (np.shape(X_test)[0], 2) + elif 'r2' not in score: + raise ValueError(f'Got unknown score `{score}`') + + +def _check_picklable(estimator, X_test): + dump_file = os.path.join(estimator._backend.temporary_directory, 'dump.pkl') + + with open(dump_file, 'wb') as f: + pickle.dump(estimator, f) + + with open(dump_file, 'rb') as f: + restored_estimator = pickle.load(f) + restored_estimator.predict(X_test) + + +def _check_ensemble_prediction(estimator, run_key, run_key_model_run_dir, successful_num_run): # Also, for ensemble builder, the OOF predictions should be there and match # the Ground truth that is also physically printed to disk ensemble_prediction = os.path.join(run_key_model_run_dir, @@ -184,17 +222,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl # There should be a weight for each element of the ensemble assert len(estimator.ensemble_.identifiers_) == 
len(estimator.ensemble_.weights_) - y_pred = estimator.predict(X_test) - assert np.shape(y_pred)[0] == np.shape(X_test)[0] - - # Make sure that predict proba has the expected shape - probabilites = estimator.predict_proba(X_test) - assert np.shape(probabilites) == (np.shape(X_test)[0], 2) - score = estimator.score(y_pred, y_test) - assert 'accuracy' in score - - # check incumbent config and results +def _check_incumbent(estimator, successful_num_run): incumbent_config, incumbent_results = estimator.get_incumbent_results() assert isinstance(incumbent_config, Configuration) assert isinstance(incumbent_results, dict) @@ -236,22 +265,23 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin ) X, y = X.iloc[:n_samples], y.iloc[:n_samples] - # normalize values - y = (y - y.mean()) / y.std() - - # fill NAs for now since they are not yet properly handled - for column in X.columns: - if X[column].dtype.name == "category": - X[column] = pd.Categorical(X[column], - categories=list(X[column].cat.categories) + ["missing"]).fillna("missing") - else: - X[column] = X[column].fillna(0) - - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, y, random_state=1) +def _get_estimator( + backend, + task_class, + X_train, + y_train, + X_test, + y_test, + resampling_strategy, + resampling_strategy_args, + metric, + total_walltime_limit=40, + func_eval_time_limit_secs=10, + **kwargs +): # Search for a good configuration - estimator = TabularRegressionTask( + estimator = task_class( backend=backend, resampling_strategy=resampling_strategy, resampling_strategy_args=resampling_strategy_args, @@ -262,147 +292,100 @@ def test_tabular_regression(openml_name, resampling_strategy, backend, resamplin estimator.search( X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, - optimize_metric='r2', - total_walltime_limit=40, - func_eval_time_limit_secs=10, + optimize_metric=metric, + total_walltime_limit=total_walltime_limit, + func_eval_time_limit_secs=func_eval_time_limit_secs, enable_traditional_pipeline=False, + **kwargs ) - # Internal dataset has expected settings - assert estimator.dataset.task_type == 'tabular_regression' - expected_num_splits = HOLDOUT_NUM_SPLITS if resampling_strategy == HoldoutValTypes.holdout_validation\ - else CV_NUM_SPLITS - assert estimator.resampling_strategy == resampling_strategy - assert estimator.dataset.resampling_strategy == resampling_strategy - assert len(estimator.dataset.splits) == expected_num_splits - - # TODO: check for budget - - # Check for the created files - tmp_dir = estimator._backend.temporary_directory - loaded_datamanager = estimator._backend.load_datamanager() - assert len(loaded_datamanager.train_tensors) == len(estimator.dataset.train_tensors) - - expected_files = [ - 'smac3-output/run_42/configspace.json', - 'smac3-output/run_42/runhistory.json', - 'smac3-output/run_42/scenario.txt', - 'smac3-output/run_42/stats.json', - 'smac3-output/run_42/train_insts.txt', - 'smac3-output/run_42/trajectory.json', - '.autoPyTorch/datamanager.pkl', - '.autoPyTorch/ensemble_read_preds.pkl', - '.autoPyTorch/start_time_42', - '.autoPyTorch/ensemble_history.json', - '.autoPyTorch/ensemble_read_losses.pkl', - '.autoPyTorch/true_targets_ensemble.npy', - ] - for expected_file in expected_files: - assert os.path.exists(os.path.join(tmp_dir, expected_file)), expected_file - - # Check that smac was able to find proper models - succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( - ) if 'SUCCESS' 
in str(run_value.status)] - assert len(succesful_runs) >= 1, [(k, v) for k, v in estimator.run_history.data.items()] - - # Search for an existing run key in disc. A individual model might have - # a timeout and hence was not written to disc - successful_num_run = None - SUCCESS = False - for i, (run_key, value) in enumerate(estimator.run_history.data.items()): - if 'SUCCESS' in str(value.status): - run_key_model_run_dir = estimator._backend.get_numrun_directory( - estimator.seed, run_key.config_id + 1, run_key.budget) - successful_num_run = run_key.config_id + 1 - if os.path.exists(run_key_model_run_dir): - # Runkey config id is different from the num_run - # more specifically num_run = config_id + 1(dummy) - SUCCESS = True - break + return estimator - assert SUCCESS, f"Successful run was not properly saved for num_run: {successful_num_run}" - if resampling_strategy == HoldoutValTypes.holdout_validation: - model_file = os.path.join(run_key_model_run_dir, - f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") - assert os.path.exists(model_file), model_file - model = estimator._backend.load_model_by_seed_and_id_and_budget( - estimator.seed, successful_num_run, run_key.budget) - elif resampling_strategy == CrossValTypes.k_fold_cross_validation: - model_file = os.path.join( - run_key_model_run_dir, - f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model" - ) - assert os.path.exists(model_file), model_file - model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( - estimator.seed, successful_num_run, run_key.budget) - assert isinstance(model, VotingRegressor) - assert len(model.estimators_) == CV_NUM_SPLITS - else: - pytest.fail(resampling_strategy) +def _check_tabular_task(estimator, X_test, y_test, task_type, resampling_strategy, n_successful_runs): + _check_internal_dataset_settings(estimator, resampling_strategy, task_type=task_type) + _check_created_files(estimator) + run_key_model_run_dir, run_key, successful_num_run = _check_smac_success(estimator, + n_successful_runs=n_successful_runs) + _check_model_file(estimator, resampling_strategy, run_key, run_key_model_run_dir, successful_num_run) + _check_test_prediction(estimator, X_test, y_test, run_key, run_key_model_run_dir, successful_num_run) + _check_ensemble_prediction(estimator, run_key, run_key_model_run_dir, successful_num_run) + _check_incumbent(estimator, successful_num_run) - # Make sure that predictions on the test data are printed and make sense - test_prediction = os.path.join(run_key_model_run_dir, - estimator._backend.get_prediction_filename( - 'test', estimator.seed, successful_num_run, - run_key.budget)) - assert os.path.exists(test_prediction), test_prediction - assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] + # Test refit on dummy data + # This process yields a mysterious bug after _check_picklable + # However, we can process it in the _check_picklable function. 
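A note on _get_dataset, which the refactored tests below call: its full body is not visible in this hunk (only the split/return tail appears further up). Based on the inline fetching code that this diff removes, the helper presumably looks roughly like the sketch below; the exact signature and defaults in test/test_api/test_api.py may differ.

import sklearn.datasets
import sklearn.model_selection


def _get_dataset(openml_id, n_samples, seed=1, split=True):
    # Sketch only: reconstructed from the inline OpenML-fetching code removed in this diff.
    X, y = sklearn.datasets.fetch_openml(
        data_id=int(openml_id), return_X_y=True, as_frame=True)
    X, y = X.iloc[:n_samples], y.iloc[:n_samples]

    if split:
        # Mirrors the split/return tail shown earlier in this hunk.
        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            X, y, random_state=seed)
        return X_train, X_test, y_train, y_test
    return X, y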
+ estimator.refit(dataset=estimator._backend.load_datamanager()) - # Also, for ensemble builder, the OOF predictions should be there and match - # the Ground truth that is also physically printed to disk - ensemble_prediction = os.path.join(run_key_model_run_dir, - estimator._backend.get_prediction_filename( - 'ensemble', - estimator.seed, successful_num_run, - run_key.budget)) - assert os.path.exists(ensemble_prediction), ensemble_prediction - assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape( - estimator._backend.load_targets_ensemble() - )[0] + # Make sure that a configuration space is stored in the estimator + assert isinstance(estimator.get_search_space(), CS.ConfigurationSpace) - # Ensemble Builder produced an ensemble - estimator.ensemble_ is not None + _check_picklable(estimator, X_test) - # There should be a weight for each element of the ensemble - assert len(estimator.ensemble_.identifiers_) == len(estimator.ensemble_.weights_) - y_pred = estimator.predict(X_test) +# Test +# ==== +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_fn', + new=dummy_eval_fn) +@pytest.mark.parametrize('openml_id', (40981, )) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + )) +def test_tabular_classification(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): + X_train, X_test, y_train, y_test = _get_dataset(openml_id, n_samples, seed=42) - assert np.shape(y_pred)[0] == np.shape(X_test)[0] + estimator = _get_estimator( + backend, TabularClassificationTask, X_train, y_train, X_test, y_test, + resampling_strategy, resampling_strategy_args, metric='accuracy' + ) + _check_tabular_task( + estimator, X_test, y_test, + task_type='tabular_classification', + resampling_strategy=resampling_strategy, + n_successful_runs=2 + ) - score = estimator.score(y_pred, y_test) - assert 'r2' in score - # check incumbent config and results - incumbent_config, incumbent_results = estimator.get_incumbent_results() - assert isinstance(incumbent_config, Configuration) - assert isinstance(incumbent_results, dict) - assert 'opt_loss' in incumbent_results, "run history: {}, successful_num_run: {}".format(estimator.run_history.data, - successful_num_run) - assert 'train_loss' in incumbent_results, estimator.run_history.data +@pytest.mark.parametrize('openml_id', (531, )) +@unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_fn', + new=dummy_eval_fn) +@pytest.mark.parametrize('resampling_strategy,resampling_strategy_args', + ((HoldoutValTypes.holdout_validation, None), + (CrossValTypes.k_fold_cross_validation, {'num_splits': CV_NUM_SPLITS}) + )) +def test_tabular_regression(openml_id, resampling_strategy, backend, resampling_strategy_args, n_samples): + X, y = _get_dataset(openml_id, n_samples, split=False) - # Check that we can pickle - dump_file = os.path.join(estimator._backend.temporary_directory, 'dump.pkl') + # normalize values + y = (y - y.mean()) / y.std() - with open(dump_file, 'wb') as f: - pickle.dump(estimator, f) + # fill NAs for now since they are not yet properly handled + for column in X.columns: + if X[column].dtype.name == "category": + cats = list(X[column].cat.categories) + ["missing"] + X[column] = pd.Categorical(X[column], categories=cats).fillna("missing") + else: + X[column] = X[column].fillna(0) - with open(dump_file, 'rb') as f: - restored_estimator = pickle.load(f) - 
restored_estimator.predict(X_test) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1) - # Test refit on dummy data - estimator.refit(dataset=backend.load_datamanager()) + estimator = _get_estimator( + backend, TabularRegressionTask, X_train, y_train, X_test, y_test, + resampling_strategy, resampling_strategy_args, metric='r2' + ) - # Make sure that a configuration space is stored in the estimator - assert isinstance(estimator.get_search_space(), CS.ConfigurationSpace) + _check_tabular_task( + estimator, X_test, y_test, + task_type='tabular_regression', + resampling_strategy=resampling_strategy, + n_successful_runs=1 + ) representation = estimator.show_models() assert isinstance(representation, str) - assert 'Weight' in representation - assert 'Preprocessing' in representation - assert 'Estimator' in representation + assert all(word in representation for word in ['Weight', 'Preprocessing', 'Estimator']) @pytest.mark.parametrize('openml_id', ( @@ -472,18 +455,13 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular): estimator._do_dummy_prediction() + dir_names = [backend.temporary_directory, '.autoPyTorch', 'runs', '1_1_1.0'] # Ensure that the dummy predictions are not in the current working # directory, but in the temporary directory. assert not os.path.exists(os.path.join(os.getcwd(), '.autoPyTorch')) - assert os.path.exists(os.path.join( - backend.temporary_directory, '.autoPyTorch', 'runs', '1_1_50.0', - 'predictions_ensemble_1_1_50.0.npy') - ) + assert os.path.exists(os.path.join(*dir_names, 'predictions_ensemble_1_1_1.0.npy')) - model_path = os.path.join(backend.temporary_directory, - '.autoPyTorch', - 'runs', '1_1_50.0', - '1.1.50.0.model') + model_path = os.path.join(*dir_names, '1.1.1.0.model') # Make sure the dummy model complies with scikit learn # get/set params @@ -502,39 +480,23 @@ def test_do_dummy_prediction(dask_client, fit_dictionary_tabular): @pytest.mark.parametrize('openml_id', (40981, )) def test_portfolio_selection(openml_id, backend, n_samples): - # Get the data and check that contents of data-manager make sense - X, y = sklearn.datasets.fetch_openml( - data_id=int(openml_id), - return_X_y=True, as_frame=True - ) - X, y = X.iloc[:n_samples], y.iloc[:n_samples] - - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( - X, y, random_state=1) + X_train, X_test, y_train, y_test = _get_dataset(openml_id, n_samples, seed=1) - # Search for a good configuration - estimator = TabularClassificationTask( - backend=backend, + path = os.path.join(os.path.dirname(__file__), "../../autoPyTorch/configs/greedy_portfolio.json") + estimator = _get_estimator( + backend, TabularClassificationTask, X_train, y_train, X_test, y_test, resampling_strategy=HoldoutValTypes.holdout_validation, + resampling_strategy_args={'val_share': 0.33}, + metric='accuracy', + total_walltime_limit=30, + func_eval_time_limit_secs=5, + portfolio_selection=path ) - with unittest.mock.patch.object(estimator, '_do_dummy_prediction', new=dummy_do_dummy_prediction): - estimator.search( - X_train=X_train, y_train=y_train, - X_test=X_test, y_test=y_test, - optimize_metric='accuracy', - total_walltime_limit=30, - func_eval_time_limit_secs=5, - enable_traditional_pipeline=False, - portfolio_selection=os.path.join(os.path.dirname(__file__), - "../../autoPyTorch/configs/greedy_portfolio.json") - ) - - successful_config_ids = [run_key.config_id for run_key, run_value in estimator.run_history.data.items( - ) if 'SUCCESS' in 
str(run_value.status)] + data = estimator.run_history.data + successful_config_ids = [k.config_id for k, v in data.items() if v.status == StatusType.SUCCESS] successful_configs = [estimator.run_history.ids_config[id].get_dictionary() for id in successful_config_ids] - portfolio_configs = json.load(open(os.path.join(os.path.dirname(__file__), - "../../autoPyTorch/configs/greedy_portfolio.json"))) + portfolio_configs = json.load(open(path)) # check if any configs from greedy portfolio were compatible with australian assert any(successful_config in portfolio_configs for successful_config in successful_configs) diff --git a/test/test_api/utils.py b/test/test_api/utils.py index f8a11db88..b95e7c726 100644 --- a/test/test_api/utils.py +++ b/test/test_api/utils.py @@ -3,13 +3,14 @@ from smac.runhistory.runhistory import DataOrigin, RunHistory, RunKey, RunValue, StatusType from autoPyTorch.constants import REGRESSION_TASKS -from autoPyTorch.evaluation.abstract_evaluator import ( +from autoPyTorch.evaluation.abstract_evaluator import fit_pipeline +from autoPyTorch.evaluation.pipeline_class_collection import ( DummyClassificationPipeline, - DummyRegressionPipeline, - fit_and_suppress_warnings + DummyRegressionPipeline ) from autoPyTorch.evaluation.train_evaluator import TrainEvaluator from autoPyTorch.pipeline.traditional_tabular_classification import TraditionalTabularClassificationPipeline +from autoPyTorch.utils.common import subsampler def dummy_traditional_classification(self, time_left: int, func_eval_time_limit_secs: int) -> None: @@ -28,44 +29,28 @@ def dummy_traditional_classification(self, time_left: int, func_eval_time_limit_ # Fixtures # ======== class DummyTrainEvaluator(TrainEvaluator): - - def _fit_and_predict(self, pipeline, fold: int, train_indices, - test_indices, - add_pipeline_to_self - ): - + def _get_pipeline(self): if self.task_type in REGRESSION_TASKS: pipeline = DummyRegressionPipeline(config=1) else: pipeline = DummyClassificationPipeline(config=1) - self.indices[fold] = ((train_indices, test_indices)) + return pipeline - X = {'train_indices': train_indices, - 'val_indices': test_indices, - 'split_id': fold, - 'num_run': self.num_run, - **self.fit_dictionary} # fit dictionary - y = None - fit_and_suppress_warnings(self.logger, pipeline, X, y) + def _fit_and_evaluate_loss(self, pipeline, split_id, train_indices, opt_indices): + X = dict(train_indices=train_indices, val_indices=opt_indices, split_id=split_id, num_run=self.num_run) + X.update(self.fit_dictionary) + fit_pipeline(self.logger, pipeline, X, y=None) self.logger.info("Model fitted, now predicting") - ( - Y_train_pred, - Y_opt_pred, - Y_valid_pred, - Y_test_pred - ) = self._predict( - pipeline, - train_indices=train_indices, - test_indices=test_indices, - ) - if add_pipeline_to_self: - self.pipeline = pipeline - else: - self.pipelines[fold] = pipeline + kwargs = {'pipeline': pipeline, 'label_examples': self.y_train[train_indices]} + train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) + opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) + valid_pred = self.predict(self.X_valid, **kwargs) + test_pred = self.predict(self.X_test, **kwargs) - return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + assert train_pred is not None and opt_pred is not None # mypy check + return train_pred, opt_pred, valid_pred, test_pred # create closure for evaluating an algorithm @@ -90,25 +75,11 @@ def dummy_eval_train_function( instance: str = None, ) -> None: evaluator = 
DummyTrainEvaluator( - backend=backend, queue=queue, - metric=metric, - configuration=config, - seed=seed, - num_run=num_run, - output_y_hat_optimization=output_y_hat_optimization, - include=include, - exclude=exclude, - disable_file_output=disable_file_output, - init_params=init_params, - budget=budget, - budget_type=budget_type, - logger_port=logger_port, - all_supported_metrics=all_supported_metrics, - pipeline_config=pipeline_config, - search_space_updates=search_space_updates + fixed_pipeline_params=fixed_pipeline_params, + evaluator_params=evaluator_params ) - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() def dummy_do_dummy_prediction(): diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index a0be2c3f3..4e7565677 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -12,8 +12,12 @@ from smac.tae import StatusType from autoPyTorch.automl_common.common.utils.backend import Backend, BackendContext -from autoPyTorch.evaluation.abstract_evaluator import AbstractEvaluator -from autoPyTorch.evaluation.utils import DisableFileOutputParameters +from autoPyTorch.evaluation.abstract_evaluator import ( + AbstractEvaluator, + EvaluationResults, + EvaluatorParams, + FixedPipelineParams +) from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy this_directory = os.path.dirname(__file__) @@ -43,6 +47,13 @@ def setUp(self): D = get_multiclass_classification_datamanager() backend_mock.load_datamanager.return_value = D self.backend_mock = backend_mock + self.eval_params = EvaluatorParams.with_default_budget(budget=0, configuration=1) + self.fixed_params = FixedPipelineParams.with_default_pipeline_config( + backend=self.backend_mock, + save_y_opt=False, + metric=accuracy, + seed=1 + ) self.working_directory = os.path.join(this_directory, '.tmp_%s' % self.id()) @@ -53,72 +64,33 @@ def tearDown(self): except: # noqa E722 pass - def test_finish_up_model_predicts_NaN(self): + def test_record_evaluation_model_predicts_NaN(self): '''Tests by handing in predictions which contain NaNs''' rs = np.random.RandomState(1) - queue_mock = unittest.mock.Mock() - ae = AbstractEvaluator(backend=self.backend_mock, - output_y_hat_optimization=False, - queue=queue_mock, metric=accuracy, budget=0, - configuration=1) - ae.Y_optimization = rs.rand(33, 3) - predictions_ensemble = rs.rand(33, 3) - predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) - - # NaNs in prediction ensemble - predictions_ensemble[5, 2] = np.NaN - _, loss, _, additional_run_info = ae.finish_up( - loss={'accuracy': 0.1}, - train_loss={'accuracy': 0.1}, - opt_pred=predictions_ensemble, - valid_pred=predictions_valid, - test_pred=predictions_test, - additional_run_info=None, - file_output=True, - status=StatusType.SUCCESS, - ) - self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for optimization set ' - 'contains NaNs.'}) - - # NaNs in prediction validation - predictions_ensemble[5, 2] = 0.5 - predictions_valid[5, 2] = np.NaN - _, loss, _, additional_run_info = ae.finish_up( - loss={'accuracy': 0.1}, - train_loss={'accuracy': 0.1}, - opt_pred=predictions_ensemble, - valid_pred=predictions_valid, - test_pred=predictions_test, - additional_run_info=None, - file_output=True, - status=StatusType.SUCCESS, - ) - self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for validation set ' - 
'contains NaNs.'}) - - # NaNs in prediction test - predictions_valid[5, 2] = 0.5 - predictions_test[5, 2] = np.NaN - _, loss, _, additional_run_info = ae.finish_up( - loss={'accuracy': 0.1}, - train_loss={'accuracy': 0.1}, - opt_pred=predictions_ensemble, - valid_pred=predictions_valid, - test_pred=predictions_test, - additional_run_info=None, - file_output=True, - status=StatusType.SUCCESS, + opt_pred, test_pred, valid_pred = rs.rand(33, 3), rs.rand(25, 3), rs.rand(25, 3) + ae = AbstractEvaluator( + queue=queue_mock, + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params ) - self.assertEqual(loss, 1.0) - self.assertEqual(additional_run_info, - {'error': 'Model predictions for test set contains ' - 'NaNs.'}) + ae.y_opt = rs.rand(33, 3) + + for inference_name, pred in [('optimization', opt_pred), ('validation', valid_pred), ('test', test_pred)]: + pred[5, 2] = np.nan + results = EvaluationResults( + opt_loss={'accuracy': 0.1}, + train_loss={'accuracy': 0.1}, + opt_pred=opt_pred, + valid_pred=valid_pred, + test_pred=test_pred, + additional_run_info=None, + status=StatusType.SUCCESS, + ) + ae.fixed_pipeline_params.backend.save_numrun_to_dir = unittest.mock.Mock() + ae.record_evaluation(results=results) + self.assertEqual(ae.fixed_pipeline_params.backend.save_numrun_to_dir.call_count, 0) + pred[5, 2] = 0.5 self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count, 0) @@ -126,124 +98,50 @@ def test_disable_file_output(self): queue_mock = unittest.mock.Mock() rs = np.random.RandomState(1) + opt_pred, test_pred, valid_pred = rs.rand(33, 3), rs.rand(25, 3), rs.rand(25, 3) - ae = AbstractEvaluator( - backend=self.backend_mock, - queue=queue_mock, - disable_file_output=[DisableFileOutputParameters.all], - metric=accuracy, - logger_port=unittest.mock.Mock(), - budget=0, - configuration=1 - ) - ae.pipeline = unittest.mock.Mock() - predictions_ensemble = rs.rand(33, 3) - predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) - - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) - ) - - self.assertIsNone(loss_) - self.assertEqual(additional_run_info_, {}) - # This function is never called as there is a return before - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 0) + fixed_params_dict = self.fixed_params._asdict() - for call_count, disable in enumerate(['pipeline', 'pipelines'], start=1): + for call_count, disable in enumerate(['all', 'pipeline', 'pipelines', 'y_optimization']): + fixed_params_dict.update(disable_file_output=[disable]) ae = AbstractEvaluator( - backend=self.backend_mock, - output_y_hat_optimization=False, queue=queue_mock, - disable_file_output=[disable], - metric=accuracy, - budget=0, - configuration=1 + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params ) - ae.Y_optimization = predictions_ensemble - ae.pipeline = unittest.mock.Mock() + ae.y_opt = opt_pred ae.pipelines = [unittest.mock.Mock()] - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) - ) + if ae._is_output_possible(opt_pred, valid_pred, test_pred): + ae._save_to_backend(opt_pred, valid_pred, test_pred) - self.assertIsNone(loss_) - self.assertEqual(additional_run_info_, {}) self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, call_count) + if disable == 'all': + continue + + call_list = self.backend_mock.save_numrun_to_dir.call_args_list[-1][1] if disable == 
'pipeline': - self.assertIsNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.assertIsNone(call_list['model']) + self.assertIsNotNone(call_list['cv_model']) + elif disable == 'pipelines': + self.assertIsNotNone(call_list['model']) + self.assertIsNone(call_list['cv_model']) + + if disable in ('y_optimization', 'all'): + self.assertIsNone(call_list['ensemble_predictions']) else: - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'ensemble_predictions'] - ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'valid_predictions'] - ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'test_predictions'] - ) + self.assertIsNotNone(call_list['ensemble_predictions']) - ae = AbstractEvaluator( - backend=self.backend_mock, - output_y_hat_optimization=False, - queue=queue_mock, - metric=accuracy, - disable_file_output=['y_optimization'], - budget=0, - configuration=1 - ) - ae.Y_optimization = predictions_ensemble - ae.pipeline = 'pipeline' - ae.pipelines = [unittest.mock.Mock()] - - loss_, additional_run_info_ = ( - ae.file_output( - predictions_ensemble, - predictions_valid, - predictions_test, - ) - ) - - self.assertIsNone(loss_) - self.assertEqual(additional_run_info_, {}) - - self.assertIsNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'ensemble_predictions'] - ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'valid_predictions'] - ) - self.assertIsNotNone( - self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ - 'test_predictions'] - ) + self.assertIsNotNone(call_list['valid_predictions']) + self.assertIsNotNone(call_list['test_predictions']) - def test_file_output(self): + def test_save_to_backend(self): shutil.rmtree(self.working_directory, ignore_errors=True) os.mkdir(self.working_directory) queue_mock = unittest.mock.Mock() + rs = np.random.RandomState(1) + opt_pred, test_pred, valid_pred = rs.rand(33, 3), rs.rand(25, 3), rs.rand(25, 3) context = BackendContext( prefix='autoPyTorch', @@ -255,29 +153,17 @@ def test_file_output(self): with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: load_datamanager_mock.return_value = get_multiclass_classification_datamanager() - backend = Backend(context, prefix='autoPyTorch') + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=Backend(context, prefix='autoPyTorch')) ae = AbstractEvaluator( - backend=backend, - output_y_hat_optimization=False, queue=queue_mock, - metric=accuracy, - budget=0, - configuration=1 + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=EvaluatorParams.with_default_budget(choice='dummy', configuration=1) ) ae.model = sklearn.dummy.DummyClassifier() - - rs = np.random.RandomState() - ae.Y_optimization = rs.rand(33, 3) - predictions_ensemble = rs.rand(33, 3) - predictions_test = rs.rand(25, 3) - predictions_valid = rs.rand(25, 3) - - ae.file_output( - Y_optimization_pred=predictions_ensemble, - Y_valid_pred=predictions_valid, - Y_test_pred=predictions_test, - ) + ae.y_opt = rs.rand(33, 3) + ae._save_to_backend(opt_pred=opt_pred, 
valid_pred=valid_pred, test_pred=test_pred) self.assertTrue(os.path.exists(os.path.join(self.working_directory, 'tmp', '.autoPyTorch', 'runs', '1_0_1.0'))) @@ -300,17 +186,17 @@ def test_error_unsupported_budget_type(self): with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: load_datamanager_mock.return_value = get_multiclass_classification_datamanager() - backend = Backend(context, prefix='autoPyTorch') - try: + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update( + backend=Backend(context, prefix='autoPyTorch'), + pipeline_config={'budget_type': "error", 'error': 0} + ) AbstractEvaluator( - backend=backend, - output_y_hat_optimization=False, queue=queue_mock, - pipeline_config={'budget_type': "error", 'error': 0}, - metric=accuracy, - budget=0, - configuration=1) + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) except Exception as e: self.assertIsInstance(e, ValueError) @@ -332,17 +218,18 @@ def test_error_unsupported_disable_file_output_parameters(self): with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: load_datamanager_mock.return_value = get_multiclass_classification_datamanager() - backend = Backend(context, prefix='autoPyTorch') + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update( + backend=Backend(context, prefix='autoPyTorch'), + disable_file_output=['model'] + ) try: AbstractEvaluator( - backend=backend, - output_y_hat_optimization=False, queue=queue_mock, - metric=accuracy, - budget=0, - configuration=1, - disable_file_output=['model']) + evaluator_params=self.eval_params, + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict) + ) except Exception as e: self.assertIsInstance(e, ValueError) diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py index 051a1c174..be0d731ad 100644 --- a/test/test_evaluation/test_evaluation.py +++ b/test/test_evaluation/test_evaluation.py @@ -17,7 +17,7 @@ from smac.tae import StatusType from smac.utils.constants import MAXINT -from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.evaluation.tae import TargetAlgorithmQuery from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy, log_loss this_directory = os.path.dirname(__file__) @@ -58,6 +58,26 @@ def setUp(self): stats = Stats(scenario_mock) stats.start_timing() self.stats = stats + self.taq_kwargs = dict( + backend=BackendMock(), + seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=accuracy._cost_of_crash, + abort_on_first_run_crash=False, + logger_port=self.logger_port, + pynisher_context='fork' + ) + config = unittest.mock.Mock(spec=int) + config.config_id, config.origin = 198, 'MOCK' + self.runinfo_kwargs = dict( + config=config, + instance=None, + instance_specific=None, + seed=1, + capped=False + ) try: shutil.rmtree(self.tmp) @@ -115,45 +135,22 @@ def test_eval_with_limits_holdout(self, pynisher_mock): @unittest.mock.patch('pynisher.enforce_limits') def test_cutoff_lower_than_remaining_time(self, pynisher_mock): - config = unittest.mock.Mock() - config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + ta = 
TargetAlgorithmQuery(**self.taq_kwargs) self.stats.ta_runs = 1 - ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, instance_specific=None, - seed=1, capped=False)) + ta.run_wrapper(RunInfo(cutoff=30, **self.runinfo_kwargs)) self.assertEqual(pynisher_mock.call_args[1]['wall_time_in_s'], 4) self.assertIsInstance(pynisher_mock.call_args[1]['wall_time_in_s'], int) @unittest.mock.patch('pynisher.enforce_limits') def test_eval_with_limits_holdout_fail_timeout(self, pynisher_mock): - config = unittest.mock.Mock() - config.config_id = 198 - m1 = unittest.mock.Mock() m2 = unittest.mock.Mock() m1.return_value = m2 pynisher_mock.return_value = m1 m2.exit_status = pynisher.TimeoutException m2.wall_clock_time = 30 - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = TargetAlgorithmQuery(**self.taq_kwargs) + info = ta.run_wrapper(RunInfo(cutoff=30, **self.runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.TIMEOUT) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) @@ -161,55 +158,38 @@ def test_eval_with_limits_holdout_fail_timeout(self, pynisher_mock): @unittest.mock.patch('pynisher.enforce_limits') def test_zero_or_negative_cutoff(self, pynisher_mock): - config = unittest.mock.Mock() - config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + ta = TargetAlgorithmQuery(**self.taq_kwargs) self.scenario.wallclock_limit = 5 self.stats.submitted_ta_runs += 1 - run_info, run_value = ta.run_wrapper(RunInfo(config=config, cutoff=9, instance=None, - instance_specific=None, seed=1, capped=False)) + run_info, run_value = ta.run_wrapper(RunInfo(cutoff=9, **self.runinfo_kwargs)) self.assertEqual(run_value.status, StatusType.STOP) @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') def test_eval_with_limits_holdout_fail_silent(self, pynisher_mock): - pynisher_mock.return_value = None config = unittest.mock.Mock() - config.origin = 'MOCK' - config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + config.config_id, config.origin = 198, 'MOCK' + runinfo_kwargs = self.runinfo_kwargs.copy() + runinfo_kwargs['config'] = config + pynisher_mock.return_value = None + ta = TargetAlgorithmQuery(**self.taq_kwargs) # The following should not fail because abort on first config crashed is false - info = ta.run_wrapper(RunInfo(config=config, cutoff=60, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper(RunInfo(cutoff=60, **runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) - self.assertEqual(info[1].additional_info, {'configuration_origin': 'MOCK', - 'error': "Result queue is empty", - 'exit_status': '0', - 'exitcode': 0, - 
'subprocess_stdout': '', - 'subprocess_stderr': ''}) + ans = { + 'configuration_origin': 'MOCK', + 'error': "Result queue is empty", + 'exit_status': '0', + 'exitcode': 0, + 'subprocess_stdout': '', + 'subprocess_stderr': '' + } + self.assertTrue(all(ans[key] == info[1].additional_info[key] for key in ans.keys())) self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper(RunInfo(cutoff=30, **runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) @@ -223,19 +203,8 @@ def test_eval_with_limits_holdout_fail_silent(self, pynisher_mock): @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock): pynisher_mock.side_effect = MemoryError - config = unittest.mock.Mock() - config.config_id = 198 - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + ta = TargetAlgorithmQuery(**self.taq_kwargs) + info = ta.run_wrapper(RunInfo(cutoff=30, **self.runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.MEMOUT) # For accuracy, worst possible result is MAXINT @@ -246,86 +215,44 @@ def test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock): @unittest.mock.patch('pynisher.enforce_limits') def test_eval_with_limits_holdout_timeout_with_results_in_queue(self, pynisher_mock): - config = unittest.mock.Mock() - config.config_id = 198 - - def side_effect(**kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.SUCCESS, - 'loss': 0.5, - 'additional_run_info': {}}) + result_vals = [ + # Test for a succesful run + {'status': StatusType.SUCCESS, 'loss': 0.5, 'additional_run_info': {}}, + # And a crashed run which is in the queue + {'status': StatusType.CRASHED, 'loss': 2.0, 'additional_run_info': {}} + ] m1 = unittest.mock.Mock() m2 = unittest.mock.Mock() m1.return_value = m2 pynisher_mock.return_value = m1 - m2.side_effect = side_effect m2.exit_status = pynisher.TimeoutException m2.wall_clock_time = 30 + ans_loss = [0.5, 1.0] - # Test for a succesful run - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) - self.assertEqual(info[1].status, StatusType.SUCCESS) - self.assertEqual(info[1].cost, 0.5) - self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + for results, ans in zip(result_vals, ans_loss): + def side_effect(queue, evaluator_params, fixed_pipeline_params): + queue.put(results) - # And a crashed run which is in the queue - def side_effect(**kwargs): - queue = kwargs['queue'] - queue.put({'status': StatusType.CRASHED, - 'loss': 2.0, - 'additional_run_info': {}}) - m2.side_effect = side_effect - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - 
memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) - self.assertEqual(info[1].status, StatusType.CRASHED) - self.assertEqual(info[1].cost, 1.0) - self.assertIsInstance(info[1].time, float) - self.assertNotIn('exitcode', info[1].additional_info) + m2.side_effect = side_effect @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') def test_eval_with_limits_holdout_2(self, eval_houldout_mock): config = unittest.mock.Mock() config.config_id = 198 - def side_effect(*args, **kwargs): - queue = kwargs['queue'] + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_fn') + def test_eval_with_limits_holdout_2(self, eval_houldout_mock): + def side_effect(queue, evaluator_params, fixed_pipeline_params): queue.put({'status': StatusType.SUCCESS, 'loss': 0.5, - 'additional_run_info': kwargs['instance']}) + 'additional_run_info': evaluator_params.init_params['instance']}) + eval_houldout_mock.side_effect = side_effect - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + ta = TargetAlgorithmQuery(**self.taq_kwargs) self.scenario.wallclock_limit = 180 - instance = "{'subsample': 30}" - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=instance, - instance_specific=None, seed=1, capped=False)) + runinfo_kwargs = self.runinfo_kwargs.copy() + runinfo_kwargs.update(instance="{'subsample': 30}") + info = ta.run_wrapper(RunInfo(cutoff=30, **runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.SUCCESS, info) self.assertEqual(len(info[1].additional_info), 2) self.assertIn('configuration_origin', info[1].additional_info) @@ -333,22 +260,10 @@ def side_effect(*args, **kwargs): @unittest.mock.patch('autoPyTorch.evaluation.tae.eval_train_function') def test_exception_in_target_function(self, eval_holdout_mock): - config = unittest.mock.Mock() - config.config_id = 198 - eval_holdout_mock.side_effect = ValueError - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + ta = TargetAlgorithmQuery(**self.taq_kwargs) self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper(RunInfo(cutoff=30, **self.runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.CRASHED) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) @@ -357,22 +272,10 @@ def test_exception_in_target_function(self, eval_holdout_mock): self.assertNotIn('exitcode', info[1].additional_info) def test_silent_exception_in_target_function(self): - config = unittest.mock.Mock(spec=int) - config.config_id = 198 - - ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, - stats=self.stats, - memory_limit=3072, - metric=accuracy, - cost_for_crash=get_cost_of_crash(accuracy), - abort_on_first_run_crash=False, - logger_port=self.logger_port, - pynisher_context='fork', - ) + ta = 
TargetAlgorithmQuery(**self.taq_kwargs) ta.pynisher_logger = unittest.mock.Mock() self.stats.submitted_ta_runs += 1 - info = ta.run_wrapper(RunInfo(config=config, cutoff=3000, instance=None, - instance_specific=None, seed=1, capped=False)) + info = ta.run_wrapper(RunInfo(cutoff=3000, **self.runinfo_kwargs)) self.assertEqual(info[1].status, StatusType.CRASHED, msg=str(info[1].additional_info)) self.assertEqual(info[1].cost, 1.0) self.assertIsInstance(info[1].time, float) @@ -422,5 +325,5 @@ def test_eval_with_simple_intensification(self): @pytest.mark.parametrize("metric,expected", [(accuracy, 1.0), (log_loss, MAXINT)]) -def test_get_cost_of_crash(metric, expected): - assert get_cost_of_crash(metric) == expected +def test_cost_of_crash(metric, expected): + assert metric._cost_of_crash == expected diff --git a/test/test_evaluation/test_evaluators.py b/test/test_evaluation/test_evaluators.py index 2ca32af10..b7598ab1d 100644 --- a/test/test_evaluation/test_evaluators.py +++ b/test/test_evaluation/test_evaluators.py @@ -79,6 +79,17 @@ def setUp(self): backend_mock.temporary_directory = self.ev_path self.backend_mock = backend_mock + self.fixed_params = FixedPipelineParams.with_default_pipeline_config( + backend=self.backend_mock, + metric=accuracy, + seed=0, + pipeline_config={'budget_type': 'epochs', 'epochs': 50}, + all_supported_metrics=True + ) + self.eval_params = EvaluatorParams( + budget=0, configuration=unittest.mock.Mock(spec=Configuration) + ) + self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir') self.output_dir = os.path.join(self.ev_path, 'out_dir') @@ -96,17 +107,21 @@ def test_holdout(self, pipeline_mock): pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None - configuration = unittest.mock.Mock(spec=Configuration) + _queue = multiprocessing.Queue() backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') backend_api.load_datamanager = lambda: D - queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0, - pipeline_config={'budget_type': 'epochs', 'epochs': 50}) - evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) - evaluator.file_output.return_value = (None, {}) + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api) + evaluator = TrainEvaluator( + queue=_queue, + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) + evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend) + evaluator._save_to_backend.return_value = True - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) @@ -114,17 +129,16 @@ def test_holdout(self, pipeline_mock): self.assertEqual(len(rval[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) - self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(evaluator._save_to_backend.call_count, 1) self.assertEqual(result, 0.5652173913043479) self.assertEqual(pipeline_mock.fit.call_count, 1) # 3 calls because of train, holdout and test set self.assertEqual(pipeline_mock.predict_proba.call_count, 3) - self.assertEqual(evaluator.file_output.call_count, 1) - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], len(D.splits[0][1])) - self.assertIsNone(evaluator.file_output.call_args[0][1]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], - 
D.test_tensors[1].shape[0]) - self.assertEqual(evaluator.pipeline.fit.call_count, 1) + call_args = evaluator._save_to_backend.call_args + self.assertEqual(call_args[0][0].shape[0], len(D.splits[0][1])) + self.assertIsNone(call_args[0][1]) + self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) + self.assertEqual(evaluator.pipelines[0].fit.call_count, 1) @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') def test_cv(self, pipeline_mock): @@ -135,17 +149,21 @@ def test_cv(self, pipeline_mock): pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None - configuration = unittest.mock.Mock(spec=Configuration) + _queue = multiprocessing.Queue() backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') backend_api.load_datamanager = lambda: D - queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0, - pipeline_config={'budget_type': 'epochs', 'epochs': 50}) - evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) - evaluator.file_output.return_value = (None, {}) + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api) + evaluator = TrainEvaluator( + queue=_queue, + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) + evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend) + evaluator._save_to_backend.return_value = True - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1) @@ -153,85 +171,59 @@ def test_cv(self, pipeline_mock): self.assertEqual(len(rval[0]), 3) self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) - self.assertEqual(evaluator.file_output.call_count, 1) - self.assertEqual(result, 0.46235467431119603) + self.assertEqual(evaluator._save_to_backend.call_count, 1) + self.assertEqual(result, 0.463768115942029) self.assertEqual(pipeline_mock.fit.call_count, 5) # 9 calls because of the training, holdout and # test set (3 sets x 5 folds = 15) self.assertEqual(pipeline_mock.predict_proba.call_count, 15) + call_args = evaluator._save_to_backend.call_args # as the optimisation preds in cv is concatenation of the 5 folds, # so it is 5*splits - self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], + self.assertEqual(call_args[0][0].shape[0], # Notice this - 1: It is because the dataset D # has shape ((69, )) which is not divisible by 5 - 5 * len(D.splits[0][1]) - 1, evaluator.file_output.call_args) - self.assertIsNone(evaluator.file_output.call_args[0][1]) - self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + 5 * len(D.splits[0][1]) - 1, call_args) + self.assertIsNone(call_args[0][1]) + self.assertEqual(call_args[0][2].shape[0], D.test_tensors[1].shape[0]) @unittest.mock.patch.object(TrainEvaluator, '_loss') - def test_file_output(self, loss_mock): - + def test_save_to_backend(self, loss_mock): D = get_regression_datamanager() D.name = 'test' self.backend_mock.load_datamanager.return_value = D - configuration = unittest.mock.Mock(spec=Configuration) - queue_ = multiprocessing.Queue() + _queue = multiprocessing.Queue() loss_mock.return_value = None - evaluator = TrainEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0) - - self.backend_mock.get_model_dir.return_value = True - evaluator.pipeline = 
'model' - evaluator.Y_optimization = D.train_tensors[1] - rval = evaluator.file_output( - D.train_tensors[1], - None, - D.test_tensors[1], + evaluator = TrainEvaluator( + queue=_queue, + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params ) - - self.assertEqual(rval, (None, {})) - self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 1) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), - {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) - - evaluator.pipelines = ['model2', 'model2'] - rval = evaluator.file_output( - D.train_tensors[1], - None, - D.test_tensors[1], - ) - self.assertEqual(rval, (None, {})) - self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 2) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2) - self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), - {'seed', 'idx', 'budget', 'model', 'cv_model', - 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) - self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + evaluator.y_opt = D.train_tensors[1] + key_ans = {'seed', 'idx', 'budget', 'model', 'cv_model', + 'ensemble_predictions', 'valid_predictions', 'test_predictions'} + + for cnt, pl in enumerate([['model'], ['model2', 'model2']], start=1): + self.backend_mock.get_model_dir.return_value = True + evaluator.pipelines = pl + self.assertTrue(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) + call_list = self.backend_mock.save_numrun_to_dir.call_args_list[-1][1] + + self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, cnt) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, cnt) + self.assertEqual(call_list.keys(), key_ans) + self.assertIsNotNone(call_list['model']) + if isinstance(pl, list): # pipeline is list ==> cross validation + self.assertIsNotNone(call_list['cv_model']) + else: # holdout ==> single model and thus no cv_model + self.assertIsNone(call_list['cv_model']) # Check for not containing NaNs - that the models don't predict nonsense # for unseen data D.train_tensors[1][0] = np.NaN - rval = evaluator.file_output( - D.train_tensors[1], - None, - D.test_tensors[1], - ) - self.assertEqual( - rval, - ( - 1.0, - { - 'error': - 'Model predictions for optimization set contains NaNs.' 
- }, - ) - ) + self.assertFalse(evaluator._save_to_backend(D.train_tensors[1], None, D.test_tensors[1])) @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') def test_predict_proba_binary_classification(self, mock): @@ -242,13 +234,15 @@ def test_predict_proba_binary_classification(self, mock): ) mock.side_effect = lambda **kwargs: mock - configuration = unittest.mock.Mock(spec=Configuration) - queue_ = multiprocessing.Queue() + _queue = multiprocessing.Queue() - evaluator = TrainEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0, - pipeline_config={'budget_type': 'epochs', 'epochs': 50}) + evaluator = TrainEvaluator( + queue=_queue, + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() Y_optimization_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][ 'ensemble_predictions'] @@ -256,17 +250,17 @@ def test_predict_proba_binary_classification(self, mock): self.assertEqual(0.9, Y_optimization_pred[i][1]) def test_get_results(self): - queue_ = multiprocessing.Queue() + _queue = multiprocessing.Queue() for i in range(5): - queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS)) - result = read_queue(queue_) + _queue.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS)) + result = read_queue(_queue) self.assertEqual(len(result), 5) self.assertEqual(result[0][0], 0) self.assertAlmostEqual(result[0][1], 1.0) @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') def test_additional_metrics_during_training(self, pipeline_mock): - pipeline_mock.fit_dictionary = {'budget_type': 'epochs', 'epochs': 50} + pipeline_mock.fit_dictionary = self.fixed_params.pipeline_config # Binary iris, contains 69 train samples, 31 test samples D = get_binary_classification_datamanager() pipeline_mock.predict_proba.side_effect = \ @@ -274,20 +268,21 @@ def test_additional_metrics_during_training(self, pipeline_mock): pipeline_mock.side_effect = lambda **kwargs: pipeline_mock pipeline_mock.get_additional_run_info.return_value = None - # Binary iris, contains 69 train samples, 31 test samples - D = get_binary_classification_datamanager() - - configuration = unittest.mock.Mock(spec=Configuration) + _queue = multiprocessing.Queue() backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') backend_api.load_datamanager = lambda: D - queue_ = multiprocessing.Queue() - evaluator = TrainEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0, - pipeline_config={'budget_type': 'epochs', 'epochs': 50}, all_supported_metrics=True) - evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) - evaluator.file_output.return_value = (None, {}) + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api) + evaluator = TrainEvaluator( + queue=_queue, + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) + evaluator._save_to_backend = unittest.mock.Mock(spec=evaluator._save_to_backend) + evaluator._save_to_backend.return_value = True - evaluator.fit_predict_and_loss() + evaluator.evaluate_loss() rval = read_queue(evaluator.queue) self.assertEqual(len(rval), 1)
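These evaluator and TAE changes all lean on the new metric-level crash cost: get_cost_of_crash(metric) is replaced by metric._cost_of_crash, which metrics/base.py now computes as worst_possible_result if sign < 0 else optimum - worst_possible_result. A minimal self-contained sketch of that convention, using a hypothetical stand-in class rather than the real autoPyTorchMetric and assuming MAXINT is 2**31 - 1 as in smac.utils.constants:

from typing import NamedTuple

MAXINT = 2 ** 31 - 1  # assumption: stand-in for smac.utils.constants.MAXINT


class MetricSketch(NamedTuple):
    # Hypothetical stand-in for autoPyTorchMetric; fields mirror the attributes used above.
    optimum: float
    worst_possible_result: float
    sign: float

    @property
    def cost_of_crash(self) -> float:
        # AutoPyTorch minimizes a loss, so a crash must be reported as the
        # largest reachable loss for the metric at hand.
        if self.sign < 0:
            # Metric is minimized (e.g. log loss): its worst value is already a loss.
            return self.worst_possible_result
        # Metric is maximized (e.g. accuracy): loss = optimum - score, so the
        # worst loss is optimum - worst_possible_result.
        return self.optimum - self.worst_possible_result


accuracy_like = MetricSketch(optimum=1.0, worst_possible_result=0.0, sign=1.0)
log_loss_like = MetricSketch(optimum=0.0, worst_possible_result=MAXINT, sign=-1.0)

assert accuracy_like.cost_of_crash == 1.0     # matches test_cost_of_crash(accuracy, 1.0)
assert log_loss_like.cost_of_crash == MAXINT  # matches test_cost_of_crash(log_loss, MAXINT)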