From 5379632a916c6bd92409282cbf3b06d8526c3b7a Mon Sep 17 00:00:00 2001 From: nabenabe0928 Date: Tue, 28 Dec 2021 16:03:29 +0900 Subject: [PATCH] [temporal] [cont] Fix errors [test] Add the tests for the instantiation of abstract evaluator 1 -- 3 [test] Add the tests for util 1 -- 2 [test] Add the tests for train_evaluator 1 -- 2 [refactor] [test] Clean up the pipeline classes and add tests for it 1 -- 2 [test] Add the tests for tae 1 -- 4 [fix] Fix an error due to the change in extract learning curve [experimental] Increase the coverage [test] Add tests for pipeline repr Since the modifications in tests removed the coverage on pipeline repr, I added tests to increase those parts. Basically, the decrease in the coverage happened due to the usage of dummy pipelines. --- autoPyTorch/api/base_task.py | 2 +- autoPyTorch/evaluation/abstract_evaluator.py | 23 +- .../evaluation/pipeline_class_collection.py | 351 +++++++----------- autoPyTorch/evaluation/tae.py | 3 +- autoPyTorch/evaluation/train_evaluator.py | 2 +- autoPyTorch/evaluation/utils.py | 48 ++- autoPyTorch/pipeline/base_pipeline.py | 14 - test/test_api/test_api.py | 16 +- test/test_api/utils.py | 2 +- .../test_abstract_evaluator.py | 154 +++++++- test/test_evaluation/test_evaluators.py | 58 +++ .../test_pipeline_class_collection.py | 145 ++++++++ test/test_evaluation/test_tae.py | 162 ++++++++ test/test_evaluation/test_utils.py | 52 ++- test/test_pipeline/test_pipeline.py | 9 - test/test_pipeline/test_tabular_regression.py | 13 + 16 files changed, 775 insertions(+), 279 deletions(-) create mode 100644 test/test_evaluation/test_pipeline_class_collection.py create mode 100644 test/test_evaluation/test_tae.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 19e187428..ec792c1e3 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1044,7 +1044,7 @@ def _search( DisableFileOutputParameters.y_opt in self._disable_file_output and self.ensemble_size > 1 ): - self._logger.warning(f"No ensemble will be created when {DisableFileOutputParameters.y_optimization}" + self._logger.warning(f"No ensemble will be created when {DisableFileOutputParameters.y_opt}" f" is in disable_file_output") self._memory_limit = memory_limit diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py index f914eb39b..4ad9adc21 100644 --- a/autoPyTorch/evaluation/abstract_evaluator.py +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -193,10 +193,16 @@ def with_default_pipeline_config( f'{cls.__name__}.with_default_pipeline_config() got multiple values for argument `budget_type`' ) + budget_type_choices = ('epochs', 'runtime') if pipeline_config is None: pipeline_config = get_default_pipeline_config(choice=choice) + if 'budget_type' not in pipeline_config: + raise ValueError('pipeline_config must have `budget_type`') budget_type = pipeline_config['budget_type'] + if pipeline_config['budget_type'] not in budget_type_choices: + raise ValueError(f"budget_type must be in {budget_type_choices}, but got {budget_type}") + kwargs.update(pipeline_config=pipeline_config, budget_type=budget_type) return cls(**kwargs) @@ -307,6 +313,9 @@ def _init_dataset_properties(self) -> None: )) self.X_train, self.y_train = datamanager.train_tensors + self.unique_train_labels = [ + list(np.unique(self.y_train[train_indices])) for train_indices, _ in self.splits + ] self.X_valid, self.y_valid, self.X_test, self.y_test = None, None, None, None if datamanager.val_tensors is not None: 
self.X_valid, self.y_valid = datamanager.val_tensors @@ -377,7 +386,7 @@ def predict( self, X: Optional[np.ndarray], pipeline: BaseEstimator, - label_examples: Optional[np.ndarray] = None + unique_train_labels: Optional[List[int]] = None ) -> Optional[np.ndarray]: """ A wrapper function to handle the prediction of regression or classification tasks. @@ -387,7 +396,8 @@ def predict( A set of features to feed to the pipeline pipeline (BaseEstimator): A model that will take the features X return a prediction y - label_examples (Optional[np.ndarray]): + unique_train_labels (Optional[List[int]]): + The unique labels included in the train split. Returns: (np.ndarray): @@ -411,7 +421,7 @@ def predict( prediction=pred, num_classes=self.num_classes, output_type=self.output_type, - label_examples=label_examples + unique_train_labels=unique_train_labels ) return pred @@ -435,6 +445,10 @@ def _get_pipeline(self) -> BaseEstimator: A scikit-learn compliant pipeline which is not yet fit to the data. """ config = self.evaluator_params.configuration + if not isinstance(config, (int, str, Configuration)): + raise TypeError("The type of configuration must be either (int, str, Configuration), " + f"but got type {type(config)}") + kwargs = dict( config=config, random_state=np.random.RandomState(self.fixed_pipeline_params.seed), @@ -452,9 +466,6 @@ def _get_pipeline(self) -> BaseEstimator: exclude=self.fixed_pipeline_params.exclude, search_space_updates=self.fixed_pipeline_params.search_space_updates, **kwargs) - else: - raise ValueError("The type of configuration must be either (int, str, Configuration), " - f"but got type {type(config)}") def _loss(self, labels: np.ndarray, preds: np.ndarray) -> Dict[str, float]: """SMAC follows a minimization goal, so the make_scorer diff --git a/autoPyTorch/evaluation/pipeline_class_collection.py b/autoPyTorch/evaluation/pipeline_class_collection.py index bd4c1be6f..a84acfe6b 100644 --- a/autoPyTorch/evaluation/pipeline_class_collection.py +++ b/autoPyTorch/evaluation/pipeline_class_collection.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Type, Union from ConfigSpace import Configuration @@ -27,7 +27,7 @@ from autoPyTorch.utils.common import replace_string_bool_to_bool, subsampler -def get_default_pipeline_config(choice: str) -> Dict[str, Any]: +def get_default_pipeline_config(choice: str = 'default') -> Dict[str, Any]: choices = ('default', 'dummy') if choice not in choices: raise ValueError(f'choice must be in {choices}, but got {choice}') @@ -50,112 +50,36 @@ def get_pipeline_class( task_type: int ) -> Union[BaseEstimator, BasePipeline]: - pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None - if task_type in REGRESSION_TASKS: - if isinstance(config, int): - pipeline_class = DummyRegressionPipeline - elif isinstance(config, str): - pipeline_class = MyTraditionalTabularRegressionPipeline - elif isinstance(config, Configuration): - pipeline_class = autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline - else: - raise ValueError('task {} not available'.format(task_type)) - else: - if isinstance(config, int): - pipeline_class = DummyClassificationPipeline - elif isinstance(config, str): - if task_type in TABULAR_TASKS: - pipeline_class = MyTraditionalTabularClassificationPipeline - else: - raise ValueError("Only tabular tasks are currently supported with traditional methods") - elif isinstance(config, Configuration): - if task_type in TABULAR_TASKS: - pipeline_class 
= autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline - elif task_type in IMAGE_TASKS: - pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline - else: - raise ValueError('task {} not available'.format(task_type)) - - if pipeline_class is None: - raise RuntimeError("could not infer pipeline class") - - return pipeline_class - - -class MyTraditionalTabularClassificationPipeline(BaseEstimator): - """ - A wrapper class that holds a pipeline for traditional classification. - Estimators like CatBoost, and Random Forest are considered traditional machine - learning models and are fitted before neural architecture search. - - This class is an interface to fit a pipeline containing a traditional machine - learning model, and is the final object that is stored for inference. - - Attributes: - dataset_properties (Dict[str, BaseDatasetPropertiesType]): - A dictionary containing dataset specific information - random_state (Optional[np.random.RandomState]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. It complies - a similar function as the kwargs - """ - - def __init__(self, config: str, - dataset_properties: Dict[str, BaseDatasetPropertiesType], - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None): - self.config = config - self.dataset_properties = dataset_properties - self.random_state = random_state - self.init_params = init_params - self.pipeline = autoPyTorch.pipeline.traditional_tabular_classification. \ - TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties, - random_state=self.random_state) - configuration_space = self.pipeline.get_hyperparameter_search_space() - default_configuration = configuration_space.get_default_configuration().get_dictionary() - default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config - self.configuration = Configuration(configuration_space, default_configuration) - self.pipeline.set_hyperparameters(self.configuration) + is_reg = (task_type in REGRESSION_TASKS) - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - return self.pipeline.fit(X, y) + if isinstance(config, int): + return DummyRegressionPipeline if is_reg else DummyClassificationPipeline + elif isinstance(config, str): + if is_reg: + return MyTraditionalTabularRegressionPipeline - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - return self.pipeline.predict_proba(X, batch_size=batch_size) - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - return self.pipeline.predict(X, batch_size=batch_size) - - def get_additional_run_info(self) -> Dict[str, Any]: - """ - Can be used to return additional info for the run. - Returns: - Dict[str, Any]: - Currently contains - 1. pipeline_configuration: the configuration of the pipeline, i.e, the traditional model used - 2. trainer_configuration: the parameters for the traditional model used. 
- Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs - """ - return {'pipeline_configuration': self.configuration, - 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config(), - 'configuration_origin': 'traditional'} + if task_type not in TABULAR_TASKS: + # Time series and image tasks + raise NotImplementedError(f'classification task on {task_type} for traditional methods is not available') - def get_pipeline_representation(self) -> Dict[str, str]: - return self.pipeline.get_pipeline_representation() + return MyTraditionalTabularClassificationPipeline + elif isinstance(config, Configuration): + if is_reg: + return autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return autoPyTorch.pipeline.traditional_tabular_classification. \ - TraditionalTabularClassificationPipeline.get_default_pipeline_options() + if task_type in TABULAR_TASKS: + return autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline + elif task_type in IMAGE_TASKS: + return autoPyTorch.pipeline.image_classification.ImageClassificationPipeline + else: + raise NotImplementedError(f'classification task on {task_type} for traditional methods is not available') + else: + raise RuntimeError("could not infer pipeline class") -class MyTraditionalTabularRegressionPipeline(BaseEstimator): +class BaseMyTraditionalPipeline: """ - A wrapper class that holds a pipeline for traditional regression. + A wrapper class that holds a pipeline for traditional regression/classification. Estimators like CatBoost, and Random Forest are considered traditional machine learning models and are fitted before neural architecture search. @@ -171,29 +95,33 @@ class MyTraditionalTabularRegressionPipeline(BaseEstimator): An optional dictionary that is passed to the pipeline's steps. It complies a similar function as the kwargs """ - def __init__(self, config: str, - dataset_properties: Dict[str, Any], - random_state: Optional[np.random.RandomState] = None, - init_params: Optional[Dict] = None): + def __init__( + self, + config: str, + pipeline_class: Union[ + Type[autoPyTorch.pipeline.traditional_tabular_regression.TraditionalTabularRegressionPipeline], + Type[autoPyTorch.pipeline.traditional_tabular_classification.TraditionalTabularClassificationPipeline] + ], + dataset_properties: Dict[str, BaseDatasetPropertiesType], + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ): self.config = config self.dataset_properties = dataset_properties self.random_state = random_state self.init_params = init_params - self.pipeline = autoPyTorch.pipeline.traditional_tabular_regression. 
\ - TraditionalTabularRegressionPipeline(dataset_properties=dataset_properties, - random_state=self.random_state) + self.pipeline = pipeline_class(dataset_properties=dataset_properties, random_state=self.random_state) + configuration_space = self.pipeline.get_hyperparameter_search_space() default_configuration = configuration_space.get_default_configuration().get_dictionary() default_configuration['model_trainer:tabular_traditional_model:traditional_learner'] = config self.configuration = Configuration(configuration_space, default_configuration) self.pipeline.set_hyperparameters(self.configuration) - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: + def fit(self, X: Dict[str, Any], y: Any, sample_weight: Optional[np.ndarray] = None) -> object: return self.pipeline.fit(X, y) - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: + def predict(self, X: Union[np.ndarray, pd.DataFrame], batch_size: int = 1000) -> np.ndarray: return self.pipeline.predict(X, batch_size=batch_size) def get_additional_run_info(self) -> Dict[str, Any]: @@ -206,130 +134,137 @@ def get_additional_run_info(self) -> Dict[str, Any]: 2. trainer_configuration: the parameters for the traditional model used. Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/estimator_configs """ - return {'pipeline_configuration': self.configuration, - 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config()} + return { + 'pipeline_configuration': self.configuration, + 'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config(), + 'configuration_origin': 'traditional' + } def get_pipeline_representation(self) -> Dict[str, str]: return self.pipeline.get_pipeline_representation() @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return autoPyTorch.pipeline.traditional_tabular_regression.\ - TraditionalTabularRegressionPipeline.get_default_pipeline_options() + def get_default_pipeline_config() -> Dict[str, Any]: + return _get_default_pipeline_config() + + +class MyTraditionalTabularClassificationPipeline(BaseMyTraditionalPipeline, BaseEstimator): + """ A wrapper class that holds a pipeline for traditional classification. """ + def __init__( + self, + config: str, + dataset_properties: Dict[str, BaseDatasetPropertiesType], + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ): + + _pl = autoPyTorch.pipeline.traditional_tabular_classification.TraditionalTabularClassificationPipeline + BaseMyTraditionalPipeline.__init__( + self, + config=config, + dataset_properties=dataset_properties, + random_state=random_state, + init_params=init_params, + pipeline_class=_pl + ) + + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], batch_size: int = 1000) -> np.ndarray: + return self.pipeline.predict_proba(X, batch_size=batch_size) -class DummyClassificationPipeline(DummyClassifier): - """ - A wrapper class that holds a pipeline for dummy classification. +class MyTraditionalTabularRegressionPipeline(BaseMyTraditionalPipeline, BaseEstimator): + """ A wrapper class that holds a pipeline for traditional regression. """ + def __init__( + self, + config: str, + dataset_properties: Dict[str, Any], + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict] = None + ): - A wrapper over DummyClassifier of scikit learn. This estimator is considered the - worst performing model. 
In case of failure, at least this model will be fitted. + BaseMyTraditionalPipeline.__init__( + self, + config=config, + dataset_properties=dataset_properties, + random_state=random_state, + init_params=init_params, + pipeline_class=autoPyTorch.pipeline.traditional_tabular_regression.TraditionalTabularRegressionPipeline + ) - Attributes: - random_state (Optional[Union[int, np.random.RandomState]]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. It complies - a similar function as the kwargs + +class BaseDummyPipeline: """ + Base class for wrapper classes that hold a pipeline for + dummy {classification/regression}. - def __init__(self, config: Configuration, - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None - ) -> None: + This estimator is considered the worst performing model. + In case of failure, at least this model will be fitted. + """ + def __init__( + self, + config: int, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ): self.config = config self.init_params = init_params self.random_state = random_state - if config == 1: - super(DummyClassificationPipeline, self).__init__(strategy="uniform") - else: - super(DummyClassificationPipeline, self).__init__(strategy="most_frequent") - - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: - X_train = subsampler(X['X_train'], X['train_indices']) - y_train = subsampler(X['y_train'], X['train_indices']) - return super(DummyClassificationPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, - sample_weight=sample_weight) - - def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - new_X = np.ones((X.shape[0], 1)) - probas = super(DummyClassificationPipeline, self).predict_proba(new_X) - probas = convert_multioutput_multiclass_to_multilabel(probas).astype( - np.float32) - return probas - - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: - new_X = np.ones((X.shape[0], 1)) - return super(DummyClassificationPipeline, self).predict(new_X).astype(np.float32) def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 return {'configuration_origin': 'DUMMY'} def get_pipeline_representation(self) -> Dict[str, str]: - return { - 'Preprocessing': 'None', - 'Estimator': 'Dummy', - } + return {'Preprocessing': 'None', 'Estimator': 'Dummy'} @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return {'budget_type': 'epochs', - 'epochs': 1, - 'runtime': 1} - + def get_default_pipeline_config() -> Dict[str, Any]: + return _get_dummy_pipeline_config() + + +class DummyClassificationPipeline(DummyClassifier, BaseDummyPipeline): + """ A wrapper over DummyClassifier of scikit learn. 
""" + def __init__( + self, + config: int, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ): + BaseDummyPipeline.__init__(self, config=config, random_state=random_state, init_params=init_params) + DummyClassifier.__init__(self, strategy="uniform" if config == 1 else "most_frequent") + + def fit(self, X: Dict[str, Any], y: Any, sample_weight: Optional[np.ndarray] = None) -> object: + X_train = subsampler(X['X_train'], X['train_indices']) + y_train = subsampler(X['y_train'], X['train_indices']) + X_new = np.ones((X_train.shape[0], 1)) + return super(DummyClassificationPipeline, self).fit(X_new, y_train, sample_weight=sample_weight) -class DummyRegressionPipeline(DummyRegressor): - """ - A wrapper class that holds a pipeline for dummy regression. + def predict(self, X: Union[np.ndarray, pd.DataFrame], batch_size: int = 1000) -> np.ndarray: + new_X = np.ones((X.shape[0], 1)) + return super(DummyClassificationPipeline, self).predict(new_X).astype(np.float32) - A wrapper over DummyRegressor of scikit learn. This estimator is considered the - worst performing model. In case of failure, at least this model will be fitted. + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], batch_size: int = 1000) -> np.ndarray: + new_X = np.ones((X.shape[0], 1)) + probas = super(DummyClassificationPipeline, self).predict_proba(new_X) + return convert_multioutput_multiclass_to_multilabel(probas).astype(np.float32) - Attributes: - random_state (Optional[Union[int, np.random.RandomState]]): - Object that contains a seed and allows for reproducible results - init_params (Optional[Dict]): - An optional dictionary that is passed to the pipeline's steps. It complies - a similar function as the kwargs - """ - def __init__(self, config: Configuration, - random_state: Optional[Union[int, np.random.RandomState]] = None, - init_params: Optional[Dict] = None) -> None: - self.config = config - self.init_params = init_params - self.random_state = random_state - if config == 1: - super(DummyRegressionPipeline, self).__init__(strategy='mean') - else: - super(DummyRegressionPipeline, self).__init__(strategy='median') +class DummyRegressionPipeline(DummyRegressor, BaseDummyPipeline): + """ A wrapper over DummyRegressor of scikit learn. 
""" + def __init__( + self, + config: int, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ): + BaseDummyPipeline.__init__(self, config=config, random_state=random_state, init_params=init_params) + DummyRegressor.__init__(self, strategy='mean' if config == 1 else 'median') - def fit(self, X: Dict[str, Any], y: Any, - sample_weight: Optional[np.ndarray] = None) -> object: + def fit(self, X: Dict[str, Any], y: Any, sample_weight: Optional[np.ndarray] = None) -> object: X_train = subsampler(X['X_train'], X['train_indices']) y_train = subsampler(X['y_train'], X['train_indices']) - return super(DummyRegressionPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, - sample_weight=sample_weight) + X_new = np.ones((X_train.shape[0], 1)) + return super(DummyRegressionPipeline, self).fit(X_new, y_train, sample_weight=sample_weight) - def predict(self, X: Union[np.ndarray, pd.DataFrame], - batch_size: int = 1000) -> np.ndarray: + def predict(self, X: Union[np.ndarray, pd.DataFrame], batch_size: int = 1000) -> np.ndarray: new_X = np.ones((X.shape[0], 1)) return super(DummyRegressionPipeline, self).predict(new_X).astype(np.float32) - - def get_additional_run_info(self) -> Dict: # pylint: disable=R0201 - return {'configuration_origin': 'DUMMY'} - - def get_pipeline_representation(self) -> Dict[str, str]: - return { - 'Preprocessing': 'None', - 'Estimator': 'Dummy', - } - - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return {'budget_type': 'epochs', - 'epochs': 1, - 'runtime': 1} diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py index 97b29b640..bf02a3738 100644 --- a/autoPyTorch/evaluation/tae.py +++ b/autoPyTorch/evaluation/tae.py @@ -477,6 +477,7 @@ def run( return self._process_results(obj, config, queue, num_run, budget) def _add_learning_curve_info(self, additional_run_info: Dict[str, Any], info: List[RunValue]) -> None: + """ This method is experimental (The source of information in RunValue might require modifications.) 
""" lc_runtime = extract_learning_curve(info, 'duration') stored = False targets = {'learning_curve': (True, None), @@ -487,7 +488,7 @@ def _add_learning_curve_info(self, additional_run_info: Dict[str, Any], info: Li for key, (collect, metric_name) in targets.items(): if collect: lc = extract_learning_curve(info, metric_name) - if len(lc) > 1: + if len(lc) >= 1: stored = True additional_run_info[key] = lc diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py index fd99e1077..b4e4cb31b 100644 --- a/autoPyTorch/evaluation/train_evaluator.py +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -240,7 +240,7 @@ def _fit_and_evaluate_loss( fit_pipeline(self.logger, pipeline, X, y=None) self.logger.info("Model fitted, now predicting") - kwargs = {'pipeline': pipeline, 'label_examples': self.y_train[train_indices]} + kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]} train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) valid_pred = self.predict(self.X_valid, **kwargs) diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py index 1a8500d7b..d2ec1fb93 100644 --- a/autoPyTorch/evaluation/utils.py +++ b/autoPyTorch/evaluation/utils.py @@ -65,18 +65,20 @@ def ensure_prediction_array_sizes( prediction: np.ndarray, output_type: str, num_classes: Optional[int], - label_examples: Optional[np.ndarray] + unique_train_labels: Optional[List[int]] ) -> np.ndarray: """ This function formats a prediction to match the dimensionality of the provided - labels label_examples. This should be used exclusively for classification tasks + labels `unique_train_labels`. This should be used exclusively for classification tasks. + This function is typically important when using cross validation, which might cause + some splits not having some class in the training split. Args: prediction (np.ndarray): The un-formatted predictions of a pipeline output_type (str): Output type specified in constants. 
(TODO: Fix it to enum) - label_examples (Optional[np.ndarray]): + unique_train_labels (Optional[List[int]]): The labels from the dataset to give an intuition of the expected predictions dimensionality @@ -85,15 +87,18 @@ def ensure_prediction_array_sizes( The formatted prediction """ if num_classes is None: - raise RuntimeError("_ensure_prediction_array_sizes is only for classification tasks") - if label_examples is None: - raise ValueError('label_examples must be provided, but got None') + raise RuntimeError("ensure_prediction_array_sizes is only for classification tasks") + if unique_train_labels is None: + raise ValueError('unique_train_labels must be provided, but got None') if STRING_TO_OUTPUT_TYPES[output_type] != MULTICLASS or prediction.shape[1] == num_classes: return prediction - classes = list(np.unique(label_examples)) - mapping = {classes.index(class_idx): class_idx for class_idx in range(num_classes)} + mapping = { + unique_train_labels.index(class_idx): class_idx + for class_idx in range(num_classes) if class_idx in unique_train_labels + } + # augment the array size when the output shape is different modified_pred = np.zeros((prediction.shape[0], num_classes), dtype=np.float32) for index, class_index in mapping.items(): @@ -103,12 +108,31 @@ def ensure_prediction_array_sizes( def extract_learning_curve(stack: List[RunValue], key: Optional[str] = None) -> List[float]: + """ + Extract learning curve from the additional info. + + Args: + stack (List[RunValue]): + The stack of the additional information. + key (Optional[str]): + The key to extract. + + Returns: + learning_curve (List[float]): + The list of the extracted information + + Note: + This function is experimental. + The source of information in RunValue might require modifications. + """ learning_curve = [] + key = 'loss' if key is None else key + for entry in stack: try: - val = entry['loss'] if key is None else entry['additional_run_info'][key] - learning_curve.append(val) - except TypeError: # additional info is not dict + info = entry.additional_info + learning_curve.append(getattr(entry, key, info[key])) + except AttributeError: # additional info is not RunValue pass except KeyError: # Key does not exist pass @@ -176,7 +200,7 @@ class DisableFileOutputParameters(autoPyTorchEnum): + `all`: do not save any of the above. 
""" - model = 'pipeline' + model = 'model' cv_model = 'cv_model' y_opt = 'y_opt' y_test = 'y_test' diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py index 90c0f6362..1d18771e2 100644 --- a/autoPyTorch/pipeline/base_pipeline.py +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -566,17 +566,3 @@ def get_pipeline_representation(self) -> Dict[str, str]: Dict: contains the pipeline representation in a short format """ raise NotImplementedError() - - @staticmethod - def get_default_pipeline_options() -> Dict[str, Any]: - return { - 'num_run': 0, - 'device': 'cpu', - 'budget_type': 'epochs', - 'epochs': 5, - 'runtime': 3600, - 'torch_num_threads': 1, - 'early_stopping': 10, - 'use_tensorboard_logger': True, - 'metrics_during_training': True - } diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 63d9c7246..1a6413c1b 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -275,8 +275,8 @@ def _get_estimator( resampling_strategy, resampling_strategy_args, metric, - total_walltime_limit=40, - func_eval_time_limit_secs=10, + total_walltime_limit=18, + func_eval_time_limit_secs=6, **kwargs ): @@ -322,6 +322,10 @@ def _check_tabular_task(estimator, X_test, y_test, task_type, resampling_strateg _check_picklable(estimator, X_test) + representation = estimator.show_models() + assert isinstance(representation, str) + assert all(word in representation for word in ['Weight', 'Preprocessing', 'Estimator']) + # Test # ==== @@ -383,10 +387,6 @@ def test_tabular_regression(openml_id, resampling_strategy, backend, resampling_ n_successful_runs=1 ) - representation = estimator.show_models() - assert isinstance(representation, str) - assert all(word in representation for word in ['Weight', 'Preprocessing', 'Estimator']) - @pytest.mark.parametrize('openml_id', ( 1590, # Adult to test NaN in categorical columns @@ -423,8 +423,8 @@ def test_tabular_input_support(openml_id, backend): X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test, optimize_metric='accuracy', - total_walltime_limit=150, - func_eval_time_limit_secs=50, + total_walltime_limit=30, + func_eval_time_limit_secs=6, enable_traditional_pipeline=False, load_models=False, ) diff --git a/test/test_api/utils.py b/test/test_api/utils.py index b95e7c726..0e757015d 100644 --- a/test/test_api/utils.py +++ b/test/test_api/utils.py @@ -43,7 +43,7 @@ def _fit_and_evaluate_loss(self, pipeline, split_id, train_indices, opt_indices) fit_pipeline(self.logger, pipeline, X, y=None) self.logger.info("Model fitted, now predicting") - kwargs = {'pipeline': pipeline, 'label_examples': self.y_train[train_indices]} + kwargs = {'pipeline': pipeline, 'unique_train_labels': self.unique_train_labels[split_id]} train_pred = self.predict(subsampler(self.X_train, train_indices), **kwargs) opt_pred = self.predict(subsampler(self.X_train, opt_indices), **kwargs) valid_pred = self.predict(self.X_valid, **kwargs) diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py index ac15c18bb..f42af756b 100644 --- a/test/test_evaluation/test_abstract_evaluator.py +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -7,6 +7,8 @@ import numpy as np +import pytest + import sklearn.dummy from smac.tae import StatusType @@ -16,7 +18,8 @@ AbstractEvaluator, EvaluationResults, EvaluatorParams, - FixedPipelineParams + FixedPipelineParams, + get_default_pipeline_config ) from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy @@ -25,6 +28,41 
@@ from evaluation_util import get_multiclass_classification_datamanager # noqa E402 +def setup_backend_mock(ev_path, dataset=get_multiclass_classification_datamanager()): + dummy_model_files = [os.path.join(ev_path, str(n)) for n in range(100)] + dummy_pred_files = [os.path.join(ev_path, str(n)) for n in range(100, 200)] + + backend_mock = unittest.mock.Mock() + backend_mock.get_model_dir.return_value = ev_path + backend_mock.get_model_path.side_effect = dummy_model_files + backend_mock.get_prediction_output_path.side_effect = dummy_pred_files + backend_mock.temporary_directory = ev_path + + backend_mock.load_datamanager.return_value = dataset + return backend_mock + + +def test_fixed_pipeline_params_with_default_pipeline_config(): + pipeline_config = get_default_pipeline_config() + dummy_config = {'budget_type': 'epochs'} + with pytest.raises(TypeError): + FixedPipelineParams.with_default_pipeline_config(budget_type='epochs') + with pytest.raises(ValueError): + FixedPipelineParams.with_default_pipeline_config(pipeline_config={'dummy': 'dummy'}) + with pytest.raises(ValueError): + FixedPipelineParams.with_default_pipeline_config(pipeline_config={'budget_type': 'dummy'}) + + for cfg, ans in [(None, pipeline_config), (dummy_config, dummy_config)]: + params = FixedPipelineParams.with_default_pipeline_config( + metric=accuracy, + pipeline_config=cfg, + backend=unittest.mock.Mock(), + seed=0 + ) + + assert params.pipeline_config == ans + + class AbstractEvaluatorTest(unittest.TestCase): _multiprocess_can_split_ = True @@ -35,18 +73,8 @@ def setUp(self): self.ev_path = os.path.join(this_directory, '.tmp_evaluation') if not os.path.exists(self.ev_path): os.mkdir(self.ev_path) - dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] - dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] - - backend_mock = unittest.mock.Mock() - backend_mock.get_model_dir.return_value = self.ev_path - backend_mock.get_model_path.side_effect = dummy_model_files - backend_mock.get_prediction_output_path.side_effect = dummy_pred_files - backend_mock.temporary_directory = self.ev_path - - D = get_multiclass_classification_datamanager() - backend_mock.load_datamanager.return_value = D - self.backend_mock = backend_mock + + self.backend_mock = setup_backend_mock(self.ev_path) self.eval_params = EvaluatorParams.with_default_budget(budget=0, configuration=1) self.fixed_params = FixedPipelineParams.with_default_pipeline_config( backend=self.backend_mock, @@ -64,8 +92,106 @@ def tearDown(self): except: # noqa E722 pass + def test_instantiation_errors(self): + for task_type, splits in [('tabular_classification', None), (None, [])]: + with pytest.raises(ValueError): + fixed_params = self.fixed_params._asdict() + backend = unittest.mock.Mock() + dataset_mock = unittest.mock.Mock() + dataset_mock.task_type = task_type + dataset_mock.splits = splits + backend.load_datamanager.return_value = dataset_mock + fixed_params.update(backend=backend) + + AbstractEvaluator( + queue=unittest.mock.Mock(), + fixed_pipeline_params=FixedPipelineParams(**fixed_params), + evaluator_params=self.eval_params + ) + + def test_tensors_in_instantiation(self): + fixed_params = self.fixed_params._asdict() + dataset = get_multiclass_classification_datamanager() + + dataset.val_tensors = ('X_val', 'y_val') + dataset.test_tensors = ('X_test', 'y_test') + fixed_params.update(backend=setup_backend_mock(self.ev_path, dataset=dataset)) + + ae = AbstractEvaluator( + queue=unittest.mock.Mock(), + 
fixed_pipeline_params=FixedPipelineParams(**fixed_params), + evaluator_params=self.eval_params + ) + + assert (ae.X_valid, ae.y_valid) == dataset.val_tensors + assert (ae.X_test, ae.y_test) == dataset.test_tensors + + def test_init_fit_dictionary(self): + for budget_type, exc in [('runtime', None), ('epochs', None), ('dummy', ValueError)]: + fixed_params = self.fixed_params._asdict() + fixed_params.update(budget_type=budget_type) + kwargs = dict( + queue=unittest.mock.Mock(), + fixed_pipeline_params=FixedPipelineParams(**fixed_params), + evaluator_params=self.eval_params + ) + if exc is None: + AbstractEvaluator(**kwargs) + else: + with pytest.raises(exc): + AbstractEvaluator(**kwargs) + + def test_get_pipeline(self): + ae = AbstractEvaluator( + queue=unittest.mock.Mock(), + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) + eval_params = ae.evaluator_params._asdict() + eval_params.update(configuration=1.5) + ae.evaluator_params = EvaluatorParams(**eval_params) + with pytest.raises(TypeError): + ae._get_pipeline() + + def test_get_transformed_metrics_error(self): + ae = AbstractEvaluator( + queue=unittest.mock.Mock(), + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) + with pytest.raises(ValueError): + ae._get_transformed_metrics(pred=[], inference_name='dummy') + + def test_fetch_voting_pipeline_without_pipeline(self): + ae = AbstractEvaluator( + queue=unittest.mock.Mock(), + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) + ae.pipelines = [None] * 4 + assert ae._fetch_voting_pipeline() is None + + def test_is_output_possible(self): + ae = AbstractEvaluator( + queue=unittest.mock.Mock(), + fixed_pipeline_params=self.fixed_params, + evaluator_params=self.eval_params + ) + + dummy = np.random.random((33, 3)) + dummy_with_nan = dummy.copy() + dummy_with_nan[0][0] = np.nan + for y_opt, opt_pred, ans in [ + (None, dummy, True), + (dummy, np.random.random((100, 3)), False), + (dummy, dummy, True), + (dummy, dummy_with_nan, False) + ]: + ae.y_opt = y_opt + assert ae._is_output_possible(opt_pred, None, None) == ans + def test_record_evaluation_model_predicts_NaN(self): - '''Tests by handing in predictions which contain NaNs''' + """ Tests by handing in predictions which contain NaNs """ rs = np.random.RandomState(1) queue_mock = unittest.mock.Mock() opt_pred, test_pred, valid_pred = rs.rand(33, 3), rs.rand(25, 3), rs.rand(25, 3) diff --git a/test/test_evaluation/test_evaluators.py b/test/test_evaluation/test_evaluators.py index 8eab5d333..aae259e08 100644 --- a/test/test_evaluation/test_evaluators.py +++ b/test/test_evaluation/test_evaluators.py @@ -10,6 +10,8 @@ import numpy as np +import pytest + from sklearn.base import BaseEstimator from smac.tae import StatusType @@ -55,6 +57,47 @@ def get_additional_run_info(self): return {} +class TestCrossValidationResultsManager(unittest.TestCase): + def test_update_loss_dict(self): + cv_results = _CrossValidationResultsManager(3) + loss_sum_dict = {} + loss_dict = {'f1': 1.0, 'f2': 2.0} + cv_results._update_loss_dict(loss_sum_dict, loss_dict, 3) + assert loss_sum_dict == {'f1': 1.0 * 3, 'f2': 2.0 * 3} + loss_sum_dict = {'f1': 2.0, 'f2': 1.0} + cv_results._update_loss_dict(loss_sum_dict, loss_dict, 3) + assert loss_sum_dict == {'f1': 2.0 + 1.0 * 3, 'f2': 1.0 + 2.0 * 3} + + def test_merge_predictions(self): + cv_results = _CrossValidationResultsManager(3) + preds = np.array([]) + assert cv_results._merge_predictions(preds) is None + + for preds_shape in [(10, 
), (10, 10, )]: + preds = np.random.random(preds_shape) + with pytest.raises(ValueError): + cv_results._merge_predictions(preds) + + preds = np.array([ + [ + [1.0, 2.0], + [3.0, 4.0], + [5.0, 6.0], + ], + [ + [7.0, 8.0], + [9.0, 10.0], + [11.0, 12.0], + ] + ]) + ans = np.array([ + [4.0, 5.0], + [6.0, 7.0], + [8.0, 9.0], + ]) + assert np.allclose(ans, cv_results._merge_predictions(preds)) + + class TestTrainEvaluator(BaseEvaluatorTest, unittest.TestCase): _multiprocess_can_split_ = True @@ -97,6 +140,21 @@ def tearDown(self): if os.path.exists(self.ev_path): shutil.rmtree(self.ev_path) + def test_evaluate_loss(self): + D = get_binary_classification_datamanager() + backend_api = create(self.tmp_dir, self.output_dir, prefix='autoPyTorch') + backend_api.load_datamanager = lambda: D + fixed_params_dict = self.fixed_params._asdict() + fixed_params_dict.update(backend=backend_api) + evaluator = TrainEvaluator( + queue=multiprocessing.Queue(), + fixed_pipeline_params=FixedPipelineParams(**fixed_params_dict), + evaluator_params=self.eval_params + ) + evaluator.splits = None + with pytest.raises(ValueError): + evaluator.evaluate_loss() + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') def test_holdout(self, pipeline_mock): pipeline_mock.fit_dictionary = {'budget_type': 'epochs', 'epochs': 50} diff --git a/test/test_evaluation/test_pipeline_class_collection.py b/test/test_evaluation/test_pipeline_class_collection.py new file mode 100644 index 000000000..a5f9a786f --- /dev/null +++ b/test/test_evaluation/test_pipeline_class_collection.py @@ -0,0 +1,145 @@ +import unittest.mock + +from ConfigSpace import Configuration + +import numpy as np + +import pytest + +import autoPyTorch.pipeline.tabular_regression +from autoPyTorch.constants import ( + IMAGE_CLASSIFICATION, + REGRESSION_TASKS, + TABULAR_CLASSIFICATION, + TABULAR_REGRESSION, + TIMESERIES_CLASSIFICATION +) +from autoPyTorch.evaluation.pipeline_class_collection import ( + DummyClassificationPipeline, + DummyRegressionPipeline, + MyTraditionalTabularClassificationPipeline, + MyTraditionalTabularRegressionPipeline, + get_default_pipeline_config, + get_pipeline_class, +) + + +def test_get_default_pipeline_config(): + with pytest.raises(ValueError): + get_default_pipeline_config(choice='fail') + + +@pytest.mark.parametrize('task_type', ( + TABULAR_CLASSIFICATION, + TABULAR_REGRESSION +)) +@pytest.mark.parametrize('config', (1, 'tradition')) +def test_get_pipeline_class(task_type, config): + is_reg = task_type in REGRESSION_TASKS + pipeline_cls = get_pipeline_class(config, task_type) + if is_reg: + assert 'Regression' in pipeline_cls.__mro__[0].__name__ + else: + assert 'Classification' in pipeline_cls.__mro__[0].__name__ + + +@pytest.mark.parametrize('config,ans', ( + (1, DummyRegressionPipeline), + ('tradition', MyTraditionalTabularRegressionPipeline), + (unittest.mock.Mock(spec=Configuration), autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline) +)) +def test_get_pipeline_class_check_class(config, ans): + task_type = TABULAR_REGRESSION + pipeline_cls = get_pipeline_class(config, task_type) + assert ans is pipeline_cls + + +def test_get_pipeline_class_errors(): + with pytest.raises(RuntimeError): + get_pipeline_class(config=1.5, task_type=TABULAR_CLASSIFICATION) + + with pytest.raises(NotImplementedError): + get_pipeline_class(config='config', task_type=IMAGE_CLASSIFICATION) + + config = unittest.mock.Mock(spec=Configuration) + with pytest.raises(NotImplementedError): + 
get_pipeline_class(config=config, task_type=TIMESERIES_CLASSIFICATION) + + # Check callable + get_pipeline_class(config=config, task_type=IMAGE_CLASSIFICATION) + get_pipeline_class(config=config, task_type=TABULAR_REGRESSION) + + +@pytest.mark.parametrize('pipeline_cls', ( + MyTraditionalTabularClassificationPipeline, + MyTraditionalTabularRegressionPipeline +)) +def test_traditional_pipelines(pipeline_cls): + rng = np.random.RandomState() + is_reg = (pipeline_cls == MyTraditionalTabularRegressionPipeline) + pipeline = pipeline_cls( + config='random_forest', + dataset_properties={ + 'numerical_columns': None, + 'categorical_columns': None + }, + random_state=rng + ) + # Check if it is callable + pipeline.get_pipeline_representation() + + # fit and predict + n_insts = 100 + X = { + 'X_train': np.random.random((n_insts, 10)), + 'y_train': np.random.random(n_insts), + 'train_indices': np.arange(n_insts // 2), + 'val_indices': np.arange(n_insts // 2, n_insts), + 'dataset_properties': { + 'task_type': 'tabular_regression' if is_reg else 'tabular_classification', + 'output_type': 'continuous' if is_reg else 'multiclass' + } + } + if not is_reg: + X['y_train'] = np.array(X['y_train'] * 3, dtype=np.int32) + + pipeline.fit(X, y=None) + pipeline.predict(X['X_train']) + + if pipeline_cls == DummyClassificationPipeline: + pipeline.predict_proba(X['X_train']) + + assert pipeline.get_default_pipeline_config() == get_default_pipeline_config(choice='default') + for key in ['pipeline_configuration', + 'trainer_configuration', + 'configuration_origin']: + assert key in pipeline.get_additional_run_info() + + +@pytest.mark.parametrize('pipeline_cls', ( + DummyRegressionPipeline, + DummyClassificationPipeline +)) +def test_dummy_pipelines(pipeline_cls): + rng = np.random.RandomState() + pipeline = pipeline_cls( + config=1, + random_state=rng + ) + assert pipeline.get_additional_run_info() == {'configuration_origin': 'DUMMY'} + assert pipeline.get_pipeline_representation() == {'Preprocessing': 'None', 'Estimator': 'Dummy'} + assert pipeline.get_default_pipeline_config() == get_default_pipeline_config(choice='dummy') + n_insts = 100 + X = { + 'X_train': np.random.random((n_insts, 10)), + 'y_train': np.random.random(n_insts), + 'train_indices': np.arange(n_insts // 2) + } + if pipeline_cls == DummyClassificationPipeline: + X['y_train'] = np.array(X['y_train'] * 3, dtype=np.int32) + + pipeline.fit(X, y=None) + pipeline.predict(X['X_train']) + + if pipeline_cls == DummyClassificationPipeline: + pipeline.predict_proba(X['X_train']) diff --git a/test/test_evaluation/test_tae.py b/test/test_evaluation/test_tae.py new file mode 100644 index 000000000..351e7b633 --- /dev/null +++ b/test/test_evaluation/test_tae.py @@ -0,0 +1,162 @@ +import queue +import unittest.mock + +import numpy as np + +import pytest + +from smac.runhistory.runhistory import RunInfo, RunValue +from smac.tae import StatusType, TAEAbortException + +from autoPyTorch.evaluation.tae import ( + PynisherFunctionWrapperLikeType, + TargetAlgorithmQuery, + _exception_handling, + _get_eval_fn, + _get_logger, + _process_exceptions +) +from autoPyTorch.metrics import accuracy + + +def test_pynisher_function_wrapper_like_type_init(): + with pytest.raises(RuntimeError): + PynisherFunctionWrapperLikeType(lambda: None) + + +def test_get_eval_fn(): + return_value = 'test_func' + fn = _get_eval_fn(cost_for_crash=1e9, target_algorithm=lambda: return_value) + assert fn() == return_value + + +def test_get_logger(): + name = 'test_logger' + logger = 
_get_logger(logger_port=None, logger_name=name) + assert logger.name == name + + +@pytest.mark.parametrize('is_anything_exception,ans', ( + (True, StatusType.CRASHED), + (False, StatusType.SUCCESS) +)) +def test_exception_handling(is_anything_exception, ans): + obj = unittest.mock.Mock() + obj.exit_status = 1 + info = { + 'loss': 1.0, + 'status': StatusType.SUCCESS, + 'additional_run_info': {} + } + q = queue.Queue() + q.put(info) + + _, status, _, _ = _exception_handling( + obj=obj, + queue=q, + info_msg='dummy', + info_for_empty={}, + status=StatusType.DONOTADVANCE, + is_anything_exception=is_anything_exception, + worst_possible_result=1e9 + ) + assert status == ans + + +def test_process_exceptions(): + obj = unittest.mock.Mock() + q = unittest.mock.Mock() + obj.exit_status = TAEAbortException + _, _, _, info = _process_exceptions(obj=obj, queue=q, budget=1.0, worst_possible_result=1e9) + assert info['error'] == 'Your configuration of autoPyTorch did not work' + + obj.exit_status = 0 + info = { + 'loss': 1.0, + 'status': StatusType.DONOTADVANCE, + 'additional_run_info': {} + } + q = queue.Queue() + q.put(info) + + _, status, _, _ = _process_exceptions(obj=obj, queue=q, budget=0, worst_possible_result=1e9) + assert status == StatusType.SUCCESS + _, _, _, info = _process_exceptions(obj=obj, queue=q, budget=0, worst_possible_result=1e9) + assert 'empty' in info.get('error', 'no error') + + +def _create_taq(): + return TargetAlgorithmQuery( + backend=unittest.mock.Mock(), + seed=1, + metric=accuracy, + cost_for_crash=accuracy._cost_of_crash, + abort_on_first_run_crash=True, + pynisher_context=unittest.mock.Mock() + ) + + +class TestTargetAlgorithmQuery(unittest.TestCase): + def test_check_run_info(self): + taq = _create_taq() + run_info = unittest.mock.Mock() + run_info.budget = -1 + with pytest.raises(ValueError): + taq._check_run_info(run_info) + + def test_cutoff_update_in_run_wrapper(self): + taq = _create_taq() + run_info = RunInfo( + config=unittest.mock.Mock(), + instance=None, + instance_specific='dummy', + seed=0, + cutoff=8, + capped=False, + budget=1, + ) + run_info._replace() + taq.stats = unittest.mock.Mock() + taq.stats.get_remaing_time_budget.return_value = 10 + + # remaining_time - 5 < cutoff + res, _ = taq.run_wrapper(run_info) + assert res.cutoff == 5 + + # flot cutoff ==> round up + run_info = run_info._replace(cutoff=2.5) + res, _ = taq.run_wrapper(run_info) + assert res.cutoff == 3 + + def test_add_learning_curve_info(self): + # add_learning_curve_info is experimental + taq = _create_taq() + additional_run_info = {} + iter = np.arange(1, 6) + info = [ + RunValue( + cost=1e9, + time=1e9, + status=1e9, + starttime=1e9, + endtime=1e9, + additional_info={ + 'duration': 0.1 * i, + 'train_loss': 0.2 * i, + 'loss': 0.3 * i + } + ) + for i in iter + ] + taq._add_learning_curve_info( + additional_run_info=additional_run_info, + info=info + ) + + for i, key in enumerate([ + 'learning_curve_runtime', + 'train_learning_curve', + 'learning_curve' + ]): + assert key in additional_run_info + assert np.allclose(additional_run_info[key], 0.1 * iter * (i + 1)) diff --git a/test/test_evaluation/test_utils.py b/test/test_evaluation/test_utils.py index e81eea38b..d5ca69861 100644 --- a/test/test_evaluation/test_utils.py +++ b/test/test_evaluation/test_utils.py @@ -1,14 +1,58 @@ """ Tests the functionality in autoPyTorch.evaluation.utils """ +import numpy as np + import pytest -from autoPyTorch.evaluation.utils import DisableFileOutputParameters +from autoPyTorch.constants import 
STRING_TO_OUTPUT_TYPES +from autoPyTorch.evaluation.utils import ( + DisableFileOutputParameters, + ensure_prediction_array_sizes, +) + + +def test_ensure_prediction_array_sizes_errors(): + dummy = np.random.random(20) + with pytest.raises(RuntimeError): + ensure_prediction_array_sizes(dummy, 'binary', None, dummy) + with pytest.raises(ValueError): + ensure_prediction_array_sizes(dummy, 'binary', 1, None) + + +def test_ensure_prediction_array_sizes(): + output_types = list(STRING_TO_OUTPUT_TYPES.keys()) + dummy = np.random.random((20, 3)) + for output_type in output_types: + if output_type == 'multiclass': + num_classes = dummy.shape[-1] + label_examples = np.array([0, 2, 0, 2]) + unique_train_labels = list(np.unique(label_examples)) + pred = np.array([ + [0.1, 0.9], + [0.2, 0.8], + ]) + ans = np.array([ + [0.1, 0.0, 0.9], + [0.2, 0.0, 0.8] + ]) + ret = ensure_prediction_array_sizes( + prediction=pred, + output_type=output_type, + num_classes=num_classes, + unique_train_labels=unique_train_labels + ) + assert np.allclose(ans, ret) + else: + num_classes = 1 + + ret = ensure_prediction_array_sizes(dummy, output_type, num_classes, dummy) + assert np.allclose(ret, dummy) @pytest.mark.parametrize('disable_file_output', - [['pipeline', 'pipelines'], - [DisableFileOutputParameters.pipelines, DisableFileOutputParameters.pipeline]]) + [['model', 'cv_model'], + [DisableFileOutputParameters.model, DisableFileOutputParameters.cv_model]]) def test_disable_file_output_no_error(disable_file_output): """ Checks that `DisableFileOutputParameters.check_compatibility` @@ -28,7 +72,7 @@ def test_disable_file_output_error(): for a value not present in `DisableFileOutputParameters` and ensures that the expected error is raised. """ - disable_file_output = ['model'] + disable_file_output = ['dummy'] with pytest.raises(ValueError, match=r"Expected .*? to be in the members (.*?) 
of" r" DisableFileOutputParameters or as string value" r" of a member."): diff --git a/test/test_pipeline/test_pipeline.py b/test/test_pipeline/test_pipeline.py index 668930d57..e4a0caf85 100644 --- a/test/test_pipeline/test_pipeline.py +++ b/test/test_pipeline/test_pipeline.py @@ -115,12 +115,3 @@ def test_pipeline_set_config(base_pipeline): # choice, as it is not a hyperparameter from the cs assert isinstance(base_pipeline.named_steps['DummyChoice'].choice, DummyComponent) assert 'orange' == base_pipeline.named_steps['DummyChoice'].choice.b - - -def test_get_default_options(base_pipeline): - default_options = base_pipeline.get_default_pipeline_options() - # test if dict is returned - assert isinstance(default_options, dict) - for option, default in default_options.items(): - # check whether any defaults is none - assert default is not None diff --git a/test/test_pipeline/test_tabular_regression.py b/test/test_pipeline/test_tabular_regression.py index 75dc8a415..e21eb961f 100644 --- a/test/test_pipeline/test_tabular_regression.py +++ b/test/test_pipeline/test_tabular_regression.py @@ -317,3 +317,16 @@ def test_pipeline_score(fit_dictionary_tabular_dummy): # we should be able to get a decent score on this dummy data assert r2_score >= 0.8, f"Pipeline:{pipeline} Config:{config} FitDict: {fit_dictionary_tabular_dummy}, " \ f"{pipeline.named_steps['trainer'].run_summary.performance_tracker['train_metrics']}" + + +def test_get_pipeline_representation(): + pipeline = TabularRegressionPipeline( + dataset_properties={ + 'numerical_columns': None, + 'categorical_columns': None, + 'task_type': 'tabular_classification' + } + ) + repr = pipeline.get_pipeline_representation() + assert isinstance(repr, dict) + assert all(word in repr for word in ['Preprocessing', 'Estimator'])