From fcfa2cfdfc2beb3f9d430b5b45491f4b4e2f933c Mon Sep 17 00:00:00 2001 From: dengdifan Date: Thu, 8 Dec 2022 17:00:34 +0100 Subject: [PATCH 1/4] additional arguments to time series scores --- autoPyTorch/api/base_task.py | 8 ++- autoPyTorch/api/time_series_forecasting.py | 63 +++++++++++++++++++ .../example_time_series_forecasting.py | 5 ++ test/test_api/test_api.py | 13 +++- 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index 12d451ca0..d4fb4af17 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1781,7 +1781,8 @@ def predict( def score( self, y_pred: np.ndarray, - y_test: Union[np.ndarray, pd.DataFrame] + y_test: Union[np.ndarray, pd.DataFrame], + **score_kwargs: Any, ) -> Dict[str, float]: """Calculate the score on the test set. Calculate the evaluation measure on the test set. @@ -1791,6 +1792,8 @@ def score( The test predictions y_test (np.ndarray): The test ground truth labels. + score_kwargs: Any + additional arguments for computing the scores. Some metrics might require special arguments Returns: Dict[str, float]: @@ -1804,7 +1807,8 @@ def score( "Please check the log file for related errors. ") return calculate_score(target=y_test, prediction=y_pred, task_type=STRING_TO_TASK_TYPES[self.task_type], - metrics=[self._metric]) + metrics=[self._metric], + **score_kwargs) def __getstate__(self) -> Dict[str, Any]: # Cannot serialize a client! 
diff --git a/autoPyTorch/api/time_series_forecasting.py b/autoPyTorch/api/time_series_forecasting.py index 27b923576..a6328ebd1 100644 --- a/autoPyTorch/api/time_series_forecasting.py +++ b/autoPyTorch/api/time_series_forecasting.py @@ -1,4 +1,5 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union +import warnings import numpy as np @@ -590,3 +591,65 @@ def update_sliding_window_size(self, n_prediction_steps: int) -> None: ], default_value=int(np.ceil(1.25 * base_window_size)), ) + + def score( + self, + y_pred: np.ndarray, + y_test: Union[np.ndarray, pd.DataFrame], + y_test_past: Optional[List[Union[np.ndarray, pd.DataFrame]]] = None, + sp: Optional[int] = None, + n_prediction_steps: Optional[int] = None, + **score_kwargs: Any, + ) -> Dict[str, float]: + """Calculate the score on the test set. + Calculate the evaluation measure on the test set. As forecasting metrics might require some additional metrics, + they can be either given by the users or by default given by the api. + + NOTE: MASE metric requires to scale the predicted values with the previous data. However, given that we do not + have access to the previous data within this function, users need to manually compute the coefficient with the + function autoPyTorch.pipeline.components.training.metrics.metrics.compute_mase_coefficient and multiply both + y_pred and y_test with that coefficient + + Args: + y_pred (np.ndarray): + The test predictions + y_test (np.ndarray): + The test ground truth labels. + y_test_past: + sp (Optional[int]): + + score_kwargs: Any + additional arguments for computing the scores. Some metrics might require special arguments + + Returns: + Dict[str, float]: + Value of the evaluation metric calculated on the test set. + """ + y_pred = np.asarray(y_pred) + y_test = np.asarray(y_test) + assert np.all(y_pred.shape == y_test.shape), f"y_pred and y_test must have the same shape! But they are " \ + f"{y_pred.shape} and {y_test.shape}!" 
+ score_kwargs_forecasting = {} + if sp is None: + sp = self.dataset.seasonality + score_kwargs_forecasting['sp'] = sp + if n_prediction_steps is None: + n_prediction_steps = self.dataset.n_prediction_steps + score_kwargs_forecasting['n_prediction_steps'] = n_prediction_steps + if y_test_past is not None: + assert len(y_test_past) == len(y_test), f'The length of y_test_past must be equal to the length of ' \ + f'y_test.But they are {len(y_test_past)} and {len(y_test)}' + from autoPyTorch.pipeline.components.training.metrics.metrics import compute_mase_coefficient + mase_coefficient = np.asarray([compute_mase_coefficient(y_past, sp) for y_past in y_test_past]) + if len(y_pred.shape) > 2: + mase_coefficient = np.expand_dims(mase_coefficient, 1) + # Match the shape of mase_coefficient and y_test + score_kwargs_forecasting['mase_coefficient'] = mase_coefficient + else: + if self._metric.name.endswith('MASE_Forecasting'): + warnings.warn("To compute MASE losses, the past target values must be provided. 
Here we simply ignore " + "the scaling coefficient and the loss degenerate to a MAE loss") + + return super(TimeSeriesForecastingTask, self).score(np.asarray(y_pred), np.asarray(y_test), + **score_kwargs_forecasting, + **score_kwargs) diff --git a/examples/20_basics/example_time_series_forecasting.py b/examples/20_basics/example_time_series_forecasting.py index a7adba025..e508ee03b 100644 --- a/examples/20_basics/example_time_series_forecasting.py +++ b/examples/20_basics/example_time_series_forecasting.py @@ -12,6 +12,8 @@ import warnings import copy +import numpy as np + os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir() os.environ['OMP_NUM_THREADS'] = '1' os.environ['OPENBLAS_NUM_THREADS'] = '1' @@ -91,3 +93,6 @@ # test_sets2 = api.dataset.generate_test_seqs() pred = api.predict(test_sets) +# To compute the scores with AutoPyTorch, the ground truth value must be of shape [n_seq, seq_length, n_output] +# or [n_seq * seq_length, n_output] +score = api.score(np.expand_dims(pred, -1), np.expand_dims(np.asarray(y_test), -1)) diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index d95f3943f..65ae7bf57 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -39,8 +39,7 @@ from autoPyTorch.optimizer.smbo import AutoMLSMBO from autoPyTorch.pipeline.base_pipeline import BasePipeline from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners -from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy - +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy, mean_MASE_forecasting CV_NUM_SPLITS = 2 HOLDOUT_NUM_SPLITS = 1 @@ -578,6 +577,14 @@ def test_time_series_forecasting(forecasting_toy_dataset, resampling_strategy, b y_pred = estimator.predict(X_test) assert np.shape(y_pred) == np.shape(y_test) + score_mse = estimator.score(np.expand_dims(np.asarray(y_pred), -1), + np.expand_dims(np.asarray(y_test), -1)) + assert isinstance(score_mse, dict) + + 
estimator._metric = mean_MASE_forecasting + score_mase = estimator.score(np.expand_dims(np.asarray(y_pred), -1), + np.expand_dims(np.asarray(y_test), -1), + y_test_past=y_train) # Test refit on dummy data estimator.refit(dataset=backend.load_datamanager()) @@ -585,6 +592,8 @@ def test_time_series_forecasting(forecasting_toy_dataset, resampling_strategy, b assert isinstance(estimator.get_search_space(), CS.ConfigurationSpace) + + @pytest.mark.parametrize('openml_id', ( 1590, # Adult to test NaN in categorical columns )) From 84fe605b4f415932661546390ffa5fe8cf05cf0b Mon Sep 17 00:00:00 2001 From: dengdifan Date: Thu, 8 Dec 2022 17:03:18 +0100 Subject: [PATCH 2/4] delete unnecessary assignment --- .../evaluation/time_series_forecasting_train_evaluator.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/autoPyTorch/evaluation/time_series_forecasting_train_evaluator.py b/autoPyTorch/evaluation/time_series_forecasting_train_evaluator.py index 0940d1e9a..4d6de750d 100644 --- a/autoPyTorch/evaluation/time_series_forecasting_train_evaluator.py +++ b/autoPyTorch/evaluation/time_series_forecasting_train_evaluator.py @@ -146,10 +146,7 @@ def __init__(self, backend: Backend, queue: Queue, self.num_sequences = self.datamanager.num_sequences self.num_targets = self.datamanager.num_targets self.seq_length_min = np.min(self.num_sequences) - seasonality = SEASONALITY_MAP.get(self.datamanager.freq, 1) - if isinstance(seasonality, list): - seasonality = min(seasonality) # Use to calculate MASE - self.seasonality = int(seasonality) # type: ignore[call-overload] + self.seasonality = self.datamanager.seasonality self.max_budget = max_budget self.min_num_test_instances = min_num_test_instances From 7cba1dc3506eca8484d5c9b2c56ccca54c54cc8d Mon Sep 17 00:00:00 2001 From: dengdifan Date: Mon, 19 Dec 2022 11:26:32 +0100 Subject: [PATCH 3/4] additional docstring for api basetasks --- autoPyTorch/api/base_task.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py index d4fb4af17..eb17bb1e1 100644 --- a/autoPyTorch/api/base_task.py +++ b/autoPyTorch/api/base_task.py @@ -1793,7 +1793,9 @@ def score( y_test (np.ndarray): The test ground truth labels. score_kwargs: Any - additional arguments for computing the scores. Some metrics might require special arguments + additional arguments for computing the scores. Some metrics might require special arguments. Currently, + this argument is required by time series forecasting tasks. For detailed information, please check + autoPyTorch/api/time_series_forecasting.py Returns: Dict[str, float]: From 1e2180b88903f343d6d3306819e7df6ce2759684 Mon Sep 17 00:00:00 2001 From: dengdifan Date: Mon, 19 Dec 2022 11:29:25 +0100 Subject: [PATCH 4/4] fix time features for gluonts --- autoPyTorch/datasets/time_series_dataset.py | 2 +- setup.py | 2 +- test/test_datasets/test_time_series_datasets.py | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/autoPyTorch/datasets/time_series_dataset.py b/autoPyTorch/datasets/time_series_dataset.py index 4c3565172..f61764c7f 100644 --- a/autoPyTorch/datasets/time_series_dataset.py +++ b/autoPyTorch/datasets/time_series_dataset.py @@ -132,7 +132,7 @@ def __init__(self, Y: np.ndarray, start_time: Optional[pd.DatetimeIndex] = None, freq: str = '1Y', - time_feature_transform: List[TimeFeature] = [ConstantTransform], + time_feature_transform: List[TimeFeature] = [ConstantTransform()], X_test: Optional[np.ndarray] = None, Y_test: Optional[np.ndarray] = None, train_transforms: Optional[torchvision.transforms.Compose] = None, diff --git a/setup.py b/setup.py index 7a8e7bac6..3fda6cf35 100755 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ include_package_data=True, extras_require={ "forecasting": [ - "gluonts>=0.10.0", + "gluonts>=0.11.0", "sktime", "pytorch-forecasting", ], diff --git a/test/test_datasets/test_time_series_datasets.py 
b/test/test_datasets/test_time_series_datasets.py index 68d866e09..5f5af6d27 100644 --- a/test/test_datasets/test_time_series_datasets.py +++ b/test/test_datasets/test_time_series_datasets.py @@ -2,7 +2,7 @@ from typing import Callable, List, Tuple from gluonts.time_feature import Constant as ConstantTransform -from gluonts.time_feature import DayOfMonth +from gluonts.time_feature import day_of_month import numpy as np @@ -12,7 +12,6 @@ import torch - from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes, NoResamplingStrategyTypes from autoPyTorch.datasets.time_series_dataset import ( TimeSeriesForecastingDataset, @@ -40,7 +39,7 @@ def setUp(self) -> None: self.x_test_data = rng.rand(self.n_prediction_steps, 5) self.y_test = rng.rand(self.n_prediction_steps, 1) - self.time_feature_transform = [DayOfMonth(), ConstantTransform(10.0)] + self.time_feature_transform = [day_of_month, ConstantTransform()] self.known_future_features_index = [0, 2] self.seq_uni = TimeSeriesSequence(X=None, Y=self.y, n_prediction_steps=self.n_prediction_steps, @@ -122,8 +121,8 @@ def test_uni_get_update_time_features(self): self.assertEqual(len(self.seq_uni._cached_time_features), len(self.y)) self.assertTrue(list(past_features.shape) == [3 + 1, len(self.time_feature_transform)]) self.assertTrue(list(future_features.shape) == [self.n_prediction_steps, len(self.time_feature_transform)]) - self.assertTrue(torch.all(past_features[:, 1] == 10.)) - self.assertTrue(torch.all(future_features[:, 1] == 10.)) + self.assertTrue(torch.all(past_features[:, 1] == 0.)) + self.assertTrue(torch.all(future_features[:, 1] == 0.)) def test_uni_to_test_set(self): self.seq_uni.transform_time_features = True