Reg cocktails #358 (Draft)
wants to merge 50 commits into base: development
Changes from all commits (50 commits):
e138b69
First push for Mix/cut regularization
franchuterivera Feb 19, 2021
3b4feae
Add cyclic property to lr scheduler and use_swa to trainer
ravinkohli Feb 19, 2021
b845d4d
ADD tests for SWA, SE
ravinkohli Feb 22, 2021
8a2364d
ADD tests for LookAhead
ravinkohli Feb 22, 2021
e2ee40a
Added default for lookahead config
ravinkohli Feb 22, 2021
6e0d2bb
Fix errors in Adversarial training
ravinkohli Feb 23, 2021
9290b07
Fix pickling error for swa model
ravinkohli Feb 23, 2021
c35c795
Fix issues after rebase from refactor_development
ravinkohli Feb 23, 2021
2512709
Added n_restarts as hyperparameter for CosineAnnealing
ravinkohli Mar 1, 2021
41677fd
fix bug with early stopping and swa
ravinkohli Mar 1, 2021
e236df0
cont...
ravinkohli Mar 1, 2021
f29a7e0
Addressed comments from arlind, change in T_mul and T_0 calculations
ravinkohli Mar 7, 2021
f141d06
Updating search space (#156)
ArlindKadra Apr 6, 2021
9339895
Adding constant clause in the testing module for hyperparameter range…
ArlindKadra Apr 12, 2021
126f7d4
Updating implementation for tabular regression
ArlindKadra Apr 12, 2021
f6f05ba
Fixing buggy implementation of the network head with constant updates
ArlindKadra Apr 15, 2021
456e261
Updating implementation
ArlindKadra Apr 20, 2021
c7af699
Implementation fix for constant updates to skip connections. Multibra…
ArlindKadra Apr 20, 2021
2102b08
Fixing the implementation for weight decay in the case of fixed updat…
ArlindKadra Apr 20, 2021
18da2bd
update setup.py
ravinkohli Apr 21, 2021
18bdabf
Updating implementation of the reg cocktails so that it is compatible…
ArlindKadra Apr 22, 2021
0c2c604
Create fit evaluator, no resampling strategy and fix bug for test sta…
ravinkohli Apr 30, 2021
6d4790f
Additional metrics during train (#194)
ravinkohli May 3, 2021
5168ba5
Fixing issues with imbalanced datasets (#197)
ArlindKadra May 7, 2021
23d808b
Reproducibility in cocktail (#204)
ravinkohli May 11, 2021
6283c56
fix bug in adversarial trainer (#207)
ravinkohli May 11, 2021
bc0540b
Add dropout shape as a hyperparameter (#213)
ravinkohli May 14, 2021
5d6062f
Change weighted loss to categorical and fix for test adversarial trai…
ravinkohli May 14, 2021
622c185
added no head (#218)
ravinkohli May 17, 2021
c4b7729
Fix bugs in cutout training (#233)
ravinkohli May 21, 2021
0c8d2ff
Cocktail hotfixes (#245)
ArlindKadra Jun 3, 2021
c1a73f8
[refactor] Address Shuhei's comments
nabenabe0928 Sep 13, 2021
769e041
[doc] Add referencing to each regularization techniques
nabenabe0928 Sep 15, 2021
0da4f72
[fix] Address Ravin's comments and fix range issues in row cut
nabenabe0928 Sep 21, 2021
c4a4565
[doc] Add the reference to the fit_dictionary
nabenabe0928 Sep 21, 2021
6543316
Bug fixes (#249)
ArlindKadra Oct 21, 2021
392f07a
[FIX] Passing checks (#298)
ravinkohli Dec 7, 2021
02e97a1
[FIX] Tests after rebase of `reg_cocktails` (#359)
ravinkohli Dec 10, 2021
03ddb64
rebase and fix flake
ravinkohli Dec 21, 2021
59b5830
fix merge conflicts after rebase
ravinkohli Jan 28, 2022
c3b8844
[FIX] Enable preprocessing in reg_cocktails (#369)
ravinkohli Feb 9, 2022
c1fffa1
fixes after rebase
ravinkohli Feb 28, 2022
366bede
[FIX] SWA and SE with non cyclic schedulers (#395)
ravinkohli Mar 9, 2022
637a68b
fixes after rebase
ravinkohli Mar 9, 2022
e69ff3b
fix tests after rebase
ravinkohli Jul 26, 2022
c138173
fix mypy and flake
ravinkohli Jul 26, 2022
afddca5
fix silly removal of lightgbm
ravinkohli Jul 26, 2022
34c704d
[add] documentation update in base trainer (#468)
theodorju Aug 12, 2022
d29d11b
[FIX] apply cutout for each row. (#481)
ravinkohli Sep 23, 2022
873df9a
[FIX] ROC AUC for multi class classification (#482)
ravinkohli Oct 17, 2022
380 changes: 307 additions & 73 deletions autoPyTorch/api/base_task.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions autoPyTorch/api/tabular_classification.py
@@ -254,7 +254,7 @@ def search(
         memory_limit: int = 4096,
         smac_scenario_args: Optional[Dict[str, Any]] = None,
         get_smac_object_callback: Optional[Callable] = None,
-        all_supported_metrics: bool = True,
+        all_supported_metrics: bool = False,
         precision: int = 32,
         disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
         load_models: bool = True,
@@ -354,7 +354,7 @@ def search(
             TargetAlgorithm to be optimised. If None, `eval_function`
             available in autoPyTorch/evaluation/train_evaluator is used.
             Must be child class of AbstractEvaluator.
-            all_supported_metrics (bool: default=True):
+            all_supported_metrics (bool: default=False):
                 If True, all metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int: default=32):
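The net effect of this diff is that `all_supported_metrics` now defaults to `False`, so by default only the optimisation metric is computed for each pipeline instead of every metric supported by the task. A minimal sketch of that behavioural difference, assuming stand-in metric functions (`accuracy`, `error_rate`, and `score_pipeline` are illustrative names, not autoPyTorch internals):

```python
from typing import Callable, Dict, List

def accuracy(y_true: List[int], y_pred: List[int]) -> float:
    return sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)

def error_rate(y_true: List[int], y_pred: List[int]) -> float:
    return 1.0 - accuracy(y_true, y_pred)

# All metrics the (hypothetical) task supports.
SUPPORTED_METRICS: Dict[str, Callable[[List[int], List[int]], float]] = {
    "accuracy": accuracy,
    "error_rate": error_rate,
}

def score_pipeline(y_true: List[int], y_pred: List[int],
                   opt_metric: str = "accuracy",
                   all_supported_metrics: bool = False) -> Dict[str, float]:
    # New default (False): evaluate only the optimisation metric.
    # Opting in with True restores the old score-everything behaviour.
    if all_supported_metrics:
        chosen = SUPPORTED_METRICS
    else:
        chosen = {opt_metric: SUPPORTED_METRICS[opt_metric]}
    return {name: fn(y_true, y_pred) for name, fn in chosen.items()}

y_true, y_pred = [1, 0, 1, 1], [1, 0, 0, 1]
print(score_pipeline(y_true, y_pred))                              # only the optimisation metric
print(score_pipeline(y_true, y_pred, all_supported_metrics=True))  # every supported metric
```

Computing only the optimisation metric keeps per-pipeline evaluation cheap; the extra metrics are opt-in via `search(..., all_supported_metrics=True)`.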
5 changes: 2 additions & 3 deletions autoPyTorch/api/tabular_regression.py
@@ -79,7 +79,6 @@ class TabularRegressionTask(BaseTask):
             Search space updates that can be used to modify the search
             space of particular components or choice modules of the pipeline
     """
-
     def __init__(
         self,
         seed: int = 1,
@@ -254,7 +253,7 @@ def search(
         memory_limit: int = 4096,
         smac_scenario_args: Optional[Dict[str, Any]] = None,
         get_smac_object_callback: Optional[Callable] = None,
-        all_supported_metrics: bool = True,
+        all_supported_metrics: bool = False,
        precision: int = 32,
         disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
         load_models: bool = True,
@@ -354,7 +353,7 @@ def search(
             TargetAlgorithm to be optimised. If None, `eval_function`
             available in autoPyTorch/evaluation/train_evaluator is used.
             Must be child class of AbstractEvaluator.
-            all_supported_metrics (bool: default=True):
+            all_supported_metrics (bool: default=False):
                 If True, all metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int: default=32):
10 changes: 8 additions & 2 deletions autoPyTorch/api/time_series_forecasting.py
@@ -289,7 +289,7 @@ def search(
         memory_limit: Optional[int] = 4096,
         smac_scenario_args: Optional[Dict[str, Any]] = None,
         get_smac_object_callback: Optional[Callable] = None,
-        all_supported_metrics: bool = True,
+        all_supported_metrics: bool = False,
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
@@ -396,7 +396,7 @@ def search(
             instances, num_params, runhistory, seed and ta. This is
             an advanced feature. Use only if you are familiar with
             [SMAC](https://automl.github.io/SMAC3/master/index.html).
-            all_supported_metrics (bool), (default=True): if True, all
+            all_supported_metrics (bool), (default=False): if True, all
                 metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results
             precision (int), (default=32): Numeric precision used when loading
@@ -526,6 +526,9 @@ def predict(
             predicted value, it needs to be with shape (B, H, N),
             B is the number of series, H is forecasting horizon (n_prediction_steps), N is the number of targets
         """
+        if self.dataset is None:
+            raise AttributeError(f"Expected dataset to be initialised when predicting in {self.__class__.__name__}")
+
         if X_test is None or not isinstance(X_test[0], TimeSeriesSequence):
             assert past_targets is not None
             # Validate and construct TimeSeriesSequence
@@ -566,6 +569,9 @@ def update_sliding_window_size(self, n_prediction_steps: int) -> None:
             forecast horizon. Sometimes we could also make our base sliding window size based on the
             forecast horizon
         """
+        if self.dataset is None:
+            raise AttributeError(f"Expected dataset to be initialised when updating sliding window"
+                                 f" in {self.__class__.__name__}")
         base_window_size = int(np.ceil(self.dataset.base_window_size))
         # we don't want base window size to large, which might cause a too long computation time, in which case
         # we will use n_prediction_step instead (which is normally smaller than base_window_size)
58 changes: 50 additions & 8 deletions autoPyTorch/data/base_feature_validator.py
@@ -1,5 +1,5 @@
 import logging
-from typing import List, Optional, Union
+from typing import List, Optional, Set, Tuple, Union

 import numpy as np

@@ -24,24 +24,21 @@ class BaseFeatureValidator(BaseEstimator):
             List of the column types found by this estimator during fit.
         data_type (str):
             Class name of the data type provided during fit.
-        column_transformer (Optional[BaseEstimator])
+        encoder (Optional[BaseEstimator])
             Host a encoder object if the data requires transformation (for example,
-            if provided a categorical column in a pandas DataFrame)
-        transformed_columns (List[str])
-            List of columns that were encoded.
+            if provided a categorical column in a pandas DataFrame).
     """
     def __init__(
         self,
         logger: Optional[Union[PicklableClientLogger, logging.Logger]] = None,
-    ):
+    ) -> None:
         # Register types to detect unsupported data format changes
         self.feat_types: Optional[List[str]] = None
         self.data_type: Optional[type] = None
         self.dtypes: List[str] = []
         self.column_order: List[str] = []

         self.column_transformer: Optional[BaseEstimator] = None
-        self.transformed_columns: List[str] = []

         self.logger: Union[
             PicklableClientLogger, logging.Logger
@@ -52,6 +49,9 @@ def __init__(
         self.categories: List[List[int]] = []
         self.categorical_columns: List[int] = []
         self.numerical_columns: List[int] = []
+        self.encode_columns: List[str] = []
+
+        self.all_nan_columns: Optional[Set[Union[int, str]]] = None

         self._is_fitted = False

@@ -75,7 +75,7 @@ def fit(

         # If a list was provided, it will be converted to pandas
         if isinstance(X_train, list):
-            X_train, X_test = self.list_to_dataframe(X_train, X_test)
+            X_train, X_test = self.list_to_pandas(X_train, X_test)

         self._check_data(X_train)

@@ -109,6 +109,22 @@ def _fit(
         self:
             The fitted base estimator
         """

         raise NotImplementedError()

+    def _check_data(
+        self,
+        X: SupportedFeatTypes,
+    ) -> None:
+        """
+        Feature dimensionality and data type checks
+
+        Args:
+            X (SupportedFeatTypes):
+                A set of features that are going to be validated (type and dimensionality
+                checks) and a encoder fitted in the case the data needs encoding
+        """
+
+        raise NotImplementedError()
+
     def transform(
@@ -125,4 +141,30 @@ def transform(
         np.ndarray:
             The transformed array
         """

         raise NotImplementedError()
+
+    def list_to_pandas(
+        self,
+        X_train: SupportedFeatTypes,
+        X_test: Optional[SupportedFeatTypes] = None,
+    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
+        """
+        Converts a list to a pandas DataFrame. In this process, column types are inferred.
+
+        If test data is provided, we proactively match it to train data
+
+        Args:
+            X_train (SupportedFeatTypes):
+                A set of features that are going to be validated (type and dimensionality
+                checks) and a encoder fitted in the case the data needs encoding
+            X_test (Optional[SupportedFeatTypes]):
+                A hold out set of data used for checking
+        Returns:
+            pd.DataFrame:
+                transformed train data from list to pandas DataFrame
+            pd.DataFrame:
+                transformed test data from list to pandas DataFrame
+        """
+
+        raise NotImplementedError()
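The new `list_to_pandas` stub promises type inference plus "proactively match it to train data". A minimal standalone sketch of what a concrete subclass could do (the dtype-matching step via `astype` is an assumption; the real autoPyTorch implementation may differ):

```python
from typing import List, Optional, Tuple

import pandas as pd


def list_to_pandas(
    X_train: List,
    X_test: Optional[List] = None,
) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
    # Column types are inferred from the training data.
    train_df = pd.DataFrame(X_train).infer_objects()
    test_df = None
    if X_test is not None:
        # Proactively force the test frame onto the train schema,
        # so both frames agree column-by-column on dtype.
        test_df = pd.DataFrame(X_test).astype(train_df.dtypes.to_dict())
    return train_df, test_df


train, test = list_to_pandas([[1, "a"], [2, "b"]], [[3, "c"]])
print(train.dtypes.tolist())
```

Matching the test frame to the train schema up front is what makes later `_check_data` dtype comparisons meaningful.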
4 changes: 2 additions & 2 deletions autoPyTorch/data/base_target_validator.py
@@ -36,7 +36,7 @@ def __init__(self,
                     logging.Logger
                 ]
             ] = None,
-    ):
+    ) -> None:
         self.is_classification = is_classification

         self.data_type: Optional[type] = None
@@ -131,7 +131,7 @@ def _fit(

     def transform(
         self,
-        y: Union[SupportedTargetTypes],
+        y: SupportedTargetTypes,
     ) -> np.ndarray:
         """
         Args:
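The `Union[SupportedTargetTypes]` → `SupportedTargetTypes` change is purely cosmetic: `typing.Union` collapses a single-member union to the type itself, so the one-element wrapper added noise without changing the annotation. A quick demonstration:

```python
from typing import Union

# A one-element Union IS the wrapped type; duplicates are also collapsed.
assert Union[int] is int
assert Union[str, str] is str

print(Union[int])  # <class 'int'>
```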