diff --git a/README.md b/README.md index 9d2b525..1dbca4a 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ run_forecast( #### Parameters description: - ```train_data``` is a delta table name that stores the input dataset. -- ```scoring_data``` is a delta table name that stores the [dynamic future regressors](https://nixtlaverse.nixtla.io/neuralforecast/examples/exogenous_variables.html#3-training-with-exogenous-variables). If not provided or if the same name as ```train_data``` is provided, the models will ignore the future dynamical regressors. +- ```scoring_data``` is a delta table name that stores the [dynamic future regressors](https://nixtlaverse.nixtla.io/statsforecast/docs/how-to-guides/exogenous.html). If not provided or if the same name as ```train_data``` is provided, the models will ignore the future dynamical regressors. - ```scoring_output``` is a delta table where you write your forecasting output. This table will be created if does not exist - ```evaluation_output``` is a delta table where you write the evaluation results from all backtesting trials from all time series and all models. This table will be created if does not exist. - ```group_id``` is a column storing the unique id that groups your dataset to each time series. diff --git a/examples/global_external_regressors_daily.py b/examples/global_external_regressors_daily.py index b9f2dc2..0135e9a 100644 --- a/examples/global_external_regressors_daily.py +++ b/examples/global_external_regressors_daily.py @@ -90,7 +90,7 @@ # COMMAND ---------- # MAGIC %md -# MAGIC Note that in `rossmann_daily_train` we have our target variable `Sales` but not in `rossmann_daily_test`. This is because `rossmann_daily_test` is going to be used as our `scoring_data` that stores `dynamic_future` variables of the future dates. When you adapt this notebook to your use case, make sure to comply with these datasets formats. 
See neuralforecast's [documentation](https://nixtlaverse.nixtla.io/neuralforecast/examples/exogenous_variables.html) for more detail on exogenous regressors. +# MAGIC Note that in `rossmann_daily_train` we have our target variable `Sales` but not in `rossmann_daily_test`. This is because `rossmann_daily_test` is going to be used as our `scoring_data` that stores `dynamic_future_categorical` variables of the future dates. When you adapt this notebook to your use case, make sure to comply with these datasets formats. See neuralforecast's [documentation](https://nixtlaverse.nixtla.io/neuralforecast/examples/exogenous_variables.html) for more detail on exogenous regressors. # COMMAND ---------- @@ -118,7 +118,7 @@ # MAGIC %md ### Run MMF # MAGIC -# MAGIC Now, we run the evaluation and forecasting using `run_forecast` function. We are providing the training table and the scoring table names. If `scoring_data` is not provided or if the same name as `train_data` is provided, the models will ignore the `dynamic_future` regressors. Note that we are providing a covariate field (i.e. `dynamic_future`) this time in `run_forecast` function called in [examples/run_external_regressors_daily.py](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/run_external_regressors_daily.py). There are also other convariate fields, namely `static_features`, and `dynamic_historical`, which you can provide. Read more about these covariates in [neuralforecast's documentation](https://nixtlaverse.nixtla.io/neuralforecast/examples/exogenous_variables.html). +# MAGIC Now, we run the evaluation and forecasting using `run_forecast` function. We are providing the training table and the scoring table names. If `scoring_data` is not provided or if the same name as `train_data` is provided, the models will ignore the `dynamic_future_numerical` and `dynamic_future_categorical` regressors. Note that we are providing a covariate field (i.e. 
`dynamic_future_numerical` or `dynamic_future_categorical`) this time in `run_forecast` function called in [examples/run_external_regressors_daily.py](https://github.com/databricks-industry-solutions/many-model-forecasting/blob/main/examples/run_external_regressors_daily.py). There are also other covariate fields, namely `static_features`, and `dynamic_historical_numerical` and `dynamic_historical_categorical`, which you can provide. Read more about these covariates in [neuralforecast's documentation](https://nixtlaverse.nixtla.io/neuralforecast/examples/exogenous_variables.html). # COMMAND ---------- diff --git a/examples/local_univariate_external_regressors_daily.py b/examples/local_univariate_external_regressors_daily.py index 0408c4a..0206d45 100644 --- a/examples/local_univariate_external_regressors_daily.py +++ b/examples/local_univariate_external_regressors_daily.py @@ -90,7 +90,7 @@ # COMMAND ---------- # MAGIC %md -# MAGIC Note that in `rossmann_daily_train` we have our target variable `Sales` but not in `rossmann_daily_test`. This is because `rossmann_daily_test` is going to be used as our `scoring_data` that stores `dynamic_future` variables of the future dates. When you adapt this notebook to your use case, make sure to comply with these datasets formats. See statsforecast's [documentation](https://nixtlaverse.nixtla.io/statsforecast/docs/how-to-guides/exogenous.html) for more detail on exogenous regressors. +# MAGIC Note that in `rossmann_daily_train` we have our target variable `Sales` but not in `rossmann_daily_test`. This is because `rossmann_daily_test` is going to be used as our `scoring_data` that stores `dynamic_future_categorical` variables of the future dates. When you adapt this notebook to your use case, make sure to comply with these datasets formats. See statsforecast's [documentation](https://nixtlaverse.nixtla.io/statsforecast/docs/how-to-guides/exogenous.html) for more detail on exogenous regressors. 
# COMMAND ---------- @@ -134,7 +134,7 @@ # MAGIC %md ### Run MMF # MAGIC -# MAGIC Now, we run the evaluation and forecasting using `run_forecast` function. We are providing the training table and the scoring table names. If `scoring_data` is not provided or if the same name as `train_data` is provided, the models will ignore the `dynamic_future` regressors. Note that we are providing a covariate field (i.e. `dynamic_future`) this time. There are also other convariate fields, namely `static_features`, and `dynamic_historical`, but these are only relevant with the global models. +# MAGIC Now, we run the evaluation and forecasting using `run_forecast` function. We are providing the training table and the scoring table names. If `scoring_data` is not provided or if the same name as `train_data` is provided, the models will ignore the `dynamic_future_numerical` and `dynamic_future_categorical` regressors. Note that we are providing a covariate field (i.e. `dynamic_future_numerical` or `dynamic_future_categorical`) this time. There are also other covariate fields, namely `static_features`, `dynamic_historical_numerical` and `dynamic_historical_categorical`, but these are only relevant with the global models. 
# COMMAND ---------- @@ -148,7 +148,7 @@ date_col="Date", target="Sales", freq="D", - dynamic_future=["DayOfWeek", "Open", "Promo", "SchoolHoliday"], + dynamic_future_categorical=["DayOfWeek", "Open", "Promo", "SchoolHoliday"], prediction_length=10, backtest_months=1, stride=10, diff --git a/examples/run_external_regressors_daily.py b/examples/run_external_regressors_daily.py index b2d96f8..d145540 100644 --- a/examples/run_external_regressors_daily.py +++ b/examples/run_external_regressors_daily.py @@ -36,7 +36,7 @@ date_col="Date", target="Sales", freq="D", - dynamic_future=["DayOfWeek", "Open", "Promo", "SchoolHoliday"], + dynamic_future_categorical=["DayOfWeek", "Open", "Promo", "SchoolHoliday"], prediction_length=10, backtest_months=1, stride=10, diff --git a/mmf_sa/__init__.py b/mmf_sa/__init__.py index 348bb4a..ab72ac5 100644 --- a/mmf_sa/__init__.py +++ b/mmf_sa/__init__.py @@ -28,8 +28,10 @@ def run_forecast( model_output: str = None, use_case_name: str = None, static_features: List[str] = None, - dynamic_future: List[str] = None, - dynamic_historical: List[str] = None, + dynamic_future_numerical: List[str] = None, + dynamic_future_categorical: List[str] = None, + dynamic_historical_numerical: List[str] = None, + dynamic_historical_categorical: List[str] = None, active_models: List[str] = None, accelerator: str = "cpu", backtest_retrain: bool = None, @@ -63,8 +65,10 @@ def run_forecast( model_output (str): A string specifying the output path for the model. use_case_name (str): A string specifying the use case name. static_features (List[str]): A list of strings specifying the static features. - dynamic_future (List[str]): A list of strings specifying the dynamic future features. - dynamic_historical (List[str]): A list of strings specifying the dynamic historical features. + dynamic_future_numerical (List[str]): A list of strings specifying the dynamic future features that are numerical. 
+ dynamic_future_categorical (List[str]): A list of strings specifying the dynamic future features that are categorical. + dynamic_historical_numerical (List[str]): A list of strings specifying the dynamic historical features that are numerical. + dynamic_historical_categorical (List[str]): A list of strings specifying the dynamic historical features that are categorical. active_models (List[str]): A list of strings specifying the active models. accelerator (str): A string specifying the accelerator to use: cpu or gpu. Default is cpu. backtest_retrain (bool): A boolean specifying whether to retrain the model during backtesting. Currently, not supported. @@ -137,10 +141,14 @@ def run_forecast( _conf["data_quality_check"] = data_quality_check if static_features is not None: _conf["static_features"] = static_features - if dynamic_future is not None: - _conf["dynamic_future"] = dynamic_future - if dynamic_historical is not None: - _conf["dynamic_historical"] = dynamic_historical + if dynamic_future_numerical is not None: + _conf["dynamic_future_numerical"] = dynamic_future_numerical + if dynamic_future_categorical is not None: + _conf["dynamic_future_categorical"] = dynamic_future_categorical + if dynamic_historical_numerical is not None: + _conf["dynamic_historical_numerical"] = dynamic_historical_numerical + if dynamic_historical_categorical is not None: + _conf["dynamic_historical_categorical"] = dynamic_historical_categorical if run_id is not None: _conf["run_id"] = run_id diff --git a/mmf_sa/data_quality_checks.py b/mmf_sa/data_quality_checks.py index e6921db..31592ec 100644 --- a/mmf_sa/data_quality_checks.py +++ b/mmf_sa/data_quality_checks.py @@ -43,8 +43,10 @@ def _external_regressors_check(self): """ if ( self.conf.get("static_features", None) - or self.conf.get("dynamic_future", None) - or self.conf.get("dynamic_historical", None) + or self.conf.get("dynamic_future_numerical", None) + or self.conf.get("dynamic_future_categorical", None) + or 
self.conf.get("dynamic_historical_numerical", None) + or self.conf.get("dynamic_historical_categorical", None) ): if self.conf.get("resample"): raise Exception( @@ -77,19 +79,29 @@ def _multiple_checks( # 1. Checking for nulls in external regressors static_features = conf.get("static_features", None) - dynamic_future = conf.get("dynamic_future", None) - dynamic_historical = conf.get("dynamic_historical", None) + dynamic_future_numerical = conf.get("dynamic_future_numerical", None) + dynamic_future_categorical = conf.get("dynamic_future_categorical", None) + dynamic_historical_numerical = conf.get("dynamic_historical_numerical", None) + dynamic_historical_categorical = conf.get("dynamic_historical_categorical", None) if static_features: if _df[static_features].isnull().values.any(): - # Removing: null in static categoricals + # Removing: null in static categorical return pd.DataFrame() - if dynamic_future: - if _df[dynamic_future].isnull().values.any(): - # Removing: null in dynamic future + if dynamic_future_numerical: + if _df[dynamic_future_numerical].isnull().values.any(): + # Removing: null in dynamic future numerical return pd.DataFrame() - if dynamic_historical: - if _df[dynamic_historical].isnull().values.any(): - # Removing: null in dynamic historical + if dynamic_future_categorical: + if _df[dynamic_future_categorical].isnull().values.any(): + # Removing: null in dynamic future categorical + return pd.DataFrame() + if dynamic_historical_numerical: + if _df[dynamic_historical_numerical].isnull().values.any(): + # Removing: null in dynamic historical numerical + return pd.DataFrame() + if dynamic_historical_categorical: + if _df[dynamic_historical_categorical].isnull().values.any(): + # Removing: null in dynamic historical categorical return pd.DataFrame() # 2. 
Checking for training period length diff --git a/mmf_sa/forecasting_conf.yaml b/mmf_sa/forecasting_conf.yaml index d9ad660..fd11e79 100644 --- a/mmf_sa/forecasting_conf.yaml +++ b/mmf_sa/forecasting_conf.yaml @@ -11,12 +11,16 @@ accelerator: cpu static_features: #- State -dynamic_future: +dynamic_future_numerical: + +dynamic_future_categorical: #- Open #- Promo #- DayOfWeek -dynamic_historical: +dynamic_historical_numerical: + +dynamic_historical_categorical: active_models: - StatsForecastBaselineWindowAverage diff --git a/mmf_sa/models/models_conf.yaml b/mmf_sa/models/models_conf.yaml index 93dfe6e..79fbd39 100644 --- a/mmf_sa/models/models_conf.yaml +++ b/mmf_sa/models/models_conf.yaml @@ -10,8 +10,10 @@ promoted_props: - backtest_months - stride - static_features - - dynamic_future - - dynamic_historical + - dynamic_future_numerical + - dynamic_future_categorical + - dynamic_historical_numerical + - dynamic_historical_categorical models: diff --git a/mmf_sa/models/neuralforecast/NeuralForecastPipeline.py b/mmf_sa/models/neuralforecast/NeuralForecastPipeline.py index 2fbacac..bba16ef 100644 --- a/mmf_sa/models/neuralforecast/NeuralForecastPipeline.py +++ b/mmf_sa/models/neuralforecast/NeuralForecastPipeline.py @@ -68,27 +68,28 @@ def prepare_data(self, df: pd.DataFrame, future: bool = False) -> pd.DataFrame: if not future: # Prepare historical dataframe with or without exogenous regressors for training df[self.params.target] = df[self.params.target].clip(0) - if 'dynamic_future' in self.params.keys(): + features = [self.params.group_id, self.params.date_col, self.params.target] + if 'dynamic_future_numerical' in self.params.keys(): try: - _df = ( - df[[self.params.group_id, self.params.date_col, self.params.target] - + self.params.dynamic_future] - ) + features = features + self.params.dynamic_future_numerical except Exception as e: - raise Exception(f"Dynamic future regressor columns missing from " - f"the training dataset: {e}") - elif 'dynamic_historical' in 
self.params.keys(): + raise Exception(f"Dynamic future numerical missing: {e}") + if 'dynamic_future_categorical' in self.params.keys(): try: - _df = ( - df[[self.params.group_id, self.params.date_col, self.params.target] - + self.params.dynamic_historical] - ) + features = features + self.params.dynamic_future_categorical except Exception as e: - raise Exception(f"Dynamic historical regressor columns missing from " - f"the training dataset: {e}") - else: - _df = df[[self.params.group_id, self.params.date_col, self.params.target]] - + raise Exception(f"Dynamic future categorical missing: {e}") + if 'dynamic_historical_numerical' in self.params.keys(): + try: + features = features + self.params.dynamic_historical_numerical + except Exception as e: + raise Exception(f"Dynamic historical numerical missing: {e}") + if 'dynamic_historical_categorical' in self.params.keys(): + try: + features = features + self.params.dynamic_historical_categorical + except Exception as e: + raise Exception(f"Dynamic historical categorical missing: {e}") + _df = df[features] _df = ( _df.rename( columns={ @@ -100,16 +101,18 @@ def prepare_data(self, df: pd.DataFrame, future: bool = False) -> pd.DataFrame: ) else: # Prepare future dataframe with exogenous regressors for forecasting - if 'dynamic_future' in self.params.keys(): + features = [self.params.group_id, self.params.date_col] + if 'dynamic_future_numerical' in self.params.keys(): + try: + features = features + self.params.dynamic_future_numerical + except Exception as e: + raise Exception(f"Dynamic future numerical missing: {e}") + if 'dynamic_future_categorical' in self.params.keys(): try: - _df = ( - df[[self.params.group_id, self.params.date_col] - + self.params.dynamic_future] - ) + features = features + self.params.dynamic_future_categorical except Exception as e: - raise Exception(f"Dynamic future regressors missing: {e}") - else: - _df = df[[self.params.group_id, self.params.date_col]] + raise Exception(f"Dynamic future 
categorical missing: {e}") + _df = df[features] _df = ( _df.rename( columns={ @@ -118,7 +121,6 @@ def prepare_data(self, df: pd.DataFrame, future: bool = False) -> pd.DataFrame: } ) ) - return _df def prepare_static_features(self, df: pd.DataFrame) -> pd.DataFrame: @@ -284,8 +286,12 @@ def __init__(self, params): decoder_layers=self.params.decoder_layers, learning_rate=self.params.learning_rate, stat_exog_list=list(self.params.get("static_features", [])), - futr_exog_list=list(self.params.get("dynamic_future", [])), - hist_exog_list=list(self.params.get("dynamic_historical", [])), + futr_exog_list=list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + hist_exog_list=list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), accelerator=self.params.accelerator, devices=self.devices, ), @@ -317,8 +323,12 @@ def __init__(self, params): decoder_layers=self.params.decoder_layers, learning_rate=self.params.learning_rate, stat_exog_list=list(self.params.get("static_features", [])), - futr_exog_list=list(self.params.get("dynamic_future", [])), - hist_exog_list=list(self.params.get("dynamic_historical", [])), + futr_exog_list=list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + hist_exog_list=list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), accelerator=self.params.accelerator, devices=self.devices, ), @@ -347,8 +357,12 @@ def __init__(self, params): n_polynomials=self.params.n_polynomials, dropout_prob_theta=self.params.dropout_prob_theta, stat_exog_list=list(self.params.get("static_features", [])), - futr_exog_list=list(self.params.get("dynamic_future", [])), - hist_exog_list=list(self.params.get("dynamic_historical", [])), + futr_exog_list=list( + self.params.get("dynamic_future_numerical", []) + 
self.params.get("dynamic_future_categorical", []) + ), + hist_exog_list=list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), accelerator=self.params.accelerator, devices=self.devices, ), @@ -381,8 +395,12 @@ def __init__(self, params): interpolation_mode=self.params.interpolation_mode, pooling_mode=self.params.pooling_mode, stat_exog_list=list(self.params.get("static_features", [])), - futr_exog_list=list(self.params.get("dynamic_future", [])), - hist_exog_list=list(self.params.get("dynamic_historical", [])), + futr_exog_list=list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + hist_exog_list=list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), accelerator=self.params.accelerator, devices=self.devices, ), @@ -406,8 +424,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): @@ -459,8 +481,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + 
self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): @@ -511,8 +537,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): @@ -558,8 +588,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): @@ -608,8 +642,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): @@ -669,8 +707,12 @@ def __init__(self, params): ) self.exogs = { 'stat_exog_list': list(self.params.get("static_features", [])), - 'futr_exog_list': list(self.params.get("dynamic_future", [])), - 'hist_exog_list': 
list(self.params.get("dynamic_historical", [])), + 'futr_exog_list': list( + self.params.get("dynamic_future_numerical", []) + self.params.get("dynamic_future_categorical", []) + ), + 'hist_exog_list': list( + self.params.get("dynamic_historical_numerical", []) + self.params.get("dynamic_historical_categorical", []) + ), } def config(trial): diff --git a/mmf_sa/models/statsforecast/StatsFcForecastingPipeline.py b/mmf_sa/models/statsforecast/StatsFcForecastingPipeline.py index 64cdc90..26825b6 100644 --- a/mmf_sa/models/statsforecast/StatsFcForecastingPipeline.py +++ b/mmf_sa/models/statsforecast/StatsFcForecastingPipeline.py @@ -32,19 +32,20 @@ def prepare_data(self, df: pd.DataFrame, future: bool = False) -> pd.DataFrame: # Prepare historical dataframe with/out exogenous regressors for training # Fix here df[self.params.target] = df[self.params.target].clip(0) - if 'dynamic_future' in self.params.keys(): + features = [self.params.group_id, self.params.date_col, self.params.target] + if 'dynamic_future_numerical' in self.params.keys(): try: - df_statsfc = ( - df[[self.params.group_id, self.params.date_col, self.params.target] - + self.params.dynamic_future] - ) + features = features + self.params.dynamic_future_numerical except Exception as e: - raise Exception(f"Exogenous regressors missing: {e}") - else: - df_statsfc = df[[self.params.group_id, self.params.date_col, self.params.target]] - - df_statsfc = ( - df_statsfc.rename( + raise Exception(f"Dynamic future numerical missing: {e}") + if 'dynamic_future_categorical' in self.params.keys(): + try: + features = features + self.params.dynamic_future_categorical + except Exception as e: + raise Exception(f"Dynamic future categorical missing: {e}") + _df = df[features] + _df = ( + _df.rename( columns={ self.params.group_id: "unique_id", self.params.date_col: "ds", @@ -54,26 +55,27 @@ def prepare_data(self, df: pd.DataFrame, future: bool = False) -> pd.DataFrame: ) else: # Prepare future dataframe with/out exogenous 
regressors for forecasting - if 'dynamic_future' in self.params.keys(): + features = [self.params.group_id, self.params.date_col] + if 'dynamic_future_numerical' in self.params.keys(): try: - df_statsfc = ( - df[[self.params.group_id, self.params.date_col] - + self.params.dynamic_future] - ) + features = features + self.params.dynamic_future_numerical except Exception as e: - raise Exception(f"Exogenous regressors missing: {e}") - else: - df_statsfc = df[[self.params.group_id, self.params.date_col]] - - df_statsfc = ( - df_statsfc.rename( + raise Exception(f"Dynamic future numerical missing: {e}") + if 'dynamic_future_categorical' in self.params.keys(): + try: + features = features + self.params.dynamic_future_categorical + except Exception as e: + raise Exception(f"Dynamic future categorical missing: {e}") + _df = df[features] + _df = ( + _df.rename( columns={ self.params.group_id: "unique_id", self.params.date_col: "ds", } ) ) - return df_statsfc + return _df def fit(self, x, y=None): self.model = StatsForecast(models=[self.model_spec], freq=self.freq, n_jobs=-1) @@ -104,7 +106,7 @@ def forecast(self, df: pd.DataFrame, spark=None): _df = df[df[self.params.target].notnull()] _df = self.prepare_data(_df) self.fit(_df) - if 'dynamic_future' in self.params.keys(): + if 'dynamic_future_numerical' in self.params.keys() or 'dynamic_future_categorical' in self.params.keys(): _last_date = _df["ds"].max() _future_df = df[ (df[self.params["date_col"]] > np.datetime64(_last_date)) @@ -124,8 +126,7 @@ def forecast(self, df: pd.DataFrame, spark=None): else: forecast_df = self.model.predict(self.params["prediction_length"]) - target = [col for col in forecast_df.columns.to_list() - if col not in ["unique_id", "ds"]][0] + target = [col for col in forecast_df.columns.to_list() if col not in ["unique_id", "ds"]][0] forecast_df = forecast_df.reset_index(drop=True).rename( columns={ "unique_id": self.params.group_id, diff --git a/tests/unit/test_exogenous_regressors_pipeline.py 
b/tests/unit/test_exogenous_regressors_pipeline.py index 060253b..272b7e5 100644 --- a/tests/unit/test_exogenous_regressors_pipeline.py +++ b/tests/unit/test_exogenous_regressors_pipeline.py @@ -17,7 +17,7 @@ def base_config(): "prediction_length": 10, "metric": "smape", "active_models": ["StatsForecastAutoArima"], - "dynamic_reals": ["feature1", "feature2"] + "dynamic_future_numerical": ["feature1", "feature2"] } ) diff --git a/tests/unit/test_pipelines.py b/tests/unit/test_pipelines.py index fe9c693..0cb1d2d 100644 --- a/tests/unit/test_pipelines.py +++ b/tests/unit/test_pipelines.py @@ -26,8 +26,10 @@ def test_api_func(temp_dir, spark_session, m4_df, m4_df_exogenous): date_col="ds", target="y", freq="D", - dynamic_future=[], - dynamic_historical=[], + dynamic_future_numerical=[], + dynamic_future_categorical=[], + dynamic_historical_numerical=[], + dynamic_historical_categorical=[], static_features=[], train_predict_ratio=2, active_models=active_models, @@ -59,8 +61,8 @@ def test_api_func(temp_dir, spark_session, m4_df, m4_df_exogenous): date_col="ds", target="y", freq="D", - dynamic_future=["feature1", "feature2"], - dynamic_historical=[], + dynamic_future_categorical=["feature1", "feature2"], + dynamic_historical_categorical=[], static_features=[], train_predict_ratio=2, active_models=active_models,