Skip to content

Commit

Permalink
Implement reconciliation for hierarchical time series. (#72)
Browse files Browse the repository at this point in the history
* Fix some AutoSarima bugs.

* Harden models to granularities like MS

* Add RMSPE forecasting eval metric.

* Implement min-trace reconciliation.

* Fix bug for seasonality models on multivar data.

* Add test for minT reconciliation.

* Update docs.

* Fix computation of covariance matrix.

* Update version.

* Add data I/O utils for hierarchical time series.

* Add merlion.utils.data_io to docs.

* Add data I/O test.
  • Loading branch information
aadyotb committed Mar 3, 2022
1 parent 5a098ba commit adb73c1
Show file tree
Hide file tree
Showing 36 changed files with 572 additions and 140 deletions.
2 changes: 2 additions & 0 deletions docs/source/merlion.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ each associated with its own sub-package:
- :py:mod:`merlion.evaluate`: Evaluation metrics & pipelines to simulate the live deployment of a time series model
for any task.
- :py:mod:`merlion.plot`: Automated visualization of model outputs for univariate time series
- :py:mod:`merlion.utils`: Various utilities, including the `TimeSeries` class, resampling functions,
Bayesian conjugate priors, reconciliation for hierarchical time series, and more.

The key classes for input and output are `merlion.utils.time_series.TimeSeries` and
`merlion.utils.time_series.UnivariateTimeSeries`. Notably, these classes have transparent inter-operability
Expand Down
57 changes: 42 additions & 15 deletions docs/source/merlion.utils.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

merlion.utils package
=====================
This package contains various utilities, including the `TimeSeries` class and
Expand All @@ -8,44 +9,70 @@ utilities for resampling time series.
:undoc-members:
:show-inheritance:

.. autosummary::
time_series
resample
data_io
hts
ts_generator
conj_priors
istat

Submodules
----------

merlion.utils.conj_priors module
--------------------------------
.. automodule:: merlion.utils.conj_priors
merlion.utils.time\_series module
---------------------------------

.. automodule:: merlion.utils.time_series
:members:
:undoc-members:
:show-inheritance:

merlion.utils.istat module
--------------------------
merlion.utils.resample module
-----------------------------

.. automodule:: merlion.utils.istat
.. automodule:: merlion.utils.resample
:members:
:undoc-members:
:show-inheritance:

merlion.utils.misc module
-------------------------
merlion.utils.data\_io module
-----------------------------

.. automodule:: merlion.utils.misc
.. automodule:: merlion.utils.data_io
:members:
:undoc-members:
:show-inheritance:

merlion.utils.resample module
-----------------------------

.. automodule:: merlion.utils.resample
merlion.utils.hts module
------------------------

.. automodule:: merlion.utils.hts
:members:
:undoc-members:
:show-inheritance:

merlion.utils.time\_series module
---------------------------------
merlion.utils.ts\_generator module
----------------------------------

.. automodule:: merlion.utils.time_series
.. automodule:: merlion.utils.ts_generator
:members:
:undoc-members:
:show-inheritance:

merlion.utils.conj_priors module
--------------------------------
.. automodule:: merlion.utils.conj_priors
:members:
:undoc-members:
:show-inheritance:

merlion.utils.istat module
--------------------------

.. automodule:: merlion.utils.istat
:members:
:undoc-members:
:show-inheritance:
7 changes: 3 additions & 4 deletions examples/misc/generate_synthetic_tsad_dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021 salesforce.com, inc.
# Copyright (c) 2022 salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
Expand All @@ -11,7 +11,6 @@
import numpy as np
from math import floor, ceil

from merlion.utils.time_series import ts_to_csv
from merlion.utils.ts_generator import GeneratorConcatenator, TimeSeriesGenerator
from merlion.transform.anomalize import LevelShift, Shock, TrendChange

Expand Down Expand Up @@ -64,13 +63,13 @@ def main():
for i, ts in enumerate(ts_list):
# write original ts
csv = join(anom_dir, f"{ts.names[0]}.csv")
ts_to_csv(ts, csv)
ts.to_csv(csv)
# anomalize ts with each anomalizer
for j, (name, anom) in enumerate(anomalizers.items()):
np.random.seed(1000 * i + j)
anom_ts = anom(ts)
csv = join(anom_dir, f"{anom_ts.names[0]}_{name}_anomaly.csv")
ts_to_csv(anom_ts, csv)
anom_ts.to_csv(csv)


if __name__ == "__main__":
Expand Down
23 changes: 23 additions & 0 deletions merlion/evaluate/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,23 @@ def smape(self):
warnings.warn("Some values very close to 0, sMAPE might not be estimated accurately.")
return np.mean(200.0 * errors / (scale + 1e-8))

def rmspe(self):
    """
    Root Mean Squared Percent Error (RMSPE)

    For ground truth time series :math:`y` and predicted time series :math:`\\hat{y}`
    of length :math:`T`, it is computed as

    .. math:: 100 \\cdot \\sqrt{\\frac{1}{T}\\sum_{t=1}^T\\left(\\frac{y_t - \\hat{y}_t}{y_t}\\right)^2}.

    Only the first univariate of the ground truth and of the prediction is used.
    A warning is issued if any ground truth value has magnitude below ``1e-8``,
    because the percent error is numerically unstable in that case.

    :return: the RMSPE, expressed as a percentage.
    """
    self.check_before_eval()
    predict_values = self.predict.univariates[self.predict.names[0]].np_values
    ground_truth_values = self.ground_truth.univariates[self.ground_truth.names[0]].np_values
    # Compare magnitudes: a plain `< 1e-8` test would also flag every negative
    # ground truth value, even ones that are far from zero.
    if (np.abs(ground_truth_values) < 1e-8).any():
        warnings.warn("Some values very close to 0, RMSPE might not be estimated accurately.")
    errors = ground_truth_values - predict_values
    return 100 * np.sqrt(np.mean(np.square(errors / ground_truth_values)))

def mase(self):
"""
Mean Absolute Scaled Error (MASE)
Expand Down Expand Up @@ -240,6 +257,12 @@ class ForecastMetric(Enum):
200 \\cdot \\frac{1}{T}\\sum_{t=1}^{T}{\\frac{\\left| y_t
- \\hat{y}_t \\right|}{\\left| y_t \\right| + \\left| \\hat{y}_t \\right|}}.
"""
RMSPE = partial(accumulate_forecast_score, metric=ForecastScoreAccumulator.rmspe)
"""
Root Mean Square Percent Error is formulated as:
.. math:: 100 \\cdot \\sqrt{\\frac{1}{T}\\sum_{t=1}^T\\left(\\frac{y_t - \\hat{y}_t}{y_t}\\right)^2}.
"""
MASE = partial(accumulate_forecast_score, metric=ForecastScoreAccumulator.mase)
"""
Mean Absolute Scaled Error (MASE) is formulated as:
Expand Down
2 changes: 1 addition & 1 deletion merlion/models/automl/autosarima.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021 salesforce.com, inc.
# Copyright (c) 2022 salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
Expand Down
4 changes: 2 additions & 2 deletions merlion/models/automl/seasonality.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021 salesforce.com, inc.
# Copyright (c) 2022 salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
Expand Down Expand Up @@ -128,7 +128,7 @@ class SeasonalityLayer(AutoMLMixIn, metaclass=AutodocABCMeta):

@property
def require_univariate(self):
    """
    Whether this layer requires univariate data.

    :return: ``True`` if the config does not specify a ``target_seq_index``
        (the attribute is missing or ``None``), ``False`` otherwise.
    """
    # A single return: the previous `is not None` check was inverted and left
    # this intended condition unreachable.
    return getattr(self.config, "target_seq_index", None) is None

@property
def multi_seasonality(self):
Expand Down
5 changes: 4 additions & 1 deletion merlion/models/ensemble/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ def to_dict(self, _skipped_keys=None):
if self.models is None:
models = None
else:
models = [None if m is None else dict(name=type(m).__name__, **m.config.to_dict()) for m in self.models]
models = [
None if m is None else dict(name=type(m).__name__, **m.config.to_dict(_skipped_keys))
for m in self.models
]
config_dict["models"] = models
return config_dict

Expand Down
2 changes: 1 addition & 1 deletion merlion/models/forecast/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def train(self, train_data: TimeSeries, train_config=None):
if self.dim == 1:
logger.info(
f"Model is working on a univariate dataset, "
f"hybrid of sequence and autoregression training strategy will be adopted"
f"hybrid of sequence and autoregression training strategy will be adopted "
f"with prediction_stride = {self.prediction_stride} "
)
if self.sampling_mode != "normal":
Expand Down
2 changes: 1 addition & 1 deletion merlion/models/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def to_dict(self, _skipped_keys=None):
if self.model is None:
config_dict["model"] = None
else:
config_dict["model"] = dict(name=type(self.model).__name__, **self.model.config.to_dict())
config_dict["model"] = dict(name=type(self.model).__name__, **self.model.config.to_dict(_skipped_keys))
return config_dict

@classmethod
Expand Down
5 changes: 4 additions & 1 deletion merlion/utils/autosarima_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#
# Copyright (c) 2021 salesforce.com, inc.
# Copyright (c) 2022 salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
#
"""
Low-level utils for AutoML models.
"""
import functools
import logging
import time
Expand Down
Loading

0 comments on commit adb73c1

Please sign in to comment.