Summary of this patch:
  * Adds eval-chronos.py, a script that seeds transformers with 42, builds a
    FoundationalTimeSeriesArena over the Hourly/Daily/Weekly/Monthly parquet
    files with five Chronos T5 checkpoints (large, base, small, mini, tiny),
    and exposes arena.compete on the command line through fire.
  * Changes the from_df classmethod in chronos.py to collect one tensor per
    unique_id in a list instead of filling a single bfloat16 tensor that was
    NaN-padded on the left, and drops the List import.

diff --git a/experiments/foundation-time-series-arena/eval-chronos.py b/experiments/foundation-time-series-arena/eval-chronos.py
new file mode 100644
index 00000000..09f0e44e
--- /dev/null
+++ b/experiments/foundation-time-series-arena/eval-chronos.py
@@ -0,0 +1,34 @@
+import fire
+import transformers
+from xiuhmolpilli.arena import FoundationalTimeSeriesArena
+from xiuhmolpilli.models.foundational import Chronos
+
+
+if __name__ == "__main__":
+    transformers.set_seed(42)  # for reproducibility
+
+    frequencies = ["Hourly", "Daily", "Weekly", "Monthly"]
+    files = [
+        f"./nixtla-foundational-time-series/data/{freq}.parquet" for freq in frequencies
+    ]
+    arena = FoundationalTimeSeriesArena(
+        models=[
+            Chronos(
+                repo_id="amazon/chronos-t5-large", batch_size=16, alias="Chronos-Large"
+            ),
+            Chronos(
+                repo_id="amazon/chronos-t5-base", batch_size=40, alias="Chronos-Base"
+            ),
+            Chronos(
+                repo_id="amazon/chronos-t5-small", batch_size=64, alias="Chronos-Small"
+            ),
+            Chronos(
+                repo_id="amazon/chronos-t5-mini", batch_size=128, alias="Chronos-Mini"
+            ),
+            Chronos(
+                repo_id="amazon/chronos-t5-tiny", batch_size=256, alias="Chronos-Tiny"
+            ),
+        ],
+        parquet_data_paths=files,
+    )
+    fire.Fire(arena.compete)
diff --git a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py
index 20a39b0a..9768dc1f 100644
--- a/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py
+++ b/experiments/foundation-time-series-arena/xiuhmolpilli/models/foundational/chronos.py
@@ -1,4 +1,4 @@
-from typing import Iterable, List
+from typing import Iterable
 
 import numpy as np
 import pandas as pd
@@ -29,23 +29,13 @@ def __init__(
 
     @classmethod
     def from_df(cls, df: pd.DataFrame, batch_size: int):
-        num_unique_ids = df["unique_id"].nunique()
-        max_series_length = df["unique_id"].value_counts().max()
-        padded_tensor = torch.full(
-            size=(num_unique_ids, max_series_length),
-            fill_value=torch.nan,
-            dtype=torch.bfloat16,
-        )  # type: ignore
+        tensors = []
         df_sorted = df.sort_values(by=["unique_id", "ds"])
-        for idx, (_, group) in enumerate(df_sorted.groupby("unique_id")):
-            series_length = len(group)
-            padded_tensor[idx, -series_length:] = torch.tensor(
-                group["y"].values,
-                dtype=torch.bfloat16,
-            )
+        for _, group in df_sorted.groupby("unique_id"):
+            tensors.append(torch.tensor(group["y"].values))
         uids = df_sorted["unique_id"].unique()
         last_times = df_sorted.groupby("unique_id")["ds"].tail(1)
-        return cls(padded_tensor, uids, last_times, batch_size)
+        return cls(tensors, uids, last_times, batch_size)
 
     def __len__(self):
         return self.n_batches