Merge pull request #256 from winedarksea/dev
0.6.21
winedarksea authored Mar 5, 2025
2 parents 2735174 + 2286bf9 commit 7258a7a
Showing 33 changed files with 174 additions and 111 deletions.
6 changes: 2 additions & 4 deletions TODO.md
@@ -13,10 +13,8 @@
 * Forecasts are desired for the future immediately following the most recent data.
 * trimmed_mean to AverageValueNaive
 
-# 0.6.20 🇺🇦 🇺🇦 🇺🇦
-* transformer bug fixes
-* Prophet package adjustments
-* linear model singular matrix handling
+# 0.6.21 🇺🇦 🇺🇦 🇺🇦
+* Prophet and Cassandra bug fixes
 
 ### Unstable Upstream Packages (those that are frequently broken by maintainers)
 * Pytorch-Forecasting
2 changes: 1 addition & 1 deletion autots/__init__.py
@@ -27,7 +27,7 @@
 from autots.models.cassandra import Cassandra
 
 
-__version__ = '0.6.20'
+__version__ = '0.6.21'
 
 TransformTS = GeneralTransformer
 
27 changes: 14 additions & 13 deletions autots/evaluator/auto_ts.py
@@ -156,6 +156,7 @@ class AutoTS(object):
         best_model_params (dict): model params
         best_model_transformation_params (dict): transformation parameters
         best_model_ensemble (int): Ensemble type int id
+        used_frequency (str): datetime frequency offset string
         regression_check (bool): If True, the best_model uses an input 'User' future_regressor
         df_wide_numeric (pd.DataFrame): dataframe containing shaped final data, will include preclean
         initial_results.model_results (object): contains a collection of result metrics
@@ -439,9 +440,9 @@ def __init__(
 
                 full_params['transformations'] = transformations
                 full_params['transformation_params'] = transformation_params
-                self.initial_template.loc[index, 'TransformationParameters'] = (
-                    json.dumps(full_params)
-                )
+                self.initial_template.loc[
+                    index, 'TransformationParameters'
+                ] = json.dumps(full_params)
 
         self.regressor_used = False
         self.subset_flag = False
@@ -2022,10 +2023,10 @@ def _run_template(
         self.model_count = template_result.model_count
         # capture results from lower-level template run
         if "TotalRuntime" in template_result.model_results.columns:
-            template_result.model_results['TotalRuntime'] = (
-                template_result.model_results['TotalRuntime'].fillna(
-                    pd.Timedelta(seconds=60)
-                )
+            template_result.model_results[
+                'TotalRuntime'
+            ] = template_result.model_results['TotalRuntime'].fillna(
+                pd.Timedelta(seconds=60)
             )
         else:
             # trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
@@ -2162,9 +2163,9 @@ def _run_validations(
                     frac=0.8, random_state=self.random_seed
                 ).reindex(idx)
                 nan_frac = val_df_train.shape[1] / num_validations
-                val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
-                    np.nan
-                )
+                val_df_train.iloc[
+                    -2:, int(nan_frac * y) : int(nan_frac * (y + 1))
+                ] = np.nan
 
             # run validation template on current slice
             result = self._run_template(
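
For readers following the hunk above: it deliberately blanks out the last two rows of a moving slice of columns in each validation slice, so the template search also scores how models cope with missing recent data. A minimal standalone sketch of that pattern (shapes and names here are illustrative, not from the AutoTS API):

    import numpy as np
    import pandas as pd

    val_df_train = pd.DataFrame(np.random.rand(20, 10))  # hypothetical wide data
    num_validations, y = 5, 1
    nan_frac = val_df_train.shape[1] / num_validations  # slice width per validation
    # blank the last 2 rows of the y-th column slice
    val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = np.nan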
@@ -4845,9 +4846,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
             )
             y = pd.json_normalize(json.loads(row["ModelParameters"]))
             y.index = [row['ID']]
-            y['Model'] = (
-                x  # might need to remove this and do analysis independently for each
-            )
+            y[
+                'Model'
+            ] = x  # might need to remove this and do analysis independently for each
             res.append(
                 pd.DataFrame(
                     {
12 changes: 6 additions & 6 deletions autots/models/base.py
@@ -490,18 +490,18 @@ def long_form_results(
             value_name=value_name,
             id_vars="datetime",
         ).set_index("datetime")
-        upload_upper[interval_name] = (
-            f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
-        )
+        upload_upper[
+            interval_name
+        ] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
         upload_lower = pd.melt(
             self.lower_forecast.rename_axis(index='datetime').reset_index(),
             var_name=id_name,
             value_name=value_name,
             id_vars="datetime",
         ).set_index("datetime")
-        upload_lower[interval_name] = (
-            f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
-        )
+        upload_lower[
+            interval_name
+        ] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
 
         upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
         if datetime_column is not None:
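
The reflowed f-strings above only change formatting; the label arithmetic is untouched. As a worked example of what they produce, with a 0.9 prediction interval the bound columns are tagged 95.0% and 5.0%:

    prediction_interval = 0.9
    upper_label = f"{round(100 - ((1 - prediction_interval) / 2) * 100, 0)}%"  # '95.0%'
    lower_label = f"{round(((1 - prediction_interval) / 2) * 100, 0)}%"  # '5.0%'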
10 changes: 8 additions & 2 deletions autots/models/cassandra.py
@@ -480,6 +480,7 @@ def fit(
             multivar_df = (
                 trs_df.T.groupby(self.categorical_groups)  # axis=1
                 .mean()
+                .transpose()
                 .iloc[lag_1_indx]
             )
             multivar_df.index = self.df.index
@@ -543,7 +544,7 @@ def fit(
                 x_t = create_changepoint_features(
                     self.df.index,
                     changepoint_spacing=60,
-                    changepoint_distance_end=120,
+                    changepoint_distance_end=180,
                 )
                 x_list.append(x_t)
             else:
@@ -643,20 +644,24 @@ def fit(
                     print(f"Dropping colinear feature columns {corel}")
                 # x_array = x_array.drop(columns=corel)
                 self.drop_colz.extend(corel.tolist())
+
         if self.max_multicolinearity is not None:
             colin = x_array.columns[w < self.max_multicolinearity]
-            if len(colin) > 0:
+            if len(colin) > 1:
                 if self.verbose > 2:
                     print(f"Dropping multi-colinear feature columns {colin}")
                 # x_array = x_array.drop(columns=colin)
                 self.drop_colz.extend(colin.tolist())
+        if len(set(self.drop_colz)) == x_array.shape[1]:
+            self.drop_colz = list(set(self.drop_colz))[1:]
         x_array = x_array.drop(columns=self.drop_colz)
 
         # things we want modeled but want to discard from evaluation (standins)
         remove_patterns = [
             "randnorm_",
             "rolling_trend_",
             "randomwalk_",
+            "changepoint_",
         ]  # "intercept" added after, so not included
 
         # RUN LINEAR MODEL
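
A note on the guard added above: after both colinearity checks fill self.drop_colz, dropping every column would leave an empty design matrix, so one column is spared whenever the drop list covers all of x_array. A minimal sketch of that safeguard on a generic DataFrame (names illustrative):

    import pandas as pd

    x_array = pd.DataFrame({'a': [1, 2], 'b': [2, 4], 'c': [3, 6]})
    drop_colz = ['a', 'b', 'c', 'a']  # duplicates can accrue from both checks
    if len(set(drop_colz)) == x_array.shape[1]:
        drop_colz = list(set(drop_colz))[1:]  # keep one arbitrary column
    x_array = x_array.drop(columns=drop_colz)  # at least one column survives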
@@ -810,6 +815,7 @@ def fit(
             self.residual_uncertainty_upper_std = res_upper.std()
             self.residual_uncertainty_lower_std = res_lower.std()
         else:
+            slope = np.zeros_like(self.df)
             self.residual_uncertainty_upper = pd.Series(0, index=self.df.columns)
             self.residual_uncertainty_lower = pd.Series(0, index=self.df.columns)
             self.residual_uncertainty_upper_std = pd.Series(0, index=self.df.columns)
18 changes: 9 additions & 9 deletions autots/models/ensemble.py
@@ -2340,15 +2340,15 @@ def _buildup_mosaics(
                 f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
                 + mi
             ) from e
-    melted['forecast'] = (
-        fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-    )
-    melted['upper_forecast'] = (
-        u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-    )
-    melted['lower_forecast'] = (
-        l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-    )
+    melted[
+        'forecast'
+    ] = fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+    melted[
+        'upper_forecast'
+    ] = u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+    melted[
+        'lower_forecast'
+    ] = l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
 
     forecast_df = melted.pivot(
         values="forecast", columns="series_id", index="forecast_period"
97 changes: 61 additions & 36 deletions autots/models/prophet.py
@@ -67,6 +67,7 @@ def __init__(
         random_seed: int = 2024,
         verbose: int = 0,
         n_jobs: int = None,
+        **kwargs,
     ):
         ModelObject.__init__(
             self,
@@ -202,9 +203,10 @@ def seek_the_oracle(
         # n_changepoints -> changepoint_spacing
         # changepoint_range -> changepoint_distance_end
         if self.changepoint_range > 1 or self.changepoint_distance_end is not None:
+            non_null_indices = np.where(current_series["y"].notnull())[0]
             pargs['changepoints'] = get_changepoints(
-                current_series.index[0],
-                current_series.index[-1],
+                current_series.index[non_null_indices[0]],
+                current_series.index[non_null_indices[-1]],
                 changepoint_spacing=(
                     int(len(current_series.index) / self.n_changepoints)
                     if self.changepoint_spacing is None
@@ -219,19 +221,26 @@
             pargs.pop("changepoint_range", None)
             pargs.pop("n_changepoints", None)
         m = Prophet(**pargs)
+        # as currently written this customization only works on daily data
         if self.weekly_seasonality_prior_scale not in [None, "None"]:
             m.add_seasonality(
                 name='weekly',
-                period=7,
+                period=168 if "H" in self.frequency else 7,
                 fourier_order=4,
                 prior_scale=self.weekly_seasonality_prior_scale,
             )
         if self.yearly_seasonality_prior_scale not in [None, "None"]:
             if self.yearly_seasonality_order in [None, "None"]:
                 self.yearly_seasonality_order = 12
+            if "W" in str(self.frequency).upper():
+                yperiod = 52.18
+            elif "M" in str(self.frequency).upper():
+                yperiod = 12
+            else:
+                yperiod = 365.25
             m.add_seasonality(
                 name='yearly',
-                period=365.25,
+                period=yperiod,
                 fourier_order=int(self.yearly_seasonality_order),
                 prior_scale=self.yearly_seasonality_prior_scale,
             )
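
The period arguments above are denominated in steps of the series' own frequency: weekly seasonality spans 168 steps of hourly data but 7 steps of daily data, and a year is roughly 52.18 weekly or 12 monthly steps. A standalone sketch of the same selection logic (function names here are illustrative, not part of the AutoTS API):

    def yearly_period(frequency: str) -> float:
        # Prophet seasonality periods are counted in data-frequency steps
        if "W" in str(frequency).upper():
            return 52.18  # weeks per year
        elif "M" in str(frequency).upper():
            return 12  # months per year
        return 365.25  # days per year, the daily-data default

    def weekly_period(frequency: str) -> int:
        return 168 if "H" in frequency else 7  # hours vs days per week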
@@ -248,7 +257,8 @@
             else:
                 m.add_country_holidays(country_name=args['holiday_country'])
         else:
-            raise ValueError("`holiday` arg for Prophet not recognized")
+            pass
+            # raise ValueError("`holiday` arg for Prophet not recognized")
         if args['regression_type'] in ['User', 'user']:
             current_series = pd.concat(
                 [current_series, args['regressor_train']], axis=1
@@ -427,14 +437,13 @@ def get_new_params(self, method: str = 'random'):
             yearly_seasonality_order = None
         yearly_seasonality_prior_scale = random.choices(
             [None, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 15, 20, 25, 40],  # default 10
-            [0.8, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05],
+            [0.4, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05],
         )[0]
         if yearly_seasonality_prior_scale is not None:
             yearly_seasonality_order = random.choices(
                 [2, 6, 12, 30], [0.1, 0.2, 0.5, 0.1]
             )[0]
-
-        return {
+        params = {
             'holiday': holiday_choice,
             'regression_type': regression_choice,
             'changepoint_prior_scale': random.choices(
@@ -445,9 +454,37 @@
                 [0.01, 0.1, 1.0, 10.0, 15, 20, 25, 40],  # default 10
                 [0.05, 0.05, 0.05, 0.8, 0.05, 0.05, 0.05, 0.05],
             )[0],
-            'yearly_seasonality_prior_scale': yearly_seasonality_prior_scale,
-            "yearly_seasonality_order": yearly_seasonality_order,
-            'weekly_seasonality_prior_scale': random.choices(
+            'holidays_prior_scale': random.choices(
+                [0.01, 0.1, 1.0, 10.0, 15, 20, 25, 40],  # default 10
+                [0.05, 0.05, 0.05, 0.8, 0.05, 0.05, 0.05, 0.05],
+            )[0],
+            'seasonality_mode': random.choice(['additive', 'multiplicative']),
+            'growth': random.choices(["linear", "flat"], [0.9, 0.1])[0],
+            "trend_phi": random.choices(
+                [None, 0.98, 0.999, 0.95, 0.8, 0.99], [0.8, 0.1, 0.2, 0.1, 0.1, 0.05]
+            )[0],
+        }
+        way = random.choice(["new", "old"])
+        if way == "old":
+            params["n_changepoints"] = random.choices(
+                [5, 10, 20, 25, 30, 40, 50], [0.05, 0.1, 0.1, 0.9, 0.1, 0.05, 0.05]
+            )[0]
+            params["changepoint_range"] = random.choices(
+                [0.8, 0.85, 0.9, 0.95, 0.98, 30, 60, 180, 360],
+                [0.4, 0.3, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
+            )[0]
+        else:
+            params["changepoint_spacing"] = random.choices(
+                [10, 20, 180, 30, 40, 50, 60], [0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.9]
+            )[0]
+            params["changepoint_distance_end"] = random.choices(
+                [10, 20, 180, 30, 40, 50, 60], [0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.9]
+            )[0]
+        way = random.choice(["new", "old"])
+        if way == "new":
+            params["yearly_seasonality_prior_scale"] = yearly_seasonality_prior_scale
+            params["yearly_seasonality_order"] = yearly_seasonality_order
+            params["weekly_seasonality_prior_scale"] = random.choices(
                 [
                     None,
                     0.0001,
@@ -461,40 +498,28 @@
                     25,
                     40,
                 ],  # default 10
-                [0.8, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05],
-            )[0],
-            'holidays_prior_scale': random.choices(
-                [0.01, 0.1, 1.0, 10.0, 15, 20, 25, 40],  # default 10
-                [0.05, 0.05, 0.05, 0.8, 0.05, 0.05, 0.05, 0.05],
-            )[0],
-            'seasonality_mode': random.choice(['additive', 'multiplicative']),
-            'changepoint_range': random.choices(
-                [0.8, 0.85, 0.9, 0.95, 0.98, 30, 60],
-                [0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.2],
-            )[0],
-            'growth': random.choices(["linear", "flat"], [0.9, 0.1])[0],
-            'n_changepoints': random.choices(
-                [5, 10, 20, 25, 30, 40, 50], [0.05, 0.1, 0.1, 0.9, 0.1, 0.05, 0.05]
-            )[0],
-            'changepoint_spacing': random.choices(
-                [10, 20, 25, 30, 40, 50, 60], [0.05, 0.1, 0.1, 0.1, 0.1, 0.05, 0.9]
-            )[0],
-            "trend_phi": random.choices(
-                [None, 0.98, 0.999, 0.95, 0.8], [0.8, 0.1, 0.2, 0.1, 0.1]
-            )[0],
-        }
+                [0.4, 0.2, 0.05, 0.05, 0.05, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05],
+            )[0]
+        else:
+            pass
+        return params
+
     def get_params(self):
         """Return dict of current parameters."""
         return {
             'holiday': self.holiday,
             'regression_type': self.regression_type,
             "growth": self.growth,
-            "n_changepoints": self.n_changepoints,
-            "changepoint_prior_scale": self.changepoint_prior_scale,
+            "seasonality_mode": self.seasonality_mode,
+            "changepoint_prior_scale": self.changepoint_prior_scale,
+            "n_changepoints": self.n_changepoints,
+            "changepoint_range": self.changepoint_range,
+            "changepoint_spacing": self.changepoint_spacing,
+            "changepoint_distance_end": self.changepoint_distance_end,
             "seasonality_prior_scale": self.seasonality_prior_scale,
             "yearly_seasonality_prior_scale": self.yearly_seasonality_prior_scale,
             "yearly_seasonality_order": self.yearly_seasonality_order,
             "weekly_seasonality_prior_scale": self.weekly_seasonality_prior_scale,
             "holidays_prior_scale": self.holidays_prior_scale,
             "trend_phi": self.trend_phi,
         }
@@ -541,7 +566,7 @@ def get_changepoints(
         cp_csv = custom_changepoints.replace(" ", "")
         if len(cp_csv) > 0:
             timestamps = [pd.Timestamp(cp_str) for cp_str in cp_csv.split(",")]
-            changepoints = changepoints.append(pd.Series(timestamps))
+            changepoints = pd.concat([changepoints, pd.Series(timestamps)], axis=0)
     changepoints = changepoints.drop_duplicates().sort_values()
     changepoints = changepoints.loc[
         (changepoints > training_start_ds) & (changepoints < training_end_ds)
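
The one-line change above is the core pandas-compatibility fix in this release: Series.append was deprecated in pandas 1.4 and removed in pandas 2.0, so custom changepoints are now attached with pd.concat. A minimal sketch with made-up dates:

    import pandas as pd

    changepoints = pd.Series(pd.to_datetime(["2024-01-01", "2024-06-01"]))
    timestamps = [pd.Timestamp("2024-03-15")]
    # changepoints.append(...) raises AttributeError on pandas >= 2.0
    changepoints = pd.concat([changepoints, pd.Series(timestamps)], axis=0)
    changepoints = changepoints.drop_duplicates().sort_values()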
7 changes: 5 additions & 2 deletions autots/models/sklearn.py
@@ -335,7 +335,8 @@ def rolling_x_regressor_regressor(
         X = X.set_index("series_id", append=True)
     if series_id is not None:
         hashed = (
-            int(hashlib.sha256(str(series_id).encode('utf-8')).hexdigest(), 16) % 10**16
+            int(hashlib.sha256(str(series_id).encode('utf-8')).hexdigest(), 16)
+            % 10**16
         )
         X['series_id'] = hashed
     return X
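
Context for the reflowed expression: the series label is hashed with sha256 and truncated modulo 10**16 so the id becomes a stable numeric feature; unlike Python's built-in hash(), which is salted per interpreter session, this stays deterministic across runs. For illustration:

    import hashlib

    series_id = "example_series"  # hypothetical label
    hashed = (
        int(hashlib.sha256(str(series_id).encode('utf-8')).hexdigest(), 16)
        % 10**16
    )
    print(hashed)  # same integer on every run and machine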
@@ -3984,7 +3985,9 @@ def _rbf_kernel(self, x1, x2, gamma):
         if gamma is None:
             gamma = 1.0 / x1.shape[1]
         distance = (
-            np.sum(x1**2, 1).reshape(-1, 1) + np.sum(x2**2, 1) - 2 * np.dot(x1, x2.T)
+            np.sum(x1**2, 1).reshape(-1, 1)
+            + np.sum(x2**2, 1)
+            - 2 * np.dot(x1, x2.T)
         )
         return np.exp(-gamma * distance)
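
The reflowed expression is the standard expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b applied row-wise, which yields all pairwise squared distances without materializing pairwise differences. A quick self-contained check of the identity:

    import numpy as np

    x1, x2 = np.random.rand(5, 3), np.random.rand(4, 3)
    # dot-product expansion, as in _rbf_kernel
    distance = (
        np.sum(x1**2, 1).reshape(-1, 1)
        + np.sum(x2**2, 1)
        - 2 * np.dot(x1, x2.T)
    )
    direct = ((x1[:, None, :] - x2[None, :, :]) ** 2).sum(-1)
    assert np.allclose(distance, direct)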

6 changes: 3 additions & 3 deletions autots/tools/profile.py
@@ -129,9 +129,9 @@ def profile_time_series(
         & (metrics_df['cv_squared'] >= cvar_threshold),
         'PROFILE',
     ] = 'lumpy'
-    metrics_df.loc[metrics_df['zero_diff_proportion'] >= flat_threshold, 'PROFILE'] = (
-        'flat'
-    )
+    metrics_df.loc[
+        metrics_df['zero_diff_proportion'] >= flat_threshold, 'PROFILE'
+    ] = 'flat'
     metrics_df.loc[
         metrics_df['null_percentage'] >= new_product_threshold, 'PROFILE'
     ] = 'new_product'
Expand Down
Binary file modified docs/build/doctrees/environment.pickle
Binary file modified docs/build/doctrees/source/autots.doctree
Binary file modified docs/build/doctrees/source/autots.evaluator.doctree
Binary file modified docs/build/doctrees/source/autots.models.doctree
