Skip to content

Commit

Permalink
Merge pull request #226 from winedarksea/dev
Browse files Browse the repository at this point in the history
0.6.8
  • Loading branch information
winedarksea authored Jan 18, 2024
2 parents 4e4f6bd + ec6860c commit c23c244
Show file tree
Hide file tree
Showing 38 changed files with 572 additions and 116 deletions.
9 changes: 4 additions & 5 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.6.7 🇺🇦 🇺🇦 🇺🇦
* Cassandra bug fix
* isolated_only to anomaly methods
* matse metric is possibly temporary and not added to per series weighting options
* added HistoricValues transformer
# 0.6.8 🇺🇦 🇺🇦 🇺🇦
* bug fixes; more robust against kernel failures caused by OpenBLAS NaN handling
* added BKBandpassFilter
* added expand_horizontal for scaling mosaics

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.7'
__version__ = '0.6.8'

TransformTS = GeneralTransformer

Expand Down
5 changes: 4 additions & 1 deletion autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,10 @@ def __init__(
if self.transformation_dict is None:
self.transformation_dict = {}
self.transformer_object = GeneralTransformer(
**self.transformation_dict, n_jobs=n_jobs, holiday_country=holiday_country
**self.transformation_dict,
n_jobs=n_jobs,
holiday_country=holiday_country,
verbose=self.verbose,
)
self.model = ModelMonster(
model_str,
Expand Down
181 changes: 151 additions & 30 deletions autots/evaluator/auto_ts.py

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions autots/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,24 +511,24 @@ def long_form_results(
"""
upload = pd.melt(
self.forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload[interval_name] = "50%"
upload_upper = pd.melt(
self.upper_forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_upper[
interval_name
] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
upload_lower = pd.melt(
self.lower_forecast.rename_axis(index='datetime').reset_index(),
var_name="SeriesID",
value_name="Value",
var_name=id_name,
value_name=value_name,
id_vars="datetime",
).set_index("datetime")
upload_lower[
Expand All @@ -537,7 +537,8 @@ def long_form_results(

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
upload = upload.reset_index(drop=False, names=datetime_column)
upload.index.name = str(datetime_column)
upload = upload.reset_index(drop=False)
if update_datetime_name is not None:
upload[update_datetime_name] = datetime.datetime.utcnow()
return upload
Expand Down
2 changes: 1 addition & 1 deletion autots/models/matrix_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def rrvar(data, R, pred_step, maxiter=100):
X1 = data[:, :-1]
X2 = data[:, 1:]
V = np.random.randn(R, N)
X1_pinv = np.linalg.pinv(X1)
X1_pinv = np.linalg.pinv(np.nan_to_num(X1))
for it in range(maxiter):
W = X2 @ np.linalg.pinv((V @ X1))
V = np.linalg.pinv(W) @ X2 @ X1_pinv
Expand Down
9 changes: 5 additions & 4 deletions autots/models/model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
'GLM': 1,
'ETS': 1,
'FBProphet': 0.5,
'GluonTS': 0.5,
# 'GluonTS': 0.5,
'UnobservedComponents': 1,
'VAR': 1,
'VECM': 1,
Expand All @@ -75,7 +75,7 @@
'ARDL': 1,
'ARCH': 1,
'MetricMotif': 1,
# 'SeasonalityMotif': 1,
'SeasonalityMotif': 1,
}
# fastest models at any scale
superfast = [
Expand All @@ -86,6 +86,7 @@
'SeasonalNaive',
# 'MetricMotif',
'SeasonalityMotif',
'SectionalMotif', # not entirely sure but so far this is pretty fast
]
# relatively fast
fast = {
Expand All @@ -105,13 +106,13 @@
'SectionalMotif': 1,
'NVAR': 0.3,
'MAR': 0.25,
'RRVAR': 1,
'RRVAR': 0.4,
'KalmanStateSpace': 0.4,
'MetricMotif': 1,
'Cassandra': 0.6,
'SeasonalityMotif': 1.5,
'FFT': 0.8,
"BallTreeMultivariateMotif": 1, # keep an eye on RAM
"BallTreeMultivariateMotif": 0.4, # keep an eye on RAM, not the fastest at scale but works...
}
# models that can scale well if many CPU cores are available
parallel = {
Expand Down
29 changes: 22 additions & 7 deletions autots/models/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,18 +675,28 @@ def retrieve_classifier(
}
# these are models that are relatively fast with large multioutput Y, small n obs
# Keys are regressor names; each dict is handed to generate_regressor_params
# as `model_dict` -- presumably the values are relative sampling weights
# (TODO confirm against generate_regressor_params).
datepart_model_dict: dict = {
# 'RandomForest': 0.05, # crashes sometimes at scale for unclear reasons
'ElasticNet': 0.1,
'xgboost': 0.001, # excess memory at scale
'MLP': 0.05,
'DecisionTree': 0.02,
'Adaboost': 0.05,
'SVM': 0.01,
'KerasRNN': 0.02,
'Transformer': 0.02, # slow
'ExtraTrees': 0.00001, # some params cause RAM crash?
'RadiusNeighbors': 0.1,
'MultioutputGPR': 0.00001,
}
# Variant of datepart_model_dict used when get_new_params is called with
# method == 'deep': it re-enables RandomForest and uses larger values for
# xgboost, ExtraTrees and MultioutputGPR than the default dict above.
datepart_model_dict_deep = {
'RandomForest': 0.05, # crashes sometimes at scale for unclear reasons
'ElasticNet': 0.1,
'xgboost': 0.05,
'MLP': 0.05,
'DecisionTree': 0.02,
'Adaboost': 0.05,
'SVM': 0.01,
'KerasRNN': 0.02,
'Transformer': 0.02, # slow
'ExtraTrees': 0.01, # some params cause RAM crash?
'RadiusNeighbors': 0.1,
'MultioutputGPR': 0.001,
}
gpu = ['Transformer', 'KerasRNN', 'MLP'] # or more accurately, no dnn
gradient_boosting = {
Expand Down Expand Up @@ -788,7 +798,7 @@ def generate_classifier_params(
}
else:
model_dict = {
'xgboost': 1,
'xgboost': 0.5,
'ExtraTrees': 0.2,
'RandomForest': 0.1,
'KNN': 1,
Expand Down Expand Up @@ -2335,7 +2345,7 @@ def predict(
)
except Exception as e:
raise ValueError(
f"Datepart prediction with params {self.get_params()} failed"
f"Datepart prediction with params {self.get_params()} failed. This is often due to an improperly indexed future_regressor (with drop_most_recent especially)"
) from e

if just_point_forecast:
Expand Down Expand Up @@ -2366,7 +2376,12 @@ def predict(

def get_new_params(self, method: str = 'random'):
"""Return dict of new parameters for parameter tuning."""
model_choice = generate_regressor_params(model_dict=datepart_model_dict)
if method == 'deep':
model_choice = generate_regressor_params(
model_dict=datepart_model_dict_deep
)
else:
model_choice = generate_regressor_params(model_dict=datepart_model_dict)
datepart_choice = random.choices(
[
"recurring",
Expand Down
11 changes: 6 additions & 5 deletions autots/models/statsmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def predict(
parallel = False
# joblib multiprocessing to loop through series
if parallel:
df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose)(
df_list = Parallel(n_jobs=self.n_jobs, verbose=pool_verbose, timeout=3600)(
delayed(glm_forecast_by_column)(
current_series=df[col],
X=X,
Expand Down Expand Up @@ -1432,23 +1432,24 @@ def predict(
), "regressor row count not equal to forecast length"

# LinAlgError: SVD did not converge (occurs when NaN in train data)
# NaN must be removed for some BLAS packages else they will kill the kernel
if self.regression_type in ["User", "Holiday", 'user']:
maModel = VECM(
self.df_train,
self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
freq=self.frequency,
exog=np.array(self.regressor_train),
exog=np.nan_to_num(np.array(self.regressor_train)),
deterministic=self.deterministic,
k_ar_diff=self.k_ar_diff,
coint_rank=self.coint_rank,
seasons=self.seasons,
).fit()
# don't ask me why it is exog_fc here and not exog like elsewhere
forecast = maModel.predict(
steps=forecast_length, exog_fc=np.array(future_regressor)
steps=forecast_length, exog_fc=np.nan_to_num(np.array(future_regressor))
)
else:
maModel = VECM(
self.df_train,
self.df_train.replace([np.inf, -np.inf], np.nan).fillna(0),
freq=self.frequency,
deterministic=self.deterministic,
k_ar_diff=self.k_ar_diff,
Expand Down
11 changes: 8 additions & 3 deletions autots/tools/fast_kalman.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def holt_winters_damped_matrices(M, alpha, beta, gamma, phi=1.0):
return F, Q, H, R


def new_kalman_params(method=None):
def new_kalman_params(method=None, allow_auto=True):
if method in ['fast']:
em_iter = random.choices([None, 10], [0.8, 0.2])[0]
elif method == "superfast":
Expand Down Expand Up @@ -575,6 +575,9 @@ def new_kalman_params(method=None):
'observation_noise': 0.04,
}
params['em_iter'] = em_iter
if not allow_auto:
if params['observation_noise'] == 'auto':
params['observation_noise'] = 0.1
return params


Expand Down Expand Up @@ -1314,7 +1317,9 @@ def douter(a, b):
def dinv(A):
    """Matrix inverse applied to the last two axes of ``A``.

    Non-finite entries are replaced with ``np.nan_to_num`` before any
    linear-algebra call, because NaN input can cause kernel death in
    OpenBLAS. The same sanitized array is used for the fallback, so the
    pseudo-inverse never sees the raw NaN values either.

    Args:
        A: array-like holding square matrices in its last two axes.

    Returns:
        np.ndarray: the inverse of the sanitized input, or its
        Moore-Penrose pseudo-inverse when the plain inverse raises
        (for example on a singular matrix).
    """
    # sanitize once so both the fast path and the fallback get finite input
    A_clean = np.nan_to_num(A)
    try:
        res = np.linalg.inv(A_clean)
    except Exception:
        res = np.linalg.pinv(A_clean)  # slower but more robust
    return res
Expand Down Expand Up @@ -1609,7 +1614,7 @@ def ensure_matrix(x, dim=1):
# pylint: disable=W0702,W0104,E1136
try:
y = np.array(x)
y.shape[0]
y.shape[0] # for reasons I don't understand, this line is critical
x = y
except Exception:
x = np.eye(dim) * x
Expand Down
Loading

0 comments on commit c23c244

Please sign in to comment.