Skip to content

Commit

Permalink
0.3.5
Browse files Browse the repository at this point in the history
0.3.5
  • Loading branch information
winedarksea authored Aug 30, 2021
2 parents 0ab2e48 + f24d7ad commit d20e3bb
Show file tree
Hide file tree
Showing 39 changed files with 1,277 additions and 479 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ Also take a look at the [production_example.py](https://github.com/winedarksea/A
* Set `model_interrupt=True` which passes over the current model when a `KeyboardInterrupt`, i.e. `ctrl+c`, is pressed (although if the interrupt falls between generations it will stop the entire training).
* Use the `result_file` method of `.fit()` which will save progress after each generation - helpful to save progress if a long training is being done. Use `import_results` to recover.
* While Transformations are pretty fast, setting `transformer_max_depth` to a lower number (say, 2) will increase speed. Also utilize `transformer_list`.
* Check out [this example](https://github.com/winedarksea/AutoTS/discussions/76) of using AutoTS with pandas UDF.
* Ensembles are obviously slower to predict because they run many models, 'distance' models 2x slower, and 'simple' models 3x-5x slower.
* `ensemble='horizontal-max'` with `model_list='no_shared_fast'` can scale relatively well given many cpu cores because each model is only run on the series it is needed for.
* Reducing `num_validations` and `models_to_validate` will decrease runtime but may lead to poorer model selections.
Expand Down
22 changes: 10 additions & 12 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,16 @@
* Forecasts are desired for the future immediately following the most recent data.

# Latest
* improvements to joblib parallelized models (not copying the full df)
* additional parameter checks
* made "auto" cpu_count even more conservative
* improved 'Score' generation. It should now be more equally weighted across metrics.
* fixed potential bug for horizontal ensemble selection if perfect forecasts were delivered
* Horizontal ensembles now chosen by combination of multiple metrics and metric_weighting (mae, rmse, spl, contour)
* re-weighted fillna probabilities for random choice
* addressed a few deprecation warnings
* new plot_horizontal() function for AutoTS to quickly visualize horizontal ensembles
* Probabilistic and HDist ensembles are now deprecated (they can still be run by model_forecast but not by AutoTS class)
* new introduce_na parameter which makes series more robust to the last values being NaN in final but never in any validation
* Mosaic Ensembles! These can offer major improvements to MAE, but are also less stable than horizontal ensembles.
* New Transformer ScipyFilter
* New models Univariate and MultivariateMotif
* 'midhinge' and "weighted_mean" to AverageValueNaive
* Add passing regressors to WindowRegression and made more efficient window generation
* more plotting methods: plot_horizontal_transformers
* for most -Regression type models, `model_params` is now treated as kwargs and can accept any args for that model
* ExtraTrees and RadiusRegressor to -Regression type models
* bug fix in generate_score_per_series
* 'Generation' now tracked in results table, plus plotting method for generation loss


# Errors:
DynamicFactor holidays Exceptions 'numpy.ndarray' object has no attribute 'values'
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from autots.tools.regressor import create_lagged_regressor
from autots.evaluator.auto_model import model_forecast

__version__ = '0.3.4'
__version__ = '0.3.5'

TransformTS = GeneralTransformer

Expand Down
31 changes: 31 additions & 0 deletions autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
AverageValueNaive,
SeasonalNaive,
ZeroesNaive,
Motif,
)
from autots.models.statsmodels import (
GLS,
Expand Down Expand Up @@ -400,6 +401,28 @@ def ModelMonster(
**parameters,
)

return model
elif model == 'MultivariateMotif':
model = Motif(
frequency=frequency,
prediction_interval=prediction_interval,
random_seed=random_seed,
verbose=verbose,
n_jobs=n_jobs,
multivariate=True,
**parameters,
)
return model
elif model == 'UnivariateMotif':
model = Motif(
frequency=frequency,
prediction_interval=prediction_interval,
random_seed=random_seed,
verbose=verbose,
n_jobs=n_jobs,
multivariate=False,
**parameters,
)
return model
else:
raise AttributeError(
Expand Down Expand Up @@ -1061,6 +1084,7 @@ def TemplateWizard(
'Ensemble': ensemble_input,
'Exceptions': np.nan,
'Runs': 1,
'Generation': current_generation,
'ValidationRound': validation_round,
},
index=[0],
Expand Down Expand Up @@ -1137,6 +1161,7 @@ def TemplateWizard(
'TotalRuntime': datetime.timedelta(0),
'Exceptions': "KeyboardInterrupt by user",
'Runs': 1,
'Generation': current_generation,
'ValidationRound': validation_round,
},
index=[0],
Expand Down Expand Up @@ -1184,6 +1209,7 @@ def TemplateWizard(
'TotalRuntime': datetime.timedelta(0),
'Exceptions': repr(e),
'Runs': 1,
'Generation': current_generation,
'ValidationRound': validation_round,
},
index=[0],
Expand Down Expand Up @@ -1698,12 +1724,17 @@ def generate_score_per_series(results_object, metric_weighting, total_validation
overall_score = overall_score + (contour_score * contour_weighting)
# remove basic duplicates
local_results = results_object.model_results.copy()
local_results = local_results[local_results['Exceptions'].isna()]
local_results = local_results.sort_values(by="TotalRuntimeSeconds", ascending=True)
local_results.drop_duplicates(
subset=['ValidationRound', 'smape', 'mae', 'spl'], keep="first", inplace=True
)
# select only models run through all validations
# run_count = temp.groupby(level=0).count().mean(axis=1)
# models_to_use = run_count[run_count >= total_validations].index.tolist()
run_count = local_results[['Model', 'ID']].groupby("ID").count()
models_to_use = run_count[run_count['Model'] >= total_validations].index.tolist()
overall_score = overall_score[overall_score.index.isin(models_to_use)]
# take the average score across validations
overall_score = overall_score.groupby(level=0).mean()
return overall_score
119 changes: 114 additions & 5 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ class AutoTS(object):
Attributes:
best_model (pandas.DataFrame): DataFrame containing template for the best ranked model
regression_check (bool): If True, the best_model uses an input 'User' future_regressor
Methods:
fit, predict
export_template, import_template, import_results
results, failure_rate
horizontal_to_df, mosaic_to_df
plot_horizontal, plot_horizontal_transformers, plot_generation_loss
"""

def __init__(
Expand Down Expand Up @@ -499,7 +506,7 @@ def fit(
ensemble = self.ensemble

# check if NaN in last row
nan_tail = df_wide_numeric.tail(1).isna().sum(axis=1).iloc[0] > 0
self._nan_tail = df_wide_numeric.tail(1).isna().sum(axis=1).iloc[0] > 0

self.df_wide_numeric = df_wide_numeric
self.startTimeStamps = df_wide_numeric.notna().idxmax()
Expand Down Expand Up @@ -705,6 +712,7 @@ def fit(
model_interrupt=self.model_interrupt,
grouping_ids=self.grouping_ids,
random_seed=random_seed,
current_generation=(current_generation + 1),
verbose=verbose,
n_jobs=self.n_jobs,
)
Expand Down Expand Up @@ -890,9 +898,8 @@ def fit(
val_future_regressor_test = []

# force NaN for robustness
if self.introduce_na or (self.introduce_na is None and nan_tail):
if self.introduce_na or (self.introduce_na is None and self._nan_tail):
nan_frac = val_df_train.shape[1] / num_validations
int(nan_frac * y), int(nan_frac * (y + 1))
val_df_train.iloc[
-1, int(nan_frac * y) : int(nan_frac * (y + 1))
] = np.nan
Expand All @@ -903,7 +910,6 @@ def fit(
df_train=val_df_train,
df_test=val_df_test,
weights=current_weights,
# model_count=model_count,
forecast_length=forecast_length,
frequency=frequency,
prediction_interval=prediction_interval,
Expand Down Expand Up @@ -1192,6 +1198,7 @@ def predict(
grouping_ids=self.grouping_ids,
random_seed=self.random_seed,
verbose=verbose,
n_jobs=self.n_jobs,
template_cols=self.template_cols,
)
# convert categorical back to numeric
Expand Down Expand Up @@ -1223,6 +1230,7 @@ def predict(
grouping_ids=self.grouping_ids,
random_seed=self.random_seed,
verbose=verbose,
n_jobs=self.n_jobs,
template_cols=self.template_cols,
)
# convert categorical back to numeric
Expand Down Expand Up @@ -1446,7 +1454,8 @@ def horizontal_to_df(self):
raise ValueError("No best_model. AutoTS .fit() needs to be run.")
if self.best_model['Ensemble'].iloc[0] != 2:
raise ValueError("Only works on horizontal ensemble type models.")
series = json.loads(self.best_model['ModelParameters'].iloc[0])['series']
ModelParameters = json.loads(self.best_model['ModelParameters'].iloc[0])
series = ModelParameters['series']
series = pd.DataFrame.from_dict(series, orient="index").reset_index(drop=False)
if series.shape[1] > 2:
# for mosaic style ensembles, choose the mode model id
Expand All @@ -1463,6 +1472,22 @@ def horizontal_to_df(self):
self.df_wide_numeric.mean().to_frame(), right_index=True, left_on="Series"
)
series.columns = ["Series", "ID", 'Model', "Volatility", "Mean"]
series['Transformers'] = series['ID'].copy()
series['FillNA'] = series['ID'].copy()
lookup = {}
na_lookup = {}
for k, v in ModelParameters['models'].items():
try:
trans_params = json.loads(v.get('TransformationParameters', '{}'))
lookup[k] = ",".join(trans_params.get('transformations', {}).values())
na_lookup[k] = trans_params.get('fillna', '')
except Exception:
lookup[k] = "None"
na_lookup[k] = "None"
series['Transformers'] = (
series['Transformers'].replace(lookup).replace("", "None")
)
series['FillNA'] = series['FillNA'].replace(na_lookup).replace("", "None")
return series

def mosaic_to_df(self):
Expand Down Expand Up @@ -1500,6 +1525,90 @@ def plot_horizontal(self, max_series: int = 20, **kwargs):
'log(Volatility)'
].plot(style='o', **kwargs)

def plot_horizontal_transformers(
    self, method="transformers", color_list=None, **kwargs
):
    """Simple bar plot of how often each transformer (or fillna method) is used.

    Counts are taken from the table returned by ``self.horizontal_to_df()``.
    Note this doesn't capture transformers nested in simple ensembles.

    Args:
        method (str): 'fillna' or 'transformers' - which to plot.
            Any value other than 'fillna' (case-insensitive) plots transformers.
        color_list (list): colors to *sample* for bar colors. Can be names or hex.
            Defaults to the module-level ``colors_list``. If there are more bars
            than colors, every color is used once and the remainder are
            drawn at random with repetition.
        **kwargs: passed to pandas.plot()

    Raises:
        ValueError: if bars need coloring but ``color_list`` is empty.
    """
    series = self.horizontal_to_df()
    if str(method).lower() == "fillna":
        transformers = series['FillNA'].value_counts()
    else:
        # each row holds a comma-joined list of transformers; flatten before counting
        transformers = pd.Series(
            ",".join(series['Transformers']).split(",")
        ).value_counts()

    # copy into a list so any iterable of colors can be sampled
    palette = list(colors_list if color_list is None else color_list)
    n_bars = transformers.shape[0]
    if n_bars > len(palette):
        # random.sample would raise here; reuse colors instead of failing
        if not palette:
            raise ValueError("color_list must contain at least one color.")
        colors = random.sample(palette, len(palette)) + random.choices(
            palette, k=n_bars - len(palette)
        )
    else:
        colors = random.sample(palette, n_bars)
    # plot
    transformers.plot(kind='bar', color=colors, **kwargs)

def plot_generation_loss(self, **kwargs):
    """Plot the best (lowest) Score reached so far at each generation.

    Only rows with ValidationRound == 0 and Ensemble < 1 are considered.
    Note: this is only "one size fits all" accuracy and
    doesn't account for the benefits seen for ensembling.

    Args:
        **kwargs: passed to pd.DataFrame.plot()
    """
    first_round = self.initial_results.model_results['ValidationRound'] == 0
    not_ensemble = self.initial_results.model_results['Ensemble'] < 1
    candidates = self.initial_results.model_results[first_round & not_ensemble]
    # lowest score within each generation, then the running minimum across generations
    best_per_generation = candidates.groupby("Generation")['Score'].min()
    running_best = best_per_generation.cummin()
    running_best.plot(ylabel="Lowest Score", **kwargs)


# Default palette of hex color strings.
# plot_horizontal_transformers samples bar colors from this list
# when the caller does not supply its own color_list.
colors_list = [
'#FF00FF',
'#7FFFD4',
'#00FFFF',
'#F5DEB3',
'#FF6347',
'#8B008B',
'#696969',
'#FFC0CB',
'#C71585',
'#008080',
'#663399',
'#32CD32',
'#66CDAA',
'#A9A9A9',
'#2F4F4F',
'#FFDEAD',
'#800000',
'#FDF5E6',
'#F5F5F5',
'#F0FFF0',
'#87CEEB',
'#A52A2A',
'#90EE90',
'#7FFF00',
'#E9967A',
'#1E90FF',
'#FFF0F5',
'#ADD8E6',
'#008B8B',
'#FFF5EE',
'#00FA9A',
'#9370DB',
'#4682B4',
'#006400',
'#AFEEEE',
'#CD853F',
'#9400D3',
'#EE82EE',
'#00008B',
'#4B0082',
]


class AutoTSIntervals(object):
"""Autots looped to test multiple prediction intervals. Experimental.
Expand Down
Loading

0 comments on commit d20e3bb

Please sign in to comment.