From 8236e901d7ddc3d6d38af6f50da430f3dde0376b Mon Sep 17 00:00:00 2001 From: Umar Farooq Ghumman <46414488+mail4umar@users.noreply.github.com> Date: Fri, 8 Mar 2024 12:32:59 -0500 Subject: [PATCH] Doc Update - whats new v1.0.2 + TimeSeriesByCategory (#1190) * Added RST pages * updated doc for ensemble * changed version + addressed comments --- .../machine_learning_vertica_time_series.rst | 54 ++++++ docs/source/whats_new_v1_0_2.rst | 55 ++++++ setup.py | 2 +- verticapy/__init__.py | 2 +- .../machine_learning/vertica/tsa/ensemble.py | 163 +++++++++++++++++- 5 files changed, 267 insertions(+), 9 deletions(-) create mode 100644 docs/source/whats_new_v1_0_2.rst diff --git a/docs/source/machine_learning_vertica_time_series.rst b/docs/source/machine_learning_vertica_time_series.rst index 5aa5dcef4..a67d92c60 100644 --- a/docs/source/machine_learning_vertica_time_series.rst +++ b/docs/source/machine_learning_vertica_time_series.rst @@ -4,6 +4,60 @@ Time Series =============== + +Multi-Timeseries Model (Beta) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. currentmodule:: verticapy.machine_learning.vertica.tsa + +.. autosummary:: + :toctree: api/ + + ensemble.TimeSeriesByCategory + +.. currentmodule:: verticapy.machine_learning.vertica.tsa.ensemble + +**Methods:** + +.. 
autosummary:: + :toctree: api/ + + TimeSeriesByCategory.contour + TimeSeriesByCategory.deploySQL + TimeSeriesByCategory.does_model_exists + TimeSeriesByCategory.drop + TimeSeriesByCategory.export_models + TimeSeriesByCategory.features_importance + TimeSeriesByCategory.fit + TimeSeriesByCategory.get_attributes + TimeSeriesByCategory.get_match_index + TimeSeriesByCategory.get_params + TimeSeriesByCategory.get_plotting_lib + TimeSeriesByCategory.get_vertica_attributes + TimeSeriesByCategory.import_models + TimeSeriesByCategory.plot + TimeSeriesByCategory.predict + TimeSeriesByCategory.register + TimeSeriesByCategory.regression_report + TimeSeriesByCategory.report + TimeSeriesByCategory.score + TimeSeriesByCategory.set_params + TimeSeriesByCategory.summarize + TimeSeriesByCategory.to_binary + TimeSeriesByCategory.to_pmml + TimeSeriesByCategory.to_python + TimeSeriesByCategory.to_sql + TimeSeriesByCategory.to_tf + + +**Attributes:** + +.. autosummary:: + :toctree: api/ + + TimeSeriesByCategory.object_type + + ______ ARIMA diff --git a/docs/source/whats_new_v1_0_2.rst b/docs/source/whats_new_v1_0_2.rst new file mode 100644 index 000000000..a6db8bdb2 --- /dev/null +++ b/docs/source/whats_new_v1_0_2.rst @@ -0,0 +1,55 @@ +.. _whats_new_v1_0_2: + +=============== +Version 1.0.2 +=============== + +This minor release has some significant feature additions with other changes. Some salient ones are listed below: + +Pipelines (Beta) +----------------- + +VerticaPy now has **Pipelines**! + +- ``Pipelines`` is a YAML-based configuration for defining machine learning workflows, simplifying the process of setting up and managing machine learning pipelines. +- For beginners, it provides an easy-to-learn alternative to Python and SQL reducing the initial barriers to entry for creating models. +- For more experienced users, it offers templating features to enhance modularity, minimize errors, and promote efficient code reuse in machine learning projects. 
+ + +Performance +------------ + +- We have enhanced the QueryProfiler to improve its robustness. :py:func:`~verticapy.performance.vertica.QueryProfiler`. +- Introducing a completely new **Query Profiler Interface**, enabling users to navigate through various queries and access them without the need to re-enter all the code. All of this can be accomplished using only your mouse within Jupyter Notebook environments. For more information please look at :py:func:`~verticapy.performance.vertica.QueryProfilerInterface`. + +These updates significantly enhance the accessibility, debugging, and enhancement capabilities of your queries. + +OAuth Refresh Tokens +--------------------- + +- We have updated the connector to accept OAuth refresh tokens. +- Additionally, we have added a ``prompt`` option for :py:func:`~verticapy.connection.new_connection`. This allows the user to enter the secrets discreetly with a masked display. + +Multi-TimeSeries (Beta) +----------------------- + +We added a new Time Series class: ``TimeSeriesByCategory``. This allows users to build multiple models based on a category. The number of models created +is equal to the number of categories. This saves users the time of creating multiple models separately. For more information please see :py:func:`~verticapy.machine_learning.vertica.tsa.ensemble.TimeSeriesByCategory`. + +Plots +------ + +- Two new plots have been added for plotly that were previously missing: + + - :py:func:`~verticapy.machine_learning.vertica.decomposition.plot_scree` + - :py:func:`~verticapy.machine_learning.vertica.decomposition.plot_var` + +Unit Tests +----------- + +- We continue to shift our old tests to the new more robust format. + +Examples +--------- + +- Most of the `examples `_ have been updated with the latest verticapy format. 
diff --git a/setup.py b/setup.py index d58f3be62..25b5f6e1a 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setuptools.setup( name="verticapy", - version="1.0.1", + version="1.0.2", author="Badr Ouali", author_email="badr.ouali@vertica.com", url="https://github.com/vertica/VerticaPy", diff --git a/verticapy/__init__.py b/verticapy/__init__.py index 73a44bad1..ced92a59a 100755 --- a/verticapy/__init__.py +++ b/verticapy/__init__.py @@ -36,7 +36,7 @@ ) __url__: str = "https://github.com/vertica/verticapy/" __license__: str = "Apache License, Version 2.0" -__version__: str = "1.0.1" +__version__: str = "1.0.2" __iteration__: int = 1 __date__: str = "03082024" __last_commit__: str = "7def2745ffa5bbca9c30b3b08f52dd85c7d9675f" diff --git a/verticapy/machine_learning/vertica/tsa/ensemble.py b/verticapy/machine_learning/vertica/tsa/ensemble.py index 6e307a1b9..f6197c1fb 100755 --- a/verticapy/machine_learning/vertica/tsa/ensemble.py +++ b/verticapy/machine_learning/vertica/tsa/ensemble.py @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import copy from typing import Literal, Optional, Union @@ -42,10 +43,156 @@ class TimeSeriesByCategory(TimeSeriesModelBase): You should look at the source models to see entire examples. - :py:class:`~verticapy.machine_learning.vertica.tsa.ARIMA`; - :py:class:`~verticapy.machine_learning.vertica.tsa.ARMA`; - :py:class:`~verticapy.machine_learning.vertica.tsa.AR`; - :py:class:`~verticapy.machine_learning.vertica.tsa.MA`; + .. important:: This is still Beta. + + + Parameters + ---------- + name: str, optional + Name of the model. The model is stored in the + database. + overwrite_model: bool, optional + If set to ``True``, training a + model with the same name as an + existing model overwrites the + existing model. + base_model: TimeSeriesModelBase + The user should provide a base model which will + be used for each category. 
It could be + - :py:class:`~verticapy.machine_learning.vertica.tsa.ARIMA` + - :py:class:`~verticapy.machine_learning.vertica.tsa.ARMA` + - :py:class:`~verticapy.machine_learning.vertica.tsa.AR` + - :py:class:`~verticapy.machine_learning.vertica.tsa.MA` + + Attributes + ---------- + Many attributes are created + during the fitting phase. + + distinct: list + This provides a sequential list of the categories + used to build the different models. + + ts: str + The column name for time stamp. + + y: str + The column name used for building the model. + + _is_already_stored: bool + This tells us whether a model is stored in the Vertica + database. + + _get_model_names: list + This returns the list of names of the models created. + + + Examples + -------- + + The following examples provide a + basic understanding of usage. + + Initialization + ^^^^^^^^^^^^^^ + + For this example, we will use + a subset of the amazon dataset. + + .. code-block:: python + + import verticapy.datasets as vpd + + amazon_full = vpd.load_amazon() + + .. raw:: html + :file: /project/data/VerticaPy/docs/figures/datasets_loaders_load_amazon.html + + .. ipython:: python + :suppress: + + from verticapy.datasets import load_amazon + amazon_full = load_amazon() + + We can reduce the number of states for the sake + of ease in this example: + + .. ipython:: python + + amazon = amazon_full[(amazon_full["state"] == "PERNAMBUCO") | (amazon_full["state"] == "SERGIPE")] + + Now we can set up a base model that will be + created for each unique state inside the dataset. + For this example, we use ARIMA. + + .. ipython:: python + + from verticapy.machine_learning.vertica.tsa import ARIMA + + base_model = ARIMA(order = (2, 1, 2)) + + Finally we can now initiate our multiple models + in one go: + + .. 
ipython:: python + + from verticapy.machine_learning.vertica.tsa.ensemble import TimeSeriesByCategory + + model = TimeSeriesByCategory(base_model = base_model) + + Model Fitting + ^^^^^^^^^^^^^^^ + + We can now fit the model: + + .. ipython:: python + :okwarning: + + model.fit(amazon, ts = "date", y = "number", by = "state") + + + .. important:: + + To train a model, you can directly use the + :py:class:`~vDataFrame` or the name of the + relation stored in the database. The test + set is optional and is only used to compute + the test metrics. In :py:mod:`verticapy`, we + don't work using ``X`` matrices and ``y`` + vectors. Instead, we work directly with lists + of predictors and the response name. + + + Plots + ^^^^^^ + + We can conveniently plot the + predictions on a line plot to + observe the efficacy of our + model. We need to provide the + ``idx`` which represents the model number. + + .. code-block:: python + + model.plot(idx = 0, npredictions = 5) + + .. ipython:: python + :suppress: + :okwarning: + + vp.set_option("plotting_lib", "plotly") + fig = model.plot(idx = 0, npredictions = 5) + fig.write_html("/project/data/VerticaPy/docs/figures/machine_learning_vertica_tsa_ensemble_timeseriesbycategory_1.html") + + .. raw:: html + :file: /project/data/VerticaPy/docs/figures/machine_learning_vertica_tsa_ensemble_timeseriesbycategory_1.html + + .. note:: + + You can find the name of each category through + the ``distinct`` attribute (``model.distinct``). The + sequential list of categories corresponds to + ``idx = 0, 1 ...``. """ # Properties. @@ -394,9 +541,11 @@ def deploySQL( all_predictions += [ extract_subquery( model.predict( - vdf=None - if isinstance(vdf, NoneType) - else vdf.search(f"{self.by} = '{category}'"), + vdf=( + None + if isinstance(vdf, NoneType) + else vdf.search(f"{self.by} = '{category}'") + ), ts=ts, y=y, start=start,