Emulation API mk 1 (#2)
* Emulation API mark 1

* bump version to 0.0.1

* Updated documentation

* Updated pyproject.toml dependencies

* Updated dependencies

* Corrected typo in code_style.yaml

* removed intersphinx for sklearn

* Added docstrings

* Added tests for skutils

* Updated tests

* Updated the default network
williamjameshandley authored May 1, 2024
1 parent 8383448 commit 8df1ab7
Showing 12 changed files with 436 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code_style.yaml
@@ -46,4 +46,4 @@ jobs:
- name: Install pydocstyle
run: pip install pydocstyle
- name: run pydocstyle
run: python -m pydocstyle --convention=numpy anesthetic
run: python -m pydocstyle --convention=numpy stemu
2 changes: 1 addition & 1 deletion README.rst
@@ -3,7 +3,7 @@ stemu: s(t) emulation of smooth functions by stacking
=====================================================
:stemu: s(t) emulation of smooth functions by stacking
:Author: Harry Bevins & Will Handley
:Version: 0.0.0
:Version: 0.0.1
:Homepage: https://github.com/handley-lab/stemu
:Documentation: http://stemu.readthedocs.io/

27 changes: 27 additions & 0 deletions docs/source/stemu.rst
@@ -5,3 +5,30 @@ stemu package
:members:
:undoc-members:
:show-inheritance:


stemu.emu module
----------------

.. automodule:: stemu.emu
:members:
:undoc-members:


stemu.skutils module
--------------------

.. automodule:: stemu.skutils
:members:
:undoc-members:


stemu.utils module
------------------

.. automodule:: stemu.utils
:members:
:undoc-members:
:show-inheritance:


3 changes: 3 additions & 0 deletions pyproject.toml
@@ -17,6 +17,9 @@ dependencies = [
'numpy',
'scipy',
'matplotlib',
'pandas',
'tensorflow',
'scikit-learn',
]
classifiers = [
"Programming Language :: Python :: 3",
2 changes: 1 addition & 1 deletion stemu/_version.py
@@ -1 +1 @@
__version__ = "0.0.0"
__version__ = "0.0.1"
119 changes: 119 additions & 0 deletions stemu/emu.py
@@ -0,0 +1,119 @@
"""Emulator base class."""

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from tensorflow import keras

from stemu.skutils import CDFTransformer, FunctionScaler, IdentityTransformer
from stemu.utils import stack, unstack

default_network = [
keras.layers.Dense(30, activation="relu"),
keras.layers.Dense(30, activation="relu"),
keras.layers.Dense(30, activation="relu"),
]


class Emu(object):
"""General Emulation base class.
This fits an emulator for y=f(t|X) in the style of sklearn models.
Anything with a default initialisation in the __init__ method is considered
a hyperparameter and can be adjusted by the user after initialisation.
Attributes
----------
model : keras model, default is a simple dense network
epochs : int, default=100
loss : keras loss, default='mse'
optimizer : keras optimizer, default='adam'
callbacks : list of keras.callbacks
X_pipeline : sklearn.pipeline to transform input data X
t_pipeline : sklearn.pipeline to transform independent variable t
y_pipeline : sklearn.pipeline to transform dependent variable y
ty_pipeline : sklearn.pipeline to transform independent and dependent
variables simultaneously
"""

def __init__(self, *args, **kwargs):
self.epochs = 100
self.loss = "mse"
self.optimizer = "adam"
self.callbacks = [keras.callbacks.EarlyStopping(monitor="loss", patience=3)]

self.X_pipeline = Pipeline([("scaler", StandardScaler())])
self.t_pipeline = Pipeline([("cdf", CDFTransformer())])
self.y_pipeline = Pipeline([("default", IdentityTransformer())])
self.ty_pipeline = Pipeline([("scaler", FunctionScaler())])

self.network = default_network

def fit(self, X, t, y):
"""Fit the emulator.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
Returns
-------
self : object
Returns self.
"""
self.t = t

X = self.X_pipeline.fit_transform(X)
y = self.y_pipeline.fit_transform(y)
t = self.t_pipeline.fit_transform(t, y)

ty = self.ty_pipeline.fit_transform(np.block([[t], [y]]))
t, y = ty[0], ty[1:]

X, y = stack(X, t, y)

self.model = keras.models.Sequential(
[keras.layers.Input(X.shape[-1:])]
+ self.network
+ [keras.layers.Dense(1, activation="linear")]
)

self.model.compile(loss=self.loss, optimizer=self.optimizer)
self.history = self.model.fit(
X, y, epochs=self.epochs, batch_size=len(t), callbacks=self.callbacks
)
return self

def predict(self, X, t=None):
"""Predict the target.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
Defaults to the original training t
Returns
-------
y : array-like of shape (n_samples, n_target)
The predicted target
"""
if t is None:
t = self.t
t = self.t_pipeline.transform(t)
X = self.X_pipeline.transform(np.atleast_2d(X))
X, _ = stack(X, np.atleast_1d(t), 1)
y = self.model.predict(X)
_, _, y = unstack(X, y)
ty = self.ty_pipeline.inverse_transform(np.block([[t], [y]]))
_, y = ty[0], ty[1:]
y = self.y_pipeline.inverse_transform(y)
return y
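
An illustrative usage sketch of the sklearn-style fit/predict flow defined above. This is not part of the commit: the training arrays X, t, y and the hyperparameter values are made up, but the class, methods and attributes (Emu, fit, predict, epochs, network) are the ones introduced in stemu/emu.py.

import numpy as np
from tensorflow import keras
from stemu.emu import Emu

# Hypothetical training set: 100 samples of a 2-parameter family of curves
# evaluated on a shared grid t of 50 points.
rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, (100, 2))              # (n_samples, n_features)
t = np.linspace(0, 1, 50)                     # (n_target,)
y = np.sin(10 * X[:, :1] * t) + X[:, 1:]      # (n_samples, n_target)

emu = Emu()
emu.epochs = 50                               # hyperparameters are plain attributes
emu.network = [keras.layers.Dense(64, activation="relu"),
               keras.layers.Dense(64, activation="relu")]
emu.fit(X, t, y)

y_pred = emu.predict(X[:5])                   # predictions on the training grid t
y_fine = emu.predict(X[:5], t=np.linspace(0, 1, 200))  # finer grid within the training range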
137 changes: 137 additions & 0 deletions stemu/skutils.py
@@ -0,0 +1,137 @@
"""Utilities for working with scikit-learn."""

import numpy as np
from scipy.interpolate import interp1d
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer, StandardScaler


class CDFTransformer(BaseEstimator, TransformerMixin):
"""Transform independent variable using CDF from dependent variable.
The CDF is defined by the cumulative sum of the standard deviation of the
dependent variable.
This is in the style of other sklearn transformers.
"""

def transform(self, X):
"""Transform the independent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self.cdf(X)

def inverse_transform(self, X):
"""Inverse transform the independent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self.icdf(X)

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
cdf = y.std(axis=0).cumsum() / y.std(axis=0).sum()
self.cdf = interp1d(X, cdf)
self.icdf = interp1d(cdf, X)
return self


class FunctionScaler(BaseEstimator, TransformerMixin):
"""Scale dependent variable.
The function is defined by the mean and standard deviation of the dependent
variable (as a function of the independent variable).
"""

def transform(self, X):
"""Transform the dependent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
t, y = X[0], X[1:]
y = (y - self.mean(t)) / self.std(t)
return np.block([[t], [y]])

def inverse_transform(self, X):
"""Inverse transform the dependent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
t, y = X[0], X[1:]
y = y * self.std(t) + self.mean(t)
return np.block([[t], [y]])

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
t, y = X[0], X[1:]
self.mean = interp1d(t, y.mean(axis=0))
self.std = interp1d(t, y.std(axis=0))
return self


class IdentityTransformer(BaseEstimator, TransformerMixin):
"""Do nothing transformer."""

def __init__(self):
pass

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self

def transform(self, X, y=None):
"""Transform the data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
return X

def inverse_transform(self, X):
"""Inverse transform the data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return X
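
An illustrative sketch of the two non-trivial transformers above, exercised directly. Not part of the commit; the toy grid t and curves y are made up, while CDFTransformer and FunctionScaler are the classes defined in stemu/skutils.py.

import numpy as np
from stemu.skutils import CDFTransformer, FunctionScaler

t = np.linspace(0, 1, 11)                               # independent variable grid
y = np.vstack([np.sin(5 * t + i) for i in range(4)])    # 4 toy curves on that grid

# CDFTransformer: builds a CDF from the cumulative standard deviation of y
# across samples, so resolution concentrates where the curves spread the most.
cdf = CDFTransformer().fit(t, y)
u = cdf.transform(t)                  # monotonically increasing, ending at 1
t_back = cdf.inverse_transform(u)     # recovers t at the fitted grid points

# FunctionScaler: acts on a stacked array whose first row is t and remaining
# rows are y; it removes the t-dependent mean and standard deviation.
fs = FunctionScaler().fit(np.block([[t], [y]]))
ty = fs.transform(np.block([[t], [y]]))
y_scaled = ty[1:]                     # roughly zero mean and unit std at each t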
59 changes: 59 additions & 0 deletions stemu/utils.py
@@ -0,0 +1,59 @@
"""Utility functions for stemu."""

import numpy as np
import pandas as pd


def stack(X, t, y):
"""Stack the data for training.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
Returns
-------
X : array-like of shape (n_samples*n_target,n_features)
The input data.
y : array-like of shape (n_samples*n_target,)
The dependent variable for the target
"""
data = pd.DataFrame(
y, columns=t, index=pd.MultiIndex.from_arrays(np.atleast_2d(X).T)
).stack()
y = data.to_numpy()
X = data.index.to_frame().to_numpy()
return X, y


def unstack(X, y):
"""Unstack the data for prediction.
Parameters
----------
X : array-like of shape (n_samples*n_target, n_features)
The input data.
y : array-like of shape (n_samples*n_target,)
The dependent variable for the target
Returns
-------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
data = pd.DataFrame(y, index=pd.MultiIndex.from_arrays(np.atleast_2d(X).T)).unstack(
sort=False
)
y = data.to_numpy()
X = data.index.to_frame().to_numpy()
t = data.columns.get_level_values(1).to_numpy()
return X, t, y
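
A small round-trip sketch of the stacking helpers, with made-up shapes; stack and unstack are the functions defined above, everything else is illustrative.

import numpy as np
from stemu.utils import stack, unstack

X = np.array([[0.0, 1.0],
              [2.0, 3.0]])            # 2 samples, 2 features
t = np.array([0.0, 0.5, 1.0])         # 3 target points
y = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])       # (n_samples, n_target)

Xs, ys = stack(X, t, y)               # Xs: (6, 3) = the 2 features plus a t column; ys: (6,)
X2, t2, y2 = unstack(Xs, ys)          # recovers the (2, 2), (3,), (2, 3) layout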