Emulation API mk 1 (#2)
* Emulation API mark 1

* bump version to 0.0.1

* Updated documentation

* Updated pyproject.toml dependencies

* Updated dependencies

* Corrected typo in code_style.yaml

* removed intersphinx for sklearn

* Added docstrings

* Added tests for skutils

* Updated tests

* Updated the default network
williamjameshandley authored May 1, 2024
1 parent 8383448 commit 8df1ab7
Showing 12 changed files with 436 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code_style.yaml
@@ -46,4 +46,4 @@ jobs:
- name: Install pydocstyle
run: pip install pydocstyle
- name: run pydocstyle
run: python -m pydocstyle --convention=numpy anesthetic
run: python -m pydocstyle --convention=numpy stemu
2 changes: 1 addition & 1 deletion README.rst
@@ -3,7 +3,7 @@ stemu: s(t) emulation of smooth functions by stacking
=====================================================
:stemu: s(t) emulation of smooth functions by stacking
:Author: Harry Bevins & Will Handley
:Version: 0.0.0
:Version: 0.0.1
:Homepage: https://github.com/handley-lab/stemu
:Documentation: http://stemu.readthedocs.io/

27 changes: 27 additions & 0 deletions docs/source/stemu.rst
@@ -5,3 +5,30 @@ stemu package
:members:
:undoc-members:
:show-inheritance:


stemu.emu module
----------------

.. automodule:: stemu.emu
:members:
:undoc-members:


stemu.skutils module
--------------------

.. automodule:: stemu.skutils
:members:
:undoc-members:


stemu.utils module
------------------

.. automodule:: stemu.utils
:members:
:undoc-members:
:show-inheritance:


3 changes: 3 additions & 0 deletions pyproject.toml
@@ -17,6 +17,9 @@ dependencies = [
'numpy',
'scipy',
'matplotlib',
'pandas',
'tensorflow',
'scikit-learn',
]
classifiers = [
"Programming Language :: Python :: 3",
2 changes: 1 addition & 1 deletion stemu/_version.py
@@ -1 +1 @@
__version__ = "0.0.0"
__version__ = "0.0.1"
119 changes: 119 additions & 0 deletions stemu/emu.py
@@ -0,0 +1,119 @@
"""Emulator base class."""

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from tensorflow import keras

from stemu.skutils import CDFTransformer, FunctionScaler, IdentityTransformer
from stemu.utils import stack, unstack

default_network = [
keras.layers.Dense(30, activation="relu"),
keras.layers.Dense(30, activation="relu"),
keras.layers.Dense(30, activation="relu"),
]


class Emu(object):
"""General Emulation base class.
This fits an emulator for y=f(t|X) in the style of sklearn models.
Anything with a default initialisation in the __init__ method is considered
a hyperparameter and can be adjusted by the user after initialisation.
Attributes
----------
model : keras model, default is a simple dense network
epochs : int, default=100
loss : keras loss, default='mse'
optimizer : keras optimizer, default='adam'
callbacks : list of keras.callbacks
X_pipeline : sklearn.pipeline to transform input data X
t_pipeline : sklearn.pipeline to transform independent variable t
y_pipeline : sklearn.pipeline to transform dependent variable y
ty_pipeline : sklearn.pipeline to transform independent and dependent
variables simultaneously
"""

def __init__(self, *args, **kwargs):
self.epochs = 100
self.loss = "mse"
self.optimizer = "adam"
self.callbacks = [keras.callbacks.EarlyStopping(monitor="loss", patience=3)]

self.X_pipeline = Pipeline([("scaler", StandardScaler())])
self.t_pipeline = Pipeline([("cdf", CDFTransformer())])
self.y_pipeline = Pipeline([("default", IdentityTransformer())])
self.ty_pipeline = Pipeline([("scaler", FunctionScaler())])

self.network = default_network

def fit(self, X, t, y):
"""Fit the emulator.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
Returns
-------
self : object
Returns self.
"""
self.t = t

X = self.X_pipeline.fit_transform(X)
y = self.y_pipeline.fit_transform(y)
t = self.t_pipeline.fit_transform(t, y)

ty = self.ty_pipeline.fit_transform(np.block([[t], [y]]))
t, y = ty[0], ty[1:]

X, y = stack(X, t, y)

self.model = keras.models.Sequential(
[keras.layers.Input(X.shape[-1:])]
+ self.network
+ [keras.layers.Dense(1, activation="linear")]
)

self.model.compile(loss=self.loss, optimizer=self.optimizer)
self.history = self.model.fit(
X, y, epochs=self.epochs, batch_size=len(t), callbacks=self.callbacks
)
return self

def predict(self, X, t=None):
"""Predict the target.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
Defaults to the original training t
Returns
-------
y : array-like of shape (n_samples, n_target)
The predicted target
"""
if t is None:
t = self.t
t = self.t_pipeline.transform(t)
X = self.X_pipeline.transform(np.atleast_2d(X))
X, _ = stack(X, np.atleast_1d(t), 1)
y = self.model.predict(X)
_, _, y = unstack(X, y)
ty = self.ty_pipeline.inverse_transform(np.block([[t], [y]]))
_, y = ty[0], ty[1:]
y = self.y_pipeline.inverse_transform(y)
return y
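
An illustrative usage sketch of the sklearn-style fit/predict flow defined above. This is not part of the commit: the training arrays X, t, y and the hyperparameter values are made up, but the class, methods and attributes (Emu, fit, predict, epochs, network) are the ones introduced in stemu/emu.py.

import numpy as np
from tensorflow import keras
from stemu.emu import Emu

# Hypothetical training set: 100 samples of a 2-parameter family of curves
# evaluated on a shared grid t of 50 points.
rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, (100, 2))              # (n_samples, n_features)
t = np.linspace(0, 1, 50)                     # (n_target,)
y = np.sin(10 * X[:, :1] * t) + X[:, 1:]      # (n_samples, n_target)

emu = Emu()
emu.epochs = 50                               # hyperparameters are plain attributes
emu.network = [keras.layers.Dense(64, activation="relu"),
               keras.layers.Dense(64, activation="relu")]
emu.fit(X, t, y)

y_pred = emu.predict(X[:5])                   # predictions on the training grid t
y_fine = emu.predict(X[:5], t=np.linspace(0, 1, 200))  # finer grid within the training range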
137 changes: 137 additions & 0 deletions stemu/skutils.py
@@ -0,0 +1,137 @@
"""Utilities for working with scikit-learn."""

import numpy as np
from scipy.interpolate import interp1d
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer, StandardScaler


class CDFTransformer(BaseEstimator, TransformerMixin):
"""Transform independent variable using CDF from dependent variable.
The CDF is defined by the cumulative sum of the standard deviation of the
dependent variable.
This is in the style of other sklearn transformers.
"""

def transform(self, X):
"""Transform the independent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self.cdf(X)

def inverse_transform(self, X):
"""Inverse transform the independent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self.icdf(X)

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
cdf = y.std(axis=0).cumsum() / y.std(axis=0).sum()
self.cdf = interp1d(X, cdf)
self.icdf = interp1d(cdf, X)
return self


class FunctionScaler(BaseEstimator, TransformerMixin):
"""Scale dependent variable.
The function is defined by the mean and standard deviation of the dependent
variable (as a function of the independent variable).
"""

def transform(self, X):
"""Transform the dependent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
t, y = X[0], X[1:]
y = (y - self.mean(t)) / self.std(t)
return np.block([[t], [y]])

def inverse_transform(self, X):
"""Inverse transform the dependent variable.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
t, y = X[0], X[1:]
y = y * self.std(t) + self.mean(t)
return np.block([[t], [y]])

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
t, y = X[0], X[1:]
self.mean = interp1d(t, y.mean(axis=0))
self.std = interp1d(t, y.std(axis=0))
return self


class IdentityTransformer(BaseEstimator, TransformerMixin):
"""Do nothing transformer."""

def __init__(self):
pass

def fit(self, X, y=None):
"""Fit the transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return self

def transform(self, X, y=None):
"""Transform the data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
return X

def inverse_transform(self, X):
"""Inverse transform the data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
"""
return X
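
An illustrative sketch of the two non-trivial transformers above, exercised directly. Not part of the commit; the toy grid t and curves y are made up, while CDFTransformer and FunctionScaler are the classes defined in stemu/skutils.py.

import numpy as np
from stemu.skutils import CDFTransformer, FunctionScaler

t = np.linspace(0, 1, 11)                               # independent variable grid
y = np.vstack([np.sin(5 * t + i) for i in range(4)])    # 4 toy curves on that grid

# CDFTransformer: builds a CDF from the cumulative standard deviation of y
# across samples, so resolution concentrates where the curves spread the most.
cdf = CDFTransformer().fit(t, y)
u = cdf.transform(t)                  # monotonically increasing, ending at 1
t_back = cdf.inverse_transform(u)     # recovers t at the fitted grid points

# FunctionScaler: acts on a stacked array whose first row is t and remaining
# rows are y; it removes the t-dependent mean and standard deviation.
fs = FunctionScaler().fit(np.block([[t], [y]]))
ty = fs.transform(np.block([[t], [y]]))
y_scaled = ty[1:]                     # roughly zero mean and unit std at each t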
59 changes: 59 additions & 0 deletions stemu/utils.py
@@ -0,0 +1,59 @@
"""Utility functions for stemu."""

import numpy as np
import pandas as pd


def stack(X, t, y):
"""Stack the data for training.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
Returns
-------
X : array-like of shape (n_samples*n_target,n_features)
The input data.
y : array-like of shape (n_samples*n_target,)
The dependent variable for the target
"""
data = pd.DataFrame(
y, columns=t, index=pd.MultiIndex.from_arrays(np.atleast_2d(X).T)
).stack()
y = data.to_numpy()
X = data.index.to_frame().to_numpy()
return X, y


def unstack(X, y):
"""Unstack the data for prediction.
Parameters
----------
X : array-like of shape (n_samples*n_target, n_features)
The input data.
y : array-like of shape (n_samples*n_target,)
The dependent variable for the target
Returns
-------
X : array-like of shape (n_samples, n_features)
The input data.
t : array-like of shape (n_target,)
The independent variable for the target
y : array-like of shape (n_samples, n_target)
The dependent variable for the target
"""
data = pd.DataFrame(y, index=pd.MultiIndex.from_arrays(np.atleast_2d(X).T)).unstack(
sort=False
)
y = data.to_numpy()
X = data.index.to_frame().to_numpy()
t = data.columns.get_level_values(1).to_numpy()
return X, t, y
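
A small round-trip sketch of the stacking helpers, with made-up shapes; stack and unstack are the functions defined above, everything else is illustrative.

import numpy as np
from stemu.utils import stack, unstack

X = np.array([[0.0, 1.0],
              [2.0, 3.0]])            # 2 samples, 2 features
t = np.array([0.0, 0.5, 1.0])         # 3 target points
y = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])       # (n_samples, n_target)

Xs, ys = stack(X, t, y)               # Xs: (6, 3) = the 2 features plus a t column; ys: (6,)
X2, t2, y2 = unstack(Xs, ys)          # recovers the (2, 2), (3,), (2, 3) layout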