diff --git a/alibi_detect/cd/_domain_clf.py b/alibi_detect/cd/_domain_clf.py index 84e540e7d..942ef43fe 100644 --- a/alibi_detect/cd/_domain_clf.py +++ b/alibi_detect/cd/_domain_clf.py @@ -1,5 +1,4 @@ from abc import ABC, abstractmethod -from typing import Callable import numpy as np from sklearn.svm import SVC from sklearn.calibration import CalibratedClassifierCV @@ -34,7 +33,6 @@ def predict(self, x: np.ndarray) -> np.ndarray: class _SVCDomainClf(_DomainClf): def __init__(self, - kernel: Callable, cal_method: str = 'sigmoid', clf_kwargs: dict = None): """ @@ -52,52 +50,51 @@ def __init__(self, clf_kwargs A dictionary of keyword arguments to be passed to the :py:class:`~sklearn.svm.SVC` classifier. """ - self.kernel = kernel self.cal_method = cal_method clf_kwargs = clf_kwargs or {} - self.clf = SVC(kernel=self.kernel, **clf_kwargs) + self.clf = SVC(kernel='precomputed', **clf_kwargs) - def fit(self, x: np.ndarray, y: np.ndarray): + def fit(self, K_x: np.ndarray, y: np.ndarray): """ Method to fit the classifier. Parameters ---------- - x - Array containing conditioning variables for each instance. + K_x + Kernel matrix on the conditioning variables. y Boolean array marking the domain each instance belongs to (`0` for reference, `1` for test). """ clf = self.clf - clf.fit(x, y) + clf.fit(K_x, y) self.clf = clf - def calibrate(self, x: np.ndarray, y: np.ndarray): + def calibrate(self, K_x: np.ndarray, y: np.ndarray): """ Method to calibrate the classifier's predicted probabilities. Parameters ---------- - x - Array containing conditioning variables for each instance. + K_x + Kernel matrix on the conditioning variables. y Boolean array marking the domain each instance belongs to (`0` for reference, `1` for test). 
""" clf = CalibratedClassifierCV(self.clf, method=self.cal_method, cv='prefit') - clf.fit(x, y) + clf.fit(K_x, y) self.clf = clf - def predict(self, x: np.ndarray) -> np.ndarray: + def predict(self, K_x: np.ndarray) -> np.ndarray: """ The classifier's predict method. Parameters ---------- - x - Array containing conditioning variables for each instance. + K_x + Kernel matrix on the conditioning variables. Returns ------- Propensity scores (the probability of being test instances). """ - return self.clf.predict_proba(x)[:, 1] + return self.clf.predict_proba(K_x)[:, 1] diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py index ca050c462..b09754312 100644 --- a/alibi_detect/cd/base.py +++ b/alibi_detect/cd/base.py @@ -508,7 +508,6 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, input_shape: Optional[tuple] = None, @@ -536,9 +535,6 @@ def __init__( for reservoir sampling {'reservoir_sampling': n} is passed. preprocess_fn Function to preprocess the data before computing the data drift metrics. - sigma - Optionally set the Gaussian RBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. configure_kernel_from_x_ref Whether to already configure the kernel bandwidth from the reference data. n_permutations @@ -553,12 +549,7 @@ def __init__( if p_val is None: logger.warning('No p-value set for the drift threshold. Need to set it to detect data drift.') - self.infer_sigma = configure_kernel_from_x_ref - if configure_kernel_from_x_ref and isinstance(sigma, np.ndarray): - self.infer_sigma = False - logger.warning('`sigma` is specified for the kernel and `configure_kernel_from_x_ref` ' - 'is set to True. 
`sigma` argument takes priority over ' - '`configure_kernel_from_x_ref` (set to False).') + self.infer_parameter = configure_kernel_from_x_ref # x_ref preprocessing self.preprocess_at_init = preprocess_at_init @@ -668,7 +659,6 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, n_permutations: int = 100, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, @@ -731,7 +721,6 @@ def __init__( # Other attributes self.p_val = p_val - self.sigma = sigma self.update_x_ref = update_x_ref self.preprocess_fn = preprocess_fn self.n = len(x_ref) diff --git a/alibi_detect/cd/context_aware.py b/alibi_detect/cd/context_aware.py index bb02c2ad3..ff2e193c2 100644 --- a/alibi_detect/cd/context_aware.py +++ b/alibi_detect/cd/context_aware.py @@ -4,6 +4,8 @@ from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow, BackendValidator, Framework from alibi_detect.utils.warnings import deprecated_alias from alibi_detect.base import DriftConfigMixin +from alibi_detect.utils.pytorch.kernels import BaseKernel as BaseKernel_pt +from alibi_detect.utils.tensorflow.kernels import BaseKernel as BaseKernel_tf if has_pytorch: from alibi_detect.cd.pytorch.context_aware import ContextMMDDriftTorch @@ -26,8 +28,8 @@ def __init__( preprocess_at_init: bool = True, update_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - x_kernel: Callable = None, - c_kernel: Callable = None, + x_kernel: Union[BaseKernel_pt, BaseKernel_tf] = None, + c_kernel: Union[BaseKernel_pt, BaseKernel_tf] = None, n_permutations: int = 1000, prop_c_held: float = 0.25, n_folds: int = 5, @@ -109,9 +111,9 @@ def __init__( else: from alibi_detect.utils.pytorch.kernels import GaussianRBF # type: ignore[no-redef] if x_kernel is None: - kwargs.update({'x_kernel': GaussianRBF}) + kwargs.update({'x_kernel': GaussianRBF()}) if c_kernel is None: - 
kwargs.update({'c_kernel': GaussianRBF}) + kwargs.update({'c_kernel': GaussianRBF()}) if backend == Framework.TENSORFLOW: kwargs.pop('device', None) diff --git a/alibi_detect/cd/keops/learned_kernel.py b/alibi_detect/cd/keops/learned_kernel.py index e3073713d..8a6b7d7c2 100644 --- a/alibi_detect/cd/keops/learned_kernel.py +++ b/alibi_detect/cd/keops/learned_kernel.py @@ -2,13 +2,12 @@ from functools import partial from tqdm import tqdm import numpy as np -from pykeops.torch import LazyTensor import torch import torch.nn as nn from torch.utils.data import DataLoader from typing import Callable, Dict, List, Optional, Union, Tuple from alibi_detect.cd.base import BaseLearnedKernelDrift -from alibi_detect.utils.pytorch import get_device, predict_batch +from alibi_detect.utils.pytorch import get_device from alibi_detect.utils.pytorch.data import TorchDataset from alibi_detect.utils.frameworks import Framework @@ -137,6 +136,7 @@ def __init__( self.device = get_device(device) self.original_kernel = kernel self.kernel = deepcopy(kernel) + self.kernel = self.kernel.to(self.device) # Check kernel format self.has_proj = hasattr(self.kernel, 'proj') and isinstance(self.kernel.proj, nn.Module) @@ -174,21 +174,10 @@ def __init__(self, kernel: nn.Module, var_reg: float, has_proj: bool, has_kernel def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: n = len(x) - if self.has_proj and isinstance(self.kernel.proj, nn.Module): - x_proj, y_proj = self.kernel.proj(x), self.kernel.proj(y) - else: - x_proj, y_proj = x, y - x2_proj, x_proj = LazyTensor(x_proj[None, :, :]), LazyTensor(x_proj[:, None, :]) - y2_proj, y_proj = LazyTensor(y_proj[None, :, :]), LazyTensor(y_proj[:, None, :]) - if self.has_kernel_b: - x2, x = LazyTensor(x[None, :, :]), LazyTensor(x[:, None, :]) - y2, y = LazyTensor(y[None, :, :]), LazyTensor(y[:, None, :]) - else: - x, x2, y, y2 = None, None, None, None - k_xy = self.kernel(x_proj, y2_proj, x, y2) - k_xx = self.kernel(x_proj, x2_proj, x, x2) - 
k_yy = self.kernel(y_proj, y2_proj, y, y2) + k_xy = self.kernel(x, y) + k_xx = self.kernel(x, x) + k_yy = self.kernel(y, y) h_mat = k_xx + k_yy - k_xy - k_xy.t() h_i = h_mat.sum(1).squeeze(-1) @@ -221,6 +210,7 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: self.kernel = deepcopy(self.original_kernel) if self.retrain_from_scratch else self.kernel self.kernel = self.kernel.to(self.device) + train_args = [self.j_hat, (dl_ref_tr, dl_cur_tr), self.device] LearnedKernelDriftKeops.trainer(*train_args, **self.train_kwargs) # type: ignore @@ -263,42 +253,24 @@ def _mmd2(self, x_all: Union[list, torch.Tensor], perms: List[torch.Tensor], m: preprocess_batch_fn = self.train_kwargs['preprocess_fn'] if isinstance(preprocess_batch_fn, Callable): # type: ignore[arg-type] x_all = preprocess_batch_fn(x_all) # type: ignore[operator] - if self.has_proj: - x_all_proj = predict_batch(x_all, self.kernel.proj, device=self.device, batch_size=self.batch_size_predict, - dtype=x_all.dtype if isinstance(x_all, torch.Tensor) else torch.float32) - else: - x_all_proj = x_all - x, x2, y, y2 = None, None, None, None + x, y = None, None k_xx, k_yy, k_xy = [], [], [] for batch in range(self.n_batches): i, j = batch * self.batch_size_perms, (batch + 1) * self.batch_size_perms # Stack a batch of permuted reference and test tensors and their projections - x_proj = torch.cat([x_all_proj[perm[:m]][None, :, :] for perm in perms[i:j]], 0) - y_proj = torch.cat([x_all_proj[perm[m:]][None, :, :] for perm in perms[i:j]], 0) - if self.has_kernel_b: - x = torch.cat([x_all[perm[:m]][None, :, :] for perm in perms[i:j]], 0) - y = torch.cat([x_all[perm[m:]][None, :, :] for perm in perms[i:j]], 0) + x = torch.cat([x_all[perm[:m]][None, :, :] for perm in perms[i:j]], 0) + y = torch.cat([x_all[perm[m:]][None, :, :] for perm in perms[i:j]], 0) if batch == 0: - x_proj = torch.cat([x_all_proj[None, :m, :], x_proj], 0) - y_proj = torch.cat([x_all_proj[None, m:, :], y_proj], 0) - if 
self.has_kernel_b: - x = torch.cat([x_all[None, :m, :], x], 0) # type: ignore[call-overload] - y = torch.cat([x_all[None, m:, :], y], 0) # type: ignore[call-overload] - x_proj, y_proj = x_proj.to(self.device), y_proj.to(self.device) - if self.has_kernel_b: - x, y = x.to(self.device), y.to(self.device) + x = torch.cat([x_all[None, :m, :], x], 0) # type: ignore[call-overload] + y = torch.cat([x_all[None, m:, :], y], 0) # type: ignore[call-overload] + x, y = x.to(self.device), y.to(self.device) # Batch-wise kernel matrix computation over the permutations with torch.no_grad(): - x2_proj, x_proj = LazyTensor(x_proj[:, None, :, :]), LazyTensor(x_proj[:, :, None, :]) - y2_proj, y_proj = LazyTensor(y_proj[:, None, :, :]), LazyTensor(y_proj[:, :, None, :]) - if self.has_kernel_b: - x2, x = LazyTensor(x[:, None, :, :]), LazyTensor(x[:, :, None, :]) - y2, y = LazyTensor(y[:, None, :, :]), LazyTensor(y[:, :, None, :]) - k_xy.append(self.kernel(x_proj, y2_proj, x, y2).sum(1).sum(1).squeeze(-1)) - k_xx.append(self.kernel(x_proj, x2_proj, x, x2).sum(1).sum(1).squeeze(-1)) - k_yy.append(self.kernel(y_proj, y2_proj, y, y2).sum(1).sum(1).squeeze(-1)) + k_xy.append(self.kernel(x, y).sum(1).sum(1).squeeze(-1)) + k_xx.append(self.kernel(x, x).sum(1).sum(1).squeeze(-1)) + k_yy.append(self.kernel(y, y).sum(1).sum(1).squeeze(-1)) c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. 
/ (m * n) # Note that the MMD^2 estimates assume that the diagonal of the kernel matrix consists of 1's diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 5b1a2fdc0..3b93d50fb 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -1,10 +1,9 @@ import logging import numpy as np -from pykeops.torch import LazyTensor import torch from typing import Callable, Dict, List, Optional, Tuple, Union from alibi_detect.cd.base import BaseMMDDrift -from alibi_detect.utils.keops.kernels import GaussianRBF +from alibi_detect.utils.keops.kernels import BaseKernel, GaussianRBF from alibi_detect.utils.pytorch import get_device from alibi_detect.utils.frameworks import Framework @@ -20,8 +19,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - kernel: Callable = GaussianRBF, - sigma: Optional[np.ndarray] = None, + kernel: Union[BaseKernel, Callable] = GaussianRBF, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, batch_size_permutations: int = 1000000, @@ -53,9 +51,6 @@ def __init__( Function to preprocess the data before computing the data drift metrics. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. configure_kernel_from_x_ref Whether to already configure the kernel bandwidth from the reference data. 
n_permutations @@ -77,7 +72,6 @@ def __init__( preprocess_at_init=preprocess_at_init, update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, - sigma=sigma, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, input_shape=input_shape, @@ -88,24 +82,39 @@ def __init__( # set device self.device = get_device(device) - # initialize kernel - sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] - np.ndarray) else None - self.kernel = kernel(sigma).to(self.device) if kernel == GaussianRBF else kernel + # initialise kernel + if isinstance(kernel, BaseKernel): + self.kernel = kernel + elif kernel == GaussianRBF: + self.kernel = kernel() + else: + raise ValueError("kernel must be an instance of alibi_detect.utils.keops.kernels.BaseKernel or a callable ") + + self.kernel_parameter_specified = True + if hasattr(kernel, 'parameter_dict'): + for param in self.kernel.parameter_dict.keys(): + kernel.parameter_dict[param].value.to(self.device) + if kernel.parameter_dict[param].requires_init: + self.given_kernel_parameter = False + break + + if self.kernel_parameter_specified and self.infer_parameter: + self.infer_parameter = False + logger.warning('parameters are specified for the kernel and `configure_kernel_from_x_ref` ' + 'is set to True. 
Specified parameters take priority over ' + '`configure_kernel_from_x_ref` (set to False).') # set the correct MMD^2 function based on the batch size for the permutations self.batch_size = batch_size_permutations self.n_batches = 1 + (n_permutations - 1) // batch_size_permutations # infer the kernel bandwidth from the reference data - if isinstance(sigma, torch.Tensor): - self.infer_sigma = False - elif self.infer_sigma: - x = torch.from_numpy(self.x_ref).to(self.device) - _ = self.kernel(LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :]), infer_sigma=self.infer_sigma) - self.infer_sigma = False + if self.infer_parameter: + x = torch.from_numpy(self.x_ref).to(self.device).reshape(1, self.x_ref.shape[0], -1) + _ = self.kernel(x, x, infer_parameter=self.infer_parameter) + self.infer_parameter = False else: - self.infer_sigma = True + self.infer_parameter = True def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ -> Tuple[torch.Tensor, torch.Tensor]: @@ -139,12 +148,10 @@ def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) x, y = x.to(self.device), y.to(self.device) # batch-wise kernel matrix computation over the permutations - k_xy.append(self.kernel( - LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :]), self.infer_sigma).sum(1).sum(1).squeeze(-1)) - k_xx.append(self.kernel( - LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1)) - k_yy.append(self.kernel( - LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) + k_xy.append(self.kernel(x, y, infer_parameter=self.infer_parameter).sum(1).sum(1).squeeze(-1)) + k_xx.append(self.kernel(x, x, infer_parameter=self.infer_parameter).sum(1).sum(1).squeeze(-1)) + k_yy.append(self.kernel(y, y, infer_parameter=self.infer_parameter).sum(1).sum(1).squeeze(-1)) + c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. 
/ (m * n) # Note that the MMD^2 estimates assume that the diagonal of the kernel matrix consists of 1's stats = c_xx * (torch.cat(k_xx) - m) + c_yy * (torch.cat(k_yy) - n) - c_xy * torch.cat(k_xy) diff --git a/alibi_detect/cd/keops/tests/test_learned_kernel_keops.py b/alibi_detect/cd/keops/tests/test_learned_kernel_keops.py index 646027fe3..02ce9bcdc 100644 --- a/alibi_detect/cd/keops/tests/test_learned_kernel_keops.py +++ b/alibi_detect/cd/keops/tests/test_learned_kernel_keops.py @@ -9,29 +9,35 @@ from alibi_detect.utils.pytorch import mmd2_from_kernel_matrix if has_keops: from alibi_detect.cd.keops.learned_kernel import LearnedKernelDriftKeops - from alibi_detect.utils.keops import GaussianRBF - from pykeops.torch import LazyTensor + from alibi_detect.utils.keops import GaussianRBF, BaseKernel, ProjKernel n = 50 # number of instances used for the reference and test data samples in the tests if has_keops: - class MyKernel(nn.Module): + class MyKernel(BaseKernel): def __init__(self, n_features: int, proj: bool): super().__init__() sigma = .1 - self.kernel = GaussianRBF(trainable=True, sigma=torch.Tensor([sigma])) + self.kernel_a = GaussianRBF(trainable=True, sigma=torch.Tensor([sigma])) + self.log_sigma_a = self.kernel_a.parameter_dict['log-sigma'].value self.has_proj = proj if proj: self.proj = nn.Linear(n_features, 2) self.kernel_b = GaussianRBF(trainable=True, sigma=torch.Tensor([sigma])) + self.proj_kernel = ProjKernel(self.proj, self.kernel_b) + self.comp_kernel = self.proj_kernel + self.kernel_a + self.log_sigma_b = self.kernel_b.parameter_dict['log-sigma'].value + else: + self.comp_kernel = self.kernel_a - def forward(self, x_proj: LazyTensor, y_proj: LazyTensor, x: Optional[LazyTensor] = None, - y: Optional[LazyTensor] = None) -> LazyTensor: - similarity = self.kernel(x_proj, y_proj) - if self.has_proj: - similarity = similarity + self.kernel_b(x, y) - return similarity + def kernel_function( + self, + x: torch.Tensor, + y: torch.Tensor, + infer_parameter: 
Optional[bool] = False + ) -> torch.Tensor: + return self.comp_kernel(x, y, infer_parameter) # test List[Any] inputs to the detector @@ -124,7 +130,7 @@ def test_lkdrift(lkdrift_params): if isinstance(preprocess_batch, Callable): x_all = preprocess_batch(x_all) - kernel = GaussianRBFTorch(sigma=cd.kernel.kernel.sigma) + kernel = GaussianRBFTorch(sigma=cd.kernel.kernel_a.sigma.cpu()) kernel_mat = kernel(x_all, x_all) mmd2_torch = mmd2_from_kernel_matrix(kernel_mat, n_test) - np.testing.assert_almost_equal(mmd2, mmd2_torch, decimal=6) + np.testing.assert_almost_equal(mmd2.cpu(), mmd2_torch.cpu(), decimal=6) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index a64a78173..86ce980a0 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -112,9 +112,13 @@ def test_mmd(mmd_params): kernel = GaussianRBF(sigma=cd.kernel.sigma) if isinstance(preprocess_fn, Callable): x_ref, x_h1 = cd.preprocess(x_h1) - x_ref = torch.from_numpy(x_ref).float() - x_h1 = torch.from_numpy(x_h1).float() + x_ref = torch.from_numpy(x_ref).float().to(cd.kernel.sigma.device) + x_h1 = torch.from_numpy(x_h1).float().to(cd.kernel.sigma.device) x_all = torch.cat([x_ref, x_h1], 0) kernel_mat = kernel(x_all, x_all) mmd2_torch = mmd2_from_kernel_matrix(kernel_mat, x_h1.shape[0]) + if isinstance(mmd2, torch.Tensor): + mmd2 = mmd2.cpu().numpy() + if isinstance(mmd2_torch, torch.Tensor): + mmd2_torch = mmd2_torch.cpu().numpy() np.testing.assert_almost_equal(mmd2, mmd2_torch, decimal=6) diff --git a/alibi_detect/cd/lsdd.py b/alibi_detect/cd/lsdd.py index e8a45d30f..1514f5435 100644 --- a/alibi_detect/cd/lsdd.py +++ b/alibi_detect/cd/lsdd.py @@ -22,7 +22,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_permutations: 
int = 100, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, diff --git a/alibi_detect/cd/lsdd_online.py b/alibi_detect/cd/lsdd_online.py index d8d3d5bf6..15c36fb0f 100644 --- a/alibi_detect/cd/lsdd_online.py +++ b/alibi_detect/cd/lsdd_online.py @@ -18,7 +18,7 @@ def __init__( backend: str = 'tensorflow', preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_bootstraps: int = 1000, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 3a0c289a5..92da1f3c8 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -29,7 +29,6 @@ def __init__( update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, kernel: Callable = None, - sigma: Optional[np.ndarray] = None, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, batch_size_permutations: int = 1000000, @@ -63,9 +62,6 @@ def __init__( Function to preprocess the data before computing the data drift metrics. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. configure_kernel_from_x_ref Whether to already configure the kernel bandwidth from the reference data. 
n_permutations diff --git a/alibi_detect/cd/mmd_online.py b/alibi_detect/cd/mmd_online.py index a26624955..cee60e17b 100644 --- a/alibi_detect/cd/mmd_online.py +++ b/alibi_detect/cd/mmd_online.py @@ -5,9 +5,11 @@ if has_pytorch: from alibi_detect.cd.pytorch.mmd_online import MMDDriftOnlineTorch + from alibi_detect.utils.pytorch.kernels import BaseKernel as BaseKernelTorch if has_tensorflow: from alibi_detect.cd.tensorflow.mmd_online import MMDDriftOnlineTF + from alibi_detect.utils.tensorflow.kernels import BaseKernel as BaseKernelTF class MMDDriftOnline(DriftConfigMixin): @@ -19,8 +21,7 @@ def __init__( backend: str = 'tensorflow', preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - kernel: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, + kernel: Optional[Union[BaseKernelTorch, BaseKernelTF]] = None, n_bootstraps: int = 1000, device: Optional[str] = None, verbose: bool = True, @@ -51,10 +52,6 @@ def __init__( data will also be preprocessed. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. If `sigma` is not specified, the 'median - heuristic' is adopted whereby `sigma` is set as the median pairwise distance between reference samples. n_bootstraps The number of bootstrap simulations used to configure the thresholds. The larger this is the more accurately the desired ERT will be targeted. 
Should ideally be at least an order of magnitude diff --git a/alibi_detect/cd/pytorch/context_aware.py b/alibi_detect/cd/pytorch/context_aware.py index 7b63357ee..d3e2b89de 100644 --- a/alibi_detect/cd/pytorch/context_aware.py +++ b/alibi_detect/cd/pytorch/context_aware.py @@ -4,7 +4,7 @@ from typing import Callable, Dict, Optional, Tuple, Union from alibi_detect.cd.base import BaseContextMMDDrift from alibi_detect.utils.pytorch import get_device -from alibi_detect.utils.pytorch.kernels import GaussianRBF +from alibi_detect.utils.pytorch.kernels import BaseKernel, GaussianRBF from alibi_detect.utils.warnings import deprecated_alias from alibi_detect.utils.frameworks import Framework from alibi_detect.cd._domain_clf import _SVCDomainClf @@ -13,6 +13,29 @@ logger = logging.getLogger(__name__) +def _sigma_median_diag(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor: + """ + Private version of the bandwidth estimation function :py:func:`~alibi_detect.utils.pytorch.kernels.sigma_median`, + with the +n (and -1) term excluded to account for the diagonal of the kernel matrix. + + Parameters + ---------- + x + Tensor of instances with dimension [Nx, features]. + y + Tensor of instances with dimension [Ny, features]. + dist + Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + + Returns + ------- + The computed bandwidth, `sigma`. 
+ """ + n_median = np.prod(dist.shape) // 2 + sigma = (.5 * dist.flatten().sort().values[n_median].unsqueeze(dim=-1)) ** .5 + return sigma + + class ContextMMDDriftTorch(BaseContextMMDDrift): lams: Optional[Tuple[torch.Tensor, torch.Tensor]] = None @@ -26,8 +49,8 @@ def __init__( preprocess_at_init: bool = True, update_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - x_kernel: Callable = GaussianRBF, - c_kernel: Callable = GaussianRBF, + x_kernel: Union[BaseKernel, Callable] = GaussianRBF, + c_kernel: Union[BaseKernel, Callable] = GaussianRBF, n_permutations: int = 1000, prop_c_held: float = 0.25, n_folds: int = 5, @@ -111,9 +134,6 @@ def __init__( self.x_kernel = x_kernel(init_sigma_fn=_sigma_median_diag) if x_kernel == GaussianRBF else x_kernel self.c_kernel = c_kernel(init_sigma_fn=_sigma_median_diag) if c_kernel == GaussianRBF else c_kernel - # Initialize classifier (hardcoded for now) - self.clf = _SVCDomainClf(self.c_kernel) - def score(self, # type: ignore[override] x: Union[np.ndarray, list], c: np.ndarray) -> Tuple[float, float, float, Tuple]: """ @@ -137,6 +157,9 @@ def score(self, # type: ignore[override] x_ref = torch.from_numpy(x_ref).to(self.device) # type: ignore[assignment] c_ref = torch.from_numpy(self.c_ref).to(self.device) # type: ignore[assignment] + # Initialize classifier (hardcoded for now) + self.clf = _SVCDomainClf() + # Hold out a portion of contexts for conditioning on n, n_held = len(c), int(len(c)*self.prop_c_held) inds_held = np.random.choice(n, n_held, replace=False) @@ -155,12 +178,13 @@ def score(self, # type: ignore[override] L_held = self.c_kernel(c_held, c_all) # Fit and calibrate the domain classifier - c_all_np, bools_np = c_all.cpu().numpy(), bools.cpu().numpy() - self.clf.fit(c_all_np, bools_np) - self.clf.calibrate(c_all_np, bools_np) + bools_np = bools.cpu().numpy() + K_c_all_np = self.c_kernel(c_all, c_all).cpu().numpy() + self.clf.fit(K_c_all_np, bools_np) + self.clf.calibrate(K_c_all_np, 
bools_np) # Obtain n_permutations conditional reassignments - prop_scores = torch.as_tensor(self.clf.predict(c_all_np)) + prop_scores = torch.as_tensor(self.clf.predict(K_c_all_np)) self.redrawn_bools = [torch.bernoulli(prop_scores) for _ in range(self.n_permutations)] iters = tqdm(self.redrawn_bools, total=self.n_permutations) if self.verbose else self.redrawn_bools @@ -254,26 +278,3 @@ def _pick_lam(self, lams: torch.Tensor, K: torch.Tensor, L: torch.Tensor, n_fold kxx = torch.ones_like(lWk).to(lWk.device) * torch.max(K) losses += (lWKWl + kxx - 2*lWk).sum(-1) return lams[torch.argmin(losses)] - - -def _sigma_median_diag(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor: - """ - Private version of the bandwidth estimation function :py:func:`~alibi_detect.utils.pytorch.kernels.sigma_median`, - with the +n (and -1) term excluded to account for the diagonal of the kernel matrix. - - Parameters - ---------- - x - Tensor of instances with dimension [Nx, features]. - y - Tensor of instances with dimension [Ny, features]. - dist - Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. - - Returns - ------- - The computed bandwidth, `sigma`. 
- """ - n_median = np.prod(dist.shape) // 2 - sigma = (.5 * dist.flatten().sort().values[int(n_median)].unsqueeze(dim=-1)) ** .5 - return sigma diff --git a/alibi_detect/cd/pytorch/lsdd.py b/alibi_detect/cd/pytorch/lsdd.py index cae318f97..9692024b4 100644 --- a/alibi_detect/cd/pytorch/lsdd.py +++ b/alibi_detect/cd/pytorch/lsdd.py @@ -19,7 +19,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_permutations: int = 100, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, @@ -77,7 +77,6 @@ def __init__( preprocess_at_init=preprocess_at_init, update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, - sigma=sigma, n_permutations=n_permutations, n_kernel_centers=n_kernel_centers, lambda_rd_max=lambda_rd_max, @@ -97,26 +96,22 @@ def __init__( x_ref = torch.as_tensor(self.x_ref).to(self.device) # type: ignore[assignment] self._configure_normalization(x_ref) # type: ignore[arg-type] x_ref = self._normalize(x_ref) - self._initialize_kernel(x_ref) # type: ignore[arg-type] + self.kernel = GaussianRBF(sigma=torch.tensor(sigma).to(self.device) if sigma is not None else None) + _ = self.kernel(x_ref, x_ref, infer_parameter=True) # infer sigma self._configure_kernel_centers(x_ref) # type: ignore[arg-type] self.x_ref = x_ref.cpu().numpy() # type: ignore[union-attr] # For stability in high dimensions we don't divide H by (pi*sigma^2)^(d/2) # Results in an alternative test-stat of LSDD*(pi*sigma^2)^(d/2). Same p-vals etc. self.H = GaussianRBF(np.sqrt(2.) 
* self.kernel.sigma)(self.kernel_centers, self.kernel_centers) - def _initialize_kernel(self, x_ref: torch.Tensor): - if self.sigma is None: - self.kernel = GaussianRBF() - _ = self.kernel(x_ref, x_ref, infer_sigma=True) - else: - sigma = torch.from_numpy(self.sigma) - self.kernel = GaussianRBF(sigma) - def _configure_normalization(self, x_ref: torch.Tensor, eps: float = 1e-12): + x_ref = x_ref.to(self.device) x_ref_means = x_ref.mean(0) x_ref_stds = x_ref.std(0) - self._normalize = lambda x: (torch.as_tensor(x) - x_ref_means) / (x_ref_stds + eps) # type: ignore[assignment] - self._unnormalize = lambda x: (torch.as_tensor(x) * (x_ref_stds + eps) # type: ignore[assignment] + self._normalize = lambda x: (torch.as_tensor(x, device=self.device) # type: ignore[assignment] + - x_ref_means) / (x_ref_stds + eps) + self._unnormalize = lambda x: (torch.as_tensor(x, device=self.device) # type: ignore[assignment] + * (x_ref_stds + eps) + x_ref_means).cpu().numpy() def _configure_kernel_centers(self, x_ref: torch.Tensor): @@ -152,7 +147,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: if self.preprocess_fn is not None and self.preprocess_at_init is False and not self.x_ref_preprocessed: self._configure_normalization(x_ref) # type: ignore[arg-type] x_ref = self._normalize(x_ref) - self._initialize_kernel(x_ref) # type: ignore[arg-type] + self.kernel = GaussianRBF() + _ = self.kernel(x_ref, x_ref, infer_parameter=True) # infer sigma self._configure_kernel_centers(x_ref) # type: ignore[arg-type] self.H = GaussianRBF(np.sqrt(2.) 
* self.kernel.sigma)(self.kernel_centers, self.kernel_centers) diff --git a/alibi_detect/cd/pytorch/lsdd_online.py b/alibi_detect/cd/pytorch/lsdd_online.py index a5c20ee40..c2ad0a521 100644 --- a/alibi_detect/cd/pytorch/lsdd_online.py +++ b/alibi_detect/cd/pytorch/lsdd_online.py @@ -4,7 +4,8 @@ from typing import Any, Callable, Optional, Union from alibi_detect.cd.base_online import BaseMultiDriftOnline from alibi_detect.utils.pytorch import get_device -from alibi_detect.utils.pytorch import GaussianRBF, permed_lsdds, quantile +from alibi_detect.utils.pytorch import permed_lsdds, quantile +from alibi_detect.utils.pytorch.kernels import GaussianRBF from alibi_detect.utils.frameworks import Framework from alibi_detect.base import DriftConfigMixin @@ -17,7 +18,7 @@ def __init__( window_size: int, preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_bootstraps: int = 1000, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, @@ -93,15 +94,7 @@ def __init__( self._configure_normalization() - # initialize kernel - if sigma is None: - x_ref = torch.from_numpy(self.x_ref).to(self.device) # type: ignore[assignment] - self.kernel = GaussianRBF() - _ = self.kernel(x_ref, x_ref, infer_sigma=True) - else: - sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] - np.ndarray) else None - self.kernel = GaussianRBF(sigma) # type: ignore[arg-type] + self.kernel = GaussianRBF(torch.tensor(sigma).to(self.device) if sigma is not None else None) if self.n_kernel_centers is None: self.n_kernel_centers = 2 * window_size @@ -115,7 +108,8 @@ def _configure_normalization(self, eps: float = 1e-12): x_ref_means = x_ref.mean(0) x_ref_stds = x_ref.std(0) self._normalize = lambda x: (x - x_ref_means) / (x_ref_stds + eps) - self._unnormalize = lambda x: (torch.as_tensor(x) * (x_ref_stds + eps) + x_ref_means).cpu().numpy() + 
self._unnormalize = lambda x: (torch.as_tensor(x, device=self.device) * (x_ref_stds + eps) + + x_ref_means).cpu().numpy() self.x_ref = self._normalize(x_ref).cpu().numpy() def _configure_kernel_centers(self): diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py index 666942b6c..8df7f8c97 100644 --- a/alibi_detect/cd/pytorch/mmd.py +++ b/alibi_detect/cd/pytorch/mmd.py @@ -5,7 +5,7 @@ from alibi_detect.cd.base import BaseMMDDrift from alibi_detect.utils.pytorch import get_device from alibi_detect.utils.pytorch.distance import mmd2_from_kernel_matrix -from alibi_detect.utils.pytorch.kernels import GaussianRBF +from alibi_detect.utils.pytorch.kernels import BaseKernel, GaussianRBF from alibi_detect.utils.warnings import deprecated_alias from alibi_detect.utils.frameworks import Framework @@ -22,8 +22,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - kernel: Callable = GaussianRBF, - sigma: Optional[np.ndarray] = None, + kernel: Union[BaseKernel, Callable] = GaussianRBF, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, device: Optional[str] = None, @@ -54,9 +53,6 @@ def __init__( Function to preprocess the data before computing the data drift metrics. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. configure_kernel_from_x_ref Whether to already configure the kernel bandwidth from the reference data. 
n_permutations @@ -76,7 +72,6 @@ def __init__( preprocess_at_init=preprocess_at_init, update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, - sigma=sigma, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, input_shape=input_shape, @@ -87,22 +82,39 @@ def __init__( # set device self.device = get_device(device) - # initialize kernel - sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] - np.ndarray) else None - self.kernel = kernel(sigma).to(self.device) if kernel == GaussianRBF else kernel + # initialise kernel + if isinstance(kernel, BaseKernel): + self.kernel = kernel + elif kernel == GaussianRBF: + self.kernel = kernel() + else: + raise ValueError("kernel must be an instance of alibi_detect.utils.pytorch.kernels.BaseKernel") + + self.kernel_parameter_specified = True + if hasattr(kernel, 'parameter_dict'): + for param in self.kernel.parameter_dict.keys(): + kernel.parameter_dict[param].value.to(self.device) + if kernel.parameter_dict[param].requires_init: + self.kernel_parameter_specified = False + break + + if self.kernel_parameter_specified and self.infer_parameter: + self.infer_parameter = False + logger.warning('parameters are specified for the kernel and `configure_kernel_from_x_ref` ' + 'is set to True. 
Specified parameters take priority over ' + '`configure_kernel_from_x_ref` (set to False).') # compute kernel matrix for the reference data - if self.infer_sigma or isinstance(sigma, torch.Tensor): + if self.infer_parameter or self.kernel_parameter_specified: x = torch.from_numpy(self.x_ref).to(self.device) - self.k_xx = self.kernel(x, x, infer_sigma=self.infer_sigma) - self.infer_sigma = False + self.k_xx = self.kernel(x, x, infer_parameter=self.infer_parameter) + self.infer_parameter = False else: - self.k_xx, self.infer_sigma = None, True + self.k_xx, self.infer_parameter = None, True def kernel_matrix(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: """ Compute and return full kernel matrix between arrays x and y. """ - k_xy = self.kernel(x, y, self.infer_sigma) + k_xy = self.kernel(x, y, self.infer_parameter) k_xx = self.k_xx if self.k_xx is not None and self.update_x_ref is None else self.kernel(x, x) k_yy = self.kernel(y, y) kernel_mat = torch.cat([torch.cat([k_xx, k_xy], 1), torch.cat([k_xy.T, k_yy], 1)], 0) diff --git a/alibi_detect/cd/pytorch/mmd_online.py b/alibi_detect/cd/pytorch/mmd_online.py index 808fe5c5d..12d9760c5 100644 --- a/alibi_detect/cd/pytorch/mmd_online.py +++ b/alibi_detect/cd/pytorch/mmd_online.py @@ -4,7 +4,7 @@ from typing import Any, Callable, Optional, Union from alibi_detect.cd.base_online import BaseMultiDriftOnline from alibi_detect.utils.pytorch import get_device -from alibi_detect.utils.pytorch.kernels import GaussianRBF +from alibi_detect.utils.pytorch.kernels import BaseKernel, GaussianRBF from alibi_detect.utils.pytorch import zero_diag, quantile from alibi_detect.utils.frameworks import Framework @@ -17,8 +17,7 @@ def __init__( window_size: int, preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - kernel: Callable = GaussianRBF, - sigma: Optional[np.ndarray] = None, + kernel: Union[BaseKernel, Callable] = GaussianRBF, n_bootstraps: int = 1000, device: Optional[str] = None, verbose: bool = 
True, @@ -47,10 +46,6 @@ def __init__( data will also be preprocessed. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. If `sigma` is not specified, the 'median - heuristic' is adopted whereby `sigma` is set as the median pairwise distance between reference samples. n_bootstraps The number of bootstrap simulations used to configure the thresholds. The larger this is the more accurately the desired ERT will be targeted. Should ideally be at least an order of magnitude @@ -81,14 +76,17 @@ def __init__( # set device self.device = get_device(device) - # initialize kernel - sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] - np.ndarray) else None - self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + # initialise kernel + if isinstance(kernel, BaseKernel): + self.kernel = kernel + elif kernel == GaussianRBF: + self.kernel = kernel() + else: + raise ValueError("kernel must be an instance of alibi_detect.utils.pytorch.kernels.BaseKernel") # compute kernel matrix for the reference data self.x_ref = torch.from_numpy(self.x_ref).to(self.device) - self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_sigma=(sigma is None)) + self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_parameter=self.kernel.init_required) self._configure_thresholds() self._initialise() diff --git a/alibi_detect/cd/tensorflow/context_aware.py b/alibi_detect/cd/tensorflow/context_aware.py index 6f9b773e4..3181267bd 100644 --- a/alibi_detect/cd/tensorflow/context_aware.py +++ b/alibi_detect/cd/tensorflow/context_aware.py @@ -4,7 +4,7 @@ import tensorflow_probability as tfp from typing import Callable, Dict, Optional, Tuple, Union, List from alibi_detect.cd.base import BaseContextMMDDrift -from alibi_detect.utils.tensorflow.kernels import 
GaussianRBF +from alibi_detect.utils.tensorflow.kernels import GaussianRBF, BaseKernel from alibi_detect.utils.warnings import deprecated_alias from alibi_detect.utils.frameworks import Framework from alibi_detect.cd._domain_clf import _SVCDomainClf @@ -13,6 +13,29 @@ logger = logging.getLogger(__name__) +def _sigma_median_diag(x: tf.Tensor, y: tf.Tensor, dist: tf.Tensor) -> tf.Tensor: + """ + Private version of the bandwidth estimation function :py:func:`~alibi_detect.utils.tensorflow.kernels.sigma_median`, + with the +n (and -1) term excluded to account for the diagonal of the kernel matrix. + + Parameters + ---------- + x + Tensor of instances with dimension [Nx, features]. + y + Tensor of instances with dimension [Ny, features]. + dist + Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + + Returns + ------- + The computed bandwidth, `sigma`. + """ + n_median = tf.math.reduce_prod(dist.shape) // 2 + sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0) + return sigma + + class ContextMMDDriftTF(BaseContextMMDDrift): lams: Optional[Tuple[tf.Tensor, tf.Tensor]] @@ -26,8 +49,8 @@ def __init__( preprocess_at_init: bool = True, update_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - x_kernel: Callable = GaussianRBF, - c_kernel: Callable = GaussianRBF, + x_kernel: Union[BaseKernel, Callable] = GaussianRBF, + c_kernel: Union[BaseKernel, Callable] = GaussianRBF, n_permutations: int = 1000, prop_c_held: float = 0.25, n_folds: int = 5, @@ -104,9 +127,6 @@ def __init__( self.x_kernel = x_kernel(init_sigma_fn=_sigma_median_diag) if x_kernel == GaussianRBF else x_kernel self.c_kernel = c_kernel(init_sigma_fn=_sigma_median_diag) if c_kernel == GaussianRBF else c_kernel - # Initialize classifier (hardcoded for now) - self.clf = _SVCDomainClf(self.c_kernel) - def score(self, # type: ignore[override] x: Union[np.ndarray, list], c: np.ndarray) -> Tuple[float, float, float, 
Tuple]: """ @@ -128,6 +148,9 @@ def score(self, # type: ignore[override] """ x_ref, x = self.preprocess(x) + # Initialize classifier (hardcoded for now) + self.clf = _SVCDomainClf() + # Hold out a portion of contexts for conditioning on n, n_held = len(c), int(len(c)*self.prop_c_held) inds_held = np.random.choice(n, n_held, replace=False) @@ -145,12 +168,13 @@ def score(self, # type: ignore[override] L_held = self.c_kernel(c_held, c_all) # Fit and calibrate the domain classifier - c_all_np, bools_np = c_all.numpy(), bools.numpy() - self.clf.fit(c_all_np, bools_np) - self.clf.calibrate(c_all_np, bools_np) + bools_np = bools.numpy() + K_c_all_np = self.c_kernel(c_all, c_all).numpy() + self.clf.fit(K_c_all_np, bools_np) + self.clf.calibrate(K_c_all_np, bools_np) # Obtain n_permutations conditional reassignments - prop_scores = self.clf.predict(c_all_np) + prop_scores = self.clf.predict(K_c_all_np) self.redrawn_bools = [tfp.distributions.Bernoulli(probs=prop_scores).sample() for _ in range(self.n_permutations)] iters = tqdm(self.redrawn_bools, total=self.n_permutations) if self.verbose else self.redrawn_bools @@ -271,26 +295,3 @@ def _split_chunks(n: int, p: int) -> List[int]: else: chunks = [n // p + 1] * (n % p) + [n // p] * (p - n % p) return chunks - - -def _sigma_median_diag(x: tf.Tensor, y: tf.Tensor, dist: tf.Tensor) -> tf.Tensor: - """ - Private version of the bandwidth estimation function :py:func:`~alibi_detect.utils.tensorflow.kernels.sigma_median`, - with the +n (and -1) term excluded to account for the diagonal of the kernel matrix. - - Parameters - ---------- - x - Tensor of instances with dimension [Nx, features]. - y - Tensor of instances with dimension [Ny, features]. - dist - Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. - - Returns - ------- - The computed bandwidth, `sigma`. 
- """ - n_median = tf.math.reduce_prod(dist.shape) // 2 - sigma = tf.expand_dims((.5 * tf.sort(tf.reshape(dist, (-1,)))[n_median]) ** .5, axis=0) - return sigma diff --git a/alibi_detect/cd/tensorflow/lsdd.py b/alibi_detect/cd/tensorflow/lsdd.py index ef0335ae9..8f31e9bbf 100644 --- a/alibi_detect/cd/tensorflow/lsdd.py +++ b/alibi_detect/cd/tensorflow/lsdd.py @@ -18,7 +18,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_permutations: int = 100, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, @@ -72,7 +72,6 @@ def __init__( preprocess_at_init=preprocess_at_init, update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, - sigma=sigma, n_permutations=n_permutations, n_kernel_centers=n_kernel_centers, lambda_rd_max=lambda_rd_max, @@ -85,21 +84,14 @@ def __init__( x_ref = tf.convert_to_tensor(self.x_ref) self._configure_normalization(x_ref) x_ref = self._normalize(x_ref) - self._initialize_kernel(x_ref) + self.kernel = GaussianRBF(tf.cast(sigma) if sigma is not None else None) + _ = self.kernel(x_ref, x_ref, infer_parameter=True) # infer sigma self._configure_kernel_centers(x_ref) self.x_ref = x_ref.numpy() # type: ignore[union-attr] # For stability in high dimensions we don't divide H by (pi*sigma^2)^(d/2) # Results in an alternative test-stat of LSDD*(pi*sigma^2)^(d/2). Same p-vals etc. self.H = GaussianRBF(np.sqrt(2.) 
* self.kernel.sigma)(self.kernel_centers, self.kernel_centers) - def _initialize_kernel(self, x_ref: tf.Tensor): - if self.sigma is None: - self.kernel = GaussianRBF() - _ = self.kernel(x_ref, x_ref, infer_sigma=True) - else: - sigma = tf.convert_to_tensor(self.sigma) - self.kernel = GaussianRBF(sigma) - def _configure_normalization(self, x_ref: tf.Tensor, eps: float = 1e-12): x_ref_means = tf.reduce_mean(x_ref, axis=0) x_ref_stds = tf.math.reduce_std(x_ref, axis=0) @@ -137,7 +129,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: if self.preprocess_fn is not None and not self.preprocess_at_init and not self.x_ref_preprocessed: self._configure_normalization(x_ref) x_ref = self._normalize(x_ref) - self._initialize_kernel(x_ref) + self.kernel = GaussianRBF() + _ = self.kernel(x_ref, x_ref, infer_parameter=True) # infer sigma self._configure_kernel_centers(x_ref) self.H = GaussianRBF(np.sqrt(2.) * self.kernel.sigma)(self.kernel_centers, self.kernel_centers) diff --git a/alibi_detect/cd/tensorflow/lsdd_online.py b/alibi_detect/cd/tensorflow/lsdd_online.py index 540884c5f..483a0a1d9 100644 --- a/alibi_detect/cd/tensorflow/lsdd_online.py +++ b/alibi_detect/cd/tensorflow/lsdd_online.py @@ -3,7 +3,8 @@ import tensorflow as tf from typing import Any, Callable, Optional, Union from alibi_detect.cd.base_online import BaseMultiDriftOnline -from alibi_detect.utils.tensorflow import GaussianRBF, quantile, permed_lsdds +from alibi_detect.utils.tensorflow import quantile, permed_lsdds +from alibi_detect.utils.tensorflow.kernels import GaussianRBF from alibi_detect.utils.frameworks import Framework @@ -15,7 +16,7 @@ def __init__( window_size: int, preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - sigma: Optional[np.ndarray] = None, + sigma: Optional[Union[np.ndarray, float]] = None, n_bootstraps: int = 1000, n_kernel_centers: Optional[int] = None, lambda_rd_max: float = 0.2, @@ -84,13 +85,7 @@ def __init__( 
self._configure_normalization() - # initialize kernel - if sigma is None: - self.kernel = GaussianRBF() - _ = self.kernel(self.x_ref, self.x_ref, infer_sigma=True) - else: - sigma = tf.convert_to_tensor(sigma) - self.kernel = GaussianRBF(sigma) + self.kernel = GaussianRBF(sigma=tf.cast(sigma) if sigma is not None else None) if self.n_kernel_centers is None: self.n_kernel_centers = 2*window_size diff --git a/alibi_detect/cd/tensorflow/mmd.py b/alibi_detect/cd/tensorflow/mmd.py index 977e1d18c..d6d7aa693 100644 --- a/alibi_detect/cd/tensorflow/mmd.py +++ b/alibi_detect/cd/tensorflow/mmd.py @@ -4,7 +4,7 @@ from typing import Callable, Dict, Optional, Tuple, Union from alibi_detect.cd.base import BaseMMDDrift from alibi_detect.utils.tensorflow.distance import mmd2_from_kernel_matrix -from alibi_detect.utils.tensorflow.kernels import GaussianRBF +from alibi_detect.utils.tensorflow.kernels import GaussianRBF, BaseKernel from alibi_detect.utils.warnings import deprecated_alias from alibi_detect.utils.frameworks import Framework @@ -21,8 +21,7 @@ def __init__( preprocess_at_init: bool = True, update_x_ref: Optional[Dict[str, int]] = None, preprocess_fn: Optional[Callable] = None, - kernel: Callable = GaussianRBF, - sigma: Optional[np.ndarray] = None, + kernel: Union[BaseKernel, Callable] = GaussianRBF, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, input_shape: Optional[tuple] = None, @@ -52,9 +51,6 @@ def __init__( Function to preprocess the data before computing the data drift metrics. kernel Kernel used for the MMD computation, defaults to Gaussian RBF kernel. - sigma - Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. - The kernel evaluation is then averaged over those bandwidths. configure_kernel_from_x_ref Whether to already configure the kernel bandwidth from the reference data. 
n_permutations @@ -71,7 +67,6 @@ def __init__( preprocess_at_init=preprocess_at_init, update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, - sigma=sigma, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, input_shape=input_shape, @@ -79,21 +74,37 @@ def __init__( ) self.meta.update({'backend': Framework.TENSORFLOW.value}) - # initialize kernel - if isinstance(sigma, np.ndarray): - sigma = tf.convert_to_tensor(sigma) - self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + # initialise kernel + if isinstance(kernel, BaseKernel): + self.kernel = kernel + elif kernel == GaussianRBF: + self.kernel = kernel() + else: + raise ValueError("kernel must be an instance of alibi_detect.utils.tensorflow.kernels.BaseKernel") + + self.kernel_parameter_specified = True + if hasattr(kernel, 'parameter_dict'): + for param in self.kernel.parameter_dict.keys(): + if kernel.parameter_dict[param].requires_init: + self.given_kernel_parameter = False + break + + if self.kernel_parameter_specified and self.infer_parameter: + self.infer_parameter = False + logger.warning('parameters are specified for the kernel and `configure_kernel_from_x_ref` ' + 'is set to True. Specified parameters take priority over ' + '`configure_kernel_from_x_ref` (set to False).') # compute kernel matrix for the reference data - if self.infer_sigma or isinstance(sigma, tf.Tensor): - self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_sigma=self.infer_sigma) + if self.infer_parameter or self.kernel_parameter_specified: + self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_parameter=self.infer_parameter) self.infer_sigma = False else: self.k_xx, self.infer_sigma = None, True def kernel_matrix(self, x: Union[np.ndarray, tf.Tensor], y: Union[np.ndarray, tf.Tensor]) -> tf.Tensor: """ Compute and return full kernel matrix between arrays x and y. 
""" - k_xy = self.kernel(x, y, self.infer_sigma) + k_xy = self.kernel(x, y, self.infer_parameter) k_xx = self.k_xx if self.k_xx is not None and self.update_x_ref is None else self.kernel(x, x) k_yy = self.kernel(y, y) kernel_mat = tf.concat([tf.concat([k_xx, k_xy], 1), tf.concat([tf.transpose(k_xy, (1, 0)), k_yy], 1)], 0) diff --git a/alibi_detect/cd/tensorflow/mmd_online.py b/alibi_detect/cd/tensorflow/mmd_online.py index 3d4a6b57a..5ae31c760 100644 --- a/alibi_detect/cd/tensorflow/mmd_online.py +++ b/alibi_detect/cd/tensorflow/mmd_online.py @@ -3,7 +3,7 @@ import tensorflow as tf from typing import Any, Callable, Optional, Union from alibi_detect.cd.base_online import BaseMultiDriftOnline -from alibi_detect.utils.tensorflow.kernels import GaussianRBF +from alibi_detect.utils.tensorflow.kernels import BaseKernel, GaussianRBF from alibi_detect.utils.tensorflow import zero_diag, quantile, subset_matrix from alibi_detect.utils.frameworks import Framework @@ -16,8 +16,7 @@ def __init__( window_size: int, preprocess_fn: Optional[Callable] = None, x_ref_preprocessed: bool = False, - kernel: Callable = GaussianRBF, - sigma: Optional[np.ndarray] = None, + kernel: Union[BaseKernel, Callable] = GaussianRBF, n_bootstraps: int = 1000, verbose: bool = True, input_shape: Optional[tuple] = None, @@ -73,13 +72,16 @@ def __init__( ) self.meta.update({'backend': Framework.TENSORFLOW.value}) - # initialize kernel - if isinstance(sigma, np.ndarray): - sigma = tf.convert_to_tensor(sigma) - self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + # initialise kernel + if isinstance(kernel, BaseKernel): + self.kernel = kernel + elif kernel == GaussianRBF: + self.kernel = kernel() + else: + raise ValueError("kernel must be an instance of alibi_detect.utils.tensorflow.kernels.BaseKernel") # compute kernel matrix for the reference data - self.k_xx = self.kernel(self.x_ref, self.x_ref, infer_sigma=(sigma is None)) + self.k_xx = self.kernel(self.x_ref, self.x_ref, 
infer_parameter=self.kernel.init_required) self._configure_thresholds() self._initialise() diff --git a/alibi_detect/saving/loading.py b/alibi_detect/saving/loading.py index 977da1ac3..b1febdcb5 100644 --- a/alibi_detect/saving/loading.py +++ b/alibi_detect/saving/loading.py @@ -130,6 +130,7 @@ def _load_detector_config(filepath: Union[str, os.PathLike]) -> ConfigurableDete # Resolve and validate config cfg = validate_config(cfg) + logger.info('Validated unresolved config.') cfg = resolve_config(cfg, config_dir=config_dir) cfg = validate_config(cfg, resolved=True) @@ -369,6 +370,8 @@ def _get_nested_value(dic: dict, keys: list) -> Any: dic = dic[key] except (TypeError, KeyError): return None + except IndexError: + return None # only for scalar in composite kernels as it doesn't have any keys return dic @@ -466,8 +469,11 @@ def resolve_config(cfg: dict, config_dir: Optional[Path]) -> dict: if config_dir is not None: _prepend_cfg_filepaths(cfg, config_dir) + # get additional fields to resolve for composite kernels TODO make a private function for this part, get temp fields + FIELDS_TO_RESOLVE_TEMP = _add_composite_fields(cfg) + # Resolve filepaths (load files) and resolve function/object registries - for key in FIELDS_TO_RESOLVE: + for key in FIELDS_TO_RESOLVE_TEMP: logger.info('Resolving config field: {}.'.format(key)) src = _get_nested_value(cfg, key) obj = None @@ -519,6 +525,72 @@ def resolve_config(cfg: dict, config_dir: Optional[Path]) -> dict: return cfg +def _add_composite_fields(cfg): + """ + Check if the cfg contains a composite kernel and add the fields to resolve. + + Parameters + ---------- + cfg + Config dict. + + Returns + ------- + FIELDS_TO_RESOLVE_TEMP + List of fields to resolve. 
+ """ + FIELDS_TO_RESOLVE_TEMP = FIELDS_TO_RESOLVE.copy() + if 'kernel' in cfg: + if isinstance(cfg['kernel'], dict): + if (cfg['kernel']['kernel_type'] == 'Sum') or (cfg['kernel']['kernel_type'] == 'Product'): + FIELDS_TO_RESOLVE_TEMP = FIELDS_TO_RESOLVE.copy() + composite_fields = _get_composite_kernel_fields(cfg['kernel']) + for field in composite_fields: + field.insert(0, 'kernel') + loc = FIELDS_TO_RESOLVE_TEMP.index(['kernel']) + FIELDS_TO_RESOLVE_TEMP[loc:loc] = composite_fields + return FIELDS_TO_RESOLVE_TEMP + + +def _get_composite_kernel_fields(cfg: dict) -> list: + """ + Get additional fields to resolve for composite kernels. + + Parameters + ---------- + cfg + The config dict. + + Returns + ------- + The additional fields to resolve. + """ + fields = [] + if 'kernel_type' in cfg: + if (cfg['kernel_type'] == 'Sum') or (cfg['kernel_type'] == 'Product'): + kernel_number = len(cfg['kernel_list']) + for i in range(kernel_number): + if isinstance(cfg['kernel_list']['comp_{}'.format(i)], dict): + if 'kernel_type' in cfg['kernel_list']['comp_{}'.format(i)]: + if (cfg['kernel_list']['comp_{}'.format(i)]['kernel_type'] == 'Sum') or \ + (cfg['kernel_list']['comp_{}'.format(i)]['kernel_type'] == 'Product'): + fields.extend(_get_composite_kernel_fields(cfg['kernel_list']['comp_{}'.format(i)])) + elif cfg['kernel_list']['comp_{}'.format(i)]['kernel_type'] == 'GaussianRBF': + fields.append(['kernel_list', 'comp_{}'.format(i), 'src']) + fields.append(['kernel_list', 'comp_{}'.format(i), 'init_sigma_fn']) + elif cfg['kernel_list']['comp_{}'.format(i)]['kernel_type'] == 'RationalQuadratic': + fields.append(['kernel_list', 'comp_{}'.format(i), 'src']) + fields.append(['kernel_list', 'comp_{}'.format(i), 'init_sigma_fn']) + fields.append(['kernel_list', 'comp_{}'.format(i), 'init_alpha_fn']) + elif cfg['kernel_list']['comp_{}'.format(i)]['kernel_type'] == 'Period': + fields.append(['kernel_list', 'comp_{}'.format(i), 'src']) + fields.append(['kernel_list', 
'comp_{}'.format(i), 'init_sigma_fn']) + fields.append(['kernel_list', 'comp_{}'.format(i), 'init_tau_fn']) + else: + raise ValueError('Unknown kernel type: {}'.format(cfg['comp_{}'.format(i)]['kernel_type'])) + return fields + + def _replace(cfg: dict, orig: Optional[str], new: Optional[str]) -> dict: """ Recursively traverse a nested dictionary and replace values. diff --git a/alibi_detect/saving/registry.py b/alibi_detect/saving/registry.py index b1ad20303..41a4a5621 100644 --- a/alibi_detect/saving/registry.py +++ b/alibi_detect/saving/registry.py @@ -42,14 +42,18 @@ def my_function(x: np.ndarray) -> np.ndarray: preprocess_drift as preprocess_drift_tf from alibi_detect.utils.tensorflow.data import TFDataset as TFDataset_tf from alibi_detect.utils.tensorflow.kernels import \ - GaussianRBF as GaussianRBF_tf, sigma_median as sigma_median_tf + GaussianRBF as GaussianRBF_tf, sigma_median as sigma_median_tf, \ + log_sigma_median as log_sigma_median_tf, RationalQuadratic as RationalQuadratic_tf, \ + Periodic as Periodic_tf, SumKernel as SumKernel_tf, ProductKernel as ProductKernel_tf from alibi_detect.cd.tensorflow.context_aware import _sigma_median_diag as _sigma_median_diag_tf if has_pytorch: from alibi_detect.cd.pytorch import \ preprocess_drift as preprocess_drift_torch from alibi_detect.utils.pytorch.kernels import \ - GaussianRBF as GaussianRBF_torch, sigma_median as sigma_median_torch + GaussianRBF as GaussianRBF_torch, sigma_median as sigma_median_torch, \ + log_sigma_median as log_sigma_median_torch, RationalQuadratic as RationalQuadratic_torch, \ + Periodic as Periodic_torch, SumKernel as SumKernel_torch, ProductKernel as ProductKernel_torch from alibi_detect.cd.pytorch.context_aware import _sigma_median_diag as _sigma_median_diag_torch # Create registry @@ -58,13 +62,23 @@ def my_function(x: np.ndarray) -> np.ndarray: # Register alibi-detect classes/functions if has_tensorflow: registry.register('utils.tensorflow.kernels.GaussianRBF', func=GaussianRBF_tf) + 
registry.register('utils.tensorflow.kernels.RationalQuadratic', func=RationalQuadratic_tf) + registry.register('utils.tensorflow.kernels.Periodic', func=Periodic_tf) + registry.register('utils.tensorflow.kernels.SumKernel', func=SumKernel_tf) + registry.register('utils.tensorflow.kernels.ProductKernel', func=ProductKernel_tf) registry.register('utils.tensorflow.kernels.sigma_median', func=sigma_median_tf) + registry.register('utils.tensorflow.kernels.log_sigma_median', func=log_sigma_median_tf) registry.register('cd.tensorflow.context_aware._sigma_median_diag', func=_sigma_median_diag_tf) registry.register('cd.tensorflow.preprocess.preprocess_drift', func=preprocess_drift_tf) registry.register('utils.tensorflow.data.TFDataset', func=TFDataset_tf) if has_pytorch: registry.register('utils.pytorch.kernels.GaussianRBF', func=GaussianRBF_torch) + registry.register('utils.pytorch.kernels.RationalQuadratic', func=RationalQuadratic_torch) + registry.register('utils.pytorch.kernels.Periodic', func=Periodic_torch) + registry.register('utils.pytorch.kernels.SumKernel', func=SumKernel_torch) + registry.register('utils.pytorch.kernels.ProductKernel', func=ProductKernel_torch) registry.register('utils.pytorch.kernels.sigma_median', func=sigma_median_torch) + registry.register('utils.pytorch.kernels.log_sigma_median', func=log_sigma_median_torch) registry.register('cd.pytorch.context_aware._sigma_median_diag', func=_sigma_median_diag_torch) registry.register('cd.pytorch.preprocess.preprocess_drift', func=preprocess_drift_torch) diff --git a/alibi_detect/saving/saving.py b/alibi_detect/saving/saving.py index 9648e404f..3e8588207 100644 --- a/alibi_detect/saving/saving.py +++ b/alibi_detect/saving/saving.py @@ -503,6 +503,22 @@ def _save_kernel_config(kernel: Callable, if not isinstance(kernel_b, str) and kernel_b is not None: cfg_kernel['kernel_b'] = _save_kernel_config(cfg_kernel['kernel_b'], base_path, Path('kernel_b')) + # if a composite kernel + elif hasattr(kernel, 
'kernel_list'): + kernel_class = kernel.__class__ + + if hasattr(kernel, 'get_config'): + cfg_kernel = kernel.get_config() # type: ignore[attr-defined] + else: + raise AttributeError("The detector's `kernel` must have a .get_config() method for it to be saved.") + + for i, k in enumerate(kernel.kernel_list): + if hasattr(k, 'get_config'): + cfg_kernel['kernel_list']['comp_' + str(i)] =\ + _save_kernel_config(k, base_path, Path(local_path, 'kernel_{}'.format(i))) + cfg_kernel['kernel_list'] = dict(sorted(cfg_kernel['kernel_list'].items())) + cfg_kernel['src'], _ = _serialize_object(kernel_class, base_path, local_path.joinpath('kernel')) + # If any other kernel, serialize the class to disk and get config else: if isinstance(kernel, type): # if still a class @@ -512,8 +528,18 @@ def _save_kernel_config(kernel: Callable, kernel_class = kernel.__class__ if hasattr(kernel, 'get_config'): cfg_kernel = kernel.get_config() # type: ignore[attr-defined] - cfg_kernel['init_sigma_fn'], _ = _serialize_object(cfg_kernel['init_sigma_fn'], base_path, - local_path.joinpath('init_sigma_fn')) + if 'init_sigma_fn' in cfg_kernel: + if cfg_kernel['init_sigma_fn'] is not None: + cfg_kernel['init_sigma_fn'], _ = _serialize_object(cfg_kernel['init_sigma_fn'], base_path, + local_path.joinpath('init_sigma_fn')) + if 'init_alpha_fn' in cfg_kernel: + if cfg_kernel['init_alpha_fn'] is not None: + cfg_kernel['init_alpha_fn'], _ = _serialize_object(cfg_kernel['init_alpha_fn'], base_path, + local_path.joinpath('init_alpha_fn')) + if 'init_tau_fn' in cfg_kernel: + if cfg_kernel['init_tau_fn'] is not None: + cfg_kernel['init_tau_fn'], _ = _serialize_object(cfg_kernel['init_tau_fn'], base_path, + local_path.joinpath('init_tau_fn')) else: raise AttributeError("The detector's `kernel` must have a .get_config() method for it to be saved.") # Serialize the kernel class diff --git a/alibi_detect/saving/schemas.py b/alibi_detect/saving/schemas.py index 68a902929..4ba773084 100644 --- 
a/alibi_detect/saving/schemas.py +++ b/alibi_detect/saving/schemas.py @@ -51,6 +51,44 @@ def validate_model(cls, model: Any, values: dict) -> Any: raise TypeError('The model is not recognised as a supported type.') +def validate_composite_kernel_config(cfg_kernel_list: Dict[str, Any]) -> Dict[str, Any]: + """ + Validate composite kernel config. + + Parameters + ---------- + cfg_kernel + Composite kernel config. + + Returns + ------- + cfg_kernel + Validated composite kernel config. + """ + # cfg_kernel = CompositeKernelConfig(**cfg_kernel).dict() + comp_number = len(cfg_kernel_list) + for i in range(comp_number): + if isinstance(cfg_kernel_list['comp_' + str(i)], dict): + if 'kernel_type' in cfg_kernel_list['comp_' + str(i)]: + if (cfg_kernel_list['comp_' + str(i)]['kernel_type'] == 'Sum') or\ + (cfg_kernel_list['comp_' + str(i)]['kernel_type'] == 'Product'): + cfg_kernel_list['comp_' + str(i)] =\ + CompositeKernelConfig(**cfg_kernel_list['comp_' + str(i)]).dict() + elif cfg_kernel_list['comp_' + str(i)]['kernel_type'] == 'GaussianRBF': + cfg_kernel_list['comp_' + str(i)] =\ + RBFKernelConfig(**cfg_kernel_list['comp_' + str(i)]).dict() + elif cfg_kernel_list['comp_' + str(i)]['kernel_type'] == 'RationalQuadratic': + cfg_kernel_list['comp_' + str(i)] =\ + RationalQuadraticKernelConfig(**cfg_kernel_list['comp_' + str(i)]).dict() + elif cfg_kernel_list['comp_' + str(i)]['kernel_type'] == 'Periodic': + cfg_kernel_list['comp_' + str(i)] =\ + PeriodicKernelConfig(**cfg_kernel_list['comp_' + str(i)]).dict() + else: + raise ValueError('Kernel type not supported.') + cfg_kernel_list = dict(sorted(cfg_kernel_list.items())) # Sort dict to ensure order is consistent + return cfg_kernel_list + + class SupportedOptimizer: """ Pydantic custom type to check the optimizer is one of the supported types (conditional on what optional deps @@ -320,7 +358,7 @@ class PreprocessConfig(CustomBaseModel): """ -class KernelConfig(CustomBaseModelWithKwargs): +class 
class RationalQuadraticKernelConfig(CustomBaseModelWithKwargs):
    """
    Unresolved schema for rational quadratic kernels, to be passed to a detector's `kernel` kwarg.

    If `src` specifies a :class:`~alibi_detect.utils.tensorflow.RationalQuadratic` kernel, the `sigma`, `alpha`,
    `trainable`, `init_sigma_fn` and `init_alpha_fn` fields are passed to it. Otherwise, all fields except `src`
    are passed as kwargs.

    The `src`, `kernel_type` and `flavour` fields have no default and must always be given.

    Examples
    --------
    A :class:`~alibi_detect.utils.tensorflow.RationalQuadratic` kernel, with three different bandwidths and alphas:

    .. code-block :: toml

        [kernel]
        src = "@alibi_detect.utils.tensorflow.RationalQuadratic"
        kernel_type = "RationalQuadratic"
        flavour = "tensorflow"
        trainable = false
        sigma = [0.1, 0.2, 0.3]
        alpha = [1.0, 2.0, 3.0]

    A serialized kernel with keyword arguments passed:

    .. code-block :: toml

        [kernel]
        src = "mykernel.dill"
        kernel_type = "RationalQuadratic"
        flavour = "tensorflow"
        sigma = 0.42
        alpha = 2.0
        custom_setting = "xyz"
    """
    src: str
    "A string referencing a filepath to a serialized kernel in `.dill` format, or an object registry reference."

    # Discriminates this schema from the other kernel schemas; only 'RationalQuadratic' is accepted.
    kernel_type: Literal['RationalQuadratic']

    # Below kwargs are only passed if kernel == @RationalQuadratic
    flavour: Literal['tensorflow', 'pytorch']
    """
    Whether the kernel is a `tensorflow` or `pytorch` kernel.
    """
    sigma: Optional[Union[float, List[float]]] = None
    """
    Bandwidth used for the kernel. Needn’t be specified if being inferred or trained. Can pass multiple values to eval
    kernel with and then average.
    """
    alpha: Optional[Union[float, List[float]]] = None
    """
    Exponent used for the kernel. Needn’t be specified if being inferred or trained. Can pass multiple values to eval
    kernel with and then average.
    """
    trainable: bool = False
    "Whether or not to track gradients w.r.t. sigma to allow it to be trained."

    init_sigma_fn: Optional[str] = None
    """
    Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. The function's signature
    should match :py:func:`~alibi_detect.utils.tensorflow.kernels.sigma_median`. If `None`, it is set to
    :func:`~alibi_detect.utils.tensorflow.kernels.sigma_median`.
    """
    init_alpha_fn: Optional[str] = None
    """
    Function used to compute the exponent `alpha`. Used when `alpha` is to be inferred. The function's signature
    should match :py:func:`~alibi_detect.utils.tensorflow.kernels.sigma_median`. Defaults to None.
    """
    # Validators
    # `flavour` is checked with `validate_framework`; `sigma` and `alpha` are passed through `coerce_2_tensor`
    # after pydantic's own type validation (`pre=False`).
    _validate_flavour = validator('flavour', allow_reuse=True, pre=False)(validate_framework)
    _coerce_sigma2tensor = validator('sigma', allow_reuse=True, pre=False)(coerce_2_tensor)
    _coerce_alpha2tensor = validator('alpha', allow_reuse=True, pre=False)(coerce_2_tensor)
class CompositeKernelConfig(CustomBaseModelWithKwargs):
    """
    Unresolved schema for composite (sum or product) kernels, to be passed to a detector's `kernel` kwarg.

    The components are given in `kernel_list`, a dictionary keyed by component name (`comp_0`, `comp_1`, ...).
    Component dictionaries that carry a `kernel_type` field are validated against the schema of that kernel type
    by `validate_composite_kernel_config`; other entries are kept as they are.

    Examples
    --------
    A :class:`~alibi_detect.utils.tensorflow.SumKernel` obtained by adding two
    :class:`~alibi_detect.utils.tensorflow.GaussianRBF` instances:

    .. code-block :: toml

        [kernel]
        src = "@alibi_detect.utils.tensorflow.SumKernel"
        kernel_type = "Sum"
        flavour = "tensorflow"

        [kernel.kernel_list.comp_0]
        src = "@alibi_detect.utils.tensorflow.GaussianRBF"
        kernel_type = "GaussianRBF"
        flavour = "tensorflow"
        trainable = false
        sigma = 0.1

        [kernel.kernel_list.comp_1]
        src = "@alibi_detect.utils.tensorflow.GaussianRBF"
        kernel_type = "GaussianRBF"
        flavour = "tensorflow"
        trainable = false
        sigma = 0.2
    """
    # Reference to the composite kernel (same role as `src` in the single-kernel schemas of this module).
    src: str

    # Selects the composition; only 'Sum' and 'Product' are accepted.
    kernel_type: Literal['Sum', 'Product']

    # Backend the kernel belongs to.
    flavour: Literal['tensorflow', 'pytorch']

    # Component configs keyed by component name, see the class docstring.
    kernel_list: Dict

    # Runs after pydantic's own type validation of `kernel_list` (`pre=False`).
    _validate_composite_kernel =\
        validator('kernel_list', allow_reuse=True, pre=False)(validate_composite_kernel_config)
This should be a Tensorflow or PyTorch model, specified as an object registry reference, or a :class:`~alibi_detect.utils.schemas.ModelConfig`. """ - kernel_a: Union[str, KernelConfig] = "@utils.tensorflow.kernels.GaussianRBF" + kernel_a: Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, PeriodicKernelConfig, CompositeKernelConfig]\ + = "@utils.tensorflow.kernels.GaussianRBF" """ The kernel to apply to the projected inputs. Defaults to a :class:`~alibi_detect.utils.tensorflow.kernels.GaussianRBF` with trainable bandwidth. """ - kernel_b: Optional[Union[str, KernelConfig]] = "@utils.tensorflow.kernels.GaussianRBF" + kernel_b: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, PeriodicKernelConfig, + CompositeKernelConfig]] = "@utils.tensorflow.kernels.GaussianRBF" """ The kernel to apply to the raw inputs. Defaults to a :class:`~alibi_detect.utils.tensorflow.kernels.GaussianRBF` with trainable bandwidth. Set to `None` in order to use only the deep component (i.e. `eps=0`). 
@@ -677,8 +892,8 @@ class MMDDriftConfig(DriftDetectorConfig): p_val: float = .05 preprocess_at_init: bool = True update_x_ref: Optional[Dict[str, int]] = None - kernel: Optional[Union[str, KernelConfig]] = None - sigma: Optional[NDArray[np.float32]] = None + kernel: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, + PeriodicKernelConfig, CompositeKernelConfig]] = None configure_kernel_from_x_ref: bool = True n_permutations: int = 100 batch_size_permutations: int = 1000000 @@ -698,7 +913,6 @@ class MMDDriftConfigResolved(DriftDetectorConfigResolved): preprocess_at_init: bool = True update_x_ref: Optional[Dict[str, int]] = None kernel: Optional[Callable] = None - sigma: Optional[NDArray[np.float32]] = None configure_kernel_from_x_ref: bool = True n_permutations: int = 100 batch_size_permutations: int = 1000000 @@ -839,7 +1053,8 @@ class SpotTheDiffDriftConfig(DriftDetectorConfig): verbose: int = 0 train_kwargs: Optional[dict] = None dataset: Optional[str] = None - kernel: Optional[Union[str, KernelConfig]] = None + kernel: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, + PeriodicKernelConfig, CompositeKernelConfig]] = None n_diffs: int = 1 initial_diffs: Optional[str] = None l1_reg: float = 0.01 @@ -959,8 +1174,10 @@ class ContextMMDDriftConfig(DriftDetectorConfig): c_ref: str preprocess_at_init: bool = True update_ref: Optional[Dict[str, int]] = None - x_kernel: Optional[Union[str, KernelConfig]] = None - c_kernel: Optional[Union[str, KernelConfig]] = None + x_kernel: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, + PeriodicKernelConfig, CompositeKernelConfig]] = None + c_kernel: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, + PeriodicKernelConfig, CompositeKernelConfig]] = None n_permutations: int = 100 prop_c_held: float = 0.25 n_folds: int = 5 @@ -1004,8 +1221,8 @@ class MMDDriftOnlineConfig(DriftDetectorConfig): backend: Literal['tensorflow', 'pytorch'] = 'tensorflow' ert: float 
window_size: int - kernel: Optional[Union[str, KernelConfig]] = None - sigma: Optional[np.ndarray] = None + kernel: Optional[Union[str, RBFKernelConfig, RationalQuadraticKernelConfig, + PeriodicKernelConfig, CompositeKernelConfig]] = None n_bootstraps: int = 1000 device: Optional[Literal['cpu', 'cuda']] = None verbose: bool = True @@ -1024,7 +1241,6 @@ class MMDDriftOnlineConfigResolved(DriftDetectorConfigResolved): ert: float window_size: int kernel: Optional[Callable] = None - sigma: Optional[np.ndarray] = None n_bootstraps: int = 1000 device: Optional[Literal['cpu', 'cuda']] = None verbose: bool = True diff --git a/alibi_detect/saving/tests/models.py b/alibi_detect/saving/tests/models.py index 5a1b28c0e..33afbf9e5 100644 --- a/alibi_detect/saving/tests/models.py +++ b/alibi_detect/saving/tests/models.py @@ -18,9 +18,17 @@ from alibi_detect.cd.tensorflow import UAE as UAE_tf from alibi_detect.cd.tensorflow import preprocess_drift as preprocess_drift_tf from alibi_detect.utils.pytorch.kernels import GaussianRBF as GaussianRBF_pt +from alibi_detect.utils.pytorch.kernels import RationalQuadratic as RationalQuadratic_pt +from alibi_detect.utils.pytorch.kernels import Periodic as Periodic_pt from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt +from alibi_detect.utils.pytorch.kernels import SumKernel as SumKernel_pt +from alibi_detect.utils.pytorch.kernels import ProductKernel as ProductKernel_pt from alibi_detect.utils.tensorflow.kernels import GaussianRBF as GaussianRBF_tf +from alibi_detect.utils.tensorflow.kernels import RationalQuadratic as RationalQuadratic_tf +from alibi_detect.utils.tensorflow.kernels import Periodic as Periodic_tf from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf +from alibi_detect.utils.tensorflow.kernels import SumKernel as SumKernel_tf +from alibi_detect.utils.tensorflow.kernels import ProductKernel as ProductKernel_tf from alibi_detect.models.pytorch import TransformerEmbedding as 
TransformerEmbedding_pt from alibi_detect.models.tensorflow import TransformerEmbedding as TransformerEmbedding_tf from alibi_detect.cd.pytorch import HiddenOutput as HiddenOutput_pt @@ -100,21 +108,14 @@ def preprocess_custom(encoder_model): @fixture def kernel(request, backend): """ - Gaussian RBF kernel for given backend. Settings are parametrised in the test function. + Kernel for given backend. Settings are parametrised in the test function. """ kernel = request.param - if isinstance(kernel, dict): # dict of kwargs - kernel_cfg = kernel.copy() - sigma = kernel_cfg.pop('sigma', None) if backend == 'tensorflow': - if sigma is not None and not isinstance(sigma, tf.Tensor): - sigma = tf.convert_to_tensor(sigma) - kernel = GaussianRBF_tf(sigma=sigma, **kernel_cfg) + kernel = initial_kernel_tf(kernel) elif backend == 'pytorch': - if sigma is not None and not isinstance(sigma, torch.Tensor): - sigma = torch.tensor(sigma) - kernel = GaussianRBF_pt(sigma=sigma, **kernel_cfg) + kernel = initial_kernel_pt(kernel) else: pytest.skip('`kernel` only implemented for tensorflow and pytorch.') return kernel @@ -147,8 +148,8 @@ def deep_kernel(request, backend, encoder_model): parametrised in the test function. 
""" # Get DeepKernel options - kernel_a = request.param.get('kernel_a', 'rbf') - kernel_b = request.param.get('kernel_b', 'rbf') + kernel_a = request.param.get('kernel_a', {'kernel_type': 'GaussianRBF'}) + kernel_b = request.param.get('kernel_b', {'kernel_type': 'GaussianRBF'}) eps = request.param.get('eps', 'trainable') # Proj model (backend managed in encoder_model fixture) @@ -156,18 +157,96 @@ def deep_kernel(request, backend, encoder_model): # Build DeepKernel if backend == 'tensorflow': - kernel_a = GaussianRBF_tf(**kernel_a) if isinstance(kernel_a, dict) else kernel_a - kernel_b = GaussianRBF_tf(**kernel_b) if isinstance(kernel_b, dict) else kernel_b + kernel_a = initial_kernel_tf(kernel_a) + kernel_b = initial_kernel_tf(kernel_b) deep_kernel = DeepKernel_tf(proj, kernel_a=kernel_a, kernel_b=kernel_b, eps=eps) elif backend == 'pytorch': - kernel_a = GaussianRBF_pt(**kernel_a) if isinstance(kernel_a, dict) else kernel_a - kernel_b = GaussianRBF_pt(**kernel_b) if isinstance(kernel_b, dict) else kernel_b + kernel_a = initial_kernel_pt(kernel_a) + kernel_b = initial_kernel_pt(kernel_b) deep_kernel = DeepKernel_pt(proj, kernel_a=kernel_a, kernel_b=kernel_b, eps=eps) else: pytest.skip('`deep_kernel` only implemented for tensorflow and pytorch.') return deep_kernel +def initial_kernel_tf(kernel_config): + kernel_config = kernel_config.copy() + if 'kernel_type' in kernel_config: + kernel_name = kernel_config.pop('kernel_type') + if ('sigma' in kernel_config) and (kernel_config['sigma'] is not None): + kernel_config['sigma'] = tf.convert_to_tensor(np.array(kernel_config['sigma']), dtype=tf.float32) + if ('alpha' in kernel_config) and (kernel_config['alpha'] is not None): + kernel_config['alpha'] = tf.convert_to_tensor(np.array(kernel_config['alpha']), dtype=tf.float32) + if ('tau' in kernel_config) and (kernel_config['tau'] is not None): + kernel_config['tau'] = tf.convert_to_tensor(np.array(kernel_config['tau']), dtype=tf.float32) + if kernel_name == 'GaussianRBF': 
+ kernel = GaussianRBF_tf(**kernel_config) + elif kernel_name == 'RationalQuadratic': + kernel = RationalQuadratic_tf(**kernel_config) + elif kernel_name == 'Periodic': + kernel = Periodic_tf(**kernel_config) + elif kernel_name == 'Sum': + kernel_list = [] + for k_config in kernel_config.values(): + if isinstance(k_config, dict): + kernel_list.append(initial_kernel_tf(k_config)) + elif isinstance(k_config, float): + kernel_list.append(tf.cast(k_config, dtype=tf.float32)) + final_config = {'kernel_list': kernel_list} + kernel = SumKernel_tf(**final_config) + elif kernel_name == 'Product': + kernel_list = [] + for k_config in kernel_config.values(): + if isinstance(k_config, dict): + kernel_list.append(initial_kernel_tf(k_config)) + elif isinstance(k_config, float): + kernel_list.append(tf.cast(k_config, dtype=tf.float32)) + final_config = {'kernel_list': kernel_list} + kernel = ProductKernel_tf(**final_config) + else: + pytest.skip('`initial_kernel_tf` only implemented for GaussianRBF, RationalQuadratic and Periodic.') + return kernel + + +def initial_kernel_pt(kernel_config): + kernel_config = kernel_config.copy() + if 'kernel_type' in kernel_config: + kernel_name = kernel_config.pop('kernel_type') + if ('sigma' in kernel_config) and (kernel_config['sigma'] is not None): + kernel_config['sigma'] = torch.tensor(np.array(kernel_config['sigma']), dtype=torch.float32) + if ('alpha' in kernel_config) and (kernel_config['alpha'] is not None): + kernel_config['alpha'] = torch.tensor(np.array(kernel_config['alpha']), dtype=torch.float32) + if ('tau' in kernel_config) and (kernel_config['tau'] is not None): + kernel_config['tau'] = torch.tensor(np.array(kernel_config['tau']), dtype=torch.float32) + if kernel_name == 'GaussianRBF': + kernel = GaussianRBF_pt(**kernel_config) + elif kernel_name == 'RationalQuadratic': + kernel = RationalQuadratic_pt(**kernel_config) + elif kernel_name == 'Periodic': + kernel = Periodic_pt(**kernel_config) + elif kernel_name == 'Sum': + 
kernel_list = [] + for k_config in kernel_config.values(): + if isinstance(k_config, dict): + kernel_list.append(initial_kernel_pt(k_config)) + elif isinstance(k_config, float): + kernel_list.append(torch.tensor(k_config, dtype=torch.float32)) + final_config = {'kernel_list': kernel_list} + kernel = SumKernel_pt(**final_config) + elif kernel_name == 'Product': + kernel_list = [] + for k_config in kernel_config.values(): + if isinstance(k_config, dict): + kernel_list.append(initial_kernel_pt(k_config)) + elif isinstance(k_config, float): + kernel_list.append(torch.tensor(k_config, dtype=torch.float32)) + final_config = {'kernel_list': kernel_list} + kernel = ProductKernel_pt(**final_config) + else: + pytest.skip('`initial_kernel_pt` only implemented for GaussianRBF, RationalQuadratic and Periodic.') + return kernel + + @fixture def classifier_model(backend, current_cases): """ diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py index 0ffa333c8..90e305870 100644 --- a/alibi_detect/saving/tests/test_saving.py +++ b/alibi_detect/saving/tests/test_saving.py @@ -42,7 +42,8 @@ from alibi_detect.saving.saving import _serialize_object from alibi_detect.saving.saving import (_path2str, _int2str_keys, _save_kernel_config, _save_model_config, _save_preprocess_config) -from alibi_detect.saving.schemas import DeepKernelConfig, KernelConfig, ModelConfig, PreprocessConfig +from alibi_detect.saving.schemas import DeepKernelConfig, ModelConfig, PreprocessConfig, RBFKernelConfig,\ + RationalQuadraticKernelConfig, PeriodicKernelConfig, CompositeKernelConfig from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf @@ -192,7 +193,13 @@ def test_save_cvmdrift(data, preprocess_custom, tmp_path): @parametrize('kernel', [ None, # Use default kernel - {'sigma': 0.5, 'trainable': False}, # pass kernel as object + {'kernel_type': 'GaussianRBF', 'sigma': 
0.5, 'trainable': False}, # pass kernel as object + {'kernel_type': 'RationalQuadratic', 'sigma': 0.5, 'alpha': 4.0, 'trainable': False}, + {'kernel_type': 'Periodic', 'sigma': 0.5, 'tau': 2.0, 'trainable': False}, + {'kernel_type': 'Sum', + 'comp_1': {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None}, + 'comp_2': {'kernel_type': 'GaussianRBF', 'sigma': 1.0, 'trainable': False, 'init_sigma_fn': None}, + 'comp_3': 0.5} ], indirect=True ) @parametrize_with_cases("data", cases=ContinuousData, prefix='data_') @@ -204,7 +211,6 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed) """ if backend not in ('tensorflow', 'pytorch', 'keops'): pytest.skip("Detector doesn't have this backend") - # Init detector and make predictions X_ref, X_h0 = data kwargs = { @@ -214,8 +220,7 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed) 'n_permutations': N_PERMUTATIONS, 'preprocess_at_init': True, 'kernel': kernel, - 'configure_kernel_from_x_ref': False, - 'sigma': np.array([0.5]) + 'configure_kernel_from_x_ref': False } if backend == 'pytorch': kwargs['device'] = 'cuda' if torch.cuda.is_available() else 'cpu' @@ -228,16 +233,16 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed) with fixed_seed(seed): cd_load = load_detector(tmp_path) preds_load = cd_load.predict(X_h0) - # assertions np.testing.assert_array_equal(preprocess_custom(X_ref), cd_load._detector.x_ref) - assert not cd_load._detector.infer_sigma + assert not cd_load._detector.infer_parameter assert cd_load._detector.n_permutations == N_PERMUTATIONS assert cd_load._detector.p_val == P_VAL assert isinstance(cd_load._detector.preprocess_fn, Callable) assert cd_load._detector.preprocess_fn.func.__name__ == 'preprocess_drift' - assert cd._detector.kernel.sigma == cd_load._detector.kernel.sigma - assert cd._detector.kernel.init_sigma_fn == cd_load._detector.kernel.init_sigma_fn + if 
hasattr(cd._detector.kernel, 'sigma'): + assert cd._detector.kernel.sigma == cd_load._detector.kernel.sigma + assert cd._detector.kernel.init_sigma_fn == cd_load._detector.kernel.init_sigma_fn assert preds['data']['p_val'] == preds_load['data']['p_val'] @@ -459,8 +464,9 @@ def test_save_spotthediff(data, classifier_model, backend, tmp_path, seed): # n @parametrize('deep_kernel', [ - {'kernel_a': 'rbf', 'eps': 0.01} # Default for kernel_a - ], indirect=True + {'kernel_a': {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': True}, + 'eps': 0.01} + ], indirect=True ) @parametrize_with_cases("data", cases=ContinuousData, prefix='data_') def test_save_learnedkernel(data, deep_kernel, backend, tmp_path, seed): # noqa: F811 @@ -502,7 +508,10 @@ def test_save_learnedkernel(data, deep_kernel, backend, tmp_path, seed): # noqa @parametrize('kernel', [ None, # Default kernel - {'sigma': 0.5, 'trainable': False}, # pass kernels as GaussianRBF objects, with default sigma_median fn + {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False}, + # pass kernels as GaussianRBF objects, with default sigma_median fn + {'kernel_type': 'RationalQuadratic', 'sigma': 0.5, 'alpha': 4.0, 'trainable': False}, + {'kernel_type': 'Periodic', 'sigma': 0.5, 'tau': 2.0, 'trainable': False}, ], indirect=True ) @parametrize_with_cases("data", cases=ContinuousData, prefix='data_') @@ -615,7 +624,9 @@ def test_save_regressoruncertaintydrift(data, regressor, backend, tmp_path, seed @parametrize('kernel', [ None, # Use default kernel - {'sigma': 0.5, 'trainable': False}, # pass kernel as object + {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False}, # pass kernel as object + {'kernel_type': 'RationalQuadratic', 'sigma': 0.5, 'alpha': 4.0, 'trainable': False}, + {'kernel_type': 'Periodic', 'sigma': 0.5, 'tau': 2.0, 'trainable': False}, ], indirect=True ) @parametrize_with_cases("data", cases=ContinuousData, prefix='data_') @@ -657,7 +668,10 @@ def test_save_onlinemmddrift(data, 
kernel, preprocess_custom, backend, tmp_path, stats_load.append(pred['data']['test_stat']) # assertions - np.testing.assert_array_equal(preprocess_custom(X_ref), cd_load._detector.x_ref) + if backend == 'pytorch': + np.testing.assert_array_equal(preprocess_custom(X_ref), cd_load._detector.x_ref.cpu().numpy()) + else: + np.testing.assert_array_equal(preprocess_custom(X_ref), cd_load._detector.x_ref) assert cd_load._detector.n_bootstraps == N_BOOTSTRAPS assert cd_load._detector.ert == ERT assert isinstance(cd_load._detector.preprocess_fn, Callable) @@ -710,7 +724,11 @@ def test_save_onlinelsdddrift(data, preprocess_custom, backend, tmp_path, seed): assert cd_load._detector.ert == ERT assert isinstance(cd_load._detector.preprocess_fn, Callable) assert cd_load._detector.preprocess_fn.func.__name__ == 'preprocess_drift' - assert cd._detector.kernel.sigma == cd_load._detector.kernel.sigma + if backend == 'pytorch': + np.testing.assert_array_almost_equal(cd._detector.kernel.sigma.cpu().numpy(), + cd_load._detector.kernel.sigma.cpu().numpy(), 5) + else: + np.testing.assert_almost_equal(cd._detector.kernel.sigma, cd_load._detector.kernel.sigma, 5) assert cd._detector.kernel.init_sigma_fn == cd_load._detector.kernel.init_sigma_fn np.testing.assert_array_equal(stats, stats_load) @@ -852,9 +870,16 @@ def test_version_warning(data, tmp_path): @parametrize('kernel', [ - {'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None}, - {'sigma': [0.5, 0.8], 'trainable': False, 'init_sigma_fn': None}, - {'sigma': None, 'trainable': True, 'init_sigma_fn': None}, + {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None}, + {'kernel_type': 'GaussianRBF', 'sigma': [0.5, 0.8], 'trainable': False, 'init_sigma_fn': None}, + {'kernel_type': 'GaussianRBF', 'sigma': None, 'trainable': True, 'init_sigma_fn': None}, + {'kernel_type': 'RationalQuadratic', 'sigma': 0.5, 'alpha': 3.0, 'trainable': False, 'init_sigma_fn': None}, + {'kernel_type': 'RationalQuadratic', 
'sigma': [0.5, 0.8], 'alpha': [2.0, 3.0], 'trainable': False, + 'init_sigma_fn': None}, + {'kernel_type': 'RationalQuadratic', 'sigma': None, 'alpha': None, 'trainable': True, 'init_sigma_fn': None}, + {'kernel_type': 'Periodic', 'sigma': 0.5, 'tau': 2.0, 'trainable': False, 'init_sigma_fn': None}, + {'kernel_type': 'Periodic', 'sigma': [0.5, 0.8], 'tau': [2.0, 3.0], 'trainable': False, 'init_sigma_fn': None}, + {'kernel_type': 'Periodic', 'sigma': None, 'tau': None, 'trainable': True, 'init_sigma_fn': None}, ], indirect=True ) def test_save_kernel(kernel, backend, tmp_path): # noqa: F811 @@ -867,9 +892,15 @@ def test_save_kernel(kernel, backend, tmp_path): # noqa: F811 filepath = tmp_path filename = Path('mykernel') cfg_kernel = _save_kernel_config(kernel, filepath, filename) - cfg_kernel = KernelConfig(**cfg_kernel).dict() # Pass through validator to test, and coerce sigma to Tensor if kernel.__class__.__name__ == 'GaussianRBF': assert cfg_kernel['src'] == '@utils.' + backend + '.kernels.GaussianRBF' + cfg_kernel = RBFKernelConfig(**cfg_kernel).dict() # Pass through validator to test, and coerce sigma to Tensor + elif kernel.__class__.__name__ == 'RationalQuadratic': + assert cfg_kernel['src'] == '@utils.' + backend + '.kernels.RationalQuadratic' + cfg_kernel = RationalQuadraticKernelConfig(**cfg_kernel).dict() # Pass through validator to test + elif kernel.__class__.__name__ == 'Periodic': + assert cfg_kernel['src'] == '@utils.' 
@parametrize('kernel', [
        {'kernel_type': 'Sum',
         'comp_1': {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None},
         'comp_2': {'kernel_type': 'GaussianRBF', 'sigma': 1.0, 'trainable': False, 'init_sigma_fn': None},
         'comp_3': 0.01},
        {'kernel_type': 'Product',
         'comp_1': {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None},
         'comp_2': {'kernel_type': 'GaussianRBF', 'sigma': 1.0, 'trainable': False, 'init_sigma_fn': None}},
        {'kernel_type': 'Product',
         'comp_1': 0.5,
         'comp_2': {'kernel_type': 'Sum',
                    'comp_1': {'kernel_type': 'GaussianRBF', 'sigma': 0.5, 'trainable': False, 'init_sigma_fn': None},
                    'comp_2': {'kernel_type': 'GaussianRBF', 'sigma': 1.0, 'trainable': False, 'init_sigma_fn': None},
                    'comp_3': 0.5}},
    ], indirect=True
)
def test_save_composite_kernel(kernel, backend, tmp_path):  # noqa: F811
    """
    Unit test for _save/_load_kernel_config, when kernel is a composite (sum or product) kernel.

    Kernels are saved and then loaded, with assertions to check equivalence: the loaded kernel must have the same
    type, give the same kernel matrix on random data, and its components must agree in `sigma`, `trainable` and
    the `init_fn` of every entry in `parameter_dict`.
    """
    # Save kernel to config
    filepath = tmp_path
    filename = Path('mykernel')
    cfg_kernel = _save_kernel_config(kernel, filepath, filename)
    if kernel.__class__.__name__ == 'SumKernel':
        assert cfg_kernel['src'] == '@utils.' + backend + '.kernels.SumKernel'
        cfg_kernel = CompositeKernelConfig(**cfg_kernel).dict()  # Pass through validator to test
    elif kernel.__class__.__name__ == 'ProductKernel':
        assert cfg_kernel['src'] == '@utils.' + backend + '.kernels.ProductKernel'
        cfg_kernel = CompositeKernelConfig(**cfg_kernel).dict()  # Pass through validator to test
    else:
        # Any other kernel class is expected to have been serialized to a `.dill` file.
        assert Path(cfg_kernel['src']).suffix == '.dill'

    # Resolve and load config (_load_kernel_config is called within resolve_config)
    cfg = {'kernel': cfg_kernel, 'backend': backend}
    _prepend_cfg_filepaths(cfg, tmp_path)
    kernel_loaded = resolve_config(cfg, tmp_path)['kernel']

    # Call kernels on the same random data, converted to the backend's tensor type
    X = np.random.standard_normal((10, 1))
    if backend == 'pytorch':
        X = torch.from_numpy(X).float()
    elif backend == 'tensorflow':
        X = tf.convert_to_tensor(X)
    else:
        pytest.skip('Backend not supported.')
    K_0 = kernel(X, X)
    K_1 = kernel_loaded(X, X)

    # Final checks
    assert type(kernel_loaded) == type(kernel)
    if backend == 'pytorch':
        K_0 = K_0.detach().numpy().ravel()
        K_1 = K_1.detach().numpy().ravel()
        np.testing.assert_array_almost_equal(K_0, K_1, 5)
    elif backend == 'tensorflow':
        K_0 = K_0.numpy().ravel()
        K_1 = K_1.numpy().ravel()
        np.testing.assert_array_almost_equal(K_0, K_1, 5)
    else:
        raise NotImplementedError('Backend not supported.')
    # Component-wise comparison. Components without a `sigma` attribute are not compared individually.
    # NOTE(review): the `trainable`/`init_fn` assertions are assumed to sit inside the `hasattr` guard - confirm.
    for i in range(len(kernel.kernel_list)):
        if hasattr(kernel.kernel_list[i], 'sigma'):
            if backend == 'pytorch':
                np.testing.assert_array_almost_equal(kernel_loaded.kernel_list[i].sigma.detach().numpy(),
                                                     kernel.kernel_list[i].sigma.detach().numpy(), 5)
            else:
                np.testing.assert_array_almost_equal(np.array(kernel_loaded.kernel_list[i].sigma),
                                                     np.array(kernel.kernel_list[i].sigma), 5)
            assert kernel_loaded.kernel_list[i].trainable == kernel.kernel_list[i].trainable
            for tmp_key in kernel.kernel_list[i].parameter_dict.keys():
                assert kernel_loaded.kernel_list[i].parameter_dict[tmp_key].init_fn == \
                    kernel.kernel_list[i].parameter_dict[tmp_key].init_fn
def validate_composite_kernel_config(cfg_kernel: dict) -> dict:
    """
    Recursively validate a composite (sum/product) kernel config.

    Every entry of `cfg_kernel['kernel_list']` that is a dictionary carrying a `kernel_type` field is validated:
    nested 'Sum' and 'Product' components are validated recursively with this function, single kernels are passed
    through the schema of their kernel type. All other entries (e.g. scalar components) are kept unchanged.

    Parameters
    ----------
    cfg_kernel
        Composite kernel config. Must contain a `kernel_list` dictionary mapping component names (`comp_0`,
        `comp_1`, ...) to component configs.

    Returns
    -------
    A new dictionary holding the validated composite kernel config, with its keys in sorted order. The components \
    in `kernel_list` named `comp_<index>` are ordered by their integer index (so `comp_2` precedes `comp_10`), \
    any other component names follow in alphabetical order. The input is not modified.

    Raises
    ------
    ValueError
        If a component specifies a `kernel_type` that is not supported.
    """
    def _component_order(name):
        # Natural ordering: a plain string sort would place `comp_10` before `comp_2`.
        prefix, _, index = name.rpartition('_')
        if prefix == 'comp' and index.isdecimal():
            return 0, int(index), name
        return 1, 0, name

    components = {}
    # Iterate over the keys actually present instead of assuming they are exactly comp_0..comp_{n-1}.
    for name, component in cfg_kernel['kernel_list'].items():
        if isinstance(component, dict) and 'kernel_type' in component:
            kernel_type = component['kernel_type']
            if kernel_type in ('Sum', 'Product'):
                component = validate_composite_kernel_config(component)
            elif kernel_type == 'GaussianRBF':
                component = RBFKernelConfig(**component).dict()
            elif kernel_type == 'RationalQuadratic':
                component = RationalQuadraticKernelConfig(**component).dict()
            elif kernel_type == 'Periodic':
                component = PeriodicKernelConfig(**component).dict()
            else:
                # NOTE(review): assumes this `else` closes the kernel-type chain, i.e. dictionaries without a
                # `kernel_type` entry are passed through unvalidated - confirm against the original layout.
                raise ValueError('Kernel type not supported.')
        components[name] = component

    validated = dict(cfg_kernel)  # shallow copy, so the caller's config is left untouched
    validated['kernel_list'] = {name: components[name] for name in sorted(components, key=_component_order)}
    return dict(sorted(validated.items()))  # Sort dict to ensure order is consistent
names=['GaussianRBF', 'DeepKernel', 'BaseKernel', 'ProjKernel'] ) __all__ = [ "GaussianRBF", - "DeepKernel" + "DeepKernel", + "BaseKernel", + "ProjKernel" ] diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 7da7a3ee9..cecba108b 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -1,7 +1,46 @@ +from abc import abstractmethod from pykeops.torch import LazyTensor +import numpy as np import torch import torch.nn as nn -from typing import Callable, Optional, Union +from typing import Callable, Optional, Union, List +from copy import deepcopy + + +def infer_kernel_parameter( + kernel: 'BaseKernel', + x: LazyTensor, + y: LazyTensor, + dist: torch.Tensor, + infer_parameter: bool = True +) -> None: + """ + Infer the kernel parameter from the data. + + Parameters + ---------- + kernel + The kernel function. + x + LazyTensor of instances with dimension [Nx, 1, features] or [batch_size, Nx, 1, features]. + The singleton dimension is necessary for broadcasting. + y + LazyTensor of instances with dimension [1, Ny, features] or [batch_size, 1, Ny, features]. + The singleton dimension is necessary for broadcasting. + dist + Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + infer_parameter + Whether to infer the kernel parameter. + """ + if kernel.trainable and infer_parameter: + raise ValueError("Gradients cannot be computed w.r.t. 
an inferred sigma value") + for parameter in kernel.parameter_dict.values(): + if parameter.requires_init: + if parameter.init_fn is not None: + with torch.no_grad(): + parameter.value.data = parameter.init_fn(x, y, dist).reshape(-1) + parameter.requires_init = False + kernel.init_required = False def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = 100) -> torch.Tensor: @@ -53,20 +92,296 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = 100) return sigma -class GaussianRBF(nn.Module): +class KernelParameter: def __init__( self, - sigma: Optional[torch.Tensor] = None, - init_sigma_fn: Optional[Callable] = None, - trainable: bool = False + value: torch.Tensor = None, + init_fn: Optional[Callable] = None, + requires_grad: bool = False, + requires_init: bool = False + ) -> None: + """ + Parameter class for kernels. + + Parameters + ---------- + value + The pre-specified value of the parameter. + init_fn + The function used to initialize the parameter. + requires_grad + Whether the parameter requires gradient. + requires_init + Whether the parameter requires initialization. + """ + super().__init__() + self.value = nn.Parameter(value if value is not None else torch.ones(1), + requires_grad=requires_grad) + self.init_fn = init_fn + self.requires_init = requires_init + + +class BaseKernel(nn.Module): + def __init__(self, active_dims: list = None) -> None: + """ + The base class for all kernels. + + Parameters + ---------- + active_dims + Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used. 
+ """ + super().__init__() + self.parameter_dict: dict = {} + if active_dims is not None: + self.active_dims = torch.as_tensor(active_dims) + else: + self.active_dims = None + self.init_required = False + + @abstractmethod + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: Optional[bool] = False) -> torch.Tensor: + raise NotImplementedError + + def forward(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> torch.Tensor: + if self.active_dims is not None: + x = torch.index_select(x, -1, self.active_dims) + y = torch.index_select(y, -1, self.active_dims) + if len(self.parameter_dict) > 0: + return self.kernel_function(x, y, infer_parameter) + else: + return self.kernel_function(x, y) + + def __add__( + self, + other: Union['BaseKernel', torch.Tensor] + ) -> 'SumKernel': + if isinstance(other, SumKernel): + other.kernel_list.append(self) + return other + elif isinstance(other, (BaseKernel, ProductKernel, torch.Tensor)): + sum_kernel = SumKernel() + sum_kernel.kernel_list.append(self) + sum_kernel.kernel_list.append(other) + return sum_kernel + else: + raise ValueError('Kernels can only added to another kernel or a constant.') + + def __radd__(self, other: 'BaseKernel') -> 'SumKernel': + return self.__add__(other) + + def __mul__( + self, + other: Union['BaseKernel', torch.Tensor] + ) -> 'BaseKernel': + if isinstance(other, ProductKernel): + other.kernel_factors.append(self) + return other + elif isinstance(other, SumKernel): + sum_kernel = SumKernel() + for k in other.kernel_list: + sum_kernel.kernel_list.append(self * k) + return sum_kernel + else: + prod_kernel = ProductKernel() + prod_kernel.kernel_factors.append(self) + prod_kernel.kernel_factors.append(other) + return prod_kernel + + def __rmul__( + self, + other: 'BaseKernel' + ) -> 'BaseKernel': + return self.__mul__(other) + + def __truediv__(self, other: torch.Tensor) -> 'BaseKernel': + if isinstance(other, torch.Tensor): + return self.__mul__(1. 
/ other) + else: + raise ValueError('Kernels can only be divided by a constant.') + + def __rtruediv__(self, other): + raise ValueError('Kernels can not be used as divisor.') + + def __sub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + def __rsub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + +class SumKernel(BaseKernel): + def __init__(self) -> None: + """ + Construct a kernel by summing different kernels. + """ + super().__init__() + self.kernel_list: List[Union[BaseKernel, torch.Tensor]] = [] + + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> torch.Tensor: + K_sum = torch.tensor(0., device=x.device) + for k in self.kernel_list: + if isinstance(k, (BaseKernel, SumKernel, ProductKernel)): + K_sum = K_sum + k(x, y, infer_parameter) + elif isinstance(k, torch.Tensor): + K_sum = K_sum + k + else: + raise ValueError(type(k) + 'is not supported by SumKernel.') + return K_sum + + def __add__( + self, + other: Union[BaseKernel, torch.Tensor] + ) -> 'SumKernel': + if isinstance(other, SumKernel): + for k in other.kernel_list: + self.kernel_list.append(k) + else: + self.kernel_list.append(other) + return self + + def __radd__(self, other: BaseKernel) -> 'SumKernel': + return self.__add__(other) + + def __mul__( + self, + other: Union[BaseKernel, torch.Tensor] + ) -> BaseKernel: + if isinstance(other, SumKernel): + sum_kernel = SumKernel() + for ki in self.kernel_list: + for kj in other.kernel_list: + sum_kernel.kernel_list.append((ki * kj)) + return sum_kernel + elif isinstance(other, ProductKernel): + return other * self + elif isinstance(other, BaseKernel) or isinstance(other, torch.Tensor): + sum_kernel = SumKernel() + for ki in self.kernel_list: + sum_kernel.kernel_list.append(other * ki) + return sum_kernel + else: + raise ValueError(type(other) + 'is not supported by SumKernel.') + + def __rmul__( + self, + other: BaseKernel + ) -> BaseKernel: + return 
self.__mul__(other) + + def __truediv__(self, other: torch.Tensor) -> BaseKernel: + if isinstance(other, torch.Tensor): + return self.__mul__(1 / other) + else: + raise ValueError('Kernels can only be divided by a constant.') + + def __rtruediv__(self, other): + raise ValueError('Kernels can not be used as divisor.') + + def __sub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + def __rsub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + +class ProductKernel(BaseKernel): + def __init__(self) -> None: + """ + Construct a kernel by multiplying different kernels. + """ + super().__init__() + self.kernel_factors: List[Union[BaseKernel, torch.Tensor]] = [] + + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> torch.Tensor: + K_prod = torch.tensor(1., device=x.device) + for k in self.kernel_factors: + if isinstance(k, BaseKernel) or isinstance(k, SumKernel) or isinstance(k, ProductKernel): + K_prod = K_prod * k(x, y, infer_parameter) + elif isinstance(k, torch.Tensor): + K_prod = K_prod * k + else: + raise ValueError(type(k) + 'is not supported by ProductKernel.') + return K_prod + + def __add__( + self, + other: Union[BaseKernel, torch.Tensor] + ) -> 'SumKernel': + if isinstance(other, SumKernel): + other.kernel_list.append(self) + return other + else: + sum_kernel = SumKernel() + sum_kernel.kernel_list.append(self) + sum_kernel.kernel_list.append(other) + return sum_kernel + + def __radd__( + self, + other: BaseKernel + ) -> 'SumKernel': + return self.__add__(other) + + def __mul__( + self, + other: Union[BaseKernel, torch.Tensor] + ) -> BaseKernel: + if isinstance(other, SumKernel): + sum_kernel = SumKernel() + for k in other.kernel_list: + tmp_prod_kernel = deepcopy(self) + tmp_prod_kernel.kernel_factors.append(k) + sum_kernel.kernel_list.append(tmp_prod_kernel) + return sum_kernel + elif isinstance(other, ProductKernel): + for k in other.kernel_factors: + 
self.kernel_factors.append(k) + return self + elif isinstance(other, BaseKernel) or isinstance(other, torch.Tensor): + self.kernel_factors.append(other) + return self + else: + raise ValueError(type(other) + 'is not supported by ProductKernel.') + + def __rmul__( + self, + other: BaseKernel + ) -> BaseKernel: + return self.__mul__(other) + + def __truediv__(self, other: torch.Tensor) -> BaseKernel: + if isinstance(other, torch.Tensor): + return self.__mul__(1 / other) + else: + raise ValueError('Kernels can only be divided by a constant.') + + def __rtruediv__(self, other): + raise ValueError('Kernels can not be used as divisor.') + + def __sub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + def __rsub__(self, other): + raise ValueError('Kernels do not support subtraction.') + + +class GaussianRBF(BaseKernel): + def __init__( + self, + sigma: Optional[torch.Tensor] = None, + init_sigma_fn: Optional[Callable] = None, + trainable: bool = False, + active_dims: list = None ) -> None: """ Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2)||x-y||^2). A forward pass takes - a batch of instances x and y and returns the kernel matrix. - x can be of shape [Nx, 1, features] or [batch_size, Nx, 1, features]. - y can be of shape [1, Ny, features] or [batch_size, 1, Ny, features]. - The returned kernel matrix can be of shape [Nx, Ny] or [batch_size, Nx, Ny]. - x, y and the returned kernel matrix are all lazy tensors. + a batch of instances x [Nx, features] and y [Ny, features] and returns the kernel + matrix [Nx, Ny]. Parameters ---------- @@ -75,38 +390,50 @@ def __init__( Can pass multiple values to eval kernel with and then average. init_sigma_fn Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. - The function's signature should match :py:func:`~alibi_detect.utils.keops.kernels.sigma_mean`, - meaning that it should take in the lazy tensors `x`, `y` and `dist` and return a tensor `sigma`. 
+ The function's signature should match :py:func:`~alibi_detect.utils.pytorch.kernels.sigma_median`, + meaning that it should take in the tensors `x`, `y` and `dist` and return `sigma`. If `None`, it is set to + :func:`~alibi_detect.utils.pytorch.kernels.sigma_median`. trainable Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. + active_dims + Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used. """ - super().__init__() + super().__init__(active_dims) init_sigma_fn = sigma_mean if init_sigma_fn is None else init_sigma_fn - if sigma is None: - self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) - self.init_required = True - else: - sigma = sigma.reshape(-1) # [Ns,] - self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) - self.init_required = False - self.init_sigma_fn = init_sigma_fn + self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': init_sigma_fn} + self.parameter_dict['log-sigma'] = KernelParameter( + value=sigma.log().reshape(-1) if sigma is not None else None, + init_fn=init_sigma_fn, + requires_grad=trainable, + requires_init=True if sigma is None else False, + ) self.trainable = trainable + self.init_required = any([param.requires_init for param in self.parameter_dict.values()]) @property def sigma(self) -> torch.Tensor: - return self.log_sigma.exp() + return self.parameter_dict['log-sigma'].value.exp() + + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> LazyTensor: + if len(x.shape) == 3: + x = LazyTensor(x[:, :, None, :]) + elif len(x.shape) == 2: + x = LazyTensor(x[:, None, :]) + else: + raise ValueError('x should be of shape [batch_size, n_instances, features] or [batch_size, features].') - def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> LazyTensor: + if len(y.shape) == 3: + y = LazyTensor(y[:, None, :, :]) + elif len(y.shape) == 2: + y = 
LazyTensor(y[None, :, :]) + else: + raise ValueError('y should be of shape [batch_size, n_instances, features] or [batch_size, features].') dist = ((x - y) ** 2).sum(-1) - if infer_sigma or self.init_required: - if self.trainable and infer_sigma: - raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value") - sigma = self.init_sigma_fn(x, y, dist) - with torch.no_grad(): - self.log_sigma.copy_(sigma.log().clone()) - self.init_required = False + if infer_parameter or self.init_required: + infer_kernel_parameter(self, x, y, dist, infer_parameter) gamma = 1. / (2. * self.sigma ** 2) gamma = LazyTensor(gamma[None, None, :]) if len(dist.shape) == 2 else LazyTensor(gamma[None, None, None, :]) @@ -116,45 +443,84 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La return kernel_mat -class DeepKernel(nn.Module): +class ProjKernel(BaseKernel): def __init__( self, proj: nn.Module, - kernel_a: nn.Module = GaussianRBF(trainable=True), - kernel_b: Optional[nn.Module] = GaussianRBF(trainable=True), - eps: Union[float, str] = 'trainable' + raw_kernel: BaseKernel = GaussianRBF(trainable=True), ) -> None: """ - Computes similarities as k(x,y) = (1-eps)*k_a(proj(x), proj(y)) + eps*k_b(x,y). - A forward pass takes an already projected batch of instances x_proj and y_proj and optionally - (if k_b is present) a batch of instances x and y and returns the kernel matrix. - x_proj can be of shape [Nx, 1, features_proj] or [batch_size, Nx, 1, features_proj]. - y_proj can be of shape [1, Ny, features_proj] or [batch_size, 1, Ny, features_proj]. - x can be of shape [Nx, 1, features] or [batch_size, Nx, 1, features]. - y can be of shape [1, Ny, features] or [batch_size, 1, Ny, features]. - The returned kernel matrix can be of shape [Nx, Ny] or [batch_size, Nx, Ny]. - x, y and the returned kernel matrix are all lazy tensors. + A kernel that combines a raw kernel (e.g. RBF) with a projection function (e.g. 
deep net) as + k(x, y) = k(proj(x), proj(y)). A forward pass takes a batch of instances x [Nx, features] and + y [Ny, features] and returns the kernel matrix [Nx, Ny]. - Parameters + Parameters: ---------- proj - The projection to be applied to the inputs before applying kernel_a - kernel_a + The projection to be applied to the inputs before applying raw_kernel + raw_kernel The kernel to apply to the projected inputs. Defaults to a Gaussian RBF with trainable bandwidth. - kernel_b - The kernel to apply to the raw inputs. Defaults to a Gaussian RBF with trainable bandwidth. - Set to None in order to use only the deep component (i.e. eps=0). - eps - The proportion (in [0,1]) of weight to assign to the kernel applied to raw inputs. This can be - either specified or set to 'trainable'. Only relavent if kernel_b is not None. """ super().__init__() + self.proj = proj + self.raw_kernel = raw_kernel + self.init_required = False + + def kernel_function( + self, + x: Union[np.ndarray, torch.Tensor], + y: Union[np.ndarray, torch.Tensor], + infer_parameter: Optional[bool] = False + ) -> torch.Tensor: + return self.raw_kernel(self.proj(x), self.proj(y), infer_parameter) + + +class DeepKernel(BaseKernel): + """ + Computes similarities as k(x,y) = (1-eps)*k_a(proj(x), proj(y)) + eps*k_b(x,y). + A forward pass takes a batch of instances x [Nx, features] and y [Ny, features] and returns + the kernel matrix [Nx, Ny]. + + Parameters + ---------- + proj + The projection to be applied to the inputs before applying kernel_a + kernel_a + The kernel to apply to the projected inputs. Defaults to a Gaussian RBF with trainable bandwidth. + kernel_b + The kernel to apply to the raw inputs. Defaults to a Gaussian RBF with trainable bandwidth. + Set to None in order to use only the deep component (i.e. eps=0). + eps + The proportion (in [0,1]) of weight to assign to the kernel applied to raw inputs. This can be + either specified or set to 'trainable'. Only relavent if kernel_b is not None. 
+ + """ + def __init__( + self, + proj: nn.Module, + kernel_a: BaseKernel = GaussianRBF(trainable=True), + kernel_b: Optional[BaseKernel] = GaussianRBF(trainable=True), + eps: Union[float, str] = 'trainable' + ) -> None: + super().__init__() + self.proj = proj self.kernel_a = kernel_a self.kernel_b = kernel_b - self.proj = proj + + if hasattr(self.kernel_a, 'parameter_dict'): + for param in self.kernel_a.parameter_dict.keys(): + setattr(self, param, self.kernel_a.parameter_dict[param].value) + + self.proj_kernel = ProjKernel(proj=self.proj, raw_kernel=self.kernel_a) if kernel_b is not None: self._init_eps(eps) + self.comp_kernel = (1-self.logit_eps.sigmoid())*self.proj_kernel + self.logit_eps.sigmoid()*self.kernel_b + if hasattr(self.kernel_b, 'parameter_dict'): + for param in self.kernel_b.parameter_dict.keys(): + setattr(self, param, self.kernel_b.parameter_dict[param].value) + else: + self.comp_kernel = self.proj_kernel def _init_eps(self, eps: Union[float, str]) -> None: if isinstance(eps, float): @@ -170,9 +536,10 @@ def _init_eps(self, eps: Union[float, str]) -> None: def eps(self) -> torch.Tensor: return self.logit_eps.sigmoid() if self.kernel_b is not None else torch.tensor(0.) 
- def forward(self, x_proj: LazyTensor, y_proj: LazyTensor, x: Optional[LazyTensor] = None, - y: Optional[LazyTensor] = None) -> LazyTensor: - similarity = self.kernel_a(x_proj, y_proj) - if self.kernel_b is not None: - similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y) - return similarity + def kernel_function( + self, + x: torch.Tensor, + y: torch.Tensor, + infer_parameter: Optional[bool] = False + ) -> torch.Tensor: + return self.comp_kernel(x, y, infer_parameter) diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index b25554818..979da8c7d 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -6,7 +6,7 @@ import torch.nn as nn if has_keops: from pykeops.torch import LazyTensor - from alibi_detect.utils.keops import DeepKernel, GaussianRBF + from alibi_detect.utils.keops import DeepKernel, GaussianRBF, BaseKernel sigma = [None, np.array([1.]), np.array([1., 2.])] n_features = [5, 10] @@ -34,21 +34,15 @@ def test_gaussian_kernel(gaussian_kernel_params): sigma = sigma if sigma is None else torch.from_numpy(sigma).float() x = torch.from_numpy(np.random.random(xshape)).float() y = torch.from_numpy(np.random.random(yshape)).float() - if batch_size: - x_lazy, y_lazy = LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :]) - x_lazy2 = LazyTensor(x[:, None, :, :]) - else: - x_lazy, y_lazy = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]) - x_lazy2 = LazyTensor(x[None, :, :]) kernel = GaussianRBF(sigma=sigma, trainable=trainable) - infer_sigma = True if sigma is None else False - if trainable and infer_sigma: + infer_parameter = True if sigma is None else False + if trainable and infer_parameter: with pytest.raises(ValueError): - kernel(x_lazy, y_lazy, infer_sigma=infer_sigma) + kernel(x, y, infer_parameter=infer_parameter) else: - k_xy = kernel(x_lazy, y_lazy, infer_sigma=infer_sigma) - k_xx = 
kernel(x_lazy, x_lazy2, infer_sigma=infer_sigma) + k_xx = kernel(x, x, infer_parameter=infer_parameter) + k_xy = kernel(x, y, infer_parameter=infer_parameter) k_xy_shape = n_instances k_xx_shape = (n_instances[0], n_instances[0]) axis = 1 @@ -66,11 +60,26 @@ def test_gaussian_kernel(gaussian_kernel_params): if has_keops: - class MyKernel(nn.Module): + class MyKernel(BaseKernel): def __init__(self): super().__init__() - def forward(self, x: LazyTensor, y: LazyTensor) -> LazyTensor: + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> LazyTensor: + if len(x.shape) == 3: + x = LazyTensor(x[:, :, None, :]) + elif len(x.shape) == 2: + x = LazyTensor(x[:, None, :]) + else: + raise ValueError('x should be of shape [batch_size, n_instances, features] or [batch_size, features].') + + if len(y.shape) == 3: + y = LazyTensor(y[:, None, :, :]) + elif len(y.shape) == 2: + y = LazyTensor(y[None, :, :]) + else: + raise ValueError('y should be of shape [batch_size, n_instances, features] or [batch_size, features].') + return (- ((x - y) ** 2).sum(-1)).exp() @@ -104,18 +113,10 @@ def test_deep_kernel(deep_kernel_params): xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features) x = torch.as_tensor(np.random.random(xshape).astype('float32')) y = torch.as_tensor(np.random.random(yshape).astype('float32')) - x_proj, y_proj = kernel.proj(x), kernel.proj(y) - x2_proj, x_proj = LazyTensor(x_proj[None, :, :]), LazyTensor(x_proj[:, None, :]) - y2_proj, y_proj = LazyTensor(y_proj[None, :, :]), LazyTensor(y_proj[:, None, :]) - if kernel_b: - x2, x = LazyTensor(x[None, :, :]), LazyTensor(x[:, None, :]) - y2, y = LazyTensor(y[None, :, :]), LazyTensor(y[:, None, :]) - else: - x, x2, y, y2 = None, None, None, None - k_xy = kernel(x_proj, y2_proj, x, y2) - k_yx = kernel(y_proj, x2_proj, y, x2) - k_xx = kernel(x_proj, x2_proj, x, x2) + k_xy = kernel(x, y) + k_yx = kernel(y, x) + k_xx = kernel(x, x) assert k_xy.shape == n_instances and 
k_xx.shape == (xshape[0], xshape[0]) assert (k_xx.Kmin_argKmin(1, axis=1)[0] > 0.).all() assert (torch.abs(k_xy.sum(1).sum(1) - k_yx.t().sum(1).sum(1)) < 1e-5).all() diff --git a/alibi_detect/utils/pytorch/__init__.py b/alibi_detect/utils/pytorch/__init__.py index 35918f8a3..df17f2937 100644 --- a/alibi_detect/utils/pytorch/__init__.py +++ b/alibi_detect/utils/pytorch/__init__.py @@ -12,9 +12,9 @@ 'permed_lsdds', 'batch_compute_kernel_matrix'] ) -GaussianRBF, DeepKernel = import_optional( +GaussianRBF, DeepKernel, BaseKernel, RationalQuadratic, Periodic, log_sigma_median = import_optional( 'alibi_detect.utils.pytorch.kernels', - names=['GaussianRBF', 'DeepKernel'] + names=['GaussianRBF', 'DeepKernel', 'BaseKernel', 'RationalQuadratic', 'Periodic', 'log_sigma_median'] ) predict_batch, predict_batch_transformer = import_optional( @@ -32,7 +32,10 @@ "mmd2", "mmd2_from_kernel_matrix", "squared_pairwise_distance", + "BaseKernel", "GaussianRBF", + "RationalQuadratic", + "Periodic", "DeepKernel", "permed_lsdds", "predict_batch", @@ -40,5 +43,6 @@ "get_device", "quantile", "zero_diag", + "log_sigma_median", "TorchDataset" ] diff --git a/alibi_detect/utils/pytorch/distance.py b/alibi_detect/utils/pytorch/distance.py index b5b5e85de..86b1b0aa8 100644 --- a/alibi_detect/utils/pytorch/distance.py +++ b/alibi_detect/utils/pytorch/distance.py @@ -24,8 +24,8 @@ def squared_pairwise_distance(x: torch.Tensor, y: torch.Tensor, a_min: float = 1 ------- Pairwise squared Euclidean distance [Nx, Ny]. 
""" - x2 = x.pow(2).sum(dim=-1, keepdim=True) - y2 = y.pow(2).sum(dim=-1, keepdim=True) + x2 = torch.square(x).sum(dim=-1, keepdim=True) + y2 = torch.square(y).sum(dim=-1, keepdim=True) dist = torch.addmm(y2.transpose(-2, -1), x, y.transpose(-2, -1), alpha=-2).add_(x2) return dist.clamp_min_(a_min) diff --git a/alibi_detect/utils/pytorch/kernels.py b/alibi_detect/utils/pytorch/kernels.py index 78e730fb8..f8c58006c 100644 --- a/alibi_detect/utils/pytorch/kernels.py +++ b/alibi_detect/utils/pytorch/kernels.py @@ -1,11 +1,47 @@ +from abc import abstractmethod import numpy as np import torch from torch import nn from . import distance -from typing import Optional, Union, Callable +from typing import Optional, Union, Callable, List +from copy import deepcopy from alibi_detect.utils.frameworks import Framework +def infer_kernel_parameter( + kernel: 'BaseKernel', + x: torch.Tensor, + y: torch.Tensor, + dist: torch.Tensor, + infer_parameter: bool = True +) -> None: + """ + Infer the kernel parameter from the data. + + Parameters + ---------- + kernel + The kernel function. + x + Tensor of instances with dimension [Nx, features]. + y + Tensor of instances with dimension [Ny, features]. + dist + Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + infer_parameter + Whether to infer the kernel parameter. + """ + if kernel.trainable and infer_parameter: + raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value") + for parameter in kernel.parameter_dict.values(): + if parameter.requires_init: + if parameter.init_fn is not None: + with torch.no_grad(): + parameter.value.data = parameter.init_fn(x, y, dist).reshape(-1) + parameter.requires_init = False + kernel.init_required = False + + def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor: """ Bandwidth estimation using the median heuristic :cite:t:`Gretton2012`. 
def log_sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
    """
    Log-bandwidth estimation using the median heuristic :cite:t:`Gretton2012`.

    Parameters
    ----------
    x
        Tensor of instances with dimension [Nx, features].
    y
        Tensor of instances with dimension [Ny, features].
    dist
        Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`.

    Returns
    -------
    The logarithm of the bandwidth computed by `sigma_median`, `log-sigma`.
    """
    return torch.log(sigma_median(x, y, dist))


class KernelParameter:
    def __init__(
        self,
        value: Optional[torch.Tensor] = None,
        init_fn: Optional[Callable] = None,
        requires_grad: bool = False,
        requires_init: bool = False
    ) -> None:
        """
        Parameter class for kernels.

        Parameters
        ----------
        value
            The pre-specified value of the parameter. Defaults to `torch.ones(1)` when not given.
        init_fn
            The function used to initialize the parameter.
        requires_grad
            Whether the parameter requires gradient.
        requires_init
            Whether the parameter requires initialization.
        """
        super().__init__()
        self.value = nn.Parameter(value if value is not None else torch.ones(1),
                                  requires_grad=requires_grad)
        self.init_fn = init_fn
        self.requires_init = requires_init


def _component_configs(components: List[Union['BaseKernel', torch.Tensor]], owner: str) -> dict:
    """Build the `comp_<i>` config mapping of a composite kernel from its components."""
    configs: dict = {}
    for i, component in enumerate(components):
        if isinstance(component, BaseKernel):
            configs['comp_' + str(i)] = component.config
        elif isinstance(component, torch.Tensor):
            configs['comp_' + str(i)] = component.detach().cpu().item()
        else:
            raise ValueError(str(type(component)) + ' is not supported by ' + owner + '.')
    return configs


def _evaluate_components(
    components: List[Union['BaseKernel', torch.Tensor]],
    x: torch.Tensor,
    y: torch.Tensor,
    infer_parameter: bool,
    owner: str
) -> List[torch.Tensor]:
    """Evaluate every component of a composite kernel as a [Nx, Ny] matrix on the device of `x`."""
    values: List[torch.Tensor] = []
    for component in components:
        if isinstance(component, BaseKernel):
            component.to(x.device)  # nn.Module.to moves the module in place
            values.append(component(x, y, infer_parameter))
        elif isinstance(component, torch.Tensor):
            # Tensor.to returns a new tensor, so its result has to be used explicitly.
            constant = component.to(x.device)
            values.append(constant * torch.ones((x.shape[0], y.shape[0]), device=x.device))
        else:
            raise ValueError(str(type(component)) + ' is not supported by ' + owner + '.')
    return values


def _multiply(a: Union['BaseKernel', torch.Tensor],
              b: Union['BaseKernel', torch.Tensor]) -> Union['BaseKernel', torch.Tensor]:
    """Multiply two components, always dispatching to the kernel operand when there is one."""
    if isinstance(a, BaseKernel):
        return a * b
    if isinstance(b, BaseKernel):
        return b * a
    return a * b


class BaseKernel(nn.Module):
    def __init__(self, active_dims: list = None) -> None:
        """
        The base class for all kernels.

        Kernels can be combined with `+` and `*` (with other kernels or constant tensors) and divided
        by constant tensors. The operators always return a new composite kernel and leave their
        operands untouched; the component kernels themselves are shared, not copied.

        Parameters
        ----------
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.
        """
        super().__init__()
        self.parameter_dict: dict = {}
        self.config: dict = {}
        if active_dims is not None:
            self.active_dims = torch.as_tensor(active_dims)
        else:
            self.active_dims = None
        self.init_required = False

    @abstractmethod
    def kernel_function(self, x: torch.Tensor, y: torch.Tensor,
                        infer_parameter: Optional[bool] = False) -> torch.Tensor:
        raise NotImplementedError

    def forward(self, x: torch.Tensor, y: torch.Tensor,
                infer_parameter: bool = False) -> torch.Tensor:
        """
        Select the active feature dimensions and evaluate the kernel matrix.

        Parameters
        ----------
        x
            Tensor of instances with dimension [Nx, features].
        y
            Tensor of instances with dimension [Ny, features].
        infer_parameter
            Whether to infer the kernel parameters from `x` and `y`.

        Returns
        -------
        The output of `kernel_function` on the selected dimensions.
        """
        if self.active_dims is not None:
            # index_select needs the index tensor on the same device as the input.
            dims = self.active_dims.to(x.device)
            x = torch.index_select(x, -1, dims)
            y = torch.index_select(y, -1, dims)
        if len(self.parameter_dict) > 0:
            return self.kernel_function(x, y, infer_parameter)
        # Kernels without entries in `parameter_dict` are called without the flag.
        # NOTE(review): this includes composite kernels, so `infer_parameter` is not forwarded to
        # their components through this path - confirm that this is intended.
        return self.kernel_function(x, y)

    def __add__(
        self,
        other: Union['BaseKernel', torch.Tensor]
    ) -> 'SumKernel':
        if isinstance(other, SumKernel):
            return SumKernel(list(other.kernel_list) + [self])
        if isinstance(other, (BaseKernel, torch.Tensor)):
            return SumKernel([self, other])
        raise ValueError('Kernels can only added to another kernel or a constant.')

    def __radd__(self, other: 'BaseKernel') -> 'SumKernel':
        return self.__add__(other)

    def __mul__(
        self,
        other: Union['BaseKernel', torch.Tensor]
    ) -> 'BaseKernel':
        if isinstance(other, ProductKernel):
            return ProductKernel(list(other.kernel_list) + [self])
        if isinstance(other, SumKernel):
            # Distribute the product over the sum; each entry records the config of the product.
            return SumKernel([self * k for k in other.kernel_list])
        if isinstance(other, (BaseKernel, torch.Tensor)):
            return ProductKernel([self, other])
        raise ValueError('Kernels can only be multiplied by another kernel or a constant.')

    def __rmul__(
        self,
        other: 'BaseKernel'
    ) -> 'BaseKernel':
        return self.__mul__(other)

    def __truediv__(self, other: torch.Tensor) -> 'BaseKernel':
        if isinstance(other, torch.Tensor):
            return self.__mul__(1. / other)
        raise ValueError('Kernels can only be divided by a constant.')

    def __rtruediv__(self, other):
        raise ValueError('Kernels can not be used as divisor.')

    def __sub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def __rsub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def get_config(self) -> dict:
        return self.config.copy()


class SumKernel(BaseKernel):
    def __init__(self,
                 kernel_list: Optional[List[Union[BaseKernel, torch.Tensor]]] = None) -> None:
        """
        Construct a kernel by summing different kernels.

        Parameters
        ----------
        kernel_list
            Kernels and/or constant (single-element) tensors to sum. The list is copied.

        Raises
        ------
        ValueError
            If a component is neither a kernel nor a tensor.
        """
        super().__init__()
        self.kernel_list = [] if kernel_list is None else list(kernel_list)
        self.config: dict = {'kernel_type': 'Sum',
                             'kernel_list': _component_configs(self.kernel_list, 'SumKernel')}

    def kernel_function(self, x: torch.Tensor, y: torch.Tensor,
                        infer_parameter: bool = False) -> torch.Tensor:
        values = _evaluate_components(self.kernel_list, x, y, infer_parameter, 'SumKernel')
        return torch.sum(torch.stack(values), dim=0)

    def __add__(
        self,
        other: Union[BaseKernel, torch.Tensor]
    ) -> 'SumKernel':
        if isinstance(other, SumKernel):
            return SumKernel(self.kernel_list + other.kernel_list)
        if isinstance(other, (BaseKernel, torch.Tensor)):
            return SumKernel(self.kernel_list + [other])
        raise ValueError(str(type(other)) + ' is not supported by SumKernel.')

    def __mul__(
        self,
        other: Union[BaseKernel, torch.Tensor]
    ) -> BaseKernel:
        if isinstance(other, SumKernel):
            return SumKernel([_multiply(ki, kj) for ki in self.kernel_list for kj in other.kernel_list])
        if isinstance(other, ProductKernel):
            return other * self
        if isinstance(other, (BaseKernel, torch.Tensor)):
            return SumKernel([_multiply(other, ki) for ki in self.kernel_list])
        raise ValueError(str(type(other)) + ' is not supported by SumKernel.')

    def get_config(self) -> dict:
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.PYTORCH.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified at the top level.
        """
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        return cls(**fill_composite_config(config))


class ProductKernel(BaseKernel):
    def __init__(self,
                 kernel_list: Optional[List[Union[BaseKernel, torch.Tensor]]] = None) -> None:
        """
        Construct a kernel by multiplying different kernels.

        Parameters
        ----------
        kernel_list
            Kernels and/or constant (single-element) tensors to multiply. The list is copied.

        Raises
        ------
        ValueError
            If a component is neither a kernel nor a tensor.
        """
        super().__init__()
        self.kernel_list = [] if kernel_list is None else list(kernel_list)
        self.config: dict = {'kernel_type': 'Product',
                             'kernel_list': _component_configs(self.kernel_list, 'ProductKernel')}

    def kernel_function(self, x: torch.Tensor, y: torch.Tensor,
                        infer_parameter: bool = False) -> torch.Tensor:
        values = _evaluate_components(self.kernel_list, x, y, infer_parameter, 'ProductKernel')
        return torch.prod(torch.stack(values), dim=0)

    def __mul__(
        self,
        other: Union[BaseKernel, torch.Tensor]
    ) -> BaseKernel:
        if isinstance(other, SumKernel):
            # Distribute over the sum; the components are shared with this product, not copied.
            return SumKernel([ProductKernel(self.kernel_list + [k]) for k in other.kernel_list])
        if isinstance(other, ProductKernel):
            return ProductKernel(self.kernel_list + other.kernel_list)
        if isinstance(other, (BaseKernel, torch.Tensor)):
            return ProductKernel(self.kernel_list + [other])
        raise ValueError(str(type(other)) + ' is not supported by ProductKernel.')

    def get_config(self) -> dict:
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.PYTORCH.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified at the top level.
        """
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        return cls(**fill_composite_config(config))
""" - super().__init__() - init_sigma_fn = sigma_median if init_sigma_fn is None else init_sigma_fn - self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': init_sigma_fn} - if sigma is None: - self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) - self.init_required = True - else: - sigma = sigma.reshape(-1) # [Ns,] - self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) - self.init_required = False - self.init_sigma_fn = init_sigma_fn + super().__init__(active_dims) + self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn + self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': self.init_sigma_fn, + 'active_dims': active_dims, 'kernel_type': 'GaussianRBF'} + self.parameter_dict['log-sigma'] = KernelParameter( + value=sigma.log().reshape(-1) if sigma is not None else torch.zeros(1), + init_fn=self.init_sigma_fn, # type: ignore + requires_grad=trainable, + requires_init=True if sigma is None else False, + ) self.trainable = trainable + self.init_required = any([param.requires_init for param in self.parameter_dict.values()]) @property def sigma(self) -> torch.Tensor: - return self.log_sigma.exp() + return self.parameter_dict['log-sigma'].value.exp() - def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor], - infer_sigma: bool = False) -> torch.Tensor: + def kernel_function(self, x: torch.Tensor, y: torch.Tensor, + infer_parameter: bool = False) -> torch.Tensor: + n_x, n_y = x.shape[0], y.shape[0] + dist = distance.squared_pairwise_distance(x.reshape(n_x, -1), y.reshape(n_y, -1)) # [Nx, Ny] - x, y = torch.as_tensor(x), torch.as_tensor(y) - dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1)) # [Nx, Ny] - - if infer_sigma or self.init_required: - if self.trainable and infer_sigma: - raise ValueError("Gradients cannot be computed w.r.t. 
class RationalQuadratic(BaseKernel):
    def __init__(
        self,
        alpha: Optional[torch.Tensor] = None,
        init_alpha_fn: Optional[Callable] = None,
        sigma: Optional[torch.Tensor] = None,
        init_sigma_fn: Optional[Callable] = None,
        trainable: bool = False,
        active_dims: Optional[list] = None
    ) -> None:
        """
        Rational Quadratic kernel: k(x,y) = (1 + ||x-y||^2 / (2*alpha*sigma^2))^(-alpha).
        A forward pass takes a batch of instances x [Nx, features] and y [Ny, features]
        and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        alpha
            Exponent parameter of the kernel.
        init_alpha_fn
            Function used to compute the exponent parameter `alpha`. Used when `alpha` is to be inferred.
        sigma
            Bandwidth used for the kernel.
        init_sigma_fn
            Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred.
        trainable
            Whether or not to track gradients w.r.t. `alpha` and `sigma` to allow them to be trained.
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.
        """
        super().__init__(active_dims)
        if alpha is not None and sigma is not None:
            if alpha.shape != sigma.shape:
                raise ValueError('alpha and sigma must have the same shape.')
        self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn
        self.init_alpha_fn = init_alpha_fn
        self.config = {'alpha': alpha, 'sigma': sigma, 'trainable': trainable, 'active_dims': active_dims,
                       'init_alpha_fn': self.init_alpha_fn, 'init_sigma_fn': self.init_sigma_fn,
                       'kernel_type': 'RationalQuadratic'}
        # Both parameters are stored in log-space; an unspecified one starts at exp(0) = 1.
        self.parameter_dict['log-alpha'] = KernelParameter(
            value=alpha.log().reshape(-1) if alpha is not None else torch.zeros(1),
            init_fn=self.init_alpha_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=True if alpha is None else False
        )
        self.parameter_dict['log-sigma'] = KernelParameter(
            value=sigma.log().reshape(-1) if sigma is not None else torch.zeros(1),
            init_fn=self.init_sigma_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=True if sigma is None else False
        )
        self.trainable = trainable
        self.init_required = any([param.requires_init for param in self.parameter_dict.values()])

    @property
    def alpha(self) -> torch.Tensor:
        return self.parameter_dict['log-alpha'].value.exp()

    @property
    def sigma(self) -> torch.Tensor:
        return self.parameter_dict['log-sigma'].value.exp()

    def kernel_function(self, x: torch.Tensor, y: torch.Tensor,
                        infer_parameter: bool = False) -> torch.Tensor:
        # `dist` already holds the squared distances ||x-y||^2, so it must not be squared again.
        dist = distance.squared_pairwise_distance(x.flatten(1), y.flatten(1))  # [Nx, Ny]

        if infer_parameter or self.init_required:
            infer_kernel_parameter(self, x, y, dist, infer_parameter)

        # Broadcasting pairs a single alpha with several sigmas (and vice versa).
        alphas, sigmas = torch.broadcast_tensors(self.alpha, self.sigma)
        kernel_mat = torch.stack([(1 + dist / (2 * a * (s ** 2))) ** (-a)
                                  for a, s in zip(alphas, sigmas)], dim=0)  # [Ns, Nx, Ny]

        return kernel_mat.mean(dim=0)

    def get_config(self) -> dict:
        """
        Returns a serializable config dict (excluding the infer_sigma_fn and infer_alpha_fn,
        which is serialized in alibi_detect.saving).
        """
        cfg = self.config.copy()
        if isinstance(cfg['sigma'], torch.Tensor):
            cfg['sigma'] = cfg['sigma'].detach().cpu().numpy().tolist()
        if isinstance(cfg['alpha'], torch.Tensor):
            cfg['alpha'] = cfg['alpha'].detach().cpu().numpy().tolist()
        cfg.update({'flavour': Framework.PYTORCH.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified.
        """
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        if config.get('sigma') is not None:
            config['sigma'] = torch.tensor(np.array(config['sigma']))
        if config.get('alpha') is not None:
            config['alpha'] = torch.tensor(np.array(config['alpha']))
        return cls(**config)


class Periodic(BaseKernel):
    def __init__(
        self,
        tau: Optional[torch.Tensor] = None,
        init_tau_fn: Optional[Callable] = None,
        sigma: Optional[torch.Tensor] = None,
        init_sigma_fn: Optional[Callable] = None,
        trainable: bool = False,
        active_dims: Optional[list] = None
    ) -> None:
        """
        Periodic kernel: k(x,y) = exp(-2 * sin(pi * |x - y| / tau)^2 / (sigma^2)).
        A forward pass takes a batch of instances x [Nx, features] and y [Ny, features]
        and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        tau
            Period of the periodic kernel.
        init_tau_fn
            Function used to compute the period `tau`. Used when `tau` is to be inferred.
        sigma
            Bandwidth used for the kernel.
        init_sigma_fn
            Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred.
        trainable
            Whether or not to track gradients w.r.t. `tau` and `sigma` to allow them to be trained.
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.
        """
        super().__init__(active_dims)
        if tau is not None and sigma is not None:
            if tau.shape != sigma.shape:
                raise ValueError('tau and sigma must have the same shape.')
        self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn
        self.init_tau_fn = init_tau_fn
        self.config = {'tau': tau, 'sigma': sigma, 'trainable': trainable, 'active_dims': active_dims,
                       'init_tau_fn': self.init_tau_fn, 'init_sigma_fn': self.init_sigma_fn,
                       'kernel_type': 'Periodic'}
        # Both parameters are stored in log-space; an unspecified one starts at exp(0) = 1.
        self.parameter_dict['log-tau'] = KernelParameter(
            value=tau.log().reshape(-1) if tau is not None else torch.zeros(1),
            init_fn=self.init_tau_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=True if tau is None else False
        )
        self.parameter_dict['log-sigma'] = KernelParameter(
            value=sigma.log().reshape(-1) if sigma is not None else torch.zeros(1),
            init_fn=self.init_sigma_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=True if sigma is None else False
        )
        self.trainable = trainable
        self.init_required = any([param.requires_init for param in self.parameter_dict.values()])

    @property
    def tau(self) -> torch.Tensor:
        return self.parameter_dict['log-tau'].value.exp()

    @property
    def sigma(self) -> torch.Tensor:
        return self.parameter_dict['log-sigma'].value.exp()

    def kernel_function(self, x: torch.Tensor, y: torch.Tensor,
                        infer_parameter: bool = False) -> torch.Tensor:
        flat_x, flat_y = x.flatten(1), y.flatten(1)
        # The squared distances are only handed to the parameter initialisers.
        dist = distance.squared_pairwise_distance(flat_x, flat_y)  # [Nx, Ny]

        if infer_parameter or self.init_required:
            infer_kernel_parameter(self, x, y, dist, infer_parameter)

        # The kernel is defined on |x - y|, not on the squared distance. The direct (non-matmul)
        # computation is requested so identical rows get an exactly zero distance.
        norm = torch.cdist(flat_x, flat_y, compute_mode='donot_use_mm_for_euclid_dist')  # [Nx, Ny]

        # Broadcasting pairs a single tau with several sigmas (and vice versa).
        taus, sigmas = torch.broadcast_tensors(self.tau, self.sigma)
        kernel_mat = torch.stack([torch.exp(-2 * torch.square(torch.sin(np.pi * norm / t)) / (s ** 2))
                                  for t, s in zip(taus, sigmas)], dim=0)  # [Ns, Nx, Ny]
        return kernel_mat.mean(dim=0)

    def get_config(self) -> dict:
        """
        Returns a serializable config dict (excluding the infer_sigma_fn and infer_tau_fn,
        which is serialized in alibi_detect.saving).
        """
        cfg = self.config.copy()
        if isinstance(cfg['sigma'], torch.Tensor):
            cfg['sigma'] = cfg['sigma'].detach().cpu().numpy().tolist()
        if isinstance(cfg['tau'], torch.Tensor):
            cfg['tau'] = cfg['tau'].detach().cpu().numpy().tolist()
        cfg.update({'flavour': Framework.PYTORCH.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified.
        """
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        if config.get('sigma') is not None:
            config['sigma'] = torch.tensor(np.array(config['sigma']))
        if config.get('tau') is not None:
            config['tau'] = torch.tensor(np.array(config['tau']))
        return cls(**config)


class ProjKernel(BaseKernel):
    def __init__(
        self,
        proj: nn.Module,
        raw_kernel: Optional[BaseKernel] = None,
    ) -> None:
        """
        A kernel that combines a raw kernel (e.g. RBF) with a projection function (e.g. deep net) as
        k(x, y) = k(proj(x), proj(y)). A forward pass takes a batch of instances x [Nx, features] and
        y [Ny, features] and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        proj
            The projection to be applied to the inputs before applying raw_kernel
        raw_kernel
            The kernel to apply to the projected inputs. Defaults to a Gaussian RBF with trainable bandwidth.
        """
        super().__init__()
        if raw_kernel is None:
            # A fresh kernel per instance: a default-argument instance would be shared (together
            # with its trainable bandwidth) by every ProjKernel built without `raw_kernel`.
            raw_kernel = GaussianRBF(trainable=True)
        self.config = {'proj': proj, 'raw_kernel': raw_kernel, 'kernel_type': 'Proj'}
        self.proj = proj
        self.raw_kernel = raw_kernel
        self.init_required = False

    def kernel_function(
        self,
        x: Union[np.ndarray, torch.Tensor],
        y: Union[np.ndarray, torch.Tensor],
        infer_parameter: Optional[bool] = False
    ) -> torch.Tensor:
        return self.raw_kernel(self.proj(x), self.proj(y), infer_parameter)

    def get_config(self) -> dict:
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.PYTORCH.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified.
        """
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        return cls(**config)
def fill_composite_config(config: dict) -> dict:
    """
    Turn the `kernel_list` config of a composite kernel into instantiated components.

    Parameters
    ----------
    config
        Config dictionary whose `kernel_list` entry maps component names to either a kernel config
        dictionary or a numeric constant. Neither `config` nor its nested dictionaries are modified.

    Returns
    -------
    A dictionary with a single `kernel_list` entry: the list of instantiated kernels and constant
    tensors, in the order of the input mapping.

    Raises
    ------
    ValueError
        If a kernel config has an unknown (or missing) `kernel_type`, or a component is neither a
        dictionary nor a numeric constant.
    """
    kernel_names = ('Sum', 'Product', 'GaussianRBF', 'Periodic', 'RationalQuadratic')
    constant_types = (int, float, np.ndarray, np.integer, np.floating)

    components: list = []
    for k_config in config['kernel_list'].values():
        if isinstance(k_config, dict):
            # Work on a copy: `from_config` strips keys from the dictionary it is given.
            k_config = dict(k_config)
            k_config.pop('src', None)
            kernel_type = k_config.get('kernel_type')
            if kernel_type not in kernel_names:
                raise ValueError('Unknown kernel type.')
            # The classes are only looked up once the type is known to be supported.
            kernel_cls = {
                'Sum': SumKernel,
                'Product': ProductKernel,
                'GaussianRBF': GaussianRBF,
                'Periodic': Periodic,
                'RationalQuadratic': RationalQuadratic,
            }[kernel_type]
            components.append(kernel_cls.from_config(k_config))
        elif isinstance(k_config, constant_types) and not isinstance(k_config, bool):
            components.append(torch.tensor(np.array(k_config)))
        else:
            raise ValueError('Unknown component type.')
    return {'kernel_list': components}
def log_sigma_mean(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
    """
    Bandwidth initialisation function used by the tests: log of half the square root of the mean of `dist`.

    Parameters
    ----------
    x
        Unused; present to match the signature of an `init_sigma_fn`.
    y
        Unused; present to match the signature of an `init_sigma_fn`.
    dist
        Tensor of distances, averaged over all of its entries.

    Returns
    -------
    Tensor with a single element holding the log-bandwidth.
    """
    mean_dist = dist.flatten().mean()
    bandwidth = .5 * mean_dist ** .5
    return bandwidth.unsqueeze(-1).log()
@pytest.mark.parametrize('rationalquadratic_kernel_params', list(range(n_tests_rqk)), indirect=True)
def test_rationalquadratic_kernel(rationalquadratic_kernel_params):
    """
    Evaluate the rational quadratic kernel on random data and check the shape of the kernel matrices,
    the unit diagonal of k(x, x) and the positivity of all entries. A trainable kernel whose bandwidth
    has to be inferred is expected to raise.
    """
    sigma, alpha, n_features, n_instances, trainable = rationalquadratic_kernel_params
    xshape = (n_instances[0], n_features)
    yshape = (n_instances[1], n_features)
    if sigma is not None:
        sigma = torch.from_numpy(sigma)
    if alpha is not None:
        alpha = torch.from_numpy(alpha)
    x = torch.from_numpy(np.random.random(xshape)).float()
    y = torch.from_numpy(np.random.random(yshape)).float()

    kernel = RationalQuadratic(sigma=sigma, alpha=alpha, trainable=trainable)
    infer_parameter = sigma is None
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
        return

    k_xy = kernel(x, y, infer_parameter=infer_parameter).detach().numpy()
    k_xx = kernel(x, x, infer_parameter=infer_parameter).detach().numpy()
    assert k_xy.shape == n_instances and k_xx.shape == (xshape[0], xshape[0])
    np.testing.assert_almost_equal(k_xx.trace(), xshape[0], decimal=4)
    assert (k_xx > 0.).all() and (k_xy > 0.).all()
@pytest.mark.parametrize('periodic_kernel_params', list(range(n_tests_pk)), indirect=True)
def test_periodic_kernel(periodic_kernel_params):
    """
    Evaluate the periodic kernel on random data and check the shape of the kernel matrices, the unit
    diagonal of k(x, x) and the positivity of all entries. A trainable kernel whose bandwidth has to be
    inferred is expected to raise.
    """
    sigma, tau, n_features, n_instances, trainable = periodic_kernel_params
    xshape = (n_instances[0], n_features)
    yshape = (n_instances[1], n_features)
    if sigma is not None:
        sigma = torch.from_numpy(sigma)
    if tau is not None:
        tau = torch.from_numpy(tau)
    x = torch.from_numpy(np.random.random(xshape)).float()
    y = torch.from_numpy(np.random.random(yshape)).float()

    kernel = Periodic(sigma=sigma, tau=tau, trainable=trainable)
    infer_parameter = sigma is None
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
        return

    k_xy = kernel(x, y, infer_parameter=infer_parameter).detach().numpy()
    k_xx = kernel(x, x, infer_parameter=infer_parameter).detach().numpy()
    assert k_xy.shape == n_instances and k_xx.shape == (xshape[0], xshape[0])
    np.testing.assert_almost_equal(k_xx.trace(), xshape[0], decimal=4)
    assert (k_xx > 0.).all() and (k_xy > 0.).all()
@pytest.mark.parametrize('comp_kernel_params', list(range(n_tests_ck)), indirect=True)
def test_comp_kernel(comp_kernel_params):
    """
    Combine three Gaussian RBF kernels with `*` and `+` (rescaled so that k(x, x) = 1) and check the shape
    of the kernel matrices, the unit diagonal of k(x, x) and the positivity of all entries. A trainable
    composite with a bandwidth that has to be inferred is expected to raise.
    """
    (sigma_0, sigma_1, sigma_2, operation_0, operation_1,
     n_features, n_instances, trainable) = comp_kernel_params
    xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features)
    sigmas = [s if s is None else torch.from_numpy(s) for s in (sigma_0, sigma_1, sigma_2)]
    x = torch.from_numpy(np.random.random(xshape)).float()
    y = torch.from_numpy(np.random.random(yshape)).float()

    kernel_0, kernel_1, kernel_2 = [GaussianRBF(sigma=s, trainable=trainable) for s in sigmas]
    operations = (operation_0, operation_1)
    if operations == ('*', '*'):
        kernel = kernel_0 * kernel_1 * kernel_2
    elif operations == ('*', '+'):
        kernel = (kernel_0 * kernel_1 + kernel_2) / torch.tensor(2.0)  # ensure k(x, x) = 1
    elif operations == ('+', '*'):
        kernel = (kernel_0 + kernel_1 * kernel_2) / torch.tensor(2.0)  # ensure k(x, x) = 1
    elif operations == ('+', '+'):
        kernel = (kernel_0 + kernel_1 + kernel_2) / torch.tensor(3.0)  # ensure k(x, x) = 1
    else:
        # Fail loudly instead of continuing with an undefined `kernel`.
        raise ValueError('Invalid operation')

    # Inference is needed as soon as one component has no bandwidth. This must look at the per-kernel
    # sigmas: the module-level `sigma` is the parameter grid (a list) and is never None.
    infer_parameter = any(s is None for s in sigmas)
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
    else:
        k_xy = kernel(x, y, infer_parameter=infer_parameter).detach().numpy()
        k_xx = kernel(x, x, infer_parameter=infer_parameter).detach().numpy()
        assert k_xy.shape == n_instances and k_xx.shape == (xshape[0], xshape[0])
        np.testing.assert_almost_equal(k_xx.trace(), xshape[0], decimal=4)
        assert (k_xx > 0.).all() and (k_xy > 0.).all()
class MyKernel(BaseKernel):  # TODO: Support then test models using keras functional API
    """Test kernel: inner product between linear projections of `x` and `y`."""

    def __init__(self, n_features: int):
        super().__init__()
        self.linear = nn.Linear(n_features, 20)

    def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor],
                infer_parameter: bool = False) -> torch.Tensor:
        # `infer_parameter` is accepted for signature compatibility only; it is not used here.
        x_proj = self.linear(x)
        y_proj = self.linear(y)
        return torch.einsum('ji,ki->jk', x_proj, y_proj)
def infer_kernel_parameter(
    kernel: 'BaseKernel',
    x: tf.Tensor,
    y: tf.Tensor,
    dist: tf.Tensor,
    infer_parameter: bool = True,
) -> None:
    """
    Initialise the kernel parameters that still require initialisation, using their `init_fn`.

    Parameters
    ----------
    kernel
        The kernel function.
    x
        Tensor of instances with dimension [Nx, features].
    y
        Tensor of instances with dimension [Ny, features].
    dist
        Tensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`.
    infer_parameter
        Whether to infer the kernel parameter.

    Raises
    ------
    ValueError
        If the kernel is trainable and `infer_parameter` is True.
    """
    # NOTE(review): block nesting reconstructed from a whitespace-collapsed source — confirm that
    # `requires_init` is cleared for every pending parameter, also when it has no `init_fn`.
    if kernel.trainable and infer_parameter:
        raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value")
    pending = [param for param in kernel.parameter_dict.values() if param.requires_init]
    for param in pending:
        init_fn = param.init_fn
        if init_fn is not None:
            inferred = init_fn(x, y, dist)
            param.value.assign(tf.reshape(inferred, -1))
        param.requires_init = False
    kernel.init_required = False
class KernelParameter:
    def __init__(
        self,
        value: tf.Tensor = None,
        init_fn: Optional[Callable] = None,
        requires_grad: bool = False,
        requires_init: bool = False
    ) -> None:
        """
        Container for a single kernel parameter, stored as a `tf.Variable`.

        Parameters
        ----------
        value
            The pre-specified value of the parameter. If `None`, the parameter is set to 1 by default.
        init_fn
            The function used to initialize the parameter.
        requires_grad
            Whether the parameter requires gradient.
        requires_init
            Whether the parameter requires initialization.
        """
        if value is None:
            initial_value = tf.ones(1, dtype=tf.keras.backend.floatx())
        else:
            initial_value = value
        self.value = tf.Variable(initial_value, trainable=requires_grad)
        self.init_fn = init_fn
        self.requires_init = requires_init

    def __repr__(self) -> str:
        return repr(self.value)
class BaseKernel(tf.keras.Model):
    def __init__(self, active_dims: list = None) -> None:
        """
        The base class for all kernels.

        Parameters
        ----------
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.
        """
        super().__init__()
        self.parameter_dict: dict = {}
        self.config: dict = {}
        self.active_dims = active_dims
        self.init_required = False

    @abstractmethod
    def kernel_function(self, x: tf.Tensor, y: tf.Tensor,
                        infer_parameter: Optional[bool] = False) -> tf.Tensor:
        """
        Compute the kernel matrix between `x` and `y`. Has to be implemented by subclasses.

        Raises
        ------
        NotImplementedError
            Always, unless overridden.
        """
        raise NotImplementedError

    def call(self, x: tf.Tensor, y: tf.Tensor, infer_parameter: bool = False) -> tf.Tensor:
        """
        Evaluate the kernel on `x` and `y`, restricted to `active_dims` if those are set.

        Parameters
        ----------
        x
            First batch of instances.
        y
            Second batch of instances. Cast to the dtype of `x`.
        infer_parameter
            Forwarded to `kernel_function`.

        Returns
        -------
        The output of `kernel_function`.
        """
        y = tf.cast(y, x.dtype)
        if self.active_dims is not None:
            x = tf.gather(x, self.active_dims, axis=-1)
            y = tf.gather(y, self.active_dims, axis=-1)
        return self.kernel_function(x, y, infer_parameter)

    @staticmethod
    def _fill_composite(composite, components):
        """
        Fill an empty sum/product kernel with `components` and record each component's config.

        Kernels contribute their `config`, constant tensors their numpy value. Keys are numbered
        `comp_0, comp_1, ...` in order, so they can never collide.
        """
        for index, component in enumerate(components):
            composite.kernel_list.append(component)
            composite.config['kernel_list']['comp_' + str(index)] = (
                component.numpy() if isinstance(component, tf.Tensor) else component.config
            )
        return composite

    def __add__(
        self,
        other: Union['BaseKernel', tf.Tensor]
    ) -> 'SumKernel':
        """
        Sum of this kernel and another kernel or a constant tensor.

        A new `SumKernel` is returned; neither operand is modified.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, SumKernel):
            # Flatten: extend the terms of the existing sum instead of nesting sums.
            return self._fill_composite(SumKernel(), [*other.kernel_list, self])
        elif isinstance(other, (BaseKernel, ProductKernel, tf.Tensor)):
            return self._fill_composite(SumKernel(), [self, other])
        else:
            raise ValueError('Kernels can only added to another kernel or a constant.')

    def __radd__(self, other: 'BaseKernel') -> 'SumKernel':
        return self.__add__(other)

    def __mul__(
        self,
        other: Union['BaseKernel', tf.Tensor]
    ) -> 'BaseKernel':
        """
        Product of this kernel and another kernel or a constant tensor.

        Multiplying with a `SumKernel` distributes over its terms and returns a `SumKernel`; all other
        cases return a `ProductKernel`. A new kernel is returned; neither operand is modified.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, ProductKernel):
            # Flatten: extend the factors of the existing product instead of nesting products.
            return self._fill_composite(ProductKernel(), [*other.kernel_list, self])
        elif isinstance(other, SumKernel):
            # Each term records the config of the product it holds, not the config of `self`.
            return self._fill_composite(SumKernel(), [self * k for k in other.kernel_list])
        elif isinstance(other, (BaseKernel, tf.Tensor)):
            return self._fill_composite(ProductKernel(), [self, other])
        else:
            raise ValueError('Kernels can only be multiplied by another kernel or a constant.')

    def __rmul__(
        self,
        other: 'BaseKernel'
    ) -> 'BaseKernel':
        return self.__mul__(other)

    def __truediv__(self, other: tf.Tensor) -> 'ProductKernel':
        """Divide the kernel by a constant tensor, implemented as multiplication by its reciprocal."""
        if isinstance(other, tf.Tensor):
            return self.__mul__(1. / other)
        else:
            raise ValueError('Kernels can only be divided by a constant.')

    def __rtruediv__(self, other):
        raise ValueError('Kernels can not be used as divisor.')

    def __sub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def __rsub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def get_config(self) -> dict:
        """Return a shallow copy of the kernel's config dictionary."""
        return self.config.copy()
class SumKernel(BaseKernel):
    def __init__(self,
                 kernel_list: Optional[List[Union[BaseKernel, tf.Tensor]]] = None) -> None:
        """
        Construct a kernel by summing different kernels.

        Parameters
        ----------
        kernel_list
            Terms of the sum: kernels and/or constant tensors. If None, an empty sum is created.

        Raises
        ------
        ValueError
            If a term is neither a kernel nor a `tf.Tensor`.
        """
        super().__init__()
        self.kernel_list = []
        self.config: dict = {'kernel_type': 'Sum', 'kernel_list': {}}
        if kernel_list is not None:
            # Copy the terms one by one so that the caller's list is not aliased.
            for component in kernel_list:
                self._add_term(component)

    def _add_term(self, component: Union[BaseKernel, tf.Tensor]) -> None:
        """Append a term and record its config (kernel config, or numpy value for a constant)."""
        if isinstance(component, tf.Tensor):
            component_config = component.numpy()
        elif isinstance(component, (BaseKernel, ProductKernel)):
            # `ProductKernel` is listed explicitly because it does not derive from `BaseKernel`.
            component_config = component.config
        else:
            raise ValueError(str(type(component)) + 'is not supported by SumKernel.')
        index = len(self.kernel_list)
        self.kernel_list.append(component)
        self.config['kernel_list']['comp_' + str(index)] = component_config

    def call(self, x: Union[np.ndarray, tf.Tensor], y: Union[np.ndarray, tf.Tensor],
             infer_parameter: bool = False) -> tf.Tensor:
        """
        Evaluate every term on `x` and `y` and sum the results.

        Constant terms are multiplied with a matrix of ones of shape [x.shape[0], y.shape[0]].

        Raises
        ------
        ValueError
            If a term is neither a kernel nor a `tf.Tensor`.
        """
        value_list: List[tf.Tensor] = []
        for k in self.kernel_list:
            if isinstance(k, (BaseKernel, ProductKernel)):
                value_list.append(k(x, y, infer_parameter))
            elif isinstance(k, tf.Tensor):
                value_list.append(k * tf.ones((x.shape[0], y.shape[0])))
            else:
                raise ValueError(str(type(k)) + 'is not supported by SumKernel.')
        return tf.reduce_sum(tf.stack(value_list), axis=0)

    def __add__(
        self,
        other: Union[BaseKernel, tf.Tensor]
    ) -> 'SumKernel':
        """
        Sum of this kernel and another kernel or constant tensor.

        The terms of another `SumKernel` are merged into a flat sum. A new `SumKernel` is returned;
        neither operand is modified.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, SumKernel):
            return SumKernel([*self.kernel_list, *other.kernel_list])
        elif isinstance(other, (BaseKernel, ProductKernel, tf.Tensor)):
            return SumKernel([*self.kernel_list, other])
        else:
            raise ValueError(str(type(other)) + 'is not supported by SumKernel.')

    def __radd__(self, other: BaseKernel) -> 'SumKernel':
        return self.__add__(other)

    def __mul__(
        self,
        other: Union[BaseKernel, tf.Tensor]
    ) -> BaseKernel:
        """
        Product of this sum with another kernel or constant tensor, distributed over the terms.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, SumKernel):
            return SumKernel([ki * kj for ki in self.kernel_list for kj in other.kernel_list])
        elif isinstance(other, ProductKernel):
            return other * self
        elif isinstance(other, (BaseKernel, tf.Tensor)):
            # A product of two constants is itself a tensor; the constructor records it as a constant.
            return SumKernel([other * ki for ki in self.kernel_list])
        else:
            raise ValueError(str(type(other)) + 'is not supported by SumKernel.')

    def __rmul__(
        self,
        other: BaseKernel
    ) -> BaseKernel:
        return self.__mul__(other)

    def __truediv__(self, other: tf.Tensor) -> BaseKernel:
        """Divide the kernel by a constant tensor, implemented as multiplication by its reciprocal."""
        if isinstance(other, tf.Tensor):
            return self.__mul__(1 / other)
        else:
            raise ValueError('Kernels can only be divided by a constant.')

    def __rtruediv__(self, other):
        raise ValueError('Kernels can not be used as divisor.')

    def __sub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def __rsub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def get_config(self) -> dict:
        """Return a copy of the config with the `flavour` entry set to the TensorFlow framework value."""
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.TENSORFLOW.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary.
        """
        # Copy first so the caller's dictionary is left intact; `flavour` and `kernel_type` are
        # dropped if present (component configs stored in `self.config` carry no `flavour`).
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        config = fill_composite_config(config)
        return cls(**config)
class ProductKernel(tf.keras.Model):
    def __init__(self,
                 kernel_list: Optional[List[Union[BaseKernel, tf.Tensor]]] = None) -> None:
        """
        Construct a kernel by multiplying different kernels.

        Parameters
        ----------
        kernel_list
            Factors of the product: kernels and/or constant tensors. If None, an empty product is created.

        Raises
        ------
        ValueError
            If a factor is neither a kernel nor a `tf.Tensor`.
        """
        super().__init__()
        self.kernel_list = []
        self.config: dict = {'kernel_type': 'Product', 'kernel_list': {}}
        if kernel_list is not None:
            # Copy the factors one by one so that the caller's list is not aliased.
            for index, component in enumerate(kernel_list):
                if not isinstance(component, (BaseKernel, ProductKernel, tf.Tensor)):
                    raise ValueError(str(type(component)) + 'is not supported by ProductKernel.')
                self.kernel_list.append(component)
                self.config['kernel_list']['comp_' + str(index)] = self._component_config(component)

    @staticmethod
    def _component_config(component):
        """Config entry of a component: numpy value for a constant tensor, `config` for a kernel."""
        return component.numpy() if isinstance(component, tf.Tensor) else component.config

    @staticmethod
    def _as_sum(components) -> 'SumKernel':
        """Build a `SumKernel` holding `components`, recording the config of each term."""
        sum_kernel = SumKernel()
        for index, component in enumerate(components):
            sum_kernel.kernel_list.append(component)
            sum_kernel.config['kernel_list']['comp_' + str(index)] = \
                ProductKernel._component_config(component)
        return sum_kernel

    def call(self, x: Union[np.ndarray, tf.Tensor], y: Union[np.ndarray, tf.Tensor],
             infer_parameter: bool = False) -> tf.Tensor:
        """
        Evaluate every factor on `x` and `y` and multiply the results element-wise.

        Constant factors are multiplied with a matrix of ones of shape [x.shape[0], y.shape[0]].

        Raises
        ------
        ValueError
            If a factor is neither a kernel nor a `tf.Tensor`.
        """
        value_list: List[tf.Tensor] = []
        for k in self.kernel_list:
            if isinstance(k, (BaseKernel, ProductKernel)):
                value_list.append(k(x, y, infer_parameter))
            elif isinstance(k, tf.Tensor):
                value_list.append(k * tf.ones((x.shape[0], y.shape[0])))
            else:
                raise ValueError(str(type(k)) + 'is not supported by ProductKernel.')
        return tf.reduce_prod(tf.stack(value_list), axis=0)

    def __add__(
        self,
        other: Union[BaseKernel, 'SumKernel', 'ProductKernel', tf.Tensor]
    ) -> 'SumKernel':
        """
        Sum of this product and another kernel or constant tensor.

        A new `SumKernel` is returned; neither operand is modified.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, SumKernel):
            # Flatten: extend the terms of the existing sum instead of nesting sums.
            return self._as_sum([*other.kernel_list, self])
        elif isinstance(other, (ProductKernel, BaseKernel, tf.Tensor)):
            return self._as_sum([self, other])
        else:
            raise ValueError(str(type(other)) + 'is not supported by ProductKernel.')

    def __radd__(
        self,
        other: Union[BaseKernel, 'SumKernel', 'ProductKernel']
    ) -> 'SumKernel':
        return self.__add__(other)

    def __mul__(
        self,
        other: Union[BaseKernel, 'SumKernel', 'ProductKernel', tf.Tensor]
    ) -> Union['SumKernel', 'ProductKernel']:
        """
        Product of this kernel and another kernel or constant tensor.

        Multiplying with a `SumKernel` distributes over its terms and returns a `SumKernel`; all other
        cases return a flat `ProductKernel`. A new kernel is returned; neither operand is modified and
        the component kernels are shared, not copied.

        Raises
        ------
        ValueError
            If `other` is neither a kernel nor a `tf.Tensor`.
        """
        if isinstance(other, SumKernel):
            # One product per term of the sum; building each product through the constructor keeps
            # its config in sync with its factors.
            return self._as_sum([ProductKernel([*self.kernel_list, k]) for k in other.kernel_list])
        elif isinstance(other, ProductKernel):
            return ProductKernel([*self.kernel_list, *other.kernel_list])
        elif isinstance(other, (BaseKernel, tf.Tensor)):
            return ProductKernel([*self.kernel_list, other])
        else:
            raise ValueError(str(type(other)) + 'is not supported by ProductKernel.')

    def __rmul__(
        self,
        other: Union[BaseKernel, 'SumKernel', 'ProductKernel']
    ) -> Union['SumKernel', 'ProductKernel']:
        return self.__mul__(other)

    def __truediv__(self, other: tf.Tensor) -> Union['SumKernel', 'ProductKernel']:
        """Divide the kernel by a constant tensor, implemented as multiplication by its reciprocal."""
        if isinstance(other, tf.Tensor):
            return self.__mul__(1 / other)
        else:
            raise ValueError('Kernels can only be divided by a constant.')

    def __rtruediv__(self, other):
        raise ValueError('Kernels can not be used as divisor.')

    def __sub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def __rsub__(self, other):
        raise ValueError('Kernels do not support subtraction.')

    def get_config(self) -> dict:
        """Return a copy of the config with the `flavour` entry set to the TensorFlow framework value."""
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.TENSORFLOW.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary.
        """
        # Copy first so the caller's dictionary is left intact; `flavour` and `kernel_type` are
        # dropped if present (component configs stored in `self.config` carry no `flavour`).
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        config = fill_composite_config(config)
        return cls(**config)
""" - super().__init__() - init_sigma_fn = sigma_median if init_sigma_fn is None else init_sigma_fn - self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': init_sigma_fn} - if sigma is None: - self.log_sigma = tf.Variable(np.empty(1), dtype=tf.keras.backend.floatx(), trainable=trainable) - self.init_required = True - else: - sigma = tf.cast(tf.reshape(sigma, (-1,)), dtype=tf.keras.backend.floatx()) # [Ns,] - self.log_sigma = tf.Variable(tf.math.log(sigma), trainable=trainable) - self.init_required = False - self.init_sigma_fn = init_sigma_fn + super().__init__(active_dims) + self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn + self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': self.init_sigma_fn, + 'active_dims': active_dims, 'kernel_type': 'GaussianRBF'} + self.parameter_dict['log-sigma'] = KernelParameter( + value=tf.reshape(tf.math.log( + tf.cast(sigma, tf.keras.backend.floatx())), -1) if sigma is not None else tf.zeros(1), + init_fn=self.init_sigma_fn, # type: ignore + requires_grad=trainable, + requires_init=True if sigma is None else False + ) self.trainable = trainable + self.init_required = any([param.requires_init for param in self.parameter_dict.values()]) @property def sigma(self) -> tf.Tensor: - return tf.math.exp(self.log_sigma) + return tf.math.exp(self.parameter_dict['log-sigma'].value) - def call(self, x: tf.Tensor, y: tf.Tensor, infer_sigma: bool = False) -> tf.Tensor: + def kernel_function(self, x: tf.Tensor, y: tf.Tensor, infer_parameter: bool = False) -> tf.Tensor: y = tf.cast(y, x.dtype) x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1)) # flatten dist = distance.squared_pairwise_distance(x, y) # [Nx, Ny] - if infer_sigma or self.init_required: - if self.trainable and infer_sigma: - raise ValueError("Gradients cannot be computed w.r.t. 
class RationalQuadratic(BaseKernel):
    def __init__(
        self,
        alpha: Optional[tf.Tensor] = None,
        init_alpha_fn: Optional[Callable] = None,
        sigma: Optional[tf.Tensor] = None,
        init_sigma_fn: Optional[Callable] = None,
        trainable: bool = False,
        active_dims: Optional[list] = None
    ) -> None:
        """
        Rational Quadratic kernel: k(x,y) = (1 + ||x-y||^2 / (2*alpha*sigma^2))^(-alpha).
        A forward pass takes a batch of instances x [Nx, features] and y [Ny, features]
        and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        alpha
            Exponent parameter of the kernel.
        init_alpha_fn
            Function used to compute the exponent parameter `alpha`. Used when `alpha` is to be inferred.
        sigma
            Bandwidth used for the kernel.
        init_sigma_fn
            Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred.
            Defaults to `log_sigma_median`.
        trainable
            Whether or not to track gradients w.r.t. `alpha` and `sigma` to allow them to be trained.
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.

        Raises
        ------
        ValueError
            If both `alpha` and `sigma` are given and their shapes differ.
        """
        super().__init__(active_dims)
        if alpha is not None and sigma is not None:
            if alpha.shape != sigma.shape:
                raise ValueError('alpha and sigma must have the same shape.')
        self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn
        self.init_alpha_fn = init_alpha_fn
        self.config = {'alpha': alpha, 'sigma': sigma, 'trainable': trainable, 'active_dims': active_dims,
                       'init_sigma_fn': self.init_sigma_fn, 'init_alpha_fn': self.init_alpha_fn,
                       'kernel_type': 'RationalQuadratic'}

        def log_parameter(value: Optional[tf.Tensor], init_fn: Optional[Callable]) -> KernelParameter:
            # Parameters are stored on the log scale; an unspecified value starts at log(1) = 0 and is
            # flagged as requiring initialisation.
            if value is None:
                log_value = tf.zeros(1)
            else:
                log_value = tf.reshape(tf.math.log(tf.cast(value, tf.keras.backend.floatx())), -1)
            return KernelParameter(
                value=log_value,
                init_fn=init_fn,  # type: ignore
                requires_grad=trainable,
                requires_init=value is None
            )

        self.parameter_dict['log-alpha'] = log_parameter(alpha, self.init_alpha_fn)
        self.parameter_dict['log-sigma'] = log_parameter(sigma, self.init_sigma_fn)
        self.trainable = trainable
        self.init_required = any([param.requires_init for param in self.parameter_dict.values()])

    @property
    def sigma(self) -> tf.Tensor:
        return tf.math.exp(self.parameter_dict['log-sigma'].value)

    @property
    def alpha(self) -> tf.Tensor:
        return tf.math.exp(self.parameter_dict['log-alpha'].value)

    def kernel_function(self, x: tf.Tensor, y: tf.Tensor, infer_parameter: bool = False) -> tf.Tensor:
        """
        Compute the kernel matrix between `x` and `y`, averaged over the (alpha, sigma) pairs.

        Parameters
        ----------
        x
            Batch of instances; flattened to [Nx, features].
        y
            Batch of instances; cast to the dtype of `x` and flattened to [Ny, features].
        infer_parameter
            Whether to infer the kernel parameters from `x` and `y`.

        Returns
        -------
        Kernel matrix [Nx, Ny].
        """
        y = tf.cast(y, x.dtype)
        x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))  # flatten
        dist = distance.squared_pairwise_distance(x, y)  # [Nx, Ny], already ||x-y||^2

        if infer_parameter or self.init_required:
            infer_kernel_parameter(self, x, y, dist, infer_parameter)

        # Read the parameters once, after any inference has been applied.
        alpha, sigma = self.alpha, self.sigma
        # `dist` must not be squared again: the kernel is defined on the squared distance.
        kernel_mat = tf.stack([(1 + dist / (2 * alpha[i] * (sigma[i] ** 2))) ** (-alpha[i])
                               for i in range(len(sigma))], axis=0)
        return tf.reduce_mean(kernel_mat, axis=0)

    def get_config(self) -> dict:
        """
        Returns a serializable config dict (excluding the init_sigma_fn and init_alpha_fn,
        which are serialized in alibi_detect.saving).
        """
        cfg = self.config.copy()
        for name in ('sigma', 'alpha'):
            if isinstance(cfg[name], tf.Tensor):
                cfg[name] = cfg[name].numpy().tolist()
        cfg.update({'flavour': Framework.TENSORFLOW.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary.
        """
        # Copy first so the caller's dictionary is left intact; `flavour` and `kernel_type` are
        # dropped if present rather than required.
        config = dict(config)
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        for name in ('sigma', 'alpha'):
            if config.get(name) is not None:
                config[name] = tf.convert_to_tensor(np.array(config[name]))
        return cls(**config)
class Periodic(BaseKernel):
    def __init__(
        self,
        tau: Optional[tf.Tensor] = None,
        init_tau_fn: Optional[Callable] = None,
        sigma: Optional[tf.Tensor] = None,
        init_sigma_fn: Optional[Callable] = None,
        trainable: bool = False,
        active_dims: Optional[list] = None
    ) -> None:
        """
        Periodic kernel: k(x,y) = exp(-2 * sin(pi * |x - y| / tau)^2 / (sigma^2)).
        A forward pass takes a batch of instances x [Nx, features] and y [Ny, features]
        and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        tau
            Period of the periodic kernel.
        init_tau_fn
            Function used to compute the period `tau`. Used when `tau` is to be inferred.
        sigma
            Bandwidth used for the kernel.
        init_sigma_fn
            Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred.
            Defaults to `log_sigma_median`.
        trainable
            Whether or not to track gradients w.r.t. `tau` and `sigma` to allow them to be trained.
        active_dims
            Indices of the dimensions of the feature to be used for the kernel. If None, all dimensions are used.

        Raises
        ------
        ValueError
            If both `tau` and `sigma` are given but their shapes differ.
        """
        super().__init__(active_dims)
        if tau is not None and sigma is not None and tau.shape != sigma.shape:
            raise ValueError('tau and sigma must have the same shape.')
        self.init_sigma_fn = log_sigma_median if init_sigma_fn is None else init_sigma_fn
        self.init_tau_fn = init_tau_fn
        self.config = {'tau': tau, 'sigma': sigma, 'trainable': trainable, 'active_dims': active_dims,
                       'init_tau_fn': self.init_tau_fn, 'init_sigma_fn': self.init_sigma_fn,
                       'kernel_type': 'Periodic'}
        # Both parameters are stored in log-space; the `tau`/`sigma` properties exponentiate them.
        self.parameter_dict['log-tau'] = KernelParameter(
            value=tf.reshape(tf.math.log(
                tf.cast(tau, tf.keras.backend.floatx())), -1) if tau is not None else tf.zeros(1),
            init_fn=self.init_tau_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=tau is None
        )
        self.parameter_dict['log-sigma'] = KernelParameter(
            value=tf.reshape(tf.math.log(
                tf.cast(sigma, tf.keras.backend.floatx())), -1) if sigma is not None else tf.zeros(1),
            init_fn=self.init_sigma_fn,  # type: ignore
            requires_grad=trainable,
            requires_init=sigma is None
        )
        self.trainable = trainable
        self.init_required = any(param.requires_init for param in self.parameter_dict.values())

    @property
    def tau(self) -> tf.Tensor:
        """Period(s), recovered from the stored log-value."""
        return tf.math.exp(self.parameter_dict['log-tau'].value)

    @property
    def sigma(self) -> tf.Tensor:
        """Bandwidth(s), recovered from the stored log-value."""
        return tf.math.exp(self.parameter_dict['log-sigma'].value)

    def kernel_function(self, x: tf.Tensor, y: tf.Tensor, infer_parameter: bool = False) -> tf.Tensor:
        """
        Compute the kernel matrix between `x` and `y`, averaged over all (tau, sigma) pairs.

        Parameters
        ----------
        x
            Batch of instances, flattened to [Nx, features].
        y
            Batch of instances, flattened to [Ny, features]. Cast to the dtype of `x`.
        infer_parameter
            Whether to (re-)infer the kernel parameters from `x` and `y` before evaluating the kernel.

        Returns
        -------
        Kernel matrix [Nx, Ny].
        """
        y = tf.cast(y, x.dtype)
        x, y = tf.reshape(x, (x.shape[0], -1)), tf.reshape(y, (y.shape[0], -1))
        dist = distance.squared_pairwise_distance(x, y)

        # Parameter inference keeps receiving the squared distances, exactly as before.
        if infer_parameter or self.init_required:
            infer_kernel_parameter(self, x, y, dist, infer_parameter)
            # Refresh the flag (as GaussianRBF does) so initialised parameters are not re-inferred on every call.
            self.init_required = any(param.requires_init for param in self.parameter_dict.values())

        # The kernel is periodic in |x - y|, not in the squared distance, so take the root.
        # The tiny floor keeps the square root's gradient finite where the distance is exactly zero.
        abs_dist = tf.sqrt(tf.maximum(dist, tf.cast(1e-12, dist.dtype)))
        pi = tf.cast(np.pi, x.dtype)
        kernel_mat = tf.stack([
            tf.math.exp(-2 * tf.square(tf.math.sin(pi * abs_dist / self.tau[i])) / (self.sigma[i] ** 2))
            for i in range(len(self.sigma))
        ], axis=0)
        return tf.reduce_mean(kernel_mat, axis=0)

    def get_config(self) -> dict:
        """
        Returns a serializable config dict (excluding the infer_sigma_fn and infer_tau_fn,
        which is serialized in alibi_detect.saving).
        """
        cfg = self.config.copy()
        if isinstance(cfg['sigma'], tf.Tensor):
            cfg['sigma'] = cfg['sigma'].numpy().tolist()
        if isinstance(cfg['tau'], tf.Tensor):
            cfg['tau'] = cfg['tau'].numpy().tolist()
        cfg.update({'flavour': Framework.TENSORFLOW.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified.
        """
        config = dict(config)  # shallow copy: do not strip keys from the caller's dict
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        for name in ('sigma', 'tau'):
            if config.get(name) is not None:
                config[name] = tf.convert_to_tensor(np.array(config[name]))
        return cls(**config)
class ProjKernel(BaseKernel):
    def __init__(
        self,
        proj: tf.keras.Model,
        raw_kernel: Optional[BaseKernel] = None,
    ) -> None:
        """
        A kernel that combines a raw kernel (e.g. RBF) with a projection function (e.g. deep net) as
        k(x, y) = k(proj(x), proj(y)). A forward pass takes a batch of instances x [Nx, features] and
        y [Ny, features] and returns the kernel matrix [Nx, Ny].

        Parameters
        ----------
        proj
            The projection to be applied to the inputs before applying raw_kernel
        raw_kernel
            The kernel to apply to the projected inputs. Defaults to a Gaussian RBF with trainable bandwidth.
        """
        super().__init__()
        # Build the default per instance: a default evaluated in the signature would be created once
        # and shared (together with its trainable bandwidth) by every ProjKernel relying on it.
        if raw_kernel is None:
            raw_kernel = GaussianRBF(trainable=True)
        self.config = {'proj': proj, 'raw_kernel': raw_kernel, 'kernel_type': 'Proj'}
        self.proj = proj
        self.raw_kernel = raw_kernel
        self.init_required = False

    def kernel_function(self, x: tf.Tensor, y: tf.Tensor, infer_parameter: bool = False) -> tf.Tensor:
        """Apply `raw_kernel` to the projected inputs, forwarding `infer_parameter` to it."""
        return self.raw_kernel(self.proj(x), self.proj(y), infer_parameter)

    def get_config(self) -> dict:
        """Returns a copy of the config dict with the TensorFlow `flavour` entry added."""
        cfg = self.config.copy()
        cfg.update({'flavour': Framework.TENSORFLOW.value})
        return cfg

    @classmethod
    def from_config(cls, config):
        """
        Instantiates a kernel from a config dictionary.

        Parameters
        ----------
        config
            A kernel config dictionary. It is not modified.
        """
        config = dict(config)  # shallow copy: do not strip keys from the caller's dict
        config.pop('flavour', None)
        config.pop('kernel_type', None)
        return cls(**config)
def fill_composite_config(config: dict) -> dict:
    """
    Instantiate the components listed in a composite (sum/product) kernel config.

    Parameters
    ----------
    config
        Config dictionary whose `'kernel_list'` entry maps to the component configs. A component is either
        a kernel config dictionary (with a `'kernel_type'` entry) or a numeric factor. The dictionary and
        its nested component dictionaries are not modified.

    Returns
    -------
    Dictionary with a single `'kernel_list'` entry holding the instantiated kernels and the numeric \
    factors cast to tensors, in the original order.

    Raises
    ------
    ValueError
        If a component has an unknown kernel type or is neither a dictionary nor a supported numeric type.
    """
    # Resolved at call time so kernel classes defined later in the module are available.
    kernel_classes = {
        'Sum': SumKernel,
        'Product': ProductKernel,
        'GaussianRBF': GaussianRBF,
        'Periodic': Periodic,
        'RationalQuadratic': RationalQuadratic,
    }
    numeric_types = (np.ndarray, float, np.float32, np.float64)

    final_config: dict = {'kernel_list': []}
    for k_config in config['kernel_list'].values():
        if isinstance(k_config, dict):
            k_config = dict(k_config)  # work on a copy so the caller's config keeps its entries
            k_config.pop('src', None)  # 'src' is not a constructor argument; tolerate its absence
            kernel_type = k_config['kernel_type']
            kernel_cls = kernel_classes.get(kernel_type)
            if kernel_cls is None:
                raise ValueError(f'Unknown kernel type: {kernel_type!r}.')
            final_config['kernel_list'].append(kernel_cls.from_config(k_config))
        elif isinstance(k_config, numeric_types):
            final_config['kernel_list'].append(tf.cast(np.array(k_config), tf.keras.backend.floatx()))
        else:
            raise ValueError(f'Unknown component type: {type(k_config).__name__}.')
    return final_config
kernel_ref = ['GaussianRBF', 'RationalQuadratic', 'Periodic']
n_features = [5, 10]
n_instances = [(100, 100), (100, 75)]
trainable = [True, False]
init_fn = [None, log_sigma_median, log_sigma_mean]
tests_init_fn = list(product(kernel_ref, n_features, n_instances, trainable, init_fn))


def _random_inputs(n_instances, n_features):
    """Draw float32 batches x [Nx, features] and y [Ny, features] with uniform random entries."""
    xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features)
    x = tf.convert_to_tensor(np.random.random(xshape).astype('float32'))
    y = tf.convert_to_tensor(np.random.random(yshape).astype('float32'))
    return x, y


def _check_kernel_matrices(kernel, x, y, infer_parameter):
    """Evaluate k(x, y) then k(x, x) and check shapes, unit diagonal and strict positivity."""
    k_xy = kernel(x, y, infer_parameter=infer_parameter).numpy()
    k_xx = kernel(x, x, infer_parameter=infer_parameter).numpy()
    n_x, n_y = x.shape[0], y.shape[0]
    assert k_xy.shape == (n_x, n_y) and k_xx.shape == (n_x, n_x)
    np.testing.assert_almost_equal(k_xx.trace(), n_x, decimal=4)
    assert (k_xx > 0.).all() and (k_xy > 0.).all()


@pytest.fixture
def init_fn_params(request):
    return tests_init_fn[request.param]


@pytest.mark.parametrize('init_fn_params', list(range(len(tests_init_fn))), indirect=True)
def test_init_fn(init_fn_params):
    kernel_ref, n_features, n_instances, trainable, init_fn = init_fn_params
    x, y = _random_inputs(n_instances, n_features)

    kernel_classes = {'GaussianRBF': GaussianRBF, 'RationalQuadratic': RationalQuadratic, 'Periodic': Periodic}
    if kernel_ref not in kernel_classes:
        raise NotImplementedError
    kernel = kernel_classes[kernel_ref](trainable=trainable, init_sigma_fn=init_fn)

    if trainable:
        # Explicit inference is expected to be rejected for trainable parameters.
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=True)
    else:
        _check_kernel_matrices(kernel, x, y, infer_parameter=True)
        if init_fn is not None:
            np.testing.assert_almost_equal(kernel.sigma.numpy(),
                                           np.exp(init_fn(x, y, squared_pairwise_distance(x, y)).numpy()),
                                           decimal=4)


sigma = [None, np.array([1.]), np.array([2.])]
alpha = [None, np.array([1.]), np.array([2.])]
n_features = [5, 10]
n_instances = [(100, 100), (100, 75)]
trainable = [True, False]
tests_rqk = list(product(sigma, alpha, n_features, n_instances, trainable))
n_tests_rqk = len(tests_rqk)


@pytest.fixture
def rationalquadratic_kernel_params(request):
    return tests_rqk[request.param]


@pytest.mark.parametrize('rationalquadratic_kernel_params', list(range(n_tests_rqk)), indirect=True)
def test_rationalquadratic_kernel(rationalquadratic_kernel_params):
    sigma, alpha, n_features, n_instances, trainable = rationalquadratic_kernel_params
    x, y = _random_inputs(n_instances, n_features)

    kernel = RationalQuadratic(sigma=sigma, alpha=alpha, trainable=trainable)
    infer_parameter = sigma is None
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
    else:
        _check_kernel_matrices(kernel, x, y, infer_parameter)


sigma = [None, np.array([1.]), np.array([2.])]
tau = [None, np.array([8.]), np.array([24.])]
n_features = [5, 10]
n_instances = [(100, 100), (100, 75)]
trainable = [True, False]
tests_pk = list(product(sigma, tau, n_features, n_instances, trainable))
n_tests_pk = len(tests_pk)


@pytest.fixture
def periodic_kernel_params(request):
    return tests_pk[request.param]


@pytest.mark.parametrize('periodic_kernel_params', list(range(n_tests_pk)), indirect=True)
def test_periodic_kernel(periodic_kernel_params):
    sigma, tau, n_features, n_instances, trainable = periodic_kernel_params
    x, y = _random_inputs(n_instances, n_features)

    kernel = Periodic(sigma=sigma, tau=tau, trainable=trainable)
    infer_parameter = sigma is None
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
    else:
        _check_kernel_matrices(kernel, x, y, infer_parameter)


sigma_0 = [None, np.array([1.])]
sigma_1 = [None, np.array([1.])]
sigma_2 = [None, np.array([1.])]
operation_0 = ['*', '+']
operation_1 = ['*', '+']
n_features = [5, 10]
n_instances = [(100, 100), (100, 75)]
trainable = [True, False]
tests_ck = list(product(sigma_0, sigma_1, sigma_2,
                        operation_0, operation_1, n_features, n_instances, trainable))
n_tests_ck = len(tests_ck)


@pytest.fixture
def comp_kernel_params(request):
    return tests_ck[request.param]


@pytest.mark.parametrize('comp_kernel_params', list(range(n_tests_ck)), indirect=True)
def test_comp_kernel(comp_kernel_params):
    (sigma_0, sigma_1, sigma_2, operation_0, operation_1,
     n_features, n_instances, trainable) = comp_kernel_params
    x, y = _random_inputs(n_instances, n_features)

    kernel_0 = GaussianRBF(sigma=sigma_0, trainable=trainable)
    kernel_1 = GaussianRBF(sigma=sigma_1, trainable=trainable)
    kernel_2 = GaussianRBF(sigma=sigma_2, trainable=trainable)
    if operation_0 == '*' and operation_1 == '*':
        kernel = kernel_0 * kernel_1 * kernel_2
    elif operation_0 == '*' and operation_1 == '+':
        kernel = (kernel_0 * kernel_1 + kernel_2) / tf.convert_to_tensor(2.0)  # ensure k(x, x) = 1
    elif operation_0 == '+' and operation_1 == '*':
        kernel = (kernel_0 + kernel_1 * kernel_2) / tf.convert_to_tensor(2.0)  # ensure k(x, x) = 1
    elif operation_0 == '+' and operation_1 == '+':
        kernel = (kernel_0 + kernel_1 + kernel_2) / tf.convert_to_tensor(3.0)  # ensure k(x, x) = 1
    else:
        # Fail loudly: swallowing the error here would leave `kernel` undefined further down.
        raise ValueError('Invalid operation')

    # Use this test's own sigmas; the module-level `sigma` list is never None, which kept this False.
    # NOTE(review): assumes the composite kernel forwards `infer_parameter` to its components - confirm.
    infer_parameter = any(s is None for s in (sigma_0, sigma_1, sigma_2))
    if trainable and infer_parameter:
        with pytest.raises(Exception):
            kernel(x, y, infer_parameter=infer_parameter)
    else:
        _check_kernel_matrices(kernel, x, y, infer_parameter)
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import scipy.stats as stats\n", + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "\n", + "backend = 'tensorflow'\n", + "\n", + "from alibi_detect.cd import MMDDrift\n", + "if backend == 'pytorch':\n", + " from alibi_detect.utils.pytorch.kernels import GaussianRBF, Periodic\n", + "elif backend == 'tensorflow':\n", + " from alibi_detect.utils.tensorflow.kernels import GaussianRBF, Periodic\n", + "else:\n", + " raise ValueError('Backend {} not supported'.format(backend))\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def get_sin(N):\n", + " c_0 = np.random.uniform(0, 168, N)\n", + " x_0 = np.sin(c_0 / (12 / np.pi)) + np.random.normal(0, 0.1, N)\n", + "\n", + " c_1 = stats.beta.rvs(a=1.2, b=1.2, size=N) * 24 + np.random.choice([0, 24, 48, 72, 96, 120, 144], size=N)\n", + " x_1 = np.sin(c_1 / (12 / np.pi)) * (np.mod(c_1, 24) < 12) + \\\n", + " np.sin(c_1 / (12 / np.pi)) * (np.mod(c_1, 24) >= 12) * 1.25 + \\\n", + " + np.random.normal(0, 0.1, N)\n", + " \n", + " x_ref = np.hstack([c_0.reshape(-1, 1), x_0.reshape(-1, 1)])\n", + " x_test = np.hstack([c_1.reshape(-1, 1), x_1.reshape(-1, 1)]) \n", + " \n", + " return x_ref, x_test" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "x_ref, x_test = get_sin(N=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here, we create two simple wave-shaped datasets, each with two features. The test data shows apparent drift around the wave trough."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.5, 1.5)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(8, 3), dpi=128)\n", + "plt.plot(x_ref[:, 0], x_ref[:, 1], 'bo', alpha=0.5, markersize=2.5, label='Reference')\n", + "plt.plot(x_test[:, 0], x_test[:, 1], 'ro', alpha=0.5, markersize=2.5, label='Test')\n", + "plt.legend()\n", + "plt.ylim(-1.5, 1.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### If we use the standard RBF kernel on both features with the MMD drift detector, we can see that the drift is not detected." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "Kernel_RBF = GaussianRBF()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "cd_RBF = MMDDrift(x_ref=x_ref,\n", + " backend=backend,\n", + " kernel=Kernel_RBF)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'is_drift': 0,\n", + " 'distance': -0.000772655,\n", + " 'p_val': 0.8,\n", + " 'threshold': 0.05,\n", + " 'distance_threshold': 0.0021861196},\n", + " 'meta': {'name': 'MMDDriftTF',\n", + " 'detector_type': 'offline',\n", + " 'data_type': None,\n", + " 'version': '0.9.2dev',\n", + " 'backend': 'tensorflow'}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds_RBF = cd_RBF.predict(x_test)\n", + "preds_RBF" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### To facilitate our knowledge that the data contain waves, we use a combined kernel averaged from two kernels. The first kernel is a periodic kernel with a specified period of 24 and only works on the first feature. The second kernel is an RBF kernel with an inferred bandwidth and only works on the second feature." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "if backend == 'pytorch':\n", + " Kernel_0 = Periodic(tau=torch.tensor([24.0]), active_dims=[0])\n", + " Kernel_1 = GaussianRBF(active_dims=[1])\n", + " Kernel_avg = (Kernel_0 + Kernel_1) / torch.tensor(2.0)\n", + "elif backend == 'tensorflow':\n", + " Kernel_0 = Periodic(tau=tf.convert_to_tensor([24.0]), active_dims=[0])\n", + " Kernel_1 = GaussianRBF(active_dims=[1])\n", + " Kernel_avg = (Kernel_0 + Kernel_1) / tf.convert_to_tensor(2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "cd_avg = MMDDrift(x_ref=x_ref,\n", + " backend=backend,\n", + " kernel=Kernel_avg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We can see the drift is detected with the combined kernel." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'is_drift': 1,\n", + " 'distance': 0.0052251816,\n", + " 'p_val': 0.0,\n", + " 'threshold': 0.05,\n", + " 'distance_threshold': 0.0009160042},\n", + " 'meta': {'name': 'MMDDriftTF',\n", + " 'detector_type': 'offline',\n", + " 'data_type': None,\n", + " 'version': '0.9.2dev',\n", + " 'backend': 'tensorflow'}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds_avg = cd_avg.predict(x_test)\n", + "preds_avg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The kernel, its components and associated parameters can be inspected as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "ListWrapper([, ])\n", + "ListWrapper([, ])\n" + ] + } + ], + "source": [ + "print(cd_avg._detector.kernel)\n", + 
"print(cd_avg._detector.kernel.kernel_list[0].kernel_factors)\n", + "print(cd_avg._detector.kernel.kernel_list[1].kernel_factors)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tf.Tensor([24.], shape=(1,), dtype=float32)\n", + "tf.Tensor([34.68171], shape=(1,), dtype=float32)\n" + ] + } + ], + "source": [ + "print(Kernel_avg.kernel_list[0].kernel_factors[0].tau)\n", + "print(Kernel_avg.kernel_list[0].kernel_factors[0].sigma)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tf.Tensor([0.5185638], shape=(1,), dtype=float32)\n" + ] + } + ], + "source": [ + "print(Kernel_avg.kernel_list[1].kernel_factors[0].sigma)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/source/examples/cd_create_customised_kernel.ipynb b/doc/source/examples/cd_create_customised_kernel.ipynb new file mode 100644 index 000000000..c3a8052aa --- /dev/null +++ b/doc/source/examples/cd_create_customised_kernel.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create customised kernel to be used with drift detectors\n", + "\n", + "### Sometimes we might prefer to use some prior knowledge or pre-trained embeddings to build a customised kernel (distance) function instead. 
In this notebook, we will demonstrate how to implement a user-defined kernel with either a customised distance function or a specific feature projection function. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import scipy.stats as stats\n", + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "\n", + "backend = 'pytorch'\n", + "\n", + "from alibi_detect.cd import MMDDrift\n", + "if backend == 'pytorch':\n", + " from alibi_detect.utils.pytorch.kernels import BaseKernel, ProjKernel, GaussianRBF\n", + "elif backend == 'tensorflow':\n", + " from alibi_detect.utils.tensorflow.kernels import BaseKernel, ProjKernel, GaussianRBF\n", + "else:\n", + " raise ValueError('Backend {} not supported'.format(backend))\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We first consider creating a kernel that uses a user-specified distance function. For instance, we can write a periodic kernel's distance function with trigonometric functions: $k(x,y) = \\exp(-2 \\cdot \\frac{\\sin(\\pi \\cdot \\frac{|x - y|}{\\tau})^2}{\\sigma^2})$. To do so, the easiest way is to import and inherit from the BaseKernel class of the corresponding backend (here we use PyTorch), and override the `kernel_function` method." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### For this example, we manually specify the kernel's parameters in the kernel function. To implement these parameters as variables for training or initialisation heuristics, please refer to the implementations of the built-in kernels."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class PeriodicKernel(BaseKernel):\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + "\n", + " def kernel_function(self, x, y):\n", + " tau = 24.0 # period parameter\n", + " sigma = 0.05 # bandwidth parameter\n", + " x, y = torch.as_tensor(x), torch.as_tensor(y)\n", + " x2 = x.pow(2).sum(dim=-1, keepdim=True)\n", + " y2 = y.pow(2).sum(dim=-1, keepdim=True)\n", + " dist = torch.addmm(y2.transpose(-2, -1), x, y.transpose(-2, -1), alpha=-2).add_(x2)\n", + " kernel_mat = torch.exp(-2 * torch.square(torch.sin(torch.as_tensor(np.pi) * dist / tau)) / (sigma ** 2))\n", + " return kernel_mat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Now we create a toy dataset to test our new kernel, where the test data shows an apparent drift around the wave trough." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def get_sin(N):\n", + " c_0 = np.random.uniform(0, 168, N)\n", + " x_0 = np.sin(c_0 / (12 / np.pi)) + np.random.normal(0, 0.1, N)\n", + "\n", + " c_1 = stats.beta.rvs(a=1.2, b=1.2, size=N) * 24 + np.random.choice([0, 24, 48, 72, 96, 120, 144], size=N)\n", + " x_1 = np.sin(c_1 / (12 / np.pi)) * (np.mod(c_1, 24) < 12) + \\\n", + " np.sin(c_1 / (12 / np.pi)) * (np.mod(c_1, 24) >= 12) * 1.25 + \\\n", + " + np.random.normal(0, 0.1, N)\n", + " \n", + " x_ref = np.hstack([c_0.reshape(-1, 1), x_0.reshape(-1, 1)])\n", + " x_test = np.hstack([c_1.reshape(-1, 1), x_1.reshape(-1, 1)]) \n", + " \n", + " return x_ref, x_test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "x_ref, x_test = get_sin(N=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.5, 1.5)" + ] + }, + "execution_count": 5, + "metadata": {}, +
"output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(8, 3), dpi=128)\n", + "plt.plot(x_ref[:, 0], x_ref[:, 1], 'bo', alpha=0.5, markersize=2.5, label='Reference')\n", + "plt.plot(x_test[:, 0], x_test[:, 1], 'ro', alpha=0.5, markersize=2.5, label='Test')\n", + "plt.legend()\n", + "plt.ylim(-1.5, 1.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We can now create an instance of the periodic kernel implemented above and use it with the MMD detector. " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No GPU detected, fall back on CPU.\n" + ] + } + ], + "source": [ + "kernel_period = PeriodicKernel()\n", + "\n", + "cd = MMDDrift(x_ref=x_ref,\n", + " backend=backend,\n", + " kernel=kernel_period)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'is_drift': 1,\n", + " 'distance': 0.0006290622733601328,\n", + " 'p_val': 0.029999999329447746,\n", + " 'threshold': 0.05,\n", + " 'distance_threshold': array(0.00055086, dtype=float32)},\n", + " 'meta': {'name': 'MMDDriftTorch',\n", + " 'detector_type': 'offline',\n", + " 'data_type': None,\n", + " 'version': '0.9.2dev',\n", + " 'backend': 'pytorch'}}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds = cd.predict(x_test)\n", + "preds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Alternatively, we might consider using a projection function (which could be anything from a straightforward linear transform to a deep net) to imply our knowledge about the dataset. 
In this case, we can implement the kernel with the `ProjKernel` class, defining the projection function with the model class of the corresponding backend (here `torch.nn.Module`, since we use the PyTorch backend)." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class MyProj(torch.nn.Module):\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + "\n", + " def forward(self, x):\n", + " x = torch.as_tensor(x)\n", + " return torch.cat([torch.remainder(x[:, 0], 24).reshape(-1, 1), x[:, 1].reshape(-1, 1)], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### As indicated by the code above, here we create a simple projection function that takes the remainder of the first feature after division by 24 and leaves the second feature unchanged." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.5, 1.5)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "proj = MyProj()\n", + "\n", + "x_proj_ref = proj(x_ref)\n", + "\n", + "x_proj_test = proj(x_test)\n", + "\n", + "plt.figure(figsize=(4, 3), dpi=128)\n", + "plt.plot(x_proj_ref[:, 0], x_proj_ref[:, 1], 'bo', alpha=0.5, markersize=2.5, label='Reference')\n", + "plt.plot(x_proj_test[:, 0], x_proj_test[:, 1], 'ro', alpha=0.5, markersize=2.5, label='Test')\n", + "plt.legend()\n", + "plt.ylim(-1.5, 1.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We can then create the kernel with the projection model and a base RBF kernel and use it together with the MMD detector. " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No GPU detected, fall back on CPU.\n" + ] + } + ], + "source": [ + "kernel_proj = ProjKernel(proj = proj,\n", + " raw_kernel= GaussianRBF(sigma=torch.as_tensor(0.05)))\n", + "\n", + "cd_proj = MMDDrift(x_ref=x_ref,\n", + " backend=backend,\n", + " kernel=kernel_proj)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': {'is_drift': 1,\n", + " 'distance': 0.0009441937452792366,\n", + " 'p_val': 0.0,\n", + " 'threshold': 0.05,\n", + " 'distance_threshold': array(0.00010083, dtype=float32)},\n", + " 'meta': {'name': 'MMDDriftTorch',\n", + " 'detector_type': 'offline',\n", + " 'data_type': None,\n", + " 'version': '0.9.2dev',\n", + " 'backend': 'pytorch'}}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preds_proj = cd_proj.predict(x_test)\n", + "preds_proj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + 
"language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/cd_combined_kernel.ipynb b/examples/cd_combined_kernel.ipynb new file mode 100644 index 000000000..d713eeff1 --- /dev/null +++ b/examples/cd_combined_kernel.ipynb @@ -0,0 +1 @@ +../doc/source/examples/cd_combined_kernel.ipynb \ No newline at end of file