From 90b81acd205a9920b473914b8aee6c739f8d6b18 Mon Sep 17 00:00:00 2001 From: Nicolas Tessore Date: Sat, 25 Nov 2023 16:14:43 +0000 Subject: [PATCH] add regularize_spectra() function --- docs/index.rst | 1 + docs/references.rst | 7 ++ docs/spectra.rst | 15 ++++ src/angst/__init__.py | 2 + src/angst/_spectra.py | 183 +++++++++++++++++++++++++++++++++++++++++- 5 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 docs/references.rst diff --git a/docs/index.rst b/docs/index.rst index 86f1b97..9e176c1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,7 @@ spectra core glossary + references Indices and tables diff --git a/docs/references.rst b/docs/references.rst new file mode 100644 index 0000000..226582a --- /dev/null +++ b/docs/references.rst @@ -0,0 +1,7 @@ +References +========== + +.. [Higham02] Higham N. J., 2002, IMA J. Numer. Anal., 22, 329. + +.. [Xavier16] Xavier H. S., Abdalla F. B., Joachimi B., 2016, MNRAS, 459, 3693. + `doi:10.1093/mnras/stw874 `_ diff --git a/docs/spectra.rst b/docs/spectra.rst index 963630d..a0f1ac0 100644 --- a/docs/spectra.rst +++ b/docs/spectra.rst @@ -38,3 +38,18 @@ in the first :math:`T_n = n \, (n + 1) / 2` entries of the sequence. To easily generate or iterate over sequences of angular power spectra in standard order, see the :func:`enumerate_spectra` and :func:`spectra_indices` functions. + + +Regularisation +-------------- + +When sets of angular power spectra are used to sample random fields, their +matrix :math:`C_\ell^{ij}` for fixed :math:`\ell` must form a valid +positive-definite covariance matrix. This is not always the case, for example +due to numerical inaccuracies, or transformations of the underlying fields +[Xavier16]_. + +Regularisation takes sets of spectra which are ill-posed for sampling, and +returns sets which are well-defined and, in some sense, "close" to the input. + +.. autofunction:: regularized_spectra diff --git a/src/angst/__init__.py b/src/angst/__init__.py index 4cfbe39..0e693c3 100644 --- a/src/angst/__init__.py +++ b/src/angst/__init__.py @@ -5,6 +5,7 @@ "displacement", "enumerate_spectra", "inv_triangle_number", + "regularized_spectra", "spectra_indices", "__version__", "__version_tuple__", @@ -19,6 +20,7 @@ ) from ._spectra import ( enumerate_spectra, + regularized_spectra, spectra_indices, ) from ._version import ( diff --git a/src/angst/_spectra.py b/src/angst/_spectra.py index 42d41bc..0ba6862 100644 --- a/src/angst/_spectra.py +++ b/src/angst/_spectra.py @@ -2,9 +2,16 @@ from __future__ import annotations +import numpy as np + # typing -from typing import Iterable, Iterator -from numpy.typing import ArrayLike +from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Sequence +from numpy.typing import ArrayLike, NDArray + +if TYPE_CHECKING: + from typing import TypeAlias + +from . import _core def enumerate_spectra( @@ -41,3 +48,175 @@ def spectra_indices(n: int) -> Iterator[tuple[int, int]]: for i in range(n): for j in range(i, -1, -1): yield i, j + + +def _cov_clip_negative_eigenvalues(cov: NDArray[Any]) -> NDArray[Any]: + """ + Covariance matrix from clipping negative eigenvalues. + """ + + # set negative eigenvalues to zero + w, v = np.linalg.eigh(cov) + w[w < 0] = 0 + + # put matrix back together + reg: NDArray[Any] = np.einsum("...ij,...j,...kj->...ik", v, w, v) + + # fix the upper triangular part of the matrix to zero + reg[(...,) + np.triu_indices(w.shape[-1], 1)] = 0 + + # return the regularised covariance matrix + return reg + + +def _cov_nearest_correlation_matrix(cov: NDArray[Any], niter: int = 20) -> NDArray[Any]: + """ + Covariance matrix from nearest correlation matrix. Uses the + algorithm of Higham (2002). + """ + + # size of the covariance matrix + s = np.shape(cov) + n = s[-1] + + # make a copy to work on + corr = np.copy(cov) + + # view onto the diagonal of the correlation matrix + diag = corr.reshape(s[:-2] + (-1,))[..., :: n + 1] + + # set correlations with nonpositive diagonal to zero + good = diag > 0 + corr *= good[..., np.newaxis, :] + corr *= good[..., :, np.newaxis] + + # get sqrt of the diagonal for normalization + norm = np.sqrt(diag) + + # compute the correlation matrix + np.divide(corr, norm[..., np.newaxis, :], where=good[..., np.newaxis, :], out=corr) + np.divide(corr, norm[..., :, np.newaxis], where=good[..., :, np.newaxis], out=corr) + + # indices of the upper triangular part of the matrix + triu = (...,) + np.triu_indices(n, 1) + + # always keep upper triangular part of matrix fixed to zero + # otherwise, Dykstra's correction points in the wrong direction + corr[triu] = 0 + + # find the nearest covariance matrix with given diagonal + dyks = np.zeros_like(corr) + proj = np.empty_like(corr) + for k in range(niter): + # apply Dykstra's correction to current result + np.subtract(corr, dyks, out=proj) + + # project onto positive semi-definite matrices + w, v = np.linalg.eigh(proj) + w[w < 0] = 0 + np.einsum("...ij,...j,...kj->...ik", v, w, v, out=corr) + + # keep upper triangular part fixed to zero + corr[triu] = 0 + + # compute Dykstra's correction + np.subtract(corr, proj, out=dyks) + + # project onto matrices with unit diagonal + diag[good] = 1 + + # put the normalisation back to convert correlations to covariance + np.multiply(corr, norm[..., np.newaxis, :], out=corr) + np.multiply(corr, norm[..., :, np.newaxis], out=corr) + + # return the regularised covariance matrix + return corr + + +# valid ``method`` parameter values for :func:`regularized_spectra` +RegularizedSpectraMethod: TypeAlias = Literal["nearest", "clip"] + + +def regularized_spectra( + spectra: Sequence[ArrayLike], + lmax: int | None = None, + method: RegularizedSpectraMethod = "nearest", + **method_kwargs: Any, +) -> list[ArrayLike]: + """ + Regularises a complete set *spectra* of angular power spectra in + :ref:`standard order ` such that at every angular + mode number :math:`\\ell`, the matrix :math:`C_\\ell^{ij}` is a + valid positive semi-definite covariance matrix. + + The length of the returned spectra is set by *lmax*, or the maximum + length of the given spectra if *lmax* is not given. Shorter input + spectra are padded with zeros as necessary. Missing spectra can be + set to :data:`None` or, preferably, an empty array. + + The *method* parameter determines how the regularisation is carried + out. The following methods are supported: + + ``regularized_spectra(..., method="nearest", niter=20)`` + Compute the (possibly defective) correlation matrices of the + given spectra, then find the nearest valid correlation matrices, + using the algorithm from [Higham02]_ for *niter* iterations. + This keeps the diagonals (i.e. auto-correlations) fixed, but + requires all of them to be nonnegative. + + ``regularized_spectra(..., method="clip")`` + Compute the eigendecomposition of the given spectra's matrices + and set all negative eigenvalues to zero. + + """ + + # regularise the cov matrix using the chosen method + if method == "clip": + method_func = _cov_clip_negative_eigenvalues + elif method == "nearest": + method_func = _cov_nearest_correlation_matrix + else: + raise ValueError(f"unknown method '{method}'") + + # recover the number of fields from the number of spectra + try: + n = _core.inv_triangle_number(len(spectra)) + except ValueError as exc: + raise ValueError("invalid number of spectra") from exc + + if lmax is None: + # maximum length in input spectra + k = max((np.size(cl) for cl in spectra if cl is not None), default=0) + else: + k = lmax + 1 + + # this is the covariance matrix of the spectra + # the leading dimension is k, then it is a n-by-n covariance matrix + # missing entries are zero, which is the default value + cov = np.zeros((k, n, n)) + + # fill the matrix up by going through the spectra in order + # skip over entries that are None + # if the spectra are ragged, some entries at high ell may remain zero + # only fill the lower triangular part, everything is symmetric + for i, j, cl in enumerate_spectra(spectra): + if cl is not None: + cov[: np.size(cl), j, i] = np.reshape(cl, -1)[:k] + + # use cholesky() as a fast way to check for positive semi-definite + # if it fails, the matrix of spectra needs regularisation + # otherwise, the matrix is pos. def. and the spectra are good + try: + np.linalg.cholesky(cov + np.finfo(0.0).tiny) + except np.linalg.LinAlgError: + # regularise the cov matrix using the chosen method + cov = method_func(cov, **method_kwargs) + + # gather regularised spectra from array + # convert matrix slices to contiguous arrays for type safety + reg: list[ArrayLike] = [] + for i, j in spectra_indices(n): + reg.append(np.ascontiguousarray(cov[:, j, i])) + + # return the regularised spectra + return reg