From 90b81acd205a9920b473914b8aee6c739f8d6b18 Mon Sep 17 00:00:00 2001
From: Nicolas Tessore <n.tessore@ucl.ac.uk>
Date: Sat, 25 Nov 2023 16:14:43 +0000
Subject: [PATCH] add regularize_spectra() function

---
 docs/index.rst        |   1 +
 docs/references.rst   |   7 ++
 docs/spectra.rst      |  15 ++++
 src/angst/__init__.py |   2 +
 src/angst/_spectra.py | 183 +++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 docs/references.rst

diff --git a/docs/index.rst b/docs/index.rst
index 86f1b97..9e176c1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,6 +13,7 @@
    spectra
    core
    glossary
+   references
 
 
 Indices and tables
diff --git a/docs/references.rst b/docs/references.rst
new file mode 100644
index 0000000..226582a
--- /dev/null
+++ b/docs/references.rst
@@ -0,0 +1,7 @@
+References
+==========
+
+.. [Higham02] Higham N. J., 2002, IMA J. Numer. Anal., 22, 329.
+
+.. [Xavier16] Xavier H. S., Abdalla F. B., Joachimi B., 2016, MNRAS, 459, 3693.
+   `doi:10.1093/mnras/stw874 <https://dx.doi.org/10.1093/mnras/stw874>`_
diff --git a/docs/spectra.rst b/docs/spectra.rst
index 963630d..a0f1ac0 100644
--- a/docs/spectra.rst
+++ b/docs/spectra.rst
@@ -38,3 +38,18 @@ in the first :math:`T_n = n \, (n + 1) / 2` entries of the sequence.
 To easily generate or iterate over sequences of angular power spectra in
 standard order, see the :func:`enumerate_spectra` and :func:`spectra_indices`
 functions.
+
+
+Regularisation
+--------------
+
+When sets of angular power spectra are used to sample random fields, their
+matrix :math:`C_\ell^{ij}` for fixed :math:`\ell` must form a valid
+positive-definite covariance matrix.  This is not always the case, for example
+due to numerical inaccuracies, or transformations of the underlying fields
+[Xavier16]_.
+
+Regularisation takes sets of spectra which are ill-posed for sampling, and
+returns sets which are well-defined and, in some sense, "close" to the input.
+
+.. autofunction:: regularized_spectra
diff --git a/src/angst/__init__.py b/src/angst/__init__.py
index 4cfbe39..0e693c3 100644
--- a/src/angst/__init__.py
+++ b/src/angst/__init__.py
@@ -5,6 +5,7 @@
     "displacement",
     "enumerate_spectra",
     "inv_triangle_number",
+    "regularized_spectra",
     "spectra_indices",
     "__version__",
     "__version_tuple__",
@@ -19,6 +20,7 @@
 )
 from ._spectra import (
     enumerate_spectra,
+    regularized_spectra,
     spectra_indices,
 )
 from ._version import (
diff --git a/src/angst/_spectra.py b/src/angst/_spectra.py
index 42d41bc..0ba6862 100644
--- a/src/angst/_spectra.py
+++ b/src/angst/_spectra.py
@@ -2,9 +2,16 @@
 
 from __future__ import annotations
 
+import numpy as np
+
 # typing
-from typing import Iterable, Iterator
-from numpy.typing import ArrayLike
+from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Sequence
+from numpy.typing import ArrayLike, NDArray
+
+if TYPE_CHECKING:
+    from typing import TypeAlias
+
+from . import _core
 
 
 def enumerate_spectra(
@@ -41,3 +48,175 @@ def spectra_indices(n: int) -> Iterator[tuple[int, int]]:
     for i in range(n):
         for j in range(i, -1, -1):
             yield i, j
+
+
+def _cov_clip_negative_eigenvalues(cov: NDArray[Any]) -> NDArray[Any]:
+    """
+    Covariance matrix from clipping negative eigenvalues.
+    """
+
+    # set negative eigenvalues to zero
+    w, v = np.linalg.eigh(cov)
+    w[w < 0] = 0
+
+    # put matrix back together
+    reg: NDArray[Any] = np.einsum("...ij,...j,...kj->...ik", v, w, v)
+
+    # fix the upper triangular part of the matrix to zero
+    reg[(...,) + np.triu_indices(w.shape[-1], 1)] = 0
+
+    # return the regularised covariance matrix
+    return reg
+
+
+def _cov_nearest_correlation_matrix(cov: NDArray[Any], niter: int = 20) -> NDArray[Any]:
+    """
+    Covariance matrix from nearest correlation matrix.  Uses the
+    algorithm of Higham (2002).
+    """
+
+    # size of the covariance matrix
+    s = np.shape(cov)
+    n = s[-1]
+
+    # make a copy to work on
+    corr = np.copy(cov)
+
+    # view onto the diagonal of the correlation matrix
+    diag = corr.reshape(s[:-2] + (-1,))[..., :: n + 1]
+
+    # set correlations with nonpositive diagonal to zero
+    good = diag > 0
+    corr *= good[..., np.newaxis, :]
+    corr *= good[..., :, np.newaxis]
+
+    # get sqrt of the diagonal for normalization
+    norm = np.sqrt(diag)
+
+    # compute the correlation matrix
+    np.divide(corr, norm[..., np.newaxis, :], where=good[..., np.newaxis, :], out=corr)
+    np.divide(corr, norm[..., :, np.newaxis], where=good[..., :, np.newaxis], out=corr)
+
+    # indices of the upper triangular part of the matrix
+    triu = (...,) + np.triu_indices(n, 1)
+
+    # always keep upper triangular part of matrix fixed to zero
+    # otherwise, Dykstra's correction points in the wrong direction
+    corr[triu] = 0
+
+    # find the nearest covariance matrix with given diagonal
+    dyks = np.zeros_like(corr)
+    proj = np.empty_like(corr)
+    for k in range(niter):
+        # apply Dykstra's correction to current result
+        np.subtract(corr, dyks, out=proj)
+
+        # project onto positive semi-definite matrices
+        w, v = np.linalg.eigh(proj)
+        w[w < 0] = 0
+        np.einsum("...ij,...j,...kj->...ik", v, w, v, out=corr)
+
+        # keep upper triangular part fixed to zero
+        corr[triu] = 0
+
+        # compute Dykstra's correction
+        np.subtract(corr, proj, out=dyks)
+
+        # project onto matrices with unit diagonal
+        diag[good] = 1
+
+    # put the normalisation back to convert correlations to covariance
+    np.multiply(corr, norm[..., np.newaxis, :], out=corr)
+    np.multiply(corr, norm[..., :, np.newaxis], out=corr)
+
+    # return the regularised covariance matrix
+    return corr
+
+
+# valid ``method`` parameter values for :func:`regularized_spectra`
+RegularizedSpectraMethod: TypeAlias = Literal["nearest", "clip"]
+
+
+def regularized_spectra(
+    spectra: Sequence[ArrayLike],
+    lmax: int | None = None,
+    method: RegularizedSpectraMethod = "nearest",
+    **method_kwargs: Any,
+) -> list[ArrayLike]:
+    """
+    Regularises a complete set *spectra* of angular power spectra in
+    :ref:`standard order <spectra_order>` such that at every angular
+    mode number :math:`\\ell`, the matrix :math:`C_\\ell^{ij}` is a
+    valid positive semi-definite covariance matrix.
+
+    The length of the returned spectra is set by *lmax*, or the maximum
+    length of the given spectra if *lmax* is not given.  Shorter input
+    spectra are padded with zeros as necessary.  Missing spectra can be
+    set to :data:`None` or, preferably, an empty array.
+
+    The *method* parameter determines how the regularisation is carried
+    out.  The following methods are supported:
+
+    ``regularized_spectra(..., method="nearest", niter=20)``
+        Compute the (possibly defective) correlation matrices of the
+        given spectra, then find the nearest valid correlation matrices,
+        using the algorithm from [Higham02]_ for *niter* iterations.
+        This keeps the diagonals (i.e. auto-correlations) fixed, but
+        requires all of them to be nonnegative.
+
+    ``regularized_spectra(..., method="clip")``
+        Compute the eigendecomposition of the given spectra's matrices
+        and set all negative eigenvalues to zero.
+
+    """
+
+    # regularise the cov matrix using the chosen method
+    if method == "clip":
+        method_func = _cov_clip_negative_eigenvalues
+    elif method == "nearest":
+        method_func = _cov_nearest_correlation_matrix
+    else:
+        raise ValueError(f"unknown method '{method}'")
+
+    # recover the number of fields from the number of spectra
+    try:
+        n = _core.inv_triangle_number(len(spectra))
+    except ValueError as exc:
+        raise ValueError("invalid number of spectra") from exc
+
+    if lmax is None:
+        # maximum length in input spectra
+        k = max((np.size(cl) for cl in spectra if cl is not None), default=0)
+    else:
+        k = lmax + 1
+
+    # this is the covariance matrix of the spectra
+    # the leading dimension is k, then it is a n-by-n covariance matrix
+    # missing entries are zero, which is the default value
+    cov = np.zeros((k, n, n))
+
+    # fill the matrix up by going through the spectra in order
+    # skip over entries that are None
+    # if the spectra are ragged, some entries at high ell may remain zero
+    # only fill the lower triangular part, everything is symmetric
+    for i, j, cl in enumerate_spectra(spectra):
+        if cl is not None:
+            cov[: np.size(cl), j, i] = np.reshape(cl, -1)[:k]
+
+    # use cholesky() as a fast way to check for positive semi-definite
+    # if it fails, the matrix of spectra needs regularisation
+    # otherwise, the matrix is pos. def. and the spectra are good
+    try:
+        np.linalg.cholesky(cov + np.finfo(0.0).tiny)
+    except np.linalg.LinAlgError:
+        # regularise the cov matrix using the chosen method
+        cov = method_func(cov, **method_kwargs)
+
+    # gather regularised spectra from array
+    # convert matrix slices to contiguous arrays for type safety
+    reg: list[ArrayLike] = []
+    for i, j in spectra_indices(n):
+        reg.append(np.ascontiguousarray(cov[:, j, i]))
+
+    # return the regularised spectra
+    return reg