diff --git a/README.rst b/README.rst
index 8be0d84..7b4e4de 100644
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference
 =======================================
 :lsbi: Linear Simulation Based Inference
 :Author: Will Handley & David Yallup
-:Version: 0.12.0
+:Version: 0.13.0
 :Homepage: https://github.com/handley-lab/lsbi
 :Documentation: http://lsbi.readthedocs.io/
 
diff --git a/lsbi/_version.py b/lsbi/_version.py
index ea370a8..f23a6b3 100644
--- a/lsbi/_version.py
+++ b/lsbi/_version.py
@@ -1 +1 @@
-__version__ = "0.12.0"
+__version__ = "0.13.0"
diff --git a/lsbi/model.py b/lsbi/model.py
index b5fc90e..a8b82ee 100644
--- a/lsbi/model.py
+++ b/lsbi/model.py
@@ -6,7 +6,7 @@
 from numpy.linalg import inv, solve
 from scipy.special import logsumexp
 
-from lsbi.stats import dkl, mixture_normal, multivariate_normal
+from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal
 from lsbi.utils import alias, dediagonalise, logdet
 
 
@@ -261,9 +261,31 @@ def ppd(self, D0):
         """P(D|D0) as a distribution object."""
         return self.update(D0).evidence()
 
-    def dkl(self, D, n=0):
+    def dkl(self, D, N=0):
         """KL divergence between the posterior and prior.
 
+        Analytically this is
+
+        <log P(θ|D)/P(θ)>_P(θ|D) = 1/2 (log|1 + M Σ M'/ C|
+                      - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P))
+
+        Parameters
+        ----------
+        D : array_like, shape (..., d)
+            Data to form the posterior
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        """
+        return dkl(self.posterior(D), self.prior(), N)
+
+    def bmd(self, D, N=0):
+        """Bayesian model dimensionality.
+
+        Analytically this is
+
+        bmd/2 = var(log P(θ|D)/P(θ))_P(θ|D) = 1/2 tr(M Σ M'/ (C+M Σ M'))^2
+                  + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P)
+
         Parameters
         ----------
         D : array_like, shape (..., d)
@@ -271,7 +293,76 @@ def dkl(self, D, n=0):
         n : int, optional
             Number of samples for a monte carlo estimate, defaults to 0
         """
-        return dkl(self.posterior(D), self.prior(), n)
+        return bmd(self.posterior(D), self.prior(), N)
+
+    def mutual_information(self, N=0, mcerror=False):
+        """Mutual information between the parameters and the data.
+
+        Analytically this is
+
+        <log P(θ|D)/P(θ)>_P(D,θ) = log|1 + M Σ M'/ C|/2
+
+        Parameters
+        ----------
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        mcerror : bool, optional
+            Produce a monte carlo error estimate
+        """
+        if N > 0:
+            N = int(N**0.5)
+            D = self.evidence().rvs(N)
+            θ = self.posterior(D).rvs()
+            logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(
+                θ, broadcast=True
+            )
+            ans = logR.mean(axis=0)
+            if mcerror:
+                err = (logR.var(axis=0) / (N - 1)) ** 0.5
+                ans = (ans, err)
+            return ans
+
+        MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)
+        C = self._C
+        return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape)
+
+    def dimensionality(self, N=0, mcerror=False):
+        """Model dimensionality.
+
+        Analytically this is
+
+        <bmd/2>_P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2
+
+        Parameters
+        ----------
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        mcerror : bool, optional
+            Produce a monte carlo error estimate
+        """
+        if N > 0:
+            N = int(N**0.5)
+            D = self.evidence().rvs(N)
+            θ = self.posterior(D).rvs(N)
+            logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(
+                θ, broadcast=True
+            )
+            ans = logR.var(axis=0).mean(axis=0) * 2
+            if mcerror:
+                err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2
+                err = ((err**2).sum(axis=0) / (N - 1)) ** 0.5
+                ans = (ans, err)
+            return ans
+
+        MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)
+        C = self._C
+        invCpMΣM = inv(C + MΣM)
+
+        return np.broadcast_to(
+            2 * np.einsum("...ij,...ji->...", MΣM, invCpMΣM)
+            - np.einsum("...ij,...jk,...kl,...li->...", MΣM, invCpMΣM, MΣM, invCpMΣM),
+            self.shape,
+        )
 
     @property
     def _M(self):
@@ -445,24 +536,70 @@ def update(self, D, inplace=False):
         if not inplace:
             return dist
 
-    def dkl(self, D, n=0):
+    def dkl(self, D, N=0):
         """KL divergence between the posterior and prior.
 
         Parameters
         ----------
         D : array_like, shape (..., d)
             Data to form the posterior
-        n : int, optional
+        N : int, optional
             Number of samples for a monte carlo estimate, defaults to 0
         """
-        if n == 0:
-            raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.")
+        if N == 0:
+            raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.")
 
         p = self.posterior(D)
         q = self.prior()
-        x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True)
+        x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True)
         return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0)
 
+    def bmd(self, D, N=0):
+        """Bayesian model dimensionality.
+
+        Parameters
+        ----------
+        D : array_like, shape (..., d)
+            Data to form the posterior
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        """
+        if N == 0:
+            raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.")
+
+        p = self.posterior(D)
+        q = self.prior()
+        x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True)
+        return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0)
+
+    def mutual_information(self, N=0, mcerror=False):
+        """Mutual information between the parameters and the data.
+
+        Parameters
+        ----------
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        mcerror : bool, optional
+            Produce a monte carlo error estimate
+        """
+        if N == 0:
+            raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.")
+        return super().mutual_information(N, mcerror)
+
+    def dimensionality(self, N=0, mcerror=False):
+        """Model dimensionality.
+
+        Parameters
+        ----------
+        N : int, optional
+            Number of samples for a monte carlo estimate, defaults to 0
+        mcerror : bool, optional
+            Produce a monte carlo error estimate
+        """
+        if N == 0:
+            raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.")
+        return super().dimensionality(N, mcerror)
+
 
 class ReducedLinearModel(object):
     """A model with no data.
diff --git a/lsbi/stats.py b/lsbi/stats.py
index 562db6c..e6fc8a2 100644
--- a/lsbi/stats.py
+++ b/lsbi/stats.py
@@ -585,15 +585,22 @@ def __getitem__(self, arg):  # noqa: D105
         return dist
 
 
-def dkl(p, q, n=0):
-    """Kullback-Leibler divergence between two distributions.
+def dkl(p, q, N=0, mcerror=False):
+    r"""Kullback-Leibler divergence between two distributions.
+
+    if P ~ N(p,P) and Q ~ N(q,Q) then
+
+    D_KL(P\||Q) = <log P/Q>_P =
+    1/2 * (log(\|Q|/\|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p))
 
     Parameters
     ----------
     p : lsbi.stats.multivariate_normal
     q : lsbi.stats.multivariate_normal
-    n : int, optional, default=0
+    N : int, optional, default=0
         Number of samples to mcmc estimate the divergence.
+    mcerror : bool, optional, default=False
+        Produce a Monte Carlo error estimate
 
     Returns
     -------
@@ -601,9 +608,15 @@ def dkl(p, q, n=0):
         Kullback-Leibler divergence between p and q.
     """
     shape = np.broadcast_shapes(p.shape, q.shape)
-    if n:
-        x = p.rvs(size=(n, *shape), broadcast=True)
-        return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0)
+    if N:
+        x = p.rvs(size=(N, *shape), broadcast=True)
+        logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)
+        ans = logR.mean(axis=0)
+        if mcerror:
+            err = (logR.var(axis=0) / (N - 1)) ** 0.5
+            ans = (ans, err)
+        return ans
+
     dkl = -p.dim * np.ones(shape)
     dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal)
     dkl = dkl - logdet(p.cov * np.ones(p.dim), p.diagonal)
@@ -623,3 +636,73 @@ def dkl(p, q, n=0):
         dkl = dkl + np.einsum("...ij,...ji->...", invq, p.cov)
 
     return dkl / 2
+
+
+def bmd(p, q, N=0, mcerror=False):
+    """Bayesian model dimensionality between two distributions.
+
+    if P ~ N(p,P) and Q ~ N(q,Q) then
+
+    bmd/2 = var(log P/Q)_P
+          = 1/2 tr(Q^{-1} P Q^{-1} P) - tr(Q^{-1} P)
+            + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2
+
+    Parameters
+    ----------
+    p : lsbi.stats.multivariate_normal
+    q : lsbi.stats.multivariate_normal
+    N : int, optional, default=0
+        Number of samples to mcmc estimate the dimensionality.
+    mcerror : bool, optional, default=False
+        Produce a Monte Carlo error estimate
+
+    Returns
+    -------
+    bmd : array_like
+        Bayesian model dimensionality between p and q.
+    """
+    shape = np.broadcast_shapes(p.shape, q.shape)
+    if N:
+        x = p.rvs(size=(N, *shape), broadcast=True)
+        logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)
+        ans = logR.var(axis=0) * 2
+        if mcerror:
+            err = ans * (2 / (N - 1)) ** 0.5
+            ans = (ans, err)
+        return ans
+
+    bmd = p.dim / 2 * np.ones(shape)
+    pq = (p.mean - q.mean) * np.ones(p.dim)
+    if p.diagonal and q.diagonal:
+        pcov = p.cov * np.ones(p.dim)
+        qcov = q.cov * np.ones(q.dim)
+        bmd = bmd + (pq**2 * pcov / qcov**2).sum(axis=-1)
+        bmd = bmd + (pcov**2 / qcov**2).sum(axis=-1) / 2
+        bmd = bmd - (pcov / qcov).sum(axis=-1)
+    elif p.diagonal:
+        invq = inv(q.cov)
+        pcov = p.cov * np.ones(p.dim)
+        bmd = bmd + np.einsum(
+            "...i,...ij,...j,...jl,...l->...", pq, invq, pcov, invq, pq
+        )
+        bmd = bmd - np.einsum("...jj,...j->...", invq, pcov)
+        bmd = bmd + np.einsum("...lj,...j,...jl,...l->...", invq, pcov, invq, pcov) / 2
+    elif q.diagonal:
+        invq = np.ones(q.dim) / q.cov
+        pcov = p.cov * np.ones(p.dim)
+        bmd = bmd + np.einsum(
+            "...i,...i,...ik,...k,...k->...", pq, invq, pcov, invq, pq
+        )
+        bmd = bmd - np.einsum("...j,...jj->...", invq, pcov)
+        bmd = bmd + np.einsum("...j,...jk,...k,...kj->...", invq, pcov, invq, pcov) / 2
+    else:
+        invq = inv(q.cov)
+        bmd = bmd + np.einsum(
+            "...i,...ij,...jk,...kl,...l->...", pq, invq, p.cov, invq, pq
+        )
+        bmd = bmd - np.einsum("...ij,...ji->...", invq, p.cov)
+        bmd = (
+            bmd
+            + np.einsum("...ij,...jk,...kl,...li->...", invq, p.cov, invq, p.cov) / 2
+        )
+    return bmd * 2
diff --git a/tests/test_model.py b/tests/test_model.py
index 8d85a72..1615933 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -265,6 +265,26 @@ def test_posterior(
         assert dkl.shape == model.shape
         assert (dkl >= 0).all()
 
+        bmd = model.bmd(D)
+        assert bmd.shape == model.shape
+        assert (bmd >= 0).all()
+
+        mutual_information = model.mutual_information()
+        assert mutual_information.shape == model.shape
+        assert (mutual_information >= 0).all()
+
+        mutual_information_mc, err = model.mutual_information(N, True)
+        assert mutual_information_mc.shape == model.shape
+        assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=10)
+
+        dimensionality = model.dimensionality()
+        assert dimensionality.shape == model.shape
+        assert (dimensionality >= 0).all()
+
+        dimensionality_mc, err = model.dimensionality(N, True)
+        assert dimensionality_mc.shape == model.shape
+        assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=10)
+
     def test_evidence(
         self,
         M_shape,
@@ -620,6 +640,24 @@ def test_posterior(
         dkl = model.dkl(D, 10)
         assert dkl.shape == model.shape[:-1]
 
+        with pytest.raises(ValueError):
+            model.bmd(D)
+
+        bmd = model.bmd(D, 10)
+        assert bmd.shape == model.shape[:-1]
+
+        with pytest.raises(ValueError):
+            model.mutual_information()
+
+        mutual_information = model.mutual_information(10)
+        assert mutual_information.shape == model.shape[:-1]
+
+        with pytest.raises(ValueError):
+            model.dimensionality()
+
+        dimensionality = model.dimensionality(10)
+        assert dimensionality.shape == model.shape[:-1]
+
     def test_evidence(
         self,
         logw_shape,
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 6f957a4..d054fab 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -5,7 +5,7 @@
 from scipy.special import logsumexp
 from scipy.stats import multivariate_normal as scipy_multivariate_normal
 
-from lsbi.stats import dkl, mixture_normal, multivariate_normal
+from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal
 
 shapes = [(2, 3), (3,), ()]
 sizes = [(6, 5), (5,), ()]
@@ -563,6 +563,7 @@
     cov_shape_q,
     diagonal_q,
 ):
+    dim = dim_p
     p = TestMultivariateNormal().random(
         dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p
     )
@@ -578,7 +579,42 @@ def test_dkl(
     assert (dkl_pq >= 0).all()
     assert dkl_pq.shape == np.broadcast_shapes(p.shape, q.shape)
 
-    dkl_mc = dkl(p, q, 1000)
+    dkl_mc, err = dkl(p, q, 1000, True)
     assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape)
-    assert_allclose(dkl_pq, dkl_mc, atol=1)
+    assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=10)
+
+
+@pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests)
+@pytest.mark.parametrize("dim_q, shape_q, mean_shape_q, cov_shape_q, diagonal_q", tests)
+def test_bmd(
+    dim_p,
+    shape_p,
+    mean_shape_p,
+    cov_shape_p,
+    diagonal_p,
+    dim_q,
+    shape_q,
+    mean_shape_q,
+    cov_shape_q,
+    diagonal_q,
+):
+    dim = dim_p
+    p = TestMultivariateNormal().random(
+        dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p
+    )
+    q = TestMultivariateNormal().random(
+        dim, shape_q, mean_shape_q, cov_shape_q, diagonal_q
+    )
+
+    bmd_pq = bmd(p, q)
+
+    assert_allclose(bmd(p, p), 0, atol=1e-10)
+    assert_allclose(bmd(q, q), 0, atol=1e-10)
+
+    assert (bmd_pq >= 0).all()
+    assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape)
+
+    bmd_mc, err = bmd(p, q, 1000, True)
+    assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape)
+    assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=10)
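
A minimal usage sketch of the interfaces added above (`dkl`, `bmd`, `mutual_information` and `dimensionality` on `LinearModel`, plus `dkl`/`bmd` in `lsbi.stats`). The `LinearModel(M=..., m=..., C=..., mu=..., Sigma=...)` constructor keywords and the shapes chosen here are assumptions carried over from earlier lsbi releases rather than anything introduced by this patch:

# Usage sketch (assumed constructor API from earlier lsbi releases; values are illustrative).
import numpy as np

from lsbi.model import LinearModel
from lsbi.stats import bmd, dkl

# Linear-Gaussian model: D = m + M θ + ε with ε ~ N(0, C) and prior θ ~ N(mu, Sigma)
model = LinearModel(
    M=np.random.rand(5, 3),  # linear map from 3 parameters to 5 data dimensions
    m=np.zeros(5),           # data offset
    C=0.01 * np.eye(5),      # data covariance
    mu=np.zeros(3),          # prior mean
    Sigma=np.eye(3),         # prior covariance
)

D = model.evidence().rvs()   # draw a synthetic data vector from P(D)

# Exact (analytic) values
print("D_KL(posterior||prior):", model.dkl(D))
print("Bayesian model dimensionality:", model.bmd(D))
print("mutual information:", model.mutual_information())
print("expected dimensionality:", model.dimensionality())

# Monte Carlo estimates with error bars; these should agree within a few sigma
mi, mi_err = model.mutual_information(N=10000, mcerror=True)
dim, dim_err = model.dimensionality(N=10000, mcerror=True)

# The stats-level functions operate directly on a pair of distributions
posterior, prior = model.posterior(D), model.prior()
print("stats.dkl:", dkl(posterior, prior), "stats.bmd:", bmd(posterior, prior))

Per the implementation above, the Monte Carlo branches of `mutual_information` and `dimensionality` draw roughly √N samples from the evidence, so N should be chosen generously.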