From 6127eb54d65a84478bccd7f59849716b810782c8 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 5 Mar 2024 23:21:15 +0000 Subject: [PATCH 01/11] implemented Bayesian model dimensionality --- lsbi/model.py | 44 +++++++++++++++++++++++++++- lsbi/stats.py | 70 +++++++++++++++++++++++++++++++++++++++++++++ tests/test_model.py | 10 +++++++ tests/test_stats.py | 37 +++++++++++++++++++++++- 4 files changed, 159 insertions(+), 2 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index b5fc90e..a2637d7 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -6,7 +6,7 @@ from numpy.linalg import inv, solve from scipy.special import logsumexp -from lsbi.stats import dkl, mixture_normal, multivariate_normal +from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal from lsbi.utils import alias, dediagonalise, logdet @@ -264,6 +264,10 @@ def ppd(self, D0): def dkl(self, D, n=0): """KL divergence between the posterior and prior. + Analytically this is + + 1/2 (log|1 + M Σ M'/ C| + tr(Σ_P/Σ) + Parameters ---------- D : array_like, shape (..., d) @@ -273,6 +277,26 @@ def dkl(self, D, n=0): """ return dkl(self.posterior(D), self.prior(), n) + def bmd(self, D, n=0): + """Bayesian model dimensionality. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + return bmd(self.posterior(D), self.prior(), n) + + def mutual_information(self): + """Mutual information. + + Analytically this is + + """ + return inv(self.posterior(D).cov) + @property def _M(self): return dediagonalise(self.M, self.diagonal_M, self.d, self.n) @@ -463,6 +487,24 @@ def dkl(self, D, n=0): x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + def bmd(self, D, n=0): + """Bayesian model dimensionality. + + Parameters + ---------- + D : array_like, shape (..., d) + Data to form the posterior + n : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + """ + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + + p = self.posterior(D) + q = self.prior() + x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) + class ReducedLinearModel(object): """A model with no data. diff --git a/lsbi/stats.py b/lsbi/stats.py index 562db6c..b584c6b 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -588,6 +588,11 @@ def __getitem__(self, arg): # noqa: D105 def dkl(p, q, n=0): """Kullback-Leibler divergence between two distributions. + if P ~ N(p,P) and Q ~ N(q,Q) then + + D_KL(P||Q) = _P + = 1/2 * (log(|Q|/|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) + Parameters ---------- p : lsbi.stats.multivariate_normal @@ -623,3 +628,68 @@ def dkl(p, q, n=0): dkl = dkl + np.einsum("...ij,...ji->...", invq, p.cov) return dkl / 2 + + +def bmd(p, q, n=0): + """Bayesian model dimensionality between two distributions. + + if P ~ N(p,P) and Q ~ N(q,Q) then + + bmd/2 = var(log P/Q)_P + = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 + + Parameters + ---------- + p : lsbi.stats.multivariate_normal + q : lsbi.stats.multivariate_normal + n : int, optional, default=0 + Number of samples to mcmc estimate the divergence. + + Returns + ------- + bmd : array_like + Bayesian model dimensionality between p and q. 
+ """ + shape = np.broadcast_shapes(p.shape, q.shape) + if n: + x = p.rvs(size=(n, *shape), broadcast=True) + return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var( + axis=0 + ) * 2 + + bmd = p.dim / 2 * np.ones(shape) + pq = (p.mean - q.mean) * np.ones(p.dim) + if p.diagonal and q.diagonal: + pcov = p.cov * np.ones(p.dim) + qcov = q.cov * np.ones(q.dim) + bmd = bmd + (pq**2 * pcov / qcov**2).sum(axis=-1) + bmd = bmd + (pcov**2 / qcov**2).sum(axis=-1) / 2 + bmd = bmd - (pcov / qcov).sum(axis=-1) + elif p.diagonal: + invq = inv(q.cov) + pcov = p.cov * np.ones(p.dim) + bmd = bmd + np.einsum( + "...i,...ij,...j,...jl,...l->...", pq, invq, pcov, invq, pq + ) + bmd = bmd - np.einsum("...jj,...j->...", invq, pcov) + bmd = bmd + np.einsum("...lj,...j,...jl,...l->...", invq, pcov, invq, pcov) / 2 + elif q.diagonal: + invq = np.ones(q.dim) / q.cov + pcov = p.cov * np.ones(p.dim) + bmd = bmd + np.einsum( + "...i,...i,...ik,...k,...k->...", pq, invq, pcov, invq, pq + ) + bmd = bmd - np.einsum("...j,...jj->...", invq, pcov) + bmd = bmd + np.einsum("...j,...jk,...k,...kj->...", invq, pcov, invq, pcov) / 2 + else: + invq = inv(q.cov) + bmd = bmd + np.einsum( + "...i,...ij,...jk,...kl,...l->...", pq, invq, p.cov, invq, pq + ) + bmd = bmd - np.einsum("...ij,...ji->...", invq, p.cov) + bmd = ( + bmd + + np.einsum("...ij,...jk,...kl,...li->...", invq, p.cov, invq, p.cov) / 2 + ) + return bmd * 2 diff --git a/tests/test_model.py b/tests/test_model.py index 8d85a72..fe9c657 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -265,6 +265,10 @@ def test_posterior( assert dkl.shape == model.shape assert (dkl >= 0).all() + bmd = model.bmd(D) + assert bmd.shape == model.shape + assert (bmd >= 0).all() + def test_evidence( self, M_shape, @@ -620,6 +624,12 @@ def test_posterior( dkl = model.dkl(D, 10) assert dkl.shape == model.shape[:-1] + with pytest.raises(ValueError): + model.bmd(D) + + bmd = model.bmd(D, 10) + assert bmd.shape == model.shape[:-1] + def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index 6f957a4..f4841ad 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -5,7 +5,7 @@ from scipy.special import logsumexp from scipy.stats import multivariate_normal as scipy_multivariate_normal -from lsbi.stats import dkl, mixture_normal, multivariate_normal +from lsbi.stats import bmd, dkl, mixture_normal, multivariate_normal shapes = [(2, 3), (3,), ()] sizes = [(6, 5), (5,), ()] @@ -582,3 +582,38 @@ def test_dkl( assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) assert_allclose(dkl_pq, dkl_mc, atol=1) + + +@pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) +@pytest.mark.parametrize("dim_q, shape_q, mean_shape_q, cov_shape_q, diagonal_q", tests) +def test_bmd( + dim_p, + shape_p, + mean_shape_p, + cov_shape_p, + diagonal_p, + dim_q, + shape_q, + mean_shape_q, + cov_shape_q, + diagonal_q, +): + p = TestMultivariateNormal().random( + dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p + ) + q = TestMultivariateNormal().random( + dim, shape_q, mean_shape_q, cov_shape_q, diagonal_q + ) + + bmd_pq = bmd(p, q) + + assert_allclose(bmd(p, p), 0, atol=1e-10) + assert_allclose(bmd(q, q), 0, atol=1e-10) + + assert (bmd_pq >= 0).all() + assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) + + bmd_mc = bmd(p, q, 10000) + assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) + + assert_allclose(bmd_pq, bmd_mc, atol=1) From d28419e71e4fd321a850b3601925b92bd30e975f Mon Sep 17 00:00:00 2001 
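The closed form introduced in PATCH 01's bmd() above can be checked numerically. The following standalone sketch is not part of the patch (all names are ad hoc); it compares the expression used in the dense branch, 2*(d/2 + (p-q)'Q^-1 P Q^-1 (p-q) - tr(Q^-1 P) + tr(Q^-1 P Q^-1 P)/2), against the Monte Carlo variance of the log density ratio under P:

    import numpy as np
    from numpy.linalg import inv, slogdet

    rng = np.random.default_rng(0)
    d = 3
    p_mean, q_mean = rng.normal(size=d), rng.normal(size=d)
    A, B = rng.normal(size=(d, d)), rng.normal(size=(d, d))
    P, Q = A @ A.T + d * np.eye(d), B @ B.T + d * np.eye(d)  # covariance matrices

    # Closed form used by the dense branch of bmd(): 2 * var(log P/Q) under P
    invQ = inv(Q)
    m = p_mean - q_mean
    analytic = 2 * (
        d / 2
        + m @ invQ @ P @ invQ @ m
        - np.trace(invQ @ P)
        + np.trace(invQ @ P @ invQ @ P) / 2
    )

    # Monte Carlo: variance of the log density ratio over samples drawn from P
    x = rng.multivariate_normal(p_mean, P, size=100_000)
    logR = (
        0.5 * (slogdet(Q)[1] - slogdet(P)[1])
        - 0.5 * np.einsum("ij,jk,ik->i", x - p_mean, inv(P), x - p_mean)
        + 0.5 * np.einsum("ij,jk,ik->i", x - q_mean, invQ, x - q_mean)
    )
    print(analytic, 2 * logR.var())  # the two should agree to within a few percent
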
From: Will Handley Date: Wed, 6 Mar 2024 00:02:50 +0000 Subject: [PATCH 02/11] Added mutual information and total dimensionality --- lsbi/model.py | 65 ++++++++++++++++++++++++++++++++++++++++++--- tests/test_model.py | 32 ++++++++++++++++++++++ tests/test_stats.py | 4 +-- 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index a2637d7..e40cbe4 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -266,7 +266,9 @@ def dkl(self, D, n=0): Analytically this is - 1/2 (log|1 + M Σ M'/ C| + tr(Σ_P/Σ) + _P(θ|D) + + 1/2 (log|1 + M Σ M'/ C| - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) Parameters ---------- @@ -280,6 +282,11 @@ def dkl(self, D, n=0): def bmd(self, D, n=0): """Bayesian model dimensionality. + Analytically this is + bmd/2 = var(log P(θ|D)/P(θ))_P(θ|D) + + = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) + Parameters ---------- D : array_like, shape (..., d) @@ -289,13 +296,51 @@ def bmd(self, D, n=0): """ return bmd(self.posterior(D), self.prior(), n) - def mutual_information(self): - """Mutual information. + def mutual_information(self, n=0): + """Mutual information between the parameters and the data. Analytically this is + _P(D|θ) + + = log|1 + M Σ M'/ C|/2 """ - return inv(self.posterior(D).cov) + if n > 0: + D = self.evidence().rvs(n) + θ = self.posterior(D).rvs(n, broadcast=True) + return ( + self.posterior(D).logpdf(θ, broadcast=True) + - self.prior().logpdf(θ, broadcast=True) + ).mean(axis=0) + + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) + C = self._C + return ( + logdet(C + np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)) + / 2 + - logdet(C) / 2 + ) + + def dimensionality(self): + """Model dimensionality. + + Analytically this is + + bmd/2 = _P(D) + + = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + """ + if n > 0: + n = int(n**0.5) + D = self.evidence().rvs(n) + θ = self.posterior(D).rvs(n) + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ) + return logR.var(axis=0).mean(axis=0) * 2 + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) + C = self._C + return 2 * np.trace(MΣM @ inv(C + MΣM)) - np.trace( + MΣM @ inv(C + MΣM) @ MΣM @ inv(C + MΣM) + ) @property def _M(self): @@ -505,6 +550,18 @@ def bmd(self, D, n=0): x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) + def mutual_information(self, n=0): + """Mutual information between the parameters and the data.""" + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + return super().mutual_information(n) + + def dimensionality(self): + """Model dimensionality.""" + if n == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + return super().dimensionality(n) + class ReducedLinearModel(object): """A model with no data. 
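As a sanity check on the determinant form quoted in mutual_information() above, the following standalone numpy sketch (not part of the patch; the toy model and variable names are made up) verifies that the average posterior-to-prior KL divergence over data drawn from the evidence reproduces log|1 + M Σ M'/C|/2 for a small linear-Gaussian model:

    import numpy as np
    from numpy.linalg import inv, slogdet

    rng = np.random.default_rng(0)
    n_dim, d_dim = 2, 4                          # parameter and data dimensions
    M = rng.normal(size=(d_dim, n_dim))          # linear response matrix
    m = rng.normal(size=d_dim)                   # data offset
    C = np.eye(d_dim)                            # data covariance
    mu, Sigma = np.zeros(n_dim), np.eye(n_dim)   # prior mean and covariance

    # Analytic mutual information: log|1 + M Sigma M'/C| / 2
    MSM = M @ Sigma @ M.T
    mi_analytic = (slogdet(C + MSM)[1] - slogdet(C)[1]) / 2

    # Monte Carlo: average dkl(posterior || prior) over data from the evidence
    Sigma_P = inv(inv(Sigma) + M.T @ inv(C) @ M)            # posterior covariance
    D = rng.multivariate_normal(m + M @ mu, C + MSM, size=10_000)
    mu_P = mu + (D - m - M @ mu) @ inv(C) @ M @ Sigma_P     # posterior means
    dkl_vals = 0.5 * (
        slogdet(Sigma)[1] - slogdet(Sigma_P)[1] - n_dim
        + np.trace(inv(Sigma) @ Sigma_P)
        + np.einsum("ij,jk,ik->i", mu_P - mu, inv(Sigma), mu_P - mu)
    )
    print(mi_analytic, dkl_vals.mean())  # should agree closely

The analogous check for dimensionality() uses 2 tr K - tr K^2 with K = M Σ M'(C + M Σ M')^-1, i.e. the trace expression in the hunk above.
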
diff --git a/tests/test_model.py b/tests/test_model.py index fe9c657..0f77768 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -269,6 +269,26 @@ def test_posterior( assert bmd.shape == model.shape assert (bmd >= 0).all() + mutual_information = model.mutual_information() + assert mutual_information.shape == model.shape + assert (mutual_information >= 0).all() + + N = 1000 + mutual_information_mc = model.mutual_information_mc(N) + assert mutual_information_mc.shape == model.shape + assert (mutual_information_mc >= 0).all() + assert_allclose(mutual_information, mutual_information_mc, rtol=1) + + dimensionality = model.dimensionality() + assert dimensionality.shape == model.shape + assert (dimensionality >= 0).all() + + N = 1000 + dimensionality_mc = model.dimensionality_mc(N) + assert dimensionality_mc.shape == model.shape + assert (dimensionality_mc >= 0).all() + assert_allclose(dimensionality, dimensionality_mc, rtol=1) + def test_evidence( self, M_shape, @@ -630,6 +650,18 @@ def test_posterior( bmd = model.bmd(D, 10) assert bmd.shape == model.shape[:-1] + with pytest.raises(ValueError): + model.mutual_information() + + mutual_information = model.mutual_information(10) + assert mutual_information.shape == model.shape[:-1] + + with pytest.raises(ValueError): + model.mutual_information() + + mutual_information_mc = model.mutual_information_mc(10) + assert mutual_information_mc.shape == model.shape[:-1] + def test_evidence( self, logw_shape, diff --git a/tests/test_stats.py b/tests/test_stats.py index f4841ad..37286b3 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -613,7 +613,7 @@ def test_bmd( assert (bmd_pq >= 0).all() assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) - bmd_mc = bmd(p, q, 10000) + bmd_mc = bmd(p, q, 1000) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, atol=1) + assert_allclose(bmd_pq, bmd_mc, rtol=1) From 99b4f46ee8950478de140a1c1ddebf3c2d592291 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 00:11:02 +0000 Subject: [PATCH 03/11] Updated model dimensionality for tests --- lsbi/model.py | 10 ++++++---- tests/test_model.py | 13 ++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index e40cbe4..6d0c9d0 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -306,12 +306,14 @@ def mutual_information(self, n=0): = log|1 + M Σ M'/ C|/2 """ if n > 0: + n = int(n**0.5) D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n, broadcast=True) + θ = self.posterior(D).rvs(n) return ( - self.posterior(D).logpdf(θ, broadcast=True) - - self.prior().logpdf(θ, broadcast=True) - ).mean(axis=0) + (self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ)) + .mean(axis=0) + .mean(axis=0) + ) MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C diff --git a/tests/test_model.py b/tests/test_model.py index 0f77768..196923a 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -273,8 +273,8 @@ def test_posterior( assert mutual_information.shape == model.shape assert (mutual_information >= 0).all() - N = 1000 - mutual_information_mc = model.mutual_information_mc(N) + n = 1000 + mutual_information_mc = model.mutual_information(n) assert mutual_information_mc.shape == model.shape assert (mutual_information_mc >= 0).all() assert_allclose(mutual_information, mutual_information_mc, rtol=1) @@ -283,8 +283,7 @@ def test_posterior( assert dimensionality.shape == model.shape assert (dimensionality >= 
0).all() - N = 1000 - dimensionality_mc = model.dimensionality_mc(N) + dimensionality_mc = model.dimensionality(n) assert dimensionality_mc.shape == model.shape assert (dimensionality_mc >= 0).all() assert_allclose(dimensionality, dimensionality_mc, rtol=1) @@ -657,10 +656,10 @@ def test_posterior( assert mutual_information.shape == model.shape[:-1] with pytest.raises(ValueError): - model.mutual_information() + model.dimensionality() - mutual_information_mc = model.mutual_information_mc(10) - assert mutual_information_mc.shape == model.shape[:-1] + dimensionality = model.dimensionality(10) + assert dimensionality.shape == model.shape[:-1] def test_evidence( self, From 9a15e22c8e03408bdd0d6212966a35b54c9837b2 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 08:19:26 +0000 Subject: [PATCH 04/11] Tests now passing --- lsbi/model.py | 87 ++++++++++++++++++++++++--------------------- tests/test_model.py | 10 +++--- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 6d0c9d0..be6c9e8 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -261,7 +261,7 @@ def ppd(self, D0): """P(D|D0) as a distribution object.""" return self.update(D0).evidence() - def dkl(self, D, n=0): + def dkl(self, D, N=0): """KL divergence between the posterior and prior. Analytically this is @@ -274,12 +274,12 @@ def dkl(self, D, n=0): ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return dkl(self.posterior(D), self.prior(), n) + return dkl(self.posterior(D), self.prior(), N) - def bmd(self, D, n=0): + def bmd(self, D, N=0): """Bayesian model dimensionality. Analytically this is @@ -294,9 +294,9 @@ def bmd(self, D, n=0): n : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - return bmd(self.posterior(D), self.prior(), n) + return bmd(self.posterior(D), self.prior(), N) - def mutual_information(self, n=0): + def mutual_information(self, N=0): """Mutual information between the parameters and the data. Analytically this is @@ -305,25 +305,24 @@ def mutual_information(self, n=0): = log|1 + M Σ M'/ C|/2 """ - if n > 0: - n = int(n**0.5) - D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n) + if N > 0: + N = int(N**0.5) + D = self.evidence().rvs(N) + θ = self.posterior(D).rvs(N) return ( - (self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ)) + ( + self.posterior(D).logpdf(θ, broadcast=True) + - self.prior().logpdf(θ, broadcast=True) + ) .mean(axis=0) .mean(axis=0) ) MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C - return ( - logdet(C + np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M)) - / 2 - - logdet(C) / 2 - ) + return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape) - def dimensionality(self): + def dimensionality(self, N=0): """Model dimensionality. 
Analytically this is @@ -332,16 +331,22 @@ def dimensionality(self): = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 """ - if n > 0: - n = int(n**0.5) - D = self.evidence().rvs(n) - θ = self.posterior(D).rvs(n) - logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf(θ) + if N > 0: + N = int(N**0.5) + D = self.evidence().rvs(N) + θ = self.posterior(D).rvs(N) + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( + θ, broadcast=True + ) return logR.var(axis=0).mean(axis=0) * 2 MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C - return 2 * np.trace(MΣM @ inv(C + MΣM)) - np.trace( - MΣM @ inv(C + MΣM) @ MΣM @ inv(C + MΣM) + invCpMΣM = inv(C + MΣM) + + return np.broadcast_to( + 2 * np.einsum("...ij,...ji->...", MΣM, invCpMΣM) + - np.einsum("...ij,...jk,...kl,...li->...", MΣM, invCpMΣM, MΣM, invCpMΣM), + self.shape, ) @property @@ -516,53 +521,53 @@ def update(self, D, inplace=False): if not inplace: return dist - def dkl(self, D, n=0): + def dkl(self, D, N=0): """KL divergence between the posterior and prior. Parameters ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") p = self.posterior(D) q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) - def bmd(self, D, n=0): + def bmd(self, D, N=0): """Bayesian model dimensionality. Parameters ---------- D : array_like, shape (..., d) Data to form the posterior - n : int, optional + N : int, optional Number of samples for a monte carlo estimate, defaults to 0 """ - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") p = self.posterior(D) q = self.prior() - x = p.rvs(size=(n, *self.shape[:-1]), broadcast=True) + x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) - def mutual_information(self, n=0): + def mutual_information(self, N=0): """Mutual information between the parameters and the data.""" - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - return super().mutual_information(n) + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") + return super().mutual_information(N) def dimensionality(self): """Model dimensionality.""" - if n == 0: - raise ValueError("MixtureModel requires a monte carlo estimate. Use n>0.") - return super().dimensionality(n) + if N == 0: + raise ValueError("MixtureModel requires a monte carlo estimate. 
Use N>0.") + return super().dimensionality(N) class ReducedLinearModel(object): diff --git a/tests/test_model.py b/tests/test_model.py index 196923a..f71c889 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -271,21 +271,19 @@ def test_posterior( mutual_information = model.mutual_information() assert mutual_information.shape == model.shape + mutual_information.shape assert (mutual_information >= 0).all() - n = 1000 - mutual_information_mc = model.mutual_information(n) + mutual_information_mc = model.mutual_information(N) assert mutual_information_mc.shape == model.shape - assert (mutual_information_mc >= 0).all() - assert_allclose(mutual_information, mutual_information_mc, rtol=1) + assert_allclose(mutual_information, mutual_information_mc, atol=1) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape assert (dimensionality >= 0).all() - dimensionality_mc = model.dimensionality(n) + dimensionality_mc = model.dimensionality(N) assert dimensionality_mc.shape == model.shape - assert (dimensionality_mc >= 0).all() assert_allclose(dimensionality, dimensionality_mc, rtol=1) def test_evidence( From 5bf5fb44339f40189d89d0b4d8bd0d5eac679f8e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 08:53:20 +0000 Subject: [PATCH 05/11] added monte carlo error estimates --- lsbi/model.py | 68 +++++++++++++++++++++++++++++++++------------ lsbi/stats.py | 37 ++++++++++++++++-------- tests/test_model.py | 8 +++--- tests/test_stats.py | 8 +++--- 4 files changed, 84 insertions(+), 37 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index be6c9e8..670a61b 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -296,7 +296,7 @@ def bmd(self, D, N=0): """ return bmd(self.posterior(D), self.prior(), N) - def mutual_information(self, N=0): + def mutual_information(self, N=0, mcerror=False): """Mutual information between the parameters and the data. Analytically this is @@ -304,25 +304,31 @@ def mutual_information(self, N=0): _P(D|θ) = log|1 + M Σ M'/ C|/2 + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate """ if N > 0: N = int(N**0.5) D = self.evidence().rvs(N) - θ = self.posterior(D).rvs(N) - return ( - ( - self.posterior(D).logpdf(θ, broadcast=True) - - self.prior().logpdf(θ, broadcast=True) - ) - .mean(axis=0) - .mean(axis=0) + θ = self.posterior(D).rvs() + logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( + θ, broadcast=True ) + ans = logR.mean(axis=0) + if mcerror: + var = logR.var(axis=0) / (N - 1) + ans = (ans, var**0.5) + return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C return np.broadcast_to(logdet(C + MΣM) / 2 - logdet(C) / 2, self.shape) - def dimensionality(self, N=0): + def dimensionality(self, N=0, mcerror=False): """Model dimensionality. 
Analytically this is @@ -330,6 +336,13 @@ def dimensionality(self, N=0): bmd/2 = _P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate """ if N > 0: N = int(N**0.5) @@ -338,7 +351,12 @@ def dimensionality(self, N=0): logR = self.posterior(D).logpdf(θ, broadcast=True) - self.prior().logpdf( θ, broadcast=True ) - return logR.var(axis=0).mean(axis=0) * 2 + ans = logR.var(axis=0).mean(axis=0) * 2 + if mcerror: + var = logR.var(axis=0).var(axis=0) / (2 * (N - 1) * (N - 1)) + ans = (ans, var**0.5) + return ans + MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) C = self._C invCpMΣM = inv(C + MΣM) @@ -557,17 +575,33 @@ def bmd(self, D, N=0): x = p.rvs(size=(N, *self.shape[:-1]), broadcast=True) return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var(axis=0) - def mutual_information(self, N=0): - """Mutual information between the parameters and the data.""" + def mutual_information(self, N=0, mcerror=False): + """Mutual information between the parameters and the data. + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate + """ if N == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().mutual_information(N) + return super().mutual_information(N, mcerror) - def dimensionality(self): - """Model dimensionality.""" + def dimensionality(self, N=0, mcerror=False): + """Model dimensionality. + + Parameters + ---------- + N : int, optional + Number of samples for a monte carlo estimate, defaults to 0 + mcerror: bool, optional + Produce a monte carlo error estimate + """ if N == 0: raise ValueError("MixtureModel requires a monte carlo estimate. Use N>0.") - return super().dimensionality(N) + return super().dimensionality(N, mcerror) class ReducedLinearModel(object): diff --git a/lsbi/stats.py b/lsbi/stats.py index b584c6b..29a632e 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -585,7 +585,7 @@ def __getitem__(self, arg): # noqa: D105 return dist -def dkl(p, q, n=0): +def dkl(p, q, N=0, mcerror=False): """Kullback-Leibler divergence between two distributions. if P ~ N(p,P) and Q ~ N(q,Q) then @@ -597,8 +597,10 @@ def dkl(p, q, n=0): ---------- p : lsbi.stats.multivariate_normal q : lsbi.stats.multivariate_normal - n : int, optional, default=0 + N : int, optional, default=0 Number of samples to mcmc estimate the divergence. + mcerror: bool, optional, default=False + Produce a Monte Carlo error estimate Returns ------- @@ -606,9 +608,15 @@ def dkl(p, q, n=0): Kullback-Leibler divergence between p and q. """ shape = np.broadcast_shapes(p.shape, q.shape) - if n: - x = p.rvs(size=(n, *shape), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).mean(axis=0) + if N: + x = p.rvs(size=(N, *shape), broadcast=True) + logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) + ans = logR.mean(axis=0) + if mcerror: + var = logR.var(axis=0) / N + ans = (ans, var**0.5) + return ans + dkl = -p.dim * np.ones(shape) dkl = dkl + logdet(q.cov * np.ones(q.dim), q.diagonal) dkl = dkl - logdet(p.cov * np.ones(p.dim), p.diagonal) @@ -630,7 +638,7 @@ def dkl(p, q, n=0): return dkl / 2 -def bmd(p, q, n=0): +def bmd(p, q, N=0, mcerror=False): """Bayesian model dimensionality between two distributions. 
if P ~ N(p,P) and Q ~ N(q,Q) then @@ -643,8 +651,10 @@ def bmd(p, q, n=0): ---------- p : lsbi.stats.multivariate_normal q : lsbi.stats.multivariate_normal - n : int, optional, default=0 + N : int, optional, default=0 Number of samples to mcmc estimate the divergence. + mcerror: bool, optional, default=False + Produce a Monte Carlo error estimate Returns ------- @@ -652,11 +662,14 @@ def bmd(p, q, n=0): Bayesian model dimensionality between p and q. """ shape = np.broadcast_shapes(p.shape, q.shape) - if n: - x = p.rvs(size=(n, *shape), broadcast=True) - return (p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True)).var( - axis=0 - ) * 2 + if N: + x = p.rvs(size=(N, *shape), broadcast=True) + logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) + ans = logR.var(axis=0) * 2 + if mcerror: + var = logR.var(axis=0) / (2 * (N - 1)) * 4 + ans = (ans, var**0.5) + return ans bmd = p.dim / 2 * np.ones(shape) pq = (p.mean - q.mean) * np.ones(p.dim) diff --git a/tests/test_model.py b/tests/test_model.py index f71c889..c9b68a1 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -274,17 +274,17 @@ def test_posterior( mutual_information.shape assert (mutual_information >= 0).all() - mutual_information_mc = model.mutual_information(N) + mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose(mutual_information, mutual_information_mc, atol=1) + assert_allclose(mutual_information, mutual_information_mc, atol=(5 * err).max()) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape assert (dimensionality >= 0).all() - dimensionality_mc = model.dimensionality(N) + dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose(dimensionality, dimensionality_mc, rtol=1) + assert_allclose(dimensionality, dimensionality_mc, atol=(5 * err).max()) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 37286b3..a4a74d7 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -578,10 +578,10 @@ def test_dkl( assert (dkl_pq >= 0).all() assert dkl_pq.shape == np.broadcast_shapes(p.shape, q.shape) - dkl_mc = dkl(p, q, 1000) + dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(dkl_pq, dkl_mc, atol=1) + assert_allclose(dkl_pq, dkl_mc, atol=(5 * err).max()) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) @@ -613,7 +613,7 @@ def test_bmd( assert (bmd_pq >= 0).all() assert bmd_pq.shape == np.broadcast_shapes(p.shape, q.shape) - bmd_mc = bmd(p, q, 1000) + bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, rtol=1) + assert_allclose(bmd_pq, bmd_mc, atol=(5 * err).max()) From ca4d0d298caada591a7fd8019e33db007d1c70d7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 10:05:41 +0000 Subject: [PATCH 06/11] Trying to debug underestimate of dimensionality error --- lsbi/model.py | 9 +++++---- lsbi/stats.py | 8 ++++---- tests/test_model.py | 4 ++-- tests/test_stats.py | 4 ++-- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index 670a61b..e940016 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -320,8 +320,8 @@ def mutual_information(self, N=0, mcerror=False): ) ans = logR.mean(axis=0) if mcerror: - var = logR.var(axis=0) / (N - 1) - ans = (ans, var**0.5) + err = 
(logR.var(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) @@ -353,8 +353,9 @@ def dimensionality(self, N=0, mcerror=False): ) ans = logR.var(axis=0).mean(axis=0) * 2 if mcerror: - var = logR.var(axis=0).var(axis=0) / (2 * (N - 1) * (N - 1)) - ans = (ans, var**0.5) + err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + err = ((err**2).sum(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans MΣM = np.einsum("...ja,...ab,...kb->...jk", self._M, self._Σ, self._M) diff --git a/lsbi/stats.py b/lsbi/stats.py index 29a632e..2274420 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -613,8 +613,8 @@ def dkl(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.mean(axis=0) if mcerror: - var = logR.var(axis=0) / N - ans = (ans, var**0.5) + err = (logR.var(axis=0) / (N - 1)) ** 0.5 + ans = (ans, err) return ans dkl = -p.dim * np.ones(shape) @@ -667,8 +667,8 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - var = logR.var(axis=0) / (2 * (N - 1)) * 4 - ans = (ans, var**0.5) + err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + ans = (ans, err) return ans bmd = p.dim / 2 * np.ones(shape) diff --git a/tests/test_model.py b/tests/test_model.py index c9b68a1..e291952 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -276,7 +276,7 @@ def test_posterior( mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose(mutual_information, mutual_information_mc, atol=(5 * err).max()) + assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=5) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape @@ -284,7 +284,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose(dimensionality, dimensionality_mc, atol=(5 * err).max()) + assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index a4a74d7..15cb784 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -581,7 +581,7 @@ def test_dkl( dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(dkl_pq, dkl_mc, atol=(5 * err).max()) + assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=5) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) @@ -616,4 +616,4 @@ def test_bmd( bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose(bmd_pq, bmd_mc, atol=(5 * err).max()) + assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=5) From 5a82db73efccecc21feaa23626cda4f6fd4af78d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 18:06:37 +0000 Subject: [PATCH 07/11] Made some notes about bmd error estimation. To naut. 
Let's just give it generous error bars --- lsbi/stats.py | 16 +++++++++++++++- tests/test_model.py | 1 + tests/test_stats.py | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 2274420..16980e0 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -647,6 +647,17 @@ def bmd(p, q, N=0, mcerror=False): = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 + From: + https://stats.stackexchange.com/q/333838 + we can estimate the error in the sample variance S as: + + S^2/sigma^2 = chi_df^2 / df + df = 2n/(kappa-(n-3)/(n-1)) + + kappa is the kurtosis, so if a normal distribution is assumed, kappa = 3 and df = n-1 + Here we take kappa to be the kurtosis of the logR, which should in + principle be 3 + 12/d for a chi squared distribution. since logR is distributed as chi squared with bmd degrees of freedom, we take kappa = 3 + 12/(bmd) + Parameters ---------- p : lsbi.stats.multivariate_normal @@ -667,7 +678,10 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - err = logR.var(axis=0) * (2 / (N - 1)) ** 0.5 * 2 + # kappa = 3 + 12 / ans + # df = 2 * N / (kappa - (N - 3) / (N - 1)) + # err = ans * (2 / df) ** 0.5 + err = ans * (2 / N - 1) ** 0.5 ans = (ans, err) return ans diff --git a/tests/test_model.py b/tests/test_model.py index e291952..4bb5a69 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -285,6 +285,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) + plt.hist(logR[:, 0, 0], bins=100) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 15cb784..874884e 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -563,6 +563,7 @@ def test_dkl( cov_shape_q, diagonal_q, ): + dim = dim_p p = TestMultivariateNormal().random( dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p ) @@ -598,6 +599,7 @@ def test_bmd( cov_shape_q, diagonal_q, ): + dim = dim_p p = TestMultivariateNormal().random( dim, shape_p, mean_shape_p, cov_shape_p, diagonal_p ) @@ -615,5 +617,4 @@ def test_bmd( bmd_mc, err = bmd(p, q, 1000, True) assert bmd_mc.shape == np.broadcast_shapes(p.shape, q.shape) - - assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=5) + assert_allclose((bmd_pq - bmd_mc) / err, 0, atol=10) From c091a36d3e1ba79ac45c0fc335578dc2dc28ddba Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 18:07:10 +0000 Subject: [PATCH 08/11] Cleaned up stats.py --- lsbi/stats.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/lsbi/stats.py b/lsbi/stats.py index 16980e0..f8bd7c8 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -647,17 +647,6 @@ def bmd(p, q, N=0, mcerror=False): = 1/2 tr(Q^{-1} P Q^{-1} P) - 1/2 (tr(Q^{-1} P))^2 + (q - p)' Q^{-1} P Q^{-1} (q - p) + d/2 - From: - https://stats.stackexchange.com/q/333838 - we can estimate the error in the sample variance S as: - - S^2/sigma^2 = chi_df^2 / df - df = 2n/(kappa-(n-3)/(n-1)) - - kappa is the kurtosis, so if a normal distribution is assumed, kappa = 3 and df = n-1 - Here we take kappa to be the kurtosis of the logR, which should in - principle be 3 + 12/d for a chi squared distribution. 
since logR is distributed as chi squared with bmd degrees of freedom, we take kappa = 3 + 12/(bmd) - Parameters ---------- p : lsbi.stats.multivariate_normal @@ -678,9 +667,6 @@ def bmd(p, q, N=0, mcerror=False): logR = p.logpdf(x, broadcast=True) - q.logpdf(x, broadcast=True) ans = logR.var(axis=0) * 2 if mcerror: - # kappa = 3 + 12 / ans - # df = 2 * N / (kappa - (N - 3) / (N - 1)) - # err = ans * (2 / df) ** 0.5 err = ans * (2 / N - 1) ** 0.5 ans = (ans, err) return ans From d0e083bd6060ce7f81d69a55b908350ce829020f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 19:29:59 +0000 Subject: [PATCH 09/11] Tests now up to date --- tests/test_model.py | 5 ++--- tests/test_stats.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_model.py b/tests/test_model.py index 4bb5a69..1615933 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -276,7 +276,7 @@ def test_posterior( mutual_information_mc, err = model.mutual_information(N, True) assert mutual_information_mc.shape == model.shape - assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=5) + assert_allclose((mutual_information - mutual_information_mc) / err, 0, atol=10) dimensionality = model.dimensionality() assert dimensionality.shape == model.shape @@ -284,8 +284,7 @@ def test_posterior( dimensionality_mc, err = model.dimensionality(N, True) assert dimensionality_mc.shape == model.shape - assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=5) - plt.hist(logR[:, 0, 0], bins=100) + assert_allclose((dimensionality - dimensionality_mc) / err, 0, atol=10) def test_evidence( self, diff --git a/tests/test_stats.py b/tests/test_stats.py index 874884e..d054fab 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -582,7 +582,7 @@ def test_dkl( dkl_mc, err = dkl(p, q, 1000, True) assert dkl_mc.shape == np.broadcast_shapes(p.shape, q.shape) - assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=5) + assert_allclose((dkl_pq - dkl_mc) / err, 0, atol=10) @pytest.mark.parametrize("dim_p, shape_p, mean_shape_p, cov_shape_p, diagonal_p", tests) From 362b06a7a8fe8ab28f45069e9d01131ff8aa053f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 19:31:42 +0000 Subject: [PATCH 10/11] bump version to 0.13.0 --- README.rst | 2 +- lsbi/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8be0d84..7b4e4de 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ lsbi: Linear Simulation Based Inference ======================================= :lsbi: Linear Simulation Based Inference :Author: Will Handley & David Yallup -:Version: 0.12.0 +:Version: 0.13.0 :Homepage: https://github.com/handley-lab/lsbi :Documentation: http://lsbi.readthedocs.io/ diff --git a/lsbi/_version.py b/lsbi/_version.py index ea370a8..f23a6b3 100644 --- a/lsbi/_version.py +++ b/lsbi/_version.py @@ -1 +1 @@ -__version__ = "0.12.0" +__version__ = "0.13.0" From ba3abce0a79f1f69396fca75515d8a85e0c0c224 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 6 Mar 2024 22:31:55 +0000 Subject: [PATCH 11/11] Fixed documentation --- lsbi/model.py | 16 ++++++---------- lsbi/stats.py | 6 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/lsbi/model.py b/lsbi/model.py index e940016..a8b82ee 100644 --- a/lsbi/model.py +++ b/lsbi/model.py @@ -266,9 +266,8 @@ def dkl(self, D, N=0): Analytically this is - _P(θ|D) - - 1/2 (log|1 + M Σ M'/ C| - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) + _P(θ|D) = 1/2 (log|1 + M Σ M'/ C| 
+ - tr M Σ M'/ (C+M Σ M') + (μ - μ_P)' Σ^-1 (μ - μ_P)) Parameters ---------- @@ -283,9 +282,9 @@ def bmd(self, D, N=0): """Bayesian model dimensionality. Analytically this is - bmd/2 = var(log P(θ|D)/P(θ))_P(θ|D) - = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) + var(log P(θ|D)/P(θ))_P(θ|D) = 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + + (μ - μ_P)' Σ^-1 Σ_P Σ^-1(μ - μ_P) Parameters ---------- @@ -301,9 +300,8 @@ def mutual_information(self, N=0, mcerror=False): Analytically this is - _P(D|θ) + _P(D,θ) = log|1 + M Σ M'/ C|/2 - = log|1 + M Σ M'/ C|/2 Parameters ---------- N : int, optional @@ -333,9 +331,7 @@ def dimensionality(self, N=0, mcerror=False): Analytically this is - bmd/2 = _P(D) - - = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 + _P(D) = tr(M Σ M'/ (C+M Σ M')) - 1/2 tr(M Σ M'/ (C+M Σ M'))^2 Parameters ---------- diff --git a/lsbi/stats.py b/lsbi/stats.py index f8bd7c8..e6fc8a2 100644 --- a/lsbi/stats.py +++ b/lsbi/stats.py @@ -586,12 +586,12 @@ def __getitem__(self, arg): # noqa: D105 def dkl(p, q, N=0, mcerror=False): - """Kullback-Leibler divergence between two distributions. + r"""Kullback-Leibler divergence between two distributions. if P ~ N(p,P) and Q ~ N(q,Q) then - D_KL(P||Q) = _P - = 1/2 * (log(|Q|/|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) + D_KL(P\||Q) = _P = + 1/2 * (log(\|Q|/\|P|) - d + tr(Q^{-1} P) + (q - p)' Q^{-1} (q - p)) Parameters ----------
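To summarise the convention this series ends with: when mcerror=True the Monte Carlo branches return a (value, error) pair rather than a bare array. The sketch below is standalone, not library code, and mirrors the two estimators as they appear in the final diffs, applied to an ad-hoc logR array; it assumes the (2 / N - 1) grouping in the bmd branch of PATCH 07 is intended as 2 / (N - 1), matching the dkl branch.

    import numpy as np

    rng = np.random.default_rng(0)
    N = 1000
    logR = rng.normal(loc=2.0, scale=1.5, size=N)  # stand-in for logpdf(p) - logpdf(q)

    # dkl / mutual_information convention: mean of logR and its standard error
    dkl_est = logR.mean()
    dkl_err = (logR.var() / (N - 1)) ** 0.5

    # bmd / dimensionality convention: 2 * var(logR) and a Gaussian error estimate,
    # assuming the grouping 2 / (N - 1) (see note above)
    bmd_est = 2 * logR.var()
    bmd_err = 2 * logR.var() * (2 / (N - 1)) ** 0.5

    print(f"dkl = {dkl_est:.3f} +/- {dkl_err:.3f}")
    print(f"bmd = {bmd_est:.3f} +/- {bmd_err:.3f}")

In the library itself, dkl(p, q, N, mcerror=True) and bmd(p, q, N, mcerror=True) return such pairs, as do LinearModel.mutual_information and LinearModel.dimensionality, per the signatures in the diffs above.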