From 8c972f58fb4ac9dde2e4cf67c204df7f8d24d15d Mon Sep 17 00:00:00 2001
From: Lukas Hergt <lthergt@posteo.de>
Date: Fri, 27 Sep 2024 04:07:59 -0700
Subject: [PATCH 1/2] stop warning when `logL==logL_birth==-inf` (#395)

* stop warning about `logL<=logL_birth` when both are `-inf`, just silently drop those samples

* version bump to 2.8.15

* fix test for warning after having relaxed its eagerness

* Update README.rst

* Update _version.py
---
 README.rst             |  2 +-
 anesthetic/_version.py |  2 +-
 anesthetic/samples.py  | 20 +++++++++++---------
 tests/test_samples.py  |  1 +
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/README.rst b/README.rst
index 5acc5279..5e28cce7 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.8.15
+:Version: 2.8.16
 :Homepage: https://github.com/handley-lab/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index 7cdefb37..f8f5d150 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.8.15'
+__version__ = '2.8.16'
diff --git a/anesthetic/samples.py b/anesthetic/samples.py
index d0dee2b1..44c81076 100644
--- a/anesthetic/samples.py
+++ b/anesthetic/samples.py
@@ -1302,15 +1302,17 @@ def recompute(self, logL_birth=None, inplace=False):
             n_bad = invalid.sum()
             n_equal = (samples.logL == samples.logL_birth).sum()
             if n_bad:
-                warnings.warn("%i out of %i samples have logL <= logL_birth,"
-                              "\n%i of which have logL == logL_birth."
-                              "\nThis may just indicate numerical rounding "
-                              "errors at the peak of the likelihood, but "
-                              "further investigation of the chains files is "
-                              "recommended."
-                              "\nDropping the invalid samples." %
-                              (n_bad, len(samples), n_equal),
-                              RuntimeWarning)
+                n_inf = ((samples.logL == samples.logL_birth) &
+                         (samples.logL == -np.inf)).sum()
+                if n_bad > n_inf:
+                    warnings.warn(
+                        "%i out of %i samples have logL <= logL_birth,\n"
+                        "%i of which have logL == logL_birth.\n"
+                        "This may just indicate numerical rounding errors at "
+                        "the peak of the likelihood, but further "
+                        "investigation of the chains files is recommended.\n"
+                        "Dropping the invalid samples."
+                        % (n_bad, len(samples), n_equal), RuntimeWarning)
                 samples = samples[~invalid].reset_index(drop=True)
 
             samples.sort_values('logL', inplace=True)
diff --git a/tests/test_samples.py b/tests/test_samples.py
index f49aed3d..0dfdf4c8 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1133,6 +1133,7 @@ def test_beta():
 def test_beta_with_logL_infinities():
     ns = read_chains("./tests/example_data/pc")
     ns.loc[:10, ('logL', r'$\ln\mathcal{L}$')] = -np.inf
+    ns.loc[1000, ('logL', r'$\ln\mathcal{L}$')] = -np.inf
     with pytest.warns(RuntimeWarning):
         ns.recompute(inplace=True)
     assert (ns.logL == -np.inf).sum() == 0

From a4c852156d8dcec2dbb266f83741d8bf891a46cd Mon Sep 17 00:00:00 2001
From: Lukas Hergt <lthergt@posteo.de>
Date: Fri, 27 Sep 2024 14:27:14 -0700
Subject: [PATCH 2/2] Normalised stats (#396)

* allow passing a `NestedSamples.stats` instance as a normalisation reference to `NestedSamples.stats` producing columns of differences `[Delta_logZ, ...]`

* add tests for the normalisation kwarg `norm` for `NestedSamples.stats`

* bump version to 2.9.0

* improve docstring for `norm` kwarg, saying what columns will additionally be created
---
 README.rst             |  2 +-
 anesthetic/_version.py |  2 +-
 anesthetic/samples.py  | 37 +++++++++++++++++++++++++----
 tests/test_samples.py  | 54 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/README.rst b/README.rst
index 5e28cce7..4ecb81c2 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.8.16
+:Version: 2.9.0
 :Homepage: https://github.com/handley-lab/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index f8f5d150..387cfacc 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.8.16'
+__version__ = '2.9.0'
diff --git a/anesthetic/samples.py b/anesthetic/samples.py
index 44c81076..134e7e2a 100644
--- a/anesthetic/samples.py
+++ b/anesthetic/samples.py
@@ -761,7 +761,7 @@ def ns_output(self, *args, **kwargs):
             " as well as average loglikelihoods: help(samples.logL_P)"
             )
 
-    def stats(self, nsamples=None, beta=None):
+    def stats(self, nsamples=None, beta=None, norm=None):
         r"""Compute Nested Sampling statistics.
 
         Using nested sampling we can compute:
@@ -822,20 +822,27 @@ def stats(self, nsamples=None, beta=None):
         beta : float, array-like, optional
             inverse temperature(s) beta=1/kT. Default self.beta
 
+        norm : Series, :class:`Samples`, optional
+            :meth:`NestedSamples.stats` output used for normalisation.
+            Can be either a Series of mean values or Samples produced with
+            matching `nsamples` and `beta`. In addition to the columns
+            ['logZ', 'D_KL', 'logL_P', 'd_G'], this adds the normalised
+            versions ['Delta_logZ', 'Delta_D_KL', 'Delta_logL_P', 'Delta_d_G'].
+
         Returns
         -------
         if beta is scalar and nsamples is None:
-            Series, index ['logZ', 'd_G', 'DK_L', 'logL_P']
+            Series, index ['logZ', 'd_G', 'D_KL', 'logL_P']
         elif beta is scalar and nsamples is int:
             :class:`Samples`, index range(nsamples),
-            columns ['logZ', 'd_G', 'DK_L', 'logL_P']
+            columns ['logZ', 'd_G', 'D_KL', 'logL_P']
         elif beta is array-like and nsamples is None:
             :class:`Samples`, index beta,
-            columns ['logZ', 'd_G', 'DK_L', 'logL_P']
+            columns ['logZ', 'd_G', 'D_KL', 'logL_P']
         elif beta is array-like and nsamples is int:
             :class:`Samples`, index :class:`pandas.MultiIndex` the product of
             beta and range(nsamples)
-            columns ['logZ', 'd_G', 'DK_L', 'logL_P']
+            columns ['logZ', 'd_G', 'D_KL', 'logL_P']
         """
         logw = self.logw(nsamples, beta)
         if nsamples is None and beta is None:
@@ -861,6 +868,26 @@ def stats(self, nsamples=None, beta=None):
         samples.set_label('d_G', r'$d_\mathrm{G}$')
 
         samples.label = self.label
+
+        if norm is not None:
+            samples['Delta_logZ'] = samples['logZ'] - norm['logZ']
+            samples.set_label('Delta_logZ',
+                              r"$\Delta\ln\mathcal{Z}$")
+
+            samples['Delta_D_KL'] = samples['D_KL'] - norm['D_KL']
+            samples.set_label('Delta_D_KL',
+                              r"$\Delta\mathcal{D}_\mathrm{KL}$")
+
+            samples['Delta_logL_P'] = samples['logL_P'] - norm['logL_P']
+            samples.set_label(
+                'Delta_logL_P',
+                r"$\Delta\langle\ln\mathcal{L}\rangle_\mathcal{P}$"
+            )
+
+            samples['Delta_d_G'] = samples['d_G'] - norm['d_G']
+            samples.set_label('Delta_d_G',
+                              r"$\Delta d_\mathrm{G}$")
+
         return samples
 
     def logX(self, nsamples=None):
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 0dfdf4c8..eef22e61 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -946,17 +946,27 @@ def test_stats():
     beta = [0., 0.5, 1.]
 
     vals = ['logZ', 'D_KL', 'logL_P', 'd_G']
+    delta_vals = ['Delta_logZ', 'Delta_D_KL', 'Delta_logL_P', 'Delta_d_G']
 
     labels = [r'$\ln\mathcal{Z}$',
               r'$\mathcal{D}_\mathrm{KL}$',
               r'$\langle\ln\mathcal{L}\rangle_\mathcal{P}$',
               r'$d_\mathrm{G}$']
+    delta_labels = [r'$\Delta\ln\mathcal{Z}$',
+                    r'$\Delta\mathcal{D}_\mathrm{KL}$',
+                    r'$\Delta\langle\ln\mathcal{L}\rangle_\mathcal{P}$',
+                    r'$\Delta d_\mathrm{G}$']
 
     stats = pc.stats()
     assert isinstance(stats, WeightedLabelledSeries)
     assert_array_equal(stats.drop_labels().index, vals)
     assert_array_equal(stats.get_labels(), labels)
 
+    stats = pc.stats(norm=pc.stats())
+    assert isinstance(stats, WeightedLabelledSeries)
+    assert_array_equal(stats.drop_labels().index, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+
     stats = pc.stats(nsamples=nsamples)
     assert isinstance(stats, WeightedLabelledDataFrame)
     assert_array_equal(stats.drop_labels().columns, vals)
@@ -964,6 +974,20 @@ def test_stats():
     assert stats.index.name == 'samples'
     assert_array_equal(stats.index, range(nsamples))
 
+    stats = pc.stats(nsamples=nsamples, norm=pc.stats())
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.name == 'samples'
+    assert_array_equal(stats.index, range(nsamples))
+
+    stats = pc.stats(nsamples=nsamples, norm=pc.stats(nsamples=nsamples))
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.name == 'samples'
+    assert_array_equal(stats.index, range(nsamples))
+
     stats = pc.stats(beta=beta)
     assert isinstance(stats, WeightedLabelledDataFrame)
     assert_array_equal(stats.drop_labels().columns, vals)
@@ -971,6 +995,20 @@ def test_stats():
     assert stats.index.name == 'beta'
     assert_array_equal(stats.index, beta)
 
+    stats = pc.stats(beta=beta, norm=pc.stats())
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.name == 'beta'
+    assert_array_equal(stats.index, beta)
+
+    stats = pc.stats(beta=beta, norm=pc.stats(beta=beta))
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.name == 'beta'
+    assert_array_equal(stats.index, beta)
+
     stats = pc.stats(nsamples=nsamples, beta=beta)
     assert isinstance(stats, WeightedLabelledDataFrame)
     assert_array_equal(stats.drop_labels().columns, vals)
@@ -978,7 +1016,23 @@ def test_stats():
     assert stats.index.names == ['beta', 'samples']
     assert stats.index.levshape == (len(beta), nsamples)
 
+    stats = pc.stats(nsamples=nsamples, beta=beta, norm=pc.stats())
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.names == ['beta', 'samples']
+    assert stats.index.levshape == (len(beta), nsamples)
+
+    stats = pc.stats(nsamples=nsamples, beta=beta,
+                     norm=pc.stats(nsamples=nsamples, beta=beta))
+    assert isinstance(stats, WeightedLabelledDataFrame)
+    assert_array_equal(stats.drop_labels().columns, vals + delta_vals)
+    assert_array_equal(stats.get_labels(), labels + delta_labels)
+    assert stats.index.names == ['beta', 'samples']
+    assert stats.index.levshape == (len(beta), nsamples)
+
     for beta in [1., 0., 0.5]:
+        np.random.seed(42)
         pc.beta = beta
         n = 1000
         PC = pc.stats(n, beta)