From 8c972f58fb4ac9dde2e4cf67c204df7f8d24d15d Mon Sep 17 00:00:00 2001 From: Lukas Hergt Date: Fri, 27 Sep 2024 04:07:59 -0700 Subject: [PATCH 1/2] stop warning when `logL==logL_birth==-inf` (#395) * stop warning about `logL<=logL_birth` when both are `-inf`, just silently drop those samples * version bump to 2.8.15 * fix test for warning after having relaxed its eagerness * Update README.rst * Update _version.py --- README.rst | 2 +- anesthetic/_version.py | 2 +- anesthetic/samples.py | 20 +++++++++++--------- tests/test_samples.py | 1 + 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 5acc5279..5e28cce7 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.8.15 +:Version: 2.8.16 :Homepage: https://github.com/handley-lab/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 7cdefb37..f8f5d150 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.8.15' +__version__ = '2.8.16' diff --git a/anesthetic/samples.py b/anesthetic/samples.py index d0dee2b1..44c81076 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -1302,15 +1302,17 @@ def recompute(self, logL_birth=None, inplace=False): n_bad = invalid.sum() n_equal = (samples.logL == samples.logL_birth).sum() if n_bad: - warnings.warn("%i out of %i samples have logL <= logL_birth," - "\n%i of which have logL == logL_birth." - "\nThis may just indicate numerical rounding " - "errors at the peak of the likelihood, but " - "further investigation of the chains files is " - "recommended." - "\nDropping the invalid samples." % - (n_bad, len(samples), n_equal), - RuntimeWarning) + n_inf = ((samples.logL == samples.logL_birth) & + (samples.logL == -np.inf)).sum() + if n_bad > n_inf: + warnings.warn( + "%i out of %i samples have logL <= logL_birth,\n" + "%i of which have logL == logL_birth.\n" + "This may just indicate numerical rounding errors at " + "the peak of the likelihood, but further " + "investigation of the chains files is recommended.\n" + "Dropping the invalid samples." + % (n_bad, len(samples), n_equal), RuntimeWarning) samples = samples[~invalid].reset_index(drop=True) samples.sort_values('logL', inplace=True) diff --git a/tests/test_samples.py b/tests/test_samples.py index f49aed3d..0dfdf4c8 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1133,6 +1133,7 @@ def test_beta(): def test_beta_with_logL_infinities(): ns = read_chains("./tests/example_data/pc") ns.loc[:10, ('logL', r'$\ln\mathcal{L}$')] = -np.inf + ns.loc[1000, ('logL', r'$\ln\mathcal{L}$')] = -np.inf with pytest.warns(RuntimeWarning): ns.recompute(inplace=True) assert (ns.logL == -np.inf).sum() == 0 From a4c852156d8dcec2dbb266f83741d8bf891a46cd Mon Sep 17 00:00:00 2001 From: Lukas Hergt Date: Fri, 27 Sep 2024 14:27:14 -0700 Subject: [PATCH 2/2] Normalised stats (#396) * allow passing a `NestedSamples.stats` instance as a normalisation reference to `NestedSamples.stats` producing columns of differences `[Delta_logZ, ...]` * add tests for the normalisation kwarg `norm` for `NestedSamples.stats` * bump version to 2.9.0 * improve docstring for `norm` kwarg, saying what columns will additionally be created --- README.rst | 2 +- anesthetic/_version.py | 2 +- anesthetic/samples.py | 37 +++++++++++++++++++++++++---- tests/test_samples.py | 54 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 5e28cce7..4ecb81c2 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.8.16 +:Version: 2.9.0 :Homepage: https://github.com/handley-lab/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index f8f5d150..387cfacc 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.8.16' +__version__ = '2.9.0' diff --git a/anesthetic/samples.py b/anesthetic/samples.py index 44c81076..134e7e2a 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -761,7 +761,7 @@ def ns_output(self, *args, **kwargs): " as well as average loglikelihoods: help(samples.logL_P)" ) - def stats(self, nsamples=None, beta=None): + def stats(self, nsamples=None, beta=None, norm=None): r"""Compute Nested Sampling statistics. Using nested sampling we can compute: @@ -822,20 +822,27 @@ def stats(self, nsamples=None, beta=None): beta : float, array-like, optional inverse temperature(s) beta=1/kT. Default self.beta + norm : Series, :class:`Samples`, optional + :meth:`NestedSamples.stats` output used for normalisation. + Can be either a Series of mean values or Samples produced with + matching `nsamples` and `beta`. In addition to the columns + ['logZ', 'D_KL', 'logL_P', 'd_G'], this adds the normalised + versions ['Delta_logZ', 'Delta_D_KL', 'Delta_logL_P', 'Delta_d_G']. + Returns ------- if beta is scalar and nsamples is None: - Series, index ['logZ', 'd_G', 'DK_L', 'logL_P'] + Series, index ['logZ', 'd_G', 'D_KL', 'logL_P'] elif beta is scalar and nsamples is int: :class:`Samples`, index range(nsamples), - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + columns ['logZ', 'd_G', 'D_KL', 'logL_P'] elif beta is array-like and nsamples is None: :class:`Samples`, index beta, - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + columns ['logZ', 'd_G', 'D_KL', 'logL_P'] elif beta is array-like and nsamples is int: :class:`Samples`, index :class:`pandas.MultiIndex` the product of beta and range(nsamples) - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + columns ['logZ', 'd_G', 'D_KL', 'logL_P'] """ logw = self.logw(nsamples, beta) if nsamples is None and beta is None: @@ -861,6 +868,26 @@ def stats(self, nsamples=None, beta=None): samples.set_label('d_G', r'$d_\mathrm{G}$') samples.label = self.label + + if norm is not None: + samples['Delta_logZ'] = samples['logZ'] - norm['logZ'] + samples.set_label('Delta_logZ', + r"$\Delta\ln\mathcal{Z}$") + + samples['Delta_D_KL'] = samples['D_KL'] - norm['D_KL'] + samples.set_label('Delta_D_KL', + r"$\Delta\mathcal{D}_\mathrm{KL}$") + + samples['Delta_logL_P'] = samples['logL_P'] - norm['logL_P'] + samples.set_label( + 'Delta_logL_P', + r"$\Delta\langle\ln\mathcal{L}\rangle_\mathcal{P}$" + ) + + samples['Delta_d_G'] = samples['d_G'] - norm['d_G'] + samples.set_label('Delta_d_G', + r"$\Delta d_\mathrm{G}$") + return samples def logX(self, nsamples=None): diff --git a/tests/test_samples.py b/tests/test_samples.py index 0dfdf4c8..eef22e61 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -946,17 +946,27 @@ def test_stats(): beta = [0., 0.5, 1.] vals = ['logZ', 'D_KL', 'logL_P', 'd_G'] + delta_vals = ['Delta_logZ', 'Delta_D_KL', 'Delta_logL_P', 'Delta_d_G'] labels = [r'$\ln\mathcal{Z}$', r'$\mathcal{D}_\mathrm{KL}$', r'$\langle\ln\mathcal{L}\rangle_\mathcal{P}$', r'$d_\mathrm{G}$'] + delta_labels = [r'$\Delta\ln\mathcal{Z}$', + r'$\Delta\mathcal{D}_\mathrm{KL}$', + r'$\Delta\langle\ln\mathcal{L}\rangle_\mathcal{P}$', + r'$\Delta d_\mathrm{G}$'] stats = pc.stats() assert isinstance(stats, WeightedLabelledSeries) assert_array_equal(stats.drop_labels().index, vals) assert_array_equal(stats.get_labels(), labels) + stats = pc.stats(norm=pc.stats()) + assert isinstance(stats, WeightedLabelledSeries) + assert_array_equal(stats.drop_labels().index, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + stats = pc.stats(nsamples=nsamples) assert isinstance(stats, WeightedLabelledDataFrame) assert_array_equal(stats.drop_labels().columns, vals) @@ -964,6 +974,20 @@ def test_stats(): assert stats.index.name == 'samples' assert_array_equal(stats.index, range(nsamples)) + stats = pc.stats(nsamples=nsamples, norm=pc.stats()) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.name == 'samples' + assert_array_equal(stats.index, range(nsamples)) + + stats = pc.stats(nsamples=nsamples, norm=pc.stats(nsamples=nsamples)) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.name == 'samples' + assert_array_equal(stats.index, range(nsamples)) + stats = pc.stats(beta=beta) assert isinstance(stats, WeightedLabelledDataFrame) assert_array_equal(stats.drop_labels().columns, vals) @@ -971,6 +995,20 @@ def test_stats(): assert stats.index.name == 'beta' assert_array_equal(stats.index, beta) + stats = pc.stats(beta=beta, norm=pc.stats()) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.name == 'beta' + assert_array_equal(stats.index, beta) + + stats = pc.stats(beta=beta, norm=pc.stats(beta=beta)) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.name == 'beta' + assert_array_equal(stats.index, beta) + stats = pc.stats(nsamples=nsamples, beta=beta) assert isinstance(stats, WeightedLabelledDataFrame) assert_array_equal(stats.drop_labels().columns, vals) @@ -978,7 +1016,23 @@ def test_stats(): assert stats.index.names == ['beta', 'samples'] assert stats.index.levshape == (len(beta), nsamples) + stats = pc.stats(nsamples=nsamples, beta=beta, norm=pc.stats()) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.names == ['beta', 'samples'] + assert stats.index.levshape == (len(beta), nsamples) + + stats = pc.stats(nsamples=nsamples, beta=beta, + norm=pc.stats(nsamples=nsamples, beta=beta)) + assert isinstance(stats, WeightedLabelledDataFrame) + assert_array_equal(stats.drop_labels().columns, vals + delta_vals) + assert_array_equal(stats.get_labels(), labels + delta_labels) + assert stats.index.names == ['beta', 'samples'] + assert stats.index.levshape == (len(beta), nsamples) + for beta in [1., 0., 0.5]: + np.random.seed(42) pc.beta = beta n = 1000 PC = pc.stats(n, beta)