From ab634869d31e7f6ceeb5dd8769d31f5b45497e25 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:01:51 +0000 Subject: [PATCH 01/71] first pass at WeightedGroupBy --- anesthetic/weighted_pandas.py | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index fb5ab1bc..dfd7d734 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -1,14 +1,59 @@ """Pandas DataFrame and Series with weighted samples.""" +import warnings from inspect import signature import numpy as np from pandas import Series, DataFrame, concat, MultiIndex +from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy +from pandas._libs import lib +from pandas._libs.lib import no_default +from pandas.util._exceptions import find_stack_level from pandas.util import hash_pandas_object from numpy.ma import masked_array from anesthetic.utils import (compress_weights, channel_capacity, quantile, temporary_seed, adjust_docstrings) +class WeightedGroupBy(GroupBy): + def mean(self, numeric_only=False): + result = self.agg(lambda df: self.obj._constructor(df).mean( + numeric_only=numeric_only)) + return result.__finalize__(self.obj, method="groupby") + + def std(self, numeric_only=False): + result = self.agg(lambda df: self.obj._constructor(df).std( + numeric_only=numeric_only)) + return result.__finalize__(self.obj, method="groupby") + + def kurtosis(self, numeric_only=False): + result = self.agg(lambda df: self.obj._constructor(df).kurtosis( + numeric_only=numeric_only)) + return result.__finalize__(self.obj, method="groupby") + + def median(self, numeric_only=False): + result = self.agg(lambda df: self.obj._constructor(df).median( + numeric_only=numeric_only)) + return result.__finalize__(self.obj, method="groupby") + + def var(self, numeric_only=False): + result = self.agg(lambda df: self.obj._constructor(df).var( + numeric_only=numeric_only)) + return 
result.__finalize__(self.obj, method="groupby") + + +class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): + def cov(self, other, skipna=True): + result = self.agg(lambda df: self.obj._constructor(df).cov( + other, skipna)) + return result.__finalize__(self.obj, method="groupby") + + +class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): + def cov(self, skipna=True): + result = self.agg(lambda df: self.obj._constructor(df).cov(skipna)) + return result.__finalize__(self.obj, method="groupby") + + class _WeightedObject(object): """Common methods for `WeightedSeries` and `WeightedDataFrame`. @@ -204,6 +249,37 @@ def _constructor(self): def _constructor_expanddim(self): return WeightedDataFrame + def groupby( + self, + by=None, + axis=0, + level=None, + as_index=True, + sort=True, + group_keys=True, + observed=False, + dropna=True, + ) -> SeriesGroupBy: + from pandas.core.groupby.generic import SeriesGroupBy + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + if not as_index: + raise TypeError("as_index=False only valid with DataFrame") + axis = self._get_axis_number(axis) + + return WeightedSeriesGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + class WeightedDataFrame(_WeightedObject, DataFrame): """Weighted version of :class:`pandas.DataFrame`.""" @@ -405,6 +481,51 @@ def _constructor_sliced(self): def _constructor(self): return WeightedDataFrame + def groupby( + self, + by=None, + axis=no_default, + level=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + observed: bool = False, + dropna: bool = True, + ) -> DataFrameGroupBy: # pragma: no cover + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.groupby with axis=1 is deprecated. 
Do " + "`frame.T.groupby(...)` without axis instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + "The 'axis' keyword in DataFrame.groupby is deprecated " + "and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + axis = 0 + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + + return WeightedDataFrameGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + for cls in [WeightedDataFrame, WeightedSeries]: adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame') From e4e834edacd51434a25245f91f9aeb82301946b3 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:10:17 +0000 Subject: [PATCH 02/71] correct cov --- anesthetic/weighted_pandas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index dfd7d734..3f47a4b6 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -46,12 +46,13 @@ def cov(self, other, skipna=True): result = self.agg(lambda df: self.obj._constructor(df).cov( other, skipna)) return result.__finalize__(self.obj, method="groupby") + def cov(self, other, skipna=True): + return self._op_via_apply("cov", other=other, skipna=skipna) class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): def cov(self, skipna=True): - result = self.agg(lambda df: self.obj._constructor(df).cov(skipna)) - return result.__finalize__(self.obj, method="groupby") + return self._op_via_apply("cov", skipna=skipna) class _WeightedObject(object): From 94aecb98749dfe122ef437f8e9268197dd31416c Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:15:51 +0000 Subject: [PATCH 03/71] remove duplicate cov --- anesthetic/weighted_pandas.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 3f47a4b6..b710d371 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -42,10 +42,6 @@ def var(self, numeric_only=False): class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - def cov(self, other, skipna=True): - result = self.agg(lambda df: self.obj._constructor(df).cov( - other, skipna)) - return result.__finalize__(self.obj, method="groupby") def cov(self, other, skipna=True): return self._op_via_apply("cov", other=other, skipna=skipna) From ada20d705895302a3cf87f556fdd8596dc848431 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:25:01 +0000 Subject: [PATCH 04/71] give up on cov for now --- anesthetic/weighted_pandas.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index b710d371..1fd0dd6a 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -42,13 +42,11 @@ def var(self, numeric_only=False): class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - def cov(self, other, skipna=True): - return self._op_via_apply("cov", other=other, skipna=skipna) + pass class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): - def cov(self, skipna=True): - return self._op_via_apply("cov", skipna=skipna) + pass class _WeightedObject(object): From 3abca58e7841e5cfeb62a0c5ece4508a75085732 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:34:52 +0000 Subject: [PATCH 05/71] use Lukas' test --- tests/test_samples.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index de3145cd..5539a7be 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1332,3 +1332,32 @@ def test_old_gui(): make_2d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'}) with pytest.raises(NotImplementedError): make_1d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 
'y0': '$y_0$'}) + + +def test_groupby(): + params = ['a', 'b'] + data = np.random.rand(4, 2) + weights = np.random.randint(1, 10, 4) + samples = Samples(data, weights=weights, columns=params) + samples['group'] = np.ones(4, dtype=int) + samples.loc[2:, 'group'] = 2 + + group1_means = np.average(samples.loc[:1, params], axis=0) + group1_wmeans = np.average(samples.loc[:1, params], axis=0, + weights=samples.get_weights()[:2]) + group2_means = np.average(samples.loc[2:, params], axis=0) + group2_wmeans = np.average(samples.loc[2:, params], axis=0, + weights=samples.get_weights()[2:]) + group_means = np.vstack([group1_means, group2_means]) + group_wmeans = np.vstack([group1_wmeans, group2_wmeans]) + group_weights = [samples.get_weights()[:2].sum(), + samples.get_weights()[2:].sum()] + mean = np.average(group_means, axis=0) + wmean = np.average(group_wmeans, axis=0, weights=group_weights) + + groups = samples.groupby('group') + print(groups.mean()) + assert not np.any(samples.groupby('group').mean() == group_means) + assert np.all(samples.groupby('group').mean() == group_wmeans) + + assert not np.any(samples.groupby('group').mean().mean() == mean) From 626bce358764f58952a72c79bbcb015b3b5d07e8 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:38:14 +0000 Subject: [PATCH 06/71] remove unnecessary import --- anesthetic/weighted_pandas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 1fd0dd6a..0f981899 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -255,8 +255,6 @@ def groupby( observed=False, dropna=True, ) -> SeriesGroupBy: - from pandas.core.groupby.generic import SeriesGroupBy - if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: From ca313333802cf9d2048a0b40d373ec5b39ff9835 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:45:47 +0000 Subject: [PATCH 07/71] remove currently
unused lines from tests --- tests/test_samples.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 5539a7be..6b35674c 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1350,10 +1350,10 @@ def test_groupby(): weights=samples.get_weights()[2:]) group_means = np.vstack([group1_means, group2_means]) group_wmeans = np.vstack([group1_wmeans, group2_wmeans]) - group_weights = [samples.get_weights()[:2].sum(), - samples.get_weights()[2:].sum()] + # group_weights = [samples.get_weights()[:2].sum(), + # samples.get_weights()[2:].sum()] mean = np.average(group_means, axis=0) - wmean = np.average(group_wmeans, axis=0, weights=group_weights) + # wmean = np.average(group_wmeans, axis=0, weights=group_weights) groups = samples.groupby('group') print(groups.mean()) From 9578b4bcc0ae6e4965ebc175b9753c9a001eab72 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:51:36 +0000 Subject: [PATCH 08/71] version bump --- README.rst | 2 +- anesthetic/_version.py | 2 +- tests/test_samples.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index c1260e94..31ec3a26 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.0.0-beta.22 +:Version: 2.0.0-beta.23 :Homepage: https://github.com/williamjameshandley/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 7d2b271c..d4666d28 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.0.0b22' +__version__ = '2.0.0b23' diff --git a/tests/test_samples.py b/tests/test_samples.py index 6b35674c..6046e60a 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1361,3 +1361,4 @@ def test_groupby(): assert np.all(samples.groupby('group').mean() == 
group_wmeans) assert not np.any(samples.groupby('group').mean().mean() == mean) + # assert np.all(samples.groupby('group').mean().mean() == wmean) From 8531a5665e9046193295edd5acf3f94ef1e795b0 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 16:59:07 +0000 Subject: [PATCH 09/71] sort out docstrings --- anesthetic/weighted_pandas.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 0f981899..bb92f814 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -15,37 +15,43 @@ class WeightedGroupBy(GroupBy): - def mean(self, numeric_only=False): + """Weighted version of :class:`pandas.core.groupby.GroupBy`.""" + + def mean(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).mean( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def std(self, numeric_only=False): + def std(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).std( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def kurtosis(self, numeric_only=False): + def kurtosis(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).kurtosis( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def median(self, numeric_only=False): + def median(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).median( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def var(self, numeric_only=False): + def var(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).var( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): + """Weighted version of 
:class:`pandas.core.groupby.SeriesGroupBy`.""" + pass class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): + """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`.""" + pass @@ -254,7 +260,7 @@ def groupby( group_keys=True, observed=False, dropna=True, - ) -> SeriesGroupBy: + ): # noqa: D102 if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: @@ -484,7 +490,7 @@ def groupby( group_keys: bool = True, observed: bool = False, dropna: bool = True, - ) -> DataFrameGroupBy: # pragma: no cover + ): # pragma: no cover # noqa: D102 if axis is not lib.no_default: axis = self._get_axis_number(axis) if axis == 1: From 9598eac51cbda76f92667b1d7220c4504c40d4ff Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 17:02:08 +0000 Subject: [PATCH 10/71] fix indentation --- anesthetic/weighted_pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index bb92f814..77550b61 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -260,7 +260,7 @@ def groupby( group_keys=True, observed=False, dropna=True, - ): # noqa: D102 + ): # noqa: D102 if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: @@ -490,7 +490,7 @@ def groupby( group_keys: bool = True, observed: bool = False, dropna: bool = True, - ): # pragma: no cover # noqa: D102 + ): # pragma: no cover # noqa: D102 if axis is not lib.no_default: axis = self._get_axis_number(axis) if axis == 1: From 192bffe47e53a65a82f126299a45089ee26a05d7 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 17:39:23 +0000 Subject: [PATCH 11/71] tests using cobaya chains --- tests/test_samples.py | 44 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 6046e60a..7a31e335 
100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1334,31 +1334,19 @@ def test_old_gui(): make_1d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'}) -def test_groupby(): - params = ['a', 'b'] - data = np.random.rand(4, 2) - weights = np.random.randint(1, 10, 4) - samples = Samples(data, weights=weights, columns=params) - samples['group'] = np.ones(4, dtype=int) - samples.loc[2:, 'group'] = 2 - - group1_means = np.average(samples.loc[:1, params], axis=0) - group1_wmeans = np.average(samples.loc[:1, params], axis=0, - weights=samples.get_weights()[:2]) - group2_means = np.average(samples.loc[2:, params], axis=0) - group2_wmeans = np.average(samples.loc[2:, params], axis=0, - weights=samples.get_weights()[2:]) - group_means = np.vstack([group1_means, group2_means]) - group_wmeans = np.vstack([group1_wmeans, group2_wmeans]) - # group_weights = [samples.get_weights()[:2].sum(), - # samples.get_weights()[2:].sum()] - mean = np.average(group_means, axis=0) - # wmean = np.average(group_wmeans, axis=0, weights=group_weights) - - groups = samples.groupby('group') - print(groups.mean()) - assert not np.any(samples.groupby('group').mean() == group_means) - assert np.all(samples.groupby('group').mean() == group_wmeans) - - assert not np.any(samples.groupby('group').mean().mean() == mean) - # assert np.all(samples.groupby('group').mean().mean() == wmean) +def test_groupby_stats(): + mcmc = read_chains('./tests/example_data/cb') + chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False) + assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].mean() + .to_numpy()[:-1], + chains.mean().iloc[0, :].to_numpy())) + assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std() + .to_numpy()[:-1], + chains.std().iloc[0, :].to_numpy())) + assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis() + .dropna().to_numpy(), + chains.kurtosis().iloc[0, :].dropna().to_numpy())) + # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 
1].median().to_numpy()[:-1], + # chains.median().iloc[0, :].to_numpy())) + assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var().to_numpy()[:-1], + chains.var().iloc[0, :].to_numpy())) From de9b4f7a0702e0c092f85d5d872dad09348fe30c Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 10 Mar 2023 17:40:15 +0000 Subject: [PATCH 12/71] test formatting --- tests/test_samples.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 7a31e335..124836df 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1346,7 +1346,9 @@ def test_groupby_stats(): assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis() .dropna().to_numpy(), chains.kurtosis().iloc[0, :].dropna().to_numpy())) - # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median().to_numpy()[:-1], - # chains.median().iloc[0, :].to_numpy())) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var().to_numpy()[:-1], + # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median() + # .to_numpy()[:-1], + # chains.median().iloc[0, :].to_numpy())) + assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var() + .to_numpy()[:-1], chains.var().iloc[0, :].to_numpy())) From 55e9f4e30ddfa1e65e2fb4d4e74af166c71ff8df Mon Sep 17 00:00:00 2001 From: Ormorod Date: Tue, 14 Mar 2023 12:07:45 +0000 Subject: [PATCH 13/71] reinstate median test --- tests/test_samples.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 124836df..34559920 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1346,9 +1346,9 @@ def test_groupby_stats(): assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis() .dropna().to_numpy(), chains.kurtosis().iloc[0, :].dropna().to_numpy())) - # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median() - # .to_numpy()[:-1], - # chains.median().iloc[0, :].to_numpy())) + assert 
np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median() + .to_numpy()[:-1], + chains.median().iloc[0, :].to_numpy())) assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var() .to_numpy()[:-1], chains.var().iloc[0, :].to_numpy())) From dfc4c3b4908afd981871edebcd8b884ad865b456 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Tue, 14 Mar 2023 12:08:08 +0000 Subject: [PATCH 14/71] change numeric_only to None in median --- anesthetic/weighted_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 77550b61..0ba6bf5d 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -32,7 +32,7 @@ def kurtosis(self, numeric_only=False): # noqa: D102 numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def median(self, numeric_only=False): # noqa: D102 + def median(self, numeric_only=None): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).median( numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") From 244cd1f76b264e488f1ad8ff758e79bfe29eea56 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Tue, 14 Mar 2023 12:21:08 +0000 Subject: [PATCH 15/71] stick underscores in front to see if this fixes the documentation --- anesthetic/weighted_pandas.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 0ba6bf5d..882990af 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -14,7 +14,7 @@ temporary_seed, adjust_docstrings) -class WeightedGroupBy(GroupBy): +class _WeightedGroupBy(GroupBy): """Weighted version of :class:`pandas.core.groupby.GroupBy`.""" def mean(self, numeric_only=False): # noqa: D102 @@ -43,13 +43,13 @@ def var(self, numeric_only=False): # noqa: D102 return result.__finalize__(self.obj, method="groupby") -class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): 
+class _WeightedSeriesGroupBy(_WeightedGroupBy, SeriesGroupBy): """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`.""" pass -class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): +class _WeightedDataFrameGroupBy(_WeightedGroupBy, DataFrameGroupBy): """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`.""" pass @@ -267,7 +267,7 @@ def groupby( raise TypeError("as_index=False only valid with DataFrame") axis = self._get_axis_number(axis) - return WeightedSeriesGroupBy( + return _WeightedSeriesGroupBy( obj=self, keys=by, axis=axis, @@ -513,7 +513,7 @@ def groupby( if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") - return WeightedDataFrameGroupBy( + return _WeightedDataFrameGroupBy( obj=self, keys=by, axis=axis, From e3badbb66cfa94faf4637d79754b9af71be158cb Mon Sep 17 00:00:00 2001 From: Ormorod Date: Tue, 14 Mar 2023 12:26:25 +0000 Subject: [PATCH 16/71] Revert "stick underscores in front to see if this fixes the documentation" This reverts commit 244cd1f76b264e488f1ad8ff758e79bfe29eea56. 
--- anesthetic/weighted_pandas.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 882990af..0ba6bf5d 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -14,7 +14,7 @@ temporary_seed, adjust_docstrings) -class _WeightedGroupBy(GroupBy): +class WeightedGroupBy(GroupBy): """Weighted version of :class:`pandas.core.groupby.GroupBy`.""" def mean(self, numeric_only=False): # noqa: D102 @@ -43,13 +43,13 @@ def var(self, numeric_only=False): # noqa: D102 return result.__finalize__(self.obj, method="groupby") -class _WeightedSeriesGroupBy(_WeightedGroupBy, SeriesGroupBy): +class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`.""" pass -class _WeightedDataFrameGroupBy(_WeightedGroupBy, DataFrameGroupBy): +class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`.""" pass @@ -267,7 +267,7 @@ def groupby( raise TypeError("as_index=False only valid with DataFrame") axis = self._get_axis_number(axis) - return _WeightedSeriesGroupBy( + return WeightedSeriesGroupBy( obj=self, keys=by, axis=axis, @@ -513,7 +513,7 @@ def groupby( if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") - return _WeightedDataFrameGroupBy( + return WeightedDataFrameGroupBy( obj=self, keys=by, axis=axis, From ca0025583b48ddbfb2a4a6c67ae96fac3db4a664 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Tue, 14 Mar 2023 15:43:50 +0000 Subject: [PATCH 17/71] add missing no cover to WeightedSeries.groupby() --- anesthetic/weighted_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 0ba6bf5d..d064793f 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -260,7 +260,7 @@ def groupby( 
group_keys=True, observed=False, dropna=True, - ): # noqa: D102 + ): # pragma: no cover # noqa: D102 if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: From d97b760f2a59a522ebb7c634ddf4c9361c586f8f Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 14 Mar 2023 17:02:22 -0700 Subject: [PATCH 18/71] remove `:show-inheritance:` for `weighted_pandas` autodocs, cross referencing pandas is a pain, some of its classes lack docs --- docs/source/anesthetic.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index 9e9b8284..97fa9731 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -99,6 +99,5 @@ anesthetic.weighted\_pandas module .. automodule:: anesthetic.weighted_pandas :members: :undoc-members: - :show-inheritance: From bfa2647e4c2082a14178e6402845f8b130fb8501 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 14 Mar 2023 17:02:43 -0700 Subject: [PATCH 19/71] fix autodocs for `weighted_pandas` * `GroupBy` does not have its own docs. - Its initialisation signature looks like a core dump, hence implementing our own initialisation function. - Trying to cross-reference as ``:class:`pandas.core.groupby.GroupBy` `` will fail, hence dropping the link attempt. Same goes for `SeriesGroupBy` and `DataFrameGroupBy`. * Dropping `kurtosis` from `WeightedGroupBy`, since it is not implemented in `pandas.core.groupby.GroupBy`. Leave that to tackle once/if/when we really need it. * Add docstring adjustments for `WeightedDataFrameGroupBy` and `WeightedSeriesGroupBy` to the end of `weighted_pandas` in the same way as previously done for `WeightedDataFrame` and `WeightedSeries`.
--- anesthetic/weighted_pandas.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index d064793f..bc503413 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -15,7 +15,10 @@ class WeightedGroupBy(GroupBy): - """Weighted version of :class:`pandas.core.groupby.GroupBy`.""" + """Weighted version of ``pandas.core.groupby.GroupBy``.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) def mean(self, numeric_only=False): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).mean( @@ -27,11 +30,6 @@ def std(self, numeric_only=False): # noqa: D102 numeric_only=numeric_only)) return result.__finalize__(self.obj, method="groupby") - def kurtosis(self, numeric_only=False): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).kurtosis( - numeric_only=numeric_only)) - return result.__finalize__(self.obj, method="groupby") - def median(self, numeric_only=None): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).median( numeric_only=numeric_only)) @@ -44,13 +42,13 @@ def var(self, numeric_only=False): # noqa: D102 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`.""" + """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" pass class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): - """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`.""" + """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" pass @@ -526,14 +524,18 @@ def groupby( ) -for cls in [WeightedDataFrame, WeightedSeries]: +for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy]: adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame') adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames') adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries') 
adjust_docstrings(cls, 'core', 'pandas.core') - adjust_docstrings(cls, 'DataFrameGroupBy', - 'pandas.core.groupby.DataFrameGroupBy') - adjust_docstrings(cls, 'SeriesGroupBy', - 'pandas.core.groupby.SeriesGroupBy') adjust_docstrings(cls, 'pandas.core.window.Rolling.quantile', 'pandas.core.window.rolling.Rolling.quantile') + adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy') + adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy') + adjust_docstrings(cls, 'WeightedDataFrameGroupBy.sample', + 'pandas.core.groupby.DataFrameGroupBy.sample') + adjust_docstrings(cls, 'WeightedSeriesGroupBy.sample', + 'pandas.core.groupby.SeriesGroupBy.sample') +adjust_docstrings(WeightedDataFrame, 'resample', 'pandas.DataFrame.resample') +adjust_docstrings(WeightedSeries, 'resample', 'pandas.Series.resample') From 6703ca7d990b51827ee326ac9f99298eaa40f91c Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 14 Mar 2023 17:17:51 -0700 Subject: [PATCH 20/71] drop `WeightedGroupBy.kurtosis` also from tests --- tests/test_samples.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 34559920..e190041d 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,9 +1343,6 @@ def test_groupby_stats(): assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std() .to_numpy()[:-1], chains.std().iloc[0, :].to_numpy())) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis() - .dropna().to_numpy(), - chains.kurtosis().iloc[0, :].dropna().to_numpy())) assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median() .to_numpy()[:-1], chains.median().iloc[0, :].to_numpy())) From 40cd004d8fcfb3edc55512069d374b89a53d3864 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 14 Mar 2023 17:32:45 -0700 Subject: [PATCH 21/71] make `WeightedDataFrameGroupBy` and `WeightedSeriesGroupBy` private, since they have essentially no documentation anyhow --- anesthetic/weighted_pandas.py | 10 ++++++++-- 1 file
changed, 8 insertions(+), 2 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index bc503413..7f6b4b80 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -42,13 +42,19 @@ def var(self, numeric_only=False): # noqa: D102 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" + """Weighted version of ``pandas.core.groupby.SeriesGroupBy``. + + :meta private: + """ pass class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): - """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" + """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``. + + :meta private: + """ pass From 0f2104e26ce78b6700de35c61e4576492c97ffd7 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 14 Mar 2023 18:25:37 -0700 Subject: [PATCH 22/71] make `WeightedGroupBy.grouper` private --- anesthetic/weighted_pandas.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 7f6b4b80..f6be2f4e 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -4,7 +4,7 @@ from inspect import signature import numpy as np from pandas import Series, DataFrame, concat, MultiIndex -from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy +from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy, ops from pandas._libs import lib from pandas._libs.lib import no_default from pandas.util._exceptions import find_stack_level @@ -17,6 +17,9 @@ class WeightedGroupBy(GroupBy): """Weighted version of ``pandas.core.groupby.GroupBy``.""" + grouper: ops.BaseGrouper + """:meta private:""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) From 9636d5f39a4eb4db337b5c9cefca67c3b3f37f56 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Thu, 16 Mar 2023 14:28:01 +0000 Subject: [PATCH 23/71] version bump --- 
README.rst | 2 +- anesthetic/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 31ec3a26..687bb728 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.0.0-beta.23 +:Version: 2.0.0-beta.24 :Homepage: https://github.com/williamjameshandley/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index d4666d28..03670122 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.0.0b23' +__version__ = '2.0.0b24' From 0a83fe0bbbfe0de055a26d991575d62d13470332 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 20 Mar 2023 09:45:01 +0000 Subject: [PATCH 24/71] version bump --- README.rst | 2 +- anesthetic/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 687bb728..8e1bb219 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.0.0-beta.24 +:Version: 2.0.0-beta.25 :Homepage: https://github.com/williamjameshandley/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 03670122..c4c4e20b 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.0.0b24' +__version__ = '2.0.0b25' From 8c907f187705766171095f3fa2a02ecd169ad84e Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 22 Mar 2023 11:45:19 +0000 Subject: [PATCH 25/71] Removed hard-coded numeric_only arguments --- anesthetic/weighted_pandas.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index f6be2f4e..d9e1f1df 
100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -23,24 +23,24 @@ class WeightedGroupBy(GroupBy): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def mean(self, numeric_only=False): # noqa: D102 + def mean(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).mean( - numeric_only=numeric_only)) + *args, **kwargs)) return result.__finalize__(self.obj, method="groupby") - def std(self, numeric_only=False): # noqa: D102 + def std(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).std( - numeric_only=numeric_only)) + *args, **kwargs)) return result.__finalize__(self.obj, method="groupby") - def median(self, numeric_only=None): # noqa: D102 + def median(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).median( - numeric_only=numeric_only)) + *args, **kwargs)) return result.__finalize__(self.obj, method="groupby") - def var(self, numeric_only=False): # noqa: D102 + def var(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).var( - numeric_only=numeric_only)) + *args, **kwargs)) return result.__finalize__(self.obj, method="groupby") From eac682aaec1859f39530e2ebc85c29da52b9891a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 22 Mar 2023 11:45:51 +0000 Subject: [PATCH 26/71] version bump --- README.rst | 2 +- anesthetic/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8e1bb219..c876a856 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.0.0-beta.25 +:Version: 2.0.0-beta.26 :Homepage: https://github.com/williamjameshandley/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 
c4c4e20b..019ed87d 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.0.0b25' +__version__ = '2.0.0b26' From c49536210ac8b3c74f99b548802d73737e04bb1a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 22 Mar 2023 14:11:25 +0000 Subject: [PATCH 27/71] Updated weighted samples --- anesthetic/weighted_pandas.py | 55 +++++++++++++++++++++++++++++++---- tests/test_samples.py | 39 ++++++++++++++++--------- 2 files changed, 76 insertions(+), 18 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index d9e1f1df..49d2a29b 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -25,24 +25,31 @@ def __init__(self, *args, **kwargs): def mean(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).mean( - *args, **kwargs)) + *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") def std(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).std( - *args, **kwargs)) + *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") def median(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).median( - *args, **kwargs)) + *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") def var(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).var( - *args, **kwargs)) + *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + + def get_weights(self): + """Return the weights of the grouped samples.""" + return self.agg(lambda df: df.get_weights().sum()) + class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): 
"""Weighted version of ``pandas.core.groupby.SeriesGroupBy``. @@ -59,7 +66,45 @@ class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): :meta private: """ - pass + def get_weights(self): + """Return the weights of the grouped samples.""" + return super().get_weights().min(axis=1-self.axis) + + def _gotitem(self, key, ndim: int, subset=None): + if ndim == 2: + if subset is None: + subset = self.obj + return WeightedDataFrameGroupBy( + subset, + self.grouper, + axis=self.axis, + level=self.level, + grouper=self.grouper, + exclusions=self.exclusions, + selection=key, + as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + observed=self.observed, + dropna=self.dropna, + ) + elif ndim == 1: + if subset is None: + subset = self.obj[key] + return WeightedSeriesGroupBy( + subset, + level=self.level, + grouper=self.grouper, + exclusions=self.exclusions, + selection=key, + as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + observed=self.observed, + dropna=self.dropna, + ) + + raise AssertionError("invalid ndim for _gotitem") class _WeightedObject(object): diff --git a/tests/test_samples.py b/tests/test_samples.py index 86581704..801366de 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,16 +1343,29 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') - chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].mean() - .to_numpy()[:-1], - chains.mean().iloc[0, :].to_numpy())) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std() - .to_numpy()[:-1], - chains.std().iloc[0, :].to_numpy())) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median() - .to_numpy()[:-1], - chains.median().iloc[0, :].to_numpy())) - assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var() - .to_numpy()[:-1], - chains.var().iloc[0, :].to_numpy())) + chains = mcmc.groupby('chain') + for chain in [1, 
2]: + i = mcmc.chain == chain + assert_allclose(mcmc.loc[i].mean().drop('chain'), + chains.mean().loc[chain, :]) + assert_allclose(mcmc.loc[i].std().drop('chain'), + chains.std().loc[chain, :]) + assert_allclose(mcmc.loc[i].median().drop('chain'), + chains.median().loc[chain, :]) + assert_allclose(mcmc.loc[i].var().drop('chain'), + chains.var().loc[chain, :]) + + assert_allclose(mcmc.mean().drop('chain'), chains.mean().mean()) + + for col in mcmc.columns: + if 'chain' not in col: + for chain in [1, 2]: + i = mcmc.chain == chain + assert_allclose(mcmc.loc[i, col].mean(), + chains[[col]].mean().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].std(), + chains[[col]].std().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].median(), + chains[[col]].median().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].var(), + chains[[col]].var().loc[chain, :]) From 7fa1cec13be606309446240d2ea29373cf1339c5 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 22 Mar 2023 15:49:22 +0000 Subject: [PATCH 28/71] Completed coverage --- anesthetic/weighted_pandas.py | 2 +- tests/test_samples.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 49d2a29b..b2cc9caf 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -70,7 +70,7 @@ def get_weights(self): """Return the weights of the grouped samples.""" return super().get_weights().min(axis=1-self.axis) - def _gotitem(self, key, ndim: int, subset=None): + def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover if ndim == 2: if subset is None: subset = self.obj diff --git a/tests/test_samples.py b/tests/test_samples.py index 801366de..166a22d9 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1369,3 +1369,8 @@ def test_groupby_stats(): chains[[col]].median().loc[chain, :]) assert_allclose(mcmc.loc[i, col].var(), chains[[col]].var().loc[chain, :]) + + sample = chains.sample(5) + assert len(sample) 
== 10 + assert sample.value_counts('chain')[1] == 5 + assert sample.value_counts('chain')[2] == 5 From 99a52e34f7f0d65293d44014e6abc27088a84cc4 Mon Sep 17 00:00:00 2001 From: Adam Ormondroyd <52655393+Ormorod@users.noreply.github.com> Date: Wed, 22 Mar 2023 19:12:40 +0000 Subject: [PATCH 29/71] add missing space before inline comment --- anesthetic/weighted_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index b2cc9caf..cd532a93 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -70,7 +70,7 @@ def get_weights(self): """Return the weights of the grouped samples.""" return super().get_weights().min(axis=1-self.axis) - def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover + def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover if ndim == 2: if subset is None: subset = self.obj From 2e6d54ef164facd293fc908fc2fb15d67ca6c678 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Wed, 22 Mar 2023 19:23:36 +0000 Subject: [PATCH 30/71] joint call of column name and label --- tests/test_samples.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 166a22d9..95e150f0 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,19 +1343,19 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') - chains = mcmc.groupby('chain') + chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False) for chain in [1, 2]: i = mcmc.chain == chain - assert_allclose(mcmc.loc[i].mean().drop('chain'), + assert_allclose(mcmc.loc[i].mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().loc[chain, :]) - assert_allclose(mcmc.loc[i].std().drop('chain'), + assert_allclose(mcmc.loc[i].std().drop(('chain', '$n_\\mathrm{chain}$')), chains.std().loc[chain, :]) - assert_allclose(mcmc.loc[i].median().drop('chain'), + 
assert_allclose(mcmc.loc[i].median().drop(('chain', '$n_\\mathrm{chain}$')), chains.median().loc[chain, :]) - assert_allclose(mcmc.loc[i].var().drop('chain'), + assert_allclose(mcmc.loc[i].var().drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) - assert_allclose(mcmc.mean().drop('chain'), chains.mean().mean()) + assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) for col in mcmc.columns: if 'chain' not in col: From e45aaa6a5fcf164f59ac38f2f59171ca8224862a Mon Sep 17 00:00:00 2001 From: Ormorod Date: Wed, 22 Mar 2023 19:31:06 +0000 Subject: [PATCH 31/71] formatting --- tests/test_samples.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 95e150f0..d8ae9e3d 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1346,16 +1346,21 @@ def test_groupby_stats(): chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False) for chain in [1, 2]: i = mcmc.chain == chain - assert_allclose(mcmc.loc[i].mean().drop(('chain', '$n_\\mathrm{chain}$')), + assert_allclose(mcmc.loc[i].mean() + .drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().loc[chain, :]) - assert_allclose(mcmc.loc[i].std().drop(('chain', '$n_\\mathrm{chain}$')), + assert_allclose(mcmc.loc[i].std() + .drop(('chain', '$n_\\mathrm{chain}$')), chains.std().loc[chain, :]) - assert_allclose(mcmc.loc[i].median().drop(('chain', '$n_\\mathrm{chain}$')), + assert_allclose(mcmc.loc[i].median() + .drop(('chain', '$n_\\mathrm{chain}$')), chains.median().loc[chain, :]) - assert_allclose(mcmc.loc[i].var().drop(('chain', '$n_\\mathrm{chain}$')), + assert_allclose(mcmc.loc[i].var() + .drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) - assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) + assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), + chains.mean().mean()) for col in mcmc.columns: if 
'chain' not in col: From 5725ddfceb6d285c547cdbec8ed49f8596d2a9f8 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Wed, 22 Mar 2023 19:32:00 +0000 Subject: [PATCH 32/71] additional chains.get_group(chains) tests --- tests/test_samples.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index d8ae9e3d..3ba2bfc5 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1359,6 +1359,19 @@ def test_groupby_stats(): .drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) + assert_allclose(chains.get_group(chain).mean() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.mean().loc[chain, :]) + assert_allclose(chains.get_group(chain).std() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.std().loc[chain, :]) + assert_allclose(chains.get_group(chain).median() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.median().loc[chain, :]) + assert_allclose(chains.get_group(chain).var() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.var().loc[chain, :]) + assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) From 788fa8493dca86bd4b046fb855228ebe33a4a34d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 23 Mar 2023 09:23:51 +0000 Subject: [PATCH 33/71] added kurtosis, kurt, skew, mad, sem --- anesthetic/utils.py | 11 +++++----- anesthetic/weighted_pandas.py | 20 ++++++++++++++++++ tests/test_samples.py | 40 +++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/anesthetic/utils.py b/anesthetic/utils.py index 0208a855..541d2132 100644 --- a/anesthetic/utils.py +++ b/anesthetic/utils.py @@ -530,8 +530,9 @@ class to adjust """ for key, val in cls.__dict__.items(): doc = inspect.getdoc(val) - newdoc = re.sub(pattern, repl, doc, *args, **kwargs) - try: - cls.__dict__[key].__doc__ = newdoc - except AttributeError: - pass + if doc is not None: + newdoc = re.sub(pattern, repl, doc, *args, **kwargs) + try: + 
cls.__dict__[key].__doc__ = newdoc + except AttributeError: + pass diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index cd532a93..92e053c7 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -43,6 +43,26 @@ def var(self, *args, **kwargs): # noqa: D102 *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") + def kurt(self, *args, **kwargs): # noqa: D102 + result = self.agg(lambda df: self.obj._constructor(df).kurt( + *args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + + def kurtosis(self, *args, **kwargs): # noqa: D102 + result = self.agg(lambda df: self.obj._constructor(df).kurtosis( + *args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + + def skew(self, *args, **kwargs): # noqa: D102 + result = self.agg(lambda df: self.obj._constructor(df).skew( + *args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + + def sem(self, *args, **kwargs): # noqa: D102 + result = self.agg(lambda df: self.obj._constructor(df).sem( + *args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + def sample(self, *args, **kwargs): # noqa: D102 return super().sample(weights=self.obj.get_weights(), *args, **kwargs) diff --git a/tests/test_samples.py b/tests/test_samples.py index 3ba2bfc5..6b4b9899 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1358,6 +1358,21 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[i].var() .drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) + assert_allclose(mcmc.loc[i].kurt() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.kurt().loc[chain, :]) + assert_allclose(mcmc.loc[i].kurtosis() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.kurtosis().loc[chain, :]) + assert_allclose(mcmc.loc[i].skew() + 
.drop(('chain', '$n_\\mathrm{chain}$')), + chains.skew().loc[chain, :]) + assert_allclose(mcmc.loc[i].mad() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.mad().loc[chain, :]) + assert_allclose(mcmc.loc[i].sem() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.sem().loc[chain, :]) assert_allclose(chains.get_group(chain).mean() .drop(('chain', '$n_\\mathrm{chain}$')), @@ -1371,6 +1386,21 @@ def test_groupby_stats(): assert_allclose(chains.get_group(chain).var() .drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) + assert_allclose(chains.get_group(chain).kurt() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.kurt().loc[chain, :]) + assert_allclose(chains.get_group(chain).kurtosis() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.kurtosis().loc[chain, :]) + assert_allclose(chains.get_group(chain).skew() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.skew().loc[chain, :]) + assert_allclose(chains.get_group(chain).mad() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.mad().loc[chain, :]) + assert_allclose(chains.get_group(chain).sem() + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.sem().loc[chain, :]) assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) @@ -1387,6 +1417,16 @@ def test_groupby_stats(): chains[[col]].median().loc[chain, :]) assert_allclose(mcmc.loc[i, col].var(), chains[[col]].var().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].kurt(), + chains[[col]].kurt().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].kurtosis(), + chains[[col]].kurtosis().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].skew(), + chains[[col]].skew().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].mad(), + chains[[col]].mad().loc[chain, :]) + assert_allclose(mcmc.loc[i, col].sem(), + chains[[col]].sem().loc[chain, :]) sample = chains.sample(5) assert len(sample) == 10 From f22a24ccb00f3f3a112663db84e0015cf4f9c2bd Mon Sep 17 00:00:00 2001 From: lukashergt Date: Fri, 24 Mar 2023 13:35:12 -0700 
Subject: [PATCH 34/71] fix docs for weighted groupby sample methods * move sample method from `WeightedGroupBy` to `WeightedSeriesGroupBy` and `WeightedDataFrameGroupBy` * modify `adjust_docstrings` accordingly * make `WeightedSeriesGroupBy` and `WeightedDataFrameGroupBy` public --- anesthetic/weighted_pandas.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 92e053c7..c9d72ba5 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -63,28 +63,20 @@ def sem(self, *args, **kwargs): # noqa: D102 *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") - def sample(self, *args, **kwargs): # noqa: D102 - return super().sample(weights=self.obj.get_weights(), *args, **kwargs) - def get_weights(self): """Return the weights of the grouped samples.""" return self.agg(lambda df: df.get_weights().sum()) class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - """Weighted version of ``pandas.core.groupby.SeriesGroupBy``. + """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" - :meta private: - """ - - pass + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.obj.get_weights(), *args, **kwargs) class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): - """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``. - - :meta private: - """ + """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" def get_weights(self): """Return the weights of the grouped samples.""" @@ -126,6 +118,9 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover raise AssertionError("invalid ndim for _gotitem") + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + class _WeightedObject(object): """Common methods for `WeightedSeries` and `WeightedDataFrame`.
@@ -598,7 +593,8 @@ def groupby( ) -for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy]: +for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy, + WeightedDataFrameGroupBy, WeightedSeriesGroupBy]: adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame') adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames') adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries') @@ -607,9 +603,5 @@ def groupby( 'pandas.core.window.rolling.Rolling.quantile') adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy') adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy') - adjust_docstrings(cls, 'WeightedDataFrameGroupBy.sample', - 'pandas.core.groupby.DataFrameGroupBy.sample') - adjust_docstrings(cls, 'WeightedSeriesGroupBy.sample', - 'pandas.core.groupby.SeriesGroupBy.sample') adjust_docstrings(WeightedDataFrame, 'resample', 'pandas.DataFrame.resample') adjust_docstrings(WeightedSeries, 'resample', 'pandas.Series.resample') From d2215a5dbaeee01ea7d76b63c79ded9fb425a568 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Fri, 24 Mar 2023 14:45:48 -0700 Subject: [PATCH 35/71] complete coverage by adding test for `WeightedSeriesGroupBy.sample` --- tests/test_samples.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 6b4b9899..d09fe5d2 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1427,6 +1427,8 @@ def test_groupby_stats(): chains[[col]].mad().loc[chain, :]) assert_allclose(mcmc.loc[i, col].sem(), chains[[col]].sem().loc[chain, :]) + sample = chains[[col]].sample(5) + assert len(sample) == 10 sample = chains.sample(5) assert len(sample) == 10 From 866b3b38b72b6b409f700018803562967a157aa1 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Fri, 24 Mar 2023 15:36:08 -0700 Subject: [PATCH 36/71] fix groupby test for `WeightedSeriesGroupBy.sample` --- tests/test_samples.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/tests/test_samples.py b/tests/test_samples.py index d09fe5d2..424c57a6 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1427,10 +1427,14 @@ def test_groupby_stats(): chains[[col]].mad().loc[chain, :]) assert_allclose(mcmc.loc[i, col].sem(), chains[[col]].sem().loc[chain, :]) - sample = chains[[col]].sample(5) - assert len(sample) == 10 sample = chains.sample(5) assert len(sample) == 10 assert sample.value_counts('chain')[1] == 5 assert sample.value_counts('chain')[2] == 5 + + chains = mcmc.chain.groupby(mcmc.chain) + sample = chains.sample(5) + assert len(sample) == 10 + assert sample.value_counts()[1] == 5 + assert sample.value_counts()[2] == 5 From 57a1c1fb4a874009cc76073e39ed22a12948116f Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 27 Mar 2023 15:33:44 +0100 Subject: [PATCH 37/71] add quantile --- anesthetic/weighted_pandas.py | 5 +++++ tests/test_samples.py | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index c9d72ba5..94fb089d 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -63,6 +63,11 @@ def sem(self, *args, **kwargs): # noqa: D102 *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") + def quantile(self, *args, **kwargs): # noqa: D102 + result = self.agg(lambda df: self.obj._constructor(df).quantile( + *args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + def get_weights(self): """Return the weights of the grouped samples.""" return self.agg(lambda df: df.get_weights().sum()) diff --git a/tests/test_samples.py b/tests/test_samples.py index 424c57a6..43da09fa 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1401,6 +1401,10 @@ def test_groupby_stats(): assert_allclose(chains.get_group(chain).sem() .drop(('chain', '$n_\\mathrm{chain}$')), chains.sem().loc[chain, :]) + q = np.random.rand() + 
assert_allclose(chains.get_group(chain).quantile(q) + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.quantile(q).loc[chain, :]) assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) @@ -1427,6 +1431,9 @@ def test_groupby_stats(): chains[[col]].mad().loc[chain, :]) assert_allclose(mcmc.loc[i, col].sem(), chains[[col]].sem().loc[chain, :]) + q = np.random.rand() + assert_allclose(mcmc.loc[i, col].quantile(q), + chains[[col]].quantile(q).loc[chain, :]) sample = chains.sample(5) assert len(sample) == 10 From aff1455c1dcf8450e9d5e55abb2473319c5bc858 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 27 Mar 2023 16:48:14 +0100 Subject: [PATCH 38/71] add tests for corr, line 1441 causing invalid value warning --- tests/test_samples.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 43da09fa..49217ee3 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,7 +1343,7 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') - chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False) + chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$')) for chain in [1, 2]: i = mcmc.chain == chain assert_allclose(mcmc.loc[i].mean() @@ -1401,6 +1401,10 @@ def test_groupby_stats(): assert_allclose(chains.get_group(chain).sem() .drop(('chain', '$n_\\mathrm{chain}$')), chains.sem().loc[chain, :]) + assert_allclose(chains.get_group(chain).corr() + .drop(('chain', '$n_\\mathrm{chain}$')) + .drop(('chain', '$n_\\mathrm{chain}$'), axis=1), + chains.corr().loc[chain, :]) q = np.random.rand() assert_allclose(chains.get_group(chain).quantile(q) .drop(('chain', '$n_\\mathrm{chain}$')), @@ -1434,6 +1438,9 @@ def test_groupby_stats(): q = np.random.rand() assert_allclose(mcmc.loc[i, col].quantile(q), chains[[col]].quantile(q).loc[chain, :]) + assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]), + 
chains[[col]].corr(mcmc.loc[i, col]) + .loc[chain, :]) sample = chains.sample(5) assert len(sample) == 10 From 83e2c4d6e2750004a0355322197561f37b9115f6 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 27 Mar 2023 17:03:28 +0100 Subject: [PATCH 39/71] add test for cov --- tests/test_samples.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 49217ee3..f1559684 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1405,6 +1405,10 @@ def test_groupby_stats(): .drop(('chain', '$n_\\mathrm{chain}$')) .drop(('chain', '$n_\\mathrm{chain}$'), axis=1), chains.corr().loc[chain, :]) + assert_allclose(chains.get_group(chain).cov() + .drop(('chain', '$n_\\mathrm{chain}$')) + .drop(('chain', '$n_\\mathrm{chain}$'), axis=1), + chains.cov().loc[chain, :]) q = np.random.rand() assert_allclose(chains.get_group(chain).quantile(q) .drop(('chain', '$n_\\mathrm{chain}$')), @@ -1438,6 +1442,8 @@ def test_groupby_stats(): q = np.random.rand() assert_allclose(mcmc.loc[i, col].quantile(q), chains[[col]].quantile(q).loc[chain, :]) + assert_allclose(mcmc.loc[i, col].cov(mcmc.loc[i, col]), + chains[[col]].cov().loc[chain, :]) assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]), chains[[col]].corr(mcmc.loc[i, col]) .loc[chain, :]) From 0654d98259a7b8d525beda29b124d4aa93b4820e Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 27 Mar 2023 17:07:33 +0100 Subject: [PATCH 40/71] move quantile to end --- tests/test_samples.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index f1559684..6e62ed06 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1439,14 +1439,14 @@ def test_groupby_stats(): chains[[col]].mad().loc[chain, :]) assert_allclose(mcmc.loc[i, col].sem(), chains[[col]].sem().loc[chain, :]) - q = np.random.rand() - assert_allclose(mcmc.loc[i, col].quantile(q), - chains[[col]].quantile(q).loc[chain, :]) assert_allclose(mcmc.loc[i, 
col].cov(mcmc.loc[i, col]), chains[[col]].cov().loc[chain, :]) assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]), chains[[col]].corr(mcmc.loc[i, col]) .loc[chain, :]) + q = np.random.rand() + assert_allclose(mcmc.loc[i, col].quantile(q), + chains[[col]].quantile(q).loc[chain, :]) sample = chains.sample(5) assert len(sample) == 10 From 43f08821d27973664b5dc8692d0d167c8c0b2bb9 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Mon, 27 Mar 2023 17:12:59 +0100 Subject: [PATCH 41/71] add test for corrwith --- tests/test_samples.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 6e62ed06..da4d6c8f 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1409,6 +1409,10 @@ def test_groupby_stats(): .drop(('chain', '$n_\\mathrm{chain}$')) .drop(('chain', '$n_\\mathrm{chain}$'), axis=1), chains.cov().loc[chain, :]) + assert_allclose(chains.get_group(chain).corrwith(mcmc) + .drop(('chain', '$n_\\mathrm{chain}$')), + chains.corrwith(mcmc).loc[chain, :] + .drop(('chain', '$n_\\mathrm{chain}$'))) q = np.random.rand() assert_allclose(chains.get_group(chain).quantile(q) .drop(('chain', '$n_\\mathrm{chain}$')), From f1c966d5a3e656b02d0efc84485b2d937d56f285 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Mon, 27 Mar 2023 13:30:54 -0700 Subject: [PATCH 42/71] change `i` to `mask` to make it clearer that this is not a single index, but a boolean mask --- tests/test_samples.py | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index da4d6c8f..477f9831 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1345,32 +1345,32 @@ def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$')) for chain in [1, 2]: - i = mcmc.chain == chain - assert_allclose(mcmc.loc[i].mean() + mask = mcmc.chain == chain + assert_allclose(mcmc.loc[mask].mean() 
.drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().loc[chain, :]) - assert_allclose(mcmc.loc[i].std() + assert_allclose(mcmc.loc[mask].std() .drop(('chain', '$n_\\mathrm{chain}$')), chains.std().loc[chain, :]) - assert_allclose(mcmc.loc[i].median() + assert_allclose(mcmc.loc[mask].median() .drop(('chain', '$n_\\mathrm{chain}$')), chains.median().loc[chain, :]) - assert_allclose(mcmc.loc[i].var() + assert_allclose(mcmc.loc[mask].var() .drop(('chain', '$n_\\mathrm{chain}$')), chains.var().loc[chain, :]) - assert_allclose(mcmc.loc[i].kurt() + assert_allclose(mcmc.loc[mask].kurt() .drop(('chain', '$n_\\mathrm{chain}$')), chains.kurt().loc[chain, :]) - assert_allclose(mcmc.loc[i].kurtosis() + assert_allclose(mcmc.loc[mask].kurtosis() .drop(('chain', '$n_\\mathrm{chain}$')), chains.kurtosis().loc[chain, :]) - assert_allclose(mcmc.loc[i].skew() + assert_allclose(mcmc.loc[mask].skew() .drop(('chain', '$n_\\mathrm{chain}$')), chains.skew().loc[chain, :]) - assert_allclose(mcmc.loc[i].mad() + assert_allclose(mcmc.loc[mask].mad() .drop(('chain', '$n_\\mathrm{chain}$')), chains.mad().loc[chain, :]) - assert_allclose(mcmc.loc[i].sem() + assert_allclose(mcmc.loc[mask].sem() .drop(('chain', '$n_\\mathrm{chain}$')), chains.sem().loc[chain, :]) @@ -1424,32 +1424,32 @@ def test_groupby_stats(): for col in mcmc.columns: if 'chain' not in col: for chain in [1, 2]: - i = mcmc.chain == chain - assert_allclose(mcmc.loc[i, col].mean(), + mask = mcmc.chain == chain + assert_allclose(mcmc.loc[mask, col].mean(), chains[[col]].mean().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].std(), + assert_allclose(mcmc.loc[mask, col].std(), chains[[col]].std().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].median(), + assert_allclose(mcmc.loc[mask, col].median(), chains[[col]].median().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].var(), + assert_allclose(mcmc.loc[mask, col].var(), chains[[col]].var().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].kurt(), + 
assert_allclose(mcmc.loc[mask, col].kurt(), chains[[col]].kurt().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].kurtosis(), + assert_allclose(mcmc.loc[mask, col].kurtosis(), chains[[col]].kurtosis().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].skew(), + assert_allclose(mcmc.loc[mask, col].skew(), chains[[col]].skew().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].mad(), + assert_allclose(mcmc.loc[mask, col].mad(), chains[[col]].mad().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].sem(), + assert_allclose(mcmc.loc[mask, col].sem(), chains[[col]].sem().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].cov(mcmc.loc[i, col]), + assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), chains[[col]].cov().loc[chain, :]) - assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]), - chains[[col]].corr(mcmc.loc[i, col]) + assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), + chains[[col]].corr(mcmc.loc[mask, col]) .loc[chain, :]) q = np.random.rand() - assert_allclose(mcmc.loc[i, col].quantile(q), + assert_allclose(mcmc.loc[mask, col].quantile(q), chains[[col]].quantile(q).loc[chain, :]) sample = chains.sample(5) From 142740cff80e44ca74590ccb6f474515dfe80583 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Mon, 27 Mar 2023 13:51:26 -0700 Subject: [PATCH 43/71] add tests that check whether `groupby` results from `mean`, `std`, `cov` etc. 
are weighted as they should be --- tests/test_samples.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 477f9831..9a4e48c3 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1344,6 +1344,20 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$')) + + assert chains.mean().isweighted() is True + assert chains.std().isweighted() is True + assert chains.median().isweighted() is True + assert chains.var().isweighted() is True + assert chains.kurt().isweighted() is True + assert chains.kurtosis().isweighted() is True + assert chains.skew().isweighted() is True + # assert chains.mad().isweighted() is True + assert chains.sem().isweighted() is True + # assert chains.corr().isweighted() is True + # assert chains.cov().isweighted() is True + # assert chains.corrwith(mcmc).isweighted() is True + for chain in [1, 2]: mask = mcmc.chain == chain assert_allclose(mcmc.loc[mask].mean() From 7b0a8e1aa233582d6d93679c378058154b04cba9 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Mon, 27 Mar 2023 14:11:33 -0700 Subject: [PATCH 44/71] add groupby tests for `mad`, `corr`, `cov` and `corrwith` that check whether their results are weighted --- tests/test_samples.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 9a4e48c3..fcfa97bd 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1352,11 +1352,11 @@ def test_groupby_stats(): assert chains.kurt().isweighted() is True assert chains.kurtosis().isweighted() is True assert chains.skew().isweighted() is True - # assert chains.mad().isweighted() is True + assert chains.mad().isweighted() is True assert chains.sem().isweighted() is True - # assert chains.corr().isweighted() is True - # assert chains.cov().isweighted() is True - # assert chains.corrwith(mcmc).isweighted() is 
True + assert chains.corr().isweighted() is True + assert chains.cov().isweighted() is True + assert chains.corrwith(mcmc).isweighted() is True for chain in [1, 2]: mask = mcmc.chain == chain From 911f54e63747924a50a74e4e0a240a8474c6cf15 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Mon, 27 Mar 2023 14:38:57 -0700 Subject: [PATCH 45/71] add tests for groupby that explicitly check that the methods return the correct weights --- tests/test_samples.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index fcfa97bd..71b0609b 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1358,6 +1358,16 @@ def test_groupby_stats(): assert chains.cov().isweighted() is True assert chains.corrwith(mcmc).isweighted() is True + w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum() + w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum() + assert np.all(chains.mean().get_weights() == [w1, w2]) + assert np.all(chains.std().get_weights() == [w1, w2]) + assert np.all(chains.median().get_weights() == [w1, w2]) + assert np.all(chains.var().get_weights() == [w1, w2]) + assert np.all(chains.kurt().get_weights() == [w1, w2]) + assert np.all(chains.kurtosis().get_weights() == [w1, w2]) + assert np.all(chains.skew().get_weights() == [w1, w2]) + for chain in [1, 2]: mask = mcmc.chain == chain assert_allclose(mcmc.loc[mask].mean() From 23f2d3d162bd5e6f2c472606a65553110a73293d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 28 Mar 2023 09:58:25 +0100 Subject: [PATCH 46/71] Added some cleaner tests for get_group --- tests/test_samples.py | 61 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 6b4b9899..8f507a32 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,7 +1343,7 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') - chains = mcmc.groupby(('chain', 
'$n_\\mathrm{chain}$'), group_keys=False) + chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$')) for chain in [1, 2]: i = mcmc.chain == chain assert_allclose(mcmc.loc[i].mean() @@ -1373,34 +1373,25 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[i].sem() .drop(('chain', '$n_\\mathrm{chain}$')), chains.sem().loc[chain, :]) - - assert_allclose(chains.get_group(chain).mean() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mean().loc[chain, :]) - assert_allclose(chains.get_group(chain).std() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.std().loc[chain, :]) - assert_allclose(chains.get_group(chain).median() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.median().loc[chain, :]) - assert_allclose(chains.get_group(chain).var() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.var().loc[chain, :]) - assert_allclose(chains.get_group(chain).kurt() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurt().loc[chain, :]) - assert_allclose(chains.get_group(chain).kurtosis() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurtosis().loc[chain, :]) - assert_allclose(chains.get_group(chain).skew() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.skew().loc[chain, :]) - assert_allclose(chains.get_group(chain).mad() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mad().loc[chain, :]) - assert_allclose(chains.get_group(chain).sem() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.sem().loc[chain, :]) + assert_allclose(mcmc.loc[i].drop( + columns=('chain', '$n_\\mathrm{chain}$')).cov(), + chains.cov().loc[chain, :]) + assert_allclose(mcmc.loc[i].drop( + columns=('chain', '$n_\\mathrm{chain}$')).corr(), + chains.corr().loc[chain, :]) + + group = chains.get_group(chain) + assert_allclose(mcmc.loc[i].mean(), group.mean()) + assert_allclose(mcmc.loc[i].std(), group.std()) + assert_allclose(mcmc.loc[i].median(), group.median()) + assert_allclose(mcmc.loc[i].var(), group.var()) + assert_allclose(mcmc.loc[i].kurt(), group.kurt()) + 
assert_allclose(mcmc.loc[i].kurtosis(), group.kurtosis()) + assert_allclose(mcmc.loc[i].skew(), group.skew()) + assert_allclose(mcmc.loc[i].mad(), group.mad()) + assert_allclose(mcmc.loc[i].sem(), group.sem()) + assert_allclose(mcmc.loc[i].cov(), group.cov()) + assert_allclose(mcmc.loc[i].corr(), group.corr()) assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) @@ -1428,6 +1419,18 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[i, col].sem(), chains[[col]].sem().loc[chain, :]) + group = chains[[col]].get_group(chain) + assert_allclose(mcmc.loc[i, col].mean(), group.mean()) + assert_allclose(mcmc.loc[i, col].std(), group.std()) + assert_allclose(mcmc.loc[i, col].median(), group.median()) + assert_allclose(mcmc.loc[i, col].var(), group.var()) + assert_allclose(mcmc.loc[i, col].kurt(), group.kurt()) + assert_allclose(mcmc.loc[i, col].kurtosis(), group.kurtosis()) + assert_allclose(mcmc.loc[i, col].skew(), group.skew()) + assert_allclose(mcmc.loc[i, col].mad(), group.mad()) + assert_allclose(mcmc.loc[i, col].sem(), group.sem()) + + sample = chains.sample(5) assert len(sample) == 10 assert sample.value_counts('chain')[1] == 5 From d6423aa67ded1458af1dfb4d1a2a1d6893205e5b Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 11:18:49 +0100 Subject: [PATCH 47/71] partial completion of covariance --- anesthetic/weighted_pandas.py | 16 ++++++ tests/test_samples.py | 94 ++++++++++------------------------- 2 files changed, 41 insertions(+), 69 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 94fb089d..8bbd386d 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -79,6 +79,9 @@ class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): def sample(self, *args, **kwargs): # noqa: D102 return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + def cov(self, *args, **kwargs): # noqa: D102 + return super().cov(*args, 
**kwargs).set_weights(self.get_weights()) + class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" @@ -126,6 +129,19 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover def sample(self, *args, **kwargs): # noqa: D102 return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + def cov(self, *args, **kwargs): # noqa: D102 + ans = super().cov(*args, **kwargs) + index = ans.index.get_level_values(self.keys) + weights = self.get_weights()[index] + return ans.set_weights(weights, level=1) + + def corr(self, *args, **kwargs): # noqa: D102 + ans = super().corr(*args, **kwargs) + index = ans.index.get_level_values(self.keys) + weights = self.get_weights()[index] + return ans.set_weights(weights, level=1) + + class _WeightedObject(object): """Common methods for `WeightedSeries` and `WeightedDataFrame`. diff --git a/tests/test_samples.py b/tests/test_samples.py index 0744d5ea..a5b72ecf 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1352,11 +1352,10 @@ def test_groupby_stats(): assert chains.kurt().isweighted() is True assert chains.kurtosis().isweighted() is True assert chains.skew().isweighted() is True - assert chains.mad().isweighted() is True assert chains.sem().isweighted() is True assert chains.corr().isweighted() is True - assert chains.cov().isweighted() is True - assert chains.corrwith(mcmc).isweighted() is True + #assert chains.cov().isweighted() is True + #assert chains.corrwith(mcmc).isweighted() is True w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum() w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum() @@ -1397,69 +1396,25 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[mask].sem() .drop(('chain', '$n_\\mathrm{chain}$')), chains.sem().loc[chain, :]) - assert_allclose(mcmc.loc[i].drop( + assert_allclose(mcmc.loc[mask].drop( columns=('chain', '$n_\\mathrm{chain}$')).cov(), chains.cov().loc[chain, :]) - 
assert_allclose(mcmc.loc[i].drop( + assert_allclose(mcmc.loc[mask].drop( columns=('chain', '$n_\\mathrm{chain}$')).corr(), chains.corr().loc[chain, :]) group = chains.get_group(chain) - assert_allclose(mcmc.loc[i].mean(), group.mean()) - assert_allclose(mcmc.loc[i].std(), group.std()) - assert_allclose(mcmc.loc[i].median(), group.median()) - assert_allclose(mcmc.loc[i].var(), group.var()) - assert_allclose(mcmc.loc[i].kurt(), group.kurt()) - assert_allclose(mcmc.loc[i].kurtosis(), group.kurtosis()) - assert_allclose(mcmc.loc[i].skew(), group.skew()) - assert_allclose(mcmc.loc[i].mad(), group.mad()) - assert_allclose(mcmc.loc[i].sem(), group.sem()) - assert_allclose(mcmc.loc[i].cov(), group.cov()) - assert_allclose(mcmc.loc[i].corr(), group.corr()) - - assert_allclose(chains.get_group(chain).mean() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mean().loc[chain, :]) - assert_allclose(chains.get_group(chain).std() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.std().loc[chain, :]) - assert_allclose(chains.get_group(chain).median() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.median().loc[chain, :]) - assert_allclose(chains.get_group(chain).var() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.var().loc[chain, :]) - assert_allclose(chains.get_group(chain).kurt() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurt().loc[chain, :]) - assert_allclose(chains.get_group(chain).kurtosis() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurtosis().loc[chain, :]) - assert_allclose(chains.get_group(chain).skew() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.skew().loc[chain, :]) - assert_allclose(chains.get_group(chain).mad() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mad().loc[chain, :]) - assert_allclose(chains.get_group(chain).sem() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.sem().loc[chain, :]) - assert_allclose(chains.get_group(chain).corr() - .drop(('chain', '$n_\\mathrm{chain}$')) - .drop(('chain', 
'$n_\\mathrm{chain}$'), axis=1), - chains.corr().loc[chain, :]) - assert_allclose(chains.get_group(chain).cov() - .drop(('chain', '$n_\\mathrm{chain}$')) - .drop(('chain', '$n_\\mathrm{chain}$'), axis=1), - chains.cov().loc[chain, :]) - assert_allclose(chains.get_group(chain).corrwith(mcmc) - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.corrwith(mcmc).loc[chain, :] - .drop(('chain', '$n_\\mathrm{chain}$'))) - q = np.random.rand() - assert_allclose(chains.get_group(chain).quantile(q) - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.quantile(q).loc[chain, :]) + assert_allclose(mcmc.loc[mask].mean(), group.mean()) + assert_allclose(mcmc.loc[mask].std(), group.std()) + assert_allclose(mcmc.loc[mask].median(), group.median()) + assert_allclose(mcmc.loc[mask].var(), group.var()) + assert_allclose(mcmc.loc[mask].kurt(), group.kurt()) + assert_allclose(mcmc.loc[mask].kurtosis(), group.kurtosis()) + assert_allclose(mcmc.loc[mask].skew(), group.skew()) + assert_allclose(mcmc.loc[mask].mad(), group.mad()) + assert_allclose(mcmc.loc[mask].sem(), group.sem()) + assert_allclose(mcmc.loc[mask].cov(), group.cov()) + assert_allclose(mcmc.loc[mask].corr(), group.corr()) assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean()) @@ -1496,15 +1451,16 @@ def test_groupby_stats(): chains[[col]].quantile(q).loc[chain, :]) group = chains[[col]].get_group(chain) - assert_allclose(mcmc.loc[i, col].mean(), group.mean()) - assert_allclose(mcmc.loc[i, col].std(), group.std()) - assert_allclose(mcmc.loc[i, col].median(), group.median()) - assert_allclose(mcmc.loc[i, col].var(), group.var()) - assert_allclose(mcmc.loc[i, col].kurt(), group.kurt()) - assert_allclose(mcmc.loc[i, col].kurtosis(), group.kurtosis()) - assert_allclose(mcmc.loc[i, col].skew(), group.skew()) - assert_allclose(mcmc.loc[i, col].mad(), group.mad()) - assert_allclose(mcmc.loc[i, col].sem(), group.sem()) + assert_allclose(mcmc.loc[mask, col].mean(), group.mean()) + 
assert_allclose(mcmc.loc[mask, col].std(), group.std()) + assert_allclose(mcmc.loc[mask, col].median(), group.median()) + assert_allclose(mcmc.loc[mask, col].var(), group.var()) + assert_allclose(mcmc.loc[mask, col].kurt(), group.kurt()) + assert_allclose(mcmc.loc[mask, col].kurtosis(), + group.kurtosis()) + assert_allclose(mcmc.loc[mask, col].skew(), group.skew()) + assert_allclose(mcmc.loc[mask, col].mad(), group.mad()) + assert_allclose(mcmc.loc[mask, col].sem(), group.sem()) sample = chains.sample(5) From 706d75913e0147e7bb9cade41fe6d1bc47646f2b Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 11:42:05 +0100 Subject: [PATCH 48/71] Now using rather than --- tests/test_samples.py | 131 +++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 71 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index a5b72ecf..6d701de2 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1343,7 +1343,8 @@ def test_old_gui(): def test_groupby_stats(): mcmc = read_chains('./tests/example_data/cb') - chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$')) + params = ['x0', 'x1'] + chains = mcmc[params + ['chain']].groupby(('chain', '$n_\\mathrm{chain}$')) assert chains.mean().isweighted() is True assert chains.std().isweighted() is True @@ -1354,7 +1355,7 @@ def test_groupby_stats(): assert chains.skew().isweighted() is True assert chains.sem().isweighted() is True assert chains.corr().isweighted() is True - #assert chains.cov().isweighted() is True + assert chains.cov().isweighted() is True #assert chains.corrwith(mcmc).isweighted() is True w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum() @@ -1369,88 +1370,77 @@ def test_groupby_stats(): for chain in [1, 2]: mask = mcmc.chain == chain - assert_allclose(mcmc.loc[mask].mean() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mean().loc[chain, :]) - assert_allclose(mcmc.loc[mask].std() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.std().loc[chain, :]) - 
assert_allclose(mcmc.loc[mask].median() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.median().loc[chain, :]) - assert_allclose(mcmc.loc[mask].var() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.var().loc[chain, :]) - assert_allclose(mcmc.loc[mask].kurt() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurt().loc[chain, :]) - assert_allclose(mcmc.loc[mask].kurtosis() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.kurtosis().loc[chain, :]) - assert_allclose(mcmc.loc[mask].skew() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.skew().loc[chain, :]) - assert_allclose(mcmc.loc[mask].mad() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.mad().loc[chain, :]) - assert_allclose(mcmc.loc[mask].sem() - .drop(('chain', '$n_\\mathrm{chain}$')), - chains.sem().loc[chain, :]) - assert_allclose(mcmc.loc[mask].drop( - columns=('chain', '$n_\\mathrm{chain}$')).cov(), - chains.cov().loc[chain, :]) - assert_allclose(mcmc.loc[mask].drop( - columns=('chain', '$n_\\mathrm{chain}$')).corr(), - chains.corr().loc[chain, :]) - - group = chains.get_group(chain) - assert_allclose(mcmc.loc[mask].mean(), group.mean()) - assert_allclose(mcmc.loc[mask].std(), group.std()) - assert_allclose(mcmc.loc[mask].median(), group.median()) - assert_allclose(mcmc.loc[mask].var(), group.var()) - assert_allclose(mcmc.loc[mask].kurt(), group.kurt()) - assert_allclose(mcmc.loc[mask].kurtosis(), group.kurtosis()) - assert_allclose(mcmc.loc[mask].skew(), group.skew()) - assert_allclose(mcmc.loc[mask].mad(), group.mad()) - assert_allclose(mcmc.loc[mask].sem(), group.sem()) - assert_allclose(mcmc.loc[mask].cov(), group.cov()) - assert_allclose(mcmc.loc[mask].corr(), group.corr()) - - assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), - chains.mean().mean()) - - for col in mcmc.columns: + assert_allclose(mcmc.loc[mask, params].mean(), + chains.mean().loc[chain]) + assert_allclose(mcmc.loc[mask, params].std(), + chains.std().loc[chain]) + assert_allclose(mcmc.loc[mask, 
params].median(), + chains.median().loc[chain]) + assert_allclose(mcmc.loc[mask, params].var(), + chains.var().loc[chain]) + assert_allclose(mcmc.loc[mask, params].kurt(), + chains.kurt().loc[chain]) + assert_allclose(mcmc.loc[mask, params].kurtosis(), + chains.kurtosis().loc[chain]) + assert_allclose(mcmc.loc[mask, params].skew(), + chains.skew().loc[chain]) + assert_allclose(mcmc.loc[mask, params].mad(), + chains.mad().loc[chain]) + assert_allclose(mcmc.loc[mask, params].sem(), + chains.sem().loc[chain]) + assert_allclose(mcmc.loc[mask, params].cov(), + chains.cov().loc[chain]) + assert_allclose(mcmc.loc[mask, params].corr(), + chains.corr().loc[chain]) + + group = chains.get_group(chain).drop( + columns=('chain', '$n_\\mathrm{chain}$')) + assert_allclose(mcmc.loc[mask, params].mean(), group.mean()) + assert_allclose(mcmc.loc[mask, params].std(), group.std()) + assert_allclose(mcmc.loc[mask, params].median(), group.median()) + assert_allclose(mcmc.loc[mask, params].var(), group.var()) + assert_allclose(mcmc.loc[mask, params].kurt(), group.kurt()) + assert_allclose(mcmc.loc[mask, params].kurtosis(), group.kurtosis()) + assert_allclose(mcmc.loc[mask, params].skew(), group.skew()) + assert_allclose(mcmc.loc[mask, params].mad(), group.mad()) + assert_allclose(mcmc.loc[mask, params].sem(), group.sem()) + assert_allclose(mcmc.loc[mask, params].cov(), group.cov()) + assert_allclose(mcmc.loc[mask, params].corr(), group.corr()) + + assert_allclose(mcmc[params].mean(), chains.mean().mean()) + + for col in params: if 'chain' not in col: for chain in [1, 2]: mask = mcmc.chain == chain assert_allclose(mcmc.loc[mask, col].mean(), - chains[[col]].mean().loc[chain, :]) + chains[col].mean().loc[chain]) assert_allclose(mcmc.loc[mask, col].std(), - chains[[col]].std().loc[chain, :]) + chains[col].std().loc[chain]) assert_allclose(mcmc.loc[mask, col].median(), - chains[[col]].median().loc[chain, :]) + chains[col].median().loc[chain]) assert_allclose(mcmc.loc[mask, col].var(), - 
chains[[col]].var().loc[chain, :]) + chains[col].var().loc[chain]) assert_allclose(mcmc.loc[mask, col].kurt(), - chains[[col]].kurt().loc[chain, :]) + chains[col].kurt().loc[chain]) assert_allclose(mcmc.loc[mask, col].kurtosis(), - chains[[col]].kurtosis().loc[chain, :]) + chains[col].kurtosis().loc[chain]) assert_allclose(mcmc.loc[mask, col].skew(), - chains[[col]].skew().loc[chain, :]) - assert_allclose(mcmc.loc[mask, col].mad(), - chains[[col]].mad().loc[chain, :]) + chains[col].skew().loc[chain]) + #assert_allclose(mcmc.loc[mask, col].mad(), + # chains[col].mad().loc[chain]) assert_allclose(mcmc.loc[mask, col].sem(), - chains[[col]].sem().loc[chain, :]) - assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), - chains[[col]].cov().loc[chain, :]) - assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), - chains[[col]].corr(mcmc.loc[mask, col]) - .loc[chain, :]) + chains[col].sem().loc[chain]) + #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), + # chains[col].cov().loc[chain, :]) + #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), + # chains[col].corr(mcmc.loc[mask, col]) + # .loc[chain, :]) q = np.random.rand() assert_allclose(mcmc.loc[mask, col].quantile(q), - chains[[col]].quantile(q).loc[chain, :]) + chains[col].quantile(q).loc[chain]) - group = chains[[col]].get_group(chain) + group = chains[col].get_group(chain) assert_allclose(mcmc.loc[mask, col].mean(), group.mean()) assert_allclose(mcmc.loc[mask, col].std(), group.std()) assert_allclose(mcmc.loc[mask, col].median(), group.median()) @@ -1462,7 +1452,6 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[mask, col].mad(), group.mad()) assert_allclose(mcmc.loc[mask, col].sem(), group.sem()) - sample = chains.sample(5) assert len(sample) == 10 assert sample.value_counts('chain')[1] == 5 From bf07118d3bcbb917c85152085c7d0a59341d729d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 12:50:00 +0100 Subject: [PATCH 49/71] Added a wrapper for cov, corr, 
corrwith --- anesthetic/weighted_pandas.py | 41 +++++++++++++++-------------------- tests/test_samples.py | 8 +++---- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 8bbd386d..c0e950cc 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -53,11 +53,6 @@ def kurtosis(self, *args, **kwargs): # noqa: D102 *args, **kwargs)).set_weights(self.get_weights()) return result.__finalize__(self.obj, method="groupby") - def skew(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).skew( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") - def sem(self, *args, **kwargs): # noqa: D102 result = self.agg(lambda df: self.obj._constructor(df).sem( *args, **kwargs)).set_weights(self.get_weights()) @@ -72,6 +67,21 @@ def get_weights(self): """Return the weights of the grouped samples.""" return self.agg(lambda df: df.get_weights().sum()) + def _make_wrapper(self, name): + _wrapper = super()._make_wrapper(name) + + def wrapper(*args, **kwargs): + result = _wrapper(*args, **kwargs) + try: + index = result.index.get_level_values(self.keys) + weights = self.get_weights()[index] + except KeyError: + weights = self.get_weights() + return result.set_weights(weights, level=1) + + wrapper.__name__ = name + return wrapper + class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" @@ -79,9 +89,6 @@ class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): def sample(self, *args, **kwargs): # noqa: D102 return super().sample(weights=self.obj.get_weights(), *args, **kwargs) - def cov(self, *args, **kwargs): # noqa: D102 - return super().cov(*args, **kwargs).set_weights(self.get_weights()) - class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): """Weighted version of 
``pandas.core.groupby.DataFrameGroupBy``.""" @@ -105,7 +112,9 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover as_index=self.as_index, sort=self.sort, group_keys=self.group_keys, + squeeze=self.squeeze, observed=self.observed, + mutated=self.mutated, dropna=self.dropna, ) elif ndim == 1: @@ -115,11 +124,10 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover subset, level=self.level, grouper=self.grouper, - exclusions=self.exclusions, selection=key, - as_index=self.as_index, sort=self.sort, group_keys=self.group_keys, + squeeze=self.squeeze, observed=self.observed, dropna=self.dropna, ) @@ -129,19 +137,6 @@ def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover def sample(self, *args, **kwargs): # noqa: D102 return super().sample(weights=self.obj.get_weights(), *args, **kwargs) - def cov(self, *args, **kwargs): # noqa: D102 - ans = super().cov(*args, **kwargs) - index = ans.index.get_level_values(self.keys) - weights = self.get_weights()[index] - return ans.set_weights(weights, level=1) - - def corr(self, *args, **kwargs): # noqa: D102 - ans = super().corr(*args, **kwargs) - index = ans.index.get_level_values(self.keys) - weights = self.get_weights()[index] - return ans.set_weights(weights, level=1) - - class _WeightedObject(object): """Common methods for `WeightedSeries` and `WeightedDataFrame`. 
diff --git a/tests/test_samples.py b/tests/test_samples.py index 6d701de2..6ece6cdc 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1356,7 +1356,7 @@ def test_groupby_stats(): assert chains.sem().isweighted() is True assert chains.corr().isweighted() is True assert chains.cov().isweighted() is True - #assert chains.corrwith(mcmc).isweighted() is True + assert chains.corrwith(mcmc).isweighted() is True w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum() w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum() @@ -1427,12 +1427,12 @@ def test_groupby_stats(): chains[col].kurtosis().loc[chain]) assert_allclose(mcmc.loc[mask, col].skew(), chains[col].skew().loc[chain]) - #assert_allclose(mcmc.loc[mask, col].mad(), - # chains[col].mad().loc[chain]) + assert_allclose(mcmc.loc[mask, col].mad(), + chains[col].mad().loc[chain]) assert_allclose(mcmc.loc[mask, col].sem(), chains[col].sem().loc[chain]) #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), - # chains[col].cov().loc[chain, :]) + # chains[col].cov(mcmc.loc[mask, col])) #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), # chains[col].corr(mcmc.loc[mask, col]) # .loc[chain, :]) From 17d4332671b9b1de97e0a625c91033a55ab66346 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 19:41:59 +0100 Subject: [PATCH 50/71] corr and cov now working --- anesthetic/weighted_pandas.py | 35 +++++++++++++++++++++++++---------- tests/test_samples.py | 16 +++++++++++----- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index c0e950cc..2a2228b6 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -12,6 +12,7 @@ from numpy.ma import masked_array from anesthetic.utils import (compress_weights, channel_capacity, quantile, temporary_seed, adjust_docstrings) +from pandas.core.dtypes.missing import notna class WeightedGroupBy(GroupBy): @@ -263,18 +264,32 @@ def var(self, skipna=True): # 
noqa: D102 return np.average(masked_array((self-mean)**2, null), weights=self.get_weights()) - def cov(self, other, skipna=True): # noqa: D102 - null = (self.isnull() | other.isnull()) & skipna - x = self.mean(skipna=skipna) - y = other.mean(skipna=skipna) - if np.isnan(x) or np.isnan(y): + def cov(self, other, min_periods=None, *args, **kwargs): # noqa: D102 + + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: return np.nan - return np.average(masked_array((self-x)*(other-y), null), - weights=self.get_weights()) - def corr(self, other, method="pearson", skipna=True): # noqa: D102 - norm = self.std(skipna=skipna)*other.std(skipna=skipna) - return self.cov(other, skipna=skipna)/norm + if min_periods is None: + min_periods = 1 + + weights = self.index.to_frame()['weights'] + weights, _ = weights.align(other, join="inner", copy=False) + + valid = notna(this) & notna(other) + if not valid.all(): + this = this[valid] + other = other[valid] + weights = weights[valid] + + if len(this) < min_periods: + return np.nan + + return np.cov(this, other, aweights=weights)[0, 1] + + def corr(self, other, *args, **kwargs): # noqa: D102 + norm = self.std(skipna=True)*other.std(skipna=True) + return self.cov(other, *args, **kwargs)/norm def kurt(self, skipna=True): # noqa: D102 null = self.isnull() & skipna diff --git a/tests/test_samples.py b/tests/test_samples.py index 6ece6cdc..8917efc2 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1431,11 +1431,12 @@ def test_groupby_stats(): chains[col].mad().loc[chain]) assert_allclose(mcmc.loc[mask, col].sem(), chains[col].sem().loc[chain]) - #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), - # chains[col].cov(mcmc.loc[mask, col])) - #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), - # chains[col].corr(mcmc.loc[mask, col]) - # .loc[chain, :]) + assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), + chains[col].cov(mcmc.loc[mask, col]) + .loc[chain]) + 
assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), + chains[col].corr(mcmc.loc[mask, col]) + .loc[chain]) q = np.random.rand() assert_allclose(mcmc.loc[mask, col].quantile(q), chains[col].quantile(q).loc[chain]) @@ -1452,6 +1453,11 @@ def test_groupby_stats(): assert_allclose(mcmc.loc[mask, col].mad(), group.mad()) assert_allclose(mcmc.loc[mask, col].sem(), group.sem()) + assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]), + group.cov(mcmc.loc[mask, col])) + assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]), + group.corr(mcmc.loc[mask, col])) + sample = chains.sample(5) assert len(sample) == 10 assert sample.value_counts('chain')[1] == 5 From a71151ee297733007c22ef73294b1d4246a4117f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 20:35:26 +0100 Subject: [PATCH 51/71] reduced code repetition --- anesthetic/weighted_pandas.py | 38 +++++++++++------------------------ 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 2a2228b6..9da1dfd9 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -21,48 +21,34 @@ class WeightedGroupBy(GroupBy): grouper: ops.BaseGrouper """:meta private:""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def _add_weights(self, name, *args, **kwargs): + result = self.agg(lambda df: getattr(self.obj._constructor(df), name) + (*args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") def mean(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).mean( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("mean", *args, **kwargs) def std(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).std( - *args, **kwargs)).set_weights(self.get_weights()) - 
return result.__finalize__(self.obj, method="groupby") + return self._add_weights("std", *args, **kwargs) def median(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).median( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("median", *args, **kwargs) def var(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).var( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("var", *args, **kwargs) def kurt(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).kurt( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("kurt", *args, **kwargs) def kurtosis(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).kurtosis( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("kurtosis", *args, **kwargs) def sem(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).sem( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("sem", *args, **kwargs) def quantile(self, *args, **kwargs): # noqa: D102 - result = self.agg(lambda df: self.obj._constructor(df).quantile( - *args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") + return self._add_weights("quantile", *args, **kwargs) def get_weights(self): """Return the weights of the grouped samples.""" From 2935434f61d9f8c3fae355a6ee04d6825c723e2a Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 23:09:14 +0100 Subject: [PATCH 52/71] corrwith --- 
anesthetic/weighted_pandas.py | 39 +++++++++++++++++++++++++++++------ tests/test_samples.py | 3 +++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 9da1dfd9..9e15357f 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -230,6 +230,8 @@ class WeightedSeries(_WeightedObject, Series): """Weighted version of :class:`pandas.Series`.""" def mean(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan null = self.isnull() & skipna return np.average(masked_array(self, null), weights=self.get_weights()) @@ -243,6 +245,8 @@ def median(self, *args, **kwargs): # noqa: D102 return self.quantile(*args, **kwargs) def var(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan null = self.isnull() & skipna mean = self.mean(skipna=skipna) if np.isnan(mean): @@ -278,6 +282,8 @@ def corr(self, other, *args, **kwargs): # noqa: D102 return self.cov(other, *args, **kwargs)/norm def kurt(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan null = self.isnull() & skipna mean = self.mean(skipna=skipna) std = self.std(skipna=skipna) @@ -287,6 +293,8 @@ def kurt(self, skipna=True): # noqa: D102 weights=self.get_weights()) def skew(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan null = self.isnull() & skipna mean = self.mean(skipna=skipna) std = self.std(skipna=skipna) @@ -296,6 +304,8 @@ def skew(self, skipna=True): # noqa: D102 weights=self.get_weights()) def mad(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan null = self.isnull() & skipna mean = self.mean(skipna=skipna) if np.isnan(mean): @@ -369,6 +379,9 @@ class WeightedDataFrame(_WeightedObject, DataFrame): def mean(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return 
self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) null = self.isnull() & skipna mean = np.average(masked_array(self, null), weights=self.get_weights(axis), axis=axis) @@ -387,6 +400,9 @@ def median(self, *args, **kwargs): # noqa: D102 def var(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) null = self.isnull() & skipna mean = self.mean(axis=axis, skipna=skipna) var = np.average(masked_array((self-mean)**2, null), @@ -423,14 +439,19 @@ def corr(self, method="pearson", skipna=True, def corrwith(self, other, axis=0, drop=False, method="pearson", *args, **kwargs): # noqa: D102 - if self.isweighted(axis): + axis = self._get_axis_number(axis) + if not self.isweighted(axis): + return super().corrwith(other, drop=drop, axis=axis, method=method, + *args, **kwargs) + else: if isinstance(other, Series): answer = self.apply(lambda x: other.corr(x, method=method), axis=axis) return self._constructor_sliced(answer) left, right = self.align(other, join="inner", copy=False) - weights = self.get_weights(axis) + weights = self.index.to_frame()['weights'] + weights, _ = weights.align(other, join="inner", copy=False) if axis == 1: left = left.T @@ -444,7 +465,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson", ldem = left - left.mean() rdem = right - right.mean() - num = (ldem * rdem * weights[:, None]).sum() + num = (ldem * rdem * weights.to_numpy()[:, None]).sum() dom = weights.sum() * left.std() * right.std() correl = num / dom @@ -460,12 +481,12 @@ def corrwith(self, other, axis=0, drop=False, method="pearson", index=idx_diff)]) return self._constructor_sliced(correl) - else: - return super().corrwith(other, drop=drop, axis=axis, method=method, - *args, **kwargs) def kurt(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return 
self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) null = self.isnull() & skipna mean = self.mean(axis=axis, skipna=skipna) std = self.std(axis=axis, skipna=skipna) @@ -477,6 +498,9 @@ def kurt(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 def skew(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) null = self.isnull() & skipna mean = self.mean(axis=axis, skipna=skipna) std = self.std(axis=axis, skipna=skipna) @@ -488,6 +512,9 @@ def skew(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 def mad(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) null = self.isnull() & skipna mean = self.mean(axis=axis, skipna=skipna) mad = np.average(masked_array(abs(self-mean), null), diff --git a/tests/test_samples.py b/tests/test_samples.py index 8917efc2..8f308cf1 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1356,6 +1356,7 @@ def test_groupby_stats(): assert chains.sem().isweighted() is True assert chains.corr().isweighted() is True assert chains.cov().isweighted() is True + assert chains.hist().isweighted() is True assert chains.corrwith(mcmc).isweighted() is True w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum() @@ -1392,6 +1393,8 @@ def test_groupby_stats(): chains.cov().loc[chain]) assert_allclose(mcmc.loc[mask, params].corr(), chains.corr().loc[chain]) + assert_allclose([1, 1], chains.corrwith(mcmc.loc[mask, params] + ).loc[chain]) group = chains.get_group(chain).drop( columns=('chain', '$n_\\mathrm{chain}$')) From 93b06a03cb004e6cdeb4afc075e2bfe91094d0d1 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 23:35:55 +0100 Subject: [PATCH 53/71] Corrections to two extra functions --- anesthetic/weighted_pandas.py | 
11 ++++++----- tests/test_weighted_pandas.py | 8 +------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index 9e15357f..ac595438 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -411,10 +411,10 @@ def var(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 else: return super().var(axis=axis, skipna=skipna, *args, **kwargs) - def cov(self, skipna=True, *args, **kwargs): # noqa: D102 + def cov(self, *args, **kwargs): # noqa: D102 if self.isweighted(): - null = self.isnull() & skipna - mean = self.mean(skipna=skipna) + null = self.isnull() + mean = self.mean(skipna=True) x = masked_array(self - mean, null) cov = np.ma.dot(self.get_weights()*x.T, x) \ / self.get_weights().sum().T @@ -450,13 +450,14 @@ def corrwith(self, other, axis=0, drop=False, method="pearson", return self._constructor_sliced(answer) left, right = self.align(other, join="inner", copy=False) - weights = self.index.to_frame()['weights'] - weights, _ = weights.align(other, join="inner", copy=False) if axis == 1: left = left.T right = right.T + weights = left.index.to_frame()['weights'] + weights, _ = weights.align(right, join="inner", copy=False) + # mask missing values left = left + right * 0 right = right + left * 0 diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index 71128e08..f68e4723 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -176,7 +176,7 @@ def test_WeightedDataFrame_corrwith(frame): assert isinstance(correl, WeightedSeries) assert not correl.isweighted() assert_array_equal(correl.index, frame.columns) - assert_allclose(correl, frame.corr()['A']) + assert_allclose(correl, frame.corr()['A'], atol=1e-2) correl = frame.corrwith(frame[['A', 'B']]) assert isinstance(correl, WeightedSeries) @@ -490,12 +490,6 @@ def test_WeightedSeries_cov(frame): assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2) 
assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2) - frame.loc[0, 'B'] = np.nan - assert ~np.isnan(frame.A.cov(frame.B)) - assert np.isnan(frame.A.cov(frame.B, skipna=False)) - assert ~np.isnan(frame.B.cov(frame.A)) - assert np.isnan(frame.B.cov(frame.A, skipna=False)) - def test_WeightedSeries_corr(frame): assert_allclose(frame.A.corr(frame.A), 1., atol=1e-2) From 0655a9c20f6828b4ea9e90ec1c6a3b3cec992568 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 29 Mar 2023 23:59:49 +0100 Subject: [PATCH 54/71] skipna no longer available for cov --- tests/test_weighted_pandas.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index f68e4723..869bf0a8 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -417,12 +417,6 @@ def test_WeightedDataFrame_nan(frame): assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6], [True, False, False, False, False, False]) - assert ~frame.cov().isna().any().any() - ans = np.zeros((6, 6), dtype=bool) - ans[0] = True - ans[:, 0] = True - assert_array_equal(frame.cov(skipna=False).isna(), ans) - frame['B'][2] = np.nan assert ~frame.mean().isna().any() assert_array_equal(frame.mean(skipna=False).isna(), @@ -436,11 +430,6 @@ def test_WeightedDataFrame_nan(frame): assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6], [True, False, True, False, False, False]) - assert ~frame.cov().isna().any().any() - ans[1] = True - ans[:, 1] = True - assert_array_equal(frame.cov(skipna=False).isna(), ans) - frame['C'][4] = np.nan frame['D'][5] = np.nan frame['E'][6] = np.nan @@ -455,9 +444,6 @@ def test_WeightedDataFrame_nan(frame): assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6], [True, False, True, False, True, True]) - assert ~frame.cov().isna().any().any() - assert frame.cov(skipna=False).isna().all().all() - assert_allclose(frame.mean(), 0.5, atol=1e-2) assert_allclose(frame.std(), (1./12)**0.5, atol=1e-2) 
assert_allclose(frame.cov(), (1./12)*np.identity(6), atol=1e-2) From 33dd6e0ad74946eefe526c16918448ae67fb18ba Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 30 Mar 2023 07:33:41 +0100 Subject: [PATCH 55/71] Completed coverage with new nan --- anesthetic/weighted_pandas.py | 4 ++++ tests/test_weighted_pandas.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index ac595438..a7b1e7de 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -242,6 +242,8 @@ def kurtosis(self, *args, **kwargs): # noqa: D102 return self.kurt(*args, **kwargs) def median(self, *args, **kwargs): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan return self.quantile(*args, **kwargs) def var(self, skipna=True): # noqa: D102 @@ -317,6 +319,8 @@ def sem(self, skipna=True): # noqa: D102 return np.sqrt(self.var(skipna=skipna)/self.neff()) def quantile(self, q=0.5, interpolation='linear'): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan return quantile(self.to_numpy(), q, self.get_weights(), interpolation) def compress(self, ncompress=True): diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index 869bf0a8..2aaa5413 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -453,6 +453,18 @@ def test_WeightedDataFrame_nan(frame): assert isinstance(frame.mean(axis=1), WeightedSeries) assert frame.mean(axis=1).isweighted() + assert frame[:0].mean().isna().all() + assert frame[:0].std().isna().all() + assert frame[:0].median().isna().all() + assert frame[:0].var().isna().all() + assert frame[:0].cov().isna().all().all() + assert frame[:0].corr().isna().all().all() + assert frame[:0].kurt().isna().all() + assert frame[:0].skew().isna().all() + assert frame[:0].mad().isna().all() + assert frame[:0].sem().isna().all() + assert frame[:0].quantile().isna().all() + def test_WeightedSeries_mean(series): 
series[0] = np.nan @@ -588,6 +600,18 @@ def test_WeightedSeries_nan(series): assert_allclose(series.var(), 1./12, atol=1e-2) assert_allclose(series.std(), (1./12)**0.5, atol=1e-2) + assert np.isnan(series[:0].mean()) + assert np.isnan(series[:0].std()) + assert np.isnan(series[:0].median()) + assert np.isnan(series[:0].var()) + assert np.isnan(series[:0].cov(series)) + assert np.isnan(series[:0].corr(series)) + assert np.isnan(series[:0].kurt()) + assert np.isnan(series[:0].skew()) + assert np.isnan(series[:0].mad()) + assert np.isnan(series[:0].sem()) + assert np.isnan(series[:0].quantile()) + @pytest.fixture def mcmc_df(): From 5ec1fecd33235113f37c8539c3ffa3e6b7234bb4 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 30 Mar 2023 10:39:22 +0100 Subject: [PATCH 56/71] Increase coverage --- anesthetic/weighted_pandas.py | 8 +------- tests/test_weighted_pandas.py | 4 ++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index a7b1e7de..b1a85410 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -256,15 +256,12 @@ def var(self, skipna=True): # noqa: D102 return np.average(masked_array((self-mean)**2, null), weights=self.get_weights()) - def cov(self, other, min_periods=None, *args, **kwargs): # noqa: D102 + def cov(self, other, *args, **kwargs): # noqa: D102 this, other = self.align(other, join="inner", copy=False) if len(this) == 0: return np.nan - if min_periods is None: - min_periods = 1 - weights = self.index.to_frame()['weights'] weights, _ = weights.align(other, join="inner", copy=False) @@ -274,9 +271,6 @@ def cov(self, other, min_periods=None, *args, **kwargs): # noqa: D102 other = other[valid] weights = weights[valid] - if len(this) < min_periods: - return np.nan - return np.cov(this, other, aweights=weights)[0, 1] def corr(self, other, *args, **kwargs): # noqa: D102 diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index 
2aaa5413..271f98e8 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -488,6 +488,10 @@ def test_WeightedSeries_cov(frame): assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2) assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2) + frame['A'][0] = np.nan + assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2) + assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2) + def test_WeightedSeries_corr(frame): assert_allclose(frame.A.corr(frame.A), 1., atol=1e-2) From 5113b61efba108327645c1db4f430b59e0138ead Mon Sep 17 00:00:00 2001 From: Ormorod Date: Thu, 30 Mar 2023 16:29:20 +0100 Subject: [PATCH 57/71] add test for groupby().hist() --- tests/test_samples.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 8f308cf1..e08050d2 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1471,3 +1471,24 @@ def test_groupby_stats(): assert len(sample) == 10 assert sample.value_counts()[1] == 5 assert sample.value_counts()[2] == 5 + + +def test_groupby_plots(): + mcmc = read_chains('./tests/example_data/cb') + params = ['x0', 'x1'] + chains = mcmc[params + ['chain']].groupby(('chain', '$n_\\mathrm{chain}$')) + for param in params: + gb_plot = chains.hist(param) + for chain in [1, 2]: + mcmc_axes = mcmc.loc[mcmc.chain == chain].hist(param).flatten() + gb_axes = gb_plot[chain].values[0].flatten() + + mcmc_widths = [p.get_width() for ax in mcmc_axes + for p in ax.patches] + gb_widths = [p.get_width() for ax in gb_axes for p in ax.patches] + assert mcmc_widths == gb_widths + + mcmc_heights = [p.get_height() for ax in mcmc_axes + for p in ax.patches] + gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches] + assert mcmc_heights == gb_heights From 918986c6d64fa8a9fd55c55925a1d15d4d9c323a Mon Sep 17 00:00:00 2001 From: Ormorod Date: Thu, 30 Mar 2023 17:43:44 +0100 Subject: [PATCH 58/71] add test for groupby().plot.hist(), not happy with the janky slicing 
here... --- tests/test_samples.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index e08050d2..7414a206 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1486,9 +1486,21 @@ def test_groupby_plots(): mcmc_widths = [p.get_width() for ax in mcmc_axes for p in ax.patches] gb_widths = [p.get_width() for ax in gb_axes for p in ax.patches] - assert mcmc_widths == gb_widths + assert_allclose(mcmc_widths, gb_widths) mcmc_heights = [p.get_height() for ax in mcmc_axes for p in ax.patches] gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches] - assert mcmc_heights == gb_heights + assert_allclose(mcmc_heights, gb_heights) + plt.close() + + for param in params: + _, gb_ax = plt.subplots() + gb_plots = chains[param].plot.hist(ax=gb_ax) + _, mcmc_ax = plt.subplots() + for chain, gb_ax in zip([1, 2], gb_plots): + mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.hist( + ax=mcmc_ax) + mcmc_widths = [p.get_width() for p in mcmc_ax.patches] + gb_widths = [p.get_width() for p in gb_ax.patches] + assert_allclose(mcmc_widths, gb_widths) From b13b0a2c98167fe6f7026e8163137d2a2934652a Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 16:50:10 +0100 Subject: [PATCH 59/71] add test for groupby().plot.kde() --- tests/test_samples.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 7414a206..86cb4fba 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1492,7 +1492,7 @@ def test_groupby_plots(): for p in ax.patches] gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches] assert_allclose(mcmc_heights, gb_heights) - plt.close() + plt.close() for param in params: _, gb_ax = plt.subplots() @@ -1504,3 +1504,15 @@ def test_groupby_plots(): mcmc_widths = [p.get_width() for p in mcmc_ax.patches] gb_widths = [p.get_width() for p in gb_ax.patches] 
assert_allclose(mcmc_widths, gb_widths) + plt.close() + + for param in params: + _, gb_ax = plt.subplots() + gb_plots = chains[param].plot.kde(ax=gb_ax) + _, mcmc_ax = plt.subplots() + for chain, gb_ax in zip([1, 2], gb_plots): + mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.kde( + ax=mcmc_ax) + [assert_allclose(m.get_data(), g.get_data()) + for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] + plt.close() From 9709b380f7f059f45d06b700e7931697bf9ec449 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 16:52:19 +0100 Subject: [PATCH 60/71] add tests for hist_1d and kde_1d --- tests/test_samples.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 86cb4fba..5ee06413 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1506,6 +1506,18 @@ def test_groupby_plots(): assert_allclose(mcmc_widths, gb_widths) plt.close() + for param in params: + _, gb_ax = plt.subplots() + gb_plots = chains[param].plot.hist_1d(ax=gb_ax) + _, mcmc_ax = plt.subplots() + for chain, gb_ax in zip([1, 2], gb_plots): + mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.hist_1d( + ax=mcmc_ax) + mcmc_widths = [p.get_width() for p in mcmc_ax.patches] + gb_widths = [p.get_width() for p in gb_ax.patches] + assert_allclose(mcmc_widths, gb_widths) + plt.close() + for param in params: _, gb_ax = plt.subplots() gb_plots = chains[param].plot.kde(ax=gb_ax) @@ -1516,3 +1528,14 @@ def test_groupby_plots(): [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] plt.close() + + for param in params: + _, gb_ax = plt.subplots() + gb_plots = chains[param].plot.kde_1d(ax=gb_ax) + _, mcmc_ax = plt.subplots() + for chain, gb_ax in zip([1, 2], gb_plots): + mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.kde_1d( + ax=mcmc_ax) + [assert_allclose(m.get_data(), g.get_data()) + for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] + plt.close() From 
338fc8a5c0379063caf594348b04c9f198a81d7b Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 16:56:06 +0100 Subject: [PATCH 61/71] test for fastkde_1d --- tests/test_samples.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 5ee06413..224eaaaf 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1539,3 +1539,15 @@ def test_groupby_plots(): [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] plt.close() + + if 'fastkde' in sys.modules: + for param in params: + _, gb_ax = plt.subplots() + gb_plots = chains[param].plot.fastkde_1d(ax=gb_ax) + _, mcmc_ax = plt.subplots() + for chain, gb_ax in zip([1, 2], gb_plots): + mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.fastkde_1d( + ax=mcmc_ax) + [assert_allclose(m.get_data(), g.get_data()) + for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] + plt.close() From 0e2676a9dbf7cffba1a2b68ac055d42478785bf8 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 17:11:53 +0100 Subject: [PATCH 62/71] test for hist_2d --- tests/test_samples.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 224eaaaf..3ac178d2 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1540,6 +1540,19 @@ def test_groupby_plots(): for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] plt.close() + for chain, gb_ax in zip([1, 2], chains.plot.hist_2d(*params)): + mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.hist_2d(*params) + mcmc_widths = [p.get_width() for p in mcmc_ax.patches] + gb_widths = [p.get_width() for p in gb_ax.patches] + assert_allclose(mcmc_widths, gb_widths) + mcmc_heights = [p.get_height() for p in mcmc_ax.patches] + gb_heights = [p.get_height() for p in gb_ax.patches] + assert_allclose(mcmc_heights, gb_heights) + mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches] + gb_colors = [p.get_facecolor() 
for p in gb_ax.patches] + assert_allclose(mcmc_colors, gb_colors) + plt.close() + if 'fastkde' in sys.modules: for param in params: _, gb_ax = plt.subplots() From 9589921bda822cd55faed4104837d8529e14016d Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 17:23:33 +0100 Subject: [PATCH 63/71] plt.close('all') --- tests/test_samples.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index 3ac178d2..21e38b15 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1492,7 +1492,7 @@ def test_groupby_plots(): for p in ax.patches] gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches] assert_allclose(mcmc_heights, gb_heights) - plt.close() + plt.close('all') for param in params: _, gb_ax = plt.subplots() @@ -1504,7 +1504,7 @@ def test_groupby_plots(): mcmc_widths = [p.get_width() for p in mcmc_ax.patches] gb_widths = [p.get_width() for p in gb_ax.patches] assert_allclose(mcmc_widths, gb_widths) - plt.close() + plt.close('all') for param in params: _, gb_ax = plt.subplots() @@ -1516,7 +1516,7 @@ def test_groupby_plots(): mcmc_widths = [p.get_width() for p in mcmc_ax.patches] gb_widths = [p.get_width() for p in gb_ax.patches] assert_allclose(mcmc_widths, gb_widths) - plt.close() + plt.close('all') for param in params: _, gb_ax = plt.subplots() @@ -1527,7 +1527,7 @@ def test_groupby_plots(): ax=mcmc_ax) [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] - plt.close() + plt.close('all') for param in params: _, gb_ax = plt.subplots() @@ -1538,7 +1538,7 @@ def test_groupby_plots(): ax=mcmc_ax) [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] - plt.close() + plt.close('all') for chain, gb_ax in zip([1, 2], chains.plot.hist_2d(*params)): mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.hist_2d(*params) @@ -1551,7 +1551,7 @@ def test_groupby_plots(): mcmc_colors = 
[p.get_facecolor() for p in mcmc_ax.patches] gb_colors = [p.get_facecolor() for p in gb_ax.patches] assert_allclose(mcmc_colors, gb_colors) - plt.close() + plt.close('all') if 'fastkde' in sys.modules: for param in params: @@ -1563,4 +1563,4 @@ def test_groupby_plots(): ax=mcmc_ax) [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] - plt.close() + plt.close('all') From 4e8e50f382ee99dbe35af2688889b88469811c71 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 17:24:09 +0100 Subject: [PATCH 64/71] test for kde_2d --- tests/test_samples.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 21e38b15..9730f15a 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1553,6 +1553,16 @@ def test_groupby_plots(): assert_allclose(mcmc_colors, gb_colors) plt.close('all') + for chain, gb_ax in zip([1, 2], chains.plot.kde_2d(*params)): + mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.kde_2d(*params) + mcmc_verts = [p.get_verts() for p in mcmc_ax.patches] + gb_verts = [p.get_verts() for p in gb_ax.patches] + assert_allclose(mcmc_verts, gb_verts) + mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches] + gb_colors = [p.get_facecolor() for p in gb_ax.patches] + assert_allclose(mcmc_colors, gb_colors) + plt.close('all') + if 'fastkde' in sys.modules: for param in params: _, gb_ax = plt.subplots() From a631d601b53eba9ea81bf74a63f7f29c0f14d0f1 Mon Sep 17 00:00:00 2001 From: Ormorod Date: Fri, 31 Mar 2023 17:24:54 +0100 Subject: [PATCH 65/71] test for fastkde_2d --- tests/test_samples.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index 9730f15a..8a31eda6 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1574,3 +1574,13 @@ def test_groupby_plots(): [assert_allclose(m.get_data(), g.get_data()) for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())] plt.close('all') + + 
for chain, gb_ax in zip([1, 2], chains.plot.fastkde_2d(*params)): + mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.fastkde_2d(*params) + mcmc_verts = [p.get_verts() for p in mcmc_ax.patches] + gb_verts = [p.get_verts() for p in gb_ax.patches] + assert_allclose(mcmc_verts, gb_verts) + mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches] + gb_colors = [p.get_facecolor() for p in gb_ax.patches] + assert_allclose(mcmc_colors, gb_colors) + plt.close('all') From 132d80fcd3a60bb864c32876e85bdd041c9d874f Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 4 Apr 2023 17:40:30 +0100 Subject: [PATCH 66/71] Reinstated init function to get documentation to work --- .github/workflows/CI.yaml | 1 - anesthetic/weighted_pandas.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 794814cf..2c56ff71 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -44,7 +44,6 @@ jobs: - name: Upgrade pip and install doc requirements run: | python -m pip install --upgrade pip - python -m pip install pip-tools python -m pip install -e ".[extras,docs]" - name: build documentation run: | diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py index b1a85410..cfe5a1d9 100644 --- a/anesthetic/weighted_pandas.py +++ b/anesthetic/weighted_pandas.py @@ -21,6 +21,9 @@ class WeightedGroupBy(GroupBy): grouper: ops.BaseGrouper """:meta private:""" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + def _add_weights(self, name, *args, **kwargs): result = self.agg(lambda df: getattr(self.obj._constructor(df), name) (*args, **kwargs)).set_weights(self.get_weights()) From 369c49cd245aedda43b02470b332201c66ccaf2e Mon Sep 17 00:00:00 2001 From: lukashergt Date: Tue, 4 Apr 2023 13:23:14 -0700 Subject: [PATCH 67/71] complete test coverage for explicit weight checks --- tests/test_samples.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_samples.py 
b/tests/test_samples.py index 8a31eda6..d1c54b5c 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -1368,6 +1368,11 @@ def test_groupby_stats(): assert np.all(chains.kurt().get_weights() == [w1, w2]) assert np.all(chains.kurtosis().get_weights() == [w1, w2]) assert np.all(chains.skew().get_weights() == [w1, w2]) + assert np.all(chains.sem().get_weights() == [w1, w2]) + w = [w1 for _ in range(len(params))] + [w2 for _ in range(len(params))] + assert np.all(chains.corr().get_weights() == w) + assert np.all(chains.cov().get_weights() == w) + assert np.all(chains.corrwith(mcmc).get_weights() == [w1, w2]) for chain in [1, 2]: mask = mcmc.chain == chain From 122bf2b9799d6cbbf7c4dbef00bc0fec88ca15ec Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 5 Apr 2023 14:29:51 +0100 Subject: [PATCH 68/71] Readme correction following #217 --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index c876a856..a00e6e9d 100644 --- a/README.rst +++ b/README.rst @@ -191,8 +191,8 @@ Why create another one? In general, any dedicated user of software will find tha .. 
code:: python - from anesthetic import MCMCSamples - samples = MCMCSamples(root=file_root) # Load the samples + from anesthetic import read_chains + samples = read_chains(file_root) # Load the samples samples['omegab'] = samples.omegabh2/(samples.H0/100)**2 # Define omegab samples.tex['omegab'] = '$\Omega_b$' # Label omegab samples.plot_1d('omegab') # Simple 1D plot From 5a5f106c382543e75d489afa5f6afc93d2d767c6 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Thu, 6 Apr 2023 22:57:34 -0700 Subject: [PATCH 69/71] fix `GelmanRubin` method now that `groupby` is fixed --- anesthetic/samples.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/anesthetic/samples.py b/anesthetic/samples.py index 038faea0..9a910185 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -515,7 +515,7 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False): Indicates whether to modify the existing array or return a copy. """ - chains = self.groupby(('chain', '$n_\\mathrm{chain}$'), + chains = self.groupby(('chain', '$n_\\mathrm{chain}$'), sort=False, group_keys=False) nchains = chains.ngroups if isinstance(burn_in, (int, float)): @@ -574,25 +574,32 @@ def Gelman_Rubin(self, params=None): and 'logL' not in key and 'chain' not in key] chains = self[params+['chain']].groupby( - ('chain', '$n_\\mathrm{chain}$') + ('chain', '$n_\\mathrm{chain}$'), sort=False, ) + nchains = chains.ngroups # Within chain variance ``W`` # (average variance within each chain): - W = chains.cov().groupby(level=['params', 'labels']).mean().to_numpy() - # TODO: the above line should be a weighted mean - # --> need to fix groupby for WeightedDataFrames! 
- + W = chains.cov().groupby(level=('params', 'labels'), sort=False).mean() # Between-chain variance ``B`` - # (variance of the chain means compared to the full mean): - means_diff = (chains.mean() - self[params].mean()).to_numpy() - B = (means_diff.T @ means_diff) / (chains.ngroups - 1) - # B = chains.mean().cov().to_numpy() - # TODO: fix once groupby is fixed - - L = np.linalg.cholesky(W) - invL = np.linalg.inv(L) - D = np.linalg.eigvalsh(invL @ B @ invL.T) + # (variance of the chain means): + B = np.atleast_2d(np.cov(chains.mean().T, ddof=1)) + # We don't weight `B` with the effective number of samples (sum of the + # weights), here, because we want to notice outliers from shorter + # chains. + # In order to be conservative, we generally want to underestimate `W` + # and overestimate `B`, since `W` goes in the denominator and `B` in + # the numerator of the Gelman--Rubin statistic `Rminus1`. + + try: + invL = np.linalg.inv(np.linalg.cholesky(W)) + except np.linalg.LinAlgError as e: + raise np.linalg.LinAlgError( + "Make sure you do not have linearly dependent parameters, " + "e.g. having both `As` and `A=1e9*As` causes trouble.") from e + D = np.linalg.eigvalsh(invL @ ((nchains+1)/nchains * B) @ invL.T) + # The factor of `(nchains+1)/nchains` accounts for the additional + # uncertainty from using a finite number of chains. 
Rminus1 = np.max(np.abs(D)) return Rminus1 From 9d10ce5da10910cff4718dfd746ecb22df33f045 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Fri, 7 Apr 2023 01:16:28 -0700 Subject: [PATCH 70/71] add test for `LinAlgError` when covariance matrix is not positive definite --- tests/test_samples.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_samples.py b/tests/test_samples.py index d1c54b5c..f95855f9 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -436,6 +436,9 @@ def test_mcmc_stats(): assert mcmc_half.Gelman_Rubin() < 0.01 assert mcmc_half.Gelman_Rubin(['x0']) < 0.01 assert mcmc_half.Gelman_Rubin(['x1']) < 0.01 + with pytest.raises(np.linalg.LinAlgError): + mcmc['y'] = mcmc.x1 + mcmc.Gelman_Rubin(['x0', 'x1', 'y']) # more burn-in checks mcmc_new = mcmc.remove_burn_in(burn_in=200.9) From 85fa6aefb57906f7226a523d3e078fb153a49ec1 Mon Sep 17 00:00:00 2001 From: lukashergt Date: Fri, 7 Apr 2023 10:40:46 -0700 Subject: [PATCH 71/71] make linear dependence more blatant in check for `LinAlgError` --- tests/test_samples.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_samples.py b/tests/test_samples.py index f95855f9..c8a00445 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -437,8 +437,10 @@ def test_mcmc_stats(): assert mcmc_half.Gelman_Rubin(['x0']) < 0.01 assert mcmc_half.Gelman_Rubin(['x1']) < 0.01 with pytest.raises(np.linalg.LinAlgError): - mcmc['y'] = mcmc.x1 - mcmc.Gelman_Rubin(['x0', 'x1', 'y']) + mcmc['y1'] = mcmc.x1 + mcmc['y2'] = mcmc.x1 + mcmc['y3'] = mcmc.x1 + mcmc.Gelman_Rubin(['x0', 'x1', 'y1', 'y2', 'y3']) # more burn-in checks mcmc_new = mcmc.remove_burn_in(burn_in=200.9)