From ab634869d31e7f6ceeb5dd8769d31f5b45497e25 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:01:51 +0000
Subject: [PATCH 01/71] first pass at WeightedGroupBy

---
 anesthetic/weighted_pandas.py | 121 ++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index fb5ab1bc..dfd7d734 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -1,14 +1,59 @@
 """Pandas DataFrame and Series with weighted samples."""
 
+import warnings
 from inspect import signature
 import numpy as np
 from pandas import Series, DataFrame, concat, MultiIndex
+from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy
+from pandas._libs import lib
+from pandas._libs.lib import no_default
+from pandas.util._exceptions import find_stack_level
 from pandas.util import hash_pandas_object
 from numpy.ma import masked_array
 from anesthetic.utils import (compress_weights, channel_capacity, quantile,
                               temporary_seed, adjust_docstrings)
 
 
+class WeightedGroupBy(GroupBy):
+    def mean(self, numeric_only=False):
+        result = self.agg(lambda df: self.obj._constructor(df).mean(
+            numeric_only=numeric_only))
+        return result.__finalize__(self.obj, method="groupby")
+
+    def std(self, numeric_only=False):
+        result = self.agg(lambda df: self.obj._constructor(df).std(
+            numeric_only=numeric_only))
+        return result.__finalize__(self.obj, method="groupby")
+
+    def kurtosis(self, numeric_only=False):
+        result = self.agg(lambda df: self.obj._constructor(df).kurtosis(
+            numeric_only=numeric_only))
+        return result.__finalize__(self.obj, method="groupby")
+
+    def median(self, numeric_only=False):
+        result = self.agg(lambda df: self.obj._constructor(df).median(
+            numeric_only=numeric_only))
+        return result.__finalize__(self.obj, method="groupby")
+
+    def var(self, numeric_only=False):
+        result = self.agg(lambda df: self.obj._constructor(df).var(
+            numeric_only=numeric_only))
+        return result.__finalize__(self.obj, method="groupby")
+
+
+class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
+    def cov(self, other, skipna=True):
+        result = self.agg(lambda df: self.obj._constructor(df).cov(
+            other, skipna))
+        return result.__finalize__(self.obj, method="groupby")
+
+
+class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
+    def cov(self, skipna=True):
+        result = self.agg(lambda df: self.obj._constructor(df).cov(skipna))
+        return result.__finalize__(self.obj, method="groupby")
+
+
 class _WeightedObject(object):
     """Common methods for `WeightedSeries` and `WeightedDataFrame`.
 
@@ -204,6 +249,37 @@ def _constructor(self):
     def _constructor_expanddim(self):
         return WeightedDataFrame
 
+    def groupby(
+        self,
+        by=None,
+        axis=0,
+        level=None,
+        as_index=True,
+        sort=True,
+        group_keys=True,
+        observed=False,
+        dropna=True,
+    ) -> SeriesGroupBy:
+        from pandas.core.groupby.generic import SeriesGroupBy
+
+        if level is None and by is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+        if not as_index:
+            raise TypeError("as_index=False only valid with DataFrame")
+        axis = self._get_axis_number(axis)
+
+        return WeightedSeriesGroupBy(
+            obj=self,
+            keys=by,
+            axis=axis,
+            level=level,
+            as_index=as_index,
+            sort=sort,
+            group_keys=group_keys,
+            observed=observed,
+            dropna=dropna,
+        )
+
 
 class WeightedDataFrame(_WeightedObject, DataFrame):
     """Weighted version of :class:`pandas.DataFrame`."""
@@ -405,6 +481,51 @@ def _constructor_sliced(self):
     def _constructor(self):
         return WeightedDataFrame
 
+    def groupby(
+        self,
+        by=None,
+        axis=no_default,
+        level=None,
+        as_index: bool = True,
+        sort: bool = True,
+        group_keys: bool = True,
+        observed: bool = False,
+        dropna: bool = True,
+    ) -> DataFrameGroupBy:  # pragma: no cover
+        if axis is not lib.no_default:
+            axis = self._get_axis_number(axis)
+            if axis == 1:
+                warnings.warn(
+                    "DataFrame.groupby with axis=1 is deprecated. Do "
+                    "`frame.T.groupby(...)` without axis instead.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+            else:
+                warnings.warn(
+                    "The 'axis' keyword in DataFrame.groupby is deprecated "
+                    "and will be removed in a future version.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+        else:
+            axis = 0
+
+        if level is None and by is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+
+        return WeightedDataFrameGroupBy(
+            obj=self,
+            keys=by,
+            axis=axis,
+            level=level,
+            as_index=as_index,
+            sort=sort,
+            group_keys=group_keys,
+            observed=observed,
+            dropna=dropna,
+        )
+
 
 for cls in [WeightedDataFrame, WeightedSeries]:
     adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame')

From e4e834edacd51434a25245f91f9aeb82301946b3 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:10:17 +0000
Subject: [PATCH 02/71] correct cov

---
 anesthetic/weighted_pandas.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index dfd7d734..3f47a4b6 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -46,12 +46,13 @@ def cov(self, other, skipna=True):
         result = self.agg(lambda df: self.obj._constructor(df).cov(
             other, skipna))
         return result.__finalize__(self.obj, method="groupby")
+    def cov(self, other, skipna=True):
+        return self._op_via_apply("cov", other=other, skipna=skipna)
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
     def cov(self, skipna=True):
-        result = self.agg(lambda df: self.obj._constructor(df).cov(skipna))
-        return result.__finalize__(self.obj, method="groupby")
+        return self._op_via_apply("cov", skipna=skipna)
 
 
 class _WeightedObject(object):

From 94aecb98749dfe122ef437f8e9268197dd31416c Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:15:51 +0000
Subject: [PATCH 03/71] remove duplicate cov

---
 anesthetic/weighted_pandas.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 3f47a4b6..b710d371 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -42,10 +42,6 @@ def var(self, numeric_only=False):
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
-    def cov(self, other, skipna=True):
-        result = self.agg(lambda df: self.obj._constructor(df).cov(
-            other, skipna))
-        return result.__finalize__(self.obj, method="groupby")
     def cov(self, other, skipna=True):
         return self._op_via_apply("cov", other=other, skipna=skipna)
 

From ada20d705895302a3cf87f556fdd8596dc848431 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:25:01 +0000
Subject: [PATCH 04/71] give up on cov for now

---
 anesthetic/weighted_pandas.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index b710d371..1fd0dd6a 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -42,13 +42,11 @@ def var(self, numeric_only=False):
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
-    def cov(self, other, skipna=True):
-        return self._op_via_apply("cov", other=other, skipna=skipna)
+    pass
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
-    def cov(self, skipna=True):
-        return self._op_via_apply("cov", skipna=skipna)
+    pass
 
 
 class _WeightedObject(object):

From 3abca58e7841e5cfeb62a0c5ece4508a75085732 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:34:52 +0000
Subject: [PATCH 05/71] use Lukas' test

---
 tests/test_samples.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index de3145cd..5539a7be 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1332,3 +1332,32 @@ def test_old_gui():
         make_2d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'})
     with pytest.raises(NotImplementedError):
         make_1d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'})
+
+
+def test_groupby():
+    params = ['a', 'b']
+    data = np.random.rand(4, 2)
+    weights = np.random.randint(1, 10, 4)
+    samples = Samples(data, weights=weights, columns=params)
+    samples['group'] = np.ones(4, dtype=int)
+    samples.loc[2:, 'group'] = 2
+
+    group1_means = np.average(samples.loc[:1, params], axis=0)
+    group1_wmeans = np.average(samples.loc[:1, params], axis=0,
+                               weights=samples.get_weights()[:2])
+    group2_means = np.average(samples.loc[2:, params], axis=0)
+    group2_wmeans = np.average(samples.loc[2:, params], axis=0,
+                               weights=samples.get_weights()[2:])
+    group_means = np.vstack([group1_means,  group2_means])
+    group_wmeans = np.vstack([group1_wmeans, group2_wmeans])
+    group_weights = [samples.get_weights()[:2].sum(),
+                     samples.get_weights()[2:].sum()]
+    mean = np.average(group_means, axis=0)
+    wmean = np.average(group_wmeans, axis=0, weights=group_weights)
+
+    groups = samples.groupby('group')
+    print(groups.mean())
+    assert not np.any(samples.groupby('group').mean() == group_means)
+    assert np.all(samples.groupby('group').mean() == group_wmeans)
+
+    assert not np.any(samples.groupby('group').mean().mean() == mean)

From 626bce358764f58952a72c79bbcb015b3b5d07e8 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:38:14 +0000
Subject: [PATCH 06/71] remove unnecessary import

---
 anesthetic/weighted_pandas.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 1fd0dd6a..0f981899 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -255,8 +255,6 @@ def groupby(
         observed=False,
         dropna=True,
     ) -> SeriesGroupBy:
-        from pandas.core.groupby.generic import SeriesGroupBy
-
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         if not as_index:

From ca313333802cf9d2048a0b40d373ec5b39ff9835 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:45:47 +0000
Subject: [PATCH 07/71] remove currently unused lines from tests

---
 tests/test_samples.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 5539a7be..6b35674c 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1350,10 +1350,10 @@ def test_groupby():
                                weights=samples.get_weights()[2:])
     group_means = np.vstack([group1_means,  group2_means])
     group_wmeans = np.vstack([group1_wmeans, group2_wmeans])
-    group_weights = [samples.get_weights()[:2].sum(),
-                     samples.get_weights()[2:].sum()]
+    # group_weights = [samples.get_weights()[:2].sum(),
+    #                  samples.get_weights()[2:].sum()]
     mean = np.average(group_means, axis=0)
-    wmean = np.average(group_wmeans, axis=0, weights=group_weights)
+    # wmean = np.average(group_wmeans, axis=0, weights=group_weights)
 
     groups = samples.groupby('group')
     print(groups.mean())

From 9578b4bcc0ae6e4965ebc175b9753c9a001eab72 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:51:36 +0000
Subject: [PATCH 08/71] version bump

---
 README.rst             | 2 +-
 anesthetic/_version.py | 2 +-
 tests/test_samples.py  | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index c1260e94..31ec3a26 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.22
+:Version: 2.0.0-beta.23
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index 7d2b271c..d4666d28 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.0.0b22'
+__version__ = '2.0.0b23'
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6b35674c..6046e60a 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1361,3 +1361,4 @@ def test_groupby():
     assert np.all(samples.groupby('group').mean() == group_wmeans)
 
     assert not np.any(samples.groupby('group').mean().mean() == mean)
+    # assert np.all(samples.groupby('group').mean().mean() == wmean)

From 8531a5665e9046193295edd5acf3f94ef1e795b0 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 16:59:07 +0000
Subject: [PATCH 09/71] sort out docstrings

---
 anesthetic/weighted_pandas.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 0f981899..bb92f814 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -15,37 +15,43 @@
 
 
 class WeightedGroupBy(GroupBy):
-    def mean(self, numeric_only=False):
+    """Weighted version of :class:`pandas.core.groupby.GroupBy`."""
+
+    def mean(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).mean(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def std(self, numeric_only=False):
+    def std(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).std(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def kurtosis(self, numeric_only=False):
+    def kurtosis(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).kurtosis(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def median(self, numeric_only=False):
+    def median(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).median(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def var(self, numeric_only=False):
+    def var(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).var(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
+    """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`."""
+
     pass
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
+    """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`."""
+
     pass
 
 
@@ -254,7 +260,7 @@ def groupby(
         group_keys=True,
         observed=False,
         dropna=True,
-    ) -> SeriesGroupBy:
+        ):  # noqa: D102
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         if not as_index:
@@ -484,7 +490,7 @@ def groupby(
         group_keys: bool = True,
         observed: bool = False,
         dropna: bool = True,
-    ) -> DataFrameGroupBy:  # pragma: no cover
+        ):  # pragma: no cover  # noqa: D102
         if axis is not lib.no_default:
             axis = self._get_axis_number(axis)
             if axis == 1:

From 9598eac51cbda76f92667b1d7220c4504c40d4ff Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 17:02:08 +0000
Subject: [PATCH 10/71] fix indentation

---
 anesthetic/weighted_pandas.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index bb92f814..77550b61 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -260,7 +260,7 @@ def groupby(
         group_keys=True,
         observed=False,
         dropna=True,
-        ):  # noqa: D102
+    ):  # noqa: D102
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         if not as_index:
@@ -490,7 +490,7 @@ def groupby(
         group_keys: bool = True,
         observed: bool = False,
         dropna: bool = True,
-        ):  # pragma: no cover  # noqa: D102
+    ):  # pragma: no cover  # noqa: D102
         if axis is not lib.no_default:
             axis = self._get_axis_number(axis)
             if axis == 1:

From 192bffe47e53a65a82f126299a45089ee26a05d7 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 17:39:23 +0000
Subject: [PATCH 11/71] tests using cobaya chains

---
 tests/test_samples.py | 44 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6046e60a..7a31e335 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1334,31 +1334,19 @@ def test_old_gui():
         make_1d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'})
 
 
-def test_groupby():
-    params = ['a', 'b']
-    data = np.random.rand(4, 2)
-    weights = np.random.randint(1, 10, 4)
-    samples = Samples(data, weights=weights, columns=params)
-    samples['group'] = np.ones(4, dtype=int)
-    samples.loc[2:, 'group'] = 2
-
-    group1_means = np.average(samples.loc[:1, params], axis=0)
-    group1_wmeans = np.average(samples.loc[:1, params], axis=0,
-                               weights=samples.get_weights()[:2])
-    group2_means = np.average(samples.loc[2:, params], axis=0)
-    group2_wmeans = np.average(samples.loc[2:, params], axis=0,
-                               weights=samples.get_weights()[2:])
-    group_means = np.vstack([group1_means,  group2_means])
-    group_wmeans = np.vstack([group1_wmeans, group2_wmeans])
-    # group_weights = [samples.get_weights()[:2].sum(),
-    #                  samples.get_weights()[2:].sum()]
-    mean = np.average(group_means, axis=0)
-    # wmean = np.average(group_wmeans, axis=0, weights=group_weights)
-
-    groups = samples.groupby('group')
-    print(groups.mean())
-    assert not np.any(samples.groupby('group').mean() == group_means)
-    assert np.all(samples.groupby('group').mean() == group_wmeans)
-
-    assert not np.any(samples.groupby('group').mean().mean() == mean)
-    # assert np.all(samples.groupby('group').mean().mean() == wmean)
+def test_groupby_stats():
+    mcmc = read_chains('./tests/example_data/cb')
+    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].mean()
+                             .to_numpy()[:-1],
+           chains.mean().iloc[0, :].to_numpy()))
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std()
+                             .to_numpy()[:-1],
+           chains.std().iloc[0, :].to_numpy()))
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis()
+                             .dropna().to_numpy(),
+           chains.kurtosis().iloc[0, :].dropna().to_numpy()))
+    # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median().to_numpy()[:-1],
+                               # chains.median().iloc[0, :].to_numpy()))
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var().to_numpy()[:-1],
+           chains.var().iloc[0, :].to_numpy()))

From de9b4f7a0702e0c092f85d5d872dad09348fe30c Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 10 Mar 2023 17:40:15 +0000
Subject: [PATCH 12/71] test formatting

---
 tests/test_samples.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 7a31e335..124836df 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1346,7 +1346,9 @@ def test_groupby_stats():
     assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis()
                              .dropna().to_numpy(),
            chains.kurtosis().iloc[0, :].dropna().to_numpy()))
-    # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median().to_numpy()[:-1],
-                               # chains.median().iloc[0, :].to_numpy()))
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var().to_numpy()[:-1],
+    # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median()
+    # .to_numpy()[:-1],
+    # chains.median().iloc[0, :].to_numpy()))
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var()
+                             .to_numpy()[:-1],
            chains.var().iloc[0, :].to_numpy()))

From 55e9f4e30ddfa1e65e2fb4d4e74af166c71ff8df Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Tue, 14 Mar 2023 12:07:45 +0000
Subject: [PATCH 13/71] reinstate median test

---
 tests/test_samples.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 124836df..34559920 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1346,9 +1346,9 @@ def test_groupby_stats():
     assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis()
                              .dropna().to_numpy(),
            chains.kurtosis().iloc[0, :].dropna().to_numpy()))
-    # assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median()
-    # .to_numpy()[:-1],
-    # chains.median().iloc[0, :].to_numpy()))
+    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median()
+                             .to_numpy()[:-1],
+                             chains.median().iloc[0, :].to_numpy()))
     assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var()
                              .to_numpy()[:-1],
            chains.var().iloc[0, :].to_numpy()))

From dfc4c3b4908afd981871edebcd8b884ad865b456 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Tue, 14 Mar 2023 12:08:08 +0000
Subject: [PATCH 14/71] change numeric_only to None in median

---
 anesthetic/weighted_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 77550b61..0ba6bf5d 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -32,7 +32,7 @@ def kurtosis(self, numeric_only=False):  # noqa: D102
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def median(self, numeric_only=False):  # noqa: D102
+    def median(self, numeric_only=None):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).median(
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")

From 244cd1f76b264e488f1ad8ff758e79bfe29eea56 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Tue, 14 Mar 2023 12:21:08 +0000
Subject: [PATCH 15/71] stick underscores in front to see if this fixes the
 documentation

---
 anesthetic/weighted_pandas.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 0ba6bf5d..882990af 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -14,7 +14,7 @@
                               temporary_seed, adjust_docstrings)
 
 
-class WeightedGroupBy(GroupBy):
+class _WeightedGroupBy(GroupBy):
     """Weighted version of :class:`pandas.core.groupby.GroupBy`."""
 
     def mean(self, numeric_only=False):  # noqa: D102
@@ -43,13 +43,13 @@ def var(self, numeric_only=False):  # noqa: D102
         return result.__finalize__(self.obj, method="groupby")
 
 
-class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
+class _WeightedSeriesGroupBy(_WeightedGroupBy, SeriesGroupBy):
     """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`."""
 
     pass
 
 
-class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
+class _WeightedDataFrameGroupBy(_WeightedGroupBy, DataFrameGroupBy):
     """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`."""
 
     pass
@@ -267,7 +267,7 @@ def groupby(
             raise TypeError("as_index=False only valid with DataFrame")
         axis = self._get_axis_number(axis)
 
-        return WeightedSeriesGroupBy(
+        return _WeightedSeriesGroupBy(
             obj=self,
             keys=by,
             axis=axis,
@@ -513,7 +513,7 @@ def groupby(
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
 
-        return WeightedDataFrameGroupBy(
+        return _WeightedDataFrameGroupBy(
             obj=self,
             keys=by,
             axis=axis,

From e3badbb66cfa94faf4637d79754b9af71be158cb Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Tue, 14 Mar 2023 12:26:25 +0000
Subject: [PATCH 16/71] Revert "stick underscores in front to see if this fixes
 the documentation"

This reverts commit 244cd1f76b264e488f1ad8ff758e79bfe29eea56.
---
 anesthetic/weighted_pandas.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 882990af..0ba6bf5d 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -14,7 +14,7 @@
                               temporary_seed, adjust_docstrings)
 
 
-class _WeightedGroupBy(GroupBy):
+class WeightedGroupBy(GroupBy):
     """Weighted version of :class:`pandas.core.groupby.GroupBy`."""
 
     def mean(self, numeric_only=False):  # noqa: D102
@@ -43,13 +43,13 @@ def var(self, numeric_only=False):  # noqa: D102
         return result.__finalize__(self.obj, method="groupby")
 
 
-class _WeightedSeriesGroupBy(_WeightedGroupBy, SeriesGroupBy):
+class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
     """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`."""
 
     pass
 
 
-class _WeightedDataFrameGroupBy(_WeightedGroupBy, DataFrameGroupBy):
+class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
     """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`."""
 
     pass
@@ -267,7 +267,7 @@ def groupby(
             raise TypeError("as_index=False only valid with DataFrame")
         axis = self._get_axis_number(axis)
 
-        return _WeightedSeriesGroupBy(
+        return WeightedSeriesGroupBy(
             obj=self,
             keys=by,
             axis=axis,
@@ -513,7 +513,7 @@ def groupby(
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
 
-        return _WeightedDataFrameGroupBy(
+        return WeightedDataFrameGroupBy(
             obj=self,
             keys=by,
             axis=axis,

From ca0025583b48ddbfb2a4a6c67ae96fac3db4a664 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Tue, 14 Mar 2023 15:43:50 +0000
Subject: [PATCH 17/71] add missing no cover to WeightedSeries.groupby()

---
 anesthetic/weighted_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 0ba6bf5d..d064793f 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -260,7 +260,7 @@ def groupby(
         group_keys=True,
         observed=False,
         dropna=True,
-    ):  # noqa: D102
+    ):  # pragma: no cover  # noqa: D102
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         if not as_index:

From d97b760f2a59a522ebb7c634ddf4c9361c586f8f Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 14 Mar 2023 17:02:22 -0700
Subject: [PATCH 18/71] remove `:show-inheritance:` for `weighted_pandas`
 autodocs, cross referencing pandas is a pain, some of its classes lack docs

---
 docs/source/anesthetic.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst
index 9e9b8284..97fa9731 100644
--- a/docs/source/anesthetic.rst
+++ b/docs/source/anesthetic.rst
@@ -99,6 +99,5 @@ anesthetic.weighted\_pandas module
 .. automodule:: anesthetic.weighted_pandas
    :members:
    :undoc-members:
-   :show-inheritance:
 
 

From bfa2647e4c2082a14178e6402845f8b130fb8501 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 14 Mar 2023 17:02:43 -0700
Subject: [PATCH 19/71] fix autodocs for `weighted_pandas`

* `GroupBy` does not have its own docs.
  - Its initialisation signature looks like a core dump, hence
    implementing our own initialisation function.
  - Trying to cross-reference as
    ``:class:`pandas.core.groupby.GroupBy` `` will fail, hence dropping
    the link attempt. Same goes for `SeriesGroupBy` and
    `DataFrameGroupBy`.
* Dropping `kurtosis` from `WeightedGroupBy`, since it is not
  implemented in `pandas.core.groupby.GroupBy`. Leave that to tackle
  once/if/when we really need it.
* Add docstring adjustments for `WeightedDataFrameGroupBy` and
  `WeightedSeriesGroupBy` to the end of `weighted_pandas` in the same
  way as previously done for `WeightedDataFrame` and `WeightedSeries`.
---
 anesthetic/weighted_pandas.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index d064793f..bc503413 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -15,7 +15,10 @@
 
 
 class WeightedGroupBy(GroupBy):
-    """Weighted version of :class:`pandas.core.groupby.GroupBy`."""
+    """Weighted version of ``pandas.core.groupby.GroupBy``."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
 
     def mean(self, numeric_only=False):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).mean(
@@ -27,11 +30,6 @@ def std(self, numeric_only=False):  # noqa: D102
             numeric_only=numeric_only))
         return result.__finalize__(self.obj, method="groupby")
 
-    def kurtosis(self, numeric_only=False):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).kurtosis(
-            numeric_only=numeric_only))
-        return result.__finalize__(self.obj, method="groupby")
-
     def median(self, numeric_only=None):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).median(
             numeric_only=numeric_only))
@@ -44,13 +42,13 @@ def var(self, numeric_only=False):  # noqa: D102
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
-    """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`."""
+    """Weighted version of ``pandas.core.groupby.SeriesGroupBy``."""
 
     pass
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
-    """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`."""
+    """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``."""
 
     pass
 
@@ -526,14 +524,18 @@ def groupby(
         )
 
 
-for cls in [WeightedDataFrame, WeightedSeries]:
+for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy]:
     adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame')
     adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames')
     adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries')
     adjust_docstrings(cls, 'core', 'pandas.core')
-    adjust_docstrings(cls, 'DataFrameGroupBy',
-                           'pandas.core.groupby.DataFrameGroupBy')
-    adjust_docstrings(cls, 'SeriesGroupBy',
-                           'pandas.core.groupby.SeriesGroupBy')
     adjust_docstrings(cls, 'pandas.core.window.Rolling.quantile',
                            'pandas.core.window.rolling.Rolling.quantile')
+    adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy')
+    adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy')
+    adjust_docstrings(cls, 'WeightedDataFrameGroupBy.sample',
+                           'pandas.core.groupby.DataFrameGroupBy.sample')
+    adjust_docstrings(cls, 'WeightedSeriesGroupBy.sample',
+                           'pandas.core.groupby.SeriesGroupBy.sample')
+adjust_docstrings(WeightedDataFrame, 'resample', 'pandas.DataFrame.resample')
+adjust_docstrings(WeightedSeries,    'resample', 'pandas.Series.resample')

From 6703ca7d990b51827ee326ac9f99298eaa40f91c Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 14 Mar 2023 17:17:51 -0700
Subject: [PATCH 20/71] drop `WeightedGroupBy.kurtosis` also from tests

---
 tests/test_samples.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 34559920..e190041d 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,9 +1343,6 @@ def test_groupby_stats():
     assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std()
                              .to_numpy()[:-1],
            chains.std().iloc[0, :].to_numpy()))
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].kurtosis()
-                             .dropna().to_numpy(),
-           chains.kurtosis().iloc[0, :].dropna().to_numpy()))
     assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median()
                              .to_numpy()[:-1],
                              chains.median().iloc[0, :].to_numpy()))

From 40cd004d8fcfb3edc55512069d374b89a53d3864 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 14 Mar 2023 17:32:45 -0700
Subject: [PATCH 21/71] make `WeightedDataFrameGroupBy` and
 `WeightedSeriesGroupBy` private, since they have essentially no documentation
 anyhow

---
 anesthetic/weighted_pandas.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index bc503413..7f6b4b80 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -42,13 +42,19 @@ def var(self, numeric_only=False):  # noqa: D102
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
-    """Weighted version of ``pandas.core.groupby.SeriesGroupBy``."""
+    """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.
+
+    :meta private:
+    """
 
     pass
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
-    """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``."""
+    """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.
+
+    :meta private:
+    """
 
     pass
 

From 0f2104e26ce78b6700de35c61e4576492c97ffd7 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 14 Mar 2023 18:25:37 -0700
Subject: [PATCH 22/71] make `WeightedGroupBy.grouper` private

---
 anesthetic/weighted_pandas.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 7f6b4b80..f6be2f4e 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -4,7 +4,7 @@
 from inspect import signature
 import numpy as np
 from pandas import Series, DataFrame, concat, MultiIndex
-from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy
+from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy, ops
 from pandas._libs import lib
 from pandas._libs.lib import no_default
 from pandas.util._exceptions import find_stack_level
@@ -17,6 +17,9 @@
 class WeightedGroupBy(GroupBy):
     """Weighted version of ``pandas.core.groupby.GroupBy``."""
 
+    grouper: ops.BaseGrouper
+    """:meta private:"""
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 

From 9636d5f39a4eb4db337b5c9cefca67c3b3f37f56 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Thu, 16 Mar 2023 14:28:01 +0000
Subject: [PATCH 23/71] version bump

---
 README.rst             | 2 +-
 anesthetic/_version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 31ec3a26..687bb728 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.23
+:Version: 2.0.0-beta.24
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index d4666d28..03670122 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.0.0b23'
+__version__ = '2.0.0b24'

From 0a83fe0bbbfe0de055a26d991575d62d13470332 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 20 Mar 2023 09:45:01 +0000
Subject: [PATCH 24/71] version bump

---
 README.rst             | 2 +-
 anesthetic/_version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 687bb728..8e1bb219 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.24
+:Version: 2.0.0-beta.25
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index 03670122..c4c4e20b 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.0.0b24'
+__version__ = '2.0.0b25'

From 8c907f187705766171095f3fa2a02ecd169ad84e Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 22 Mar 2023 11:45:19 +0000
Subject: [PATCH 25/71] Removed hard-coded numeric_only arguments

---
 anesthetic/weighted_pandas.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index f6be2f4e..d9e1f1df 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -23,24 +23,24 @@ class WeightedGroupBy(GroupBy):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-    def mean(self, numeric_only=False):  # noqa: D102
+    def mean(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).mean(
-            numeric_only=numeric_only))
+            *args, **kwargs))
         return result.__finalize__(self.obj, method="groupby")
 
-    def std(self, numeric_only=False):  # noqa: D102
+    def std(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).std(
-            numeric_only=numeric_only))
+            *args, **kwargs))
         return result.__finalize__(self.obj, method="groupby")
 
-    def median(self, numeric_only=None):  # noqa: D102
+    def median(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).median(
-            numeric_only=numeric_only))
+            *args, **kwargs))
         return result.__finalize__(self.obj, method="groupby")
 
-    def var(self, numeric_only=False):  # noqa: D102
+    def var(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).var(
-            numeric_only=numeric_only))
+            *args, **kwargs))
         return result.__finalize__(self.obj, method="groupby")
 
 

From eac682aaec1859f39530e2ebc85c29da52b9891a Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 22 Mar 2023 11:45:51 +0000
Subject: [PATCH 26/71] version bump

---
 README.rst             | 2 +-
 anesthetic/_version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 8e1bb219..c876a856 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.25
+:Version: 2.0.0-beta.26
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 
diff --git a/anesthetic/_version.py b/anesthetic/_version.py
index c4c4e20b..019ed87d 100644
--- a/anesthetic/_version.py
+++ b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.0.0b25'
+__version__ = '2.0.0b26'

From c49536210ac8b3c74f99b548802d73737e04bb1a Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 22 Mar 2023 14:11:25 +0000
Subject: [PATCH 27/71] Updated weighted samples

---
 anesthetic/weighted_pandas.py | 55 +++++++++++++++++++++++++++++++----
 tests/test_samples.py         | 39 ++++++++++++++++---------
 2 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index d9e1f1df..49d2a29b 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -25,24 +25,31 @@ def __init__(self, *args, **kwargs):
 
     def mean(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).mean(
-            *args, **kwargs))
+            *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
     def std(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).std(
-            *args, **kwargs))
+            *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
     def median(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).median(
-            *args, **kwargs))
+            *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
     def var(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).var(
-            *args, **kwargs))
+            *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
+    def sample(self, *args, **kwargs):  # noqa: D102
+        return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
+
+    def get_weights(self):
+        """Return the weights of the grouped samples."""
+        return self.agg(lambda df: df.get_weights().sum())
+
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
     """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.
@@ -59,7 +66,45 @@ class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
     :meta private:
     """
 
-    pass
+    def get_weights(self):
+        """Return the weights of the grouped samples."""
+        return super().get_weights().min(axis=1-self.axis)
+
+    def _gotitem(self, key, ndim: int, subset=None):
+        if ndim == 2:
+            if subset is None:
+                subset = self.obj
+            return WeightedDataFrameGroupBy(
+                subset,
+                self.grouper,
+                axis=self.axis,
+                level=self.level,
+                grouper=self.grouper,
+                exclusions=self.exclusions,
+                selection=key,
+                as_index=self.as_index,
+                sort=self.sort,
+                group_keys=self.group_keys,
+                observed=self.observed,
+                dropna=self.dropna,
+            )
+        elif ndim == 1:
+            if subset is None:
+                subset = self.obj[key]
+            return WeightedSeriesGroupBy(
+                subset,
+                level=self.level,
+                grouper=self.grouper,
+                exclusions=self.exclusions,
+                selection=key,
+                as_index=self.as_index,
+                sort=self.sort,
+                group_keys=self.group_keys,
+                observed=self.observed,
+                dropna=self.dropna,
+            )
+
+        raise AssertionError("invalid ndim for _gotitem")
 
 
 class _WeightedObject(object):
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 86581704..801366de 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,16 +1343,29 @@ def test_old_gui():
 
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
-    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].mean()
-                             .to_numpy()[:-1],
-           chains.mean().iloc[0, :].to_numpy()))
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].std()
-                             .to_numpy()[:-1],
-           chains.std().iloc[0, :].to_numpy()))
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].median()
-                             .to_numpy()[:-1],
-                             chains.median().iloc[0, :].to_numpy()))
-    assert np.all(np.isclose(mcmc.loc[mcmc['chain'] == 1].var()
-                             .to_numpy()[:-1],
-           chains.var().iloc[0, :].to_numpy()))
+    chains = mcmc.groupby('chain')
+    for chain in [1, 2]:
+        i = mcmc.chain == chain
+        assert_allclose(mcmc.loc[i].mean().drop('chain'),
+                        chains.mean().loc[chain, :])
+        assert_allclose(mcmc.loc[i].std().drop('chain'),
+                        chains.std().loc[chain, :])
+        assert_allclose(mcmc.loc[i].median().drop('chain'),
+                        chains.median().loc[chain, :])
+        assert_allclose(mcmc.loc[i].var().drop('chain'),
+                        chains.var().loc[chain, :])
+
+    assert_allclose(mcmc.mean().drop('chain'), chains.mean().mean())
+
+    for col in mcmc.columns:
+        if 'chain' not in col:
+            for chain in [1, 2]:
+                i = mcmc.chain == chain
+                assert_allclose(mcmc.loc[i, col].mean(),
+                                chains[[col]].mean().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].std(),
+                                chains[[col]].std().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].median(),
+                                chains[[col]].median().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].var(),
+                                chains[[col]].var().loc[chain, :])

From 7fa1cec13be606309446240d2ea29373cf1339c5 Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 22 Mar 2023 15:49:22 +0000
Subject: [PATCH 28/71] Completed coverage

---
 anesthetic/weighted_pandas.py | 2 +-
 tests/test_samples.py         | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 49d2a29b..b2cc9caf 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -70,7 +70,7 @@ def get_weights(self):
         """Return the weights of the grouped samples."""
         return super().get_weights().min(axis=1-self.axis)
 
-    def _gotitem(self, key, ndim: int, subset=None):
+    def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
         if ndim == 2:
             if subset is None:
                 subset = self.obj
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 801366de..166a22d9 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1369,3 +1369,8 @@ def test_groupby_stats():
                                 chains[[col]].median().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].var(),
                                 chains[[col]].var().loc[chain, :])
+
+    sample = chains.sample(5)
+    assert len(sample) == 10
+    assert sample.value_counts('chain')[1] == 5
+    assert sample.value_counts('chain')[2] == 5

From 99a52e34f7f0d65293d44014e6abc27088a84cc4 Mon Sep 17 00:00:00 2001
From: Adam Ormondroyd <52655393+Ormorod@users.noreply.github.com>
Date: Wed, 22 Mar 2023 19:12:40 +0000
Subject: [PATCH 29/71] add missing space before inline comment

---
 anesthetic/weighted_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index b2cc9caf..cd532a93 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -70,7 +70,7 @@ def get_weights(self):
         """Return the weights of the grouped samples."""
         return super().get_weights().min(axis=1-self.axis)
 
-    def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover
+    def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
         if ndim == 2:
             if subset is None:
                 subset = self.obj

From 2e6d54ef164facd293fc908fc2fb15d67ca6c678 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Wed, 22 Mar 2023 19:23:36 +0000
Subject: [PATCH 30/71] joint call of column name and label

---
 tests/test_samples.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 166a22d9..95e150f0 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,19 +1343,19 @@ def test_old_gui():
 
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
-    chains = mcmc.groupby('chain')
+    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
     for chain in [1, 2]:
         i = mcmc.chain == chain
-        assert_allclose(mcmc.loc[i].mean().drop('chain'),
+        assert_allclose(mcmc.loc[i].mean().drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.mean().loc[chain, :])
-        assert_allclose(mcmc.loc[i].std().drop('chain'),
+        assert_allclose(mcmc.loc[i].std().drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.std().loc[chain, :])
-        assert_allclose(mcmc.loc[i].median().drop('chain'),
+        assert_allclose(mcmc.loc[i].median().drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.median().loc[chain, :])
-        assert_allclose(mcmc.loc[i].var().drop('chain'),
+        assert_allclose(mcmc.loc[i].var().drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
 
-    assert_allclose(mcmc.mean().drop('chain'), chains.mean().mean())
+    assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean())
 
     for col in mcmc.columns:
         if 'chain' not in col:

From e45aaa6a5fcf164f59ac38f2f59171ca8224862a Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Wed, 22 Mar 2023 19:31:06 +0000
Subject: [PATCH 31/71] formatting

---
 tests/test_samples.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 95e150f0..d8ae9e3d 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1346,16 +1346,21 @@ def test_groupby_stats():
     chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
     for chain in [1, 2]:
         i = mcmc.chain == chain
-        assert_allclose(mcmc.loc[i].mean().drop(('chain', '$n_\\mathrm{chain}$')),
+        assert_allclose(mcmc.loc[i].mean()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.mean().loc[chain, :])
-        assert_allclose(mcmc.loc[i].std().drop(('chain', '$n_\\mathrm{chain}$')),
+        assert_allclose(mcmc.loc[i].std()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.std().loc[chain, :])
-        assert_allclose(mcmc.loc[i].median().drop(('chain', '$n_\\mathrm{chain}$')),
+        assert_allclose(mcmc.loc[i].median()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.median().loc[chain, :])
-        assert_allclose(mcmc.loc[i].var().drop(('chain', '$n_\\mathrm{chain}$')),
+        assert_allclose(mcmc.loc[i].var()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
 
-    assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')), chains.mean().mean())
+    assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
+                    chains.mean().mean())
 
     for col in mcmc.columns:
         if 'chain' not in col:

From 5725ddfceb6d285c547cdbec8ed49f8596d2a9f8 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Wed, 22 Mar 2023 19:32:00 +0000
Subject: [PATCH 32/71] additional chains.get_group(chain) tests

---
 tests/test_samples.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index d8ae9e3d..3ba2bfc5 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1359,6 +1359,19 @@ def test_groupby_stats():
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
 
+        assert_allclose(chains.get_group(chain).mean()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.mean().loc[chain, :])
+        assert_allclose(chains.get_group(chain).std()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.std().loc[chain, :])
+        assert_allclose(chains.get_group(chain).median()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.median().loc[chain, :])
+        assert_allclose(chains.get_group(chain).var()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.var().loc[chain, :])
+
     assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
                     chains.mean().mean())
 

From 788fa8493dca86bd4b046fb855228ebe33a4a34d Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Thu, 23 Mar 2023 09:23:51 +0000
Subject: [PATCH 33/71] added kurtosis, kurt, skew, mad, sem

---
 anesthetic/utils.py           | 11 +++++-----
 anesthetic/weighted_pandas.py | 20 ++++++++++++++++++
 tests/test_samples.py         | 40 +++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/anesthetic/utils.py b/anesthetic/utils.py
index 0208a855..541d2132 100644
--- a/anesthetic/utils.py
+++ b/anesthetic/utils.py
@@ -530,8 +530,9 @@ class to adjust
     """
     for key, val in cls.__dict__.items():
         doc = inspect.getdoc(val)
-        newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
-        try:
-            cls.__dict__[key].__doc__ = newdoc
-        except AttributeError:
-            pass
+        if doc is not None:
+            newdoc = re.sub(pattern, repl, doc, *args, **kwargs)
+            try:
+                cls.__dict__[key].__doc__ = newdoc
+            except AttributeError:
+                pass
diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index cd532a93..92e053c7 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -43,6 +43,26 @@ def var(self, *args, **kwargs):  # noqa: D102
             *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
+    def kurt(self, *args, **kwargs):  # noqa: D102
+        result = self.agg(lambda df: self.obj._constructor(df).kurt(
+            *args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
+
+    def kurtosis(self, *args, **kwargs):  # noqa: D102
+        result = self.agg(lambda df: self.obj._constructor(df).kurtosis(
+            *args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
+
+    def skew(self, *args, **kwargs):  # noqa: D102
+        result = self.agg(lambda df: self.obj._constructor(df).skew(
+            *args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
+
+    def sem(self, *args, **kwargs):  # noqa: D102
+        result = self.agg(lambda df: self.obj._constructor(df).sem(
+            *args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
+
     def sample(self, *args, **kwargs):  # noqa: D102
         return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 3ba2bfc5..6b4b9899 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1358,6 +1358,21 @@ def test_groupby_stats():
         assert_allclose(mcmc.loc[i].var()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
+        assert_allclose(mcmc.loc[i].kurt()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.kurt().loc[chain, :])
+        assert_allclose(mcmc.loc[i].kurtosis()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.kurtosis().loc[chain, :])
+        assert_allclose(mcmc.loc[i].skew()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.skew().loc[chain, :])
+        assert_allclose(mcmc.loc[i].mad()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.mad().loc[chain, :])
+        assert_allclose(mcmc.loc[i].sem()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.sem().loc[chain, :])
 
         assert_allclose(chains.get_group(chain).mean()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
@@ -1371,6 +1386,21 @@ def test_groupby_stats():
         assert_allclose(chains.get_group(chain).var()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
+        assert_allclose(chains.get_group(chain).kurt()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.kurt().loc[chain, :])
+        assert_allclose(chains.get_group(chain).kurtosis()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.kurtosis().loc[chain, :])
+        assert_allclose(chains.get_group(chain).skew()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.skew().loc[chain, :])
+        assert_allclose(chains.get_group(chain).mad()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.mad().loc[chain, :])
+        assert_allclose(chains.get_group(chain).sem()
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.sem().loc[chain, :])
 
     assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
                     chains.mean().mean())
@@ -1387,6 +1417,16 @@ def test_groupby_stats():
                                 chains[[col]].median().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].var(),
                                 chains[[col]].var().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].kurt(),
+                                chains[[col]].kurt().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].kurtosis(),
+                                chains[[col]].kurtosis().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].skew(),
+                                chains[[col]].skew().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].mad(),
+                                chains[[col]].mad().loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].sem(),
+                                chains[[col]].sem().loc[chain, :])
 
     sample = chains.sample(5)
     assert len(sample) == 10

From f22a24ccb00f3f3a112663db84e0015cf4f9c2bd Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Fri, 24 Mar 2023 13:35:12 -0700
Subject: [PATCH 34/71] fix docs for weighted groupby sample methods

* move sample method from `WeightedGroupBy` to `WeightedSeriesGroupBy` and `WeightedDataFrameGroupBy`
* modify `adjust_docstrings` accordingly
* make `WeightedSeriesGroupBy` and `WeightedDataFrameGroupBy` public
---
 anesthetic/weighted_pandas.py | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 92e053c7..c9d72ba5 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -63,28 +63,20 @@ def sem(self, *args, **kwargs):  # noqa: D102
             *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
-    def sample(self, *args, **kwargs):  # noqa: D102
-        return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
-
     def get_weights(self):
         """Return the weights of the grouped samples."""
         return self.agg(lambda df: df.get_weights().sum())
 
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
-    """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.
+    """Weighted version of ``pandas.core.groupby.SeriesGroupBy``."""
 
-    :meta private:
-    """
-
-    pass
+    def sample(self, *args, **kwargs):  # noqa: D102
+        return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
-    """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.
-
-    :meta private:
-    """
+    """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``."""
 
     def get_weights(self):
         """Return the weights of the grouped samples."""
@@ -126,6 +118,9 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
 
         raise AssertionError("invalid ndim for _gotitem")
 
+    def sample(self, *args, **kwargs):  # noqa: D102
+        return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
+
 
 class _WeightedObject(object):
     """Common methods for `WeightedSeries` and `WeightedDataFrame`.
@@ -598,7 +593,8 @@ def groupby(
         )
 
 
-for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy]:
+for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy,
+            WeightedDataFrameGroupBy, WeightedSeriesGroupBy]:
     adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame')
     adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames')
     adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries')
@@ -607,9 +603,5 @@ def groupby(
                            'pandas.core.window.rolling.Rolling.quantile')
     adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy')
     adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy')
-    adjust_docstrings(cls, 'WeightedDataFrameGroupBy.sample',
-                           'pandas.core.groupby.DataFrameGroupBy.sample')
-    adjust_docstrings(cls, 'WeightedSeriesGroupBy.sample',
-                           'pandas.core.groupby.SeriesGroupBy.sample')
 adjust_docstrings(WeightedDataFrame, 'resample', 'pandas.DataFrame.resample')
 adjust_docstrings(WeightedSeries,    'resample', 'pandas.Series.resample')

From d2215a5dbaeee01ea7d76b63c79ded9fb425a568 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Fri, 24 Mar 2023 14:45:48 -0700
Subject: [PATCH 35/71] complete coverage by adding test for
 `WeightedSeriesGroupBy.sample`

---
 tests/test_samples.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6b4b9899..d09fe5d2 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1427,6 +1427,8 @@ def test_groupby_stats():
                                 chains[[col]].mad().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
+            sample = chains[[col]].sample(5)
+            assert len(sample) == 10
 
     sample = chains.sample(5)
     assert len(sample) == 10

From 866b3b38b72b6b409f700018803562967a157aa1 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Fri, 24 Mar 2023 15:36:08 -0700
Subject: [PATCH 36/71] fix groupby test for `WeightedSeriesGroupBy.sample`

---
 tests/test_samples.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index d09fe5d2..424c57a6 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1427,10 +1427,14 @@ def test_groupby_stats():
                                 chains[[col]].mad().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
-            sample = chains[[col]].sample(5)
-            assert len(sample) == 10
 
     sample = chains.sample(5)
     assert len(sample) == 10
     assert sample.value_counts('chain')[1] == 5
     assert sample.value_counts('chain')[2] == 5
+
+    chains = mcmc.chain.groupby(mcmc.chain)
+    sample = chains.sample(5)
+    assert len(sample) == 10
+    assert sample.value_counts()[1] == 5
+    assert sample.value_counts()[2] == 5

From 57a1c1fb4a874009cc76073e39ed22a12948116f Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 27 Mar 2023 15:33:44 +0100
Subject: [PATCH 37/71] add quantile

---
 anesthetic/weighted_pandas.py | 5 +++++
 tests/test_samples.py         | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index c9d72ba5..94fb089d 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -63,6 +63,11 @@ def sem(self, *args, **kwargs):  # noqa: D102
             *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
+    def quantile(self, *args, **kwargs):  # noqa: D102
+        result = self.agg(lambda df: self.obj._constructor(df).quantile(
+            *args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
+
     def get_weights(self):
         """Return the weights of the grouped samples."""
         return self.agg(lambda df: df.get_weights().sum())
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 424c57a6..43da09fa 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1401,6 +1401,10 @@ def test_groupby_stats():
         assert_allclose(chains.get_group(chain).sem()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.sem().loc[chain, :])
+        q = np.random.rand()
+        assert_allclose(chains.get_group(chain).quantile(q)
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.quantile(q).loc[chain, :])
 
     assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
                     chains.mean().mean())
@@ -1427,6 +1431,9 @@ def test_groupby_stats():
                                 chains[[col]].mad().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
+                q = np.random.rand()
+                assert_allclose(mcmc.loc[i, col].quantile(q),
+                                chains[[col]].quantile(q).loc[chain, :])
 
     sample = chains.sample(5)
     assert len(sample) == 10

From aff1455c1dcf8450e9d5e55abb2473319c5bc858 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 27 Mar 2023 16:48:14 +0100
Subject: [PATCH 38/71] add tests for corr, line 1441 causing invalid value
 warning

---
 tests/test_samples.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 43da09fa..49217ee3 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,7 +1343,7 @@ def test_old_gui():
 
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
-    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
+    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'))
     for chain in [1, 2]:
         i = mcmc.chain == chain
         assert_allclose(mcmc.loc[i].mean()
@@ -1401,6 +1401,10 @@ def test_groupby_stats():
         assert_allclose(chains.get_group(chain).sem()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.sem().loc[chain, :])
+        assert_allclose(chains.get_group(chain).corr()
+                        .drop(('chain', '$n_\\mathrm{chain}$'))
+                        .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
+                        chains.corr().loc[chain, :])
         q = np.random.rand()
         assert_allclose(chains.get_group(chain).quantile(q)
                         .drop(('chain', '$n_\\mathrm{chain}$')),
@@ -1434,6 +1438,9 @@ def test_groupby_stats():
                 q = np.random.rand()
                 assert_allclose(mcmc.loc[i, col].quantile(q),
                                 chains[[col]].quantile(q).loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]),
+                                chains[[col]].corr(mcmc.loc[i, col])
+                                .loc[chain, :])
 
     sample = chains.sample(5)
     assert len(sample) == 10

From 83e2c4d6e2750004a0355322197561f37b9115f6 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 27 Mar 2023 17:03:28 +0100
Subject: [PATCH 39/71] add test for cov

---
 tests/test_samples.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 49217ee3..f1559684 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1405,6 +1405,10 @@ def test_groupby_stats():
                         .drop(('chain', '$n_\\mathrm{chain}$'))
                         .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
                         chains.corr().loc[chain, :])
+        assert_allclose(chains.get_group(chain).cov()
+                        .drop(('chain', '$n_\\mathrm{chain}$'))
+                        .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
+                        chains.cov().loc[chain, :])
         q = np.random.rand()
         assert_allclose(chains.get_group(chain).quantile(q)
                         .drop(('chain', '$n_\\mathrm{chain}$')),
@@ -1438,6 +1442,8 @@ def test_groupby_stats():
                 q = np.random.rand()
                 assert_allclose(mcmc.loc[i, col].quantile(q),
                                 chains[[col]].quantile(q).loc[chain, :])
+                assert_allclose(mcmc.loc[i, col].cov(mcmc.loc[i, col]),
+                                chains[[col]].cov().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]),
                                 chains[[col]].corr(mcmc.loc[i, col])
                                 .loc[chain, :])

From 0654d98259a7b8d525beda29b124d4aa93b4820e Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 27 Mar 2023 17:07:33 +0100
Subject: [PATCH 40/71] move quantile to end

---
 tests/test_samples.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index f1559684..6e62ed06 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1439,14 +1439,14 @@ def test_groupby_stats():
                                 chains[[col]].mad().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
-                q = np.random.rand()
-                assert_allclose(mcmc.loc[i, col].quantile(q),
-                                chains[[col]].quantile(q).loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].cov(mcmc.loc[i, col]),
                                 chains[[col]].cov().loc[chain, :])
                 assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]),
                                 chains[[col]].corr(mcmc.loc[i, col])
                                 .loc[chain, :])
+                q = np.random.rand()
+                assert_allclose(mcmc.loc[i, col].quantile(q),
+                                chains[[col]].quantile(q).loc[chain, :])
 
     sample = chains.sample(5)
     assert len(sample) == 10

From 43f08821d27973664b5dc8692d0d167c8c0b2bb9 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Mon, 27 Mar 2023 17:12:59 +0100
Subject: [PATCH 41/71] add test for corrwith

---
 tests/test_samples.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6e62ed06..da4d6c8f 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1409,6 +1409,10 @@ def test_groupby_stats():
                         .drop(('chain', '$n_\\mathrm{chain}$'))
                         .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
                         chains.cov().loc[chain, :])
+        assert_allclose(chains.get_group(chain).corrwith(mcmc)
+                        .drop(('chain', '$n_\\mathrm{chain}$')),
+                        chains.corrwith(mcmc).loc[chain, :]
+                        .drop(('chain', '$n_\\mathrm{chain}$')))
         q = np.random.rand()
         assert_allclose(chains.get_group(chain).quantile(q)
                         .drop(('chain', '$n_\\mathrm{chain}$')),

From f1c966d5a3e656b02d0efc84485b2d937d56f285 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Mon, 27 Mar 2023 13:30:54 -0700
Subject: [PATCH 42/71] change `i` to `mask` to make it clearer that this is
 not a single index, but a boolean mask

---
 tests/test_samples.py | 48 +++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index da4d6c8f..477f9831 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1345,32 +1345,32 @@ def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
     chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'))
     for chain in [1, 2]:
-        i = mcmc.chain == chain
-        assert_allclose(mcmc.loc[i].mean()
+        mask = mcmc.chain == chain
+        assert_allclose(mcmc.loc[mask].mean()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.mean().loc[chain, :])
-        assert_allclose(mcmc.loc[i].std()
+        assert_allclose(mcmc.loc[mask].std()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.std().loc[chain, :])
-        assert_allclose(mcmc.loc[i].median()
+        assert_allclose(mcmc.loc[mask].median()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.median().loc[chain, :])
-        assert_allclose(mcmc.loc[i].var()
+        assert_allclose(mcmc.loc[mask].var()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.var().loc[chain, :])
-        assert_allclose(mcmc.loc[i].kurt()
+        assert_allclose(mcmc.loc[mask].kurt()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.kurt().loc[chain, :])
-        assert_allclose(mcmc.loc[i].kurtosis()
+        assert_allclose(mcmc.loc[mask].kurtosis()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.kurtosis().loc[chain, :])
-        assert_allclose(mcmc.loc[i].skew()
+        assert_allclose(mcmc.loc[mask].skew()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.skew().loc[chain, :])
-        assert_allclose(mcmc.loc[i].mad()
+        assert_allclose(mcmc.loc[mask].mad()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.mad().loc[chain, :])
-        assert_allclose(mcmc.loc[i].sem()
+        assert_allclose(mcmc.loc[mask].sem()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.sem().loc[chain, :])
 
@@ -1424,32 +1424,32 @@ def test_groupby_stats():
     for col in mcmc.columns:
         if 'chain' not in col:
             for chain in [1, 2]:
-                i = mcmc.chain == chain
-                assert_allclose(mcmc.loc[i, col].mean(),
+                mask = mcmc.chain == chain
+                assert_allclose(mcmc.loc[mask, col].mean(),
                                 chains[[col]].mean().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].std(),
+                assert_allclose(mcmc.loc[mask, col].std(),
                                 chains[[col]].std().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].median(),
+                assert_allclose(mcmc.loc[mask, col].median(),
                                 chains[[col]].median().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].var(),
+                assert_allclose(mcmc.loc[mask, col].var(),
                                 chains[[col]].var().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].kurt(),
+                assert_allclose(mcmc.loc[mask, col].kurt(),
                                 chains[[col]].kurt().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].kurtosis(),
+                assert_allclose(mcmc.loc[mask, col].kurtosis(),
                                 chains[[col]].kurtosis().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].skew(),
+                assert_allclose(mcmc.loc[mask, col].skew(),
                                 chains[[col]].skew().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].mad(),
+                assert_allclose(mcmc.loc[mask, col].mad(),
                                 chains[[col]].mad().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].sem(),
+                assert_allclose(mcmc.loc[mask, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].cov(mcmc.loc[i, col]),
+                assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
                                 chains[[col]].cov().loc[chain, :])
-                assert_allclose(mcmc.loc[i, col].corr(mcmc.loc[i, col]),
-                                chains[[col]].corr(mcmc.loc[i, col])
+                assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
+                                chains[[col]].corr(mcmc.loc[mask, col])
                                 .loc[chain, :])
                 q = np.random.rand()
-                assert_allclose(mcmc.loc[i, col].quantile(q),
+                assert_allclose(mcmc.loc[mask, col].quantile(q),
                                 chains[[col]].quantile(q).loc[chain, :])
 
     sample = chains.sample(5)

From 142740cff80e44ca74590ccb6f474515dfe80583 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Mon, 27 Mar 2023 13:51:26 -0700
Subject: [PATCH 43/71] add tests that check whether `groupby` results from
 `mean`, `std`, `cov` etc. are weighted as they should be

---
 tests/test_samples.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 477f9831..9a4e48c3 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1344,6 +1344,20 @@ def test_old_gui():
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
     chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'))
+
+    assert chains.mean().isweighted() is True
+    assert chains.std().isweighted() is True
+    assert chains.median().isweighted() is True
+    assert chains.var().isweighted() is True
+    assert chains.kurt().isweighted() is True
+    assert chains.kurtosis().isweighted() is True
+    assert chains.skew().isweighted() is True
+    # assert chains.mad().isweighted() is True
+    assert chains.sem().isweighted() is True
+    # assert chains.corr().isweighted() is True
+    # assert chains.cov().isweighted() is True
+    # assert chains.corrwith(mcmc).isweighted() is True
+
     for chain in [1, 2]:
         mask = mcmc.chain == chain
         assert_allclose(mcmc.loc[mask].mean()

From 7b0a8e1aa233582d6d93679c378058154b04cba9 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Mon, 27 Mar 2023 14:11:33 -0700
Subject: [PATCH 44/71] add groupby tests for `mad`, `corr`, `cov` and
 `corrwith` that check whether their results are weighted

---
 tests/test_samples.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 9a4e48c3..fcfa97bd 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1352,11 +1352,11 @@ def test_groupby_stats():
     assert chains.kurt().isweighted() is True
     assert chains.kurtosis().isweighted() is True
     assert chains.skew().isweighted() is True
-    # assert chains.mad().isweighted() is True
+    assert chains.mad().isweighted() is True
     assert chains.sem().isweighted() is True
-    # assert chains.corr().isweighted() is True
-    # assert chains.cov().isweighted() is True
-    # assert chains.corrwith(mcmc).isweighted() is True
+    assert chains.corr().isweighted() is True
+    assert chains.cov().isweighted() is True
+    assert chains.corrwith(mcmc).isweighted() is True
 
     for chain in [1, 2]:
         mask = mcmc.chain == chain

From 911f54e63747924a50a74e4e0a240a8474c6cf15 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Mon, 27 Mar 2023 14:38:57 -0700
Subject: [PATCH 45/71] add tests for groupby that explicitly check that the
 methods return the correct weights

---
 tests/test_samples.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index fcfa97bd..71b0609b 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1358,6 +1358,16 @@ def test_groupby_stats():
     assert chains.cov().isweighted() is True
     assert chains.corrwith(mcmc).isweighted() is True
 
+    w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum()
+    w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum()
+    assert np.all(chains.mean().get_weights() == [w1, w2])
+    assert np.all(chains.std().get_weights() == [w1, w2])
+    assert np.all(chains.median().get_weights() == [w1, w2])
+    assert np.all(chains.var().get_weights() == [w1, w2])
+    assert np.all(chains.kurt().get_weights() == [w1, w2])
+    assert np.all(chains.kurtosis().get_weights() == [w1, w2])
+    assert np.all(chains.skew().get_weights() == [w1, w2])
+
     for chain in [1, 2]:
         mask = mcmc.chain == chain
         assert_allclose(mcmc.loc[mask].mean()

From 23f2d3d162bd5e6f2c472606a65553110a73293d Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Tue, 28 Mar 2023 09:58:25 +0100
Subject: [PATCH 46/71] Added some cleaner tests for get_group

---
 tests/test_samples.py | 61 +++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6b4b9899..8f507a32 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,7 +1343,7 @@ def test_old_gui():
 
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
-    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
+    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'))
     for chain in [1, 2]:
         i = mcmc.chain == chain
         assert_allclose(mcmc.loc[i].mean()
@@ -1373,34 +1373,25 @@ def test_groupby_stats():
         assert_allclose(mcmc.loc[i].sem()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.sem().loc[chain, :])
-
-        assert_allclose(chains.get_group(chain).mean()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mean().loc[chain, :])
-        assert_allclose(chains.get_group(chain).std()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.std().loc[chain, :])
-        assert_allclose(chains.get_group(chain).median()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.median().loc[chain, :])
-        assert_allclose(chains.get_group(chain).var()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.var().loc[chain, :])
-        assert_allclose(chains.get_group(chain).kurt()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurt().loc[chain, :])
-        assert_allclose(chains.get_group(chain).kurtosis()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurtosis().loc[chain, :])
-        assert_allclose(chains.get_group(chain).skew()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.skew().loc[chain, :])
-        assert_allclose(chains.get_group(chain).mad()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mad().loc[chain, :])
-        assert_allclose(chains.get_group(chain).sem()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.sem().loc[chain, :])
+        assert_allclose(mcmc.loc[i].drop(
+                        columns=('chain', '$n_\\mathrm{chain}$')).cov(),
+                        chains.cov().loc[chain, :])
+        assert_allclose(mcmc.loc[i].drop(
+                        columns=('chain', '$n_\\mathrm{chain}$')).corr(),
+                        chains.corr().loc[chain, :])
+
+        group = chains.get_group(chain)
+        assert_allclose(mcmc.loc[i].mean(), group.mean())
+        assert_allclose(mcmc.loc[i].std(), group.std())
+        assert_allclose(mcmc.loc[i].median(), group.median())
+        assert_allclose(mcmc.loc[i].var(), group.var())
+        assert_allclose(mcmc.loc[i].kurt(), group.kurt())
+        assert_allclose(mcmc.loc[i].kurtosis(), group.kurtosis())
+        assert_allclose(mcmc.loc[i].skew(), group.skew())
+        assert_allclose(mcmc.loc[i].mad(), group.mad())
+        assert_allclose(mcmc.loc[i].sem(), group.sem())
+        assert_allclose(mcmc.loc[i].cov(), group.cov())
+        assert_allclose(mcmc.loc[i].corr(), group.corr())
 
     assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
                     chains.mean().mean())
@@ -1428,6 +1419,18 @@ def test_groupby_stats():
                 assert_allclose(mcmc.loc[i, col].sem(),
                                 chains[[col]].sem().loc[chain, :])
 
+                group = chains[[col]].get_group(chain)
+                assert_allclose(mcmc.loc[i, col].mean(), group.mean())
+                assert_allclose(mcmc.loc[i, col].std(), group.std())
+                assert_allclose(mcmc.loc[i, col].median(), group.median())
+                assert_allclose(mcmc.loc[i, col].var(), group.var())
+                assert_allclose(mcmc.loc[i, col].kurt(), group.kurt())
+                assert_allclose(mcmc.loc[i, col].kurtosis(), group.kurtosis())
+                assert_allclose(mcmc.loc[i, col].skew(), group.skew())
+                assert_allclose(mcmc.loc[i, col].mad(), group.mad())
+                assert_allclose(mcmc.loc[i, col].sem(), group.sem())
+
+
     sample = chains.sample(5)
     assert len(sample) == 10
     assert sample.value_counts('chain')[1] == 5

From d6423aa67ded1458af1dfb4d1a2a1d6893205e5b Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 11:18:49 +0100
Subject: [PATCH 47/71] partial completion of covariance

---
 anesthetic/weighted_pandas.py | 16 ++++++
 tests/test_samples.py         | 94 ++++++++++-------------------------
 2 files changed, 41 insertions(+), 69 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 94fb089d..8bbd386d 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -79,6 +79,9 @@ class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
     def sample(self, *args, **kwargs):  # noqa: D102
         return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
+    def cov(self, *args, **kwargs):  # noqa: D102 
+        return super().cov(*args, **kwargs).set_weights(self.get_weights())
+
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
     """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``."""
@@ -126,6 +129,19 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
     def sample(self, *args, **kwargs):  # noqa: D102
         return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
+    def cov(self, *args, **kwargs):  # noqa: D102 
+        ans = super().cov(*args, **kwargs)
+        index = ans.index.get_level_values(self.keys)
+        weights = self.get_weights()[index]
+        return ans.set_weights(weights, level=1)
+
+    def corr(self, *args, **kwargs):  # noqa: D102 
+        ans = super().corr(*args, **kwargs)
+        index = ans.index.get_level_values(self.keys)
+        weights = self.get_weights()[index]
+        return ans.set_weights(weights, level=1)
+
+
 
 class _WeightedObject(object):
     """Common methods for `WeightedSeries` and `WeightedDataFrame`.
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 0744d5ea..a5b72ecf 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1352,11 +1352,10 @@ def test_groupby_stats():
     assert chains.kurt().isweighted() is True
     assert chains.kurtosis().isweighted() is True
     assert chains.skew().isweighted() is True
-    assert chains.mad().isweighted() is True
     assert chains.sem().isweighted() is True
     assert chains.corr().isweighted() is True
-    assert chains.cov().isweighted() is True
-    assert chains.corrwith(mcmc).isweighted() is True
+    #assert chains.cov().isweighted() is True
+    #assert chains.corrwith(mcmc).isweighted() is True
 
     w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum()
     w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum()
@@ -1397,69 +1396,25 @@ def test_groupby_stats():
         assert_allclose(mcmc.loc[mask].sem()
                         .drop(('chain', '$n_\\mathrm{chain}$')),
                         chains.sem().loc[chain, :])
-        assert_allclose(mcmc.loc[i].drop(
+        assert_allclose(mcmc.loc[mask].drop(
                         columns=('chain', '$n_\\mathrm{chain}$')).cov(),
                         chains.cov().loc[chain, :])
-        assert_allclose(mcmc.loc[i].drop(
+        assert_allclose(mcmc.loc[mask].drop(
                         columns=('chain', '$n_\\mathrm{chain}$')).corr(),
                         chains.corr().loc[chain, :])
 
         group = chains.get_group(chain)
-        assert_allclose(mcmc.loc[i].mean(), group.mean())
-        assert_allclose(mcmc.loc[i].std(), group.std())
-        assert_allclose(mcmc.loc[i].median(), group.median())
-        assert_allclose(mcmc.loc[i].var(), group.var())
-        assert_allclose(mcmc.loc[i].kurt(), group.kurt())
-        assert_allclose(mcmc.loc[i].kurtosis(), group.kurtosis())
-        assert_allclose(mcmc.loc[i].skew(), group.skew())
-        assert_allclose(mcmc.loc[i].mad(), group.mad())
-        assert_allclose(mcmc.loc[i].sem(), group.sem())
-        assert_allclose(mcmc.loc[i].cov(), group.cov())
-        assert_allclose(mcmc.loc[i].corr(), group.corr())
-
-        assert_allclose(chains.get_group(chain).mean()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mean().loc[chain, :])
-        assert_allclose(chains.get_group(chain).std()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.std().loc[chain, :])
-        assert_allclose(chains.get_group(chain).median()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.median().loc[chain, :])
-        assert_allclose(chains.get_group(chain).var()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.var().loc[chain, :])
-        assert_allclose(chains.get_group(chain).kurt()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurt().loc[chain, :])
-        assert_allclose(chains.get_group(chain).kurtosis()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurtosis().loc[chain, :])
-        assert_allclose(chains.get_group(chain).skew()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.skew().loc[chain, :])
-        assert_allclose(chains.get_group(chain).mad()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mad().loc[chain, :])
-        assert_allclose(chains.get_group(chain).sem()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.sem().loc[chain, :])
-        assert_allclose(chains.get_group(chain).corr()
-                        .drop(('chain', '$n_\\mathrm{chain}$'))
-                        .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
-                        chains.corr().loc[chain, :])
-        assert_allclose(chains.get_group(chain).cov()
-                        .drop(('chain', '$n_\\mathrm{chain}$'))
-                        .drop(('chain', '$n_\\mathrm{chain}$'), axis=1),
-                        chains.cov().loc[chain, :])
-        assert_allclose(chains.get_group(chain).corrwith(mcmc)
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.corrwith(mcmc).loc[chain, :]
-                        .drop(('chain', '$n_\\mathrm{chain}$')))
-        q = np.random.rand()
-        assert_allclose(chains.get_group(chain).quantile(q)
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.quantile(q).loc[chain, :])
+        assert_allclose(mcmc.loc[mask].mean(), group.mean())
+        assert_allclose(mcmc.loc[mask].std(), group.std())
+        assert_allclose(mcmc.loc[mask].median(), group.median())
+        assert_allclose(mcmc.loc[mask].var(), group.var())
+        assert_allclose(mcmc.loc[mask].kurt(), group.kurt())
+        assert_allclose(mcmc.loc[mask].kurtosis(), group.kurtosis())
+        assert_allclose(mcmc.loc[mask].skew(), group.skew())
+        assert_allclose(mcmc.loc[mask].mad(), group.mad())
+        assert_allclose(mcmc.loc[mask].sem(), group.sem())
+        assert_allclose(mcmc.loc[mask].cov(), group.cov())
+        assert_allclose(mcmc.loc[mask].corr(), group.corr())
 
     assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
                     chains.mean().mean())
@@ -1496,15 +1451,16 @@ def test_groupby_stats():
                                 chains[[col]].quantile(q).loc[chain, :])
 
                 group = chains[[col]].get_group(chain)
-                assert_allclose(mcmc.loc[i, col].mean(), group.mean())
-                assert_allclose(mcmc.loc[i, col].std(), group.std())
-                assert_allclose(mcmc.loc[i, col].median(), group.median())
-                assert_allclose(mcmc.loc[i, col].var(), group.var())
-                assert_allclose(mcmc.loc[i, col].kurt(), group.kurt())
-                assert_allclose(mcmc.loc[i, col].kurtosis(), group.kurtosis())
-                assert_allclose(mcmc.loc[i, col].skew(), group.skew())
-                assert_allclose(mcmc.loc[i, col].mad(), group.mad())
-                assert_allclose(mcmc.loc[i, col].sem(), group.sem())
+                assert_allclose(mcmc.loc[mask, col].mean(), group.mean())
+                assert_allclose(mcmc.loc[mask, col].std(), group.std())
+                assert_allclose(mcmc.loc[mask, col].median(), group.median())
+                assert_allclose(mcmc.loc[mask, col].var(), group.var())
+                assert_allclose(mcmc.loc[mask, col].kurt(), group.kurt())
+                assert_allclose(mcmc.loc[mask, col].kurtosis(),
+                                group.kurtosis())
+                assert_allclose(mcmc.loc[mask, col].skew(), group.skew())
+                assert_allclose(mcmc.loc[mask, col].mad(), group.mad())
+                assert_allclose(mcmc.loc[mask, col].sem(), group.sem())
 
 
     sample = chains.sample(5)

From 706d75913e0147e7bb9cade41fe6d1bc47646f2b Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 11:42:05 +0100
Subject: [PATCH 48/71] Now using params and chains[col] rather than drop and chains[[col]]

---
 tests/test_samples.py | 131 +++++++++++++++++++-----------------------
 1 file changed, 60 insertions(+), 71 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index a5b72ecf..6d701de2 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1343,7 +1343,8 @@ def test_old_gui():
 
 def test_groupby_stats():
     mcmc = read_chains('./tests/example_data/cb')
-    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'))
+    params = ['x0', 'x1']
+    chains = mcmc[params + ['chain']].groupby(('chain', '$n_\\mathrm{chain}$'))
 
     assert chains.mean().isweighted() is True
     assert chains.std().isweighted() is True
@@ -1354,7 +1355,7 @@ def test_groupby_stats():
     assert chains.skew().isweighted() is True
     assert chains.sem().isweighted() is True
     assert chains.corr().isweighted() is True
-    #assert chains.cov().isweighted() is True
+    assert chains.cov().isweighted() is True
     #assert chains.corrwith(mcmc).isweighted() is True
 
     w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum()
@@ -1369,88 +1370,77 @@ def test_groupby_stats():
 
     for chain in [1, 2]:
         mask = mcmc.chain == chain
-        assert_allclose(mcmc.loc[mask].mean()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mean().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].std()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.std().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].median()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.median().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].var()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.var().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].kurt()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurt().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].kurtosis()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.kurtosis().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].skew()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.skew().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].mad()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.mad().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].sem()
-                        .drop(('chain', '$n_\\mathrm{chain}$')),
-                        chains.sem().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].drop(
-                        columns=('chain', '$n_\\mathrm{chain}$')).cov(),
-                        chains.cov().loc[chain, :])
-        assert_allclose(mcmc.loc[mask].drop(
-                        columns=('chain', '$n_\\mathrm{chain}$')).corr(),
-                        chains.corr().loc[chain, :])
-
-        group = chains.get_group(chain)
-        assert_allclose(mcmc.loc[mask].mean(), group.mean())
-        assert_allclose(mcmc.loc[mask].std(), group.std())
-        assert_allclose(mcmc.loc[mask].median(), group.median())
-        assert_allclose(mcmc.loc[mask].var(), group.var())
-        assert_allclose(mcmc.loc[mask].kurt(), group.kurt())
-        assert_allclose(mcmc.loc[mask].kurtosis(), group.kurtosis())
-        assert_allclose(mcmc.loc[mask].skew(), group.skew())
-        assert_allclose(mcmc.loc[mask].mad(), group.mad())
-        assert_allclose(mcmc.loc[mask].sem(), group.sem())
-        assert_allclose(mcmc.loc[mask].cov(), group.cov())
-        assert_allclose(mcmc.loc[mask].corr(), group.corr())
-
-    assert_allclose(mcmc.mean().drop(('chain', '$n_\\mathrm{chain}$')),
-                    chains.mean().mean())
-
-    for col in mcmc.columns:
+        assert_allclose(mcmc.loc[mask, params].mean(),
+                        chains.mean().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].std(),
+                        chains.std().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].median(),
+                        chains.median().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].var(),
+                        chains.var().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].kurt(),
+                        chains.kurt().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].kurtosis(),
+                        chains.kurtosis().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].skew(),
+                        chains.skew().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].mad(),
+                        chains.mad().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].sem(),
+                        chains.sem().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].cov(),
+                        chains.cov().loc[chain])
+        assert_allclose(mcmc.loc[mask, params].corr(),
+                        chains.corr().loc[chain])
+
+        group = chains.get_group(chain).drop(
+                columns=('chain', '$n_\\mathrm{chain}$'))
+        assert_allclose(mcmc.loc[mask, params].mean(), group.mean())
+        assert_allclose(mcmc.loc[mask, params].std(), group.std())
+        assert_allclose(mcmc.loc[mask, params].median(), group.median())
+        assert_allclose(mcmc.loc[mask, params].var(), group.var())
+        assert_allclose(mcmc.loc[mask, params].kurt(), group.kurt())
+        assert_allclose(mcmc.loc[mask, params].kurtosis(), group.kurtosis())
+        assert_allclose(mcmc.loc[mask, params].skew(), group.skew())
+        assert_allclose(mcmc.loc[mask, params].mad(), group.mad())
+        assert_allclose(mcmc.loc[mask, params].sem(), group.sem())
+        assert_allclose(mcmc.loc[mask, params].cov(), group.cov())
+        assert_allclose(mcmc.loc[mask, params].corr(), group.corr())
+
+    assert_allclose(mcmc[params].mean(), chains.mean().mean())
+
+    for col in params:
         if 'chain' not in col:
             for chain in [1, 2]:
                 mask = mcmc.chain == chain
                 assert_allclose(mcmc.loc[mask, col].mean(),
-                                chains[[col]].mean().loc[chain, :])
+                                chains[col].mean().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].std(),
-                                chains[[col]].std().loc[chain, :])
+                                chains[col].std().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].median(),
-                                chains[[col]].median().loc[chain, :])
+                                chains[col].median().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].var(),
-                                chains[[col]].var().loc[chain, :])
+                                chains[col].var().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].kurt(),
-                                chains[[col]].kurt().loc[chain, :])
+                                chains[col].kurt().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].kurtosis(),
-                                chains[[col]].kurtosis().loc[chain, :])
+                                chains[col].kurtosis().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].skew(),
-                                chains[[col]].skew().loc[chain, :])
-                assert_allclose(mcmc.loc[mask, col].mad(),
-                                chains[[col]].mad().loc[chain, :])
+                                chains[col].skew().loc[chain])
+                #assert_allclose(mcmc.loc[mask, col].mad(),
+                #                chains[col].mad().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].sem(),
-                                chains[[col]].sem().loc[chain, :])
-                assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
-                                chains[[col]].cov().loc[chain, :])
-                assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
-                                chains[[col]].corr(mcmc.loc[mask, col])
-                                .loc[chain, :])
+                                chains[col].sem().loc[chain])
+                #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
+                #                chains[col].cov().loc[chain, :])
+                #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
+                #                chains[col].corr(mcmc.loc[mask, col])
+                #                .loc[chain, :])
                 q = np.random.rand()
                 assert_allclose(mcmc.loc[mask, col].quantile(q),
-                                chains[[col]].quantile(q).loc[chain, :])
+                                chains[col].quantile(q).loc[chain])
 
-                group = chains[[col]].get_group(chain)
+                group = chains[col].get_group(chain)
                 assert_allclose(mcmc.loc[mask, col].mean(), group.mean())
                 assert_allclose(mcmc.loc[mask, col].std(), group.std())
                 assert_allclose(mcmc.loc[mask, col].median(), group.median())
@@ -1462,7 +1452,6 @@ def test_groupby_stats():
                 assert_allclose(mcmc.loc[mask, col].mad(), group.mad())
                 assert_allclose(mcmc.loc[mask, col].sem(), group.sem())
 
-
     sample = chains.sample(5)
     assert len(sample) == 10
     assert sample.value_counts('chain')[1] == 5

From bf07118d3bcbb917c85152085c7d0a59341d729d Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 12:50:00 +0100
Subject: [PATCH 49/71] Added a wrapper for cov, corr, corrwith

---
 anesthetic/weighted_pandas.py | 41 +++++++++++++++--------------------
 tests/test_samples.py         |  8 +++----
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 8bbd386d..c0e950cc 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -53,11 +53,6 @@ def kurtosis(self, *args, **kwargs):  # noqa: D102
             *args, **kwargs)).set_weights(self.get_weights())
         return result.__finalize__(self.obj, method="groupby")
 
-    def skew(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).skew(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
-
     def sem(self, *args, **kwargs):  # noqa: D102
         result = self.agg(lambda df: self.obj._constructor(df).sem(
             *args, **kwargs)).set_weights(self.get_weights())
@@ -72,6 +67,21 @@ def get_weights(self):
         """Return the weights of the grouped samples."""
         return self.agg(lambda df: df.get_weights().sum())
 
+    def _make_wrapper(self, name):
+        _wrapper = super()._make_wrapper(name)
+
+        def wrapper(*args, **kwargs):
+            result = _wrapper(*args, **kwargs)
+            try:
+                index = result.index.get_level_values(self.keys)
+                weights = self.get_weights()[index]
+            except KeyError:
+                weights = self.get_weights()
+            return result.set_weights(weights, level=1)
+
+        wrapper.__name__ = name
+        return wrapper
+
 
 class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
     """Weighted version of ``pandas.core.groupby.SeriesGroupBy``."""
@@ -79,9 +89,6 @@ class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
     def sample(self, *args, **kwargs):  # noqa: D102
         return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
-    def cov(self, *args, **kwargs):  # noqa: D102 
-        return super().cov(*args, **kwargs).set_weights(self.get_weights())
-
 
 class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
     """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``."""
@@ -105,7 +112,9 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
                 as_index=self.as_index,
                 sort=self.sort,
                 group_keys=self.group_keys,
+                squeeze=self.squeeze,
                 observed=self.observed,
+                mutated=self.mutated,
                 dropna=self.dropna,
             )
         elif ndim == 1:
@@ -115,11 +124,10 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
                 subset,
                 level=self.level,
                 grouper=self.grouper,
-                exclusions=self.exclusions,
                 selection=key,
-                as_index=self.as_index,
                 sort=self.sort,
                 group_keys=self.group_keys,
+                squeeze=self.squeeze,
                 observed=self.observed,
                 dropna=self.dropna,
             )
@@ -129,19 +137,6 @@ def _gotitem(self, key, ndim: int, subset=None):  # pragma: no cover
     def sample(self, *args, **kwargs):  # noqa: D102
         return super().sample(weights=self.obj.get_weights(), *args, **kwargs)
 
-    def cov(self, *args, **kwargs):  # noqa: D102 
-        ans = super().cov(*args, **kwargs)
-        index = ans.index.get_level_values(self.keys)
-        weights = self.get_weights()[index]
-        return ans.set_weights(weights, level=1)
-
-    def corr(self, *args, **kwargs):  # noqa: D102 
-        ans = super().corr(*args, **kwargs)
-        index = ans.index.get_level_values(self.keys)
-        weights = self.get_weights()[index]
-        return ans.set_weights(weights, level=1)
-
-
 
 class _WeightedObject(object):
     """Common methods for `WeightedSeries` and `WeightedDataFrame`.
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6d701de2..6ece6cdc 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1356,7 +1356,7 @@ def test_groupby_stats():
     assert chains.sem().isweighted() is True
     assert chains.corr().isweighted() is True
     assert chains.cov().isweighted() is True
-    #assert chains.corrwith(mcmc).isweighted() is True
+    assert chains.corrwith(mcmc).isweighted() is True
 
     w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum()
     w2 = mcmc.loc[mcmc.chain == 2].get_weights().sum()
@@ -1427,12 +1427,12 @@ def test_groupby_stats():
                                 chains[col].kurtosis().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].skew(),
                                 chains[col].skew().loc[chain])
-                #assert_allclose(mcmc.loc[mask, col].mad(),
-                #                chains[col].mad().loc[chain])
+                assert_allclose(mcmc.loc[mask, col].mad(),
+                                chains[col].mad().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].sem(),
                                 chains[col].sem().loc[chain])
                 #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
-                #                chains[col].cov().loc[chain, :])
+                #                chains[col].cov(mcmc.loc[mask, col]))
                 #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
                 #                chains[col].corr(mcmc.loc[mask, col])
                 #                .loc[chain, :])

From 17d4332671b9b1de97e0a625c91033a55ab66346 Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 19:41:59 +0100
Subject: [PATCH 50/71] corr and cov now working

---
 anesthetic/weighted_pandas.py | 35 +++++++++++++++++++++++++----------
 tests/test_samples.py         | 16 +++++++++++-----
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index c0e950cc..2a2228b6 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -12,6 +12,7 @@
 from numpy.ma import masked_array
 from anesthetic.utils import (compress_weights, channel_capacity, quantile,
                               temporary_seed, adjust_docstrings)
+from pandas.core.dtypes.missing import notna
 
 
 class WeightedGroupBy(GroupBy):
@@ -263,18 +264,32 @@ def var(self, skipna=True):  # noqa: D102
         return np.average(masked_array((self-mean)**2, null),
                           weights=self.get_weights())
 
-    def cov(self, other, skipna=True):  # noqa: D102
-        null = (self.isnull() | other.isnull()) & skipna
-        x = self.mean(skipna=skipna)
-        y = other.mean(skipna=skipna)
-        if np.isnan(x) or np.isnan(y):
+    def cov(self, other, min_periods=None, *args, **kwargs):  # noqa: D102
+
+        this, other = self.align(other, join="inner", copy=False)
+        if len(this) == 0:
             return np.nan
-        return np.average(masked_array((self-x)*(other-y), null),
-                          weights=self.get_weights())
 
-    def corr(self, other, method="pearson", skipna=True):  # noqa: D102
-        norm = self.std(skipna=skipna)*other.std(skipna=skipna)
-        return self.cov(other, skipna=skipna)/norm
+        if min_periods is None:
+            min_periods = 1
+
+        weights = self.index.to_frame()['weights']
+        weights, _ = weights.align(other, join="inner", copy=False)
+
+        valid = notna(this) & notna(other)
+        if not valid.all():
+            this = this[valid]
+            other = other[valid]
+            weights = weights[valid]
+
+        if len(this) < min_periods:
+            return np.nan
+
+        return np.cov(this, other, aweights=weights)[0, 1]
+
+    def corr(self, other, *args, **kwargs):  # noqa: D102
+        norm = self.std(skipna=True)*other.std(skipna=True)
+        return self.cov(other, *args, **kwargs)/norm
 
     def kurt(self, skipna=True):  # noqa: D102
         null = self.isnull() & skipna
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 6ece6cdc..8917efc2 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1431,11 +1431,12 @@ def test_groupby_stats():
                                 chains[col].mad().loc[chain])
                 assert_allclose(mcmc.loc[mask, col].sem(),
                                 chains[col].sem().loc[chain])
-                #assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
-                #                chains[col].cov(mcmc.loc[mask, col]))
-                #assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
-                #                chains[col].corr(mcmc.loc[mask, col])
-                #                .loc[chain, :])
+                assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
+                                chains[col].cov(mcmc.loc[mask, col])
+                                .loc[chain])
+                assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
+                                chains[col].corr(mcmc.loc[mask, col])
+                                .loc[chain])
                 q = np.random.rand()
                 assert_allclose(mcmc.loc[mask, col].quantile(q),
                                 chains[col].quantile(q).loc[chain])
@@ -1452,6 +1453,11 @@ def test_groupby_stats():
                 assert_allclose(mcmc.loc[mask, col].mad(), group.mad())
                 assert_allclose(mcmc.loc[mask, col].sem(), group.sem())
 
+                assert_allclose(mcmc.loc[mask, col].cov(mcmc.loc[mask, col]),
+                                group.cov(mcmc.loc[mask, col]))
+                assert_allclose(mcmc.loc[mask, col].corr(mcmc.loc[mask, col]),
+                                group.corr(mcmc.loc[mask, col]))
+
     sample = chains.sample(5)
     assert len(sample) == 10
     assert sample.value_counts('chain')[1] == 5

From a71151ee297733007c22ef73294b1d4246a4117f Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 20:35:26 +0100
Subject: [PATCH 51/71] reduced code repetition

---
 anesthetic/weighted_pandas.py | 38 +++++++++++------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 2a2228b6..9da1dfd9 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -21,48 +21,34 @@ class WeightedGroupBy(GroupBy):
     grouper: ops.BaseGrouper
     """:meta private:"""
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def _add_weights(self, name, *args, **kwargs):
+        result = self.agg(lambda df: getattr(self.obj._constructor(df), name)
+                          (*args, **kwargs)).set_weights(self.get_weights())
+        return result.__finalize__(self.obj, method="groupby")
 
     def mean(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).mean(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("mean", *args, **kwargs)
 
     def std(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).std(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("std", *args, **kwargs)
 
     def median(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).median(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("median", *args, **kwargs)
 
     def var(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).var(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("var", *args, **kwargs)
 
     def kurt(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).kurt(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("kurt", *args, **kwargs)
 
     def kurtosis(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).kurtosis(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("kurtosis", *args, **kwargs)
 
     def sem(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).sem(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("sem", *args, **kwargs)
 
     def quantile(self, *args, **kwargs):  # noqa: D102
-        result = self.agg(lambda df: self.obj._constructor(df).quantile(
-            *args, **kwargs)).set_weights(self.get_weights())
-        return result.__finalize__(self.obj, method="groupby")
+        return self._add_weights("quantile", *args, **kwargs)
 
     def get_weights(self):
         """Return the weights of the grouped samples."""

From 2935434f61d9f8c3fae355a6ee04d6825c723e2a Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 23:09:14 +0100
Subject: [PATCH 52/71] corrwith

---
 anesthetic/weighted_pandas.py | 39 +++++++++++++++++++++++++++++------
 tests/test_samples.py         |  3 +++
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 9da1dfd9..9e15357f 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -230,6 +230,8 @@ class WeightedSeries(_WeightedObject, Series):
     """Weighted version of :class:`pandas.Series`."""
 
     def mean(self, skipna=True):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         null = self.isnull() & skipna
         return np.average(masked_array(self, null), weights=self.get_weights())
 
@@ -243,6 +245,8 @@ def median(self, *args, **kwargs):  # noqa: D102
         return self.quantile(*args, **kwargs)
 
     def var(self, skipna=True):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         null = self.isnull() & skipna
         mean = self.mean(skipna=skipna)
         if np.isnan(mean):
@@ -278,6 +282,8 @@ def corr(self, other, *args, **kwargs):  # noqa: D102
         return self.cov(other, *args, **kwargs)/norm
 
     def kurt(self, skipna=True):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         null = self.isnull() & skipna
         mean = self.mean(skipna=skipna)
         std = self.std(skipna=skipna)
@@ -287,6 +293,8 @@ def kurt(self, skipna=True):  # noqa: D102
                           weights=self.get_weights())
 
     def skew(self, skipna=True):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         null = self.isnull() & skipna
         mean = self.mean(skipna=skipna)
         std = self.std(skipna=skipna)
@@ -296,6 +304,8 @@ def skew(self, skipna=True):  # noqa: D102
                           weights=self.get_weights())
 
     def mad(self, skipna=True):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         null = self.isnull() & skipna
         mean = self.mean(skipna=skipna)
         if np.isnan(mean):
@@ -369,6 +379,9 @@ class WeightedDataFrame(_WeightedObject, DataFrame):
 
     def mean(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         if self.isweighted(axis):
+            if self.get_weights(axis).sum() == 0:
+                return self._constructor_sliced(np.nan,
+                                                index=self._get_axis(1-axis))
             null = self.isnull() & skipna
             mean = np.average(masked_array(self, null),
                               weights=self.get_weights(axis), axis=axis)
@@ -387,6 +400,9 @@ def median(self, *args, **kwargs):  # noqa: D102
 
     def var(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         if self.isweighted(axis):
+            if self.get_weights(axis).sum() == 0:
+                return self._constructor_sliced(np.nan,
+                                                index=self._get_axis(1-axis))
             null = self.isnull() & skipna
             mean = self.mean(axis=axis, skipna=skipna)
             var = np.average(masked_array((self-mean)**2, null),
@@ -423,14 +439,19 @@ def corr(self, method="pearson", skipna=True,
 
     def corrwith(self, other, axis=0, drop=False, method="pearson",
                  *args, **kwargs):  # noqa: D102
-        if self.isweighted(axis):
+        axis = self._get_axis_number(axis)
+        if not self.isweighted(axis):
+            return super().corrwith(other, drop=drop, axis=axis, method=method,
+                                    *args, **kwargs)
+        else:
             if isinstance(other, Series):
                 answer = self.apply(lambda x: other.corr(x, method=method),
                                     axis=axis)
                 return self._constructor_sliced(answer)
 
             left, right = self.align(other, join="inner", copy=False)
-            weights = self.get_weights(axis)
+            weights = self.index.to_frame()['weights']
+            weights, _ = weights.align(other, join="inner", copy=False)
 
             if axis == 1:
                 left = left.T
@@ -444,7 +465,7 @@ def corrwith(self, other, axis=0, drop=False, method="pearson",
             ldem = left - left.mean()
             rdem = right - right.mean()
 
-            num = (ldem * rdem * weights[:, None]).sum()
+            num = (ldem * rdem * weights.to_numpy()[:, None]).sum()
             dom = weights.sum() * left.std() * right.std()
 
             correl = num / dom
@@ -460,12 +481,12 @@ def corrwith(self, other, axis=0, drop=False, method="pearson",
                                                     index=idx_diff)])
 
             return self._constructor_sliced(correl)
-        else:
-            return super().corrwith(other, drop=drop, axis=axis, method=method,
-                                    *args, **kwargs)
 
     def kurt(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         if self.isweighted(axis):
+            if self.get_weights(axis).sum() == 0:
+                return self._constructor_sliced(np.nan,
+                                                index=self._get_axis(1-axis))
             null = self.isnull() & skipna
             mean = self.mean(axis=axis, skipna=skipna)
             std = self.std(axis=axis, skipna=skipna)
@@ -477,6 +498,9 @@ def kurt(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
 
     def skew(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         if self.isweighted(axis):
+            if self.get_weights(axis).sum() == 0:
+                return self._constructor_sliced(np.nan,
+                                                index=self._get_axis(1-axis))
             null = self.isnull() & skipna
             mean = self.mean(axis=axis, skipna=skipna)
             std = self.std(axis=axis, skipna=skipna)
@@ -488,6 +512,9 @@ def skew(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
 
     def mad(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         if self.isweighted(axis):
+            if self.get_weights(axis).sum() == 0:
+                return self._constructor_sliced(np.nan,
+                                                index=self._get_axis(1-axis))
             null = self.isnull() & skipna
             mean = self.mean(axis=axis, skipna=skipna)
             mad = np.average(masked_array(abs(self-mean), null),
diff --git a/tests/test_samples.py b/tests/test_samples.py
index 8917efc2..8f308cf1 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1356,6 +1356,7 @@ def test_groupby_stats():
     assert chains.sem().isweighted() is True
     assert chains.corr().isweighted() is True
     assert chains.cov().isweighted() is True
+    assert chains.hist().isweighted() is True
     assert chains.corrwith(mcmc).isweighted() is True
 
     w1 = mcmc.loc[mcmc.chain == 1].get_weights().sum()
@@ -1392,6 +1393,8 @@ def test_groupby_stats():
                         chains.cov().loc[chain])
         assert_allclose(mcmc.loc[mask, params].corr(),
                         chains.corr().loc[chain])
+        assert_allclose([1, 1], chains.corrwith(mcmc.loc[mask, params]
+                                                ).loc[chain])
 
         group = chains.get_group(chain).drop(
                 columns=('chain', '$n_\\mathrm{chain}$'))

From 93b06a03cb004e6cdeb4afc075e2bfe91094d0d1 Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 23:35:55 +0100
Subject: [PATCH 53/71] Corrections to two extra functions

---
 anesthetic/weighted_pandas.py | 11 ++++++-----
 tests/test_weighted_pandas.py |  8 +-------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index 9e15357f..ac595438 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -411,10 +411,10 @@ def var(self, axis=0, skipna=True, *args, **kwargs):  # noqa: D102
         else:
             return super().var(axis=axis, skipna=skipna, *args, **kwargs)
 
-    def cov(self, skipna=True, *args, **kwargs):  # noqa: D102
+    def cov(self, *args, **kwargs):  # noqa: D102
         if self.isweighted():
-            null = self.isnull() & skipna
-            mean = self.mean(skipna=skipna)
+            null = self.isnull()
+            mean = self.mean(skipna=True)
             x = masked_array(self - mean, null)
             cov = np.ma.dot(self.get_weights()*x.T, x) \
                 / self.get_weights().sum().T
@@ -450,13 +450,14 @@ def corrwith(self, other, axis=0, drop=False, method="pearson",
                 return self._constructor_sliced(answer)
 
             left, right = self.align(other, join="inner", copy=False)
-            weights = self.index.to_frame()['weights']
-            weights, _ = weights.align(other, join="inner", copy=False)
 
             if axis == 1:
                 left = left.T
                 right = right.T
 
+            weights = left.index.to_frame()['weights']
+            weights, _ = weights.align(right, join="inner", copy=False)
+
             # mask missing values
             left = left + right * 0
             right = right + left * 0
diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py
index 71128e08..f68e4723 100644
--- a/tests/test_weighted_pandas.py
+++ b/tests/test_weighted_pandas.py
@@ -176,7 +176,7 @@ def test_WeightedDataFrame_corrwith(frame):
     assert isinstance(correl, WeightedSeries)
     assert not correl.isweighted()
     assert_array_equal(correl.index, frame.columns)
-    assert_allclose(correl, frame.corr()['A'])
+    assert_allclose(correl, frame.corr()['A'], atol=1e-2)
 
     correl = frame.corrwith(frame[['A', 'B']])
     assert isinstance(correl, WeightedSeries)
@@ -490,12 +490,6 @@ def test_WeightedSeries_cov(frame):
     assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
     assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)
 
-    frame.loc[0, 'B'] = np.nan
-    assert ~np.isnan(frame.A.cov(frame.B))
-    assert np.isnan(frame.A.cov(frame.B, skipna=False))
-    assert ~np.isnan(frame.B.cov(frame.A))
-    assert np.isnan(frame.B.cov(frame.A, skipna=False))
-
 
 def test_WeightedSeries_corr(frame):
     assert_allclose(frame.A.corr(frame.A), 1., atol=1e-2)

From 0655a9c20f6828b4ea9e90ec1c6a3b3cec992568 Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 29 Mar 2023 23:59:49 +0100
Subject: [PATCH 54/71] skipna no longer available for cov

---
 tests/test_weighted_pandas.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py
index f68e4723..869bf0a8 100644
--- a/tests/test_weighted_pandas.py
+++ b/tests/test_weighted_pandas.py
@@ -417,12 +417,6 @@ def test_WeightedDataFrame_nan(frame):
     assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
                        [True, False, False, False, False, False])
 
-    assert ~frame.cov().isna().any().any()
-    ans = np.zeros((6, 6), dtype=bool)
-    ans[0] = True
-    ans[:, 0] = True
-    assert_array_equal(frame.cov(skipna=False).isna(), ans)
-
     frame['B'][2] = np.nan
     assert ~frame.mean().isna().any()
     assert_array_equal(frame.mean(skipna=False).isna(),
@@ -436,11 +430,6 @@ def test_WeightedDataFrame_nan(frame):
     assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
                        [True, False, True, False, False, False])
 
-    assert ~frame.cov().isna().any().any()
-    ans[1] = True
-    ans[:, 1] = True
-    assert_array_equal(frame.cov(skipna=False).isna(), ans)
-
     frame['C'][4] = np.nan
     frame['D'][5] = np.nan
     frame['E'][6] = np.nan
@@ -455,9 +444,6 @@ def test_WeightedDataFrame_nan(frame):
     assert_array_equal(frame.std(axis=1, skipna=False).isna()[0:6],
                        [True, False, True, False, True, True])
 
-    assert ~frame.cov().isna().any().any()
-    assert frame.cov(skipna=False).isna().all().all()
-
     assert_allclose(frame.mean(), 0.5, atol=1e-2)
     assert_allclose(frame.std(), (1./12)**0.5, atol=1e-2)
     assert_allclose(frame.cov(), (1./12)*np.identity(6), atol=1e-2)

From 33dd6e0ad74946eefe526c16918448ae67fb18ba Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Thu, 30 Mar 2023 07:33:41 +0100
Subject: [PATCH 55/71] Completed coverage with new nan

---
 anesthetic/weighted_pandas.py |  4 ++++
 tests/test_weighted_pandas.py | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index ac595438..a7b1e7de 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -242,6 +242,8 @@ def kurtosis(self, *args, **kwargs):  # noqa: D102
         return self.kurt(*args, **kwargs)
 
     def median(self, *args, **kwargs):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         return self.quantile(*args, **kwargs)
 
     def var(self, skipna=True):  # noqa: D102
@@ -317,6 +319,8 @@ def sem(self, skipna=True):  # noqa: D102
         return np.sqrt(self.var(skipna=skipna)/self.neff())
 
     def quantile(self, q=0.5, interpolation='linear'):  # noqa: D102
+        if self.get_weights().sum() == 0:
+            return np.nan
         return quantile(self.to_numpy(), q, self.get_weights(), interpolation)
 
     def compress(self, ncompress=True):
diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py
index 869bf0a8..2aaa5413 100644
--- a/tests/test_weighted_pandas.py
+++ b/tests/test_weighted_pandas.py
@@ -453,6 +453,18 @@ def test_WeightedDataFrame_nan(frame):
     assert isinstance(frame.mean(axis=1), WeightedSeries)
     assert frame.mean(axis=1).isweighted()
 
+    assert frame[:0].mean().isna().all()
+    assert frame[:0].std().isna().all()
+    assert frame[:0].median().isna().all()
+    assert frame[:0].var().isna().all()
+    assert frame[:0].cov().isna().all().all()
+    assert frame[:0].corr().isna().all().all()
+    assert frame[:0].kurt().isna().all()
+    assert frame[:0].skew().isna().all()
+    assert frame[:0].mad().isna().all()
+    assert frame[:0].sem().isna().all()
+    assert frame[:0].quantile().isna().all()
+
 
 def test_WeightedSeries_mean(series):
     series[0] = np.nan
@@ -588,6 +600,18 @@ def test_WeightedSeries_nan(series):
     assert_allclose(series.var(), 1./12, atol=1e-2)
     assert_allclose(series.std(), (1./12)**0.5, atol=1e-2)
 
+    assert np.isnan(series[:0].mean())
+    assert np.isnan(series[:0].std())
+    assert np.isnan(series[:0].median())
+    assert np.isnan(series[:0].var())
+    assert np.isnan(series[:0].cov(series))
+    assert np.isnan(series[:0].corr(series))
+    assert np.isnan(series[:0].kurt())
+    assert np.isnan(series[:0].skew())
+    assert np.isnan(series[:0].mad())
+    assert np.isnan(series[:0].sem())
+    assert np.isnan(series[:0].quantile())
+
 
 @pytest.fixture
 def mcmc_df():

From 5ec1fecd33235113f37c8539c3ffa3e6b7234bb4 Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Thu, 30 Mar 2023 10:39:22 +0100
Subject: [PATCH 56/71] Increase coverage

---
 anesthetic/weighted_pandas.py | 8 +-------
 tests/test_weighted_pandas.py | 4 ++++
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index a7b1e7de..b1a85410 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -256,15 +256,12 @@ def var(self, skipna=True):  # noqa: D102
         return np.average(masked_array((self-mean)**2, null),
                           weights=self.get_weights())
 
-    def cov(self, other, min_periods=None, *args, **kwargs):  # noqa: D102
+    def cov(self, other, *args, **kwargs):  # noqa: D102
 
         this, other = self.align(other, join="inner", copy=False)
         if len(this) == 0:
             return np.nan
 
-        if min_periods is None:
-            min_periods = 1
-
         weights = self.index.to_frame()['weights']
         weights, _ = weights.align(other, join="inner", copy=False)
 
@@ -274,9 +271,6 @@ def cov(self, other, min_periods=None, *args, **kwargs):  # noqa: D102
             other = other[valid]
             weights = weights[valid]
 
-        if len(this) < min_periods:
-            return np.nan
-
         return np.cov(this, other, aweights=weights)[0, 1]
 
     def corr(self, other, *args, **kwargs):  # noqa: D102
diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py
index 2aaa5413..271f98e8 100644
--- a/tests/test_weighted_pandas.py
+++ b/tests/test_weighted_pandas.py
@@ -488,6 +488,10 @@ def test_WeightedSeries_cov(frame):
     assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
     assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)
 
+    frame['A'][0] = np.nan
+    assert_allclose(frame.A.cov(frame.A), 1./12, atol=1e-2)
+    assert_allclose(frame.A.cov(frame.B), 0, atol=1e-2)
+
 
 def test_WeightedSeries_corr(frame):
     assert_allclose(frame.A.corr(frame.A), 1., atol=1e-2)

From 5113b61efba108327645c1db4f430b59e0138ead Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Thu, 30 Mar 2023 16:29:20 +0100
Subject: [PATCH 57/71] add test for groupby().hist()

---
 tests/test_samples.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 8f308cf1..e08050d2 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1471,3 +1471,24 @@ def test_groupby_stats():
     assert len(sample) == 10
     assert sample.value_counts()[1] == 5
     assert sample.value_counts()[2] == 5
+
+
+def test_groupby_plots():
+    mcmc = read_chains('./tests/example_data/cb')
+    params = ['x0', 'x1']
+    chains = mcmc[params + ['chain']].groupby(('chain', '$n_\\mathrm{chain}$'))
+    for param in params:
+        gb_plot = chains.hist(param)
+        for chain in [1, 2]:
+            mcmc_axes = mcmc.loc[mcmc.chain == chain].hist(param).flatten()
+            gb_axes = gb_plot[chain].values[0].flatten()
+
+            mcmc_widths = [p.get_width() for ax in mcmc_axes
+                           for p in ax.patches]
+            gb_widths = [p.get_width() for ax in gb_axes for p in ax.patches]
+            assert mcmc_widths == gb_widths
+
+            mcmc_heights = [p.get_height() for ax in mcmc_axes
+                            for p in ax.patches]
+            gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches]
+            assert mcmc_heights == gb_heights

From 918986c6d64fa8a9fd55c55925a1d15d4d9c323a Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Thu, 30 Mar 2023 17:43:44 +0100
Subject: [PATCH 58/71] add test for groupby().plot.hist(), not happy with the
 janky slicing here...

---
 tests/test_samples.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index e08050d2..7414a206 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1486,9 +1486,21 @@ def test_groupby_plots():
             mcmc_widths = [p.get_width() for ax in mcmc_axes
                            for p in ax.patches]
             gb_widths = [p.get_width() for ax in gb_axes for p in ax.patches]
-            assert mcmc_widths == gb_widths
+            assert_allclose(mcmc_widths, gb_widths)
 
             mcmc_heights = [p.get_height() for ax in mcmc_axes
                             for p in ax.patches]
             gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches]
-            assert mcmc_heights == gb_heights
+            assert_allclose(mcmc_heights, gb_heights)
+    plt.close()
+
+    for param in params:
+        _, gb_ax = plt.subplots()
+        gb_plots = chains[param].plot.hist(ax=gb_ax)
+        _, mcmc_ax = plt.subplots()
+        for chain, gb_ax in zip([1, 2], gb_plots):
+            mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.hist(
+                    ax=mcmc_ax)
+        mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
+        gb_widths = [p.get_width() for p in gb_ax.patches]
+        assert_allclose(mcmc_widths, gb_widths)

From b13b0a2c98167fe6f7026e8163137d2a2934652a Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 16:50:10 +0100
Subject: [PATCH 59/71] add test for groupby().plot.kde()

---
 tests/test_samples.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 7414a206..86cb4fba 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1492,7 +1492,7 @@ def test_groupby_plots():
                             for p in ax.patches]
             gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches]
             assert_allclose(mcmc_heights, gb_heights)
-    plt.close()
+            plt.close()
 
     for param in params:
         _, gb_ax = plt.subplots()
@@ -1504,3 +1504,15 @@ def test_groupby_plots():
         mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
         gb_widths = [p.get_width() for p in gb_ax.patches]
         assert_allclose(mcmc_widths, gb_widths)
+    plt.close()
+
+    for param in params:
+        _, gb_ax = plt.subplots()
+        gb_plots = chains[param].plot.kde(ax=gb_ax)
+        _, mcmc_ax = plt.subplots()
+        for chain, gb_ax in zip([1, 2], gb_plots):
+            mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.kde(
+                    ax=mcmc_ax)
+        [assert_allclose(m.get_data(), g.get_data())
+         for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
+    plt.close()

From 9709b380f7f059f45d06b700e7931697bf9ec449 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 16:52:19 +0100
Subject: [PATCH 60/71] add tests for hist_1d and kde_1d

---
 tests/test_samples.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 86cb4fba..5ee06413 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1506,6 +1506,18 @@ def test_groupby_plots():
         assert_allclose(mcmc_widths, gb_widths)
     plt.close()
 
+    for param in params:
+        _, gb_ax = plt.subplots()
+        gb_plots = chains[param].plot.hist_1d(ax=gb_ax)
+        _, mcmc_ax = plt.subplots()
+        for chain, gb_ax in zip([1, 2], gb_plots):
+            mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.hist_1d(
+                    ax=mcmc_ax)
+        mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
+        gb_widths = [p.get_width() for p in gb_ax.patches]
+        assert_allclose(mcmc_widths, gb_widths)
+    plt.close()
+
     for param in params:
         _, gb_ax = plt.subplots()
         gb_plots = chains[param].plot.kde(ax=gb_ax)
@@ -1516,3 +1528,14 @@ def test_groupby_plots():
         [assert_allclose(m.get_data(), g.get_data())
          for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
     plt.close()
+
+    for param in params:
+        _, gb_ax = plt.subplots()
+        gb_plots = chains[param].plot.kde_1d(ax=gb_ax)
+        _, mcmc_ax = plt.subplots()
+        for chain, gb_ax in zip([1, 2], gb_plots):
+            mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.kde_1d(
+                    ax=mcmc_ax)
+        [assert_allclose(m.get_data(), g.get_data())
+         for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
+    plt.close()

From 338fc8a5c0379063caf594348b04c9f198a81d7b Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 16:56:06 +0100
Subject: [PATCH 61/71] test for fastkde_1d

---
 tests/test_samples.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 5ee06413..224eaaaf 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1539,3 +1539,15 @@ def test_groupby_plots():
         [assert_allclose(m.get_data(), g.get_data())
          for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
     plt.close()
+
+    if 'fastkde' in sys.modules:
+        for param in params:
+            _, gb_ax = plt.subplots()
+            gb_plots = chains[param].plot.fastkde_1d(ax=gb_ax)
+            _, mcmc_ax = plt.subplots()
+            for chain, gb_ax in zip([1, 2], gb_plots):
+                mcmc_ax = mcmc.loc[mcmc.chain == chain][param].plot.fastkde_1d(
+                        ax=mcmc_ax)
+            [assert_allclose(m.get_data(), g.get_data())
+             for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
+        plt.close()

From 0e2676a9dbf7cffba1a2b68ac055d42478785bf8 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 17:11:53 +0100
Subject: [PATCH 62/71] test for hist_2d

---
 tests/test_samples.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 224eaaaf..3ac178d2 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1540,6 +1540,19 @@ def test_groupby_plots():
          for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
     plt.close()
 
+    for chain, gb_ax in zip([1, 2], chains.plot.hist_2d(*params)):
+        mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.hist_2d(*params)
+        mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
+        gb_widths = [p.get_width() for p in gb_ax.patches]
+        assert_allclose(mcmc_widths, gb_widths)
+        mcmc_heights = [p.get_height() for p in mcmc_ax.patches]
+        gb_heights = [p.get_height() for p in gb_ax.patches]
+        assert_allclose(mcmc_heights, gb_heights)
+        mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches]
+        gb_colors = [p.get_facecolor() for p in gb_ax.patches]
+        assert_allclose(mcmc_colors, gb_colors)
+    plt.close()
+
     if 'fastkde' in sys.modules:
         for param in params:
             _, gb_ax = plt.subplots()

From 9589921bda822cd55faed4104837d8529e14016d Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 17:23:33 +0100
Subject: [PATCH 63/71] plt.close('all')

---
 tests/test_samples.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 3ac178d2..21e38b15 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1492,7 +1492,7 @@ def test_groupby_plots():
                             for p in ax.patches]
             gb_heights = [p.get_height() for ax in gb_axes for p in ax.patches]
             assert_allclose(mcmc_heights, gb_heights)
-            plt.close()
+            plt.close('all')
 
     for param in params:
         _, gb_ax = plt.subplots()
@@ -1504,7 +1504,7 @@ def test_groupby_plots():
         mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
         gb_widths = [p.get_width() for p in gb_ax.patches]
         assert_allclose(mcmc_widths, gb_widths)
-    plt.close()
+    plt.close('all')
 
     for param in params:
         _, gb_ax = plt.subplots()
@@ -1516,7 +1516,7 @@ def test_groupby_plots():
         mcmc_widths = [p.get_width() for p in mcmc_ax.patches]
         gb_widths = [p.get_width() for p in gb_ax.patches]
         assert_allclose(mcmc_widths, gb_widths)
-    plt.close()
+    plt.close('all')
 
     for param in params:
         _, gb_ax = plt.subplots()
@@ -1527,7 +1527,7 @@ def test_groupby_plots():
                     ax=mcmc_ax)
         [assert_allclose(m.get_data(), g.get_data())
          for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
-    plt.close()
+    plt.close('all')
 
     for param in params:
         _, gb_ax = plt.subplots()
@@ -1538,7 +1538,7 @@ def test_groupby_plots():
                     ax=mcmc_ax)
         [assert_allclose(m.get_data(), g.get_data())
          for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
-    plt.close()
+    plt.close('all')
 
     for chain, gb_ax in zip([1, 2], chains.plot.hist_2d(*params)):
         mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.hist_2d(*params)
@@ -1551,7 +1551,7 @@ def test_groupby_plots():
         mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches]
         gb_colors = [p.get_facecolor() for p in gb_ax.patches]
         assert_allclose(mcmc_colors, gb_colors)
-    plt.close()
+    plt.close('all')
 
     if 'fastkde' in sys.modules:
         for param in params:
@@ -1563,4 +1563,4 @@ def test_groupby_plots():
                         ax=mcmc_ax)
             [assert_allclose(m.get_data(), g.get_data())
              for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
-        plt.close()
+        plt.close('all')

From 4e8e50f382ee99dbe35af2688889b88469811c71 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 17:24:09 +0100
Subject: [PATCH 64/71] test for kde_2d

---
 tests/test_samples.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 21e38b15..9730f15a 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1553,6 +1553,16 @@ def test_groupby_plots():
         assert_allclose(mcmc_colors, gb_colors)
     plt.close('all')
 
+    for chain, gb_ax in zip([1, 2], chains.plot.kde_2d(*params)):
+        mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.kde_2d(*params)
+        mcmc_verts = [p.get_verts() for p in mcmc_ax.patches]
+        gb_verts = [p.get_verts() for p in gb_ax.patches]
+        assert_allclose(mcmc_verts, gb_verts)
+        mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches]
+        gb_colors = [p.get_facecolor() for p in gb_ax.patches]
+        assert_allclose(mcmc_colors, gb_colors)
+    plt.close('all')
+
     if 'fastkde' in sys.modules:
         for param in params:
             _, gb_ax = plt.subplots()

From a631d601b53eba9ea81bf74a63f7f29c0f14d0f1 Mon Sep 17 00:00:00 2001
From: Ormorod <Adam.Ormondroyd@gmail.com>
Date: Fri, 31 Mar 2023 17:24:54 +0100
Subject: [PATCH 65/71] test for fastkde_2d

---
 tests/test_samples.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 9730f15a..8a31eda6 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1574,3 +1574,13 @@ def test_groupby_plots():
             [assert_allclose(m.get_data(), g.get_data())
              for m, g in zip(mcmc_ax.get_lines(), gb_ax.get_lines())]
         plt.close('all')
+
+        for chain, gb_ax in zip([1, 2], chains.plot.fastkde_2d(*params)):
+            mcmc_ax = mcmc.loc[mcmc.chain == chain].plot.fastkde_2d(*params)
+            mcmc_verts = [p.get_verts() for p in mcmc_ax.patches]
+            gb_verts = [p.get_verts() for p in gb_ax.patches]
+            assert_allclose(mcmc_verts, gb_verts)
+            mcmc_colors = [p.get_facecolor() for p in mcmc_ax.patches]
+            gb_colors = [p.get_facecolor() for p in gb_ax.patches]
+            assert_allclose(mcmc_colors, gb_colors)
+        plt.close('all')

From 132d80fcd3a60bb864c32876e85bdd041c9d874f Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Tue, 4 Apr 2023 17:40:30 +0100
Subject: [PATCH 66/71] Reinstated init function to get documentation to work

---
 .github/workflows/CI.yaml     | 1 -
 anesthetic/weighted_pandas.py | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 794814cf..2c56ff71 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -44,7 +44,6 @@ jobs:
       - name: Upgrade pip and install doc requirements
         run: |
           python -m pip install --upgrade pip
-          python -m pip install pip-tools
           python -m pip install -e ".[extras,docs]"
       - name: build documentation
         run: |
diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
index b1a85410..cfe5a1d9 100644
--- a/anesthetic/weighted_pandas.py
+++ b/anesthetic/weighted_pandas.py
@@ -21,6 +21,9 @@ class WeightedGroupBy(GroupBy):
     grouper: ops.BaseGrouper
     """:meta private:"""
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
     def _add_weights(self, name, *args, **kwargs):
         result = self.agg(lambda df: getattr(self.obj._constructor(df), name)
                           (*args, **kwargs)).set_weights(self.get_weights())

From 369c49cd245aedda43b02470b332201c66ccaf2e Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Tue, 4 Apr 2023 13:23:14 -0700
Subject: [PATCH 67/71] complete test coverage for explicit weight checks

---
 tests/test_samples.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index 8a31eda6..d1c54b5c 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -1368,6 +1368,11 @@ def test_groupby_stats():
     assert np.all(chains.kurt().get_weights() == [w1, w2])
     assert np.all(chains.kurtosis().get_weights() == [w1, w2])
     assert np.all(chains.skew().get_weights() == [w1, w2])
+    assert np.all(chains.sem().get_weights() == [w1, w2])
+    w = [w1 for _ in range(len(params))] + [w2 for _ in range(len(params))]
+    assert np.all(chains.corr().get_weights() == w)
+    assert np.all(chains.cov().get_weights() == w)
+    assert np.all(chains.corrwith(mcmc).get_weights() == [w1, w2])
 
     for chain in [1, 2]:
         mask = mcmc.chain == chain

From 122bf2b9799d6cbbf7c4dbef00bc0fec88ca15ec Mon Sep 17 00:00:00 2001
From: Will Handley <wh260@cam.ac.uk>
Date: Wed, 5 Apr 2023 14:29:51 +0100
Subject: [PATCH 68/71] Readme correction following #217

---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index c876a856..a00e6e9d 100644
--- a/README.rst
+++ b/README.rst
@@ -191,8 +191,8 @@ Why create another one? In general, any dedicated user of software will find tha
 
 .. code:: python
 
-    from anesthetic import MCMCSamples
-    samples = MCMCSamples(root=file_root)                         # Load the samples
+    from anesthetic import read_chains
+    samples = read_chains(file_root)                              # Load the samples
     samples['omegab'] = samples.omegabh2/(samples.H0/100)**2      # Define omegab
     samples.tex['omegab'] = '$\Omega_b$'                          # Label omegab
     samples.plot_1d('omegab')                                     # Simple 1D plot

From 5a5f106c382543e75d489afa5f6afc93d2d767c6 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Thu, 6 Apr 2023 22:57:34 -0700
Subject: [PATCH 69/71] fix `GelmanRubin` method now that `groupby` is fixed

---
 anesthetic/samples.py | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/anesthetic/samples.py b/anesthetic/samples.py
index 038faea0..9a910185 100644
--- a/anesthetic/samples.py
+++ b/anesthetic/samples.py
@@ -515,7 +515,7 @@ def remove_burn_in(self, burn_in, reset_index=False, inplace=False):
             Indicates whether to modify the existing array or return a copy.
 
         """
-        chains = self.groupby(('chain', '$n_\\mathrm{chain}$'),
+        chains = self.groupby(('chain', '$n_\\mathrm{chain}$'), sort=False,
                               group_keys=False)
         nchains = chains.ngroups
         if isinstance(burn_in, (int, float)):
@@ -574,25 +574,32 @@ def Gelman_Rubin(self, params=None):
                       and 'logL' not in key
                       and 'chain' not in key]
         chains = self[params+['chain']].groupby(
-                ('chain', '$n_\\mathrm{chain}$')
+                ('chain', '$n_\\mathrm{chain}$'), sort=False,
         )
+        nchains = chains.ngroups
 
         # Within chain variance ``W``
         # (average variance within each chain):
-        W = chains.cov().groupby(level=['params', 'labels']).mean().to_numpy()
-        # TODO: the above line should be a weighted mean
-        # --> need to fix groupby for WeightedDataFrames!
-
+        W = chains.cov().groupby(level=('params', 'labels'), sort=False).mean()
         # Between-chain variance ``B``
-        # (variance of the chain means compared to the full mean):
-        means_diff = (chains.mean() - self[params].mean()).to_numpy()
-        B = (means_diff.T @ means_diff) / (chains.ngroups - 1)
-        # B = chains.mean().cov().to_numpy()
-        # TODO: fix once groupby is fixed
-
-        L = np.linalg.cholesky(W)
-        invL = np.linalg.inv(L)
-        D = np.linalg.eigvalsh(invL @ B @ invL.T)
+        # (variance of the chain means):
+        B = np.atleast_2d(np.cov(chains.mean().T, ddof=1))
+        # We deliberately do not weight `B` by the effective number of samples
+        # (sum of the weights) here, because we want to notice outliers from
+        # shorter chains.
+        # In order to be conservative, we generally want to underestimate `W`
+        # and overestimate `B`, since `W` goes in the denominator and `B` in
+        # the numerator of the Gelman--Rubin statistic `Rminus1`.
+
+        try:
+            invL = np.linalg.inv(np.linalg.cholesky(W))
+        except np.linalg.LinAlgError as e:
+            raise np.linalg.LinAlgError(
+                "Make sure you do not have linearly dependent parameters, "
+                "e.g. having both `As` and `A=1e9*As` causes trouble.") from e
+        D = np.linalg.eigvalsh(invL @ ((nchains+1)/nchains * B) @ invL.T)
+        # The factor of `(nchains+1)/nchains` accounts for the additional
+        # uncertainty from using a finite number of chains.
         Rminus1 = np.max(np.abs(D))
         return Rminus1
 

From 9d10ce5da10910cff4718dfd746ecb22df33f045 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Fri, 7 Apr 2023 01:16:28 -0700
Subject: [PATCH 70/71] add test for `LinAlgError` when covariance matrix is
 not positive definite

---
 tests/test_samples.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index d1c54b5c..f95855f9 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -436,6 +436,9 @@ def test_mcmc_stats():
     assert mcmc_half.Gelman_Rubin() < 0.01
     assert mcmc_half.Gelman_Rubin(['x0']) < 0.01
     assert mcmc_half.Gelman_Rubin(['x1']) < 0.01
+    with pytest.raises(np.linalg.LinAlgError):
+        mcmc['y'] = mcmc.x1
+        mcmc.Gelman_Rubin(['x0', 'x1', 'y'])
 
     # more burn-in checks
     mcmc_new = mcmc.remove_burn_in(burn_in=200.9)

From 85fa6aefb57906f7226a523d3e078fb153a49ec1 Mon Sep 17 00:00:00 2001
From: lukashergt <lthergt@phas.ubc.ca>
Date: Fri, 7 Apr 2023 10:40:46 -0700
Subject: [PATCH 71/71] make linear dependence more blatant in check for
 `LinAlgError`

---
 tests/test_samples.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_samples.py b/tests/test_samples.py
index f95855f9..c8a00445 100644
--- a/tests/test_samples.py
+++ b/tests/test_samples.py
@@ -437,8 +437,10 @@ def test_mcmc_stats():
     assert mcmc_half.Gelman_Rubin(['x0']) < 0.01
     assert mcmc_half.Gelman_Rubin(['x1']) < 0.01
     with pytest.raises(np.linalg.LinAlgError):
-        mcmc['y'] = mcmc.x1
-        mcmc.Gelman_Rubin(['x0', 'x1', 'y'])
+        mcmc['y1'] = mcmc.x1
+        mcmc['y2'] = mcmc.x1
+        mcmc['y3'] = mcmc.x1
+        mcmc.Gelman_Rubin(['x0', 'x1', 'y1', 'y2', 'y3'])
 
     # more burn-in checks
     mcmc_new = mcmc.remove_burn_in(burn_in=200.9)