handley-lab · AdamOrmondroyd · Apr 7, 2023 · Mar 10, 2023 · Mar 10, 2023 · Mar 10, 2023
diff --git a/README.rst b/README.rst
@@ -2,7 +2,7 @@
 anesthetic: nested sampling post-processing
 ===========================================
 :Authors: Will Handley and Lukas Hergt
-:Version: 2.0.0-beta.22
+:Version: 2.0.0-beta.23
 :Homepage: https://github.com/williamjameshandley/anesthetic
 :Documentation: http://anesthetic.readthedocs.io/
 

diff --git a/anesthetic/_version.py b/anesthetic/_version.py
@@ -1 +1 @@
-__version__ = '2.0.0b22'
+__version__ = '2.0.0b23'
diff --git a/anesthetic/weighted_pandas.py b/anesthetic/weighted_pandas.py
@@ -1,14 +1,60 @@
 """Pandas DataFrame and Series with weighted samples."""
 
+import warnings
 from inspect import signature
 import numpy as np
 from pandas import Series, DataFrame, concat, MultiIndex
+from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy
+from pandas._libs import lib
+from pandas._libs.lib import no_default
+from pandas.util._exceptions import find_stack_level
 from pandas.util import hash_pandas_object
 from numpy.ma import masked_array
 from anesthetic.utils import (compress_weights, channel_capacity, quantile,
                               temporary_seed, adjust_docstrings)
 
 
+class WeightedGroupBy(GroupBy):
+    """Weighted version of :class:`pandas.core.groupby.GroupBy`."""
+
+    def _weighted_stat(self, name, **kwargs):
+        """Aggregate every group with the method called ``name``."""
+        # Each group is rebuilt with ``self.obj._constructor`` first, so the
+        # method looked up is the one provided by the rebuilt object.
+        def reduce_group(group):
+            return getattr(self.obj._constructor(group), name)(**kwargs)
+        result = self.agg(reduce_group)
+        return result.__finalize__(self.obj, method="groupby")
+
+    def mean(self, numeric_only=False):  # noqa: D102
+        return self._weighted_stat("mean", numeric_only=numeric_only)
+
+    def std(self, numeric_only=False):  # noqa: D102
+        return self._weighted_stat("std", numeric_only=numeric_only)
+
+    def kurtosis(self, numeric_only=False):  # noqa: D102
+        return self._weighted_stat("kurtosis", numeric_only=numeric_only)
+
+    def median(self, numeric_only=None):  # noqa: D102
+        # NOTE(review): default is None, unlike the siblings -- confirm.
+        return self._weighted_stat("median", numeric_only=numeric_only)
+
+    def var(self, numeric_only=False):  # noqa: D102
+        return self._weighted_stat("var", numeric_only=numeric_only)
+
+
+class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy):
+    """Weighted version of :class:`pandas.core.groupby.SeriesGroupBy`."""
+
+    # Intentionally empty: behaviour comes entirely from the two bases.
+
+
+class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy):
+    """Weighted version of :class:`pandas.core.groupby.DataFrameGroupBy`."""
+
+    # Intentionally empty: behaviour comes entirely from the two bases.
+
+
 class _WeightedObject(object):
     """Common methods for `WeightedSeries` and `WeightedDataFrame`.
 
@@ -204,6 +250,35 @@ def _constructor(self):
     def _constructor_expanddim(self):
         return WeightedDataFrame
 
+    def groupby(
+        self,
+        by=None,
+        axis=0,
+        level=None,
+        as_index=True,
+        sort=True,
+        group_keys=True,
+        observed=False,
+        dropna=True,
+    ):  # noqa: D102
+        if by is None and level is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+        if not as_index:
+            raise TypeError("as_index=False only valid with DataFrame")
+
+        # Gather the grouping options once, with `axis` already resolved.
+        options = {
+            "keys": by,
+            "axis": self._get_axis_number(axis),
+            "level": level,
+            "as_index": as_index,
+            "sort": sort,
+            "group_keys": group_keys,
+            "observed": observed,
+            "dropna": dropna,
+        }
+        return WeightedSeriesGroupBy(obj=self, **options)
+
 
 class WeightedDataFrame(_WeightedObject, DataFrame):
     """Weighted version of :class:`pandas.DataFrame`."""
@@ -405,6 +480,51 @@ def _constructor_sliced(self):
     def _constructor(self):
         return WeightedDataFrame
 
+    def groupby(
+        self,
+        by=None,
+        axis=no_default,
+        level=None,
+        as_index: bool = True,
+        sort: bool = True,
+        group_keys: bool = True,
+        observed: bool = False,
+        dropna: bool = True,
+    ):  # pragma: no cover  # noqa: D102
+        if axis is lib.no_default:
+            axis = 0
+        else:
+            # An explicit `axis` always triggers a FutureWarning; only the
+            # message differs between axis 1 and any other resolved axis.
+            axis = self._get_axis_number(axis)
+            if axis == 1:
+                message = (
+                    "DataFrame.groupby with axis=1 is deprecated. Do "
+                    "`frame.T.groupby(...)` without axis instead."
+                )
+            else:
+                message = (
+                    "The 'axis' keyword in DataFrame.groupby is deprecated "
+                    "and will be removed in a future version."
+                )
+            depth = find_stack_level()
+            warnings.warn(message, FutureWarning, stacklevel=depth)
+
+        if by is None and level is None:
+            raise TypeError("You have to supply one of 'by' and 'level'")
+
+        options = {
+            "keys": by,
+            "axis": axis,
+            "level": level,
+            "as_index": as_index,
+            "sort": sort,
+            "group_keys": group_keys,
+            "observed": observed,
+            "dropna": dropna,
+        }
+        return WeightedDataFrameGroupBy(obj=self, **options)
+
 
 for cls in [WeightedDataFrame, WeightedSeries]:
     adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame')

diff --git a/tests/test_samples.py b/tests/test_samples.py
@@ -1332,3 +1332,23 @@ def test_old_gui():
         make_2d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'})
     with pytest.raises(NotImplementedError):
         make_1d_axes(['x0', 'y0'], tex={'x0': '$x_0$', 'y0': '$y_0$'})
+
+
+def test_groupby_stats():
+    mcmc = read_chains('./tests/example_data/cb')
+    chains = mcmc.groupby(('chain', '$n_\\mathrm{chain}$'), group_keys=False)
+    # Reference: the same statistics computed directly on the chain == 1
+    # rows, compared against row 0 of each grouped result.
+    first = mcmc.loc[mcmc['chain'] == 1]
+
+    # The last entry of each direct result is sliced off before comparing;
+    # presumably it is the grouping column itself -- TODO confirm.
+    for stat in ('mean', 'std', 'median', 'var'):
+        direct = getattr(first, stat)().to_numpy()[:-1]
+        grouped = getattr(chains, stat)().iloc[0, :].to_numpy()
+        assert np.allclose(direct, grouped), stat
+
+    # kurtosis is compared after dropping NaN entries on both sides.
+    direct = first.kurtosis().dropna().to_numpy()
+    grouped = chains.kurtosis().iloc[0, :].dropna().to_numpy()
+    assert np.allclose(direct, grouped)