From 6980b136fea565d98676a51ead9f967b9b270385 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 22 Mar 2023 15:40:15 +0000 Subject: [PATCH 01/19] wpandas refactor --- anesthetic/__init__.py | 37 +--- anesthetic/_version.py.orig | 5 + anesthetic/plot.py | 3 +- anesthetic/plotting/_matplotlib/__init__.py | 61 +++---- anesthetic/plotting/_matplotlib/core.py | 124 +------------- anesthetic/plotting/_matplotlib/hist.py | 162 +----------------- anesthetic/samples.py | 9 +- anesthetic/utils.py | 97 +---------- lpandas/__init__.py | 9 + {anesthetic => lpandas}/_format.py | 0 .../labelled_pandas.py => lpandas/core.py | 1 + tests/test_labelled_pandas.py | 2 +- tests/test_samples.py | 2 +- tests/test_utils.py | 3 +- tests/test_weighted_pandas.py | 2 +- wpandas/__init__.py | 31 ++++ {anesthetic => wpandas}/_code_utils.py | 0 .../weighted_pandas.py => wpandas/core.py | 4 +- wpandas/plotting/__init__.py | 1 + wpandas/plotting/_matplotlib/__init__.py | 53 ++++++ .../plotting/_matplotlib/boxplot.py | 8 +- wpandas/plotting/_matplotlib/core.py | 123 +++++++++++++ wpandas/plotting/_matplotlib/hist.py | 162 ++++++++++++++++++ .../plotting/_matplotlib/misc.py | 2 +- wpandas/utils.py | 99 +++++++++++ 25 files changed, 532 insertions(+), 468 deletions(-) create mode 100644 anesthetic/_version.py.orig create mode 100644 lpandas/__init__.py rename {anesthetic => lpandas}/_format.py (100%) rename anesthetic/labelled_pandas.py => lpandas/core.py (99%) create mode 100644 wpandas/__init__.py rename {anesthetic => wpandas}/_code_utils.py (100%) rename anesthetic/weighted_pandas.py => wpandas/core.py (99%) create mode 100644 wpandas/plotting/__init__.py create mode 100644 wpandas/plotting/_matplotlib/__init__.py rename {anesthetic => wpandas}/plotting/_matplotlib/boxplot.py (94%) create mode 100644 wpandas/plotting/_matplotlib/core.py create mode 100644 wpandas/plotting/_matplotlib/hist.py rename {anesthetic => wpandas}/plotting/_matplotlib/misc.py (85%) create mode 100644 wpandas/utils.py diff --git a/anesthetic/__init__.py b/anesthetic/__init__.py index dc1fbaf5..3bf8f432 100644 --- a/anesthetic/__init__.py +++ b/anesthetic/__init__.py @@ -2,45 +2,12 @@ import anesthetic.samples import anesthetic.plot import anesthetic.read.chain +import anesthetic._version -import pandas -import pandas.plotting._core -import pandas.plotting._misc -from anesthetic._format import _DataFrameFormatter -from anesthetic._version import __version__ # noqa: F401 - - -def _anesthetic_override(_get_plot_backend): - """Override the default backend. - - When _get_plot_backend asks for 'matplotlib' it will be directed to - 'anesthetic.plotting._matplotlib'. This is necessary since any users of - :class:`anesthetic.weighted_pandas.WeightedSamples` should not be using the - original backend. 
- """ - def wrapper(backend=None): - if backend == 'matplotlib': - return _get_plot_backend('anesthetic.plotting._matplotlib') - return _get_plot_backend(backend) - return wrapper - - -# Override the two places where _get_plot_backend is defined -pandas.plotting._core._get_plot_backend = \ - _anesthetic_override(pandas.plotting._core._get_plot_backend) -pandas.plotting._misc._get_plot_backend = \ - _anesthetic_override(pandas.plotting._misc._get_plot_backend) - -# Set anesthetic.plotting._matplotlib as the actual backend -pandas.options.plotting.backend = 'anesthetic.plotting._matplotlib' - -pandas.io.formats.format.DataFrameFormatter = _DataFrameFormatter -pandas.options.display.max_colwidth = 14 - +__version__ = anesthetic._version.__version__ Samples = anesthetic.samples.Samples MCMCSamples = anesthetic.samples.MCMCSamples NestedSamples = anesthetic.samples.NestedSamples make_2d_axes = anesthetic.plot.make_2d_axes make_1d_axes = anesthetic.plot.make_1d_axes - read_chains = anesthetic.read.chain.read_chains diff --git a/anesthetic/_version.py.orig b/anesthetic/_version.py.orig new file mode 100644 index 00000000..6735b9f6 --- /dev/null +++ b/anesthetic/_version.py.orig @@ -0,0 +1,5 @@ +<<<<<<< HEAD +__version__ = '2.0.0b26' +======= +__version__ = '2.0.0b25' +>>>>>>> master diff --git a/anesthetic/plot.py b/anesthetic/plot.py index 93452270..0144b911 100644 --- a/anesthetic/plot.py +++ b/anesthetic/plot.py @@ -30,10 +30,11 @@ from matplotlib.colors import LinearSegmentedColormap from matplotlib.transforms import Affine2D from anesthetic.utils import nest_level -from anesthetic.utils import (sample_compression_1d, quantile, +from anesthetic.utils import (sample_compression_1d, triangular_sample_compression_2d, iso_probability_contours, match_contour_to_contourf) +from wpandas.utils import quantile from anesthetic.boundary import cut_and_normalise_gaussian diff --git a/anesthetic/plotting/_matplotlib/__init__.py b/anesthetic/plotting/_matplotlib/__init__.py index 9925a020..dbdd0dc8 100644 --- a/anesthetic/plotting/_matplotlib/__init__.py +++ b/anesthetic/plotting/_matplotlib/__init__.py @@ -1,19 +1,22 @@ """anesthetic override of matplotlib backend for weighted samples.""" -from pandas.plotting._matplotlib import ( # noqa: F401 - PLOT_CLASSES, - __name__, - __all__, - plot, - register, - deregister -) -from anesthetic.plotting._matplotlib.boxplot import ( # noqa: F401 +from wpandas.plotting._matplotlib import ( # noqa: F401 + PLOT_CLASSES, + __name__, + __all__, + plot, + register, + deregister, + bootstrap_plot, + scatter_matrix, + andrews_curves, + autocorrelation_plot, + lag_plot, + parallel_coordinates, + radviz, + table, BoxPlot, boxplot, boxplot_frame, -) -from pandas.plotting._matplotlib import boxplot_frame_groupby # noqa: F401 -from anesthetic.plotting._matplotlib.core import ( # noqa: F401 AreaPlot, BarhPlot, BarPlot, @@ -21,43 +24,23 @@ LinePlot, PiePlot, ScatterPlot, + boxplot_frame_groupby, + KdePlot, + HistPlot, + hist_frame, + hist_series, +) +from anesthetic.plotting._matplotlib.core import ( # noqa: F401 ScatterPlot2d, ) from anesthetic.plotting._matplotlib.hist import ( # noqa: F401 - KdePlot, Kde1dPlot, FastKde1dPlot, Kde2dPlot, FastKde2dPlot, - HistPlot, Hist1dPlot, Hist2dPlot, - hist_frame, - hist_series, -) -from anesthetic.plotting._matplotlib.misc import ( # noqa: F401 - bootstrap_plot, - scatter_matrix, ) -from pandas.plotting._matplotlib import ( # noqa: F401 - andrews_curves, - autocorrelation_plot, - lag_plot, - parallel_coordinates, - radviz, - table, -) - 
-PLOT_CLASSES["line"] = LinePlot -PLOT_CLASSES["bar"] = BarPlot -PLOT_CLASSES["barh"] = BarhPlot -PLOT_CLASSES["box"] = BoxPlot -PLOT_CLASSES["hist"] = HistPlot -PLOT_CLASSES["kde"] = KdePlot -PLOT_CLASSES["area"] = AreaPlot -PLOT_CLASSES["pie"] = PiePlot -PLOT_CLASSES["scatter"] = ScatterPlot -PLOT_CLASSES["hexbin"] = HexBinPlot PLOT_CLASSES['hist_1d'] = Hist1dPlot PLOT_CLASSES['kde_1d'] = Kde1dPlot diff --git a/anesthetic/plotting/_matplotlib/core.py b/anesthetic/plotting/_matplotlib/core.py index b3124682..ddd4100e 100644 --- a/anesthetic/plotting/_matplotlib/core.py +++ b/anesthetic/plotting/_matplotlib/core.py @@ -1,83 +1,10 @@ from typing import Literal -import numpy as np from matplotlib import pyplot as plt -from pandas.core.dtypes.generic import ABCMultiIndex -import pandas.core.common as com -from pandas.plotting._matplotlib.core import (ScatterPlot as _ScatterPlot, - HexBinPlot as _HexBinPlot, - LinePlot as _LinePlot, - BarPlot as _BarPlot, - BarhPlot as _BarhPlot, - AreaPlot as _AreaPlot, - PiePlot as _PiePlot, - MPLPlot, PlanePlot) +from pandas.plotting._matplotlib.core import PlanePlot from pandas.plotting._matplotlib.groupby import create_iter_data_given_by from pandas.io.formats.printing import pprint_thing -from anesthetic.weighted_pandas import _WeightedObject from anesthetic.plot import scatter_plot_2d - - -def _get_weights(kwargs, data): - if isinstance(data, _WeightedObject): - kwargs['weights'] = data.get_weights() - - -class _WeightedMPLPlot(MPLPlot): - - _default_rot = None - - def __init__(self, data, *args, **kwargs): - _get_weights(kwargs, data) - super().__init__(data, *args, **kwargs) - - def _get_index_name(self): - if isinstance(self.data, _WeightedObject): - if isinstance(self.data.drop_weights().index, ABCMultiIndex): - name = self.data.drop_weights().index.names - if com.any_not_none(*name): - name = ",".join([pprint_thing(x) for x in name]) - else: - name = None - else: - name = self.data.drop_weights().index.name - if name is not None: - name = pprint_thing(name) - - # GH 45145, override the default axis label if one is provided. 
- index_name = self._get_custom_index_name() - if index_name is not None: - name = pprint_thing(index_name) - - return name - else: - return super()._get_index_name() - - def _get_xticks(self, convert_period: bool = False): - if isinstance(self.data, _WeightedObject): - return self.data.drop_weights().index._mpl_repr() - else: - return super()._get_xticks(convert_period) - - -def _compress_weights(kwargs, data): - if isinstance(data, _WeightedObject): - return data.compress(kwargs.pop('ncompress', True)) - else: - return data - - -class _CompressedMPLPlot(MPLPlot): - def __init__(self, data, *args, **kwargs): - data = _compress_weights(kwargs, data) - super().__init__(data, *args, **kwargs) - - -class ScatterPlot(_CompressedMPLPlot, _ScatterPlot): - # noqa: disable=D101 - def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: - if isinstance(data, _WeightedObject): - kwargs['alpha'] = kwargs.get('alpha', 0.5) - super().__init__(data, x, y, s, c, **kwargs) +from wpandas.plotting._matplotlib.core import _CompressedMPLPlot class _PlanePlot2d(PlanePlot): @@ -115,50 +42,3 @@ def _kind(self) -> Literal["scatter_2d"]: @classmethod def _plot(cls, ax, x, y, **kwds): return scatter_plot_2d(ax, x, y, **kwds) - - -class HexBinPlot(_HexBinPlot): - # noqa: disable=D101 - def __init__(self, data, x, y, C=None, **kwargs) -> None: - if isinstance(data, _WeightedObject): - C = '__weights' - data[C] = data.get_weights() - kwargs['reduce_C_function'] = np.sum - super().__init__(data, x, y, C=C, **kwargs) - - -class LinePlot(_LinePlot, _WeightedMPLPlot): - # noqa: disable=D101 - pass - - -class PiePlot(_PiePlot): - # noqa: disable=D101 - def __init__(self, data, kind=None, **kwargs) -> None: - if isinstance(data, _WeightedObject): - labels = data.drop_weights().index._mpl_repr() - kwargs['labels'] = kwargs.get('labels', labels) - super().__init__(data, kind=kind, **kwargs) - - -class BarPlot(_BarPlot, _WeightedMPLPlot): - # noqa: disable=D101 - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - super()._decorate_ticks(ax, name, ticklabels, start_edge, end_edge) - if isinstance(self.data, _WeightedObject): - if self.xticks is None: - ax.set_xticklabels(self._get_xticks()) - - -class BarhPlot(_BarhPlot, _WeightedMPLPlot): - # noqa: disable=D101 - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - super()._decorate_ticks(ax, name, ticklabels, start_edge, end_edge) - if isinstance(self.data, _WeightedObject): - if self.yticks is None: - ax.set_yticklabels(self._get_xticks()) - - -class AreaPlot(_AreaPlot, _WeightedMPLPlot): - # noqa: disable=D101 - pass diff --git a/anesthetic/plotting/_matplotlib/hist.py b/anesthetic/plotting/_matplotlib/hist.py index 3a09f2b0..17cdcb92 100644 --- a/anesthetic/plotting/_matplotlib/hist.py +++ b/anesthetic/plotting/_matplotlib/hist.py @@ -1,21 +1,8 @@ -from packaging import version -from matplotlib import pyplot as plt import numpy as np -import pandas as pd -from pandas.plotting._matplotlib.hist import (HistPlot as _HistPlot, - KdePlot as _KdePlot) -import pandas.plotting._matplotlib.hist -from pandas.core.dtypes.missing import ( - isna, - remove_na_arraylike, -) -from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.core import MPLPlot -from pandas.plotting._matplotlib.groupby import (create_iter_data_given_by, - reformat_hist_y_given_by) +from wpandas.plotting._matplotlib.hist import KdePlot, HistPlot from typing import Literal -from anesthetic.plotting._matplotlib.core import ( - 
_WeightedMPLPlot, _CompressedMPLPlot, _PlanePlot2d, _get_weights +from wpandas.plotting._matplotlib.core import ( + _WeightedMPLPlot, _CompressedMPLPlot, _PlanePlot2d ) from anesthetic.plot import ( kde_contour_plot_2d, @@ -27,137 +14,6 @@ ) -class HistPlot(_WeightedMPLPlot, _HistPlot): - # noqa: disable=D101 - def _calculate_bins(self, data): - nd_values = data._convert(datetime=True)._get_numeric_data() - values = np.ravel(nd_values) - weights = self.kwds.get("weights", None) - if weights is not None: - try: - weights = np.broadcast_to(weights[:, None], nd_values.shape) - except ValueError: - print(nd_values.shape, weights.shape) - pass - weights = np.ravel(weights) - weights = weights[~isna(values)] - - values = values[~isna(values)] - - hist, bins = np.histogram( - values, bins=self.bins, range=self.kwds.get("range", None), - weights=weights - ) - return bins - - def _get_colors(self, num_colors=None, color_kwds='color'): - if (self.colormap is not None and self.kwds.get(color_kwds) is None - and (num_colors is None or num_colors == 1)): - return [plt.get_cmap(self.colormap)(0.68)] - return super()._get_colors(num_colors, color_kwds) - - def _make_plot(self): # pragma: no cover - # TODO: remove when these changes have been added to pandas - if version.parse(pd.__version__) >= version.parse("2.0.0"): - return super._make_plot() - - colors = self._get_colors() - stacking_id = self._get_stacking_id() - - # Re-create iterated data if `by` is assigned by users - data = ( - create_iter_data_given_by(self.data, self._kind) - if self.by is not None - else self.data - ) - - for i, (label, y) in enumerate(self._iter_data(data=data)): - ax = self._get_ax(i) - - kwds = self.kwds.copy() - - label = pprint_thing(label) - label = self._mark_right_label(label, index=i) - kwds["label"] = label - - style, kwds = self._apply_style_colors(colors, kwds, i, label) - if style is not None: - kwds["style"] = style - - kwds = self._make_plot_keywords(kwds, y) - - # the bins is multi-dimension array now and each plot need only 1-d - # and when by is applied, label should be columns that are grouped - if self.by is not None: - kwds["bins"] = kwds["bins"][i] - kwds["label"] = self.columns - kwds.pop("color") - - # We allow weights to be a multi-dimensional array, e.g. a (10, 2) - # array, and each sub-array (10,) will be called in each iteration. 
- # If users only provide 1D array, we assume the same weights are - # used for all columns - weights = kwds.get("weights", None) - if weights is not None: - if np.ndim(weights) != 1 and np.shape(weights)[-1] != 1: - try: - weights = weights[:, i] - except IndexError as err: - raise ValueError( - "weights must have the same shape as data, " - "or be a single column" - ) from err - weights = weights[~isna(y)] - kwds["weights"] = weights - - y = reformat_hist_y_given_by(y, self.by) - - artists = self._plot(ax, y, column_num=i, - stacking_id=stacking_id, **kwds) - - # when by is applied, show title for subplots to - # know which group it is - if self.by is not None: - ax.set_title(pprint_thing(label)) - - self._append_legend_handles_labels(artists[0], label) - - def _post_plot_logic(self, ax, data): - super()._post_plot_logic(ax, data) - ax.set_yticks([]) - ax.set_ylim(bottom=0) - ax.set_xlim(self.bins[0], self.bins[-1]) - - -class KdePlot(HistPlot, _KdePlot): - # noqa: disable=D101 - @classmethod - def _plot( - cls, - ax, - y, - style=None, - bw_method=None, - ind=None, - column_num=None, - stacking_id=None, - **kwds, - ): - from scipy.stats import gaussian_kde - weights = kwds.pop("weights", None) - - y = remove_na_arraylike(y) - gkde = gaussian_kde(y, bw_method=bw_method, weights=weights) - - y = gkde.evaluate(ind) - lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) - return lines - - def _post_plot_logic(self, ax, data): - ax.set_yticks([]) - ax.set_ylim(bottom=0) - - class Kde1dPlot(KdePlot): # noqa: disable=D101 @property @@ -269,15 +125,3 @@ def _kind(self) -> Literal["hist_2d"]: @classmethod def _plot(cls, ax, x, y, **kwds): return hist_plot_2d(ax, x, y, **kwds) - - -def hist_frame(data, *args, **kwds): - # noqa: disable=D103 - _get_weights(kwds, data) - return pandas.plotting._matplotlib.hist_frame(data, *args, **kwds) - - -def hist_series(data, *args, **kwds): - # noqa: disable=D103 - _get_weights(kwds, data) - return pandas.plotting._matplotlib.hist_series(data, *args, **kwds) diff --git a/anesthetic/samples.py b/anesthetic/samples.py index 038faea0..cf4956fd 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -13,15 +13,14 @@ from anesthetic.utils import (compute_nlive, compute_insertion_indexes, is_int, logsumexp) from anesthetic.gui.plot import RunPlotter -from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries -from anesthetic.labelled_pandas import LabelledDataFrame, LabelledSeries +from wpandas import WeightedDataFrame, WeightedSeries +from lpandas import LabelledDataFrame, LabelledSeries from pandas.core.accessor import CachedAccessor from anesthetic.plot import (make_1d_axes, make_2d_axes, AxesSeries, AxesDataFrame) -import anesthetic.weighted_pandas +import wpandas from anesthetic.plotting import PlotAccessor -anesthetic.weighted_pandas._WeightedObject.plot =\ - CachedAccessor("plot", PlotAccessor) +wpandas.core._WeightedObject.plot = CachedAccessor("plot", PlotAccessor) class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame): diff --git a/anesthetic/utils.py b/anesthetic/utils.py index 0208a855..2ead17c2 100644 --- a/anesthetic/utils.py +++ b/anesthetic/utils.py @@ -5,9 +5,7 @@ from scipy.interpolate import interp1d from scipy.stats import kstwobign from matplotlib.tri import Triangulation -import contextlib -import inspect -import re +from wpandas.utils import channel_capacity def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False): @@ -32,63 +30,6 @@ def logsumexp(a, axis=None, b=None, keepdims=False, 
return_sign=False): return_sign=return_sign) -def channel_capacity(w): - r"""Channel capacity (effective sample size). - - .. math:: - - H = \sum_i p_i \ln p_i - - p_i = \frac{w_i}{\sum_j w_j} - - N = e^{-H} - """ - with np.errstate(divide='ignore', invalid='ignore'): - W = np.array(w)/sum(w) - H = np.nansum(np.log(W)*W) - return np.exp(-H) - - -def compress_weights(w, u=None, ncompress=True): - """Compresses weights to their approximate channel capacity.""" - if u is None: - u = np.random.rand(len(w)) - - if w is None: - w = np.ones_like(u) - - if ncompress is True: - ncompress = channel_capacity(w) - elif ncompress is False: - return w - - if ncompress <= 0: - W = w/w.max() - else: - W = w * ncompress / w.sum() - - fraction, integer = np.modf(W) - extra = (u < fraction).astype(int) - return (integer + extra).astype(int) - - -def quantile(a, q, w=None, interpolation='linear'): - """Compute the weighted quantile for a one dimensional array.""" - if w is None: - w = np.ones_like(a) - a = np.array(list(a)) # Necessary to convert pandas arrays - w = np.array(list(w)) # Necessary to convert pandas arrays - i = np.argsort(a) - c = np.cumsum(w[i[1:]]+w[i[:-1]]) - c = c / c[-1] - c = np.concatenate(([0.], c)) - icdf = interp1d(c, a[i], kind=interpolation) - quant = icdf(q) - if isinstance(q, float): - quant = float(quant) - return quant - - def mirror_1d(d, xmin=None, xmax=None): """If necessary apply reflecting boundary conditions.""" if xmin is not None and xmax is not None: @@ -499,39 +440,3 @@ def insertion_p_value(indexes, nlive, batch=0): ks_result["uncorrected p-value"] = p = ks_result["p-value"] ks_result["p-value"] = 1. - (1. - p)**n if p != 1 else p * n return ks_result - - -@contextlib.contextmanager -def temporary_seed(seed): - """Context for temporarily setting a numpy seed.""" - state = np.random.get_state() - np.random.seed(seed) - try: - yield - finally: - np.random.set_state(state) - - -def adjust_docstrings(cls, pattern, repl, *args, **kwargs): - """Adjust the docstrings of a class using regular expressions. - - After the first argument, the remaining arguments are identical to re.sub. 
- - Parameters - ---------- - cls : class - class to adjust - - pattern : str - regular expression pattern - - repl : str - replacement string - """ - for key, val in cls.__dict__.items(): - doc = inspect.getdoc(val) - newdoc = re.sub(pattern, repl, doc, *args, **kwargs) - try: - cls.__dict__[key].__doc__ = newdoc - except AttributeError: - pass diff --git a/lpandas/__init__.py b/lpandas/__init__.py new file mode 100644 index 00000000..1c15cf4d --- /dev/null +++ b/lpandas/__init__.py @@ -0,0 +1,9 @@ +import pandas +import lpandas.core +from lpandas._format import _DataFrameFormatter + +pandas.io.formats.format.DataFrameFormatter = _DataFrameFormatter +pandas.options.display.max_colwidth = 14 + +LabelledSeries = lpandas.core.LabelledSeries +LabelledDataFrame = lpandas.core.LabelledDataFrame diff --git a/anesthetic/_format.py b/lpandas/_format.py similarity index 100% rename from anesthetic/_format.py rename to lpandas/_format.py diff --git a/anesthetic/labelled_pandas.py b/lpandas/core.py similarity index 99% rename from anesthetic/labelled_pandas.py rename to lpandas/core.py index 27777719..4ede33f0 100644 --- a/anesthetic/labelled_pandas.py +++ b/lpandas/core.py @@ -223,3 +223,4 @@ def transpose(self, copy=False): # noqa: D102 transpose, doc=DataFrame.transpose.__doc__ ) + diff --git a/tests/test_labelled_pandas.py b/tests/test_labelled_pandas.py index d505b5e1..82e3a32f 100644 --- a/tests/test_labelled_pandas.py +++ b/tests/test_labelled_pandas.py @@ -1,6 +1,6 @@ import numpy as np from numpy.testing import assert_array_equal -from anesthetic.labelled_pandas import LabelledSeries, LabelledDataFrame +from lpandas import LabelledSeries, LabelledDataFrame from pandas import Series, DataFrame, MultiIndex import pandas.testing import pytest diff --git a/tests/test_samples.py b/tests/test_samples.py index d5b31ac2..c22d14a3 100644 --- a/tests/test_samples.py +++ b/tests/test_samples.py @@ -8,7 +8,7 @@ import matplotlib.pyplot as plt from matplotlib.lines import Line2D from matplotlib.patches import Rectangle -from anesthetic.weighted_pandas import WeightedSeries, WeightedDataFrame +from wpandas import WeightedSeries, WeightedDataFrame from anesthetic import ( Samples, MCMCSamples, NestedSamples, make_1d_axes, make_2d_axes, read_chains diff --git a/tests/test_utils.py b/tests/test_utils.py index f202bade..2531a397 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -9,7 +9,8 @@ iso_probability_contours_from_samples, logsumexp, sample_compression_1d, triangular_sample_compression_2d, - insertion_p_value, compress_weights) + insertion_p_value) +from wpandas.utils import compress_weights def test_compress_weights(): diff --git a/tests/test_weighted_pandas.py b/tests/test_weighted_pandas.py index 71128e08..d35821eb 100644 --- a/tests/test_weighted_pandas.py +++ b/tests/test_weighted_pandas.py @@ -1,4 +1,4 @@ -from anesthetic.weighted_pandas import WeightedDataFrame, WeightedSeries +from wpandas import WeightedDataFrame, WeightedSeries from pandas import DataFrame, MultiIndex import pandas.testing from anesthetic.utils import channel_capacity diff --git a/wpandas/__init__.py b/wpandas/__init__.py new file mode 100644 index 00000000..960334b8 --- /dev/null +++ b/wpandas/__init__.py @@ -0,0 +1,31 @@ +import pandas +import pandas.plotting._core +import pandas.plotting._misc +import wpandas.core + + +def _wpandas_override(_get_plot_backend): + """Override the default backend. + + When _get_plot_backend asks for 'matplotlib' it will be directed to + 'wpandas.plotting._matplotlib'. 
This is necessary since any users of + :class:`wpandas.WeightedSamples` should not be using the original backend. + """ + def wrapper(backend=None): + if backend == 'matplotlib': + return _get_plot_backend('wpandas.plotting._matplotlib') + return _get_plot_backend(backend) + return wrapper + + +# Override the two places where _get_plot_backend is defined +pandas.plotting._core._get_plot_backend = \ + _wpandas_override(pandas.plotting._core._get_plot_backend) +pandas.plotting._misc._get_plot_backend = \ + _wpandas_override(pandas.plotting._misc._get_plot_backend) + +# Set wpandas.plotting._matplotlib as the actual backend +pandas.options.plotting.backend = 'wpandas.plotting._matplotlib' + +WeightedSeries = wpandas.core.WeightedSeries +WeightedDataFrame = wpandas.core.WeightedDataFrame diff --git a/anesthetic/_code_utils.py b/wpandas/_code_utils.py similarity index 100% rename from anesthetic/_code_utils.py rename to wpandas/_code_utils.py diff --git a/anesthetic/weighted_pandas.py b/wpandas/core.py similarity index 99% rename from anesthetic/weighted_pandas.py rename to wpandas/core.py index fb5ab1bc..26cc890b 100644 --- a/anesthetic/weighted_pandas.py +++ b/wpandas/core.py @@ -5,8 +5,8 @@ from pandas import Series, DataFrame, concat, MultiIndex from pandas.util import hash_pandas_object from numpy.ma import masked_array -from anesthetic.utils import (compress_weights, channel_capacity, quantile, - temporary_seed, adjust_docstrings) +from wpandas.utils import (compress_weights, channel_capacity, quantile, + temporary_seed, adjust_docstrings) class _WeightedObject(object): diff --git a/wpandas/plotting/__init__.py b/wpandas/plotting/__init__.py new file mode 100644 index 00000000..1cb11da6 --- /dev/null +++ b/wpandas/plotting/__init__.py @@ -0,0 +1 @@ +"""Plotting public API.""" diff --git a/wpandas/plotting/_matplotlib/__init__.py b/wpandas/plotting/_matplotlib/__init__.py new file mode 100644 index 00000000..0ca2b707 --- /dev/null +++ b/wpandas/plotting/_matplotlib/__init__.py @@ -0,0 +1,53 @@ +"""wpandas override of matplotlib backend for weighted samples.""" +from pandas.plotting._matplotlib import ( # noqa: F401 + PLOT_CLASSES, + __name__, + __all__, + plot, + register, + deregister +) +from wpandas.plotting._matplotlib.boxplot import ( # noqa: F401 + BoxPlot, + boxplot, + boxplot_frame, +) +from pandas.plotting._matplotlib import boxplot_frame_groupby # noqa: F401 +from wpandas.plotting._matplotlib.core import ( # noqa: F401 + AreaPlot, + BarhPlot, + BarPlot, + HexBinPlot, + LinePlot, + PiePlot, + ScatterPlot, +) +from wpandas.plotting._matplotlib.hist import ( # noqa: F401 + KdePlot, + HistPlot, + hist_frame, + hist_series, +) +from wpandas.plotting._matplotlib.misc import ( # noqa: F401 + bootstrap_plot, + scatter_matrix, +) +from pandas.plotting._matplotlib import ( # noqa: F401 + andrews_curves, + autocorrelation_plot, + lag_plot, + parallel_coordinates, + radviz, + table, +) + +PLOT_CLASSES["line"] = LinePlot +PLOT_CLASSES["bar"] = BarPlot +PLOT_CLASSES["barh"] = BarhPlot +PLOT_CLASSES["box"] = BoxPlot +PLOT_CLASSES["hist"] = HistPlot +PLOT_CLASSES["kde"] = KdePlot +PLOT_CLASSES["area"] = AreaPlot +PLOT_CLASSES["pie"] = PiePlot +PLOT_CLASSES["scatter"] = ScatterPlot +PLOT_CLASSES["hexbin"] = HexBinPlot diff --git a/anesthetic/plotting/_matplotlib/boxplot.py b/wpandas/plotting/_matplotlib/boxplot.py similarity index 94% rename from anesthetic/plotting/_matplotlib/boxplot.py rename to wpandas/plotting/_matplotlib/boxplot.py index 6505817d..4190bccc 100644 --- 
a/anesthetic/plotting/_matplotlib/boxplot.py +++ b/wpandas/plotting/_matplotlib/boxplot.py @@ -1,11 +1,11 @@ import pandas.plotting._matplotlib.boxplot from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot -from anesthetic.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights -from anesthetic.utils import quantile +from wpandas.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights +from wpandas.utils import quantile from pandas.core.dtypes.missing import remove_na_arraylike import numpy as np from pandas.io.formats.printing import pprint_thing -from anesthetic._code_utils import replace_inner_function +from wpandas._code_utils import replace_inner_function def _bxpstat(y, weights, whis): @@ -78,7 +78,7 @@ def plot_group(keys, values, ax, **kwds): # pragma: no cover else: whis = kwds.pop("whis", 1.5) kwds['showfliers'] = False - from anesthetic.plotting._matplotlib.boxplot import _bxpstat + from wpandas.plotting._matplotlib.boxplot import _bxpstat bp = ax.bxp([_bxpstat(v, weights, whis) for v in values], **kwds) diff --git a/wpandas/plotting/_matplotlib/core.py b/wpandas/plotting/_matplotlib/core.py new file mode 100644 index 00000000..1642932c --- /dev/null +++ b/wpandas/plotting/_matplotlib/core.py @@ -0,0 +1,123 @@ +import numpy as np +from pandas.core.dtypes.generic import ABCMultiIndex +import pandas.core.common as com +from pandas.plotting._matplotlib.core import (ScatterPlot as _ScatterPlot, + HexBinPlot as _HexBinPlot, + LinePlot as _LinePlot, + BarPlot as _BarPlot, + BarhPlot as _BarhPlot, + AreaPlot as _AreaPlot, + PiePlot as _PiePlot, + MPLPlot) +from pandas.io.formats.printing import pprint_thing +from wpandas.core import _WeightedObject + + +def _get_weights(kwargs, data): + if isinstance(data, _WeightedObject): + kwargs['weights'] = data.get_weights() + + +class _WeightedMPLPlot(MPLPlot): + + _default_rot = None + + def __init__(self, data, *args, **kwargs): + _get_weights(kwargs, data) + super().__init__(data, *args, **kwargs) + + def _get_index_name(self): + if isinstance(self.data, _WeightedObject): + if isinstance(self.data.drop_weights().index, ABCMultiIndex): + name = self.data.drop_weights().index.names + if com.any_not_none(*name): + name = ",".join([pprint_thing(x) for x in name]) + else: + name = None + else: + name = self.data.drop_weights().index.name + if name is not None: + name = pprint_thing(name) + + # GH 45145, override the default axis label if one is provided. 
+ index_name = self._get_custom_index_name() + if index_name is not None: + name = pprint_thing(index_name) + + return name + else: + return super()._get_index_name() + + def _get_xticks(self, convert_period: bool = False): + if isinstance(self.data, _WeightedObject): + return self.data.drop_weights().index._mpl_repr() + else: + return super()._get_xticks(convert_period) + + +def _compress_weights(kwargs, data): + if isinstance(data, _WeightedObject): + return data.compress(kwargs.pop('ncompress', True)) + else: + return data + + +class _CompressedMPLPlot(MPLPlot): + def __init__(self, data, *args, **kwargs): + data = _compress_weights(kwargs, data) + super().__init__(data, *args, **kwargs) + + +class ScatterPlot(_CompressedMPLPlot, _ScatterPlot): + # noqa: disable=D101 + def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: + if isinstance(data, _WeightedObject): + kwargs['alpha'] = kwargs.get('alpha', 0.5) + super().__init__(data, x, y, s, c, **kwargs) + + +class HexBinPlot(_HexBinPlot): + # noqa: disable=D101 + def __init__(self, data, x, y, C=None, **kwargs) -> None: + if isinstance(data, _WeightedObject): + C = '__weights' + data[C] = data.get_weights() + kwargs['reduce_C_function'] = np.sum + super().__init__(data, x, y, C=C, **kwargs) + + +class LinePlot(_LinePlot, _WeightedMPLPlot): + # noqa: disable=D101 + pass + + +class PiePlot(_PiePlot): + # noqa: disable=D101 + def __init__(self, data, kind=None, **kwargs) -> None: + if isinstance(data, _WeightedObject): + labels = data.drop_weights().index._mpl_repr() + kwargs['labels'] = kwargs.get('labels', labels) + super().__init__(data, kind=kind, **kwargs) + + +class BarPlot(_BarPlot, _WeightedMPLPlot): + # noqa: disable=D101 + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + super()._decorate_ticks(ax, name, ticklabels, start_edge, end_edge) + if isinstance(self.data, _WeightedObject): + if self.xticks is None: + ax.set_xticklabels(self._get_xticks()) + + +class BarhPlot(_BarhPlot, _WeightedMPLPlot): + # noqa: disable=D101 + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + super()._decorate_ticks(ax, name, ticklabels, start_edge, end_edge) + if isinstance(self.data, _WeightedObject): + if self.yticks is None: + ax.set_yticklabels(self._get_xticks()) + + +class AreaPlot(_AreaPlot, _WeightedMPLPlot): + # noqa: disable=D101 + pass diff --git a/wpandas/plotting/_matplotlib/hist.py b/wpandas/plotting/_matplotlib/hist.py new file mode 100644 index 00000000..780d9345 --- /dev/null +++ b/wpandas/plotting/_matplotlib/hist.py @@ -0,0 +1,162 @@ +from packaging import version +from matplotlib import pyplot as plt +import numpy as np +import pandas as pd +from pandas.plotting._matplotlib.hist import (HistPlot as _HistPlot, + KdePlot as _KdePlot) +import pandas.plotting._matplotlib.hist +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import MPLPlot +from pandas.plotting._matplotlib.groupby import (create_iter_data_given_by, + reformat_hist_y_given_by) +from typing import Literal +from wpandas.plotting._matplotlib.core import ( + _WeightedMPLPlot, _CompressedMPLPlot, _get_weights +) + + +class HistPlot(_WeightedMPLPlot, _HistPlot): + # noqa: disable=D101 + def _calculate_bins(self, data): + nd_values = data._convert(datetime=True)._get_numeric_data() + values = np.ravel(nd_values) + weights = self.kwds.get("weights", None) + if weights is not None: + try: + weights = 
np.broadcast_to(weights[:, None], nd_values.shape) + except ValueError: + print(nd_values.shape, weights.shape) + pass + weights = np.ravel(weights) + weights = weights[~isna(values)] + + values = values[~isna(values)] + + hist, bins = np.histogram( + values, bins=self.bins, range=self.kwds.get("range", None), + weights=weights + ) + return bins + + def _get_colors(self, num_colors=None, color_kwds='color'): + if (self.colormap is not None and self.kwds.get(color_kwds) is None + and (num_colors is None or num_colors == 1)): + return [plt.get_cmap(self.colormap)(0.68)] + return super()._get_colors(num_colors, color_kwds) + + def _make_plot(self): # pragma: no cover + # TODO: remove when these changes have been added to pandas + if version.parse(pd.__version__) >= version.parse("2.0.0"): + return super._make_plot() + + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + # Re-create iterated data if `by` is assigned by users + data = ( + create_iter_data_given_by(self.data, self._kind) + if self.by is not None + else self.data + ) + + for i, (label, y) in enumerate(self._iter_data(data=data)): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + label = self._mark_right_label(label, index=i) + kwds["label"] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds["style"] = style + + kwds = self._make_plot_keywords(kwds, y) + + # the bins is multi-dimension array now and each plot need only 1-d + # and when by is applied, label should be columns that are grouped + if self.by is not None: + kwds["bins"] = kwds["bins"][i] + kwds["label"] = self.columns + kwds.pop("color") + + # We allow weights to be a multi-dimensional array, e.g. a (10, 2) + # array, and each sub-array (10,) will be called in each iteration. 
+ # If users only provide 1D array, we assume the same weights are + # used for all columns + weights = kwds.get("weights", None) + if weights is not None: + if np.ndim(weights) != 1 and np.shape(weights)[-1] != 1: + try: + weights = weights[:, i] + except IndexError as err: + raise ValueError( + "weights must have the same shape as data, " + "or be a single column" + ) from err + weights = weights[~isna(y)] + kwds["weights"] = weights + + y = reformat_hist_y_given_by(y, self.by) + + artists = self._plot(ax, y, column_num=i, + stacking_id=stacking_id, **kwds) + + # when by is applied, show title for subplots to + # know which group it is + if self.by is not None: + ax.set_title(pprint_thing(label)) + + self._append_legend_handles_labels(artists[0], label) + + def _post_plot_logic(self, ax, data): + super()._post_plot_logic(ax, data) + ax.set_yticks([]) + ax.set_ylim(bottom=0) + ax.set_xlim(self.bins[0], self.bins[-1]) + + +class KdePlot(HistPlot, _KdePlot): + # noqa: disable=D101 + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bw_method=None, + ind=None, + column_num=None, + stacking_id=None, + **kwds, + ): + from scipy.stats import gaussian_kde + weights = kwds.pop("weights", None) + + y = remove_na_arraylike(y) + gkde = gaussian_kde(y, bw_method=bw_method, weights=weights) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _post_plot_logic(self, ax, data): + ax.set_yticks([]) + ax.set_ylim(bottom=0) + + +def hist_frame(data, *args, **kwds): + # noqa: disable=D103 + _get_weights(kwds, data) + return pandas.plotting._matplotlib.hist_frame(data, *args, **kwds) + + +def hist_series(data, *args, **kwds): + # noqa: disable=D103 + _get_weights(kwds, data) + return pandas.plotting._matplotlib.hist_series(data, *args, **kwds) diff --git a/anesthetic/plotting/_matplotlib/misc.py b/wpandas/plotting/_matplotlib/misc.py similarity index 85% rename from anesthetic/plotting/_matplotlib/misc.py rename to wpandas/plotting/_matplotlib/misc.py index dc81826a..0be80b61 100644 --- a/anesthetic/plotting/_matplotlib/misc.py +++ b/wpandas/plotting/_matplotlib/misc.py @@ -1,5 +1,5 @@ import pandas.plotting._matplotlib.misc as misc -from anesthetic.plotting._matplotlib.core import _compress_weights +from wpandas.plotting._matplotlib.core import _compress_weights def scatter_matrix(frame, *args, **kwargs): diff --git a/wpandas/utils.py b/wpandas/utils.py new file mode 100644 index 00000000..7fd84d99 --- /dev/null +++ b/wpandas/utils.py @@ -0,0 +1,99 @@ +"""Data-processing utility functions.""" +import numpy as np +from scipy.interpolate import interp1d +import contextlib +import inspect +import re + + +def channel_capacity(w): + r"""Channel capacity (effective sample size). + + .. 
math:: + + H = \sum_i p_i \ln p_i + + p_i = \frac{w_i}{\sum_j w_j} + + N = e^{-H} + """ + with np.errstate(divide='ignore', invalid='ignore'): + W = np.array(w)/sum(w) + H = np.nansum(np.log(W)*W) + return np.exp(-H) + + +def compress_weights(w, u=None, ncompress=True): + """Compresses weights to their approximate channel capacity.""" + if u is None: + u = np.random.rand(len(w)) + + if w is None: + w = np.ones_like(u) + + if ncompress is True: + ncompress = channel_capacity(w) + elif ncompress is False: + return w + + if ncompress <= 0: + W = w/w.max() + else: + W = w * ncompress / w.sum() + + fraction, integer = np.modf(W) + extra = (u < fraction).astype(int) + return (integer + extra).astype(int) + + +def quantile(a, q, w=None, interpolation='linear'): + """Compute the weighted quantile for a one dimensional array.""" + if w is None: + w = np.ones_like(a) + a = np.array(list(a)) # Necessary to convert pandas arrays + w = np.array(list(w)) # Necessary to convert pandas arrays + i = np.argsort(a) + c = np.cumsum(w[i[1:]]+w[i[:-1]]) + c = c / c[-1] + c = np.concatenate(([0.], c)) + icdf = interp1d(c, a[i], kind=interpolation) + quant = icdf(q) + if isinstance(q, float): + quant = float(quant) + return quant + + +@contextlib.contextmanager +def temporary_seed(seed): + """Context for temporarily setting a numpy seed.""" + state = np.random.get_state() + np.random.seed(seed) + try: + yield + finally: + np.random.set_state(state) + + +def adjust_docstrings(cls, pattern, repl, *args, **kwargs): + """Adjust the docstrings of a class using regular expressions. + + After the first argument, the remaining arguments are identical to re.sub. + + Parameters + ---------- + cls : class + class to adjust + + pattern : str + regular expression pattern + + repl : str + replacement string + """ + for key, val in cls.__dict__.items(): + doc = inspect.getdoc(val) + newdoc = re.sub(pattern, repl, doc, *args, **kwargs) + try: + cls.__dict__[key].__doc__ = newdoc + except AttributeError: + pass From 23faf4ade43d7a2f497453842517a7e5bad1636d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 30 Mar 2023 10:50:24 +0100 Subject: [PATCH 02/19] Updated modules --- docs/source/anesthetic.examples.rst | 12 ++++----- docs/source/anesthetic.gui.rst | 12 ++++----- docs/source/anesthetic.read.rst | 12 ++++----- docs/source/anesthetic.rst | 22 ++--------------- docs/source/lpandas.rst | 18 ++++++++++++++ docs/source/modules.rst | 2 ++ docs/source/wpandas.plotting.rst | 7 ++++++ docs/source/wpandas.rst | 38 +++++++++++++++++++++++++++++ 8 files changed, 85 insertions(+), 38 deletions(-) create mode 100644 docs/source/lpandas.rst create mode 100644 docs/source/wpandas.plotting.rst create mode 100644 docs/source/wpandas.rst diff --git a/docs/source/anesthetic.examples.rst b/docs/source/anesthetic.examples.rst index e7bc6504..f53ae682 100644 --- a/docs/source/anesthetic.examples.rst +++ b/docs/source/anesthetic.examples.rst @@ -1,12 +1,6 @@ anesthetic.examples package =========================== -.. automodule:: anesthetic.examples - :members: - :undoc-members: - :show-inheritance: - - anesthetic.examples.perfect\_ns module -------------------------------------- @@ -25,3 +19,9 @@ anesthetic.examples.utils module :show-inheritance: + + +.. 
automodule:: anesthetic.examples + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/anesthetic.gui.rst b/docs/source/anesthetic.gui.rst index 45206769..31522689 100644 --- a/docs/source/anesthetic.gui.rst +++ b/docs/source/anesthetic.gui.rst @@ -1,12 +1,6 @@ anesthetic.gui package ====================== -.. automodule:: anesthetic.gui - :members: - :undoc-members: - :show-inheritance: - - anesthetic.gui.plot module -------------------------- @@ -25,3 +19,9 @@ anesthetic.gui.widgets module :show-inheritance: + + +.. automodule:: anesthetic.gui + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/anesthetic.read.rst b/docs/source/anesthetic.read.rst index e92044ce..e37320ac 100644 --- a/docs/source/anesthetic.read.rst +++ b/docs/source/anesthetic.read.rst @@ -1,12 +1,6 @@ anesthetic.read package ======================= -.. automodule:: anesthetic.read - :members: - :undoc-members: - :show-inheritance: - - anesthetic.read.chain module ---------------------------- @@ -52,3 +46,9 @@ anesthetic.read.polychord module :show-inheritance: + + +.. automodule:: anesthetic.read + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index 9e9b8284..e4beb272 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -1,11 +1,5 @@ anesthetic package ================== - -.. automodule:: anesthetic - :members: - :undoc-members: - :show-inheritance: - anesthetic subpackages ---------------------- @@ -48,15 +42,6 @@ anesthetic.kde module :show-inheritance: -anesthetic.labelled\_pandas module -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. automodule:: anesthetic.labelled_pandas - :members: - :undoc-members: - :show-inheritance: - - anesthetic.plot module ~~~~~~~~~~~~~~~~~~~~~~ @@ -93,12 +78,9 @@ anesthetic.utils module :show-inheritance: -anesthetic.weighted\_pandas module -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: anesthetic.weighted_pandas + +.. automodule:: anesthetic :members: :undoc-members: :show-inheritance: - - diff --git a/docs/source/lpandas.rst b/docs/source/lpandas.rst new file mode 100644 index 00000000..c4a7fcfc --- /dev/null +++ b/docs/source/lpandas.rst @@ -0,0 +1,18 @@ +lpandas package +=============== + +lpandas.core module +------------------- + +.. automodule:: lpandas.core + :members: + :undoc-members: + :show-inheritance: + + + + +.. automodule:: lpandas + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/modules.rst b/docs/source/modules.rst index 3fe8e890..180c1091 100644 --- a/docs/source/modules.rst +++ b/docs/source/modules.rst @@ -5,3 +5,5 @@ anesthetic :maxdepth: 5 anesthetic + wpandas + lpandas diff --git a/docs/source/wpandas.plotting.rst b/docs/source/wpandas.plotting.rst new file mode 100644 index 00000000..3657707b --- /dev/null +++ b/docs/source/wpandas.plotting.rst @@ -0,0 +1,7 @@ +wpandas.plotting package +======================== + +.. automodule:: wpandas.plotting + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst new file mode 100644 index 00000000..9b5099ea --- /dev/null +++ b/docs/source/wpandas.rst @@ -0,0 +1,38 @@ +wpandas package +=============== +wpandas subpackages +------------------- + +.. toctree:: + :maxdepth: 1 + + wpandas.plotting + + +wpandas modules +--------------- + +wpandas.core module +~~~~~~~~~~~~~~~~~~~ + +.. 
automodule:: wpandas.core + :members: + :undoc-members: + :show-inheritance: + + +wpandas.utils module +~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: wpandas.utils + :members: + :undoc-members: + :show-inheritance: + + + + +.. automodule:: wpandas + :members: + :undoc-members: + :show-inheritance: From d4bf52c7587ce6a134cd6a3abe91260ad4bdf084 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Thu, 30 Mar 2023 10:54:20 +0100 Subject: [PATCH 03/19] Updated documentation --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 8e1bb219..828a1a35 100644 --- a/README.rst +++ b/README.rst @@ -130,8 +130,12 @@ and view the documentation by opening ``docs/build/html/index.html`` in a browse .. code:: bash + sphinx-apidoc -fM -t docs/templates/ -o docs/source/ wpandas/ + sphinx-apidoc -fM -t docs/templates/ -o docs/source/ lpandas/ sphinx-apidoc -fM -t docs/templates/ -o docs/source/ anesthetic/ +and modify/revert ``docs/source/modules.rst`` accordingly with any new modules. + Citation -------- From 076cdd4fc9345ca313a892a4b349adf1f14ceeed Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 9 Apr 2023 11:00:34 +0100 Subject: [PATCH 04/19] Tests now pass --- anesthetic/plotting/__init__.py | 19 +++++++++++++++++++ anesthetic/plotting/_matplotlib/hist.py | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/anesthetic/plotting/__init__.py b/anesthetic/plotting/__init__.py index 74954ec0..4e836aee 100644 --- a/anesthetic/plotting/__init__.py +++ b/anesthetic/plotting/__init__.py @@ -3,3 +3,22 @@ .. autoclass:: anesthetic.plotting.PlotAccessor """ from anesthetic.plotting._core import PlotAccessor # noqa: 401 +from anesthetic.plotting._matplotlib.core import ScatterPlot2d +from anesthetic.plotting._matplotlib.hist import ( + Kde1dPlot, + FastKde1dPlot, + Kde2dPlot, + FastKde2dPlot, + Hist1dPlot, + Hist2dPlot, +) + +import wpandas.plotting._matplotlib + +wpandas.plotting._matplotlib.PLOT_CLASSES['hist_1d'] = Hist1dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['kde_1d'] = Kde1dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['fastkde_1d'] = FastKde1dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['hist_2d'] = Hist2dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['kde_2d'] = Kde2dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['fastkde_2d'] = FastKde2dPlot +wpandas.plotting._matplotlib.PLOT_CLASSES['scatter_2d'] = ScatterPlot2d diff --git a/anesthetic/plotting/_matplotlib/hist.py b/anesthetic/plotting/_matplotlib/hist.py index 17cdcb92..de215e75 100644 --- a/anesthetic/plotting/_matplotlib/hist.py +++ b/anesthetic/plotting/_matplotlib/hist.py @@ -2,8 +2,9 @@ from wpandas.plotting._matplotlib.hist import KdePlot, HistPlot from typing import Literal from wpandas.plotting._matplotlib.core import ( - _WeightedMPLPlot, _CompressedMPLPlot, _PlanePlot2d + _WeightedMPLPlot, _CompressedMPLPlot ) +from anesthetic.plotting._matplotlib.core import _PlanePlot2d from anesthetic.plot import ( kde_contour_plot_2d, hist_plot_2d, From 44f90d51c66f6e944a8f9d680d329010ca8907bd Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 9 Apr 2023 11:27:54 +0100 Subject: [PATCH 05/19] documentation update weighted_pandas -> wpandas --- docs/source/samples.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/samples.rst b/docs/source/samples.rst index a689f52d..b842ba00 100644 --- a/docs/source/samples.rst +++ b/docs/source/samples.rst @@ -20,8 +20,8 @@ General weighted sample functionality The important extension to the :class:`pandas.Series` 
and :class:`pandas.DataFrame` classes, is that in anesthetic the data frames are -weighted (see :class:`anesthetic.weighted_pandas.WeightedSeries` and -:class:`anesthetic.weighted_pandas.WeightedDataFrame`), where the weights form +weighted (see :class:`anesthetic.wpandas.WeightedSeries` and +:class:`anesthetic.wpandas.WeightedDataFrame`), where the weights form part of a :class:`pandas.MultiIndex` and are therefore retained when slicing. From a5bc9e775d8929b5a7d13652ce8f09b8fd025e46 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 9 Apr 2023 11:31:26 +0100 Subject: [PATCH 06/19] Removed :show-inheritance: from wpandas to match master behaviour --- docs/source/wpandas.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index 9b5099ea..a5c95f6e 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -35,4 +35,3 @@ wpandas.utils module .. automodule:: wpandas :members: :undoc-members: - :show-inheritance: From f8c0ac54ba3368ff838521b95c210dcc5a12b81d Mon Sep 17 00:00:00 2001 From: Will Handley Date: Sun, 9 Apr 2023 12:06:14 +0100 Subject: [PATCH 07/19] Sphinx actually fixed now --- docs/source/samples.rst | 6 +++--- docs/source/wpandas.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/samples.rst b/docs/source/samples.rst index b842ba00..71156e4f 100644 --- a/docs/source/samples.rst +++ b/docs/source/samples.rst @@ -20,9 +20,9 @@ General weighted sample functionality The important extension to the :class:`pandas.Series` and :class:`pandas.DataFrame` classes, is that in anesthetic the data frames are -weighted (see :class:`anesthetic.wpandas.WeightedSeries` and -:class:`anesthetic.wpandas.WeightedDataFrame`), where the weights form -part of a :class:`pandas.MultiIndex` and are therefore retained when slicing. +weighted (see :class:`wpandas.core.WeightedSeries` and +:class:`wpandas.core.WeightedDataFrame`), where the weights form part of a +:class:`pandas.MultiIndex` and are therefore retained when slicing. Mean, standard deviation, median, quantiles, etc. diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index a5c95f6e..bc9ef1e3 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -18,7 +18,6 @@ wpandas.core module .. automodule:: wpandas.core :members: :undoc-members: - :show-inheritance: wpandas.utils module @@ -35,3 +34,4 @@ wpandas.utils module .. 
automodule:: wpandas :members: :undoc-members: + :show-inheritance: From 167b23ea98e58db47ef5ff08d8d3d1aa39886e12 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 10 Apr 2023 15:13:22 +0100 Subject: [PATCH 08/19] missing brackets after super() --- wpandas/plotting/_matplotlib/hist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wpandas/plotting/_matplotlib/hist.py b/wpandas/plotting/_matplotlib/hist.py index 780d9345..90906cf8 100644 --- a/wpandas/plotting/_matplotlib/hist.py +++ b/wpandas/plotting/_matplotlib/hist.py @@ -51,7 +51,7 @@ def _get_colors(self, num_colors=None, color_kwds='color'): def _make_plot(self): # pragma: no cover # TODO: remove when these changes have been added to pandas if version.parse(pd.__version__) >= version.parse("2.0.0"): - return super._make_plot() + return super()._make_plot() colors = self._get_colors() stacking_id = self._get_stacking_id() From fbf4a7a39683e75a9626872ea6bc3f298e436153 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Tue, 11 Apr 2023 21:07:44 +0100 Subject: [PATCH 09/19] Moved anesthetic.samples to anesthetic.core --- anesthetic/__init__.py | 8 +- anesthetic/convert.py | 2 +- anesthetic/core.py | 1308 ++++++++++++++++++++++ anesthetic/examples/perfect_ns.py | 8 +- anesthetic/gui/plot.py | 4 +- anesthetic/read/chain.py | 6 +- anesthetic/read/cobaya.py | 4 +- anesthetic/read/getdist.py | 4 +- anesthetic/read/multinest.py | 2 +- anesthetic/read/polychord.py | 2 +- anesthetic/samples.py | 1310 +---------------------- docs/source/anesthetic.rst | 4 +- docs/source/quickstart.rst | 8 +- docs/source/reading_writing.rst | 10 +- docs/source/samples.rst | 26 +- tests/{test_samples.py => test_core.py} | 10 +- 16 files changed, 1367 insertions(+), 1349 deletions(-) create mode 100644 anesthetic/core.py rename tests/{test_samples.py => test_core.py} (99%) diff --git a/anesthetic/__init__.py b/anesthetic/__init__.py index 3bf8f432..6a0b65c8 100644 --- a/anesthetic/__init__.py +++ b/anesthetic/__init__.py @@ -1,13 +1,13 @@ """Anesthetic: nested sampling post-processing.""" -import anesthetic.samples +import anesthetic.core import anesthetic.plot import anesthetic.read.chain import anesthetic._version __version__ = anesthetic._version.__version__ -Samples = anesthetic.samples.Samples -MCMCSamples = anesthetic.samples.MCMCSamples -NestedSamples = anesthetic.samples.NestedSamples +Samples = anesthetic.core.Samples +MCMCSamples = anesthetic.core.MCMCSamples +NestedSamples = anesthetic.core.NestedSamples make_2d_axes = anesthetic.plot.make_2d_axes make_1d_axes = anesthetic.plot.make_1d_axes read_chains = anesthetic.read.chain.read_chains diff --git a/anesthetic/convert.py b/anesthetic/convert.py index d5bec4b8..d07e244a 100644 --- a/anesthetic/convert.py +++ b/anesthetic/convert.py @@ -7,7 +7,7 @@ def to_getdist(samples): Parameters ---------- - samples : :class:`anesthetic.samples.Samples` + samples : :class:`anesthetic.core.Samples` anesthetic samples to be converted Returns diff --git a/anesthetic/core.py b/anesthetic/core.py new file mode 100644 index 00000000..5d8f99d8 --- /dev/null +++ b/anesthetic/core.py @@ -0,0 +1,1308 @@ +"""Main classes for the anesthetic module. 
+ +- :class:`anesthetic.core.Samples` +- :class:`anesthetic.core.MCMCSamples` +- :class:`anesthetic.core.NestedSamples` +""" +import numpy as np +import pandas +import copy +import warnings +from pandas import MultiIndex, Series +from collections.abc import Sequence +from anesthetic.utils import (compute_nlive, compute_insertion_indexes, + is_int, logsumexp) +from anesthetic.gui.plot import RunPlotter +from wpandas import WeightedDataFrame, WeightedSeries +from lpandas import LabelledDataFrame, LabelledSeries +from pandas.core.accessor import CachedAccessor +from anesthetic.plot import (make_1d_axes, make_2d_axes, + AxesSeries, AxesDataFrame) +import wpandas +from anesthetic.plotting import PlotAccessor +wpandas.core._WeightedObject.plot = CachedAccessor("plot", PlotAccessor) + + +class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame): + """:class:`pandas.DataFrame` with weights and labels.""" + + _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata + + def __init__(self, *args, **kwargs): + labels = kwargs.pop('labels', None) + if not hasattr(self, '_labels'): + self._labels = ('weights', 'labels') + super().__init__(*args, **kwargs) + if labels is not None: + if isinstance(labels, dict): + labels = [labels.get(p, '') for p in self] + self.set_labels(labels, inplace=True) + + def islabelled(self, axis=1): + """Search for existence of labels.""" + return super().islabelled(axis=axis) + + def get_labels(self, axis=1): + """Retrieve labels from an axis.""" + return super().get_labels(axis=axis) + + def get_labels_map(self, axis=1): + """Retrieve mapping from paramnames to labels from an axis.""" + return super().get_labels_map(axis=axis) + + def get_label(self, param, axis=1): + """Retrieve mapping from paramnames to labels from an axis.""" + return super().get_label(param, axis=axis) + + def set_label(self, param, value, axis=1): + """Set a specific label to a specific value on an axis.""" + return super().set_label(param, value, axis=axis, inplace=True) + + def drop_labels(self, axis=1): + """Drop the labels from an axis if present.""" + return super().drop_labels(axis) + + def set_labels(self, labels, axis=1, inplace=False, level=None): + """Set labels along an axis.""" + return super().set_labels(labels, axis=axis, + inplace=inplace, level=level) + + @property + def _constructor(self): + return WeightedLabelledDataFrame + + @property + def _constructor_sliced(self): + return WeightedLabelledSeries + + +class WeightedLabelledSeries(WeightedSeries, LabelledSeries): + """Series with weights and labels.""" + + _metadata = WeightedSeries._metadata + LabelledSeries._metadata + + def __init__(self, *args, **kwargs): + if not hasattr(self, '_labels'): + self._labels = ('weights', 'labels') + super().__init__(*args, **kwargs) + + def set_label(self, param, value, axis=0): + """Set a specific label to a specific value.""" + return super().set_label(param, value, axis=axis, inplace=True) + + @property + def _constructor(self): + return WeightedLabelledSeries + + @property + def _constructor_expanddim(self): + return WeightedLabelledDataFrame + + +class Samples(WeightedLabelledDataFrame): + """Storage and plotting tools for general samples. + + Extends the :class:`pandas.DataFrame` by providing plotting methods and + standardising sample storage. 
+ + Example plotting commands include + - ``samples.plot_1d(['paramA', 'paramB'])`` + - ``samples.plot_2d(['paramA', 'paramB'])`` + - ``samples.plot_2d([['paramA', 'paramB'], ['paramC', 'paramD']])`` + + Parameters + ---------- + data : np.array + Coordinates of samples. shape = (nsamples, ndims). + + columns : list(str) + reference names of parameters + + weights : np.array + weights of samples. + + logL : np.array + loglikelihoods of samples. + + labels : dict or array-like + mapping from columns to plotting labels + + label : str + Legend label + + logzero : float, default=-1e30 + The threshold for `log(0)` values assigned to rejected sample points. + Anything equal or below this value is set to `-np.inf`. + + """ + + _metadata = WeightedLabelledDataFrame._metadata + ['label'] + + def __init__(self, *args, **kwargs): + logzero = kwargs.pop('logzero', -1e30) + logL = kwargs.pop('logL', None) + if logL is not None: + logL = np.array(logL) + logL = np.where(logL <= logzero, -np.inf, logL) + self.label = kwargs.pop('label', None) + + super().__init__(*args, **kwargs) + + if logL is not None: + self['logL'] = logL + if self.islabelled(axis=1): + self.set_label('logL', r'$\ln\mathcal{L}$') + + @property + def _constructor(self): + return Samples + + plot = CachedAccessor("plot", PlotAccessor) + + def plot_1d(self, axes, *args, **kwargs): + """Create an array of 1D plots. + + Parameters + ---------- + axes : plotting axes + Can be: + + * list(str) or str + * :class:`pandas.Series` of :class:`matplotlib.axes.Axes` + + If a :class:`pandas.Series` is provided as an existing set of axes, + then this is used for creating the plot. Otherwise, a new set of + axes are created using the list or lists of strings. + + kind : str, default='kde_1d' + What kind of plots to produce. Alongside the usual pandas options + {'hist', 'box', 'kde', 'density'}, anesthetic also provides + + * 'hist_1d': :func:`anesthetic.plot.hist_plot_1d` + * 'kde_1d': :func:`anesthetic.plot.kde_plot_1d` + * 'fastkde_1d': :func:`anesthetic.plot.fastkde_plot_1d` + + Warning -- while the other pandas plotting options + {'line', 'bar', 'barh', 'area', 'pie'} are also accessible, these + can be hard to interpret/expensive for :class:`Samples`, + :class:`MCMCSamples`, or :class:`NestedSamples`. + + Returns + ------- + axes : :class:`pandas.Series` of :class:`matplotlib.axes.Axes` + Pandas array of axes objects + + """ + # TODO: remove this in version >= 2.1 + if 'plot_type' in kwargs: + raise ValueError( + "You are using the anesthetic 1.0 kwarg \'plot_type\' instead " + "of anesthetic 2.0 \'kind\'. Please update your code." + ) + + if not isinstance(axes, AxesSeries): + _, axes = make_1d_axes(axes, labels=self.get_labels_map()) + + kwargs['kind'] = kwargs.get('kind', 'kde_1d') + kwargs['label'] = kwargs.get('label', self.label) + + # TODO: remove this in version >= 2.1 + if kwargs['kind'] == 'kde': + warnings.warn( + "You are using \'kde\' as a plot kind. " + "\'kde_1d\' is the appropriate keyword for anesthetic. " + "Your plots may look odd if you use this argument.", + UserWarning + ) + elif kwargs['kind'] == 'hist': + warnings.warn( + "You are using \'hist\' as a plot kind. " + "\'hist_1d\' is the appropriate keyword for anesthetic. 
" + "Your plots may look odd if you use this argument.", + UserWarning + ) + + for x, ax in axes.items(): + if x in self and kwargs['kind'] is not None: + xlabel = self.get_label(x) + self[x].plot(ax=ax, xlabel=xlabel, + *args, **kwargs) + ax.set_xlabel(xlabel) + else: + ax.plot([], []) + + return axes + + def plot_2d(self, axes, *args, **kwargs): + """Create an array of 2D plots. + + To avoid interfering with y-axis sharing, one-dimensional plots are + created on a separate axis, which is monkey-patched onto the argument + ax as the attribute ax.twin. + + Parameters + ---------- + axes : plotting axes + Can be: + - list(str) if the x and y axes are the same + - [list(str),list(str)] if the x and y axes are different + - :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` + + If a :class:`pandas.DataFrame` is provided as an existing set of + axes, then this is used for creating the plot. Otherwise, a new set + of axes are created using the list or lists of strings. + + kind/kinds : dict, optional + What kinds of plots to produce. Dictionary takes the keys + 'diagonal' for the 1D plots and 'lower' and 'upper' for the 2D + plots. The options for 'diagonal' are: + + - 'kde_1d': :func:`anesthetic.plot.kde_plot_1d` + - 'hist_1d': :func:`anesthetic.plot.hist_plot_1d` + - 'fastkde_1d': :func:`anesthetic.plot.fastkde_plot_1d` + - 'kde': :meth:`pandas.Series.plot.kde` + - 'hist': :meth:`pandas.Series.plot.hist` + - 'box': :meth:`pandas.Series.plot.box` + - 'density': :meth:`pandas.Series.plot.density` + + The options for 'lower' and 'upper' are: + + - 'kde_2d': :func:`anesthetic.plot.kde_contour_plot_2d` + - 'hist_2d': :func:`anesthetic.plot.hist_plot_2d` + - 'scatter_2d': :func:`anesthetic.plot.scatter_plot_2d` + - 'fastkde_2d': :func:`anesthetic.plot.fastkde_contour_plot_2d` + - 'kde': :meth:`pandas.DataFrame.plot.kde` + - 'scatter': :meth:`pandas.DataFrame.plot.scatter` + - 'hexbin': :meth:`pandas.DataFrame.plot.hexbin` + + There are also a set of shortcuts provided in + :attr:`plot_2d_default_kinds`: + + - 'kde_1d': 1d kde plots down the diagonal + - 'kde_2d': 2d kde plots in lower triangle + - 'kde': 1d & 2d kde plots in lower & diagonal + - 'hist_1d': 1d histograms down the diagonal + - 'hist_2d': 2d histograms in lower triangle + - 'hist': 1d & 2d histograms in lower & diagonal + + Feel free to add your own to this list! + Default: + {'diagonal': 'kde_1d', 'lower': 'kde_2d', 'upper':'scatter_2d'} + + diagonal_kwargs, lower_kwargs, upper_kwargs : dict, optional + kwargs for the diagonal (1D)/lower or upper (2D) plots. This is + useful when there is a conflict of kwargs for different kinds of + plots. Note that any kwargs directly passed to plot_2d will + overwrite any kwarg with the same key passed to _kwargs. + Default: {} + + Returns + ------- + axes : :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` + Pandas array of axes objects + + """ + # TODO: remove this in version >= 2.1 + if 'types' in kwargs: + raise ValueError( + "You are using the anesthetic 1.0 kwarg \'types\' instead of " + "anesthetic 2.0 \'kind' or \'kinds\' (synonyms). " + "Please update your code." + ) + kind = kwargs.pop('kind', 'default') + kind = kwargs.pop('kinds', kind) + + if isinstance(kind, str) and kind in self.plot_2d_default_kinds: + kind = self.plot_2d_default_kinds.get(kind) + if (not isinstance(kind, dict) or + not set(kind.keys()) <= {'lower', 'upper', 'diagonal'}): + raise ValueError(f"{kind} is not a valid input. 
`kind`/`kinds` " + "must be a dict mapping " + "{'lower','diagonal','upper'} to an allowed plot " + "(see `help(NestedSamples.plot2d)`), or one of " + "the following string shortcuts: " + f"{list(self.plot_2d_default_kinds.keys())}") + + local_kwargs = {pos: kwargs.pop('%s_kwargs' % pos, {}) + for pos in ['upper', 'lower', 'diagonal']} + kwargs['label'] = kwargs.get('label', self.label) + + for pos in local_kwargs: + local_kwargs[pos].update(kwargs) + + if not isinstance(axes, AxesDataFrame): + _, axes = make_2d_axes(axes, labels=self.get_labels(), + upper=('upper' in kind), + lower=('lower' in kind), + diagonal=('diagonal' in kind)) + + for y, row in axes.iterrows(): + for x, ax in row.items(): + if ax is not None: + pos = ax.position + lkwargs = local_kwargs.get(pos, {}) + lkwargs['kind'] = kind.get(pos, None) + # TODO: remove this in version >= 2.1 + if lkwargs['kind'] == 'kde': + warnings.warn( + "You are using \'kde\' as a plot kind. " + "\'kde_1d\' and \'kde_2d\' are the appropriate " + "keywords for anesthetic. Your plots may look " + "odd if you use this argument.", + UserWarning + ) + elif lkwargs['kind'] == 'hist': + warnings.warn( + "You are using \'hist\' as a plot kind. " + "\'hist_1d\' and \'hist_2d\' are the appropriate " + "keywords for anesthetic. Your plots may look " + "odd if you use this argument.", + UserWarning + ) + if x in self and y in self and lkwargs['kind'] is not None: + xlabel = self.get_label(x) + ylabel = self.get_label(y) + if x == y: + self[x].plot(ax=ax.twin, xlabel=xlabel, + *args, **lkwargs) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + else: + self.plot(x, y, ax=ax, xlabel=xlabel, + ylabel=ylabel, *args, **lkwargs) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + else: + if x == y: + ax.twin.plot([], []) + else: + ax.plot([], []) + + return axes + + plot_2d_default_kinds = { + 'default': {'diagonal': 'kde_1d', + 'lower': 'kde_2d', + 'upper': 'scatter_2d'}, + 'kde': {'diagonal': 'kde_1d', 'lower': 'kde_2d'}, + 'kde_1d': {'diagonal': 'kde_1d'}, + 'kde_2d': {'lower': 'kde_2d'}, + 'hist': {'diagonal': 'hist_1d', 'lower': 'hist_2d'}, + 'hist_1d': {'diagonal': 'hist_1d'}, + 'hist_2d': {'lower': 'hist_2d'}, + } + + def importance_sample(self, logL_new, action='add', inplace=False): + """Perform importance re-weighting on the log-likelihood. + + Parameters + ---------- + logL_new : np.array + New log-likelihood values. Should have the same shape as `logL`. + + action : str, default='add' + Can be any of {'add', 'replace', 'mask'}. + + * add: Add the new `logL_new` to the current `logL`. + * replace: Replace the current `logL` with the new `logL_new`. + * mask: treat `logL_new` as a boolean mask and only keep the + corresponding (True) samples. + + inplace : bool, default=False + Indicates whether to modify the existing array, or return a new + frame with importance sampling applied. + + Returns + ------- + samples : :class:`Samples`/:class:`MCMCSamples`/:class:`NestedSamples` + Importance re-weighted samples. 
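(A rough usage sketch of the plotting and importance-sampling interface documented above; it is not part of the diff, and the chain root, column names and log-likelihood shift are placeholders:)

    import numpy as np
    from anesthetic import read_chains

    samples = read_chains("chains/example_root")    # placeholder root
    samples.plot_2d(['x0', 'x1', 'x2'])             # default: kde_1d diagonal, kde_2d lower, scatter_2d upper
    dlogL = np.random.normal(0, 0.1, len(samples))  # placeholder log-likelihood shift
    reweighted = samples.importance_sample(dlogL, action='add')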
+ + """ + if inplace: + samples = self + else: + samples = self.copy() + + if action == 'add': + new_weights = samples.get_weights() + new_weights *= np.exp(logL_new - logL_new.max()) + samples.set_weights(new_weights, inplace=True) + samples.logL += logL_new + elif action == 'replace': + logL_new2 = logL_new - samples.logL + new_weights = samples.get_weights() + new_weights *= np.exp(logL_new2 - logL_new2.max()) + samples.set_weights(new_weights, inplace=True) + samples.logL = logL_new + elif action == 'mask': + samples = samples[logL_new] + else: + raise NotImplementedError("`action` needs to be one of " + "{'add', 'replace', 'mask'}, but '%s' " + "was requested." % action) + + if inplace: + self._update_inplace(samples) + else: + return samples.__finalize__(self, "importance_sample") + + # TODO: remove this in version >= 2.1 + @property + def tex(self): + # noqa: disable=D102 + raise NotImplementedError( + "This is anesthetic 1.0 syntax. You need to update, e.g.\n" + "samples.tex[label] = tex # anesthetic 1.0\n" + "samples.set_label(label, tex) # anesthetic 2.0\n\n" + "tex = samples.tex[label] # anesthetic 1.0\n" + "tex = samples.get_label(label) # anesthetic 2.0" + ) + + +class MCMCSamples(Samples): + """Storage and plotting tools for MCMC samples. + + Any new functionality specific to MCMC (e.g. convergence criteria etc.) + should be put here. + + Parameters + ---------- + data : np.array + Coordinates of samples. shape = (nsamples, ndims). + + columns : array-like + reference names of parameters + + weights : np.array + weights of samples. + + logL : np.array + loglikelihoods of samples. + + labels : dict or array-like + mapping from columns to plotting labels + + label : str + Legend label + + logzero : float, default=-1e30 + The threshold for `log(0)` values assigned to rejected sample points. + Anything equal or below this value is set to `-np.inf`. + + """ + + _metadata = Samples._metadata + ['root'] + + @property + def _constructor(self): + return MCMCSamples + + def remove_burn_in(self, burn_in, reset_index=False, inplace=False): + """Remove burn-in samples from each MCMC chain. + + Parameters + ---------- + burn_in : int or float or array_like + Fraction or number of samples to remove or keep: + + * ``if 0 < burn_in < 1``: remove first fraction of samples + * ``elif 1 < burn_in``: remove first number of samples + * ``elif -1 < burn_in < 0``: keep last fraction of samples + * ``elif burn_in < -1``: keep last number of samples + * ``elif type(burn_in)==list``: different burn-in for each chain + + reset_index : bool, default=False + Whether to reset the index counter to start at zero or not. + + inplace : bool, default=False + Indicates whether to modify the existing array or return a copy. + + """ + chains = self.groupby(('chain', '$n_\\mathrm{chain}$'), sort=False, + group_keys=False) + nchains = chains.ngroups + if isinstance(burn_in, (int, float)): + ndrop = np.full(nchains, burn_in) + elif isinstance(burn_in, (list, tuple, np.ndarray)) \ + and len(burn_in) == nchains: + ndrop = np.array(burn_in) + else: + raise ValueError("`burn_in` has to be a scalar or an array of " + "length matching the number of chains " + "`nchains=%d`. 
However, you provided " + "`burn_in=%s`" % (nchains, burn_in)) + if np.all(np.abs(ndrop) < 1): + nsamples = chains.count().iloc[:, 0].to_numpy() + ndrop = ndrop * nsamples + ndrop = ndrop.astype(int) + data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index, + inplace=inplace) + if reset_index: + data = data.reset_index(drop=True, inplace=inplace) + return data + + def Gelman_Rubin(self, params=None): + """Gelman--Rubin convergence statistic of multiple MCMC chains. + + Determine the Gelman--Rubin convergence statistic ``R-1`` by computing + and comparing the within-chain variance and the between-chain variance. + This follows the routine as outlined in + `Lewis (2013), section IV.A. `_ + + Note that this requires more than one chain. To circumvent this, you + could overwrite the ``'chain'`` column, splitting the samples into two + or more sets. + + Parameters + ---------- + params : list(str) + List of column names (i.e. parameters) to be included in the + convergence calculation. + Default: all parameters (except those parameters that contain + 'prior', 'chi2', or 'logL' in their names) + + Returns + ------- + Rminus1 : float + Gelman--Rubin convergence statistic ``R-1``. The smaller, the + better converged. Aiming for ``Rminus1~0.01`` should normally work + well. + + """ + self.columns.set_names(['params', 'labels'], inplace=True) + if params is None: + params = [key for key in self.columns.get_level_values('params') + if 'prior' not in key + and 'chi2' not in key + and 'logL' not in key + and 'chain' not in key] + chains = self[params+['chain']].groupby( + ('chain', '$n_\\mathrm{chain}$'), sort=False, + ) + nchains = chains.ngroups + + # Within chain variance ``W`` + # (average variance within each chain): + W = chains.cov().groupby(level=('params', 'labels'), sort=False).mean() + # Between-chain variance ``B`` + # (variance of the chain means): + B = np.atleast_2d(np.cov(chains.mean().T, ddof=1)) + # We don't weight `B` with the effective number of samples (sum of the + # weights), here, because we want to notice outliers from shorter + # chains. + # In order to be conservative, we generally want to underestimate `W` + # and overestimate `B`, since `W` goes in the denominator and `B` in + # the numerator of the Gelman--Rubin statistic `Rminus1`. + + try: + invL = np.linalg.inv(np.linalg.cholesky(W)) + except np.linalg.LinAlgError as e: + raise np.linalg.LinAlgError( + "Make sure you do not have linearly dependent parameters, " + "e.g. having both `As` and `A=1e9*As` causes trouble.") from e + D = np.linalg.eigvalsh(invL @ ((nchains+1)/nchains * B) @ invL.T) + # The factor of `(nchains+1)/nchains` accounts for the additional + # uncertainty from using a finite number of chains. + Rminus1 = np.max(np.abs(D)) + return Rminus1 + + +class NestedSamples(Samples): + """Storage and plotting tools for Nested Sampling samples. + + We extend the :class:`Samples` class with the additional methods: + + * ``self.live_points(logL)`` + * ``self.set_beta(beta)`` + * ``self.prior()`` + * ``self.posterior_points(beta)`` + * ``self.prior_points()`` + * ``self.stats()`` + * ``self.logZ()`` + * ``self.D_KL()`` + * ``self.d()`` + * ``self.recompute()`` + * ``self.gui()`` + * ``self.importance_sample()`` + + Parameters + ---------- + data : np.array + Coordinates of samples. shape = (nsamples, ndims). + + columns : list(str) + reference names of parameters + + logL : np.array + loglikelihoods of samples. + + logL_birth : np.array or int + birth loglikelihoods, or number of live points. 
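(A sketch of the MCMC-specific workflow above, with a placeholder chain root: remove_burn_in discards the start of each chain and Gelman_Rubin reports the R-1 convergence statistic:)

    from anesthetic import read_chains

    mcmc = read_chains("chains/mcmc_root")   # placeholder root containing several chains
    mcmc = mcmc.remove_burn_in(burn_in=0.3)  # drop the first 30% of every chain
    print(mcmc.Gelman_Rubin())               # R-1: aim for roughly 0.01 or below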
+ + labels : dict + optional mapping from column names to plot labels + + label : str + Legend label + default: basename of root + + beta : float + thermodynamic temperature + default: 1. + + logzero : float + The threshold for `log(0)` values assigned to rejected sample points. + Anything equal or below this value is set to `-np.inf`. + default: -1e30 + + """ + + _metadata = Samples._metadata + ['root', '_beta'] + + def __init__(self, *args, **kwargs): + logzero = kwargs.pop('logzero', -1e30) + self._beta = kwargs.pop('beta', 1.) + logL_birth = kwargs.pop('logL_birth', None) + if not isinstance(logL_birth, int) and logL_birth is not None: + logL_birth = np.array(logL_birth) + logL_birth = np.where(logL_birth <= logzero, -np.inf, + logL_birth) + + super().__init__(logzero=logzero, *args, **kwargs) + if logL_birth is not None: + self.recompute(logL_birth, inplace=True) + + @property + def _constructor(self): + return NestedSamples + + def _compute_insertion_indexes(self): + logL = self.logL.to_numpy() + logL_birth = self.logL_birth.to_numpy() + self['insertion'] = compute_insertion_indexes(logL, logL_birth) + + @property + def beta(self): + """Thermodynamic inverse temperature.""" + return self._beta + + @beta.setter + def beta(self, beta): + self._beta = beta + logw = self.logw(beta=beta) + self.set_weights(np.exp(logw - logw.max()), inplace=True) + + def set_beta(self, beta, inplace=False): + """Change the inverse temperature. + + Parameters + ---------- + beta : float + Temperature to set. + (``beta=0`` corresponds to the prior distribution.) + + inplace : bool, default=False + Indicates whether to modify the existing array, or return a copy + with the temperature changed. + + """ + if inplace: + self.beta = beta + else: + data = self.copy() + data.beta = beta + return data + + def prior(self, inplace=False): + """Re-weight samples at infinite temperature to get prior samples.""" + return self.set_beta(beta=0, inplace=inplace) + + # TODO: remove this in version >= 2.1 + def ns_output(self, *args, **kwargs): + # noqa: disable=D102 + raise NotImplementedError( + "This is anesthetic 1.0 syntax. You need to update, e.g.\n" + "samples.ns_output(1000) # anesthetic 1.0\n" + "samples.stats(1000) # anesthetic 2.0\n\n" + "Check out the new temperature functionality: help(samples.stats)," + " as well as average loglikelihoods: help(samples.logL_P)" + ) + + def stats(self, nsamples=None, beta=None): + r"""Compute Nested Sampling statistics. + + Using nested sampling we can compute: + + - ``logZ``: Bayesian evidence + + .. math:: + \log Z = \int L \pi d\theta + + - ``D_KL``: Kullback--Leibler divergence + + .. math:: + D_{KL} = \int P \log(P / \pi) d\theta + + - ``logL_P``: posterior averaged log-likelihood + + .. math:: + \langle\log L\rangle_P = \int P \log L d\theta + + - ``d_G``: Gaussian model dimensionality + (or posterior variance of the log-likelihood) + + .. math:: + d_G/2 = \langle(\log L)^2\rangle_P - \langle\log L\rangle_P^2 + + see `Handley and Lemos (2019) `_ + for more details on model dimensionalities. + + (Note that all of these are available as individual functions with the + same signature.) + + In addition to point estimates nested sampling provides an error bar + or more generally samples from a (correlated) distribution over the + variables. Samples from this distribution can be computed by providing + an integer nsamples. + + Nested sampling as an athermal algorithm is also capable of producing + these as a function of inverse thermodynamic temperature beta. 
This is + provided as a vectorised function. If nsamples is also provided a + MultiIndex dataframe is generated. + + These obey Occam's razor equation: + + .. math:: + \log Z = \langle\log L\rangle_P - D_{KL}, + + which splits a model's quality ``logZ`` into a goodness-of-fit + ``logL_P`` and a complexity penalty ``D_KL``. See `Hergt et al. (2021) + `_ for more detail. + + Parameters + ---------- + nsamples : int, optional + - If nsamples is not supplied, calculate mean value + - If nsamples is integer, draw nsamples from the distribution of + values inferred by nested sampling + + beta : float, array-like, optional + inverse temperature(s) beta=1/kT. Default self.beta + + Returns + ------- + if beta is scalar and nsamples is None: + Series, index ['logZ', 'd_G', 'DK_L', 'logL_P'] + elif beta is scalar and nsamples is int: + :class:`Samples`, index range(nsamples), + columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + elif beta is array-like and nsamples is None: + :class:`Samples`, index beta, + columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + elif beta is array-like and nsamples is int: + :class:`Samples`, index :class:`pandas.MultiIndex` the product of + beta and range(nsamples) + columns ['logZ', 'd_G', 'DK_L', 'logL_P'] + """ + logw = self.logw(nsamples, beta) + if nsamples is None and beta is None: + samples = self._constructor_sliced(index=self.columns[:0], + dtype=float) + else: + samples = Samples(index=logw.columns, columns=self.columns[:0]) + samples['logZ'] = self.logZ(logw) + samples.set_label('logZ', r'$\ln\mathcal{Z}$') + w = np.exp(logw-samples['logZ']) + + betalogL = self._betalogL(beta) + S = (logw*0).add(betalogL, axis=0) - samples.logZ + + samples['D_KL'] = (S*w).sum() + samples.set_label('D_KL', r'$\mathcal{D}_\mathrm{KL}$') + + samples['logL_P'] = samples['logZ'] + samples['D_KL'] + samples.set_label('logL_P', + r'$\langle\ln\mathcal{L}\rangle_\mathcal{P}$') + + samples['d_G'] = ((S-samples.D_KL)**2*w).sum()*2 + samples.set_label('d_G', r'$d_\mathrm{G}$') + + samples.label = self.label + return samples + + def logX(self, nsamples=None): + """Log-Volume. + + The log of the prior volume contained within each iso-likelihood + contour. + + Parameters + ---------- + nsamples : int, optional + - If nsamples is not supplied, calculate mean value + - If nsamples is integer, draw nsamples from the distribution of + values inferred by nested sampling + + Returns + ------- + if nsamples is None: + WeightedSeries like self + elif nsamples is int: + WeightedDataFrame like self, columns range(nsamples) + """ + if nsamples is None: + t = np.log(self.nlive/(self.nlive+1)) + else: + r = np.log(np.random.rand(len(self), nsamples)) + w = self.get_weights() + r = self.nlive._constructor_expanddim(r, self.index, weights=w) + t = r.divide(self.nlive, axis=0) + t.columns.name = 'samples' + logX = t.cumsum() + logX.name = 'logX' + return logX + + def dlogX(self, nsamples=None): + """Compute volume of shell of loglikelihood. 
+ + Parameters + ---------- + nsamples : int, optional + - If nsamples is not supplied, calculate mean value + - If nsamples is integer, draw nsamples from the distribution of + values inferred by nested sampling + + Returns + ------- + if nsamples is None: + WeightedSeries like self + elif nsamples is int: + WeightedDataFrame like self, columns range(nsamples) + """ + logX = self.logX(nsamples) + logXp = logX.shift(1, fill_value=0) + logXm = logX.shift(-1, fill_value=-np.inf) + dlogX = np.log(1 - np.exp(logXm-logXp)) + logXp - np.log(2) + dlogX.name = 'dlogX' + + return dlogX + + def _betalogL(self, beta=None): + """Log(L**beta) convenience function. + + Parameters + ---------- + beta : scalar or array-like, optional + inverse temperature(s) beta=1/kT. Default self.beta + + Returns + ------- + if beta is scalar: + WeightedSeries like self + elif beta is array-like: + WeightedDataFrame like self, columns of beta + """ + if beta is None: + beta = self.beta + logL = self.logL + if np.isscalar(beta): + betalogL = beta * logL + betalogL.name = 'betalogL' + else: + betalogL = logL._constructor_expanddim(np.outer(self.logL, beta), + self.index, columns=beta) + betalogL.columns.name = 'beta' + return betalogL + + def logw(self, nsamples=None, beta=None): + """Log-nested sampling weight. + + The logarithm of the (unnormalised) sampling weight log(L**beta*dX). + + Parameters + ---------- + nsamples : int, optional + - If nsamples is not supplied, calculate mean value + - If nsamples is integer, draw nsamples from the distribution of + values inferred by nested sampling + - If nsamples is array, nsamples is assumed to be logw and returned + (implementation convenience functionality) + + beta : float, array-like, optional + inverse temperature(s) beta=1/kT. Default self.beta + + Returns + ------- + if nsamples is array-like: + WeightedDataFrame equal to nsamples + elif beta is scalar and nsamples is None: + WeightedSeries like self + elif beta is array-like and nsamples is None: + WeightedDataFrame like self, columns of beta + elif beta is scalar and nsamples is int: + WeightedDataFrame like self, columns of range(nsamples) + elif beta is array-like and nsamples is int: + WeightedDataFrame like self, MultiIndex columns the product of beta + and range(nsamples) + """ + if np.ndim(nsamples) > 0: + return nsamples + + dlogX = self.dlogX(nsamples) + betalogL = self._betalogL(beta) + + if dlogX.ndim == 1 and betalogL.ndim == 1: + logw = dlogX + betalogL + elif dlogX.ndim > 1 and betalogL.ndim == 1: + logw = dlogX.add(betalogL, axis=0) + elif dlogX.ndim == 1 and betalogL.ndim > 1: + logw = betalogL.add(dlogX, axis=0) + else: + cols = MultiIndex.from_product([betalogL.columns, dlogX.columns]) + dlogX = dlogX.reindex(columns=cols, level='samples') + betalogL = betalogL.reindex(columns=cols, level='beta') + logw = betalogL+dlogX + return logw + + def logZ(self, nsamples=None, beta=None): + """Log-Evidence. + + Parameters + ---------- + nsamples : int, optional + - If nsamples is not supplied, calculate mean value + - If nsamples is integer, draw nsamples from the distribution of + values inferred by nested sampling + - If nsamples is array, nsamples is assumed to be logw + + beta : float, array-like, optional + inverse temperature(s) beta=1/kT. 
Default self.beta + + Returns + ------- + if nsamples is array-like: + :class:`pandas.Series`, index nsamples.columns + elif beta is scalar and nsamples is None: + float + elif beta is array-like and nsamples is None: + :class:`pandas.Series`, index beta + elif beta is scalar and nsamples is int: + :class:`pandas.Series`, index range(nsamples) + elif beta is array-like and nsamples is int: + :class:`pandas.Series`, :class:`pandas.MultiIndex` columns the + product of beta and range(nsamples) + """ + logw = self.logw(nsamples, beta) + logZ = logsumexp(logw, axis=0) + if np.isscalar(logZ): + return logZ + else: + return logw._constructor_sliced(logZ, name='logZ', + index=logw.columns).squeeze() + + _logZ_function_shape = '\n' + '\n'.join(logZ.__doc__.split('\n')[1:]) + + # TODO: remove this in version >= 2.1 + def D(self, nsamples=None): + # noqa: disable=D102 + raise NotImplementedError( + "This is anesthetic 1.0 syntax. You need to update, e.g.\n" + "samples.D(1000) # anesthetic 1.0\n" + "samples.D_KL(1000) # anesthetic 2.0\n\n" + "Check out the new temperature functionality: help(samples.D_KL), " + "as well as average loglikelihoods: help(samples.logL_P)" + ) + + def D_KL(self, nsamples=None, beta=None): + """Kullback--Leibler divergence.""" + logw = self.logw(nsamples, beta) + logZ = self.logZ(logw, beta) + betalogL = self._betalogL(beta) + S = (logw*0).add(betalogL, axis=0) - logZ + w = np.exp(logw-logZ) + D_KL = (S*w).sum() + if np.isscalar(D_KL): + return D_KL + else: + return self._constructor_sliced(D_KL, name='D_KL', + index=logw.columns).squeeze() + + D_KL.__doc__ += _logZ_function_shape + + # TODO: remove this in version >= 2.1 + def d(self, nsamples=None): + # noqa: disable=D102 + raise NotImplementedError( + "This is anesthetic 1.0 syntax. You need to update, e.g.\n" + "samples.d(1000) # anesthetic 1.0\n" + "samples.d_G(1000) # anesthetic 2.0\n\n" + "Check out the new temperature functionality: help(samples.d_G), " + "as well as average loglikelihoods: help(samples.logL_P)" + ) + + def d_G(self, nsamples=None, beta=None): + """Bayesian model dimensionality.""" + logw = self.logw(nsamples, beta) + logZ = self.logZ(logw, beta) + betalogL = self._betalogL(beta) + S = (logw*0).add(betalogL, axis=0) - logZ + w = np.exp(logw-logZ) + D_KL = (S*w).sum() + d_G = ((S-D_KL)**2*w).sum()*2 + if np.isscalar(d_G): + return d_G + else: + return self._constructor_sliced(d_G, name='d_G', + index=logw.columns).squeeze() + + d_G.__doc__ += _logZ_function_shape + + def logL_P(self, nsamples=None, beta=None): + """Posterior averaged loglikelihood.""" + logw = self.logw(nsamples, beta) + logZ = self.logZ(logw, beta) + betalogL = self._betalogL(beta) + betalogL = (logw*0).add(betalogL, axis=0) + w = np.exp(logw-logZ) + logL_P = (betalogL*w).sum() + if np.isscalar(logL_P): + return logL_P + else: + return self._constructor_sliced(logL_P, name='logL_P', + index=logw.columns).squeeze() + + logL_P.__doc__ += _logZ_function_shape + + def live_points(self, logL=None): + """Get the live points within logL. + + Parameters + ---------- + logL : float or int, optional + Loglikelihood or iteration number to return live points. + If not provided, return the last set of active live points. 
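(A sketch of the nested sampling statistics above, using the toy gaussian generator from anesthetic.examples.perfect_ns that this patch also touches; the run settings here are arbitrary:)

    from anesthetic.examples.perfect_ns import gaussian

    ns = gaussian(nlive=500, ndims=4)      # toy perfect nested sampling run
    print(ns.logZ(), ns.D_KL(), ns.d_G())  # point estimates
    stats = ns.stats(nsamples=1000)        # error bars from sampled volume weights
    print(stats['logZ'].mean(), stats['logZ'].std())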
+ + Returns + ------- + live_points : Samples + Live points at either: + - contour logL (if input is float) + - ith iteration (if input is integer) + - last set of live points if no argument provided + """ + if logL is None: + logL = self.logL_birth.max() + else: + try: + logL = float(self.logL[logL]) + except KeyError: + pass + i = ((self.logL >= logL) & (self.logL_birth < logL)).to_numpy() + return Samples(self[i]).set_weights(None) + + def posterior_points(self, beta=1): + """Get equally weighted posterior points at temperature beta.""" + return self.set_beta(beta).compress(-1) + + def prior_points(self, params=None): + """Get equally weighted prior points.""" + return self.posterior_points(beta=0) + + def gui(self, params=None): + """Construct a graphical user interface for viewing samples.""" + return RunPlotter(self, params) + + def importance_sample(self, logL_new, action='add', inplace=False): + """Perform importance re-weighting on the log-likelihood. + + Parameters + ---------- + logL_new : np.array + New log-likelihood values. Should have the same shape as `logL`. + + action : str, default='add' + Can be any of {'add', 'replace', 'mask'}. + + * add: Add the new `logL_new` to the current `logL`. + * replace: Replace the current `logL` with the new `logL_new`. + * mask: treat `logL_new` as a boolean mask and only keep the + corresponding (True) samples. + + inplace : bool, optional + Indicates whether to modify the existing array, or return a new + frame with importance sampling applied. + default: False + + Returns + ------- + samples : :class:`NestedSamples` + Importance re-weighted samples. + + """ + samples = super().importance_sample(logL_new, action=action) + mask = (samples.logL > samples.logL_birth).to_numpy() + samples = samples[mask].recompute() + if inplace: + self._update_inplace(samples) + else: + return samples.__finalize__(self, "importance_sample") + + def recompute(self, logL_birth=None, inplace=False): + """Re-calculate the nested sampling contours and live points. + + Parameters + ---------- + logL_birth : array-like or int, optional + + * array-like: the birth contours. + * int: the number of live points. + * default: use the existing birth contours to compute nlive + + inplace : bool, default=False + Indicates whether to modify the existing array, or return a new + frame with contours resorted and nlive recomputed + + """ + if inplace: + samples = self + else: + samples = self.copy() + + nlive_label = r'$n_\mathrm{live}$' + if is_int(logL_birth): + nlive = logL_birth + samples.sort_values('logL', inplace=True) + samples.reset_index(drop=True, inplace=True) + n = np.ones(len(self), int) * nlive + n[-nlive:] = np.arange(nlive, 0, -1) + samples['nlive', nlive_label] = n + else: + if logL_birth is not None: + label = r'$\ln\mathcal{L}_\mathrm{birth}$' + samples['logL_birth'] = logL_birth + if self.islabelled(): + samples.set_label('logL_birth', label) + + if 'logL_birth' not in samples: + raise RuntimeError("Cannot recompute run without " + "birth contours logL_birth.") + + invalid = (samples.logL <= samples.logL_birth).to_numpy() + n_bad = invalid.sum() + n_equal = (samples.logL == samples.logL_birth).sum() + if n_bad: + warnings.warn("%i out of %i samples have logL <= logL_birth," + "\n%i of which have logL == logL_birth." + "\nThis may just indicate numerical rounding " + "errors at the peak of the likelihood, but " + "further investigation of the chains files is " + "recommended." + "\nDropping the invalid samples." 
% + (n_bad, len(samples), n_equal), + RuntimeWarning) + samples = samples[~invalid].reset_index(drop=True) + + samples.sort_values('logL', inplace=True) + samples.reset_index(drop=True, inplace=True) + nlive = compute_nlive(samples.logL, samples.logL_birth) + samples['nlive'] = nlive + if self.islabelled(): + samples.set_label('nlive', nlive_label) + + samples.beta = samples._beta + + if np.any(pandas.isna(samples.logL)): + warnings.warn("NaN encountered in logL. If this is unexpected, you" + " should investigate why your likelihood is throwing" + " NaNs. Dropping these samples at prior level", + RuntimeWarning) + samples = samples[samples.logL.notna().to_numpy()].recompute() + + if inplace: + self._update_inplace(samples) + else: + return samples.__finalize__(self, "recompute") + + +def merge_nested_samples(runs): + """Merge one or more nested sampling runs. + + Parameters + ---------- + runs : list(:class:`NestedSamples`) + List or array-like of one or more nested sampling runs. + If only a single run is provided, this recalculates the live points and + as such can be used for masked runs. + + Returns + ------- + samples : :class:`NestedSamples` + Merged run. + """ + merge = pandas.concat(runs, ignore_index=True) + return merge.recompute() + + +def merge_samples_weighted(samples, weights=None, label=None): + r"""Merge sets of samples with weights. + + Combine two (or more) samples so the new PDF is + P(x|new) = weight_A P(x|A) + weight_B P(x|B). + The number of samples and internal weights do not affect the result. + + Parameters + ---------- + samples : list(:class:`NestedSamples`) or list(:class:`MCMCSamples`) + List or array-like of one or more MCMC or nested sampling runs. + + weights : list(double) or None + Weight for each run in samples (normalized internally). + Can be omitted if samples are :class:`NestedSamples`, + then exp(logZ) is used as weight. + + label : str or None, default=None + Label for the new samples. + + Returns + ------- + new_samples : :class:`Samples` + Merged (weighted) run. + """ + if not (isinstance(samples, Sequence) or + isinstance(samples, Series)): + raise TypeError("samples must be a list of samples " + "(Sequence or pandas.Series)") + + mcmc_samples = copy.deepcopy([Samples(s) for s in samples]) + if weights is None: + try: + logZs = np.array(copy.deepcopy([s.logZ() for s in samples])) + except AttributeError: + raise ValueError("If samples includes MCMCSamples " + "then weights must be given.") + # Subtract logsumexp to avoid numerical issues (similar to max(logZs)) + logZs -= logsumexp(logZs) + weights = np.exp(logZs) + else: + if len(weights) != len(samples): + raise ValueError("samples and weights must have the same length," + "each weight is for a whole sample. 
Currently", + len(samples), len(weights)) + + new_samples = [] + for s, w in zip(mcmc_samples, weights): + # Normalize the given weights + new_weights = s.get_weights() / s.get_weights().sum() + new_weights *= w/np.sum(weights) + s = Samples(s, weights=new_weights) + new_samples.append(s) + + new_samples = pandas.concat(new_samples) + + new_weights = new_samples.get_weights() + new_weights /= new_weights.max() + new_samples.set_weights(new_weights, inplace=True) + + new_samples.label = label + + return new_samples diff --git a/anesthetic/examples/perfect_ns.py b/anesthetic/examples/perfect_ns.py index ca73e2b3..db1ff888 100644 --- a/anesthetic/examples/perfect_ns.py +++ b/anesthetic/examples/perfect_ns.py @@ -2,7 +2,7 @@ import numpy as np from anesthetic.examples.utils import random_ellipsoid from anesthetic import NestedSamples -from anesthetic.samples import merge_nested_samples +from anesthetic.core import merge_nested_samples def gaussian(nlive, ndims, sigma=0.1, R=1, logLmin=-1e-2): @@ -31,7 +31,7 @@ def gaussian(nlive, ndims, sigma=0.1, R=1, logLmin=-1e-2): Returns ------- - samples : :class:`anesthetic.samples.NestedSamples` + samples : :class:`anesthetic.core.NestedSamples` Nested sampling run """ @@ -87,7 +87,7 @@ def correlated_gaussian(nlive, mean, cov, bounds=None): Returns ------- - samples : :class:`anesthetic.samples.NestedSamples` + samples : :class:`anesthetic.core.NestedSamples` Nested sampling run """ mean = np.array(mean, dtype=float) @@ -220,7 +220,7 @@ def planck_gaussian(nlive=500): Returns ------- - samples : :class:`anesthetic.samples.NestedSamples` + samples : :class:`anesthetic.core.NestedSamples` Nested sampling run """ columns = ['omegabh2', 'omegach2', 'theta', 'tau', 'logA', 'ns'] diff --git a/anesthetic/gui/plot.py b/anesthetic/gui/plot.py index 451ba444..1a9f5be3 100644 --- a/anesthetic/gui/plot.py +++ b/anesthetic/gui/plot.py @@ -115,13 +115,13 @@ class RunPlotter(object): Parameters ---------- - samples : :class:`anesthetic.samples.NestedSamples` + samples : :class:`anesthetic.core.NestedSamples` The root string for the chains files to be used, or a set of nested samples. Attributes ---------- - samples : :class:`anesthetic.samples.NestedSamples` + samples : :class:`anesthetic.core.NestedSamples` Object for extracting nested sampling data from chains files. 
fig : :class:`matplotlib.figure.Figure` diff --git a/anesthetic/read/chain.py b/anesthetic/read/chain.py index 05645bf6..62fe7add 100644 --- a/anesthetic/read/chain.py +++ b/anesthetic/read/chain.py @@ -31,8 +31,8 @@ def read_chains(root, *args, **kwargs): Returns ------- - :class:`anesthetic.samples.NestedSamples` or - :class:`anesthetic.samples.MCMCSamples` depending on auto-detection + :class:`anesthetic.core.NestedSamples` or + :class:`anesthetic.core.MCMCSamples` depending on auto-detection """ if 'burn_in' in kwargs: @@ -42,7 +42,7 @@ def read_chains(root, *args, **kwargs): "read_chains(root, burn_in=0.5) # anesthetic 1.0\n" "read_chains(root).remove_burn_in(0.5) # anesthetic 2.0\n" "See also https://anesthetic.readthedocs.io/en/latest/" - "anesthetic.html#anesthetic.samples.MCMCSamples.remove_burn_in" + "anesthetic.html#anesthetic.core.MCMCSamples.remove_burn_in" ) errors = [] for read in [read_polychord, read_multinest, read_cobaya, read_getdist]: diff --git a/anesthetic/read/cobaya.py b/anesthetic/read/cobaya.py index 658f0fe8..3b2ae214 100644 --- a/anesthetic/read/cobaya.py +++ b/anesthetic/read/cobaya.py @@ -2,7 +2,7 @@ import os import re import numpy as np -from anesthetic.samples import MCMCSamples +from anesthetic.core import MCMCSamples from pandas import concat @@ -36,7 +36,7 @@ def read_cobaya(root, *args, **kwargs): Returns ------- - :class:`anesthetic.samples.MCMCSamples` + :class:`anesthetic.core.MCMCSamples` """ dirname, basename = os.path.split(root) diff --git a/anesthetic/read/getdist.py b/anesthetic/read/getdist.py index 3a10bd24..dad2c592 100644 --- a/anesthetic/read/getdist.py +++ b/anesthetic/read/getdist.py @@ -2,7 +2,7 @@ import os import re import numpy as np -from anesthetic.samples import MCMCSamples +from anesthetic.core import MCMCSamples from pandas import concat @@ -44,7 +44,7 @@ def read_getdist(root, *args, **kwargs): Returns ------- - :class:`anesthetic.samples.MCMCSamples` + :class:`anesthetic.core.MCMCSamples` """ dirname, basename = os.path.split(root) diff --git a/anesthetic/read/multinest.py b/anesthetic/read/multinest.py index b5a7dd54..a69a7b52 100644 --- a/anesthetic/read/multinest.py +++ b/anesthetic/read/multinest.py @@ -2,7 +2,7 @@ import os import numpy as np from anesthetic.read.getdist import read_paramnames -from anesthetic.samples import NestedSamples +from anesthetic.core import NestedSamples def read_multinest(root, *args, **kwargs): diff --git a/anesthetic/read/polychord.py b/anesthetic/read/polychord.py index 93b3b51b..52d6b5ab 100644 --- a/anesthetic/read/polychord.py +++ b/anesthetic/read/polychord.py @@ -2,7 +2,7 @@ import os import numpy as np from anesthetic.read.getdist import read_paramnames -from anesthetic.samples import NestedSamples +from anesthetic.core import NestedSamples def read_polychord(root, *args, **kwargs): diff --git a/anesthetic/samples.py b/anesthetic/samples.py index 9d3fa2c0..ebe7f395 100644 --- a/anesthetic/samples.py +++ b/anesthetic/samples.py @@ -1,1304 +1,10 @@ -"""Main classes for the anesthetic module. 
- -- :class:`anesthetic.samples.Samples` -- :class:`anesthetic.samples.MCMCSamples` -- :class:`anesthetic.samples.NestedSamples` -""" -import numpy as np -import pandas -import copy +"""Deprecated module for backwards compatibility.""" +# TODO: remove this file in version >= 2.1 import warnings -from pandas import MultiIndex, Series -from collections.abc import Sequence -from anesthetic.utils import (compute_nlive, compute_insertion_indexes, - is_int, logsumexp) -from anesthetic.gui.plot import RunPlotter -from wpandas import WeightedDataFrame, WeightedSeries -from lpandas import LabelledDataFrame, LabelledSeries -from pandas.core.accessor import CachedAccessor -from anesthetic.plot import (make_1d_axes, make_2d_axes, - AxesSeries, AxesDataFrame) -import wpandas -from anesthetic.plotting import PlotAccessor -wpandas.core._WeightedObject.plot = CachedAccessor("plot", PlotAccessor) - - -class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame): - """:class:`pandas.DataFrame` with weights and labels.""" - - _metadata = WeightedDataFrame._metadata + LabelledDataFrame._metadata - - def __init__(self, *args, **kwargs): - labels = kwargs.pop('labels', None) - if not hasattr(self, '_labels'): - self._labels = ('weights', 'labels') - super().__init__(*args, **kwargs) - if labels is not None: - if isinstance(labels, dict): - labels = [labels.get(p, '') for p in self] - self.set_labels(labels, inplace=True) - - def islabelled(self, axis=1): - """Search for existence of labels.""" - return super().islabelled(axis=axis) - - def get_labels(self, axis=1): - """Retrieve labels from an axis.""" - return super().get_labels(axis=axis) - - def get_labels_map(self, axis=1): - """Retrieve mapping from paramnames to labels from an axis.""" - return super().get_labels_map(axis=axis) - - def get_label(self, param, axis=1): - """Retrieve mapping from paramnames to labels from an axis.""" - return super().get_label(param, axis=axis) - - def set_label(self, param, value, axis=1): - """Set a specific label to a specific value on an axis.""" - return super().set_label(param, value, axis=axis, inplace=True) - - def drop_labels(self, axis=1): - """Drop the labels from an axis if present.""" - return super().drop_labels(axis) - - def set_labels(self, labels, axis=1, inplace=False, level=None): - """Set labels along an axis.""" - return super().set_labels(labels, axis=axis, - inplace=inplace, level=level) - - @property - def _constructor(self): - return WeightedLabelledDataFrame - - @property - def _constructor_sliced(self): - return WeightedLabelledSeries - - -class WeightedLabelledSeries(WeightedSeries, LabelledSeries): - """Series with weights and labels.""" - - _metadata = WeightedSeries._metadata + LabelledSeries._metadata - - def __init__(self, *args, **kwargs): - if not hasattr(self, '_labels'): - self._labels = ('weights', 'labels') - super().__init__(*args, **kwargs) - - def set_label(self, param, value, axis=0): - """Set a specific label to a specific value.""" - return super().set_label(param, value, axis=axis, inplace=True) - - @property - def _constructor(self): - return WeightedLabelledSeries - - @property - def _constructor_expanddim(self): - return WeightedLabelledDataFrame - - -class Samples(WeightedLabelledDataFrame): - """Storage and plotting tools for general samples. - - Extends the :class:`pandas.DataFrame` by providing plotting methods and - standardising sample storage. 
- - Example plotting commands include - - ``samples.plot_1d(['paramA', 'paramB'])`` - - ``samples.plot_2d(['paramA', 'paramB'])`` - - ``samples.plot_2d([['paramA', 'paramB'], ['paramC', 'paramD']])`` - - Parameters - ---------- - data : np.array - Coordinates of samples. shape = (nsamples, ndims). - - columns : list(str) - reference names of parameters - - weights : np.array - weights of samples. - - logL : np.array - loglikelihoods of samples. - - labels : dict or array-like - mapping from columns to plotting labels - - label : str - Legend label - - logzero : float, default=-1e30 - The threshold for `log(0)` values assigned to rejected sample points. - Anything equal or below this value is set to `-np.inf`. - - """ - - _metadata = WeightedLabelledDataFrame._metadata + ['label'] - - def __init__(self, *args, **kwargs): - logzero = kwargs.pop('logzero', -1e30) - logL = kwargs.pop('logL', None) - if logL is not None: - logL = np.array(logL) - logL = np.where(logL <= logzero, -np.inf, logL) - self.label = kwargs.pop('label', None) - - super().__init__(*args, **kwargs) - - if logL is not None: - self['logL'] = logL - if self.islabelled(axis=1): - self.set_label('logL', r'$\ln\mathcal{L}$') - - @property - def _constructor(self): - return Samples - - plot = CachedAccessor("plot", PlotAccessor) - - def plot_1d(self, axes, *args, **kwargs): - """Create an array of 1D plots. - - Parameters - ---------- - axes : plotting axes - Can be: - - * list(str) or str - * :class:`pandas.Series` of :class:`matplotlib.axes.Axes` - - If a :class:`pandas.Series` is provided as an existing set of axes, - then this is used for creating the plot. Otherwise, a new set of - axes are created using the list or lists of strings. - - kind : str, default='kde_1d' - What kind of plots to produce. Alongside the usual pandas options - {'hist', 'box', 'kde', 'density'}, anesthetic also provides - - * 'hist_1d': :func:`anesthetic.plot.hist_plot_1d` - * 'kde_1d': :func:`anesthetic.plot.kde_plot_1d` - * 'fastkde_1d': :func:`anesthetic.plot.fastkde_plot_1d` - - Warning -- while the other pandas plotting options - {'line', 'bar', 'barh', 'area', 'pie'} are also accessible, these - can be hard to interpret/expensive for :class:`Samples`, - :class:`MCMCSamples`, or :class:`NestedSamples`. - - Returns - ------- - axes : :class:`pandas.Series` of :class:`matplotlib.axes.Axes` - Pandas array of axes objects - - """ - # TODO: remove this in version >= 2.1 - if 'plot_type' in kwargs: - raise ValueError( - "You are using the anesthetic 1.0 kwarg \'plot_type\' instead " - "of anesthetic 2.0 \'kind\'. Please update your code." - ) - - if not isinstance(axes, AxesSeries): - _, axes = make_1d_axes(axes, labels=self.get_labels_map()) - - kwargs['kind'] = kwargs.get('kind', 'kde_1d') - kwargs['label'] = kwargs.get('label', self.label) - - # TODO: remove this in version >= 2.1 - if kwargs['kind'] == 'kde': - warnings.warn( - "You are using \'kde\' as a plot kind. " - "\'kde_1d\' is the appropriate keyword for anesthetic. " - "Your plots may look odd if you use this argument." - ) - elif kwargs['kind'] == 'hist': - warnings.warn( - "You are using \'hist\' as a plot kind. " - "\'hist_1d\' is the appropriate keyword for anesthetic. " - "Your plots may look odd if you use this argument." 
- ) - - for x, ax in axes.items(): - if x in self and kwargs['kind'] is not None: - xlabel = self.get_label(x) - self[x].plot(ax=ax, xlabel=xlabel, - *args, **kwargs) - ax.set_xlabel(xlabel) - else: - ax.plot([], []) - - return axes - - def plot_2d(self, axes, *args, **kwargs): - """Create an array of 2D plots. - - To avoid interfering with y-axis sharing, one-dimensional plots are - created on a separate axis, which is monkey-patched onto the argument - ax as the attribute ax.twin. - - Parameters - ---------- - axes : plotting axes - Can be: - - list(str) if the x and y axes are the same - - [list(str),list(str)] if the x and y axes are different - - :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` - - If a :class:`pandas.DataFrame` is provided as an existing set of - axes, then this is used for creating the plot. Otherwise, a new set - of axes are created using the list or lists of strings. - - kind/kinds : dict, optional - What kinds of plots to produce. Dictionary takes the keys - 'diagonal' for the 1D plots and 'lower' and 'upper' for the 2D - plots. The options for 'diagonal' are: - - - 'kde_1d': :func:`anesthetic.plot.kde_plot_1d` - - 'hist_1d': :func:`anesthetic.plot.hist_plot_1d` - - 'fastkde_1d': :func:`anesthetic.plot.fastkde_plot_1d` - - 'kde': :meth:`pandas.Series.plot.kde` - - 'hist': :meth:`pandas.Series.plot.hist` - - 'box': :meth:`pandas.Series.plot.box` - - 'density': :meth:`pandas.Series.plot.density` - - The options for 'lower' and 'upper' are: - - - 'kde_2d': :func:`anesthetic.plot.kde_contour_plot_2d` - - 'hist_2d': :func:`anesthetic.plot.hist_plot_2d` - - 'scatter_2d': :func:`anesthetic.plot.scatter_plot_2d` - - 'fastkde_2d': :func:`anesthetic.plot.fastkde_contour_plot_2d` - - 'kde': :meth:`pandas.DataFrame.plot.kde` - - 'scatter': :meth:`pandas.DataFrame.plot.scatter` - - 'hexbin': :meth:`pandas.DataFrame.plot.hexbin` - - There are also a set of shortcuts provided in - :attr:`plot_2d_default_kinds`: - - - 'kde_1d': 1d kde plots down the diagonal - - 'kde_2d': 2d kde plots in lower triangle - - 'kde': 1d & 2d kde plots in lower & diagonal - - 'hist_1d': 1d histograms down the diagonal - - 'hist_2d': 2d histograms in lower triangle - - 'hist': 1d & 2d histograms in lower & diagonal - - Feel free to add your own to this list! - Default: - {'diagonal': 'kde_1d', 'lower': 'kde_2d', 'upper':'scatter_2d'} - - diagonal_kwargs, lower_kwargs, upper_kwargs : dict, optional - kwargs for the diagonal (1D)/lower or upper (2D) plots. This is - useful when there is a conflict of kwargs for different kinds of - plots. Note that any kwargs directly passed to plot_2d will - overwrite any kwarg with the same key passed to _kwargs. - Default: {} - - Returns - ------- - axes : :class:`pandas.DataFrame` of :class:`matplotlib.axes.Axes` - Pandas array of axes objects - - """ - # TODO: remove this in version >= 2.1 - if 'types' in kwargs: - raise ValueError( - "You are using the anesthetic 1.0 kwarg \'types\' instead of " - "anesthetic 2.0 \'kind' or \'kinds\' (synonyms). " - "Please update your code." - ) - kind = kwargs.pop('kind', 'default') - kind = kwargs.pop('kinds', kind) - - if isinstance(kind, str) and kind in self.plot_2d_default_kinds: - kind = self.plot_2d_default_kinds.get(kind) - if (not isinstance(kind, dict) or - not set(kind.keys()) <= {'lower', 'upper', 'diagonal'}): - raise ValueError(f"{kind} is not a valid input. 
`kind`/`kinds` " - "must be a dict mapping " - "{'lower','diagonal','upper'} to an allowed plot " - "(see `help(NestedSamples.plot2d)`), or one of " - "the following string shortcuts: " - f"{list(self.plot_2d_default_kinds.keys())}") - - local_kwargs = {pos: kwargs.pop('%s_kwargs' % pos, {}) - for pos in ['upper', 'lower', 'diagonal']} - kwargs['label'] = kwargs.get('label', self.label) - - for pos in local_kwargs: - local_kwargs[pos].update(kwargs) - - if not isinstance(axes, AxesDataFrame): - _, axes = make_2d_axes(axes, labels=self.get_labels(), - upper=('upper' in kind), - lower=('lower' in kind), - diagonal=('diagonal' in kind)) - - for y, row in axes.iterrows(): - for x, ax in row.items(): - if ax is not None: - pos = ax.position - lkwargs = local_kwargs.get(pos, {}) - lkwargs['kind'] = kind.get(pos, None) - # TODO: remove this in version >= 2.1 - if lkwargs['kind'] == 'kde': - warnings.warn( - "You are using \'kde\' as a plot kind. " - "\'kde_1d\' and \'kde_2d\' are the appropriate " - "keywords for anesthetic. Your plots may look " - "odd if you use this argument." - ) - elif lkwargs['kind'] == 'hist': - warnings.warn( - "You are using \'hist\' as a plot kind. " - "\'hist_1d\' and \'hist_2d\' are the appropriate " - "keywords for anesthetic. Your plots may look " - "odd if you use this argument." - ) - if x in self and y in self and lkwargs['kind'] is not None: - xlabel = self.get_label(x) - ylabel = self.get_label(y) - if x == y: - self[x].plot(ax=ax.twin, xlabel=xlabel, - *args, **lkwargs) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - else: - self.plot(x, y, ax=ax, xlabel=xlabel, - ylabel=ylabel, *args, **lkwargs) - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - else: - if x == y: - ax.twin.plot([], []) - else: - ax.plot([], []) - - return axes - - plot_2d_default_kinds = { - 'default': {'diagonal': 'kde_1d', - 'lower': 'kde_2d', - 'upper': 'scatter_2d'}, - 'kde': {'diagonal': 'kde_1d', 'lower': 'kde_2d'}, - 'kde_1d': {'diagonal': 'kde_1d'}, - 'kde_2d': {'lower': 'kde_2d'}, - 'hist': {'diagonal': 'hist_1d', 'lower': 'hist_2d'}, - 'hist_1d': {'diagonal': 'hist_1d'}, - 'hist_2d': {'lower': 'hist_2d'}, - } - - def importance_sample(self, logL_new, action='add', inplace=False): - """Perform importance re-weighting on the log-likelihood. - - Parameters - ---------- - logL_new : np.array - New log-likelihood values. Should have the same shape as `logL`. - - action : str, default='add' - Can be any of {'add', 'replace', 'mask'}. - - * add: Add the new `logL_new` to the current `logL`. - * replace: Replace the current `logL` with the new `logL_new`. - * mask: treat `logL_new` as a boolean mask and only keep the - corresponding (True) samples. - - inplace : bool, default=False - Indicates whether to modify the existing array, or return a new - frame with importance sampling applied. - - Returns - ------- - samples : :class:`Samples`/:class:`MCMCSamples`/:class:`NestedSamples` - Importance re-weighted samples. 
- - """ - if inplace: - samples = self - else: - samples = self.copy() - - if action == 'add': - new_weights = samples.get_weights() - new_weights *= np.exp(logL_new - logL_new.max()) - samples.set_weights(new_weights, inplace=True) - samples.logL += logL_new - elif action == 'replace': - logL_new2 = logL_new - samples.logL - new_weights = samples.get_weights() - new_weights *= np.exp(logL_new2 - logL_new2.max()) - samples.set_weights(new_weights, inplace=True) - samples.logL = logL_new - elif action == 'mask': - samples = samples[logL_new] - else: - raise NotImplementedError("`action` needs to be one of " - "{'add', 'replace', 'mask'}, but '%s' " - "was requested." % action) - - if inplace: - self._update_inplace(samples) - else: - return samples.__finalize__(self, "importance_sample") - - # TODO: remove this in version >= 2.1 - @property - def tex(self): - # noqa: disable=D102 - raise NotImplementedError( - "This is anesthetic 1.0 syntax. You need to update, e.g.\n" - "samples.tex[label] = tex # anesthetic 1.0\n" - "samples.set_label(label, tex) # anesthetic 2.0\n\n" - "tex = samples.tex[label] # anesthetic 1.0\n" - "tex = samples.get_label(label) # anesthetic 2.0" - ) - - -class MCMCSamples(Samples): - """Storage and plotting tools for MCMC samples. - - Any new functionality specific to MCMC (e.g. convergence criteria etc.) - should be put here. - - Parameters - ---------- - data : np.array - Coordinates of samples. shape = (nsamples, ndims). - - columns : array-like - reference names of parameters - - weights : np.array - weights of samples. - - logL : np.array - loglikelihoods of samples. - - labels : dict or array-like - mapping from columns to plotting labels - - label : str - Legend label - - logzero : float, default=-1e30 - The threshold for `log(0)` values assigned to rejected sample points. - Anything equal or below this value is set to `-np.inf`. - - """ - - _metadata = Samples._metadata + ['root'] - - @property - def _constructor(self): - return MCMCSamples - - def remove_burn_in(self, burn_in, reset_index=False, inplace=False): - """Remove burn-in samples from each MCMC chain. - - Parameters - ---------- - burn_in : int or float or array_like - Fraction or number of samples to remove or keep: - - * ``if 0 < burn_in < 1``: remove first fraction of samples - * ``elif 1 < burn_in``: remove first number of samples - * ``elif -1 < burn_in < 0``: keep last fraction of samples - * ``elif burn_in < -1``: keep last number of samples - * ``elif type(burn_in)==list``: different burn-in for each chain - - reset_index : bool, default=False - Whether to reset the index counter to start at zero or not. - - inplace : bool, default=False - Indicates whether to modify the existing array or return a copy. - - """ - chains = self.groupby(('chain', '$n_\\mathrm{chain}$'), sort=False, - group_keys=False) - nchains = chains.ngroups - if isinstance(burn_in, (int, float)): - ndrop = np.full(nchains, burn_in) - elif isinstance(burn_in, (list, tuple, np.ndarray)) \ - and len(burn_in) == nchains: - ndrop = np.array(burn_in) - else: - raise ValueError("`burn_in` has to be a scalar or an array of " - "length matching the number of chains " - "`nchains=%d`. 
However, you provided " - "`burn_in=%s`" % (nchains, burn_in)) - if np.all(np.abs(ndrop) < 1): - nsamples = chains.count().iloc[:, 0].to_numpy() - ndrop = ndrop * nsamples - ndrop = ndrop.astype(int) - data = self.drop(chains.apply(lambda g: g.head(ndrop[g.name-1])).index, - inplace=inplace) - if reset_index: - data = data.reset_index(drop=True, inplace=inplace) - return data - - def Gelman_Rubin(self, params=None): - """Gelman--Rubin convergence statistic of multiple MCMC chains. - - Determine the Gelman--Rubin convergence statistic ``R-1`` by computing - and comparing the within-chain variance and the between-chain variance. - This follows the routine as outlined in - `Lewis (2013), section IV.A. `_ - - Note that this requires more than one chain. To circumvent this, you - could overwrite the ``'chain'`` column, splitting the samples into two - or more sets. - - Parameters - ---------- - params : list(str) - List of column names (i.e. parameters) to be included in the - convergence calculation. - Default: all parameters (except those parameters that contain - 'prior', 'chi2', or 'logL' in their names) - - Returns - ------- - Rminus1 : float - Gelman--Rubin convergence statistic ``R-1``. The smaller, the - better converged. Aiming for ``Rminus1~0.01`` should normally work - well. - - """ - self.columns.set_names(['params', 'labels'], inplace=True) - if params is None: - params = [key for key in self.columns.get_level_values('params') - if 'prior' not in key - and 'chi2' not in key - and 'logL' not in key - and 'chain' not in key] - chains = self[params+['chain']].groupby( - ('chain', '$n_\\mathrm{chain}$'), sort=False, - ) - nchains = chains.ngroups - - # Within chain variance ``W`` - # (average variance within each chain): - W = chains.cov().groupby(level=('params', 'labels'), sort=False).mean() - # Between-chain variance ``B`` - # (variance of the chain means): - B = np.atleast_2d(np.cov(chains.mean().T, ddof=1)) - # We don't weight `B` with the effective number of samples (sum of the - # weights), here, because we want to notice outliers from shorter - # chains. - # In order to be conservative, we generally want to underestimate `W` - # and overestimate `B`, since `W` goes in the denominator and `B` in - # the numerator of the Gelman--Rubin statistic `Rminus1`. - - try: - invL = np.linalg.inv(np.linalg.cholesky(W)) - except np.linalg.LinAlgError as e: - raise np.linalg.LinAlgError( - "Make sure you do not have linearly dependent parameters, " - "e.g. having both `As` and `A=1e9*As` causes trouble.") from e - D = np.linalg.eigvalsh(invL @ ((nchains+1)/nchains * B) @ invL.T) - # The factor of `(nchains+1)/nchains` accounts for the additional - # uncertainty from using a finite number of chains. - Rminus1 = np.max(np.abs(D)) - return Rminus1 - - -class NestedSamples(Samples): - """Storage and plotting tools for Nested Sampling samples. - - We extend the :class:`Samples` class with the additional methods: - - * ``self.live_points(logL)`` - * ``self.set_beta(beta)`` - * ``self.prior()`` - * ``self.posterior_points(beta)`` - * ``self.prior_points()`` - * ``self.stats()`` - * ``self.logZ()`` - * ``self.D_KL()`` - * ``self.d()`` - * ``self.recompute()`` - * ``self.gui()`` - * ``self.importance_sample()`` - - Parameters - ---------- - data : np.array - Coordinates of samples. shape = (nsamples, ndims). - - columns : list(str) - reference names of parameters - - logL : np.array - loglikelihoods of samples. - - logL_birth : np.array or int - birth loglikelihoods, or number of live points. 
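# A minimal construction sketch for the two accepted forms of `logL_birth`
# described above (an integer number of live points, or an array of birth
# contours). Toy data invented for illustration; assumes anesthetic is
# installed.
import numpy as np
from anesthetic import NestedSamples

rng = np.random.default_rng(0)
data = rng.uniform(size=(500, 2))
logL = -((data - 0.5) ** 2).sum(axis=1)

run_a = NestedSamples(data, columns=['x', 'y'], logL=logL,
                      logL_birth=125)            # int: number of live points
run_b = NestedSamples(data, columns=['x', 'y'], logL=logL,
                      logL_birth=logL - 1.0)     # array: per-sample birth contours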
- - labels : dict - optional mapping from column names to plot labels - - label : str - Legend label - default: basename of root - - beta : float - thermodynamic temperature - default: 1. - - logzero : float - The threshold for `log(0)` values assigned to rejected sample points. - Anything equal or below this value is set to `-np.inf`. - default: -1e30 - - """ - - _metadata = Samples._metadata + ['root', '_beta'] - - def __init__(self, *args, **kwargs): - logzero = kwargs.pop('logzero', -1e30) - self._beta = kwargs.pop('beta', 1.) - logL_birth = kwargs.pop('logL_birth', None) - if not isinstance(logL_birth, int) and logL_birth is not None: - logL_birth = np.array(logL_birth) - logL_birth = np.where(logL_birth <= logzero, -np.inf, - logL_birth) - - super().__init__(logzero=logzero, *args, **kwargs) - if logL_birth is not None: - self.recompute(logL_birth, inplace=True) - - @property - def _constructor(self): - return NestedSamples - - def _compute_insertion_indexes(self): - logL = self.logL.to_numpy() - logL_birth = self.logL_birth.to_numpy() - self['insertion'] = compute_insertion_indexes(logL, logL_birth) - - @property - def beta(self): - """Thermodynamic inverse temperature.""" - return self._beta - - @beta.setter - def beta(self, beta): - self._beta = beta - logw = self.logw(beta=beta) - self.set_weights(np.exp(logw - logw.max()), inplace=True) - - def set_beta(self, beta, inplace=False): - """Change the inverse temperature. - - Parameters - ---------- - beta : float - Temperature to set. - (``beta=0`` corresponds to the prior distribution.) - - inplace : bool, default=False - Indicates whether to modify the existing array, or return a copy - with the temperature changed. - - """ - if inplace: - self.beta = beta - else: - data = self.copy() - data.beta = beta - return data - - def prior(self, inplace=False): - """Re-weight samples at infinite temperature to get prior samples.""" - return self.set_beta(beta=0, inplace=inplace) - - # TODO: remove this in version >= 2.1 - def ns_output(self, *args, **kwargs): - # noqa: disable=D102 - raise NotImplementedError( - "This is anesthetic 1.0 syntax. You need to update, e.g.\n" - "samples.ns_output(1000) # anesthetic 1.0\n" - "samples.stats(1000) # anesthetic 2.0\n\n" - "Check out the new temperature functionality: help(samples.stats)," - " as well as average loglikelihoods: help(samples.logL_P)" - ) - - def stats(self, nsamples=None, beta=None): - r"""Compute Nested Sampling statistics. - - Using nested sampling we can compute: - - - ``logZ``: Bayesian evidence - - .. math:: - \log Z = \int L \pi d\theta - - - ``D_KL``: Kullback--Leibler divergence - - .. math:: - D_{KL} = \int P \log(P / \pi) d\theta - - - ``logL_P``: posterior averaged log-likelihood - - .. math:: - \langle\log L\rangle_P = \int P \log L d\theta - - - ``d_G``: Gaussian model dimensionality - (or posterior variance of the log-likelihood) - - .. math:: - d_G/2 = \langle(\log L)^2\rangle_P - \langle\log L\rangle_P^2 - - see `Handley and Lemos (2019) `_ - for more details on model dimensionalities. - - (Note that all of these are available as individual functions with the - same signature.) - - In addition to point estimates nested sampling provides an error bar - or more generally samples from a (correlated) distribution over the - variables. Samples from this distribution can be computed by providing - an integer nsamples. - - Nested sampling as an athermal algorithm is also capable of producing - these as a function of inverse thermodynamic temperature beta. 
This is - provided as a vectorised function. If nsamples is also provided a - MultiIndex dataframe is generated. - - These obey Occam's razor equation: - - .. math:: - \log Z = \langle\log L\rangle_P - D_{KL}, - - which splits a model's quality ``logZ`` into a goodness-of-fit - ``logL_P`` and a complexity penalty ``D_KL``. See `Hergt et al. (2021) - `_ for more detail. - - Parameters - ---------- - nsamples : int, optional - - If nsamples is not supplied, calculate mean value - - If nsamples is integer, draw nsamples from the distribution of - values inferred by nested sampling - - beta : float, array-like, optional - inverse temperature(s) beta=1/kT. Default self.beta - - Returns - ------- - if beta is scalar and nsamples is None: - Series, index ['logZ', 'd_G', 'DK_L', 'logL_P'] - elif beta is scalar and nsamples is int: - :class:`Samples`, index range(nsamples), - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] - elif beta is array-like and nsamples is None: - :class:`Samples`, index beta, - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] - elif beta is array-like and nsamples is int: - :class:`Samples`, index :class:`pandas.MultiIndex` the product of - beta and range(nsamples) - columns ['logZ', 'd_G', 'DK_L', 'logL_P'] - """ - logw = self.logw(nsamples, beta) - if nsamples is None and beta is None: - samples = self._constructor_sliced(index=self.columns[:0], - dtype=float) - else: - samples = Samples(index=logw.columns, columns=self.columns[:0]) - samples['logZ'] = self.logZ(logw) - samples.set_label('logZ', r'$\ln\mathcal{Z}$') - w = np.exp(logw-samples['logZ']) - - betalogL = self._betalogL(beta) - S = (logw*0).add(betalogL, axis=0) - samples.logZ - - samples['D_KL'] = (S*w).sum() - samples.set_label('D_KL', r'$\mathcal{D}_\mathrm{KL}$') - - samples['logL_P'] = samples['logZ'] + samples['D_KL'] - samples.set_label('logL_P', - r'$\langle\ln\mathcal{L}\rangle_\mathcal{P}$') - - samples['d_G'] = ((S-samples.D_KL)**2*w).sum()*2 - samples.set_label('d_G', r'$d_\mathrm{G}$') - - samples.label = self.label - return samples - - def logX(self, nsamples=None): - """Log-Volume. - - The log of the prior volume contained within each iso-likelihood - contour. - - Parameters - ---------- - nsamples : int, optional - - If nsamples is not supplied, calculate mean value - - If nsamples is integer, draw nsamples from the distribution of - values inferred by nested sampling - - Returns - ------- - if nsamples is None: - WeightedSeries like self - elif nsamples is int: - WeightedDataFrame like self, columns range(nsamples) - """ - if nsamples is None: - t = np.log(self.nlive/(self.nlive+1)) - else: - r = np.log(np.random.rand(len(self), nsamples)) - w = self.get_weights() - r = self.nlive._constructor_expanddim(r, self.index, weights=w) - t = r.divide(self.nlive, axis=0) - t.columns.name = 'samples' - logX = t.cumsum() - logX.name = 'logX' - return logX - - def dlogX(self, nsamples=None): - """Compute volume of shell of loglikelihood. 
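# The dlogX implementation below evaluates the trapezoidal volume element
#     dX_i ~ (X_{i-1} - X_{i+1}) / 2
# in log space, i.e. log dX_i = log(1 - exp(logX_{i+1} - logX_{i-1})) + logX_{i-1} - log 2,
# with boundary values X_{-1} = 1 and X_{n} = 0. A standalone numpy check of
# that identity (illustrative only, with invented volumes):
import numpy as np
logX = np.log(np.array([0.9, 0.5, 0.2, 0.05]))
logXp = np.concatenate([[0.0], logX[:-1]])      # X_{i-1}, padded with X_{-1} = 1
logXm = np.concatenate([logX[1:], [-np.inf]])   # X_{i+1}, padded with X_{n} = 0
dlogX = np.log(1 - np.exp(logXm - logXp)) + logXp - np.log(2)
assert np.allclose(np.exp(dlogX), (np.exp(logXp) - np.exp(logXm)) / 2)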
- - Parameters - ---------- - nsamples : int, optional - - If nsamples is not supplied, calculate mean value - - If nsamples is integer, draw nsamples from the distribution of - values inferred by nested sampling - - Returns - ------- - if nsamples is None: - WeightedSeries like self - elif nsamples is int: - WeightedDataFrame like self, columns range(nsamples) - """ - logX = self.logX(nsamples) - logXp = logX.shift(1, fill_value=0) - logXm = logX.shift(-1, fill_value=-np.inf) - dlogX = np.log(1 - np.exp(logXm-logXp)) + logXp - np.log(2) - dlogX.name = 'dlogX' - - return dlogX - - def _betalogL(self, beta=None): - """Log(L**beta) convenience function. - - Parameters - ---------- - beta : scalar or array-like, optional - inverse temperature(s) beta=1/kT. Default self.beta - - Returns - ------- - if beta is scalar: - WeightedSeries like self - elif beta is array-like: - WeightedDataFrame like self, columns of beta - """ - if beta is None: - beta = self.beta - logL = self.logL - if np.isscalar(beta): - betalogL = beta * logL - betalogL.name = 'betalogL' - else: - betalogL = logL._constructor_expanddim(np.outer(self.logL, beta), - self.index, columns=beta) - betalogL.columns.name = 'beta' - return betalogL - - def logw(self, nsamples=None, beta=None): - """Log-nested sampling weight. - - The logarithm of the (unnormalised) sampling weight log(L**beta*dX). - - Parameters - ---------- - nsamples : int, optional - - If nsamples is not supplied, calculate mean value - - If nsamples is integer, draw nsamples from the distribution of - values inferred by nested sampling - - If nsamples is array, nsamples is assumed to be logw and returned - (implementation convenience functionality) - - beta : float, array-like, optional - inverse temperature(s) beta=1/kT. Default self.beta - - Returns - ------- - if nsamples is array-like: - WeightedDataFrame equal to nsamples - elif beta is scalar and nsamples is None: - WeightedSeries like self - elif beta is array-like and nsamples is None: - WeightedDataFrame like self, columns of beta - elif beta is scalar and nsamples is int: - WeightedDataFrame like self, columns of range(nsamples) - elif beta is array-like and nsamples is int: - WeightedDataFrame like self, MultiIndex columns the product of beta - and range(nsamples) - """ - if np.ndim(nsamples) > 0: - return nsamples - - dlogX = self.dlogX(nsamples) - betalogL = self._betalogL(beta) - - if dlogX.ndim == 1 and betalogL.ndim == 1: - logw = dlogX + betalogL - elif dlogX.ndim > 1 and betalogL.ndim == 1: - logw = dlogX.add(betalogL, axis=0) - elif dlogX.ndim == 1 and betalogL.ndim > 1: - logw = betalogL.add(dlogX, axis=0) - else: - cols = MultiIndex.from_product([betalogL.columns, dlogX.columns]) - dlogX = dlogX.reindex(columns=cols, level='samples') - betalogL = betalogL.reindex(columns=cols, level='beta') - logw = betalogL+dlogX - return logw - - def logZ(self, nsamples=None, beta=None): - """Log-Evidence. - - Parameters - ---------- - nsamples : int, optional - - If nsamples is not supplied, calculate mean value - - If nsamples is integer, draw nsamples from the distribution of - values inferred by nested sampling - - If nsamples is array, nsamples is assumed to be logw - - beta : float, array-like, optional - inverse temperature(s) beta=1/kT. 
Default self.beta - - Returns - ------- - if nsamples is array-like: - :class:`pandas.Series`, index nsamples.columns - elif beta is scalar and nsamples is None: - float - elif beta is array-like and nsamples is None: - :class:`pandas.Series`, index beta - elif beta is scalar and nsamples is int: - :class:`pandas.Series`, index range(nsamples) - elif beta is array-like and nsamples is int: - :class:`pandas.Series`, :class:`pandas.MultiIndex` columns the - product of beta and range(nsamples) - """ - logw = self.logw(nsamples, beta) - logZ = logsumexp(logw, axis=0) - if np.isscalar(logZ): - return logZ - else: - return logw._constructor_sliced(logZ, name='logZ', - index=logw.columns).squeeze() - - _logZ_function_shape = '\n' + '\n'.join(logZ.__doc__.split('\n')[1:]) - - # TODO: remove this in version >= 2.1 - def D(self, nsamples=None): - # noqa: disable=D102 - raise NotImplementedError( - "This is anesthetic 1.0 syntax. You need to update, e.g.\n" - "samples.D(1000) # anesthetic 1.0\n" - "samples.D_KL(1000) # anesthetic 2.0\n\n" - "Check out the new temperature functionality: help(samples.D_KL), " - "as well as average loglikelihoods: help(samples.logL_P)" - ) - - def D_KL(self, nsamples=None, beta=None): - """Kullback--Leibler divergence.""" - logw = self.logw(nsamples, beta) - logZ = self.logZ(logw, beta) - betalogL = self._betalogL(beta) - S = (logw*0).add(betalogL, axis=0) - logZ - w = np.exp(logw-logZ) - D_KL = (S*w).sum() - if np.isscalar(D_KL): - return D_KL - else: - return self._constructor_sliced(D_KL, name='D_KL', - index=logw.columns).squeeze() - - D_KL.__doc__ += _logZ_function_shape - - # TODO: remove this in version >= 2.1 - def d(self, nsamples=None): - # noqa: disable=D102 - raise NotImplementedError( - "This is anesthetic 1.0 syntax. You need to update, e.g.\n" - "samples.d(1000) # anesthetic 1.0\n" - "samples.d_G(1000) # anesthetic 2.0\n\n" - "Check out the new temperature functionality: help(samples.d_G), " - "as well as average loglikelihoods: help(samples.logL_P)" - ) - - def d_G(self, nsamples=None, beta=None): - """Bayesian model dimensionality.""" - logw = self.logw(nsamples, beta) - logZ = self.logZ(logw, beta) - betalogL = self._betalogL(beta) - S = (logw*0).add(betalogL, axis=0) - logZ - w = np.exp(logw-logZ) - D_KL = (S*w).sum() - d_G = ((S-D_KL)**2*w).sum()*2 - if np.isscalar(d_G): - return d_G - else: - return self._constructor_sliced(d_G, name='d_G', - index=logw.columns).squeeze() - - d_G.__doc__ += _logZ_function_shape - - def logL_P(self, nsamples=None, beta=None): - """Posterior averaged loglikelihood.""" - logw = self.logw(nsamples, beta) - logZ = self.logZ(logw, beta) - betalogL = self._betalogL(beta) - betalogL = (logw*0).add(betalogL, axis=0) - w = np.exp(logw-logZ) - logL_P = (betalogL*w).sum() - if np.isscalar(logL_P): - return logL_P - else: - return self._constructor_sliced(logL_P, name='logL_P', - index=logw.columns).squeeze() - - logL_P.__doc__ += _logZ_function_shape - - def live_points(self, logL=None): - """Get the live points within logL. - - Parameters - ---------- - logL : float or int, optional - Loglikelihood or iteration number to return live points. - If not provided, return the last set of active live points. 
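# Continuing the toy `run_b` from the construction sketch above: the
# return-shape behaviour of the evidence-style methods documented in this
# block (illustrative only; the numbers carry no statistical meaning for toy
# data).
logZ_mean = run_b.logZ()                    # float: mean log-evidence
logZ_draws = run_b.logZ(100)                # Series over 100 draws from the logZ distribution
logZ_beta = run_b.logZ(beta=[0.5, 1.0])     # Series indexed by inverse temperature beta
bayes_stats = run_b.stats(nsamples=100)     # Samples frame with logZ, D_KL, logL_P, d_G columns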
- - Returns - ------- - live_points : Samples - Live points at either: - - contour logL (if input is float) - - ith iteration (if input is integer) - - last set of live points if no argument provided - """ - if logL is None: - logL = self.logL_birth.max() - else: - try: - logL = float(self.logL[logL]) - except KeyError: - pass - i = ((self.logL >= logL) & (self.logL_birth < logL)).to_numpy() - return Samples(self[i]).set_weights(None) - - def posterior_points(self, beta=1): - """Get equally weighted posterior points at temperature beta.""" - return self.set_beta(beta).compress(-1) - - def prior_points(self, params=None): - """Get equally weighted prior points.""" - return self.posterior_points(beta=0) - - def gui(self, params=None): - """Construct a graphical user interface for viewing samples.""" - return RunPlotter(self, params) - - def importance_sample(self, logL_new, action='add', inplace=False): - """Perform importance re-weighting on the log-likelihood. - - Parameters - ---------- - logL_new : np.array - New log-likelihood values. Should have the same shape as `logL`. - - action : str, default='add' - Can be any of {'add', 'replace', 'mask'}. - - * add: Add the new `logL_new` to the current `logL`. - * replace: Replace the current `logL` with the new `logL_new`. - * mask: treat `logL_new` as a boolean mask and only keep the - corresponding (True) samples. - - inplace : bool, optional - Indicates whether to modify the existing array, or return a new - frame with importance sampling applied. - default: False - - Returns - ------- - samples : :class:`NestedSamples` - Importance re-weighted samples. - - """ - samples = super().importance_sample(logL_new, action=action) - mask = (samples.logL > samples.logL_birth).to_numpy() - samples = samples[mask].recompute() - if inplace: - self._update_inplace(samples) - else: - return samples.__finalize__(self, "importance_sample") - - def recompute(self, logL_birth=None, inplace=False): - """Re-calculate the nested sampling contours and live points. - - Parameters - ---------- - logL_birth : array-like or int, optional - - * array-like: the birth contours. - * int: the number of live points. - * default: use the existing birth contours to compute nlive - - inplace : bool, default=False - Indicates whether to modify the existing array, or return a new - frame with contours resorted and nlive recomputed - - """ - if inplace: - samples = self - else: - samples = self.copy() - - nlive_label = r'$n_\mathrm{live}$' - if is_int(logL_birth): - nlive = logL_birth - samples.sort_values('logL', inplace=True) - samples.reset_index(drop=True, inplace=True) - n = np.ones(len(self), int) * nlive - n[-nlive:] = np.arange(nlive, 0, -1) - samples['nlive', nlive_label] = n - else: - if logL_birth is not None: - label = r'$\ln\mathcal{L}_\mathrm{birth}$' - samples['logL_birth'] = logL_birth - if self.islabelled(): - samples.set_label('logL_birth', label) - - if 'logL_birth' not in samples: - raise RuntimeError("Cannot recompute run without " - "birth contours logL_birth.") - - invalid = (samples.logL <= samples.logL_birth).to_numpy() - n_bad = invalid.sum() - n_equal = (samples.logL == samples.logL_birth).sum() - if n_bad: - warnings.warn("%i out of %i samples have logL <= logL_birth," - "\n%i of which have logL == logL_birth." - "\nThis may just indicate numerical rounding " - "errors at the peak of the likelihood, but " - "further investigation of the chains files is " - "recommended." - "\nDropping the invalid samples." 
% - (n_bad, len(samples), n_equal), - RuntimeWarning) - samples = samples[~invalid].reset_index(drop=True) - - samples.sort_values('logL', inplace=True) - samples.reset_index(drop=True, inplace=True) - nlive = compute_nlive(samples.logL, samples.logL_birth) - samples['nlive'] = nlive - if self.islabelled(): - samples.set_label('nlive', nlive_label) - - samples.beta = samples._beta - - if np.any(pandas.isna(samples.logL)): - warnings.warn("NaN encountered in logL. If this is unexpected, you" - " should investigate why your likelihood is throwing" - " NaNs. Dropping these samples at prior level", - RuntimeWarning) - samples = samples[samples.logL.notna().to_numpy()].recompute() - - if inplace: - self._update_inplace(samples) - else: - return samples.__finalize__(self, "recompute") - - -def merge_nested_samples(runs): - """Merge one or more nested sampling runs. - - Parameters - ---------- - runs : list(:class:`NestedSamples`) - List or array-like of one or more nested sampling runs. - If only a single run is provided, this recalculates the live points and - as such can be used for masked runs. - - Returns - ------- - samples : :class:`NestedSamples` - Merged run. - """ - merge = pandas.concat(runs, ignore_index=True) - return merge.recompute() - - -def merge_samples_weighted(samples, weights=None, label=None): - r"""Merge sets of samples with weights. - - Combine two (or more) samples so the new PDF is - P(x|new) = weight_A P(x|A) + weight_B P(x|B). - The number of samples and internal weights do not affect the result. - - Parameters - ---------- - samples : list(:class:`NestedSamples`) or list(:class:`MCMCSamples`) - List or array-like of one or more MCMC or nested sampling runs. - - weights : list(double) or None - Weight for each run in samples (normalized internally). - Can be omitted if samples are :class:`NestedSamples`, - then exp(logZ) is used as weight. - - label : str or None, default=None - Label for the new samples. - - Returns - ------- - new_samples : :class:`Samples` - Merged (weighted) run. - """ - if not (isinstance(samples, Sequence) or - isinstance(samples, Series)): - raise TypeError("samples must be a list of samples " - "(Sequence or pandas.Series)") - - mcmc_samples = copy.deepcopy([Samples(s) for s in samples]) - if weights is None: - try: - logZs = np.array(copy.deepcopy([s.logZ() for s in samples])) - except AttributeError: - raise ValueError("If samples includes MCMCSamples " - "then weights must be given.") - # Subtract logsumexp to avoid numerical issues (similar to max(logZs)) - logZs -= logsumexp(logZs) - weights = np.exp(logZs) - else: - if len(weights) != len(samples): - raise ValueError("samples and weights must have the same length," - "each weight is for a whole sample. Currently", - len(samples), len(weights)) - - new_samples = [] - for s, w in zip(mcmc_samples, weights): - # Normalize the given weights - new_weights = s.get_weights() / s.get_weights().sum() - new_weights *= w/np.sum(weights) - s = Samples(s, weights=new_weights) - new_samples.append(s) - - new_samples = pandas.concat(new_samples) - - new_weights = new_samples.get_weights() - new_weights /= new_weights.max() - new_samples.set_weights(new_weights, inplace=True) - - new_samples.label = label +from anesthetic.core import * # noqa: F403, F401 - return new_samples +warnings.warn( + "You are using the anesthetic.samples module, which is deprecated. 
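# Continuing the toy runs `run_a` and `run_b` from the sketch further up: a
# purely illustrative use of the weighted-merge helper defined above. Note
# that merge_nested_samples instead concatenates runs and recomputes the
# nested-sampling contours, which requires explicit birth contours (as in
# `run_b`). The import path follows the module layout of this patch series.
from anesthetic.core import merge_samples_weighted

mixture = merge_samples_weighted([run_a, run_b], weights=[0.3, 0.7], label='mixture')
evidence_weighted = merge_samples_weighted([run_a, run_b])  # weights default to exp(logZ)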
" + "Please use anesthetic.core instead.", + FutureWarning + ) diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index e4beb272..7f44bdf6 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -51,10 +51,10 @@ anesthetic.plot module :show-inheritance: -anesthetic.samples module +anesthetic.core module ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: anesthetic.samples +.. automodule:: anesthetic.core :members: :undoc-members: :show-inheritance: diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index b8537e0e..0432ec09 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -45,15 +45,15 @@ Plot Example 2: Marginalised 2D posteriors :meth:`anesthetic.plot.make_1d_axes`, :meth:`anesthetic.plot.make_2d_axes`, - :meth:`anesthetic.samples.Samples.plot_1d`, - :meth:`anesthetic.samples.Samples.plot_2d` + :meth:`anesthetic.core.Samples.plot_1d`, + :meth:`anesthetic.core.Samples.plot_2d` Nested sampling statistics ========================== Providing Bayesian statistics from nested sampling data is where anesthetic -shines. With :meth:`anesthetic.samples.NestedSamples.stats` you can compute the +shines. With :meth:`anesthetic.core.NestedSamples.stats` you can compute the Bayesian evidence :math:`\ln\mathcal{Z}`, the Kullback--Leibler divergence :math:`\mathcal{D}_\mathrm{KL}`, and the posterior average of the log-likelihood :math:`\langle\ln\mathcal{L}\rangle_\mathcal{P}`, which together @@ -78,5 +78,5 @@ of the model complexity (or dimensionality). .. seealso:: * anesthetic / :doc:`Samples and statistics ` / :ref:`samples:Bayesian statistics` - * :meth:`anesthetic.samples.NestedSamples.stats` + * :meth:`anesthetic.core.NestedSamples.stats` diff --git a/docs/source/reading_writing.rst b/docs/source/reading_writing.rst index 3f0e05de..7a684b65 100644 --- a/docs/source/reading_writing.rst +++ b/docs/source/reading_writing.rst @@ -22,7 +22,7 @@ Feel free to use the testing data in ``anesthetic/tests/example_data`` to try out the examples listed here. * PolyChord samples, which will be an instance of the - :class:`anesthetic.samples.NestedSamples` class: + :class:`anesthetic.core.NestedSamples` class: :: @@ -30,7 +30,7 @@ out the examples listed here. samples = read_chains("anesthetic/tests/example_data/pc") * Cobaya samples, which will be an instance of the - :class:`anesthetic.samples.MCMCSamples` class: + :class:`anesthetic.core.MCMCSamples` class: :: @@ -44,15 +44,15 @@ Passing data as arguments ========================= You can also pass your own (weighted) data directly to the main sample classes -:class:`anesthetic.samples.Samples`, :class:`anesthetic.samples.MCMCSamples`, -or :class:`anesthetic.samples.NestedSamples`, e.g. here with randomly generated +:class:`anesthetic.core.Samples`, :class:`anesthetic.core.MCMCSamples`, +or :class:`anesthetic.core.NestedSamples`, e.g. 
here with randomly generated data: :: import numpy as np from scipy.stats import multivariate_normal as mvn - from anesthetic.samples import Samples + from anesthetic.core import Samples num_samples = 1000 # number of samples num_dim = 2 # number of parameters/dimensions diff --git a/docs/source/samples.rst b/docs/source/samples.rst index 71156e4f..2d843dbd 100644 --- a/docs/source/samples.rst +++ b/docs/source/samples.rst @@ -2,10 +2,10 @@ Weighted samples: the ``Samples``, ``MCMCSamples``, and ``NestedSamples`` data frames ************************************************************************************* -The :class:`anesthetic.samples.Samples` data frame at its core is a +The :class:`anesthetic.core.Samples` data frame at its core is a :class:`pandas.DataFrame`, and as such comes with all the pandas functionality. -The :class:`anesthetic.samples.MCMCSamples` and -:class:`anesthetic.samples.NestedSamples` data frames share all the +The :class:`anesthetic.core.MCMCSamples` and +:class:`anesthetic.core.NestedSamples` data frames share all the functionality of the more general ``Samples`` class, but come with some additional MCMC or nested sampling specific functionality. @@ -125,7 +125,7 @@ Remove burn-in -------------- To get rid of the initial burn-in phase, you can use the -:meth:`anesthetic.samples.MCMCSamples.remove_burn_in` method: +:meth:`anesthetic.core.MCMCSamples.remove_burn_in` method: .. plot:: :context: close-figs @@ -156,7 +156,7 @@ Rubin (1992) `_ and `Brooks and Gelman Provided you have an MCMC run containing multiple chains, you can compute the Gelman--Rubin ``R-1`` statistic using the -:meth:`anesthetic.samples.MCMCSamples.Gelman_Rubin` method: +:meth:`anesthetic.core.MCMCSamples.Gelman_Rubin` method: .. plot:: :context: close-figs @@ -198,7 +198,7 @@ Prior distribution While MCMC explores effectively only the posterior bulk, nested sampling explores the full parameter space, allowing us to calculate and plot not only -the posterior distribution, but also the prior distribution, which you can get with the :meth:`anesthetic.samples.NestedSamples.prior` method: +the posterior distribution, but also the prior distribution, which you can get with the :meth:`anesthetic.core.NestedSamples.prior` method: .. plot:: :context: close-figs @@ -208,7 +208,7 @@ the posterior distribution, but also the prior distribution, which you can get w Note that the ``.prior()`` method is really just a shorthand for ``.set_beta(beta=0)``, i.e. for setting the inverse temperature parameter ``beta=0`` (where ``1/beta=kT``) in the - :meth:`anesthetic.samples.NestedSamples.set_beta` method, which allows you to + :meth:`anesthetic.core.NestedSamples.set_beta` method, which allows you to get the distribution at any temperature. This allows us to plot both prior and posterior distributions together. Note, @@ -243,21 +243,21 @@ Bayesian statistics Thanks to the power of nested sampling, we can compute Bayesian statistics from the nested samples, such as the following: -* Bayesian (log-)evidence :meth:`anesthetic.samples.NestedSamples.logZ` -* Kullback--Leibler (KL) divergence :meth:`anesthetic.samples.NestedSamples.D_KL` +* Bayesian (log-)evidence :meth:`anesthetic.core.NestedSamples.logZ` +* Kullback--Leibler (KL) divergence :meth:`anesthetic.core.NestedSamples.D_KL` * Posterior average of the log-likelihood - :meth:`anesthetic.samples.NestedSamples.logL_P` + :meth:`anesthetic.core.NestedSamples.logL_P` :raw-html:`
` (this connects Bayesian evidence with KL-divergence as ``logZ = logL_P - D_KL``, allowing the interpretation of the Bayesian evidence as a trade-off between model fit ``logL_P`` and Occam penalty ``D_KL``, see also our paper `Hergt, Handley, Hobson, and Lasenby (2021) `_) -* Gaussian model dimensionality :meth:`anesthetic.samples.NestedSamples.logL_P` +* Gaussian model dimensionality :meth:`anesthetic.core.NestedSamples.logL_P` :raw-html:`
` (for more, see our paper `Handley and Lemos (2019) `_) -* All of the above in one go, using :meth:`anesthetic.samples.NestedSamples.stats` +* All of the above in one go, using :meth:`anesthetic.core.NestedSamples.stats` By default (i.e. without passing any additional keywords) the mean values for these quantities are computed: @@ -274,7 +274,7 @@ reflecting the underlying distributions of the Bayesian statistics: nsamples = 2000 bayesian_stats = nested_samples.stats(nsamples) -Since ``bayesian_stats`` is an instance of :class:`anesthetic.samples.Samples`, +Since ``bayesian_stats`` is an instance of :class:`anesthetic.core.Samples`, the same plotting functions can be used as for the posterior plots above. Plotting the 2D distributions allows us to inspect the correlation between the inferences: diff --git a/tests/test_samples.py b/tests/test_core.py similarity index 99% rename from tests/test_samples.py rename to tests/test_core.py index dd5b14ba..3a9d2f5a 100644 --- a/tests/test_samples.py +++ b/tests/test_core.py @@ -13,9 +13,8 @@ Samples, MCMCSamples, NestedSamples, make_1d_axes, make_2d_axes, read_chains ) -from anesthetic.samples import (merge_nested_samples, merge_samples_weighted, - WeightedLabelledSeries, - WeightedLabelledDataFrame) +from anesthetic.core import (merge_nested_samples, merge_samples_weighted, + WeightedLabelledSeries, WeightedLabelledDataFrame) from numpy.testing import (assert_array_equal, assert_array_almost_equal, assert_array_less, assert_allclose) from pandas.testing import assert_frame_equal @@ -1594,3 +1593,8 @@ def test_groupby_plots(): gb_colors = [p.get_facecolor() for p in gb_ax.patches] assert_allclose(mcmc_colors, gb_colors) plt.close('all') + + +def test_deprecated_files(): + with pytest.warns(FutureWarning): + import anesthetic.samples # noqa: F401 From ca022cbf8379b0be7eefb8c4f7be68d2952e3aa2 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Tue, 11 Apr 2023 22:36:10 +0100 Subject: [PATCH 10/19] version bump --- README.rst | 2 +- anesthetic/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index c516a4fd..4626ea73 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ anesthetic: nested sampling post-processing =========================================== :Authors: Will Handley and Lukas Hergt -:Version: 2.0.0-beta.28 +:Version: 2.0.0-beta.29 :Homepage: https://github.com/handley-lab/anesthetic :Documentation: http://anesthetic.readthedocs.io/ diff --git a/anesthetic/_version.py b/anesthetic/_version.py index 03beede9..56c59a8d 100644 --- a/anesthetic/_version.py +++ b/anesthetic/_version.py @@ -1 +1 @@ -__version__ = '2.0.0b28' +__version__ = '2.0.0b29' From e9fba58c30c45c6d5505ed567ba97f1959ec07b7 Mon Sep 17 00:00:00 2001 From: Will Handley Date: Wed, 12 Apr 2023 09:46:18 +0100 Subject: [PATCH 11/19] Refactored wpandas --- anesthetic/core.py | 2 +- anesthetic/plot.py | 2 +- anesthetic/utils.py | 2 +- tests/test_utils.py | 2 +- wpandas/__init__.py | 7 +- wpandas/_code_utils.py | 34 -- wpandas/core.py | 653 --------------------- wpandas/core/__init__.py | 0 wpandas/core/base.py | 106 ++++ wpandas/core/frame.py | 281 +++++++++ wpandas/core/groupby/__init__.py | 1 + wpandas/core/groupby/generic.py | 70 +++ wpandas/core/groupby/groupby.py | 72 +++ wpandas/core/series.py | 163 +++++ wpandas/core/util/__init__.py | 0 wpandas/core/util/_code_transforms.py | 78 +++ wpandas/core/util/random.py | 14 + wpandas/{utils.py => core/util/weights.py} | 38 -- wpandas/plotting/_matplotlib/boxplot.py | 4 +- 
wpandas/plotting/_matplotlib/core.py | 2 +- 20 files changed, 796 insertions(+), 735 deletions(-) delete mode 100644 wpandas/_code_utils.py delete mode 100644 wpandas/core.py create mode 100644 wpandas/core/__init__.py create mode 100644 wpandas/core/base.py create mode 100644 wpandas/core/frame.py create mode 100644 wpandas/core/groupby/__init__.py create mode 100644 wpandas/core/groupby/generic.py create mode 100644 wpandas/core/groupby/groupby.py create mode 100644 wpandas/core/series.py create mode 100644 wpandas/core/util/__init__.py create mode 100644 wpandas/core/util/_code_transforms.py create mode 100644 wpandas/core/util/random.py rename wpandas/{utils.py => core/util/weights.py} (62%) diff --git a/anesthetic/core.py b/anesthetic/core.py index 5d8f99d8..81b22818 100644 --- a/anesthetic/core.py +++ b/anesthetic/core.py @@ -20,7 +20,7 @@ AxesSeries, AxesDataFrame) import wpandas from anesthetic.plotting import PlotAccessor -wpandas.core._WeightedObject.plot = CachedAccessor("plot", PlotAccessor) +wpandas.core.base._WeightedObject.plot = CachedAccessor("plot", PlotAccessor) class WeightedLabelledDataFrame(WeightedDataFrame, LabelledDataFrame): diff --git a/anesthetic/plot.py b/anesthetic/plot.py index 0144b911..e3e1c801 100644 --- a/anesthetic/plot.py +++ b/anesthetic/plot.py @@ -34,7 +34,7 @@ triangular_sample_compression_2d, iso_probability_contours, match_contour_to_contourf) -from wpandas.utils import quantile +from wpandas.core.util.weights import quantile from anesthetic.boundary import cut_and_normalise_gaussian diff --git a/anesthetic/utils.py b/anesthetic/utils.py index e887ae30..e588b519 100644 --- a/anesthetic/utils.py +++ b/anesthetic/utils.py @@ -5,7 +5,7 @@ from scipy.interpolate import interp1d from scipy.stats import kstwobign from matplotlib.tri import Triangulation -from wpandas.utils import channel_capacity +from wpandas.core.util.weights import channel_capacity def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False): diff --git a/tests/test_utils.py b/tests/test_utils.py index 2531a397..1db3b100 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -10,7 +10,7 @@ logsumexp, sample_compression_1d, triangular_sample_compression_2d, insertion_p_value) -from wpandas.utils import compress_weights +from wpandas.core.util.weights import compress_weights def test_compress_weights(): diff --git a/wpandas/__init__.py b/wpandas/__init__.py index 960334b8..05ae1b85 100644 --- a/wpandas/__init__.py +++ b/wpandas/__init__.py @@ -1,7 +1,8 @@ import pandas import pandas.plotting._core import pandas.plotting._misc -import wpandas.core +import wpandas.core.series +import wpandas.core.frame def _wpandas_override(_get_plot_backend): @@ -27,5 +28,5 @@ def wrapper(backend=None): # Set wpandas.plotting._matplotlib as the actual backend pandas.options.plotting.backend = 'wpandas.plotting._matplotlib' -WeightedSeries = wpandas.core.WeightedSeries -WeightedDataFrame = wpandas.core.WeightedDataFrame +WeightedSeries = wpandas.core.series.WeightedSeries +WeightedDataFrame = wpandas.core.frame.WeightedDataFrame diff --git a/wpandas/_code_utils.py b/wpandas/_code_utils.py deleted file mode 100644 index ce8cf721..00000000 --- a/wpandas/_code_utils.py +++ /dev/null @@ -1,34 +0,0 @@ -def replace_inner_function(outer, new_inner): - """Replace a nested function code object used by outer with new_inner. - - The replacement new_inner must use the same name and must at most use the - same closures as the original. 
- - Code taken from https://stackoverflow.com/a/27550237 - """ - # find original code object so we can validate the closures match - ocode = outer.__code__ - function, code = type(outer), type(ocode) - iname = new_inner.co_name - orig_inner = next( - const for const in ocode.co_consts - if isinstance(const, code) and const.co_name == iname) - - # you can ignore later closures, but since they are matched by position - # the new sequence must match the start of the old. - assert (orig_inner.co_freevars[:len(new_inner.co_freevars)] == - new_inner.co_freevars), 'New closures must match originals' - - # replace the code object for the inner function - new_consts = tuple( - new_inner if const is orig_inner else const - for const in outer.__code__.co_consts) - - # create a new code object with the new constants - ncode = ocode.replace(co_consts=new_consts) - - # and a new function object using the updated code object - return function( - ncode, outer.__globals__, outer.__name__, - outer.__defaults__, outer.__closure__ - ) diff --git a/wpandas/core.py b/wpandas/core.py deleted file mode 100644 index 7e784fb2..00000000 --- a/wpandas/core.py +++ /dev/null @@ -1,653 +0,0 @@ -"""Pandas DataFrame and Series with weighted samples.""" - -import warnings -from inspect import signature -import numpy as np -from pandas import Series, DataFrame, concat, MultiIndex -from pandas.core.groupby import GroupBy, SeriesGroupBy, DataFrameGroupBy, ops -from pandas._libs import lib -from pandas._libs.lib import no_default -from pandas.util._exceptions import find_stack_level -from pandas.util import hash_pandas_object -from numpy.ma import masked_array -from wpandas.utils import (compress_weights, channel_capacity, quantile, - temporary_seed, adjust_docstrings) -from pandas.core.dtypes.missing import notna - - -class WeightedGroupBy(GroupBy): - """Weighted version of ``pandas.core.groupby.GroupBy``.""" - - grouper: ops.BaseGrouper - """:meta private:""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def _add_weights(self, name, *args, **kwargs): - result = self.agg(lambda df: getattr(self.obj._constructor(df), name) - (*args, **kwargs)).set_weights(self.get_weights()) - return result.__finalize__(self.obj, method="groupby") - - def mean(self, *args, **kwargs): # noqa: D102 - return self._add_weights("mean", *args, **kwargs) - - def std(self, *args, **kwargs): # noqa: D102 - return self._add_weights("std", *args, **kwargs) - - def median(self, *args, **kwargs): # noqa: D102 - return self._add_weights("median", *args, **kwargs) - - def var(self, *args, **kwargs): # noqa: D102 - return self._add_weights("var", *args, **kwargs) - - def kurt(self, *args, **kwargs): # noqa: D102 - return self._add_weights("kurt", *args, **kwargs) - - def kurtosis(self, *args, **kwargs): # noqa: D102 - return self._add_weights("kurtosis", *args, **kwargs) - - def sem(self, *args, **kwargs): # noqa: D102 - return self._add_weights("sem", *args, **kwargs) - - def quantile(self, *args, **kwargs): # noqa: D102 - return self._add_weights("quantile", *args, **kwargs) - - def get_weights(self): - """Return the weights of the grouped samples.""" - return self.agg(lambda df: df.get_weights().sum()) - - def _make_wrapper(self, name): - _wrapper = super()._make_wrapper(name) - - def wrapper(*args, **kwargs): - result = _wrapper(*args, **kwargs) - try: - index = result.index.get_level_values(self.keys) - weights = self.get_weights()[index] - except KeyError: - weights = self.get_weights() - return 
result.set_weights(weights, level=1) - - wrapper.__name__ = name - return wrapper - - -class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): - """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" - - def sample(self, *args, **kwargs): # noqa: D102 - return super().sample(weights=self.obj.get_weights(), *args, **kwargs) - - -class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): - """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" - - def get_weights(self): - """Return the weights of the grouped samples.""" - return super().get_weights().min(axis=1-self.axis) - - def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover - if ndim == 2: - if subset is None: - subset = self.obj - return WeightedDataFrameGroupBy( - subset, - self.grouper, - axis=self.axis, - level=self.level, - grouper=self.grouper, - exclusions=self.exclusions, - selection=key, - as_index=self.as_index, - sort=self.sort, - group_keys=self.group_keys, - squeeze=self.squeeze, - observed=self.observed, - mutated=self.mutated, - dropna=self.dropna, - ) - elif ndim == 1: - if subset is None: - subset = self.obj[key] - return WeightedSeriesGroupBy( - subset, - level=self.level, - grouper=self.grouper, - selection=key, - sort=self.sort, - group_keys=self.group_keys, - squeeze=self.squeeze, - observed=self.observed, - dropna=self.dropna, - ) - - raise AssertionError("invalid ndim for _gotitem") - - def sample(self, *args, **kwargs): # noqa: D102 - return super().sample(weights=self.obj.get_weights(), *args, **kwargs) - - -class _WeightedObject(object): - """Common methods for `WeightedSeries` and `WeightedDataFrame`. - - :meta public: - """ - - def __init__(self, *args, **kwargs): - weights = kwargs.pop('weights', None) - super().__init__(*args, **kwargs) - if weights is not None: - self.set_weights(weights, inplace=True) - - def isweighted(self, axis=0): - """Determine if weights are actually present.""" - return 'weights' in self._get_axis(axis).names - - def get_weights(self, axis=0): - """Retrieve sample weights from an axis.""" - if self.isweighted(axis): - return self._get_axis(axis).get_level_values('weights').to_numpy() - else: - return np.ones_like(self._get_axis(axis)) - - def drop_weights(self, axis=0): - """Drop weights.""" - if self.isweighted(axis): - return self.droplevel('weights', axis) - return self.copy().__finalize__(self, "drop_weights") - - def set_weights(self, weights, axis=0, inplace=False, level=None): - """Set sample weights along an axis. - - Parameters - ---------- - weights : 1d array-like - The sample weights to put in an index. - - axis : int (0,1), default=0 - Whether to put weights in an index or column. - - inplace : bool, default=False - Whether to operate inplace, or return a new array. - - level : int - Which level in the index to insert before. 
- Defaults to inserting at back - - """ - if inplace: - result = self - else: - result = self.copy() - - if weights is None: - if result.isweighted(axis=axis): - result = result.drop_weights(axis) - else: - names = [n for n in result._get_axis(axis).names if n != 'weights'] - index = [result._get_axis(axis).get_level_values(n) for n in names] - if level is None: - if result.isweighted(axis): - level = result._get_axis(axis).names.index('weights') - else: - level = len(index) - index.insert(level, weights) - names.insert(level, 'weights') - - index = MultiIndex.from_arrays(index, names=names) - result = result.set_axis(index, axis=axis, copy=False) - - if inplace: - self._update_inplace(result) - else: - return result.__finalize__(self, "set_weights") - - def _rand(self, axis=0): - """Random number for consistent compression.""" - seed = hash_pandas_object(self._get_axis(axis)).sum() % 2**32 - with temporary_seed(seed): - return np.random.rand(self.shape[axis]) - - def reset_index(self, level=None, drop=False, inplace=False, - *args, **kwargs): - """Reset the index, retaining weights.""" - weights = self.get_weights() - answer = super().reset_index(level=level, drop=drop, - inplace=False, *args, **kwargs) - answer.set_weights(weights, inplace=True) - if inplace: - self._update_inplace(answer) - else: - return answer.__finalize__(self, "reset_index") - - def neff(self, axis=0): - """Effective number of samples.""" - if self.isweighted(axis): - return channel_capacity(self.get_weights(axis)) - else: - return self.shape[axis] - - -class WeightedSeries(_WeightedObject, Series): - """Weighted version of :class:`pandas.Series`.""" - - def mean(self, skipna=True): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - null = self.isnull() & skipna - return np.average(masked_array(self, null), weights=self.get_weights()) - - def std(self, *args, **kwargs): # noqa: D102 - return np.sqrt(self.var(*args, **kwargs)) - - def kurtosis(self, *args, **kwargs): # noqa: D102 - return self.kurt(*args, **kwargs) - - def median(self, *args, **kwargs): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - return self.quantile(*args, **kwargs) - - def var(self, skipna=True): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - null = self.isnull() & skipna - mean = self.mean(skipna=skipna) - if np.isnan(mean): - return mean - return np.average(masked_array((self-mean)**2, null), - weights=self.get_weights()) - - def cov(self, other, *args, **kwargs): # noqa: D102 - - this, other = self.align(other, join="inner", copy=False) - if len(this) == 0: - return np.nan - - weights = self.index.to_frame()['weights'] - weights, _ = weights.align(other, join="inner", copy=False) - - valid = notna(this) & notna(other) - if not valid.all(): - this = this[valid] - other = other[valid] - weights = weights[valid] - - return np.cov(this, other, aweights=weights)[0, 1] - - def corr(self, other, *args, **kwargs): # noqa: D102 - norm = self.std(skipna=True)*other.std(skipna=True) - return self.cov(other, *args, **kwargs)/norm - - def kurt(self, skipna=True): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - null = self.isnull() & skipna - mean = self.mean(skipna=skipna) - std = self.std(skipna=skipna) - if np.isnan(mean) or np.isnan(std): - return np.nan - return np.average(masked_array(((self-mean)/std)**4, null), - weights=self.get_weights()) - - def skew(self, skipna=True): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - null = self.isnull() & skipna - mean 
= self.mean(skipna=skipna) - std = self.std(skipna=skipna) - if np.isnan(mean) or np.isnan(std): - return np.nan - return np.average(masked_array(((self-mean)/std)**3, null), - weights=self.get_weights()) - - def mad(self, skipna=True): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - null = self.isnull() & skipna - mean = self.mean(skipna=skipna) - if np.isnan(mean): - return np.nan - return np.average(masked_array(abs(self-mean), null), - weights=self.get_weights()) - - def sem(self, skipna=True): # noqa: D102 - return np.sqrt(self.var(skipna=skipna)/self.neff()) - - def quantile(self, q=0.5, interpolation='linear'): # noqa: D102 - if self.get_weights().sum() == 0: - return np.nan - return quantile(self.to_numpy(), q, self.get_weights(), interpolation) - - def compress(self, ncompress=True): - """Reduce the number of samples by discarding low-weights. - - Parameters - ---------- - ncompress : int, optional - effective number of samples after compression. If not supplied - (or True), then reduce to the channel capacity (theoretical optimum - compression). If <=0, then compress so that all weights are unity. - - """ - i = compress_weights(self.get_weights(), self._rand(), ncompress) - return self.repeat(i) - - def sample(self, *args, **kwargs): # noqa: D102 - return super().sample(weights=self.get_weights(), *args, **kwargs) - - @property - def _constructor(self): - return WeightedSeries - - @property - def _constructor_expanddim(self): - return WeightedDataFrame - - def groupby( - self, - by=None, - axis=0, - level=None, - as_index=True, - sort=True, - group_keys=True, - observed=False, - dropna=True, - ): # pragma: no cover # noqa: D102 - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") - if not as_index: - raise TypeError("as_index=False only valid with DataFrame") - axis = self._get_axis_number(axis) - - return WeightedSeriesGroupBy( - obj=self, - keys=by, - axis=axis, - level=level, - as_index=as_index, - sort=sort, - group_keys=group_keys, - observed=observed, - dropna=dropna, - ) - - -class WeightedDataFrame(_WeightedObject, DataFrame): - """Weighted version of :class:`pandas.DataFrame`.""" - - def mean(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 - if self.isweighted(axis): - if self.get_weights(axis).sum() == 0: - return self._constructor_sliced(np.nan, - index=self._get_axis(1-axis)) - null = self.isnull() & skipna - mean = np.average(masked_array(self, null), - weights=self.get_weights(axis), axis=axis) - return self._constructor_sliced(mean, index=self._get_axis(1-axis)) - else: - return super().mean(axis=axis, skipna=skipna, *args, **kwargs) - - def std(self, *args, **kwargs): # noqa: D102 - return np.sqrt(self.var(*args, **kwargs)) - - def kurtosis(self, *args, **kwargs): # noqa: D102 - return self.kurt(*args, **kwargs) - - def median(self, *args, **kwargs): # noqa: D102 - return self.quantile(*args, **kwargs) - - def var(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 - if self.isweighted(axis): - if self.get_weights(axis).sum() == 0: - return self._constructor_sliced(np.nan, - index=self._get_axis(1-axis)) - null = self.isnull() & skipna - mean = self.mean(axis=axis, skipna=skipna) - var = np.average(masked_array((self-mean)**2, null), - weights=self.get_weights(axis), axis=axis) - return self._constructor_sliced(var, index=self._get_axis(1-axis)) - else: - return super().var(axis=axis, skipna=skipna, *args, **kwargs) - - def cov(self, *args, **kwargs): # noqa: D102 - if 
self.isweighted(): - null = self.isnull() - mean = self.mean(skipna=True) - x = masked_array(self - mean, null) - cov = np.ma.dot(self.get_weights()*x.T, x) \ - / self.get_weights().sum().T - if kwargs: - raise NotImplementedError("The keywords %s are not implemented" - "for the calculation of the" - "covariance with weighted samples." - % kwargs) - return self._constructor(cov, index=self.columns, - columns=self.columns) - else: - return super().cov(*args, **kwargs) - - def corr(self, method="pearson", skipna=True, - *args, **kwargs): # noqa: D102 - if self.isweighted(): - cov = self.cov() - diag = np.sqrt(np.diag(cov)) - return cov.divide(diag, axis=1).divide(diag, axis=0) - else: - return super().corr(*args, **kwargs) - - def corrwith(self, other, axis=0, drop=False, method="pearson", - *args, **kwargs): # noqa: D102 - axis = self._get_axis_number(axis) - if not self.isweighted(axis): - return super().corrwith(other, drop=drop, axis=axis, method=method, - *args, **kwargs) - else: - if isinstance(other, Series): - answer = self.apply(lambda x: other.corr(x, method=method), - axis=axis) - return self._constructor_sliced(answer) - - left, right = self.align(other, join="inner", copy=False) - - if axis == 1: - left = left.T - right = right.T - - weights = left.index.to_frame()['weights'] - weights, _ = weights.align(right, join="inner", copy=False) - - # mask missing values - left = left + right * 0 - right = right + left * 0 - - # demeaned data - ldem = left - left.mean() - rdem = right - right.mean() - - num = (ldem * rdem * weights.to_numpy()[:, None]).sum() - dom = weights.sum() * left.std() * right.std() - - correl = num / dom - - if not drop: - # Find non-matching labels along the given axis - result_index = self._get_axis(1-axis).union( - other._get_axis(1-axis)) - idx_diff = result_index.difference(correl.index) - - if len(idx_diff) > 0: - correl = concat([correl, Series([np.nan] * len(idx_diff), - index=idx_diff)]) - - return self._constructor_sliced(correl) - - def kurt(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 - if self.isweighted(axis): - if self.get_weights(axis).sum() == 0: - return self._constructor_sliced(np.nan, - index=self._get_axis(1-axis)) - null = self.isnull() & skipna - mean = self.mean(axis=axis, skipna=skipna) - std = self.std(axis=axis, skipna=skipna) - kurt = np.average(masked_array(((self-mean)/std)**4, null), - weights=self.get_weights(axis), axis=axis) - return self._constructor_sliced(kurt, index=self._get_axis(1-axis)) - else: - return super().kurt(axis=axis, skipna=skipna, *args, **kwargs) - - def skew(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 - if self.isweighted(axis): - if self.get_weights(axis).sum() == 0: - return self._constructor_sliced(np.nan, - index=self._get_axis(1-axis)) - null = self.isnull() & skipna - mean = self.mean(axis=axis, skipna=skipna) - std = self.std(axis=axis, skipna=skipna) - skew = np.average(masked_array(((self-mean)/std)**3, null), - weights=self.get_weights(axis), axis=axis) - return self._constructor_sliced(skew, index=self._get_axis(1-axis)) - else: - return super().skew(axis=axis, skipna=skipna, *args, **kwargs) - - def mad(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 - if self.isweighted(axis): - if self.get_weights(axis).sum() == 0: - return self._constructor_sliced(np.nan, - index=self._get_axis(1-axis)) - null = self.isnull() & skipna - mean = self.mean(axis=axis, skipna=skipna) - mad = np.average(masked_array(abs(self-mean), null), - weights=self.get_weights(axis), 
axis=axis) - return self._constructor_sliced(mad, index=self._get_axis(1-axis)) - else: - return super().var(axis=axis, skipna=skipna, *args, **kwargs) - - def sem(self, axis=0, skipna=True): # noqa: D102 - n = self.neff(axis) - return np.sqrt(self.var(axis=axis, skipna=skipna)/n) - - def quantile(self, q=0.5, axis=0, numeric_only=None, - interpolation='linear', method=None): # noqa: D102 - if self.isweighted(axis): - if numeric_only is not None or method is not None: - raise NotImplementedError( - "`numeric_only` and `method` kwargs not implemented for " - "`WeightedSeries` and `WeightedDataFrame`." - ) - data = np.array([c.quantile(q=q, interpolation=interpolation) - for _, c in self.items()]) - if np.isscalar(q): - return self._constructor_sliced(data, - index=self._get_axis(1-axis)) - else: - return self._constructor(data.T, index=q, - columns=self._get_axis(1-axis)) - else: - if numeric_only is None: - numeric_only = True - if method is None: - method = 'single' - return super().quantile(q=q, axis=axis, numeric_only=numeric_only, - interpolation=interpolation, method=method) - - def compress(self, ncompress=True, axis=0): - """Reduce the number of samples by discarding low-weights. - - Parameters - ---------- - ncompress : int, optional - effective number of samples after compression. If not supplied - (or True), then reduce to the channel capacity (theoretical optimum - compression). If <=0, then compress so that all weights are unity. - - """ - if self.isweighted(axis): - i = compress_weights(self.get_weights(axis), self._rand(axis), - ncompress) - data = np.repeat(self.to_numpy(), i, axis=axis) - i = self.drop_weights(axis)._get_axis(axis).repeat(i) - df = self._constructor(data=data) - df = df.set_axis(i, axis=axis, copy=False) - df = df.set_axis(self._get_axis(1-axis), axis=1-axis, copy=False) - return df - else: - return self - - def sample(self, *args, **kwargs): # noqa: D102 - sig = signature(DataFrame.sample) - axis = sig.bind(self, *args, **kwargs).arguments.get('axis', 0) - if self.isweighted(axis): - return super().sample(weights=self.get_weights(axis), - *args, **kwargs) - else: - return super().sample(*args, **kwargs) - - @property - def _constructor_sliced(self): - return WeightedSeries - - @property - def _constructor(self): - return WeightedDataFrame - - def groupby( - self, - by=None, - axis=no_default, - level=None, - as_index: bool = True, - sort: bool = True, - group_keys: bool = True, - observed: bool = False, - dropna: bool = True, - ): # pragma: no cover # noqa: D102 - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.groupby with axis=1 is deprecated. 
Do " - "`frame.T.groupby(...)` without axis instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - "The 'axis' keyword in DataFrame.groupby is deprecated " - "and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") - - return WeightedDataFrameGroupBy( - obj=self, - keys=by, - axis=axis, - level=level, - as_index=as_index, - sort=sort, - group_keys=group_keys, - observed=observed, - dropna=dropna, - ) - - -for cls in [WeightedDataFrame, WeightedSeries, WeightedGroupBy, - WeightedDataFrameGroupBy, WeightedSeriesGroupBy]: - adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame') - adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames') - adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries') - adjust_docstrings(cls, 'core', 'pandas.core') - adjust_docstrings(cls, 'pandas.core.window.Rolling.quantile', - 'pandas.core.window.rolling.Rolling.quantile') - adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy') - adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy') -adjust_docstrings(WeightedDataFrame, 'resample', 'pandas.DataFrame.resample') -adjust_docstrings(WeightedSeries, 'resample', 'pandas.Series.resample') diff --git a/wpandas/core/__init__.py b/wpandas/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wpandas/core/base.py b/wpandas/core/base.py new file mode 100644 index 00000000..93aebd22 --- /dev/null +++ b/wpandas/core/base.py @@ -0,0 +1,106 @@ +import numpy as np +from pandas import MultiIndex +from pandas.util import hash_pandas_object +from wpandas.core.util.random import temporary_seed +from wpandas.core.util.weights import channel_capacity + + +class _WeightedObject(object): + """Common methods for `WeightedSeries` and `WeightedDataFrame`. + + :meta public: + """ + + def __init__(self, *args, **kwargs): + weights = kwargs.pop('weights', None) + super().__init__(*args, **kwargs) + if weights is not None: + self.set_weights(weights, inplace=True) + + def isweighted(self, axis=0): + """Determine if weights are actually present.""" + return 'weights' in self._get_axis(axis).names + + def get_weights(self, axis=0): + """Retrieve sample weights from an axis.""" + if self.isweighted(axis): + return self._get_axis(axis).get_level_values('weights').to_numpy() + else: + return np.ones_like(self._get_axis(axis)) + + def drop_weights(self, axis=0): + """Drop weights.""" + if self.isweighted(axis): + return self.droplevel('weights', axis) + return self.copy().__finalize__(self, "drop_weights") + + def set_weights(self, weights, axis=0, inplace=False, level=None): + """Set sample weights along an axis. + + Parameters + ---------- + weights : 1d array-like + The sample weights to put in an index. + + axis : int (0,1), default=0 + Whether to put weights in an index or column. + + inplace : bool, default=False + Whether to operate inplace, or return a new array. + + level : int + Which level in the index to insert before. 
+ Defaults to inserting at back + + """ + if inplace: + result = self + else: + result = self.copy() + + if weights is None: + if result.isweighted(axis=axis): + result = result.drop_weights(axis) + else: + names = [n for n in result._get_axis(axis).names if n != 'weights'] + index = [result._get_axis(axis).get_level_values(n) for n in names] + if level is None: + if result.isweighted(axis): + level = result._get_axis(axis).names.index('weights') + else: + level = len(index) + index.insert(level, weights) + names.insert(level, 'weights') + + index = MultiIndex.from_arrays(index, names=names) + result = result.set_axis(index, axis=axis, copy=False) + + if inplace: + self._update_inplace(result) + else: + return result.__finalize__(self, "set_weights") + + def _rand(self, axis=0): + """Random number for consistent compression.""" + seed = hash_pandas_object(self._get_axis(axis)).sum() % 2**32 + with temporary_seed(seed): + return np.random.rand(self.shape[axis]) + + def reset_index(self, level=None, drop=False, inplace=False, + *args, **kwargs): + """Reset the index, retaining weights.""" + weights = self.get_weights() + answer = super().reset_index(level=level, drop=drop, + inplace=False, *args, **kwargs) + answer.set_weights(weights, inplace=True) + if inplace: + self._update_inplace(answer) + else: + return answer.__finalize__(self, "reset_index") + + def neff(self, axis=0): + """Effective number of samples.""" + if self.isweighted(axis): + return channel_capacity(self.get_weights(axis)) + else: + return self.shape[axis] diff --git a/wpandas/core/frame.py b/wpandas/core/frame.py new file mode 100644 index 00000000..531aa6f4 --- /dev/null +++ b/wpandas/core/frame.py @@ -0,0 +1,281 @@ +from wpandas.core.base import _WeightedObject +from wpandas.core.util._code_transforms import adjust_weighted_docstrings + +import warnings +from inspect import signature +import numpy as np +from pandas import Series, DataFrame, concat +from pandas._libs import lib +from pandas._libs.lib import no_default +from pandas.util._exceptions import find_stack_level +from numpy.ma import masked_array +from wpandas.core.util.weights import compress_weights + + +class WeightedDataFrame(_WeightedObject, DataFrame): + """Weighted version of :class:`pandas.DataFrame`.""" + + def mean(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 + if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) + null = self.isnull() & skipna + mean = np.average(masked_array(self, null), + weights=self.get_weights(axis), axis=axis) + return self._constructor_sliced(mean, index=self._get_axis(1-axis)) + else: + return super().mean(axis=axis, skipna=skipna, *args, **kwargs) + + def std(self, *args, **kwargs): # noqa: D102 + return np.sqrt(self.var(*args, **kwargs)) + + def kurtosis(self, *args, **kwargs): # noqa: D102 + return self.kurt(*args, **kwargs) + + def median(self, *args, **kwargs): # noqa: D102 + return self.quantile(*args, **kwargs) + + def var(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 + if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) + null = self.isnull() & skipna + mean = self.mean(axis=axis, skipna=skipna) + var = np.average(masked_array((self-mean)**2, null), + weights=self.get_weights(axis), axis=axis) + return self._constructor_sliced(var, index=self._get_axis(1-axis)) + else: + return super().var(axis=axis, skipna=skipna, *args, 
**kwargs) + + def cov(self, *args, **kwargs): # noqa: D102 + if self.isweighted(): + null = self.isnull() + mean = self.mean(skipna=True) + x = masked_array(self - mean, null) + cov = np.ma.dot(self.get_weights()*x.T, x) \ + / self.get_weights().sum().T + if kwargs: + raise NotImplementedError("The keywords %s are not implemented" + "for the calculation of the" + "covariance with weighted samples." + % kwargs) + return self._constructor(cov, index=self.columns, + columns=self.columns) + else: + return super().cov(*args, **kwargs) + + def corr(self, method="pearson", skipna=True, + *args, **kwargs): # noqa: D102 + if self.isweighted(): + cov = self.cov() + diag = np.sqrt(np.diag(cov)) + return cov.divide(diag, axis=1).divide(diag, axis=0) + else: + return super().corr(*args, **kwargs) + + def corrwith(self, other, axis=0, drop=False, method="pearson", + *args, **kwargs): # noqa: D102 + axis = self._get_axis_number(axis) + if not self.isweighted(axis): + return super().corrwith(other, drop=drop, axis=axis, method=method, + *args, **kwargs) + else: + if isinstance(other, Series): + answer = self.apply(lambda x: other.corr(x, method=method), + axis=axis) + return self._constructor_sliced(answer) + + left, right = self.align(other, join="inner", copy=False) + + if axis == 1: + left = left.T + right = right.T + + weights = left.index.to_frame()['weights'] + weights, _ = weights.align(right, join="inner", copy=False) + + # mask missing values + left = left + right * 0 + right = right + left * 0 + + # demeaned data + ldem = left - left.mean() + rdem = right - right.mean() + + num = (ldem * rdem * weights.to_numpy()[:, None]).sum() + dom = weights.sum() * left.std() * right.std() + + correl = num / dom + + if not drop: + # Find non-matching labels along the given axis + result_index = self._get_axis(1-axis).union( + other._get_axis(1-axis)) + idx_diff = result_index.difference(correl.index) + + if len(idx_diff) > 0: + correl = concat([correl, Series([np.nan] * len(idx_diff), + index=idx_diff)]) + + return self._constructor_sliced(correl) + + def kurt(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 + if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) + null = self.isnull() & skipna + mean = self.mean(axis=axis, skipna=skipna) + std = self.std(axis=axis, skipna=skipna) + kurt = np.average(masked_array(((self-mean)/std)**4, null), + weights=self.get_weights(axis), axis=axis) + return self._constructor_sliced(kurt, index=self._get_axis(1-axis)) + else: + return super().kurt(axis=axis, skipna=skipna, *args, **kwargs) + + def skew(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 + if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) + null = self.isnull() & skipna + mean = self.mean(axis=axis, skipna=skipna) + std = self.std(axis=axis, skipna=skipna) + skew = np.average(masked_array(((self-mean)/std)**3, null), + weights=self.get_weights(axis), axis=axis) + return self._constructor_sliced(skew, index=self._get_axis(1-axis)) + else: + return super().skew(axis=axis, skipna=skipna, *args, **kwargs) + + def mad(self, axis=0, skipna=True, *args, **kwargs): # noqa: D102 + if self.isweighted(axis): + if self.get_weights(axis).sum() == 0: + return self._constructor_sliced(np.nan, + index=self._get_axis(1-axis)) + null = self.isnull() & skipna + mean = self.mean(axis=axis, skipna=skipna) + mad = 
np.average(masked_array(abs(self-mean), null), + weights=self.get_weights(axis), axis=axis) + return self._constructor_sliced(mad, index=self._get_axis(1-axis)) + else: + return super().var(axis=axis, skipna=skipna, *args, **kwargs) + + def sem(self, axis=0, skipna=True): # noqa: D102 + n = self.neff(axis) + return np.sqrt(self.var(axis=axis, skipna=skipna)/n) + + def quantile(self, q=0.5, axis=0, numeric_only=None, + interpolation='linear', method=None): # noqa: D102 + if self.isweighted(axis): + if numeric_only is not None or method is not None: + raise NotImplementedError( + "`numeric_only` and `method` kwargs not implemented for " + "`WeightedSeries` and `WeightedDataFrame`." + ) + data = np.array([c.quantile(q=q, interpolation=interpolation) + for _, c in self.items()]) + if np.isscalar(q): + return self._constructor_sliced(data, + index=self._get_axis(1-axis)) + else: + return self._constructor(data.T, index=q, + columns=self._get_axis(1-axis)) + else: + if numeric_only is None: + numeric_only = True + if method is None: + method = 'single' + return super().quantile(q=q, axis=axis, numeric_only=numeric_only, + interpolation=interpolation, method=method) + + def compress(self, ncompress=True, axis=0): + """Reduce the number of samples by discarding low-weights. + + Parameters + ---------- + ncompress : int, optional + effective number of samples after compression. If not supplied + (or True), then reduce to the channel capacity (theoretical optimum + compression). If <=0, then compress so that all weights are unity. + + """ + if self.isweighted(axis): + i = compress_weights(self.get_weights(axis), self._rand(axis), + ncompress) + data = np.repeat(self.to_numpy(), i, axis=axis) + i = self.drop_weights(axis)._get_axis(axis).repeat(i) + df = self._constructor(data=data) + df = df.set_axis(i, axis=axis, copy=False) + df = df.set_axis(self._get_axis(1-axis), axis=1-axis, copy=False) + return df + else: + return self + + def sample(self, *args, **kwargs): # noqa: D102 + sig = signature(DataFrame.sample) + axis = sig.bind(self, *args, **kwargs).arguments.get('axis', 0) + if self.isweighted(axis): + return super().sample(weights=self.get_weights(axis), + *args, **kwargs) + else: + return super().sample(*args, **kwargs) + + @property + def _constructor_sliced(self): + from wpandas.core.series import WeightedSeries + return WeightedSeries + + @property + def _constructor(self): + return WeightedDataFrame + + def groupby( + self, + by=None, + axis=no_default, + level=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + observed: bool = False, + dropna: bool = True, + ): # pragma: no cover # noqa: D102 + from wpandas.core.groupby import WeightedDataFrameGroupBy + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.groupby with axis=1 is deprecated. 
Do " + "`frame.T.groupby(...)` without axis instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + warnings.warn( + "The 'axis' keyword in DataFrame.groupby is deprecated " + "and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + else: + axis = 0 + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + + return WeightedDataFrameGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + + +adjust_weighted_docstrings(WeightedDataFrame) diff --git a/wpandas/core/groupby/__init__.py b/wpandas/core/groupby/__init__.py new file mode 100644 index 00000000..ebe54069 --- /dev/null +++ b/wpandas/core/groupby/__init__.py @@ -0,0 +1 @@ +from wpandas.core.groupby.generic import WeightedSeriesGroupBy, WeightedDataFrameGroupBy diff --git a/wpandas/core/groupby/generic.py b/wpandas/core/groupby/generic.py new file mode 100644 index 00000000..d453490a --- /dev/null +++ b/wpandas/core/groupby/generic.py @@ -0,0 +1,70 @@ +""" +Define the WeightedSeriesGroupBy and WeightedDataFrameGroupBy +classes that hold the groupby interfaces (and some +implementations). + +These are user facing as the result of the +``wdf.groupby(...)`` operations, which here returns a +WeightedDataFrameGroupBy object. +""" +from pandas.core.groupby import SeriesGroupBy, DataFrameGroupBy +from wpandas.core.groupby.groupby import WeightedGroupBy +from wpandas.core.util._code_transforms import adjust_weighted_docstrings + + +class WeightedSeriesGroupBy(WeightedGroupBy, SeriesGroupBy): + """Weighted version of ``pandas.core.groupby.SeriesGroupBy``.""" + + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + + +class WeightedDataFrameGroupBy(WeightedGroupBy, DataFrameGroupBy): + """Weighted version of ``pandas.core.groupby.DataFrameGroupBy``.""" + + def get_weights(self): + """Return the weights of the grouped samples.""" + return super().get_weights().min(axis=1-self.axis) + + def _gotitem(self, key, ndim: int, subset=None): # pragma: no cover + if ndim == 2: + if subset is None: + subset = self.obj + return WeightedDataFrameGroupBy( + subset, + self.grouper, + axis=self.axis, + level=self.level, + grouper=self.grouper, + exclusions=self.exclusions, + selection=key, + as_index=self.as_index, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + observed=self.observed, + mutated=self.mutated, + dropna=self.dropna, + ) + elif ndim == 1: + if subset is None: + subset = self.obj[key] + return WeightedSeriesGroupBy( + subset, + level=self.level, + grouper=self.grouper, + selection=key, + sort=self.sort, + group_keys=self.group_keys, + squeeze=self.squeeze, + observed=self.observed, + dropna=self.dropna, + ) + + raise AssertionError("invalid ndim for _gotitem") + + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.obj.get_weights(), *args, **kwargs) + + +adjust_weighted_docstrings(WeightedSeriesGroupBy) diff --git a/wpandas/core/groupby/groupby.py b/wpandas/core/groupby/groupby.py new file mode 100644 index 00000000..6dc90414 --- /dev/null +++ b/wpandas/core/groupby/groupby.py @@ -0,0 +1,72 @@ +""" +Provide the weighted groupby split-apply-combine paradigm. Define the +WeightedGroupBy class providing the base-class of operations. 
+ +The WeightedSeriesGroupBy and WeightedDataFrameGroupBy sub-class (defined in +wpandas.core.groupby.generic) expose these user-facing objects to provide +specific functionality. +""" + +from pandas.core.groupby import GroupBy, ops +from wpandas.core.util._code_transforms import adjust_weighted_docstrings + + +class WeightedGroupBy(GroupBy): + """Weighted version of ``pandas.core.groupby.GroupBy``.""" + + grouper: ops.BaseGrouper + """:meta private:""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _add_weights(self, name, *args, **kwargs): + result = self.agg(lambda df: getattr(self.obj._constructor(df), name) + (*args, **kwargs)).set_weights(self.get_weights()) + return result.__finalize__(self.obj, method="groupby") + + def mean(self, *args, **kwargs): # noqa: D102 + return self._add_weights("mean", *args, **kwargs) + + def std(self, *args, **kwargs): # noqa: D102 + return self._add_weights("std", *args, **kwargs) + + def median(self, *args, **kwargs): # noqa: D102 + return self._add_weights("median", *args, **kwargs) + + def var(self, *args, **kwargs): # noqa: D102 + return self._add_weights("var", *args, **kwargs) + + def kurt(self, *args, **kwargs): # noqa: D102 + return self._add_weights("kurt", *args, **kwargs) + + def kurtosis(self, *args, **kwargs): # noqa: D102 + return self._add_weights("kurtosis", *args, **kwargs) + + def sem(self, *args, **kwargs): # noqa: D102 + return self._add_weights("sem", *args, **kwargs) + + def quantile(self, *args, **kwargs): # noqa: D102 + return self._add_weights("quantile", *args, **kwargs) + + def get_weights(self): + """Return the weights of the grouped samples.""" + return self.agg(lambda df: df.get_weights().sum()) + + def _make_wrapper(self, name): + _wrapper = super()._make_wrapper(name) + + def wrapper(*args, **kwargs): + result = _wrapper(*args, **kwargs) + try: + index = result.index.get_level_values(self.keys) + weights = self.get_weights()[index] + except KeyError: + weights = self.get_weights() + return result.set_weights(weights, level=1) + + wrapper.__name__ = name + return wrapper + + +adjust_weighted_docstrings(WeightedGroupBy) diff --git a/wpandas/core/series.py b/wpandas/core/series.py new file mode 100644 index 00000000..fbe3c9b4 --- /dev/null +++ b/wpandas/core/series.py @@ -0,0 +1,163 @@ +""" +Weighted data structure for 1-dimensional cross-sectional and time series data +""" +import numpy as np +from numpy.ma import masked_array +from pandas import Series +from wpandas.core.util._code_transforms import adjust_weighted_docstrings +from wpandas.core.base import _WeightedObject + +from wpandas.core.util.weights import compress_weights, quantile +from pandas.core.dtypes.missing import notna + + +class WeightedSeries(_WeightedObject, Series): + """Weighted version of :class:`pandas.Series`.""" + + def mean(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + null = self.isnull() & skipna + return np.average(masked_array(self, null), weights=self.get_weights()) + + def std(self, *args, **kwargs): # noqa: D102 + return np.sqrt(self.var(*args, **kwargs)) + + def kurtosis(self, *args, **kwargs): # noqa: D102 + return self.kurt(*args, **kwargs) + + def median(self, *args, **kwargs): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + return self.quantile(*args, **kwargs) + + def var(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + null = self.isnull() & skipna + mean = self.mean(skipna=skipna) + if 
np.isnan(mean): + return mean + return np.average(masked_array((self-mean)**2, null), + weights=self.get_weights()) + + def cov(self, other, *args, **kwargs): # noqa: D102 + + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + + weights = self.index.to_frame()['weights'] + weights, _ = weights.align(other, join="inner", copy=False) + + valid = notna(this) & notna(other) + if not valid.all(): + this = this[valid] + other = other[valid] + weights = weights[valid] + + return np.cov(this, other, aweights=weights)[0, 1] + + def corr(self, other, *args, **kwargs): # noqa: D102 + norm = self.std(skipna=True)*other.std(skipna=True) + return self.cov(other, *args, **kwargs)/norm + + def kurt(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + null = self.isnull() & skipna + mean = self.mean(skipna=skipna) + std = self.std(skipna=skipna) + if np.isnan(mean) or np.isnan(std): + return np.nan + return np.average(masked_array(((self-mean)/std)**4, null), + weights=self.get_weights()) + + def skew(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + null = self.isnull() & skipna + mean = self.mean(skipna=skipna) + std = self.std(skipna=skipna) + if np.isnan(mean) or np.isnan(std): + return np.nan + return np.average(masked_array(((self-mean)/std)**3, null), + weights=self.get_weights()) + + def mad(self, skipna=True): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + null = self.isnull() & skipna + mean = self.mean(skipna=skipna) + if np.isnan(mean): + return np.nan + return np.average(masked_array(abs(self-mean), null), + weights=self.get_weights()) + + def sem(self, skipna=True): # noqa: D102 + return np.sqrt(self.var(skipna=skipna)/self.neff()) + + def quantile(self, q=0.5, interpolation='linear'): # noqa: D102 + if self.get_weights().sum() == 0: + return np.nan + return quantile(self.to_numpy(), q, self.get_weights(), interpolation) + + def compress(self, ncompress=True): + """Reduce the number of samples by discarding low-weights. + + Parameters + ---------- + ncompress : int, optional + effective number of samples after compression. If not supplied + (or True), then reduce to the channel capacity (theoretical optimum + compression). If <=0, then compress so that all weights are unity. 
+ + """ + i = compress_weights(self.get_weights(), self._rand(), ncompress) + return self.repeat(i) + + def sample(self, *args, **kwargs): # noqa: D102 + return super().sample(weights=self.get_weights(), *args, **kwargs) + + @property + def _constructor(self): + return WeightedSeries + + @property + def _constructor_expanddim(self): + from wpandas.core.frame import WeightedDataFrame + return WeightedDataFrame + + def groupby( + self, + by=None, + axis=0, + level=None, + as_index=True, + sort=True, + group_keys=True, + observed=False, + dropna=True, + ): # pragma: no cover # noqa: D102 + from wpandas.core.groupby import WeightedSeriesGroupBy + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + if not as_index: + raise TypeError("as_index=False only valid with DataFrame") + axis = self._get_axis_number(axis) + + return WeightedSeriesGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + + +adjust_weighted_docstrings(WeightedSeries) diff --git a/wpandas/core/util/__init__.py b/wpandas/core/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wpandas/core/util/_code_transforms.py b/wpandas/core/util/_code_transforms.py new file mode 100644 index 00000000..9b07e6bb --- /dev/null +++ b/wpandas/core/util/_code_transforms.py @@ -0,0 +1,78 @@ +"""Utility functions for code transformation.""" +import inspect +import re + + +def adjust_docstrings(cls, pattern, repl, *args, **kwargs): + """Adjust the docstrings of a class using regular expressions. + + After the first argument, the remaining arguments are identical to re.sub. + + Parameters + ---------- + cls : class + class to adjust + + pattern : str + regular expression pattern + + repl : str + replacement string + """ + for key, val in cls.__dict__.items(): + doc = inspect.getdoc(val) + if doc is not None: + newdoc = re.sub(pattern, repl, doc, *args, **kwargs) + try: + cls.__dict__[key].__doc__ = newdoc + except AttributeError: + pass + + +def adjust_weighted_docstrings(cls): + adjust_docstrings(cls, r'\bDataFrame\b', 'WeightedDataFrame') + adjust_docstrings(cls, r'\bDataFrames\b', 'WeightedDataFrames') + adjust_docstrings(cls, r'\bSeries\b', 'WeightedSeries') + adjust_docstrings(cls, 'core', 'pandas.core') + adjust_docstrings(cls, 'pandas.core.window.Rolling.quantile', + 'pandas.core.window.rolling.Rolling.quantile') + adjust_docstrings(cls, r'\bDataFrameGroupBy\b', 'WeightedDataFrameGroupBy') + adjust_docstrings(cls, r'\bSeriesGroupBy\b', 'WeightedSeriesGroupBy') + adjust_docstrings(cls, 'resample', 'pandas.DataFrame.resample') + adjust_docstrings(cls, 'resample', 'pandas.Series.resample') + + +def replace_inner_function(outer, new_inner): + """Replace a nested function code object used by outer with new_inner. + + The replacement new_inner must use the same name and must at most use the + same closures as the original. + + Code taken from https://stackoverflow.com/a/27550237 + """ + # find original code object so we can validate the closures match + ocode = outer.__code__ + function, code = type(outer), type(ocode) + iname = new_inner.co_name + orig_inner = next( + const for const in ocode.co_consts + if isinstance(const, code) and const.co_name == iname) + + # you can ignore later closures, but since they are matched by position + # the new sequence must match the start of the old. 
+ assert (orig_inner.co_freevars[:len(new_inner.co_freevars)] == + new_inner.co_freevars), 'New closures must match originals' + + # replace the code object for the inner function + new_consts = tuple( + new_inner if const is orig_inner else const + for const in outer.__code__.co_consts) + + # create a new code object with the new constants + ncode = ocode.replace(co_consts=new_consts) + + # and a new function object using the updated code object + return function( + ncode, outer.__globals__, outer.__name__, + outer.__defaults__, outer.__closure__ + ) diff --git a/wpandas/core/util/random.py b/wpandas/core/util/random.py new file mode 100644 index 00000000..b8d3b732 --- /dev/null +++ b/wpandas/core/util/random.py @@ -0,0 +1,14 @@ +"""Random number generation utilities.""" +import contextlib +import numpy as np + + +@contextlib.contextmanager +def temporary_seed(seed): + """Context for temporarily setting a numpy seed.""" + state = np.random.get_state() + np.random.seed(seed) + try: + yield + finally: + np.random.set_state(state) diff --git a/wpandas/utils.py b/wpandas/core/util/weights.py similarity index 62% rename from wpandas/utils.py rename to wpandas/core/util/weights.py index 9d01435a..ffd61939 100644 --- a/wpandas/utils.py +++ b/wpandas/core/util/weights.py @@ -2,8 +2,6 @@ import numpy as np from scipy.interpolate import interp1d import contextlib -import inspect -import re def channel_capacity(w): @@ -62,39 +60,3 @@ def quantile(a, q, w=None, interpolation='linear'): quant = float(quant) return quant - -@contextlib.contextmanager -def temporary_seed(seed): - """Context for temporarily setting a numpy seed.""" - state = np.random.get_state() - np.random.seed(seed) - try: - yield - finally: - np.random.set_state(state) - - -def adjust_docstrings(cls, pattern, repl, *args, **kwargs): - """Adjust the docstrings of a class using regular expressions. - - After the first argument, the remaining arguments are identical to re.sub. 
- - Parameters - ---------- - cls : class - class to adjust - - pattern : str - regular expression pattern - - repl : str - replacement string - """ - for key, val in cls.__dict__.items(): - doc = inspect.getdoc(val) - if doc is not None: - newdoc = re.sub(pattern, repl, doc, *args, **kwargs) - try: - cls.__dict__[key].__doc__ = newdoc - except AttributeError: - pass diff --git a/wpandas/plotting/_matplotlib/boxplot.py b/wpandas/plotting/_matplotlib/boxplot.py index 4190bccc..151a6cf4 100644 --- a/wpandas/plotting/_matplotlib/boxplot.py +++ b/wpandas/plotting/_matplotlib/boxplot.py @@ -1,11 +1,11 @@ import pandas.plotting._matplotlib.boxplot from pandas.plotting._matplotlib.boxplot import BoxPlot as _BoxPlot from wpandas.plotting._matplotlib.core import _WeightedMPLPlot, _get_weights -from wpandas.utils import quantile +from wpandas.core.util.weights import quantile from pandas.core.dtypes.missing import remove_na_arraylike import numpy as np from pandas.io.formats.printing import pprint_thing -from wpandas._code_utils import replace_inner_function +from wpandas.core.util._code_transforms import replace_inner_function def _bxpstat(y, weights, whis): diff --git a/wpandas/plotting/_matplotlib/core.py b/wpandas/plotting/_matplotlib/core.py index 1642932c..d53128d3 100644 --- a/wpandas/plotting/_matplotlib/core.py +++ b/wpandas/plotting/_matplotlib/core.py @@ -10,7 +10,7 @@ PiePlot as _PiePlot, MPLPlot) from pandas.io.formats.printing import pprint_thing -from wpandas.core import _WeightedObject +from wpandas.core.base import _WeightedObject def _get_weights(kwargs, data): From 8f37ed7c9b83b0c22ce89b2a59c094d5d7ff4889 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 14:26:53 +0100 Subject: [PATCH 12/19] remove show-inheritance from anesthetic.core to see if this fixes docs --- docs/source/anesthetic.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index 7f44bdf6..4a20f054 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -57,7 +57,6 @@ anesthetic.core module .. automodule:: anesthetic.core :members: :undoc-members: - :show-inheritance: anesthetic.scripts module From 5611c58fcec7a5f35ab4e46c4e009aab818672a2 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 14:37:39 +0100 Subject: [PATCH 13/19] attempt to correct wpandas.utils docs --- docs/source/wpandas.rst | 11 +---------- wpandas.utils.rst | 7 +++++++ 2 files changed, 8 insertions(+), 10 deletions(-) create mode 100644 wpandas.utils.rst diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index bc9ef1e3..ef6e49fa 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -7,6 +7,7 @@ wpandas subpackages :maxdepth: 1 wpandas.plotting + wpandas.utils wpandas modules @@ -20,16 +21,6 @@ wpandas.core module :undoc-members: -wpandas.utils module -~~~~~~~~~~~~~~~~~~~~ - -.. automodule:: wpandas.utils - :members: - :undoc-members: - :show-inheritance: - - - .. automodule:: wpandas :members: diff --git a/wpandas.utils.rst b/wpandas.utils.rst new file mode 100644 index 00000000..4c9337bb --- /dev/null +++ b/wpandas.utils.rst @@ -0,0 +1,7 @@ +wpandas.utils package +======================== + +.. 
automodule:: wpandas.utils + :members: + :undoc-members: + :show-inheritance: From 1f8c51686d7b0d3d782e46250667d88527894de8 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 14:44:48 +0100 Subject: [PATCH 14/19] put utils docs in right place --- wpandas.utils.rst => docs/source/wpandas.utils.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename wpandas.utils.rst => docs/source/wpandas.utils.rst (100%) diff --git a/wpandas.utils.rst b/docs/source/wpandas.utils.rst similarity index 100% rename from wpandas.utils.rst rename to docs/source/wpandas.utils.rst From 8e7548a7c9c9cc599493e75663850c8aed943de6 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 14:51:20 +0100 Subject: [PATCH 15/19] just get rid of wpandas utils --- docs/source/wpandas.rst | 1 - docs/source/wpandas.utils.rst | 7 ------- 2 files changed, 8 deletions(-) delete mode 100644 docs/source/wpandas.utils.rst diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index ef6e49fa..d961b022 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -7,7 +7,6 @@ wpandas subpackages :maxdepth: 1 wpandas.plotting - wpandas.utils wpandas modules diff --git a/docs/source/wpandas.utils.rst b/docs/source/wpandas.utils.rst deleted file mode 100644 index 4c9337bb..00000000 --- a/docs/source/wpandas.utils.rst +++ /dev/null @@ -1,7 +0,0 @@ -wpandas.utils package -======================== - -.. automodule:: wpandas.utils - :members: - :undoc-members: - :show-inheritance: From 53f486db08b4e70d8de1e45cddcac04cd1bd96eb Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 15:26:59 +0100 Subject: [PATCH 16/19] separate wpandas.core module --- docs/source/wpandas.core.rst | 7 +++++++ docs/source/wpandas.rst | 12 +----------- 2 files changed, 8 insertions(+), 11 deletions(-) create mode 100644 docs/source/wpandas.core.rst diff --git a/docs/source/wpandas.core.rst b/docs/source/wpandas.core.rst new file mode 100644 index 00000000..976c1b81 --- /dev/null +++ b/docs/source/wpandas.core.rst @@ -0,0 +1,7 @@ +wpandas.core package +=========================== + +.. automodule:: wpandas.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index d961b022..d13b6dbc 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -6,20 +6,10 @@ wpandas subpackages .. toctree:: :maxdepth: 1 + wpandas.core wpandas.plotting -wpandas modules ---------------- - -wpandas.core module -~~~~~~~~~~~~~~~~~~~ - -.. automodule:: wpandas.core - :members: - :undoc-members: - - .. automodule:: wpandas :members: From fe08bf1236921de7bc4c52d501b62a646ec84d89 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Mon, 17 Apr 2023 15:35:38 +0100 Subject: [PATCH 17/19] reinstate :show-inheritance: --- docs/source/anesthetic.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index 4a20f054..7f44bdf6 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -57,6 +57,7 @@ anesthetic.core module .. 
automodule:: anesthetic.core :members: :undoc-members: + :show-inheritance: anesthetic.scripts module From a2c6d03d55eda78b9266f994d720c2b653c8457b Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Fri, 21 Apr 2023 11:53:27 +0100 Subject: [PATCH 18/19] run sphinx-apidoc on anesthetic, wpandas and lpandas separately, then make sure all three are in modules.rst --- docs/source/anesthetic.examples.rst | 12 ++++---- docs/source/anesthetic.gui.rst | 12 ++++---- docs/source/anesthetic.read.rst | 12 ++++---- docs/source/anesthetic.rst | 25 +++++++++++------ docs/source/lpandas.rst | 12 ++++---- docs/source/wpandas.core.rst | 43 ++++++++++++++++++++++++++++- docs/source/wpandas.rst | 12 ++++---- 7 files changed, 89 insertions(+), 39 deletions(-) diff --git a/docs/source/anesthetic.examples.rst b/docs/source/anesthetic.examples.rst index f53ae682..e7bc6504 100644 --- a/docs/source/anesthetic.examples.rst +++ b/docs/source/anesthetic.examples.rst @@ -1,6 +1,12 @@ anesthetic.examples package =========================== +.. automodule:: anesthetic.examples + :members: + :undoc-members: + :show-inheritance: + + anesthetic.examples.perfect\_ns module -------------------------------------- @@ -19,9 +25,3 @@ anesthetic.examples.utils module :show-inheritance: - - -.. automodule:: anesthetic.examples - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/anesthetic.gui.rst b/docs/source/anesthetic.gui.rst index 31522689..45206769 100644 --- a/docs/source/anesthetic.gui.rst +++ b/docs/source/anesthetic.gui.rst @@ -1,6 +1,12 @@ anesthetic.gui package ====================== +.. automodule:: anesthetic.gui + :members: + :undoc-members: + :show-inheritance: + + anesthetic.gui.plot module -------------------------- @@ -19,9 +25,3 @@ anesthetic.gui.widgets module :show-inheritance: - - -.. automodule:: anesthetic.gui - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/anesthetic.read.rst b/docs/source/anesthetic.read.rst index e37320ac..e92044ce 100644 --- a/docs/source/anesthetic.read.rst +++ b/docs/source/anesthetic.read.rst @@ -1,6 +1,12 @@ anesthetic.read package ======================= +.. automodule:: anesthetic.read + :members: + :undoc-members: + :show-inheritance: + + anesthetic.read.chain module ---------------------------- @@ -46,9 +52,3 @@ anesthetic.read.polychord module :show-inheritance: - - -.. automodule:: anesthetic.read - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/anesthetic.rst b/docs/source/anesthetic.rst index 7f44bdf6..ed0b06f7 100644 --- a/docs/source/anesthetic.rst +++ b/docs/source/anesthetic.rst @@ -1,5 +1,11 @@ anesthetic package ================== + +.. automodule:: anesthetic + :members: + :undoc-members: + :show-inheritance: + anesthetic subpackages ---------------------- @@ -33,6 +39,15 @@ anesthetic.convert module :show-inheritance: +anesthetic.core module +~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: anesthetic.core + :members: + :undoc-members: + :show-inheritance: + + anesthetic.kde module ~~~~~~~~~~~~~~~~~~~~~ @@ -51,10 +66,10 @@ anesthetic.plot module :show-inheritance: -anesthetic.core module +anesthetic.samples module ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: anesthetic.core +.. automodule:: anesthetic.samples :members: :undoc-members: :show-inheritance: @@ -78,9 +93,3 @@ anesthetic.utils module :show-inheritance: - - -.. 
automodule:: anesthetic - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/lpandas.rst b/docs/source/lpandas.rst index c4a7fcfc..3be40ff7 100644 --- a/docs/source/lpandas.rst +++ b/docs/source/lpandas.rst @@ -1,18 +1,18 @@ lpandas package =============== -lpandas.core module -------------------- - -.. automodule:: lpandas.core +.. automodule:: lpandas :members: :undoc-members: :show-inheritance: +lpandas.core module +------------------- - -.. automodule:: lpandas +.. automodule:: lpandas.core :members: :undoc-members: :show-inheritance: + + diff --git a/docs/source/wpandas.core.rst b/docs/source/wpandas.core.rst index 976c1b81..518e803a 100644 --- a/docs/source/wpandas.core.rst +++ b/docs/source/wpandas.core.rst @@ -1,7 +1,48 @@ wpandas.core package -=========================== +==================== .. automodule:: wpandas.core :members: :undoc-members: :show-inheritance: + +wpandas.core subpackages +------------------------ + +.. toctree:: + :maxdepth: 1 + + wpandas.core.groupby + wpandas.core.util + + +wpandas.core modules +-------------------- + +wpandas.core.base module +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: wpandas.core.base + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.frame module +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: wpandas.core.frame + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.series module +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. automodule:: wpandas.core.series + :members: + :undoc-members: + :show-inheritance: + + diff --git a/docs/source/wpandas.rst b/docs/source/wpandas.rst index d13b6dbc..a4832fd8 100644 --- a/docs/source/wpandas.rst +++ b/docs/source/wpandas.rst @@ -1,5 +1,11 @@ wpandas package =============== + +.. automodule:: wpandas + :members: + :undoc-members: + :show-inheritance: + wpandas subpackages ------------------- @@ -9,9 +15,3 @@ wpandas subpackages wpandas.core wpandas.plotting - - -.. automodule:: wpandas - :members: - :undoc-members: - :show-inheritance: From 536a56f5b4bba2031745f8facb5b1f8a913cd0e4 Mon Sep 17 00:00:00 2001 From: AdamOrmondroyd Date: Fri, 21 Apr 2023 11:56:48 +0100 Subject: [PATCH 19/19] add wpandas.core.groupby.rst and wpandas.core.util.rst --- docs/source/wpandas.core.groupby.rst | 27 +++++++++++++++++++++++++++ docs/source/wpandas.core.util.rst | 27 +++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 docs/source/wpandas.core.groupby.rst create mode 100644 docs/source/wpandas.core.util.rst diff --git a/docs/source/wpandas.core.groupby.rst b/docs/source/wpandas.core.groupby.rst new file mode 100644 index 00000000..3e48d37c --- /dev/null +++ b/docs/source/wpandas.core.groupby.rst @@ -0,0 +1,27 @@ +wpandas.core.groupby package +============================ + +.. automodule:: wpandas.core.groupby + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.groupby.generic module +----------------------------------- + +.. automodule:: wpandas.core.groupby.generic + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.groupby.groupby module +----------------------------------- + +.. automodule:: wpandas.core.groupby.groupby + :members: + :undoc-members: + :show-inheritance: + + diff --git a/docs/source/wpandas.core.util.rst b/docs/source/wpandas.core.util.rst new file mode 100644 index 00000000..65714b71 --- /dev/null +++ b/docs/source/wpandas.core.util.rst @@ -0,0 +1,27 @@ +wpandas.core.util package +========================= + +.. 
automodule:: wpandas.core.util + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.util.random module +------------------------------- + +.. automodule:: wpandas.core.util.random + :members: + :undoc-members: + :show-inheritance: + + +wpandas.core.util.weights module +-------------------------------- + +.. automodule:: wpandas.core.util.weights + :members: + :undoc-members: + :show-inheritance: + +
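
---

Editor's note (not part of the patch series): the commits above split the weighted-pandas machinery into the `wpandas.core` subpackage (`base`, `frame`, `series`, `groupby`, `util`). As a quick orientation to the resulting surface, the sketch below exercises the methods defined in `wpandas/core/base.py` and `wpandas/core/frame.py` (`set_weights`/`get_weights`, `isweighted`, `neff`, `mean`, `std`, `quantile`, `compress`). It is a usage sketch inferred from the signatures visible in this patch, not code shipped by it; the import path `wpandas.core.frame` is assumed from the file layout introduced above.

```python
# Usage sketch (assumption: wpandas installed with the layout from this patch).
import numpy as np
from wpandas.core.frame import WeightedDataFrame  # defined in wpandas/core/frame.py above

rng = np.random.default_rng(0)
data = rng.normal(size=(1000, 2))
weights = rng.exponential(size=1000)

# The weights= keyword is popped by _WeightedObject.__init__ and routed through
# set_weights(), so the weights live as an extra 'weights' level of the index.
wdf = WeightedDataFrame(data, columns=['x', 'y'], weights=weights)

print(wdf.isweighted())              # True: 'weights' is among the index names
print(wdf.neff())                    # effective sample size (channel capacity of the weights)
print(wdf.mean())                    # weighted mean per column
print(wdf.std())                     # weighted standard deviation per column
print(wdf.quantile(q=[0.16, 0.84]))  # weighted quantiles per column

# compress() discards/repeats rows according to their weights, returning an
# (approximately) unweighted frame; see compress_weights in wpandas/core/util/weights.py.
unweighted = wdf.compress()
print(unweighted.shape)
```

Design note: because the weights are carried in the index rather than as a separate attribute, they survive pandas operations that preserve the index (slicing, alignment, `reset_index` as overridden in `wpandas/core/base.py`), which is what lets the overridden statistics above stay weight-aware without extra bookkeeping.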