Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split up Bayes Factor stat and plotting functions #2406

Merged
merged 10 commits into from
Jan 9, 2025
Merged
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
### Maintenance and fixes
- Make `arviz.data.generate_dims_coords` handle `dims` and `default_dims` consistently ([2395](https://github.com/arviz-devs/arviz/pull/2395))
- Only emit a warning for custom groups in `InferenceData` when explicitly requested ([2401](https://github.com/arviz-devs/arviz/pull/2401))
- Update `method="sd"` of `mcse` to not use normality assumption ([2167](https://github.com/arviz-devs/arviz/pull/2167))
- Splits Bayes Factor computation out from `az.plot_bf` into `az.bayes_factor` ([2402](https://github.com/arviz-devs/arviz/issues/2402))
- Update `method="sd"` of `mcse` to not use normality assumption ([2167](https://github.com/arviz-devs/arviz/pull/2167))

### Documentation
- Add example of ECDF comparison plot to gallery ([2178](https://github.com/arviz-devs/arviz/pull/2178))
Expand Down
35 changes: 9 additions & 26 deletions arviz/plots/bfplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
# pylint: disable=unbalanced-tuple-unpacking
import logging

from numpy import interp

from ..data.utils import extract
from .plot_utils import get_plotting_function
from ..stats.density_utils import _kde_linear
from ..stats import bayes_factor

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -94,32 +92,17 @@ def plot_bf(
>>> az.plot_bf(idata, var_name="a", ref_val=0)

"""
posterior = extract(idata, var_names=var_name).values

if ref_val > posterior.max() or ref_val < posterior.min():
_log.warning(
"The reference value is outside of the posterior. "
"This translate into infinite support for H1, which is most likely an overstatement."
)

if posterior.ndim > 1:
_log.warning("Posterior distribution has {posterior.ndim} dimensions")

if prior is None:
prior = extract(idata, var_names=var_name, group="prior").values

if posterior.dtype.kind == "f":
posterior_grid, posterior_pdf = _kde_linear(posterior)
prior_grid, prior_pdf = _kde_linear(prior)
posterior_at_ref_val = interp(ref_val, posterior_grid, posterior_pdf)
prior_at_ref_val = interp(ref_val, prior_grid, prior_pdf)

elif posterior.dtype.kind == "i":
posterior_at_ref_val = (posterior == ref_val).mean()
prior_at_ref_val = (prior == ref_val).mean()
bf, p_at_ref_val = bayes_factor(
idata, var_name, prior=prior, ref_val=ref_val, return_ref_vals=True
)
bf_10 = bf["BF10"]
bf_01 = bf["BF01"]

bf_10 = prior_at_ref_val / posterior_at_ref_val
bf_01 = 1 / bf_10
posterior = extract(idata, var_names=var_name)

bfplot_kwargs = dict(
ax=ax,
Expand All @@ -128,8 +111,8 @@ def plot_bf(
prior=prior,
posterior=posterior,
ref_val=ref_val,
prior_at_ref_val=prior_at_ref_val,
posterior_at_ref_val=posterior_at_ref_val,
prior_at_ref_val=p_at_ref_val["prior"],
posterior_at_ref_val=p_at_ref_val["posterior"],
var_name=var_name,
colors=colors,
figsize=figsize,
Expand Down
1 change: 1 addition & 0 deletions arviz/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

__all__ = [
"apply_test_function",
"bayes_factor",
"bfmi",
"compare",
"hdi",
Expand Down
86 changes: 86 additions & 0 deletions arviz/stats/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .density_utils import get_bins as _get_bins
from .density_utils import histogram as _histogram
from .density_utils import kde as _kde
from .density_utils import _kde_linear
from .diagnostics import _mc_error, _multichain_statistics, ess
from .stats_utils import ELPDData, _circular_standard_deviation, smooth_data
from .stats_utils import get_log_likelihood as _get_log_likelihood
Expand All @@ -41,6 +42,7 @@

__all__ = [
"apply_test_function",
"bayes_factor",
"compare",
"hdi",
"loo",
Expand Down Expand Up @@ -2337,3 +2339,87 @@ def _cjs_dist(draws, weights):
bound = cdf_p_int + cdf_q_int

return np.sqrt((cjs_pq + cjs_qp) / bound)


def bayes_factor(idata, var_name, ref_val=0, prior=None, return_ref_vals=False):
r"""Approximated Bayes Factor for comparing hypothesis of two nested models.

The Bayes factor is estimated by comparing a model (H1) against a model in which the
parameter of interest has been restricted to be a point-null (H0). This computation
assumes the models are nested and thus H0 is a special case of H1.

Notes
-----
The bayes Factor is approximated as the Savage-Dickey density ratio
algorithm presented in [1]_.

Parameters
----------
idata : InferenceData
Any object that can be converted to an :class:`arviz.InferenceData` object
Refer to documentation of :func:`arviz.convert_to_dataset` for details.
var_name : str, optional
Name of variable we want to test.
ref_val : int, default 0
Point-null for Bayes factor estimation.
prior : numpy.array, optional
In case we want to use different prior, for example for sensitivity analysis.
return_ref_vals : bool, optional
Whether to return the values of the prior and posterior at the reference value.
Used by :func:`arviz.plot_bf` to display the distribution comparison.


Returns
-------
dict : A dictionary with BF10 (Bayes Factor 10 (H1/H0 ratio), and BF01 (H0/H1 ratio).

References
----------
.. [1] Heck, D., 2019. A caveat on the Savage-Dickey density ratio:
The case of computing Bayes factors for regression parameters.

Examples
--------
Moderate evidence indicating that the parameter "a" is different from zero.

.. ipython::

In [1]: import numpy as np
...: import arviz as az
...: idata = az.from_dict(posterior={"a":np.random.normal(1, 0.5, 5000)},
...: prior={"a":np.random.normal(0, 1, 5000)})
...: az.bayes_factor(idata, var_name="a", ref_val=0)

"""

posterior = extract(idata, var_names=var_name).values

if ref_val > posterior.max() or ref_val < posterior.min():
_log.warning(
"The reference value is outside of the posterior. "
"This translate into infinite support for H1, which is most likely an overstatement."
)

if posterior.ndim > 1:
_log.warning("Posterior distribution has {posterior.ndim} dimensions")

if prior is None:
prior = extract(idata, var_names=var_name, group="prior").values

if posterior.dtype.kind == "f":
posterior_grid, posterior_pdf, *_ = _kde_linear(posterior)
prior_grid, prior_pdf, *_ = _kde_linear(prior)
posterior_at_ref_val = np.interp(ref_val, posterior_grid, posterior_pdf)
prior_at_ref_val = np.interp(ref_val, prior_grid, prior_pdf)

elif posterior.dtype.kind == "i":
posterior_at_ref_val = (posterior == ref_val).mean()
prior_at_ref_val = (prior == ref_val).mean()

bf_10 = prior_at_ref_val / posterior_at_ref_val
bf = {"BF10": bf_10, "BF01": 1 / bf_10}

if return_ref_vals:
return (bf, {"prior": prior_at_ref_val, "posterior": posterior_at_ref_val})
else:
return bf
6 changes: 2 additions & 4 deletions arviz/tests/base_tests/test_plots_matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2082,7 +2082,5 @@ def test_plot_bf():
idata = from_dict(
posterior={"a": np.random.normal(1, 0.5, 5000)}, prior={"a": np.random.normal(0, 1, 5000)}
)
bf_dict0, _ = plot_bf(idata, var_name="a", ref_val=0)
bf_dict1, _ = plot_bf(idata, prior=np.random.normal(0, 10, 5000), var_name="a", ref_val=0)
assert bf_dict0["BF10"] > bf_dict0["BF01"]
assert bf_dict1["BF10"] < bf_dict1["BF01"]
_, bf_plot = plot_bf(idata, var_name="a", ref_val=0)
assert bf_plot is not None
11 changes: 11 additions & 0 deletions arviz/tests/base_tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from ...rcparams import rcParams
from ...stats import (
apply_test_function,
bayes_factor,
compare,
ess,
hdi,
Expand Down Expand Up @@ -871,3 +872,13 @@ def test_priorsens_coords(psens_data):
assert "mu" in result
assert "theta" in result
assert "school" in result.theta_t.dims


def test_bayes_factor():
idata = from_dict(
posterior={"a": np.random.normal(1, 0.5, 5000)}, prior={"a": np.random.normal(0, 1, 5000)}
)
bf_dict0 = bayes_factor(idata, var_name="a", ref_val=0)
bf_dict1 = bayes_factor(idata, prior=np.random.normal(0, 10, 5000), var_name="a", ref_val=0)
assert bf_dict0["BF10"] > bf_dict0["BF01"]
assert bf_dict1["BF10"] < bf_dict1["BF01"]