diff --git a/tests/test_integration/test_two_normal_example.py b/tests/test_integration/test_two_normal_example.py
new file mode 100644
index 0000000..c0ec529
--- /dev/null
+++ b/tests/test_integration/test_two_normal_example.py
@@ -0,0 +1,179 @@
+r"""Integration test for the two-normal example.
+
+In this test, we have the following setup:
+
+- A single parameter $\mu$.
+- A random variable $X$ distributed as $X \sim \mathcal{N}(\mu, \nu_X^2)$ for a fixed
+  $\nu_X$.
+- A random variable $Y$ distributed as $Y \vert X \sim \mathcal{N}(X, \nu_Y^2)$ for a
+  fixed $\nu_Y$.
+- The causal estimand of interest $\sigma = \mathbb{E}[Y]$.
+- The constraints function is $\vert \mathbb{E}[X] - \phi \vert$, where $\phi$ is an
+  empirical observation of the expectation of $X$.
+
+For a fixed tolerance $\epsilon > 0$, we are thus looking to solve the following
+optimisation problem (1):
+
+$$ \text{min / max}_\mu \mathbb{E}[Y], $$
+$$ \text{subject to } \vert \mathbb{E}[X] - \phi \vert \leq \epsilon. $$
+
+It should be noted that analytically, $\mathbb{E}[X] = \mathbb{E}[Y] = \mu$ (by the
+tower property, $\mathbb{E}[Y] = \mathbb{E}[\mathbb{E}[Y \vert X]] = \mathbb{E}[X]$).
+This means that we are effectively solving (2):
+
+$$ \text{min / max}_\mu \mu, $$
+$$ \text{subject to } \vert \mu - \phi \vert \leq \epsilon, $$
+
+which we can immediately spot has solutions $\mu = \phi \pm \epsilon$ (the larger value
+in the case of maximisation).
+"""
+
+from collections.abc import Callable
+from typing import Literal
+
+import jax
+import numpy as np
+import pytest
+from scipy.optimize import NonlinearConstraint, minimize
+
+from causalprog.algorithms import expectation
+from causalprog.causal_problem import CausalProblem
+from causalprog.distribution.normal import NormalFamily
+from causalprog.graph import DistributionNode, Graph, ParameterNode
+
+
+@pytest.mark.parametrize(
+    ("n_samples", "max_or_min"),
+    [
+        pytest.param(1e3, "min", id="[Min] 1e3 samples"),
+        pytest.param(1e6, "min", id="[Min] 1e6 samples"),
+        pytest.param(1e8, "min", id="[Min] 1e8 samples"),
+        pytest.param(1e3, "max", id="[Max] 1e3 samples"),
+        pytest.param(1e6, "max", id="[Max] 1e6 samples"),
+        pytest.param(1e8, "max", id="[Max] 1e8 samples"),
+    ],
+)
+def test_two_normal_example(
+    n_samples: int,
+    max_or_min: Literal["max", "min"],
+    rng_key: jax.Array,
+    nu_x: float = 1.0,
+    nu_y: float = 1.0,
+    epsilon: float = 1.0,
+    data: tuple[float, ...] = (2.0,),
+    x0: tuple[float, ...] = (1.1,),
+) -> None:
+    r"""Integration test for the two-normal example.
+
+    0) Record the analytic answer we expect, `true_analytic_value`.
+    1) Compute the result of solving (2), `analytic_result`, via the optimiser, to
+       ensure that we have set up and understood the problem (and analytical answer)
+       correctly. This also makes us robust against behaviour changes in the
+       `causal_estimand` and `constraints` methods of the `CausalProblem` class.
+    2) Check that `analytic_result` is close to `true_analytic_value`.
+    3) Compute the result of (1) via the optimiser, `result`.
+    4) Check that `result` is close to both `true_analytic_value` and `analytic_result`
+       (see below for tolerances used).
+
+    Empirical experiments suggest that the absolute difference between `result` and
+    `analytic_result` scales in proportion to the inverse square root of the number of
+    samples used;
+
+    $$ \mathrm{atol} \propto \mathrm{samples}^{-0.5}, $$
+
+    so to be generous, we use `atol = 10 ** (-np.floor(np.log10(n_samples) / 2.0))`.
+
+    Finally, it should be noted that in order to obtain a good answer (in either case),
+    we need to provide the Jacobians of the causal estimand and constraints functions
+    to the solver. Without these, the results are poor (if the optimiser converges at
+    all).
+    """
+    minimise_options = {"disp": False, "maxiter": 20}
+    # Maximisation is minimisation of the negation of the objective function.
+    prefactor = 1.0 if max_or_min == "min" else -1.0
+
+    n_samples = int(n_samples)
+    data = np.array(data, ndmin=1)
+    x0 = np.array(x0, ndmin=1)
+    true_analytic_value = np.array(data) - prefactor * epsilon
+
+    mu = ParameterNode("mu")
+    x = DistributionNode(
+        NormalFamily(),
+        label="x",
+        parameters={"mean": "mu"},
+        constant_parameters={"cov": nu_x**2},
+        is_outcome=True,
+    )
+    y = DistributionNode(
+        NormalFamily(),
+        label="y",
+        parameters={"mean": "x"},
+        constant_parameters={"cov": nu_y**2},
+        is_outcome=True,
+    )
+
+    graph = Graph(label="G")
+    graph.add_edge(mu, x)
+    graph.add_edge(x, y)
+
+    def expectation_with_n_samples() -> Callable[[Graph, DistributionNode], float]:
+        def _inner(g: Graph, rv: DistributionNode) -> float:
+            return expectation(g, rv.label, samples=n_samples, rng_key=rng_key)
+
+        return _inner
+
+    # Set up the CausalProblem instance.
+
+    cp = CausalProblem(graph, label="CP")
+    cp.set_causal_estimand(
+        expectation_with_n_samples(),
+        rvs_to_nodes={"rv": "y"},
+        graph_argument="g",
+    )
+    cp.set_constraints(
+        expectation_with_n_samples(),
+        rvs_to_nodes={"rv": "x"},
+        graph_argument="g",
+    )
+
+    def ce(p):
+        return prefactor * cp.causal_estimand(p)
+
+    def ce_jacobian(*p):
+        # Gradient is prefactor * 1.0 since we're effectively minimising f(p) = p.
+        return prefactor
+
+    def con(p):
+        return np.abs(cp.constraints(p) - data)
+
+    def con_jacobian(p):
+        # (Sub)gradient of |p - data|: -1 below the observed value, +1 above it.
+        return -1.0 * (p < data) + (p > data)
+
+    # 1) Analytic solve of (2).
+    def analytic_ce(p):
+        return prefactor * p
+
+    def analytic_con(p):
+        return np.abs(p - data)
+
+    analytic_constraint = NonlinearConstraint(analytic_con, lb=-np.inf, ub=epsilon)
+    analytic_result = minimize(
+        analytic_ce, x0, constraints=[analytic_constraint], options=minimise_options
+    )
+
+    # 2) Check the analytic solve.
+    assert np.isclose(analytic_result.x, true_analytic_value)
+
+    # 3) Solve (1) via the CausalProblem class methods.
+    nlc = NonlinearConstraint(con, lb=-np.inf, ub=epsilon, jac=con_jacobian)
+    result = minimize(
+        ce,
+        x0,
+        constraints=[nlc],
+        options=minimise_options,
+        jac=ce_jacobian,
+    )
+
+    # 4) Check proximity to the correct solution.
+    atol = 10.0 ** (-np.floor(np.log10(n_samples) / 2.0))
+    assert np.isclose(result.x, analytic_result.x, atol=atol)
+    assert np.isclose(result.x, true_analytic_value, atol=atol)
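
For context on the tolerance used in step 4: a minimal sketch, not part of the patch and using only NumPy with illustrative values for `mu` and `nu_x`, of why the Monte Carlo error in an estimated expectation shrinks roughly like samples**-0.5, which is what motivates the `atol` choice in the test.

    import numpy as np

    # The sample mean of N draws from N(mu, nu_x^2) has standard error nu_x / sqrt(N),
    # so the observed error should track the samples**-0.5 scaling used for `atol`.
    rng = np.random.default_rng(seed=0)
    mu, nu_x = 2.0, 1.0
    for n in (10**3, 10**5, 10**7):
        err = abs(rng.normal(mu, nu_x, size=n).mean() - mu)
        print(n, err, nu_x / np.sqrt(n))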