Commit 5e88e45

[FIX] Permutation p-values (#447)
* fix and improve null_to_p
* update and improve null_to_p tests
* add separate methods for computing two-sided p-values when null dist is symmetric or not
* expand null_to_p tests
* use abs()-based p-value computation for efficiency
* black
1 parent: b69cedd

File tree

3 files changed: +111 −51 lines

- nimare/meta/cbma/ale.py
- nimare/stats.py
- nimare/tests/test_stats.py


nimare/meta/cbma/ale.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -316,7 +316,7 @@ def _fit(self, dataset1, dataset2):
         p_arr = np.ones(n_voxels)
         for voxel in range(n_voxels):
             p_arr[voxel] = null_to_p(
-                diff_ale_values[voxel], iter_diff_values[:, voxel], tail="two"
+                diff_ale_values[voxel], iter_diff_values[:, voxel], tail="two", symmetric=True
             )
         diff_signs = np.sign(diff_ale_values - np.median(iter_diff_values, axis=0))
 
```
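Why `symmetric=True` is appropriate here: under label permutation, each iteration's ALE difference is equally likely to land on either side of zero, so the voxel-wise null of `diff_ale_values` is (at least approximately) zero-centered and symmetric, which is exactly the assumption the new flag encodes. Below is a minimal standalone sketch (not part of the commit; all names are illustrative) comparing the two two-tailed estimators on such a null:

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy permutation null for a difference statistic: with exchangeable groups,
# swapping group labels flips the sign of the difference, so the null is
# (approximately) symmetric around zero.
null = rng.normal(size=10000)
null = np.concatenate([null, -null])  # enforce exact sign symmetry

test_value = 1.8

# abs()-based two-tailed p (what symmetric=True computes): every null draw
# contributes to the tail count, regardless of its sign.
p_sym = np.mean(np.abs(null) >= abs(test_value))

# Doubled minimum one-tailed p (the symmetric=False route): each tail is
# estimated from only ~half of the draws, so it is noisier.
p_two = 2 * min(np.mean(null >= test_value), np.mean(null <= test_value))

print(p_sym, p_two)  # nearly identical here because the null is symmetric
```

On a symmetric null the two agree in expectation, but the abs()-based estimate counts both tails for every draw, which is where the "twice as efficient, lower variance" note in the new docstring comes from.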

nimare/stats.py

Lines changed: 38 additions & 16 deletions
```diff
@@ -3,7 +3,6 @@
 import warnings
 
 import numpy as np
-from scipy import stats
 
 from . import utils
 
@@ -110,8 +109,8 @@ def pearson(x, y):
     return rs
 
 
-def null_to_p(test_value, null_array, tail="two"):
-    """Return p-value for test value against null array.
+def null_to_p(test_value, null_array, tail="two", symmetric=False):
+    """Return p-value for test value(s) against null array.
 
     Parameters
     ----------
@@ -125,19 +124,36 @@ def null_to_p(test_value, null_array, tail="two"):
         If 'upper', then higher values for the test_value are more significant.
         If 'lower', then lower values for the test_value are more significant.
         Default is 'two'.
+    symmetric : bool
+        When tail="two", indicates how to compute p-values. When False (default),
+        both one-tailed p-values are computed, and the two-tailed p is double
+        the minimum one-tailed p. When True, it is assumed that the null
+        distribution is zero-centered and symmetric, and the two-tailed p-value
+        is computed as P(abs(test_value) >= abs(null_array)).
 
     Returns
     -------
     p_value : :obj:`float`
-        P-value associated with the test value when compared against the null
-        distribution.
+        P-value(s) associated with the test value when compared against the null
+        distribution. Return type matches input type (i.e., a float if
+        test_value is a single float, and an array if test_value is an array).
 
     Notes
     -----
     P-values are clipped based on the number of elements in the null array.
     Therefore no p-values of 0 or 1 should be produced.
+
+    When the null distribution is known to be symmetric and centered on zero,
+    and two-tailed p-values are desired, use symmetric=True, as it is
+    approximately twice as efficient computationally, and has lower variance.
     """
+    if tail not in {"two", "upper", "lower"}:
+        raise ValueError('Argument "tail" must be one of ["two", "upper", "lower"]')
+
+    return_first = isinstance(test_value, (float, int))
     test_value = np.atleast_1d(test_value)
+    null_array = np.array(null_array)
 
     # For efficiency's sake, if there are more than 1000 values, pass only the unique
     # values through percentileofscore(), and then reconstruct.
@@ -147,26 +163,32 @@ def null_to_p(test_value, null_array, tail="two"):
     else:
         reconstruct = False
 
-    # TODO: this runs in N^2 time; is there a more efficient alternative?
-    p = np.array([stats.percentileofscore(null_array, v, "strict") for v in test_value])
-    p /= 100.0
-    if tail == "two":
-        p = (0.5 - np.abs(p - 0.5)) * 2
-    elif tail == "upper":
-        p = 1 - p
-    elif tail != "lower":
-        raise ValueError('Argument "tail" must be one of ["two", "upper", "lower"]')
+    def compute_p(t, null):
+        null = np.sort(null)
+        idx = np.searchsorted(null, t, side="left").astype(float)
+        return 1 - idx / len(null)
 
-    smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array))
+    if tail == "two":
+        if symmetric:
+            p = compute_p(np.abs(test_value), np.abs(null_array))
+        else:
+            p_l = compute_p(test_value, null_array)
+            p_r = compute_p(test_value * -1, null_array * -1)
+            p = 2 * np.minimum(p_l, p_r)
+    elif tail == "lower":
+        p = compute_p(test_value * -1, null_array * -1)
+    else:
+        p = compute_p(test_value, null_array)
 
     # ensure p_value in the following range:
     # smallest_value <= p_value <= (1.0 - smallest_value)
+    smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array))
     result = np.maximum(smallest_value, np.minimum(p, 1.0 - smallest_value))
 
     if reconstruct:
         result = result[uniq_idx]
 
-    return result
+    return result[0] if return_first else result
 
 
 def nullhist_to_p(test_values, histogram_weights, histogram_bins):
```
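Taken together, the rewrite replaces the O(T·N) `percentileofscore` loop with a sort-plus-`searchsorted` lookup, roughly O((T + N) log N) for T test values against N null samples, and concentrates the tail logic in a single `compute_p` helper that returns P(null >= t); the lower tail and the symmetric two-tailed case are then obtained by negating or taking `abs()` of both inputs. A usage sketch against the new signature (the random null here is illustrative, not from the commit):

```python
import numpy as np
from nimare.stats import null_to_p

rng = np.random.default_rng(42)
null = rng.normal(size=10000)  # e.g., a permutation-based null distribution

# Single float in, single float out
p = null_to_p(2.0, null, tail="two")

# Array in, array out, computed in one vectorized searchsorted pass
p_arr = null_to_p(np.array([-2.0, 0.0, 2.0]), null, tail="two")

# If the null is known to be zero-centered and symmetric, the abs()-based
# estimator uses both tails of the null for every test value
p_sym = null_to_p(2.0, null, tail="two", symmetric=True)

print(p, p_arr, p_sym)
```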

nimare/tests/test_stats.py

Lines changed: 72 additions & 34 deletions
```diff
@@ -5,34 +5,72 @@
 
 import numpy as np
 
-from nimare import stats
-
-
-def test_null_to_p():
-    """
-    Test nimare.stats.null_to_p.
-    """
-    data = np.arange(1, 101)
-    assert math.isclose(stats.null_to_p(0, data, "lower"), 0.01)
-    assert math.isclose(stats.null_to_p(0, data, "upper"), 0.99)
-    assert math.isclose(stats.null_to_p(0, data, "two"), 0.01)
-    assert math.isclose(stats.null_to_p(5.1, data, "lower"), 0.05)
-    assert math.isclose(stats.null_to_p(5.1, data, "upper"), 0.95)
-    assert math.isclose(stats.null_to_p(5.1, data, "two"), 0.1)
-    assert math.isclose(stats.null_to_p(95.1, data, "lower"), 0.95)
-    assert math.isclose(stats.null_to_p(95.1, data, "upper"), 0.05)
-    assert math.isclose(stats.null_to_p(95.1, data, "two"), 0.1)
-    assert math.isclose(stats.null_to_p(101.1, data, "lower"), 0.99)
-    assert math.isclose(stats.null_to_p(101.1, data, "upper"), 0.01)
-    assert math.isclose(stats.null_to_p(101.1, data, "two"), 0.01)
-
-    # modify data to handle edge case
-    data[98] = 100
-    assert math.isclose(stats.null_to_p(1, data, "lower"), 0.01)
-    assert math.isclose(stats.null_to_p(1, data, "upper"), 0.99)
-    assert math.isclose(stats.null_to_p(100.1, data, "lower"), 0.99)
-    assert math.isclose(stats.null_to_p(100.1, data, "upper"), 0.01)
-    assert math.isclose(stats.null_to_p(100.1, data, "two"), 0.01)
+from nimare.stats import null_to_p, nullhist_to_p
+
+
+def test_null_to_p_float():
+    """Test null_to_p with single float input, assuming asymmetric null dist."""
+    null = [-10, -9, -9, -3, -2, -1, -1, 0, 1, 1, 1, 2, 3, 3, 4, 4, 7, 8, 8, 9]
+
+    # Two-tailed
+    assert math.isclose(null_to_p(0, null, "two"), 0.8)
+    assert math.isclose(null_to_p(9, null, "two"), 0.1)
+    assert math.isclose(null_to_p(10, null, "two"), 0.05)
+    assert math.isclose(null_to_p(-9, null, "two"), 0.3)
+    assert math.isclose(null_to_p(-10, null, "two"), 0.1)
+    # Still 0.05 because minimum valid p-value is 1 / len(null)
+    result = null_to_p(20, null, "two")
+    assert result == null_to_p(-20, null, "two")
+    assert math.isclose(result, 0.05)
+
+    # Left/lower-tailed
+    assert math.isclose(null_to_p(9, null, "lower"), 0.95)
+    assert math.isclose(null_to_p(-9, null, "lower"), 0.15)
+    assert math.isclose(null_to_p(0, null, "lower"), 0.4)
+
+    # Right/upper-tailed
+    assert math.isclose(null_to_p(9, null, "upper"), 0.05)
+    assert math.isclose(null_to_p(-9, null, "upper"), 0.95)
+    assert math.isclose(null_to_p(0, null, "upper"), 0.65)
+
+    # Test that 1/n(null) is preserved with extreme values
+    nulldist = np.random.normal(size=10000)
+    assert math.isclose(null_to_p(20, nulldist, "two"), 1 / 10000)
+    assert math.isclose(null_to_p(20, nulldist, "lower"), 1 - 1 / 10000)
+
+
+def test_null_to_p_float_symmetric():
+    """Test null_to_p with single float input, assuming symmetric null dist."""
+    null = [-10, -9, -9, -3, -2, -1, -1, 0, 1, 1, 1, 2, 3, 3, 4, 4, 7, 8, 8, 9]
+
+    # Only need to test two-tailed; symmetry is irrelevant for one-tailed
+    assert math.isclose(null_to_p(0, null, "two", symmetric=True), 0.95)
+    result = null_to_p(9, null, "two", symmetric=True)
+    assert result == null_to_p(-9, null, "two", symmetric=True)
+    assert math.isclose(result, 0.2)
+    result = null_to_p(10, null, "two", symmetric=True)
+    assert result == null_to_p(-10, null, "two", symmetric=True)
+    assert math.isclose(result, 0.05)
+    # Still 0.05 because minimum valid p-value is 1 / len(null)
+    result = null_to_p(20, null, "two", symmetric=True)
+    assert result == null_to_p(-20, null, "two", symmetric=True)
+    assert math.isclose(result, 0.05)
+
+
+def test_null_to_p_array():
+    """Test nimare.stats.null_to_p with 1d array input."""
+    N = 10000
+    nulldist = np.random.normal(size=N)
+    t = np.sort(np.random.normal(size=N))
+    p = np.sort(null_to_p(t, nulldist))
+    assert p.shape == (N,)
+    assert (p < 1).all()
+    assert (p > 0).all()
+    # Resulting distribution should be roughly uniform
+    assert np.abs(p.mean() - 0.5) < 0.02
+    assert np.abs(p.var() - 1 / 12) < 0.02
 
 
 def test_nullhist_to_p():
@@ -45,14 +83,14 @@ def test_nullhist_to_p():
     histogram_weights[-1] = 0  # last bin is outside range, so there are 100 bins with values
 
     # When input is a single value
-    assert math.isclose(stats.nullhist_to_p(0, histogram_weights, histogram_bins), 1.0)
-    assert math.isclose(stats.nullhist_to_p(1, histogram_weights, histogram_bins), 0.99)
-    assert math.isclose(stats.nullhist_to_p(99, histogram_weights, histogram_bins), 0.01)
-    assert math.isclose(stats.nullhist_to_p(100, histogram_weights, histogram_bins), 0.01)
+    assert math.isclose(nullhist_to_p(0, histogram_weights, histogram_bins), 1.0)
+    assert math.isclose(nullhist_to_p(1, histogram_weights, histogram_bins), 0.99)
+    assert math.isclose(nullhist_to_p(99, histogram_weights, histogram_bins), 0.01)
+    assert math.isclose(nullhist_to_p(100, histogram_weights, histogram_bins), 0.01)
 
     # When input is an array
     assert np.allclose(
-        stats.nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
+        nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
         np.array([1.0, 0.99, 0.01, 0.01, 0.01]),
     )
 
@@ -61,6 +99,6 @@ def test_nullhist_to_p():
     histogram_weights[-1, :] = 0  # last bin is outside range, so there are 100 bins with values
 
     assert np.allclose(
-        stats.nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
+        nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
         np.array([1.0, 0.99, 0.01, 0.01, 0.01]),
     )
```
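The new tests pin down exact p-values on a small hand-built null, but the docstring's "lower variance" claim for `symmetric=True` is not exercised directly. A quick Monte Carlo sketch along these lines (hypothetical, not part of the test suite) would let one check it:

```python
import numpy as np
from nimare.stats import null_to_p

rng = np.random.default_rng(7)
t = 1.5  # fixed test value
n_null, n_reps = 200, 2000

p_sym = np.empty(n_reps)
p_two = np.empty(n_reps)
for i in range(n_reps):
    # Draw a fresh symmetric, zero-centered null each repetition
    null = rng.standard_normal(n_null)
    p_sym[i] = null_to_p(t, null, tail="two", symmetric=True)
    p_two[i] = null_to_p(t, null, tail="two", symmetric=False)

# Both estimators approximate the same two-tailed p, but the abs()-based
# one should show a smaller standard deviation across repetitions
print(p_sym.mean(), p_two.mean())
print(p_sym.std(), p_two.std())
```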
