Commit 5e88e45

[FIX] Permutation p-values (#447)
* fix and improve null_to_p
* update and improve null_to_p tests
* add separate methods for computing two-sided p-values when null dist is symmetric or not
* expand null_to_p tests
* use abs()-based p-value computation for efficiency
* black
1 parent: b69cedd

File tree

3 files changed: +111 −51 lines

- nimare/meta/cbma/ale.py
- nimare/stats.py
- nimare/tests/test_stats.py


nimare/meta/cbma/ale.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -316,7 +316,7 @@ def _fit(self, dataset1, dataset2):
         p_arr = np.ones(n_voxels)
         for voxel in range(n_voxels):
             p_arr[voxel] = null_to_p(
-                diff_ale_values[voxel], iter_diff_values[:, voxel], tail="two"
+                diff_ale_values[voxel], iter_diff_values[:, voxel], tail="two", symmetric=True
             )
         diff_signs = np.sign(diff_ale_values - np.median(iter_diff_values, axis=0))
 
```
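Why `symmetric=True` is appropriate here: under label permutation, each iteration's ALE difference is equally likely to land on either side of zero, so the voxel-wise null of `diff_ale_values` is (at least approximately) zero-centered and symmetric, which is exactly the assumption the new flag encodes. Below is a minimal standalone sketch (not part of the commit; all names are illustrative) comparing the two two-tailed estimators on such a null:

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy permutation null for a difference statistic: with exchangeable groups,
# swapping group labels flips the sign of the difference, so the null is
# (approximately) symmetric around zero.
null = rng.normal(size=10000)
null = np.concatenate([null, -null])  # enforce exact sign symmetry

test_value = 1.8

# abs()-based two-tailed p (what symmetric=True computes): every null draw
# contributes to the tail count, regardless of its sign.
p_sym = np.mean(np.abs(null) >= abs(test_value))

# Doubled minimum one-tailed p (the symmetric=False route): each tail is
# estimated from only ~half of the draws, so it is noisier.
p_two = 2 * min(np.mean(null >= test_value), np.mean(null <= test_value))

print(p_sym, p_two)  # nearly identical here because the null is symmetric
```

On a symmetric null the two agree in expectation, but the abs()-based estimate counts both tails for every draw, which is where the "twice as efficient, lower variance" note in the new docstring comes from.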

nimare/stats.py

Lines changed: 38 additions & 16 deletions
```diff
@@ -3,7 +3,6 @@
 import warnings
 
 import numpy as np
-from scipy import stats
 
 from . import utils
 
@@ -110,8 +109,8 @@ def pearson(x, y):
     return rs
 
 
-def null_to_p(test_value, null_array, tail="two"):
-    """Return p-value for test value against null array.
+def null_to_p(test_value, null_array, tail="two", symmetric=False):
+    """Return p-value for test value(s) against null array.
 
     Parameters
     ----------
@@ -125,19 +124,36 @@ def null_to_p(test_value, null_array, tail="two"):
         If 'upper', then higher values for the test_value are more significant.
         If 'lower', then lower values for the test_value are more significant.
         Default is 'two'.
+    symmetric : bool
+        When tail="two", indicates how to compute p-values. When False (default),
+        both one-tailed p-values are computed, and the two-tailed p is double
+        the minimum one-tailed p. When True, it is assumed that the null
+        distribution is zero-centered and symmetric, and the two-tailed p-value
+        is computed as P(abs(test_value) >= abs(null_array)).
 
     Returns
     -------
     p_value : :obj:`float`
-        P-value associated with the test value when compared against the null
-        distribution.
+        P-value(s) associated with the test value when compared against the null
+        distribution. Return type matches input type (i.e., a float if
+        test_value is a single float, and an array if test_value is an array).
 
     Notes
     -----
     P-values are clipped based on the number of elements in the null array.
     Therefore no p-values of 0 or 1 should be produced.
+
+    When the null distribution is known to be symmetric and centered on zero,
+    and two-tailed p-values are desired, use symmetric=True, as it is
+    approximately twice as efficient computationally, and has lower variance.
     """
+    if tail not in {"two", "upper", "lower"}:
+        raise ValueError('Argument "tail" must be one of ["two", "upper", "lower"]')
+
+    return_first = isinstance(test_value, (float, int))
     test_value = np.atleast_1d(test_value)
+    null_array = np.array(null_array)
 
     # For efficiency's sake, if there are more than 1000 values, pass only the unique
     # values through percentileofscore(), and then reconstruct.
@@ -147,26 +163,32 @@ def null_to_p(test_value, null_array, tail="two"):
     else:
         reconstruct = False
 
-    # TODO: this runs in N^2 time; is there a more efficient alternative?
-    p = np.array([stats.percentileofscore(null_array, v, "strict") for v in test_value])
-    p /= 100.0
-    if tail == "two":
-        p = (0.5 - np.abs(p - 0.5)) * 2
-    elif tail == "upper":
-        p = 1 - p
-    elif tail != "lower":
-        raise ValueError('Argument "tail" must be one of ["two", "upper", "lower"]')
+    def compute_p(t, null):
+        null = np.sort(null)
+        idx = np.searchsorted(null, t, side="left").astype(float)
+        return 1 - idx / len(null)
 
-    smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array))
+    if tail == "two":
+        if symmetric:
+            p = compute_p(np.abs(test_value), np.abs(null_array))
+        else:
+            p_l = compute_p(test_value, null_array)
+            p_r = compute_p(test_value * -1, null_array * -1)
+            p = 2 * np.minimum(p_l, p_r)
+    elif tail == "lower":
+        p = compute_p(test_value * -1, null_array * -1)
+    else:
+        p = compute_p(test_value, null_array)
 
     # ensure p_value in the following range:
     # smallest_value <= p_value <= (1.0 - smallest_value)
+    smallest_value = np.maximum(np.finfo(float).eps, 1.0 / len(null_array))
     result = np.maximum(smallest_value, np.minimum(p, 1.0 - smallest_value))
 
     if reconstruct:
         result = result[uniq_idx]
 
-    return result
+    return result[0] if return_first else result
 
 
 def nullhist_to_p(test_values, histogram_weights, histogram_bins):
```
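Taken together, the rewrite replaces the O(T·N) `percentileofscore` loop with a sort-plus-`searchsorted` lookup, roughly O((T + N) log N) for T test values against N null samples, and concentrates the tail logic in a single `compute_p` helper that returns P(null >= t); the lower tail and the symmetric two-tailed case are then obtained by negating or taking `abs()` of both inputs. A usage sketch against the new signature (the random null here is illustrative, not from the commit):

```python
import numpy as np
from nimare.stats import null_to_p

rng = np.random.default_rng(42)
null = rng.normal(size=10000)  # e.g., a permutation-based null distribution

# Single float in, single float out
p = null_to_p(2.0, null, tail="two")

# Array in, array out, computed in one vectorized searchsorted pass
p_arr = null_to_p(np.array([-2.0, 0.0, 2.0]), null, tail="two")

# If the null is known to be zero-centered and symmetric, the abs()-based
# estimator uses both tails of the null for every test value
p_sym = null_to_p(2.0, null, tail="two", symmetric=True)

print(p, p_arr, p_sym)
```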

nimare/tests/test_stats.py

Lines changed: 72 additions & 34 deletions
```diff
@@ -5,34 +5,72 @@
 
 import numpy as np
 
-from nimare import stats
-
-
-def test_null_to_p():
-    """
-    Test nimare.stats.null_to_p.
-    """
-    data = np.arange(1, 101)
-    assert math.isclose(stats.null_to_p(0, data, "lower"), 0.01)
-    assert math.isclose(stats.null_to_p(0, data, "upper"), 0.99)
-    assert math.isclose(stats.null_to_p(0, data, "two"), 0.01)
-    assert math.isclose(stats.null_to_p(5.1, data, "lower"), 0.05)
-    assert math.isclose(stats.null_to_p(5.1, data, "upper"), 0.95)
-    assert math.isclose(stats.null_to_p(5.1, data, "two"), 0.1)
-    assert math.isclose(stats.null_to_p(95.1, data, "lower"), 0.95)
-    assert math.isclose(stats.null_to_p(95.1, data, "upper"), 0.05)
-    assert math.isclose(stats.null_to_p(95.1, data, "two"), 0.1)
-    assert math.isclose(stats.null_to_p(101.1, data, "lower"), 0.99)
-    assert math.isclose(stats.null_to_p(101.1, data, "upper"), 0.01)
-    assert math.isclose(stats.null_to_p(101.1, data, "two"), 0.01)
-
-    # modify data to handle edge case
-    data[98] = 100
-    assert math.isclose(stats.null_to_p(1, data, "lower"), 0.01)
-    assert math.isclose(stats.null_to_p(1, data, "upper"), 0.99)
-    assert math.isclose(stats.null_to_p(100.1, data, "lower"), 0.99)
-    assert math.isclose(stats.null_to_p(100.1, data, "upper"), 0.01)
-    assert math.isclose(stats.null_to_p(100.1, data, "two"), 0.01)
+from nimare.stats import null_to_p, nullhist_to_p
+
+
+def test_null_to_p_float():
+    """Test null_to_p with single float input, assuming asymmetric null dist."""
+    null = [-10, -9, -9, -3, -2, -1, -1, 0, 1, 1, 1, 2, 3, 3, 4, 4, 7, 8, 8, 9]
+
+    # Two-tailed
+    assert math.isclose(null_to_p(0, null, "two"), 0.8)
+    assert math.isclose(null_to_p(9, null, "two"), 0.1)
+    assert math.isclose(null_to_p(10, null, "two"), 0.05)
+    assert math.isclose(null_to_p(-9, null, "two"), 0.3)
+    assert math.isclose(null_to_p(-10, null, "two"), 0.1)
+    # Still 0.05 because minimum valid p-value is 1 / len(null)
+    result = null_to_p(20, null, "two")
+    assert result == null_to_p(-20, null, "two")
+    assert math.isclose(result, 0.05)
+
+    # Left/lower-tailed
+    assert math.isclose(null_to_p(9, null, "lower"), 0.95)
+    assert math.isclose(null_to_p(-9, null, "lower"), 0.15)
+    assert math.isclose(null_to_p(0, null, "lower"), 0.4)
+
+    # Right/upper-tailed
+    assert math.isclose(null_to_p(9, null, "upper"), 0.05)
+    assert math.isclose(null_to_p(-9, null, "upper"), 0.95)
+    assert math.isclose(null_to_p(0, null, "upper"), 0.65)
+
+    # Test that 1/n(null) is preserved with extreme values
+    nulldist = np.random.normal(size=10000)
+    assert math.isclose(null_to_p(20, nulldist, "two"), 1 / 10000)
+    assert math.isclose(null_to_p(20, nulldist, "lower"), 1 - 1 / 10000)
+
+
+def test_null_to_p_float_symmetric():
+    """Test null_to_p with single float input, assuming symmetric null dist."""
+    null = [-10, -9, -9, -3, -2, -1, -1, 0, 1, 1, 1, 2, 3, 3, 4, 4, 7, 8, 8, 9]
+
+    # Only need to test two-tailed; symmetry is irrelevant for one-tailed
+    assert math.isclose(null_to_p(0, null, "two", symmetric=True), 0.95)
+    result = null_to_p(9, null, "two", symmetric=True)
+    assert result == null_to_p(-9, null, "two", symmetric=True)
+    assert math.isclose(result, 0.2)
+    result = null_to_p(10, null, "two", symmetric=True)
+    assert result == null_to_p(-10, null, "two", symmetric=True)
+    assert math.isclose(result, 0.05)
+    # Still 0.05 because minimum valid p-value is 1 / len(null)
+    result = null_to_p(20, null, "two", symmetric=True)
+    assert result == null_to_p(-20, null, "two", symmetric=True)
+    assert math.isclose(result, 0.05)
+
+
+def test_null_to_p_array():
+    """Test nimare.stats.null_to_p with 1d array input."""
+    N = 10000
+    nulldist = np.random.normal(size=N)
+    t = np.sort(np.random.normal(size=N))
+    p = np.sort(null_to_p(t, nulldist))
+    assert p.shape == (N,)
+    assert (p < 1).all()
+    assert (p > 0).all()
+    # Resulting distribution should be roughly uniform
+    assert np.abs(p.mean() - 0.5) < 0.02
+    assert np.abs(p.var() - 1 / 12) < 0.02
 
 
 def test_nullhist_to_p():
@@ -45,14 +83,14 @@ def test_nullhist_to_p():
     histogram_weights[-1] = 0  # last bin is outside range, so there are 100 bins with values
 
     # When input is a single value
-    assert math.isclose(stats.nullhist_to_p(0, histogram_weights, histogram_bins), 1.0)
-    assert math.isclose(stats.nullhist_to_p(1, histogram_weights, histogram_bins), 0.99)
-    assert math.isclose(stats.nullhist_to_p(99, histogram_weights, histogram_bins), 0.01)
-    assert math.isclose(stats.nullhist_to_p(100, histogram_weights, histogram_bins), 0.01)
+    assert math.isclose(nullhist_to_p(0, histogram_weights, histogram_bins), 1.0)
+    assert math.isclose(nullhist_to_p(1, histogram_weights, histogram_bins), 0.99)
+    assert math.isclose(nullhist_to_p(99, histogram_weights, histogram_bins), 0.01)
+    assert math.isclose(nullhist_to_p(100, histogram_weights, histogram_bins), 0.01)
 
     # When input is an array
     assert np.allclose(
-        stats.nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
+        nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
         np.array([1.0, 0.99, 0.01, 0.01, 0.01]),
     )
 
@@ -61,6 +99,6 @@ def test_nullhist_to_p():
     histogram_weights[-1, :] = 0  # last bin is outside range, so there are 100 bins with values
 
     assert np.allclose(
-        stats.nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
+        nullhist_to_p([0, 1, 99, 100, 101], histogram_weights, histogram_bins),
         np.array([1.0, 0.99, 0.01, 0.01, 0.01]),
     )
```
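The new tests pin down exact p-values on a small hand-built null, but the docstring's "lower variance" claim for `symmetric=True` is not exercised directly. A quick Monte Carlo sketch along these lines (hypothetical, not part of the test suite) would let one check it:

```python
import numpy as np
from nimare.stats import null_to_p

rng = np.random.default_rng(7)
t = 1.5  # fixed test value
n_null, n_reps = 200, 2000

p_sym = np.empty(n_reps)
p_two = np.empty(n_reps)
for i in range(n_reps):
    # Draw a fresh symmetric, zero-centered null each repetition
    null = rng.standard_normal(n_null)
    p_sym[i] = null_to_p(t, null, tail="two", symmetric=True)
    p_two[i] = null_to_p(t, null, tail="two", symmetric=False)

# Both estimators approximate the same two-tailed p, but the abs()-based
# one should show a smaller standard deviation across repetitions
print(p_sym.mean(), p_two.mean())
print(p_sym.std(), p_two.std())
```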
