diff --git a/docs/methods/decoding.rst b/docs/methods/decoding.rst index 6d09f1200..12e1bb41a 100644 --- a/docs/methods/decoding.rst +++ b/docs/methods/decoding.rst @@ -12,7 +12,7 @@ Discrete decoding approaches characterize subsets of the Dataset or regions of i The BrainMap approach ````````````````````` -:func:`nimare.decode.discrete.BrainMapDecoder`, :func:`nimare.decode.discrete.brainmap_decode` +:class:`nimare.decode.discrete.BrainMapDecoder`, :func:`nimare.decode.discrete.brainmap_decode` The BrainMap method for discrete functional decoding performs both forward and reverse inference using an annotated coordinate-based database and a target sample of studies within that database. @@ -78,7 +78,7 @@ of foci associated with each study in the database. The Neurosynth approach ``````````````````````` -:func:`nimare.decode.discrete.NeurosynthDecoder`, :func:`nimare.decode.discrete.neurosynth_decode` +:class:`nimare.decode.discrete.NeurosynthDecoder`, :func:`nimare.decode.discrete.neurosynth_decode` The Neurosynth method for discrete functional decoding performs both forward and reverse inference using an annotated coordinate-based database and a target sample of studies within that database. @@ -135,12 +135,29 @@ of any given experiment including a given label. - Convert p-value to signed z-value using :math:`P(s^{+}|l^{-})` to determine sign. +The Neurosynth ROI association approach +``````````````````````````````````````` +:class:`nimare.decode.discrete.ROIAssociationDecoder` + +Neurosynth's ROI association approach is quite simple, but it has been used in at least one publication, `Margulies et al. (2016)`_. + +This approach uses the following steps to calculate label-wise correlation values: + +1. Specify a region of interest (ROI) image in the same space as the Dataset. +2. Generate modeled activation (MA) maps for all studies in Dataset with coordinates. +3. Average the MA values within the ROI to get a study-wise MA regressor. +4. Correlate the MA regressor with study-wise annotation values (e.g., tf-idf values). + +.. _Margulies et al. (2016): https://doi.org/10.1073/pnas.1608282113 + + The GC-LDA approach ``````````````````` :func:`nimare.decode.discrete.gclda_decode_roi` The GC-LDA approach sums :math:`P(topic|voxel)` weights within the region of interest to produce topic-wise weights. + Continuous decoding ------------------- diff --git a/examples/04_decoding/plot_discrete_decoders.py b/examples/04_decoding/plot_discrete_decoders.py index 8fb859fd2..58c87850e 100644 --- a/examples/04_decoding/plot_discrete_decoders.py +++ b/examples/04_decoding/plot_discrete_decoders.py @@ -15,8 +15,7 @@ import nibabel as nib import numpy as np -import pandas as pd -from nilearn.plotting import plot_roi, plot_stat_map +from nilearn.plotting import plot_roi import nimare from nimare.decode import discrete @@ -34,7 +33,7 @@ ############################################################################### # Create a region of interest -# ------------------------------------------- +# --------------------------- # First we'll make an ROI arr = np.zeros(dset.masker.mask_img.shape, int) @@ -46,21 +45,35 @@ ids = dset.get_studies_by_mask(mask_img) ############################################################################### -# Decode an ROI image using the Neurosynth method -# ----------------------------------------------- +# Decode an ROI image using the BrainMap method +# --------------------------------------------- # Run the decoder -decoder = discrete.NeurosynthDecoder(correction=None) +decoder = discrete.BrainMapDecoder(correction=None) decoder.fit(dset) decoded_df = decoder.transform(ids=ids) decoded_df.sort_values(by="probReverse", ascending=False).head() ############################################################################### -# Decode an ROI image using the BrainMap method -# ----------------------------------------------- +# Decode an ROI image using the Neurosynth chi-square method +# ---------------------------------------------------------- # Run the decoder -decoder = discrete.BrainMapDecoder(correction=None) +decoder = discrete.NeurosynthDecoder(correction=None) decoder.fit(dset) decoded_df = decoder.transform(ids=ids) decoded_df.sort_values(by="probReverse", ascending=False).head() + +############################################################################### +# Decode an ROI image using the Neurosynth ROI association method +# --------------------------------------------------------------- + +# This method decodes the ROI image directly, rather than comparing subsets of the Dataset like the +# other two. +decoder = discrete.ROIAssociationDecoder(mask_img) +decoder.fit(dset) + +# The `transform` method doesn't take any parameters. +decoded_df = decoder.transform() + +decoded_df.sort_values(by="r", ascending=False).head() diff --git a/nimare/base.py b/nimare/base.py index 6094dd10b..844a8dfd4 100644 --- a/nimare/base.py +++ b/nimare/base.py @@ -419,6 +419,32 @@ class Decoder(NiMAREBase): __id_cols = ["id", "study_id", "contrast_id"] + def _validate_input(self, dataset, drop_invalid=True): + """Search for, and validate, required inputs as necessary.""" + if not hasattr(dataset, "slice"): + raise ValueError( + f"Argument 'dataset' must be a valid Dataset object, not a {type(dataset)}." + ) + + if self._required_inputs: + data = dataset.get(self._required_inputs, drop_invalid=drop_invalid) + # Do not overwrite existing inputs_ attribute. + # This is necessary for PairwiseCBMAEstimator, which validates two sets of coordinates + # in the same object. + # It makes the *strong* assumption that required inputs will not changes within an + # Estimator across fit calls, so all fields of inputs_ will be overwritten instead of + # retaining outdated fields from previous fit calls. + if not hasattr(self, "inputs_"): + self.inputs_ = {} + + for k, v in data.items(): + if v is None: + raise ValueError( + f"Estimator {self.__class__.__name__} requires input dataset to contain " + f"{k}, but no matching data were found." + ) + self.inputs_[k] = v + def _preprocess_input(self, dataset): """Select features for model based on requested features and feature_group. @@ -429,7 +455,7 @@ def _preprocess_input(self, dataset): if self.feature_group is not None: if not self.feature_group.endswith("__"): self.feature_group += "__" - feature_names = dataset.annotations.columns.values + feature_names = self.inputs_["annotations"].columns.values feature_names = [f for f in feature_names if f.startswith(self.feature_group)] if self.features is not None: features = [f.split("__")[-1] for f in feature_names if f in self.features] @@ -437,24 +463,34 @@ def _preprocess_input(self, dataset): features = feature_names else: if self.features is None: - features = dataset.annotations.columns.values + features = self.inputs_["annotations"].columns.values else: features = self.features + features = [f for f in features if f not in self.__id_cols] + n_features_orig = len(features) + # At least one study in the dataset much have each label - counts = (dataset.annotations[features] > self.frequency_threshold).sum(0) + counts = (self.inputs_["annotations"][features] > self.frequency_threshold).sum(0) features = counts[counts > 0].index.tolist() if not len(features): raise Exception("No features identified in Dataset!") + elif len(features) < n_features_orig: + LGR.info(f"Retaining {len(features)}/({n_features_orig} features.") + self.features_ = features - def fit(self, dataset): + def fit(self, dataset, drop_invalid=True): """Fit Decoder to Dataset. Parameters ---------- dataset : :obj:`nimare.dataset.Dataset` Dataset object to analyze. + drop_invalid : :obj:`bool`, optional + Whether to automatically ignore any studies without the required data or not. + Default is True. + Returns ------- @@ -471,6 +507,7 @@ def fit(self, dataset): Selection of features based on requested features and feature group is performed in `Decoder._preprocess_input`. """ + self._validate_input(dataset, drop_invalid=drop_invalid) self._preprocess_input(dataset) self._fit(dataset) diff --git a/nimare/dataset.py b/nimare/dataset.py index e618d61f9..0054d0085 100755 --- a/nimare/dataset.py +++ b/nimare/dataset.py @@ -401,6 +401,11 @@ def get(self, dict_, drop_invalid=True): temp = [self.coordinates.loc[self.coordinates["id"] == id_] for id_ in self.ids] # Replace empty DataFrames with Nones temp = [t if t.size else None for t in temp] + elif vals[0] == "annotations": + # Break DataFrame down into a list of study-specific DataFrames + temp = [self.annotations.loc[self.annotations["id"] == id_] for id_ in self.ids] + # Replace empty DataFrames with Nones + temp = [t if t.size else None for t in temp] else: raise ValueError(f"Input '{vals[0]}' not understood.") @@ -420,7 +425,7 @@ def get(self, dict_, drop_invalid=True): for k in results: results[k] = [results[k][i] for i in keep_idx] - if dict_.get(k, [None])[0] == "coordinates": + if dict_.get(k, [None])[0] in ("coordinates", "annotations"): results[k] = pd.concat(results[k]) return results diff --git a/nimare/decode/continuous.py b/nimare/decode/continuous.py index 4b62af863..24198328b 100755 --- a/nimare/decode/continuous.py +++ b/nimare/decode/continuous.py @@ -134,6 +134,11 @@ class CorrelationDecoder(Decoder): evaluate results based on significance. """ + _required_inputs = { + "coordinates": ("coordinates", None), + "annotations": ("annotations", None), + } + def __init__( self, feature_group=None, @@ -183,6 +188,8 @@ def _fit(self, dataset): feature_ids = dataset.get_studies_by_label( labels=[feature], label_threshold=self.frequency_threshold ) + feature_ids = sorted(list(set(feature_ids).intersection(self.inputs_["id"]))) + LGR.info( f"Decoding {feature} ({i}/{len(self.features)}): {len(feature_ids)}/" f"{len(dataset.ids)} studies" @@ -191,7 +198,7 @@ def _fit(self, dataset): # This seems like a somewhat inelegant solution # Check if the meta method is a pairwise estimator if "dataset2" in inspect.getfullargspec(self.meta_estimator.fit).args: - nonfeature_ids = sorted(list(set(dataset.ids) - set(feature_ids))) + nonfeature_ids = sorted(list(set(self.inputs_["id"]) - set(feature_ids))) nonfeature_dset = dataset.slice(nonfeature_ids) self.meta_estimator.fit(feature_dset, nonfeature_dset) else: @@ -247,6 +254,10 @@ class CorrelationDistributionDecoder(Decoder): evaluate results based on significance. """ + _required_inputs = { + "annotations": ("annotations", None), + } + def __init__( self, feature_group=None, @@ -258,9 +269,9 @@ def __init__( self.feature_group = feature_group self.features = features self.frequency_threshold = frequency_threshold - self.target_image = target_image self.memory_limit = memory_limit self.results = None + self._required_inputs["images"] = ("image", target_image) def _fit(self, dataset): """Collect sets of maps from the Dataset corresponding to each requested feature. @@ -286,8 +297,13 @@ def _fit(self, dataset): feature_ids = dataset.get_studies_by_label( labels=[feature], label_threshold=self.frequency_threshold ) - test_imgs = dataset.get_images(ids=feature_ids, imtype=self.target_image) - test_imgs = list(filter(None, test_imgs)) + selected_ids = sorted(list(set(feature_ids).intersection(self.inputs_["id"]))) + selected_id_idx = [ + i_id for i_id, id_ in enumerate(self.inputs_["id"]) if id_ in selected_ids + ] + test_imgs = [ + img for i_img, img in enumerate(self.inputs_["images"]) if i_img in selected_id_idx + ] if len(test_imgs): feature_arr = safe_transform( test_imgs, diff --git a/nimare/decode/discrete.py b/nimare/decode/discrete.py index e33e82f5f..62cdaf692 100755 --- a/nimare/decode/discrete.py +++ b/nimare/decode/discrete.py @@ -9,8 +9,10 @@ from .. import references from ..base import Decoder from ..due import due -from ..stats import one_way, two_way +from ..meta.kernel import KernelTransformer, MKDAKernel +from ..stats import one_way, pearson, two_way from ..transforms import p_to_z +from ..utils import check_type, get_masker from .utils import weight_priors @@ -152,6 +154,11 @@ class BrainMapDecoder(Decoder): (2015): 1031-1049. https://doi.org/10.1007/s00429-013-0698-0 """ + _required_inputs = { + "coordinates": ("coordinates", None), + "annotations": ("annotations", None), + } + def __init__( self, feature_group=None, @@ -168,7 +175,7 @@ def __init__( self.results = None def _fit(self, dataset): - self.inputs_ = {"coordinates": dataset.coordinates, "annotations": dataset.annotations} + pass def transform(self, ids, ids2=None): """Apply the decoding method to a Dataset. @@ -432,6 +439,11 @@ class NeurosynthDecoder(Decoder): https://doi.org/10.1038/nmeth.1635 """ + _required_inputs = { + "coordinates": ("coordinates", None), + "annotations": ("annotations", None), + } + def __init__( self, feature_group=None, @@ -450,7 +462,7 @@ def __init__( self.results = None def _fit(self, dataset): - self.inputs_ = {"coordinates": dataset.coordinates, "annotations": dataset.annotations} + pass def transform(self, ids, ids2=None): """Apply the decoding method to a Dataset. @@ -557,7 +569,7 @@ def neurosynth_decode( See Also -------- :class:`nimare.decode.discrete.NeurosynthDecoder`: The associated class for this method. - :func:`nimare.decode.continuous.corr_decode`: The correlation-based decoding + :func:`nimare.decode.continuous.CorrelationDecoder`: The correlation-based decoding method employed in Neurosynth and NeuroVault. References @@ -657,3 +669,92 @@ def neurosynth_decode( ) out_df.index.name = "Term" return out_df + + +@due.dcite(references.NEUROSYNTH, description="Introduces Neurosynth.") +class ROIAssociationDecoder(Decoder): + """Perform discrete functional decoding according to Neurosynth's ROI association method. + + Parameters + ---------- + masker : :class:`nilearn.input_data.NiftiMasker`, img_like, or similar + Masker for region of interest. + kernel_transformer : :obj:`nimare.meta.kernel.KernelTransformer`, optional + Kernel with which to create modeled activation maps. Default is MKDAKernel. + feature_group : :obj:`str`, optional + Feature group name used to select labels from a specific source. + Feature groups are stored as prefixes to feature name columns in + Dataset.annotations, with the format ``[source]_[valuetype]__``. + Input may or may not include the trailing underscore. + Default is None, which uses all feature groups available. + features : :obj:`list`, optional + List of features in dataset annotations to use for decoding. + If feature_group is provided, then features should not include the feature group prefix. + If feature_group is *not* provided, then features *should* include the prefix. + Default is None, which uses all features available. + + Notes + ----- + The general approach in this method is: + 1. Define ROI. + 2. Generate MA maps for all studies in Dataset. + 3. Average MA values within ROI to get study-wise MA regressor. + 4. Correlate MA regressor with study-wise annotation values (e.g., tf-idf values). + + References + ---------- + * Yarkoni, Tal, et al. "Large-scale automated synthesis of human + functional neuroimaging data." Nature methods 8.8 (2011): 665. + https://doi.org/10.1038/nmeth.1635 + """ + + _required_inputs = { + "coordinates": ("coordinates", None), + "annotations": ("annotations", None), + } + + def __init__( + self, + masker, + kernel_transformer=MKDAKernel, + feature_group=None, + features=None, + **kwargs, + ): + self.masker = get_masker(masker) + + # Get kernel transformer + kernel_args = { + k.split("kernel__")[1]: v for k, v in kwargs.items() if k.startswith("kernel__") + } + kernel_transformer = check_type(kernel_transformer, KernelTransformer, **kernel_args) + self.kernel_transformer = kernel_transformer + + self.feature_group = feature_group + self.features = features + self.frequency_threshold = 0 + self.results = None + + def _fit(self, dataset): + roi_values = self.kernel_transformer.transform( + self.inputs_["coordinates"], + self.masker, + return_type="array", + ) + self.roi_values_ = roi_values.mean(axis=1) + + def transform(self): + """Apply the decoding method to a Dataset. + + Returns + ------- + results : :class:`pandas.DataFrame` + Table with each label and the following values associated with each + label: 'r'. + """ + feature_values = self.inputs_["annotations"][self.features_].values + corrs = pearson(self.roi_values_, feature_values.T) + out_df = pd.DataFrame(index=self.features_, columns=["r"], data=corrs) + out_df.index.name = "feature" + self.results = out_df + return out_df diff --git a/nimare/tests/conftest.py b/nimare/tests/conftest.py index 87038d054..1b0faf3c2 100644 --- a/nimare/tests/conftest.py +++ b/nimare/tests/conftest.py @@ -86,6 +86,12 @@ def mni_mask(): ) +@pytest.fixture(scope="session") +def roi_img(): + """Load MNI mask for testing.""" + return nib.load(os.path.join(get_test_data_path(), "amygdala_roi.nii.gz")) + + @pytest.fixture(scope="session") def testdata_ibma_resample(tmp_path_factory): """Create dataset for image-based resampling tests.""" diff --git a/nimare/tests/data/amygdala_roi.nii.gz b/nimare/tests/data/amygdala_roi.nii.gz new file mode 100644 index 000000000..feac29e3d Binary files /dev/null and b/nimare/tests/data/amygdala_roi.nii.gz differ diff --git a/nimare/tests/test_decode_continuous.py b/nimare/tests/test_decode_continuous.py index 90ca8b42b..f935b59a8 100644 --- a/nimare/tests/test_decode_continuous.py +++ b/nimare/tests/test_decode_continuous.py @@ -54,7 +54,10 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory): dset = kern.transform(testdata_laird, return_type="dataset") # And now we have images we can use for decoding! - decoder.set_params(target_image=kern.image_type) + decoder = continuous.CorrelationDistributionDecoder( + features=features, + target_image=kern.image_type, + ) decoder.fit(dset) # Make an image to decode diff --git a/nimare/tests/test_decode_discrete.py b/nimare/tests/test_decode_discrete.py index 5577122f7..17b65c771 100644 --- a/nimare/tests/test_decode_discrete.py +++ b/nimare/tests/test_decode_discrete.py @@ -39,10 +39,12 @@ def test_brainmap_decode(testdata_laird): def test_NeurosynthDecoder(testdata_laird): """Smoke test for discrete.NeurosynthDecoder.""" ids = testdata_laird.ids[:5] - decoder = discrete.NeurosynthDecoder() + labels = testdata_laird.get_labels(ids=testdata_laird.ids) + decoder = discrete.NeurosynthDecoder(features=labels) decoder.fit(testdata_laird) decoded_df = decoder.transform(ids=ids) assert isinstance(decoded_df, pd.DataFrame) + assert decoded_df.shape == (len(labels), 6) def test_NeurosynthDecoder_featuregroup(testdata_laird): @@ -64,10 +66,12 @@ def test_NeurosynthDecoder_featuregroup_failure(testdata_laird): def test_BrainMapDecoder(testdata_laird): """Smoke test for discrete.BrainMapDecoder.""" ids = testdata_laird.ids[:5] - decoder = discrete.BrainMapDecoder() + labels = testdata_laird.get_labels(ids=testdata_laird.ids) + decoder = discrete.BrainMapDecoder(features=labels) decoder.fit(testdata_laird) decoded_df = decoder.transform(ids=ids) assert isinstance(decoded_df, pd.DataFrame) + assert decoded_df.shape == (len(labels), 6) def test_BrainMapDecoder_failure(testdata_laird): @@ -75,3 +79,13 @@ def test_BrainMapDecoder_failure(testdata_laird): decoder = discrete.BrainMapDecoder(features=["doggy"]) with pytest.raises(Exception): decoder.fit(testdata_laird) + + +def test_ROIAssociationDecoder(testdata_laird, roi_img): + """Smoke test for discrete.ROIAssociationDecoder.""" + labels = testdata_laird.get_labels(ids=testdata_laird.ids) + decoder = discrete.ROIAssociationDecoder(masker=roi_img, features=labels) + decoder.fit(testdata_laird) + decoded_df = decoder.transform() + assert isinstance(decoded_df, pd.DataFrame) + assert decoded_df.shape == (len(labels), 1)