From 282f197136d6171203f4457dd79da7bb943b28c8 Mon Sep 17 00:00:00 2001 From: "Julio A. Peraza" <52050407+JulioAPeraza@users.noreply.github.com> Date: Fri, 13 Jan 2023 09:53:37 -0500 Subject: [PATCH] Remove "dataset" `return_type` option from kernel transformers (#752) * Remove "dataset" `return_type` option from kernel transformers * Drop tests ma_map_reuse * Update test_meta_kernel.py * Update test_meta_kernel.py * Update nimare/meta/kernel.py Co-authored-by: Taylor Salo * Update nimare/meta/kernel.py Co-authored-by: Taylor Salo * Add versionchanged to child classes Co-authored-by: Taylor Salo --- nimare/meta/cbma/base.py | 51 ++------------ nimare/meta/kernel.py | 95 ++++++++------------------ nimare/tests/test_decode_continuous.py | 14 +++- nimare/tests/test_meta_ale.py | 60 ---------------- nimare/tests/test_meta_kernel.py | 7 +- 5 files changed, 51 insertions(+), 176 deletions(-) diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py index 484f05bb4..d1856b718 100644 --- a/nimare/meta/cbma/base.py +++ b/nimare/meta/cbma/base.py @@ -1,7 +1,6 @@ """CBMA methods from the ALE and MKDA families.""" import logging from abc import abstractmethod -from hashlib import md5 import nibabel as nib import numpy as np @@ -92,8 +91,7 @@ def _preprocess_input(self, dataset): ---------- dataset : :obj:`~nimare.dataset.Dataset` In this method, the Dataset is used to (1) select the appropriate mask image, - (2) identify any pre-generated MA maps stored in its images attribute, - and (3) extract sample size metadata and place it into the coordinates input. + and (2) extract sample size metadata and place it into the coordinates input. Attributes ---------- @@ -111,17 +109,6 @@ def _preprocess_input(self, dataset): for name, (type_, _) in self._required_inputs.items(): if type_ == "coordinates": - # Try to load existing MA maps - if hasattr(self, "kernel_transformer"): - self.kernel_transformer._infer_names(affine=md5(mask_img.affine).hexdigest()) - if self.kernel_transformer.image_type in dataset.images.columns: - files = dataset.get_images( - ids=self.inputs_["id"], - imtype=self.kernel_transformer.image_type, - ) - if all(f is not None for f in files): - self.inputs_["ma_maps"] = files - # Calculate IJK matrix indices for target mask # Mask space is assumed to be the same as the Dataset's space # These indices are used directly by any KernelTransformer @@ -220,37 +207,13 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"): Return a 4D sparse array of shape (n_studies, mask.shape) with MA maps. """ - if maps_key in self.inputs_.keys(): - LGR.debug(f"Loading pre-generated MA maps ({maps_key}).") - all_exp = [] - all_coords = [] - all_data = [] - for i_exp, img in enumerate(self.inputs_[maps_key]): - img_data = nib.load(img).get_fdata() - nonzero_idx = np.where(img_data != 0) - - all_exp.append(np.full(nonzero_idx[0].shape[0], i_exp)) - all_coords.append(np.vstack(nonzero_idx)) - all_data.append(img_data[nonzero_idx]) - - n_studies = len(self.inputs_[maps_key]) - shape = img_data.shape - kernel_shape = (n_studies,) + shape - - exp = np.hstack(all_exp) - coords = np.vstack((exp.flatten(), np.hstack(all_coords))) - data = np.hstack(all_data).flatten() - - ma_maps = sparse.COO(coords, data, shape=kernel_shape) + LGR.debug(f"Generating MA maps from coordinates ({coords_key}).") - else: - LGR.debug(f"Generating MA maps from coordinates ({coords_key}).") - - ma_maps = self.kernel_transformer.transform( - self.inputs_[coords_key], - masker=self.masker, - return_type="sparse", - ) + ma_maps = self.kernel_transformer.transform( + self.inputs_[coords_key], + masker=self.masker, + return_type="sparse", + ) return ma_maps diff --git a/nimare/meta/kernel.py b/nimare/meta/kernel.py index 3dbbafe7f..5cdea75b3 100644 --- a/nimare/meta/kernel.py +++ b/nimare/meta/kernel.py @@ -7,17 +7,14 @@ from __future__ import division import logging -import os -from hashlib import md5 import nibabel as nib import numpy as np import pandas as pd -import sparse from nimare.base import NiMAREBase from nimare.meta.utils import compute_ale_ma, compute_kda_ma, get_ale_kernel -from nimare.utils import _add_metadata_to_dataframe, _safe_transform, mm2vox +from nimare.utils import _add_metadata_to_dataframe, mm2vox LGR = logging.getLogger(__name__) @@ -25,6 +22,10 @@ class KernelTransformer(NiMAREBase): """Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`. + .. versionchanged:: 0.0.13 + + - Remove "dataset" `return_type` option. + Coordinate-based meta-analyses leverage coordinates reported in neuroimaging papers to simulate the thresholded statistical maps from the original analyses. This generally involves convolving each coordinate with @@ -39,7 +40,7 @@ class KernelTransformer(NiMAREBase): """ def _infer_names(self, **kwargs): - """Determine filename pattern and image type for files created with this transformer. + """Determine filename pattern and image type. The parameters used to construct the filenames come from the transformer's parameters (attributes saved in ``__init__()``). @@ -53,7 +54,7 @@ def _infer_names(self, **kwargs): Attributes ---------- filename_pattern : str - Filename pattern for images that will be saved by the transformer. + Filename pattern for images. image_type : str Name of the corresponding column in the Dataset.images DataFrame. """ @@ -81,9 +82,9 @@ def transform(self, dataset, masker=None, return_type="image"): Mask to apply to MA maps. Required if ``dataset`` is a DataFrame. If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used. Default is None. - return_type : {'sparse', 'array', 'image', 'dataset'}, optional - Whether to return a numpy array ('array'), a list of niimgs ('image'), - or a Dataset with MA images saved as files ('dataset'). + return_type : {'sparse', 'array', 'image'}, optional + Whether to return a sparse matrix ('sparse'), a numpy array ('array'), + or a list of niimgs ('image'). Default is 'image'. Returns @@ -97,19 +98,17 @@ def transform(self, dataset, masker=None, return_type="image"): contrast and V is voxel. If return_type is 'image', a list of modeled activation images (one for each of the Contrasts in the input dataset). - If return_type is 'dataset', a new Dataset object with modeled - activation images saved to files and referenced in the - Dataset.images attribute. Attributes ---------- filename_pattern : str - Filename pattern for MA maps that will be saved by the transformer. + Filename pattern for MA maps. If :meth:`_infer_names` is executed. image_type : str Name of the corresponding column in the Dataset.images DataFrame. + If :meth:`_infer_names` is executed. """ - if return_type not in ("sparse", "array", "image", "dataset"): - raise ValueError('Argument "return_type" must be "image", "array", or "dataset".') + if return_type not in ("sparse", "array", "image"): + raise ValueError('Argument "return_type" must be "image", "array", or "sparse".') if isinstance(dataset, pd.DataFrame): assert ( @@ -117,9 +116,6 @@ def transform(self, dataset, masker=None, return_type="image"): ), "Argument 'masker' must be provided if dataset is a DataFrame." mask = masker.mask_img coordinates = dataset - assert ( - return_type != "dataset" - ), "Input dataset must be a Dataset if return_type='dataset'." # Calculate IJK. Must assume that the masker is in same space, # but has different affine, from original IJK. @@ -129,24 +125,6 @@ def transform(self, dataset, masker=None, return_type="image"): mask = masker.mask_img coordinates = dataset.coordinates.copy() - # Determine MA map filenames. Must happen after parameters are set. - self._infer_names(affine=md5(mask.affine).hexdigest()) - - # Check for existing MA maps - # Use coordinates to get IDs instead of Dataset.ids bc of possible - # mismatch between full Dataset and contrasts with coordinates. - if self.image_type in dataset.images.columns: - files = dataset.get_images(ids=coordinates["id"].unique(), imtype=self.image_type) - if all(f is not None for f in files): - LGR.debug("Files already exist. Using them.") - if return_type == "array": - masked_data = _safe_transform(files, masker) - return masked_data - elif return_type == "image": - return [nib.load(f) for f in files] - elif return_type == "dataset": - return dataset.copy() - # Calculate IJK if not np.array_equal(mask.affine, dataset.masker.mask_img.affine): LGR.warning("Mask affine does not match Dataset affine. Assuming same space.") @@ -170,24 +148,13 @@ def transform(self, dataset, masker=None, return_type="image"): filter_func=np.mean, ) - # Generate the MA maps if they weren't already available as images if return_type == "array": mask_data = mask.get_fdata().astype(bool) elif return_type == "image": dtype = type(self.value) if hasattr(self, "value") else float mask_data = mask.get_fdata().astype(dtype) - elif return_type == "dataset": - if dataset.basepath is None: - raise ValueError( - "Dataset output path is not set. Set the path with Dataset.update_path()." - ) - elif not os.path.isdir(dataset.basepath): - raise ValueError( - "Output directory does not exist. Set the path to an existing folder with " - "Dataset.update_path()." - ) - dataset = dataset.copy() + # Generate the MA maps transformed_maps = self._transform(mask, coordinates) if return_type == "sparse": @@ -195,11 +162,8 @@ def transform(self, dataset, masker=None, return_type="image"): imgs = [] # Loop over exp ids since sparse._coo.core.COO is not iterable - for i_exp, id_ in enumerate(transformed_maps[1]): - if isinstance(transformed_maps[0][i_exp], sparse._coo.core.COO): - # This step is slow, but it is here just in case user want a - # return_type = "array", "image", or "dataset" - kernel_data = transformed_maps[0][i_exp].todense() + for i_exp, _ in enumerate(transformed_maps[1]): + kernel_data = transformed_maps[0][i_exp].todense() if return_type == "array": img = kernel_data[mask_data] @@ -208,11 +172,6 @@ def transform(self, dataset, masker=None, return_type="image"): kernel_data *= mask_data img = nib.Nifti1Image(kernel_data, mask.affine) imgs.append(img) - elif return_type == "dataset": - img = nib.Nifti1Image(kernel_data, mask.affine) - out_file = os.path.join(dataset.basepath, self.filename_pattern.format(id=id_)) - img.to_filename(out_file) - dataset.images.loc[dataset.images["id"] == id_, self.image_type] = out_file del kernel_data, transformed_maps @@ -220,14 +179,6 @@ def transform(self, dataset, masker=None, return_type="image"): return np.vstack(imgs) elif return_type == "image": return imgs - elif return_type == "dataset": - # Replace NaNs with Nones - dataset.images[self.image_type] = dataset.images[self.image_type].where( - dataset.images[self.image_type].notnull(), None - ) - # Infer relative path - dataset.images = dataset.images - return dataset def _transform(self, mask, coordinates): """Apply the kernel's unique transformer. @@ -264,6 +215,10 @@ class ALEKernel(KernelTransformer): will be determined on a study-wise basis based on the sample sizes available in the input, via the method described in :footcite:t:`eickhoff2012activation`. + .. versionchanged:: 0.0.13 + + - Remove "dataset" `return_type` option. + .. versionchanged:: 0.0.12 * Remove low-memory option in favor of sparse arrays for kernel transformers. @@ -326,6 +281,10 @@ def _transform(self, mask, coordinates): class KDAKernel(KernelTransformer): """Generate KDA modeled activation images from coordinates. + .. versionchanged:: 0.0.13 + + - Remove "dataset" `return_type` option. + .. versionchanged:: 0.0.12 * Remove low-memory option in favor of sparse arrays for kernel transformers. @@ -363,6 +322,10 @@ def _transform(self, mask, coordinates): class MKDAKernel(KDAKernel): """Generate MKDA modeled activation images from coordinates. + .. versionchanged:: 0.0.13 + + - Remove "dataset" `return_type` option. + .. versionchanged:: 0.0.12 * Remove low-memory option in favor of sparse arrays for kernel transformers. diff --git a/nimare/tests/test_decode_continuous.py b/nimare/tests/test_decode_continuous.py index c4a31738d..60786cd3a 100644 --- a/nimare/tests/test_decode_continuous.py +++ b/nimare/tests/test_decode_continuous.py @@ -2,6 +2,8 @@ Tests for nimare.decode.continuous.gclda_decode_map are in test_annotate_gclda. """ +import os + import pandas as pd import pytest @@ -29,6 +31,7 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory): tmpdir = tmp_path_factory.mktemp("test_CorrelationDistributionDecoder") testdata_laird = testdata_laird.copy() + dset = testdata_laird.copy() features = testdata_laird.get_labels(ids=testdata_laird.ids[0])[:5] decoder = continuous.CorrelationDistributionDecoder(features=features) @@ -42,7 +45,16 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory): # Then let's make some images to decode kern = kernel.MKDAKernel(r=10, value=1) - dset = kern.transform(testdata_laird, return_type="dataset") + kern._infer_names() # Determine MA map filenames + + imgs = kern.transform(testdata_laird, return_type="image") + for i_img, img in enumerate(imgs): + id_ = testdata_laird.ids[i_img] + out_file = os.path.join(testdata_laird.basepath, kern.filename_pattern.format(id=id_)) + + # Add file names to dset.images DataFrame + img.to_filename(out_file) + dset.images.loc[testdata_laird.images["id"] == id_, kern.image_type] = out_file # And now we have images we can use for decoding! decoder = continuous.CorrelationDistributionDecoder( diff --git a/nimare/tests/test_meta_ale.py b/nimare/tests/test_meta_ale.py index 97018818d..85e7571c8 100644 --- a/nimare/tests/test_meta_ale.py +++ b/nimare/tests/test_meta_ale.py @@ -1,5 +1,4 @@ """Test nimare.meta.ale (ALE/SCALE meta-analytic algorithms).""" -import logging import os import pickle @@ -15,65 +14,6 @@ from nimare.utils import vox2mm -def test_ALE_ma_map_reuse(testdata_cbma, tmp_path_factory, caplog): - """Test that MA maps are re-used when appropriate.""" - from nimare.meta import kernel - - tmpdir = tmp_path_factory.mktemp("test_ALE_ma_map_reuse") - testdata_cbma.update_path(tmpdir) - - # ALEKernel cannot extract sample_size from a Dataset, - # so we need to set it for this kernel and for the later meta-analyses. - kern = kernel.ALEKernel(sample_size=20) - dset = kern.transform(testdata_cbma, return_type="dataset") - - # The associated column should be in the new Dataset's images DataFrame - cols = dset.images.columns.tolist() - assert any(["ALEKernel" in col for col in cols]) - - # The Dataset without the images will generate them from scratch. - # If drop_invalid is False, then there should be an Exception, since two studies in the test - # dataset are missing coordinates. - meta = ale.ALE(kernel__sample_size=20) - with pytest.raises(Exception): - meta.fit(testdata_cbma, drop_invalid=False) - - with caplog.at_level(logging.DEBUG, logger="nimare.meta.cbma.base"): - meta.fit(testdata_cbma) - assert "Loading pre-generated MA maps" not in caplog.text - - # The Dataset with the images will re-use them, as evidenced by the logger message. - with caplog.at_level(logging.DEBUG, logger="nimare.meta.cbma.base"): - meta.fit(dset) - assert "Loading pre-generated MA maps" in caplog.text - - -def test_ALESubtraction_ma_map_reuse(testdata_cbma, tmp_path_factory, caplog): - """Test that MA maps are re-used when appropriate.""" - from nimare.meta import kernel - - tmpdir = tmp_path_factory.mktemp("test_ALESubtraction_ma_map_reuse") - testdata_cbma.update_path(tmpdir) - - # ALEKernel cannot extract sample_size from a Dataset, - # so we need to set it for this kernel and for the later meta-analyses. - kern = kernel.ALEKernel(sample_size=20) - dset = kern.transform(testdata_cbma, return_type="dataset") - - # The Dataset without the images will generate them from scratch. - sub_meta = ale.ALESubtraction(n_iters=10, kernel__sample_size=20) - - with caplog.at_level(logging.DEBUG, logger="nimare.meta.cbma.base"): - sub_meta.fit(testdata_cbma, testdata_cbma) - assert "Loading pre-generated MA maps" not in caplog.text - - # The Dataset with the images will re-use them, - # as evidenced by the logger message. - with caplog.at_level(logging.DEBUG, logger="nimare.meta.cbma.base"): - sub_meta.fit(dset, dset) - assert "Loading pre-generated MA maps" in caplog.text - - def test_ALE_approximate_null_unit(testdata_cbma, tmp_path_factory): """Unit test for ALE with approximate null_method.""" tmpdir = tmp_path_factory.mktemp("test_ALE_approximate_null_unit") diff --git a/nimare/tests/test_meta_kernel.py b/nimare/tests/test_meta_kernel.py index 9fe416612..76125554e 100644 --- a/nimare/tests/test_meta_kernel.py +++ b/nimare/tests/test_meta_kernel.py @@ -11,8 +11,6 @@ @pytest.mark.parametrize( "kern, res, param, return_type, kwargs", [ - (kernel.ALEKernel, 1, "dataset", "dataset", {"sample_size": 20}), - (kernel.ALEKernel, 2, "dataset", "dataset", {"sample_size": 20}), (kernel.ALEKernel, 1, "dataset", "image", {"sample_size": 20}), (kernel.ALEKernel, 2, "dataset", "image", {"sample_size": 20}), (kernel.ALEKernel, 1, "dataframe", "image", {"sample_size": 20}), @@ -37,7 +35,6 @@ def test_kernel_peaks(testdata_cbma, tmp_path_factory, kern, res, param, return_ Notes ----- Remember that dataframe --> dataset won't work. - Only testing dataset --> dataset with ALEKernel because it takes a while. Test on multiple template resolutions. """ tmpdir = tmp_path_factory.mktemp("test_kernel_peaks") @@ -88,12 +85,12 @@ def test_kernel_peaks(testdata_cbma, tmp_path_factory, kern, res, param, return_ (kernel.KDAKernel, {"r": 4, "value": 1}), ], ) -def test_kernel_transform_attributes(testdata_cbma, kern, kwargs): +def test_kernel_transform_attributes(kern, kwargs): """Check that attributes are added at transform.""" kern_instance = kern(**kwargs) assert not hasattr(kern_instance, "filename_pattern") assert not hasattr(kern_instance, "image_type") - _ = kern_instance.transform(testdata_cbma, return_type="image") + kern_instance._infer_names() assert hasattr(kern_instance, "filename_pattern") assert hasattr(kern_instance, "image_type")