Skip to content

Commit

Permalink
Remove "dataset" return_type option from kernel transformers (#752)
Browse files Browse the repository at this point in the history
* Remove "dataset" `return_type` option from kernel transformers

* Drop tests ma_map_reuse

* Update test_meta_kernel.py

* Update test_meta_kernel.py

* Update nimare/meta/kernel.py

Co-authored-by: Taylor Salo <[email protected]>

* Update nimare/meta/kernel.py

Co-authored-by: Taylor Salo <[email protected]>

* Add versionchanged to child classes

Co-authored-by: Taylor Salo <[email protected]>
  • Loading branch information
JulioAPeraza and tsalo authored Jan 13, 2023
1 parent 97ab7fa commit 282f197
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 176 deletions.
51 changes: 7 additions & 44 deletions nimare/meta/cbma/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""CBMA methods from the ALE and MKDA families."""
import logging
from abc import abstractmethod
from hashlib import md5

import nibabel as nib
import numpy as np
Expand Down Expand Up @@ -92,8 +91,7 @@ def _preprocess_input(self, dataset):
----------
dataset : :obj:`~nimare.dataset.Dataset`
In this method, the Dataset is used to (1) select the appropriate mask image,
(2) identify any pre-generated MA maps stored in its images attribute,
and (3) extract sample size metadata and place it into the coordinates input.
and (2) extract sample size metadata and place it into the coordinates input.
Attributes
----------
Expand All @@ -111,17 +109,6 @@ def _preprocess_input(self, dataset):

for name, (type_, _) in self._required_inputs.items():
if type_ == "coordinates":
# Try to load existing MA maps
if hasattr(self, "kernel_transformer"):
self.kernel_transformer._infer_names(affine=md5(mask_img.affine).hexdigest())
if self.kernel_transformer.image_type in dataset.images.columns:
files = dataset.get_images(
ids=self.inputs_["id"],
imtype=self.kernel_transformer.image_type,
)
if all(f is not None for f in files):
self.inputs_["ma_maps"] = files

# Calculate IJK matrix indices for target mask
# Mask space is assumed to be the same as the Dataset's space
# These indices are used directly by any KernelTransformer
Expand Down Expand Up @@ -220,37 +207,13 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
Return a 4D sparse array of shape
(n_studies, mask.shape) with MA maps.
"""
if maps_key in self.inputs_.keys():
LGR.debug(f"Loading pre-generated MA maps ({maps_key}).")
all_exp = []
all_coords = []
all_data = []
for i_exp, img in enumerate(self.inputs_[maps_key]):
img_data = nib.load(img).get_fdata()
nonzero_idx = np.where(img_data != 0)

all_exp.append(np.full(nonzero_idx[0].shape[0], i_exp))
all_coords.append(np.vstack(nonzero_idx))
all_data.append(img_data[nonzero_idx])

n_studies = len(self.inputs_[maps_key])
shape = img_data.shape
kernel_shape = (n_studies,) + shape

exp = np.hstack(all_exp)
coords = np.vstack((exp.flatten(), np.hstack(all_coords)))
data = np.hstack(all_data).flatten()

ma_maps = sparse.COO(coords, data, shape=kernel_shape)
LGR.debug(f"Generating MA maps from coordinates ({coords_key}).")

else:
LGR.debug(f"Generating MA maps from coordinates ({coords_key}).")

ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
)
ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
)

return ma_maps

Expand Down
95 changes: 29 additions & 66 deletions nimare/meta/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,25 @@
from __future__ import division

import logging
import os
from hashlib import md5

import nibabel as nib
import numpy as np
import pandas as pd
import sparse

from nimare.base import NiMAREBase
from nimare.meta.utils import compute_ale_ma, compute_kda_ma, get_ale_kernel
from nimare.utils import _add_metadata_to_dataframe, _safe_transform, mm2vox
from nimare.utils import _add_metadata_to_dataframe, mm2vox

LGR = logging.getLogger(__name__)


class KernelTransformer(NiMAREBase):
"""Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
Coordinate-based meta-analyses leverage coordinates reported in
neuroimaging papers to simulate the thresholded statistical maps from the
original analyses. This generally involves convolving each coordinate with
Expand All @@ -39,7 +40,7 @@ class KernelTransformer(NiMAREBase):
"""

def _infer_names(self, **kwargs):
"""Determine filename pattern and image type for files created with this transformer.
"""Determine filename pattern and image type.
The parameters used to construct the filenames come from the transformer's
parameters (attributes saved in ``__init__()``).
Expand All @@ -53,7 +54,7 @@ def _infer_names(self, **kwargs):
Attributes
----------
filename_pattern : str
Filename pattern for images that will be saved by the transformer.
Filename pattern for images.
image_type : str
Name of the corresponding column in the Dataset.images DataFrame.
"""
Expand Down Expand Up @@ -81,9 +82,9 @@ def transform(self, dataset, masker=None, return_type="image"):
Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
Default is None.
return_type : {'sparse', 'array', 'image', 'dataset'}, optional
Whether to return a numpy array ('array'), a list of niimgs ('image'),
or a Dataset with MA images saved as files ('dataset').
return_type : {'sparse', 'array', 'image'}, optional
Whether to return a sparse matrix ('sparse'), a numpy array ('array'),
or a list of niimgs ('image').
Default is 'image'.
Returns
Expand All @@ -97,29 +98,24 @@ def transform(self, dataset, masker=None, return_type="image"):
contrast and V is voxel.
If return_type is 'image', a list of modeled activation images
(one for each of the Contrasts in the input dataset).
If return_type is 'dataset', a new Dataset object with modeled
activation images saved to files and referenced in the
Dataset.images attribute.
Attributes
----------
filename_pattern : str
Filename pattern for MA maps that will be saved by the transformer.
Filename pattern for MA maps. If :meth:`_infer_names` is executed.
image_type : str
Name of the corresponding column in the Dataset.images DataFrame.
If :meth:`_infer_names` is executed.
"""
if return_type not in ("sparse", "array", "image", "dataset"):
raise ValueError('Argument "return_type" must be "image", "array", or "dataset".')
if return_type not in ("sparse", "array", "image"):
raise ValueError('Argument "return_type" must be "image", "array", or "sparse".')

if isinstance(dataset, pd.DataFrame):
assert (
masker is not None
), "Argument 'masker' must be provided if dataset is a DataFrame."
mask = masker.mask_img
coordinates = dataset
assert (
return_type != "dataset"
), "Input dataset must be a Dataset if return_type='dataset'."

# Calculate IJK. Must assume that the masker is in same space,
# but has different affine, from original IJK.
Expand All @@ -129,24 +125,6 @@ def transform(self, dataset, masker=None, return_type="image"):
mask = masker.mask_img
coordinates = dataset.coordinates.copy()

# Determine MA map filenames. Must happen after parameters are set.
self._infer_names(affine=md5(mask.affine).hexdigest())

# Check for existing MA maps
# Use coordinates to get IDs instead of Dataset.ids bc of possible
# mismatch between full Dataset and contrasts with coordinates.
if self.image_type in dataset.images.columns:
files = dataset.get_images(ids=coordinates["id"].unique(), imtype=self.image_type)
if all(f is not None for f in files):
LGR.debug("Files already exist. Using them.")
if return_type == "array":
masked_data = _safe_transform(files, masker)
return masked_data
elif return_type == "image":
return [nib.load(f) for f in files]
elif return_type == "dataset":
return dataset.copy()

# Calculate IJK
if not np.array_equal(mask.affine, dataset.masker.mask_img.affine):
LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")
Expand All @@ -170,36 +148,22 @@ def transform(self, dataset, masker=None, return_type="image"):
filter_func=np.mean,
)

# Generate the MA maps if they weren't already available as images
if return_type == "array":
mask_data = mask.get_fdata().astype(bool)
elif return_type == "image":
dtype = type(self.value) if hasattr(self, "value") else float
mask_data = mask.get_fdata().astype(dtype)
elif return_type == "dataset":
if dataset.basepath is None:
raise ValueError(
"Dataset output path is not set. Set the path with Dataset.update_path()."
)
elif not os.path.isdir(dataset.basepath):
raise ValueError(
"Output directory does not exist. Set the path to an existing folder with "
"Dataset.update_path()."
)
dataset = dataset.copy()

# Generate the MA maps
transformed_maps = self._transform(mask, coordinates)

if return_type == "sparse":
return transformed_maps[0]

imgs = []
# Loop over exp ids since sparse._coo.core.COO is not iterable
for i_exp, id_ in enumerate(transformed_maps[1]):
if isinstance(transformed_maps[0][i_exp], sparse._coo.core.COO):
# This step is slow, but it is here just in case user want a
# return_type = "array", "image", or "dataset"
kernel_data = transformed_maps[0][i_exp].todense()
for i_exp, _ in enumerate(transformed_maps[1]):
kernel_data = transformed_maps[0][i_exp].todense()

if return_type == "array":
img = kernel_data[mask_data]
Expand All @@ -208,26 +172,13 @@ def transform(self, dataset, masker=None, return_type="image"):
kernel_data *= mask_data
img = nib.Nifti1Image(kernel_data, mask.affine)
imgs.append(img)
elif return_type == "dataset":
img = nib.Nifti1Image(kernel_data, mask.affine)
out_file = os.path.join(dataset.basepath, self.filename_pattern.format(id=id_))
img.to_filename(out_file)
dataset.images.loc[dataset.images["id"] == id_, self.image_type] = out_file

del kernel_data, transformed_maps

if return_type == "array":
return np.vstack(imgs)
elif return_type == "image":
return imgs
elif return_type == "dataset":
# Replace NaNs with Nones
dataset.images[self.image_type] = dataset.images[self.image_type].where(
dataset.images[self.image_type].notnull(), None
)
# Infer relative path
dataset.images = dataset.images
return dataset

def _transform(self, mask, coordinates):
"""Apply the kernel's unique transformer.
Expand Down Expand Up @@ -264,6 +215,10 @@ class ALEKernel(KernelTransformer):
will be determined on a study-wise basis based on the sample sizes available in the input,
via the method described in :footcite:t:`eickhoff2012activation`.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
Expand Down Expand Up @@ -326,6 +281,10 @@ def _transform(self, mask, coordinates):
class KDAKernel(KernelTransformer):
"""Generate KDA modeled activation images from coordinates.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
Expand Down Expand Up @@ -363,6 +322,10 @@ def _transform(self, mask, coordinates):
class MKDAKernel(KDAKernel):
"""Generate MKDA modeled activation images from coordinates.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
Expand Down
14 changes: 13 additions & 1 deletion nimare/tests/test_decode_continuous.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
Tests for nimare.decode.continuous.gclda_decode_map are in test_annotate_gclda.
"""
import os

import pandas as pd
import pytest

Expand Down Expand Up @@ -29,6 +31,7 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory):
tmpdir = tmp_path_factory.mktemp("test_CorrelationDistributionDecoder")

testdata_laird = testdata_laird.copy()
dset = testdata_laird.copy()
features = testdata_laird.get_labels(ids=testdata_laird.ids[0])[:5]

decoder = continuous.CorrelationDistributionDecoder(features=features)
Expand All @@ -42,7 +45,16 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory):

# Then let's make some images to decode
kern = kernel.MKDAKernel(r=10, value=1)
dset = kern.transform(testdata_laird, return_type="dataset")
kern._infer_names() # Determine MA map filenames

imgs = kern.transform(testdata_laird, return_type="image")
for i_img, img in enumerate(imgs):
id_ = testdata_laird.ids[i_img]
out_file = os.path.join(testdata_laird.basepath, kern.filename_pattern.format(id=id_))

# Add file names to dset.images DataFrame
img.to_filename(out_file)
dset.images.loc[testdata_laird.images["id"] == id_, kern.image_type] = out_file

# And now we have images we can use for decoding!
decoder = continuous.CorrelationDistributionDecoder(
Expand Down
Loading

0 comments on commit 282f197

Please sign in to comment.