Skip to content

Commit

Permalink
Optimize compute_kda_ma for memory and speed (#857)
Browse files Browse the repository at this point in the history
* Simplify stacking

* Fix typo

* Remove vstack

* Fix stacking

* Remove @Profile

* Use jit for _convolve_sphere

* switch arrays to int32 where possible

* reduce memory consumption

* fix style

* Simplify numba

* Run black

* Mask outside space in numba

* Add indicator later

* Set value to input

* Add `sum_across_studies` to kda (#859)

* Resolve merge

* Add sum across studies

* Remove @Profile

* Only enable sum across studies for MKDA Chi Squared

* Run black

* Return dense for MKDAChiSquared

* Update nimare/meta/utils.py

Co-authored-by: James Kent <[email protected]>

* Run black

* Update nimare/meta/utils.py

Co-authored-by: James Kent <[email protected]>

* Format suggestion

* change how number of studies and active voxels are found

* add explicit dtype when creating image

* make the comment clearer

* add the kernel argument to the dictionary

* bump minimum required versions

* alternative way to sum across studies in a general way

* fix arguments and style

* pin minimum seaborn version

---------

Co-authored-by: Alejandro de la Vega <[email protected]>
Co-authored-by: James Kent <[email protected]>

* manage minimum dependencies

* Index within numba

* Only allow sum_overlap if not sum_across_studies

* Add unique index back

* Remove @Profile

* run black

* ensure the methods for creating the kernel are equivalent

---------

Co-authored-by: James Kent <[email protected]>
Co-authored-by: Alejandro de la Vega <[email protected]>
  • Loading branch information
3 people authored Jan 10, 2024
1 parent 6315367 commit 1fa0603
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 118 deletions.
4 changes: 2 additions & 2 deletions nimare/meta/cbma/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _compute_weights(self, ma_values):
"""
return None

def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps", return_type="sparse"):
"""Collect modeled activation maps from Estimator inputs.
Parameters
Expand All @@ -231,7 +231,7 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
return_type=return_type,
)

return ma_maps
Expand Down
56 changes: 13 additions & 43 deletions nimare/meta/cbma/mkda.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ class MKDAChi2(PairwiseCBMAEstimator):

def __init__(
self,
kernel_transformer=MKDAKernel,
kernel_transformer=MKDAKernel(),
prior=0.5,
memory=Memory(location=None, verbose=0),
memory_level=0,
Expand Down Expand Up @@ -438,35 +438,21 @@ def _fit(self, dataset1, dataset2):
self.null_distributions_ = {}

# Generate MA maps and calculate count variables for first dataset
ma_maps1 = self._collect_ma_maps(
n_selected_active_voxels = self._collect_ma_maps(
maps_key="ma_maps1",
coords_key="coordinates1",
return_type="summary_array",
)
n_selected = ma_maps1.shape[0]
n_selected_active_voxels = ma_maps1.sum(axis=0)

if isinstance(n_selected_active_voxels, sparse._coo.core.COO):
# NOTE: This may not work correctly with a non-NiftiMasker.
mask_data = self.masker.mask_img.get_fdata().astype(bool)

# Indexing the sparse array is slow, perform masking in the dense array
n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1)
n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)]

del ma_maps1
n_selected = self.dataset1.coordinates["id"].unique().shape[0]

# Generate MA maps and calculate count variables for second dataset
ma_maps2 = self._collect_ma_maps(
n_unselected_active_voxels = self._collect_ma_maps(
maps_key="ma_maps2",
coords_key="coordinates2",
return_type="summary_array",
)
n_unselected = ma_maps2.shape[0]
n_unselected_active_voxels = ma_maps2.sum(axis=0)
if isinstance(n_unselected_active_voxels, sparse._coo.core.COO):
n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1)
n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)]

del ma_maps2
n_unselected = self.dataset2.coordinates["id"].unique().shape[0]

n_mappables = n_selected + n_unselected

Expand Down Expand Up @@ -587,33 +573,17 @@ def _run_fwe_permutation(self, iter_xyz1, iter_xyz2, iter_df1, iter_df2, conn, v
iter_df2[["x", "y", "z"]] = iter_xyz2

# Generate MA maps and calculate count variables for first dataset
temp_ma_maps1 = self.kernel_transformer.transform(
iter_df1, self.masker, return_type="sparse"
n_selected_active_voxels = self.kernel_transformer.transform(
iter_df1, self.masker, return_type="summary_array"
)
n_selected = temp_ma_maps1.shape[0]
n_selected_active_voxels = temp_ma_maps1.sum(axis=0)

if isinstance(n_selected_active_voxels, sparse._coo.core.COO):
# NOTE: This may not work correctly with a non-NiftiMasker.
mask_data = self.masker.mask_img.get_fdata().astype(bool)

# Indexing the sparse array is slow, perform masking in the dense array
n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1)
n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)]

del temp_ma_maps1
n_selected = self.dataset1.coordinates["id"].unique().shape[0]

# Generate MA maps and calculate count variables for second dataset
temp_ma_maps2 = self.kernel_transformer.transform(
iter_df2, self.masker, return_type="sparse"
n_unselected_active_voxels = self.kernel_transformer.transform(
iter_df2, self.masker, return_type="summary_array"
)
n_unselected = temp_ma_maps2.shape[0]
n_unselected_active_voxels = temp_ma_maps2.sum(axis=0)
if isinstance(n_unselected_active_voxels, sparse._coo.core.COO):
n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1)
n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)]

del temp_ma_maps2
n_unselected = self.dataset2.coordinates["id"].unique().shape[0]

# Currently unused conditional probabilities
# pAgF = n_selected_active_voxels / n_selected
Expand Down
60 changes: 48 additions & 12 deletions nimare/meta/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
class KernelTransformer(NiMAREBase):
"""Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`.
.. versionchanged:: 0.2.1
- Add return_type='summary_array' option to transform method.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
Expand All @@ -44,7 +48,7 @@ class KernelTransformer(NiMAREBase):
Notes
-----
All extra (non-ijk) parameters for a given kernel should be overrideable as
All extra (non-ijk) parameters for a given kernel should be overridable as
parameters to __init__, so we can access them with get_params() and also
apply them to datasets with missing data.
"""
Expand Down Expand Up @@ -96,7 +100,7 @@ def transform(self, dataset, masker=None, return_type="image"):
Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
Default is None.
return_type : {'sparse', 'array', 'image'}, optional
return_type : {'sparse', 'array', 'image', 'summary_array'}, optional
Whether to return a sparse matrix ('sparse'), a numpy array ('array'),
or a list of niimgs ('image').
Default is 'image'.
Expand All @@ -110,6 +114,8 @@ def transform(self, dataset, masker=None, return_type="image"):
equal to `shape` of the images.
If return_type is 'array', a 2D numpy array (C x V), where C is
contrast and V is voxel.
If return_type is 'summary_array', a 1D numpy array (V,) containing
a summary measure for each voxel that has been combined across experiments.
If return_type is 'image', a list of modeled activation images
(one for each of the Contrasts in the input dataset).
Expand All @@ -121,8 +127,10 @@ def transform(self, dataset, masker=None, return_type="image"):
Name of the corresponding column in the Dataset.images DataFrame.
If :meth:`_infer_names` is executed.
"""
if return_type not in ("sparse", "array", "image"):
raise ValueError('Argument "return_type" must be "image", "array", or "sparse".')
if return_type not in ("sparse", "array", "image", "summary_array"):
raise ValueError(
'Argument "return_type" must be "image", "array", "summary_array", "sparse".'
)

if isinstance(dataset, pd.DataFrame):
assert (
Expand Down Expand Up @@ -169,9 +177,14 @@ def transform(self, dataset, masker=None, return_type="image"):
mask_data = mask.get_fdata().astype(dtype)

# Generate the MA maps
transformed_maps = self._cache(self._transform, func_memory_level=2)(mask, coordinates)
if return_type == "summary_array" or return_type == "sparse":
args = (mask, coordinates, return_type)
else:
args = (mask, coordinates)

if return_type == "sparse":
transformed_maps = self._cache(self._transform, func_memory_level=2)(*args)

if return_type == "sparse" or return_type == "summary_array":
return transformed_maps[0]

imgs = []
Expand All @@ -184,7 +197,7 @@ def transform(self, dataset, masker=None, return_type="image"):
imgs.append(img)
elif return_type == "image":
kernel_data *= mask_data
img = nib.Nifti1Image(kernel_data, mask.affine)
img = nib.Nifti1Image(kernel_data, mask.affine, dtype=kernel_data.dtype)
imgs.append(img)

del kernel_data, transformed_maps
Expand All @@ -194,7 +207,7 @@ def transform(self, dataset, masker=None, return_type="image"):
elif return_type == "image":
return imgs

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
"""Apply the kernel's unique transformer.
Parameters
Expand All @@ -206,18 +219,27 @@ def _transform(self, mask, coordinates):
The DataFrame must have the following columns: "id", "i", "j", "k".
Additionally, individual kernels may require other columns
(e.g., "sample_size" for ALE).
return_type : {'sparse', 'summary_array'}, optional
Whether to return a 4D sparse matrix ('sparse') where each contrast map is
saved separately or a 1D numpy array ('summary_array') where the contrast maps
are combined.
Default is 'sparse'.
Returns
-------
transformed_maps : N-length list of (3D array, str) tuples or (4D array, 1D array) tuple
Transformed data, containing one element for each study.
- Case 1: A kernel that is not an (M)KDAKernel, with no memory limit.
- Case 1: A kernel that is not an (M)KDAKernel
Each list entry is composed of a 3D array (the MA map) and the study's ID.
- Case 2: (M)KDAKernel, with no memory limit.
- Case 2: (M)KDAKernel
There is a length-2 tuple with a 4D numpy array of the shape (N, X, Y, Z),
containing all of the MA maps, and a numpy array of shape (N,) with the study IDs.
- Case 3: A kernel with return_type='summary_array'.
The transformed data is a 1D numpy array of shape (X, Y, Z) containing the
summary measure for each voxel.
"""
pass

Expand Down Expand Up @@ -277,7 +299,7 @@ def __init__(
self.sample_size = sample_size
super().__init__(memory=memory, memory_level=memory_level)

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
ijks = coordinates[["i", "j", "k"]].values
exp_idx = coordinates["id"].values

Expand Down Expand Up @@ -344,6 +366,10 @@ def _generate_description(self):
class KDAKernel(KernelTransformer):
"""Generate KDA modeled activation images from coordinates.
.. versionchanged:: 0.2.1
- Add new parameter ``return_type`` to transform method.
.. versionchanged:: 0.0.13
- Add new parameter ``memory`` to cache modeled activation (MA) maps.
Expand Down Expand Up @@ -383,16 +409,25 @@ def __init__(
self.value = value
super().__init__(memory=memory, memory_level=memory_level)

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
"""Return type can either be sparse or summary_array."""
ijks = coordinates[["i", "j", "k"]].values
exp_idx = coordinates["id"].values
if return_type == "sparse":
sum_across_studies = False
elif return_type == "summary_array":
sum_across_studies = True
else:
raise ValueError('Argument "return_type" must be "sparse" or "summary_array".')

transformed = compute_kda_ma(
mask,
ijks,
self.r,
self.value,
exp_idx,
sum_overlap=self._sum_overlap,
sum_across_studies=sum_across_studies,
)
exp_ids = np.unique(exp_idx)
return transformed, exp_ids
Expand Down Expand Up @@ -421,6 +456,7 @@ class MKDAKernel(KDAKernel):
.. versionchanged:: 0.2.1
- New parameters: ``memory`` and ``memory_level`` for memory caching.
- Add new parameter ``return_type`` to transform method.
.. versionchanged:: 0.0.13
Expand Down
Loading

0 comments on commit 1fa0603

Please sign in to comment.