Skip to content

Commit

Permalink
Improve memory management in MKDAChi2 Estimator (#638)
Browse files: browse the repository at this point in the history
* Rename logger.

* Try to reduce memory usage in MKDAChi2.

* Undo change to logger name.

* Run black.
  • Loading branch information
tsalo authored Feb 14, 2022
1 parent 86eda1c commit 69be3e3
Showing 1 changed file with 33 additions and 14 deletions.
47 changes: 33 additions & 14 deletions nimare/meta/cbma/mkda.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -223,34 +223,41 @@ def _fit(self, dataset1, dataset2):
self.masker = self.masker or dataset1.masker
self.null_distributions_ = {}

# Generate MA maps and calculate count variables for first dataset
ma_maps1 = self._collect_ma_maps(
maps_key="ma_maps1",
coords_key="coordinates1",
fname_idx=0,
)
n_selected = ma_maps1.shape[0]
n_selected_active_voxels = np.sum(ma_maps1.astype(bool), axis=0)

# Close the memmap.
# Deleting the variable should be enough, but I'd prefer to be explicit.
if isinstance(ma_maps1, np.memmap):
LGR.debug(f"Closing memmap at {ma_maps1.filename}")
ma_maps1._mmap.close()

del ma_maps1

# Generate MA maps and calculate count variables for second dataset
ma_maps2 = self._collect_ma_maps(
maps_key="ma_maps2",
coords_key="coordinates2",
fname_idx=1,
)

# Calculate different count variables
n_selected = ma_maps1.shape[0]
n_unselected = ma_maps2.shape[0]
n_mappables = n_selected + n_unselected
n_selected_active_voxels = np.sum(ma_maps1, axis=0)
n_unselected_active_voxels = np.sum(ma_maps2, axis=0)

# Remove large arrays
if isinstance(ma_maps1, np.memmap):
LGR.debug(f"Closing memmap at {ma_maps1.filename}")
ma_maps1._mmap.close()
n_unselected_active_voxels = np.sum(ma_maps2.astype(bool), axis=0)

# Close the memmap.
# Deleting the variable should be enough, but I'd prefer to be explicit.
if isinstance(ma_maps2, np.memmap):
LGR.debug(f"Closing memmap at {ma_maps2.filename}")
ma_maps2._mmap.close()

del ma_maps1, ma_maps2
del ma_maps2

n_mappables = n_selected + n_unselected

# Nomenclature for variables below: p = probability,
# F = feature present, g = given, U = unselected, A = activation.
Expand All @@ -261,6 +268,8 @@ def _fit(self, dataset1, dataset2):
(n_selected_active_voxels + n_unselected_active_voxels) / n_mappables
).squeeze()

del n_mappables

# Conditional probabilities
pAgF = n_selected_active_voxels / n_selected
pAgU = n_unselected_active_voxels / n_unselected
Expand All @@ -276,6 +285,8 @@ def _fit(self, dataset1, dataset2):
pAgF_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels))
pAgF_z = p_to_z(pAgF_p_vals, tail="two") * pAgF_sign

del pAgF_sign

# Two-way chi-square for specificity of activation
cells = np.squeeze(
np.array(
Expand All @@ -288,11 +299,19 @@ def _fit(self, dataset1, dataset2):
]
).T
)
del n_selected, n_unselected

pFgA_chi2_vals = two_way(cells)

del n_selected_active_voxels, n_unselected_active_voxels

pFgA_p_vals = special.chdtrc(1, pFgA_chi2_vals)
pFgA_p_vals[pFgA_p_vals < 1e-240] = 1e-240
pFgA_sign = np.sign(pAgF - pAgU).ravel()
pFgA_z = p_to_z(pFgA_p_vals, tail="two") * pFgA_sign

del pFgA_sign, pAgU

images = {
"prob_desc-A": pA,
"prob_desc-AgF": pAgF,
Expand All @@ -319,20 +338,20 @@ def _run_fwe_permutation(self, iter_xyz1, iter_xyz2, iter_df1, iter_df2):
iter_df1[["x", "y", "z"]] = iter_xyz1
iter_df2[["x", "y", "z"]] = iter_xyz2

# Generate MA maps and calculate count variables for first dataset
temp_ma_maps1 = self.kernel_transformer.transform(
iter_df1, self.masker, return_type="array"
)
n_selected = temp_ma_maps1.shape[0]
n_selected_active_voxels = np.sum(temp_ma_maps1, axis=0)

del temp_ma_maps1

# Generate MA maps and calculate count variables for second dataset
temp_ma_maps2 = self.kernel_transformer.transform(
iter_df2, self.masker, return_type="array"
)
n_unselected = temp_ma_maps2.shape[0]
n_unselected_active_voxels = np.sum(temp_ma_maps2, axis=0)

del temp_ma_maps2

# Currently unused conditional probabilities
Expand Down

0 comments on commit 69be3e3

Please sign in to comment.