Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to RunBanksy #162

Merged
merged 11 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SeuratWrappers
Title: Community-Provided Methods and Extensions for the Seurat Object
Version: 0.3.4
Date: 2024-01-23
Version: 0.3.5
Date: 2024-04-04
Authors@R: c(
person(given = 'Andrew', family = 'Butler', email = '[email protected]', role = 'aut', comment = c(ORCID = '0000-0003-3608-0463')),
person(given = "Saket", family = "Choudhary", email = "[email protected]", role = "ctb", comment = c(ORCID = "0000-0001-5202-7633")),
Expand Down
137 changes: 108 additions & 29 deletions R/banksy.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,31 @@ NULL
#' @param lambda (numeric) Spatial weight parameter
#' @param assay (character) Assay in Seurat object to use
#' @param slot (character) Slot in Seurat assay to use
#' @param use_agf (boolean) Whether to use the AGF
#' @param dimx (character) Column name of spatial x dimension (must be in metadata)
#' @param dimy (character) Column name of spatial y dimension (must be in metadata)
#' @param dimz (character) Column name of spatial z dimension (must be in metadata)
#' @param ndim (integer) Number of spatial dimensions to extract
#' @param features (character) Features to compute. Can be 'all', 'variable' or
#' a vector of feature names
#' @param group (character) Column name of a grouping variable (must be in metadata)
#' @param split.scale (boolean) Whether to separate scaling by group
#' @param k_geom (numeric) kNN parameter - number of neighbors to use
#' @param n (numeric) kNN_rn parameter - exponent of radius
#' @param sigma (numeric) rNN parameter - standard deviation of Gaussian kernel
#' @param alpha (numeric) rNN parameter - determines radius used
#' @param k_spatial (numeric) rNN parameter - number of neighbors to use
#' @param spatial_mode (character) spatial mode to use (kNN_r, kNN_rn, kNN_rank,
#' kNN_unif, rNN_gauss)
#' @param spatial_mode (character) Kernel for neighborhood computation
#' \itemize{
#' \item{kNN_median: k-nearest neighbors with median-scaled Gaussian kernel}
#' \item{kNN_r: k-nearest neighbors with $1/r$ kernel}
#' \item{kNN_rn: k-nearest neighbors with $1/r^n$ kernel}
#' \item{kNN_rank: k-nearest neighbors with rank Gaussian kernel}
#' \item{kNN_unif: k-nearest neighbors wth uniform kernel}
#' \item{rNN_gauss: radial nearest neighbors with Gaussian kernel}
#' }
#' @param assay_name (character) Name for Banksy assay in Seurat object
#' @param M (numeric) Advanced usage. Highest azimuthal harmonic
#' @param verbose (boolean) Print messages
#'
#' @return A Seurat object with new assay holding a Banksy matrix
Expand All @@ -33,11 +46,13 @@ NULL
#' Algorithm that Unifies Cell Type Clustering and Tissue Domain Segmentation
#'
#' @export
RunBanksy <- function(object, lambda, assay='RNA', slot='data',
dimx=NULL, dimy=NULL, features='variable',
k_geom=10, n=2, sigma=1.5,
alpha=0.05, k_spatial=10, spatial_mode='kNN_r',
assay_name='BANKSY', verbose=TRUE) {
RunBanksy <- function(object, lambda, assay='RNA', slot='data', use_agf=FALSE,
dimx=NULL, dimy=NULL, dimz=NULL, ndim=2,
features='variable',
group=NULL, split.scale=TRUE,
k_geom=15, n=2, sigma=1.5,
alpha=0.05, k_spatial=10, spatial_mode='kNN_median',
assay_name='BANKSY', M=NULL, verbose=TRUE) {
# Check packages
SeuratWrappers:::CheckPackage(package = 'data.table', repository = 'CRAN')
SeuratWrappers:::CheckPackage(package = 'Matrix', repository = 'CRAN')
Expand All @@ -50,20 +65,50 @@ RunBanksy <- function(object, lambda, assay='RNA', slot='data',
data_own <- get_data(object, assay, slot, features, verbose)

# Get locs
locs <- get_locs(object, dimx, dimy, data_own, verbose)
locs <- get_locs(object, dimx, dimy, dimz, ndim, data_own, group, verbose)
if (!is.null(group)) {
object <- AddMetaData(
object, metadata = locs,
col.name = paste0('staggered_', colnames(locs)))
}

# Compute neighbor matrix
if (verbose) message('Computing neighbor matrix')
data_nbr <- Banksy:::compute.banksyMatrices(
gcm = data_own, locs = locs, sigma = sigma, alpha = alpha,
kspatial = k_spatial, k_geom = k_geom, n = n, spatialMode = spatial_mode,
verbose = verbose)
knn_list <- lapply(k_geom, function(kg) {
Banksy:::computeNeighbors(locs,
spatial_mode = spatial_mode, k_geom = kg, n = n,
sigma=sigma, alpha=alpha, k_spatial=k_spatial,
verbose=verbose)
})

# Create Banksy matrix
M <- seq(0, max(Banksy:::getM(use_agf, M)))
# Compute harmonics
center <- rep(TRUE, length(M))
# Only center higher harmonics
center[1] <- FALSE
har <- Map(function(knn_df, M, center) {
x <- Banksy:::computeHarmonics(data_own, knn_df, M, center, verbose)
rownames(x) <- paste0(rownames(x), '.m', M)
x
}, knn_list, M, center)

# Scale by lambdas
lambdas <- Banksy:::getLambdas(lambda, n_harmonics = length(har))

# Merge with own expression
if (verbose) message('Creating Banksy matrix')
data_banksy <- Matrix::Matrix(
rbind(sqrt(1 - lambda) * data_own, sqrt(lambda) * data_nbr),
sparse = TRUE)
data_banksy <- c(list(data_own), har)
if (verbose) message('Scaling BANKSY matrix. Do not call ScaleData on assay ', assay_name)
data_scaled <- lapply(data_banksy, fast_scaler,
object, group, split.scale, verbose)

# Multiple by lambdas
data_banksy <- Map(function(lam, mat) lam * mat, lambdas, data_banksy)
data_scaled <- Map(function(lam, mat) lam * mat, lambdas, data_scaled)

# Rbind
data_banksy <- do.call(rbind, data_banksy)
data_scaled <- do.call(rbind, data_scaled)

# Create an assay object
if (grepl(pattern = 'counts', x = slot)) {
Expand All @@ -76,6 +121,8 @@ RunBanksy <- function(object, lambda, assay='RNA', slot='data',
if (verbose) message('Setting default assay to ', assay_name)
object[[assay_name]] <- banksy_assay
DefaultAssay(object) <- assay_name
object <- SetAssayData(object, slot = 'scale.data', new.data = data_scaled,
assay = assay_name)

# Log commands
object <- Seurat::LogSeuratCommand(object = object)
Expand All @@ -89,9 +136,9 @@ get_data <- function(object, assay, slot, features, verbose) {
if (verbose) message('Fetching data from slot ', slot,' from assay ', assay)
data_own <- Seurat::GetAssayData(object = object, assay = assay, slot = slot)
# Feature subset
if (features != 'all') {
if (features[1] != 'all') {
if (verbose) message('Subsetting by features')
if (features == 'variable') {
if (features[1] == 'variable') {
feat <- Seurat::VariableFeatures(object)
if (length(feat) == 0) {
warning('No variable features found. Running Seurat::FindVariableFeatures')
Expand All @@ -109,33 +156,65 @@ get_data <- function(object, assay, slot, features, verbose) {
}

# Get locations from Seurat object
get_locs <- function(object, dimx, dimy, data_own, verbose) {
get_locs <- function(object, dimx, dimy, dimz, ndim, data_own, group, verbose) {

if (!is.null(dimx) & !is.null(dimy)) {
# Convert locations data to data table
locations <- data.frame(
# Extract locations from metadata
locs <- data.frame(
sdimx = unlist(object[[dimx]]),
sdimy = unlist(object[[dimy]])
)
rownames(locations) <- colnames(object)
locs <- data.table::data.table(locations, keep.rownames = TRUE)
data.table::setnames(locs, 'rn', 'cell_ID')
rownames(locs) <- colnames(object)

# Add z-dim if present
if (!is.null(dimz)) locs$sdimz = object[[dimz]]

# Check locations
obj_samples <- colnames(data_own)
locs_samples <- locs[['cell_ID']]
locs_samples <- rownames(locs)
if (any(is.na(match(obj_samples, locs_samples)))) {
na_id <- which(is.na(match(obj_samples, locs_samples)))
warning('No centroids found for samples: ', paste(obj_samples[na_id], collapse = ', '), '. Dropping samples.')
warning('No centroids found for samples: ',
paste(obj_samples[na_id], collapse = ', '), '. Dropping samples.')
data_own <- data_own[, -na_id, drop = FALSE]
}
locs <- locs[match(obj_samples, locs_samples),,drop=FALSE]

} else {
locations <- Seurat::GetTissueCoordinates(object)[,1:2]
locs <- data.table::data.table(locations, keep.rownames = TRUE)
# Extract locations with Seurat accessor
locs <- Seurat::GetTissueCoordinates(object)[,seq_len(ndim)]
}

colnames(locs) <- c('cell_ID', 'sdimx', 'sdimy')
dim_names <- paste0('sdim', c('x','y','z'))
colnames(locs) <- dim_names[seq_len(ncol(locs))]

if (!is.null(group)) {
# Stagger locations by group
if (verbose) message('Staggering locations by ', group)
locs[,1] = locs[,1] + abs(min(locs[,1]))
max_x = max(locs[,1]) * 2
n_groups = length(unique(unlist(object[[group]])))
shift = seq(from = 0, length.out = n_groups, by = max_x)
locs[,1] = locs[,1] + rep(shift, table(object[[group]]))
}

return(locs)
}

# Scaling
fast_scaler = function(data, object, group, split.scale, verbose) {
# Split scaling by group
if (!is.null(group) & split.scale) {
groups = unlist(object[[group]])
ugroups = unique(groups)
for (curr_group in ugroups) {
if (verbose) message('Scaling group: ', curr_group)
curr_group_id <- which(curr_group == groups)
data[, curr_group_id] <- Seurat:::FastRowScale(
data[, curr_group_id])
}
} else {
data <- Seurat::FastRowScale(data)
}
data
}
Loading
Loading