Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 52 additions & 41 deletions bin/assign_grnas_sceptre.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ assign_grnas_sceptre_v1 <- function(mudata, probability_threshold = "default", n

# assign gRNAs
assign_grnas_args <- list(sceptre_object, method = "mixture")

# Add optional parameters if they are not "default"
if (probability_threshold != "default") {
assign_grnas_args$probability_threshold <- as.numeric(probability_threshold)
Expand All @@ -32,80 +32,86 @@ assign_grnas_sceptre_v1 <- function(mudata, probability_threshold = "default", n
assign_grnas_args$parallel <- FALSE
assign_grnas_args$n_processors <- as.integer(2)
}

sceptre_object <- do.call(sceptre::assign_grnas, assign_grnas_args)

# extract sparse logical matrix of gRNA assignments
grna_assignment_matrix <- sceptre_object |>
sceptre::get_grna_assignments() |>
methods::as("dsparseMatrix")
colnames(grna_assignment_matrix) <- colnames(MultiAssayExperiment::assay(mudata[['guide']]))

# reorder columns to match original mudata order
original_colnames <- colnames(
MultiAssayExperiment::assay(mudata[["guide"]])
)
grna_assignment_matrix <- grna_assignment_matrix[, original_colnames]

# add gRNA assignment matrix to MuData
# SummarizedExperiment::assays(mudata[['guide']])[['guide_assignment']] <- grna_assignment_matrix
assays(mudata[["guide"]])$guide_assignment <-
grna_assignment_matrix

return(list(mudata = mudata, guide_assignment = grna_assignment_matrix))
}

convert_mudata_to_sceptre_object_v1 <- function(mudata, remove_collinear_covariates = FALSE){
convert_mudata_to_sceptre_object_v1 <- function(mudata, remove_collinear_covariates = FALSE) {
# extract information from MuData
moi <- MultiAssayExperiment::metadata(mudata[['guide']])$moi
if(is.null(SummarizedExperiment::assayNames(mudata[['gene']]))){
SummarizedExperiment::assayNames(mudata[['gene']]) <- 'counts'
} else{
SummarizedExperiment::assayNames(mudata[['gene']])[[1]] <- 'counts'
moi <- MultiAssayExperiment::metadata(mudata[["guide"]])$moi
if (is.null(SummarizedExperiment::assayNames(mudata[["gene"]]))) {
SummarizedExperiment::assayNames(mudata[["gene"]]) <- "counts"
} else {
SummarizedExperiment::assayNames(mudata[["gene"]])[[1]] <- "counts"
}
if(is.null(SummarizedExperiment::assayNames(mudata[['guide']]))){
SummarizedExperiment::assayNames(mudata[['guide']]) <- 'counts'
} else{
SummarizedExperiment::assayNames(mudata[['guide']])[[1]] <- 'counts'
if (is.null(SummarizedExperiment::assayNames(mudata[["guide"]]))) {
SummarizedExperiment::assayNames(mudata[["guide"]]) <- "counts"
} else {
SummarizedExperiment::assayNames(mudata[["guide"]])[[1]] <- "counts"
}

scRNA_data <- mudata@ExperimentList$gene
guides_data <- mudata@ExperimentList$guide
response_matrix <- scRNA_data@assays@data@listData[["counts"]]

if(!is.null(SummarizedExperiment::colData(mudata))){
if (!is.null(SummarizedExperiment::colData(mudata))) {
covariates <- SummarizedExperiment::colData(mudata) |> as.data.frame()
covariates[] <- lapply(covariates, as.factor)
# Check and remove factor variables with fewer than two levels
number_of_levels <- sapply(covariates, function(x) length(unique(x)))
multi_level_factors <- number_of_levels > 1
covariates_clean <- covariates[, multi_level_factors, drop = FALSE]

if(ncol(covariates_clean) == 0){
if (ncol(covariates_clean) == 0) {
remove_collinear_covariates <- FALSE
}
if(remove_collinear_covariates){
model_matrix <- stats::model.matrix(object = ~ ., data = covariates_clean)

if (remove_collinear_covariates) {
model_matrix <- stats::model.matrix(object = ~., data = covariates_clean)
multicollinear <- Matrix::rankMatrix(model_matrix) < ncol(model_matrix)
if(multicollinear){
if (multicollinear) {
extra_covariates <- data.frame()
} else{
} else {
extra_covariates <- covariates_clean
}
} else{
} else {
extra_covariates <- covariates_clean
}
} else{
} else {
extra_covariates <- data.frame()
}

# if guide assignments not present, then extract guide counts
if(length(guides_data@assays@data@listData) == 1){
if (length(guides_data@assays@data@listData) == 1) {
grna_matrix <- guides_data@assays@data@listData[["counts"]]
# otherwise, extract guide assignments
} else{
} else {
grna_matrix <- guides_data@assays@data@listData[["guide_assignment"]]
}

grna_ids <- rownames(SingleCellExperiment::rowData(mudata[['guide']]))
grna_ids <- rownames(SingleCellExperiment::rowData(mudata[["guide"]]))
rownames(grna_matrix) <- grna_ids

gene_ids <- rownames(SingleCellExperiment::rowData(mudata[['gene']]))
gene_ids <- rownames(SingleCellExperiment::rowData(mudata[["gene"]]))
rownames(response_matrix) <- gene_ids
grna_target_data_frame <- SingleCellExperiment::rowData(mudata[['guide']]) |>
grna_target_data_frame <- SingleCellExperiment::rowData(mudata[["guide"]]) |>
as.data.frame() |>
tibble::rownames_to_column(var = "grna_id") |>
dplyr::rename(grna_target = intended_target_name) |>
Expand Down Expand Up @@ -136,20 +142,25 @@ convert_mudata_to_sceptre_object_v1 <- function(mudata, remove_collinear_covaria

# Run Command (only execute when script is run directly, not when sourced)
if (!exists(".sourced_from_test")) {

args <- commandArgs(trailingOnly = TRUE)

mudata_input <- args[1]
probability_threshold <- if(length(args) >= 2) args[2] else "default"
n_em_rep <- if(length(args) >= 3) args[3] else "default"
n_processors <- if(length(args) >= 4) suppressWarnings(as.integer(args[4])) else NA
probability_threshold <- if (length(args) >= 2) args[2] else "default"
n_em_rep <- if (length(args) >= 3) args[3] else "default"
n_processors <- if (length(args) >= 4) suppressWarnings(as.integer(args[4])) else NA

mudata_in <- MuData::readH5MU(mudata_input)
results <- assign_grnas_sceptre_v1(mudata = mudata_in,
probability_threshold = probability_threshold,
n_em_rep = n_em_rep,
n_processors = n_processors)
guide_assignment <- results$guide_assignment

writeMM(guide_assignment, "guide_assignment.mtx")
results <- assign_grnas_sceptre_v1(
mudata = mudata_in,
probability_threshold = probability_threshold,
n_em_rep = n_em_rep,
n_processors = n_processors
)

output_file <- sub("(\\.h5mu)$", "_out\\1", mudata_input)

MuData::writeH5MU(results$mudata, output_file)
# guide_assignment <- results$guide_assignment

# writeMM(guide_assignment, "guide_assignment.mtx")
}
108 changes: 50 additions & 58 deletions bin/inference_sceptre.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,14 @@ convert_mudata_to_sceptre_object_v1 <- function(mudata, remove_collinear_covaria
extra_covariates <- data.frame()
}

# if guide assignments not present, then extract guide counts
if (length(guides_data@assays@data@listData) == 1) {
grna_matrix <- guides_data@assays@data@listData[["counts"]]
# otherwise, extract guide assignments
} else {
grna_matrix <- guides_data@assays@data@listData[["guide_assignment"]]
}
# # if guide assignments not present, then extract guide counts
# if (length(guides_data@assays@data@listData) == 1) {
# grna_matrix <- guides_data@assays@data@listData[["counts"]]
# # otherwise, extract guide assignments
# } else {

# }
grna_matrix <- guides_data@assays@data@listData[["guide_assignment"]]

grna_ids <- rownames(SingleCellExperiment::rowData(mudata[["guide"]]))
rownames(grna_matrix) <- grna_ids
Expand Down Expand Up @@ -96,12 +97,14 @@ convert_mudata_to_sceptre_object_v1 <- function(mudata, remove_collinear_covaria


inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
use_parallel <- !is.na(n_processors) && as.integer(n_processors) > 1
n_processors <- if (use_parallel) as.integer(n_processors) else NULL
# convert MuData object to sceptre object
sceptre_object <- convert_mudata_to_sceptre_object_v1(mudata, remove_collinear_covariates = TRUE)

# extract set of discovery pairs to test (if available)
moi <- MultiAssayExperiment::metadata(mudata[["guide"]])$moi

# Check if pairs_to_test exists in metadata
if (!is.null(MultiAssayExperiment::metadata(mudata)$pairs_to_test)) {
pairs_to_test <- MultiAssayExperiment::metadata(mudata)$pairs_to_test |>
Expand All @@ -115,20 +118,20 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
# assemble base arguments to set_analysis_parameters()
args_list <- list(...)

discovery_pairs <- discovery_pairs |>
dplyr::mutate(grna_target = replace(grna_target, tolower(trimws(as.character(grna_target))) == "nan", NA_character_)) |>
dplyr::filter(!is.na(grna_target))
discovery_pairs <- discovery_pairs |>
dplyr::mutate(grna_target = replace(grna_target, tolower(trimws(as.character(grna_target))) == "nan", NA_character_)) |>
dplyr::filter(!is.na(grna_target))

tmp_allowed <- unique(sceptre_object@grna_target_data_frame$grna_target)
tmp_bad <- setdiff(unique(discovery_pairs$grna_target), tmp_allowed)
tmp_allowed <- unique(sceptre_object@grna_target_data_frame$grna_target)
tmp_bad <- setdiff(unique(discovery_pairs$grna_target), tmp_allowed)

print("Diferences:")
print(tmp_bad)

print(setdiff(unique(discovery_pairs$grna_target), unique(sceptre_object@grna_target_data_frame$grna_target)))

print('Diferences:')
print (tmp_bad)

print (setdiff(unique(discovery_pairs$grna_target), unique(sceptre_object@grna_target_data_frame$grna_target)))



if ("discovery_pairs" %in% names(args_list)) {
warning("The `discovery_pairs` argument is ignored. The `discovery_pairs` are set from the `pairs_to_test` metadata.")
}
Expand All @@ -140,7 +143,7 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
discovery_pairs <- sceptre::construct_trans_pairs(sceptre_object)
args_list[["discovery_pairs"]] <- discovery_pairs
}
#print (discovery_pairs)
# print (discovery_pairs)


# construct formula excluding gRNA covariates to avoid multicollinearity
Expand All @@ -163,26 +166,11 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
args_union$grna_integration_strategy <- "union"
# set analysis parameters for union
sceptre_object <- do.call(sceptre::set_analysis_parameters, args_union)
# assign grnas and run QC (relaxed thresholds to keep all cells; mirror prior behaviour)
sceptre_object <- sceptre_object |>
sceptre::assign_grnas(
method = "thresholding",
threshold = 1,
parallel = (!is.na(n_processors) && as.integer(n_processors) > 1),
n_processors = if (!is.na(n_processors) && as.integer(n_processors) > 1) as.integer(n_processors) else NULL
) |>
sceptre::run_qc(
n_nonzero_trt_thresh = 0L,
n_nonzero_cntrl_thresh = 0L,
p_mito_threshold = 1
)

# run discovery analysis (grouped)
sceptre_object <- sceptre_object |>
sceptre::run_discovery_analysis(
parallel = (!is.na(n_processors) && as.integer(n_processors) > 1),
n_processors = if (!is.na(n_processors) && as.integer(n_processors) > 1) as.integer(n_processors) else NULL
)
sceptre::run_discovery_analysis(parallel = use_parallel, n_processors = n_processors) |>
sceptre::run_calibration_check(parallel = use_parallel, n_processors = n_processors)

# get union (per-element) results
union_results <- sceptre_object |>
Expand All @@ -193,10 +181,18 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
intended_target_name = grna_target,
log2_fc = log_2_fold_change
)
# use union_results directly (no extra distinct/left_join)
union_test_results <- union_results

# store union results in mudata metadata as requested
# get calibration results
calibration_results <- sceptre_object |>
sceptre::get_result(analysis = "run_calibration_check") |>
dplyr::select(response_id, grna_target, p_value, log_2_fold_change) |>
dplyr::rename(
gene_id = response_id,
intended_target_name = grna_target,
log2_fc = log_2_fold_change
)

union_test_results <- rbind(union_results, calibration_results)
MultiAssayExperiment::metadata(mudata)$per_element_results <- union_test_results

# also write the per-element (union) results to file
Expand All @@ -211,29 +207,16 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
silent = TRUE
)

## 2) Singleton (per-guide) analysis — reuse sceptre_object to exploit caching
## 2) Singleton (per-guide) analysis
args_singleton <- args_list
args_singleton$grna_integration_strategy <- "singleton"

# set analysis parameters for singleton (reuse same sceptre_object reference)
sceptre_object <- do.call(sceptre::set_analysis_parameters, args_singleton)

# run assignment, qc and discovery for singleton
sceptre_object <- sceptre_object |>
sceptre::assign_grnas(
method = "thresholding",
threshold = 1,
parallel = (!is.na(n_processors) && as.integer(n_processors) > 1),
n_processors = if (!is.na(n_processors) && as.integer(n_processors) > 1) as.integer(n_processors) else NULL
) |>
sceptre::run_qc(
n_nonzero_trt_thresh = 0L,
n_nonzero_cntrl_thresh = 0L,
p_mito_threshold = 1
) |>
sceptre::run_discovery_analysis(
parallel = (!is.na(n_processors) && as.integer(n_processors) > 1),
n_processors = if (!is.na(n_processors) && as.integer(n_processors) > 1) as.integer(n_processors) else NULL
)
sceptre::run_discovery_analysis(parallel = use_parallel, n_processors = n_processors) |>
sceptre::run_calibration_check(parallel = use_parallel, n_processors = n_processors)

# extract singleton (per-guide) results, preserve grna_id and rename to guide_id
singleton_results <- sceptre_object |>
Expand All @@ -245,8 +228,17 @@ inference_sceptre_m <- function(mudata, n_processors = NA, ...) {
log2_fc = log_2_fold_change
)

# use singleton_results directly
singleton_test_results <- singleton_results
# extract singleton calibration results
singleton_calibration_results <- sceptre_object |>
sceptre::get_result(analysis = "run_calibration_check") |>
dplyr::select(response_id, grna_id, p_value, log_2_fold_change) |>
dplyr::rename(
gene_id = response_id,
guide_id = grna_id,
log2_fc = log_2_fold_change
)

singleton_test_results <- rbind(singleton_results, singleton_calibration_results)
MultiAssayExperiment::metadata(mudata)$per_guide_results <- singleton_test_results

try(
Expand Down
Loading