diff --git a/NAMESPACE b/NAMESPACE index e9ca7bc4..99678f32 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ export(quantile_normalise_abundance) export(reduce_dimensions) export(remove_redundancy) export(rename) +export(resolve_complete_confounders_of_non_interest) export(right_join) export(rotate_dimensions) export(rowwise) @@ -141,9 +142,11 @@ importFrom(purrr,map2_dfc) importFrom(purrr,map2_dfr) importFrom(purrr,map_chr) importFrom(purrr,map_dfr) +importFrom(purrr,map_int) importFrom(purrr,map_lgl) importFrom(purrr,reduce) importFrom(purrr,when) +importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,dots_list) importFrom(rlang,dots_values) @@ -159,6 +162,7 @@ importFrom(rlang,quo_is_symbol) importFrom(rlang,quo_is_symbolic) importFrom(rlang,quo_name) importFrom(rlang,quo_squash) +importFrom(rlang,set_names) importFrom(scales,extended_breaks) importFrom(scales,label_scientific) importFrom(scales,log_breaks) diff --git a/R/functions_SE.R b/R/functions_SE.R index 557cb5e4..0b596319 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1139,7 +1139,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, object = .formula |> lme4::nobars(), data = metadata ) - + if(quo_is_symbolic(.dispersion)) dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe() else @@ -1156,8 +1156,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, # Scaling sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method) - - + + glmmSeq_object = glmmSeq( .formula, countdata = counts , @@ -1490,3 +1490,86 @@ univariable_differential_tissue_composition_SE = function( unnest(surv_test, keep_empty = TRUE) } + + +#' Resolve Complete Confounders of Non-Interest +#' +#' This function processes a SummarizedExperiment object to handle confounders +#' that are not of interest in the analysis. It deals with two factors, adjusting +#' the data by nesting and summarizing over these factors. +#' +#' @importFrom rlang enquo +#' @importFrom rlang quo_name +#' @importFrom rlang !! +#' @importFrom tidyr unnest +#' @importFrom tidyr nest +#' @importFrom dplyr mutate +#' @importFrom dplyr arrange +#' @importFrom dplyr slice +#' @importFrom dplyr pull +#' @importFrom dplyr select +#' @importFrom purrr map_int +#' @importFrom dplyr if_else +#' +#' @param se A SummarizedExperiment object that contains the data to be processed. +#' @param .factor_1 A symbol or quosure representing the first factor variable in `se`. +#' @param .factor_2 A symbol or quosure representing the second factor variable in `se`. +#' @return A modified SummarizedExperiment object with confounders resolved. +#' @examples +#' # Not run: +#' # se is a SummarizedExperiment object +#' resolve_complete_confounders_of_non_interest(se, .factor_1 = factor1, .factor_2 = factor2) +#' @noRd +resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, .factor_2){ + + .factor_1 <- enquo(.factor_1) + .factor_2 <- enquo(.factor_2) + + cd = + colData(se) |> + as_tibble() |> + rowid_to_column() |> + distinct(rowid, !!.factor_1, !!.factor_2) |> + + nest(se_data = -c(!!.factor_1, !!.factor_2)) |> + nest(data = -!!.factor_1) |> + mutate(n1 = map_int(data, ~ .x |> distinct(!!.factor_2) |> nrow())) |> + unnest(data) |> + + # Nest data excluding .factor_2 and count distinct .factor_1 values + nest(data = -!!.factor_2) |> + mutate(n2 = map_int(data, ~ .x |> distinct(!!.factor_1) |> nrow())) |> + unnest(data) + + # Choose a dummy value for .factor_2 based on sorting by n1 + n2 + dummy_factor_2 <- cd |> arrange(desc(n1 + n2)) |> slice(1) |> pull(!!.factor_2) + + # Messages if I have confounders + if(cd |> filter(n1 + n2 < 3) |> nrow() > 0){ + + message(sprintf("tidybulk says: IMPORTANT! the columns %s and %s, have been corrected for complete confounders and now are NOT interpretable, and cannot be used in hypothesis testing.", quo_name(.factor_1), quo_name(.factor_2))) + + message(sprintf( + "tidybulk says: The value(s) %s in column %s from sample(s) %s, has been changed to %s.", + cd |> filter(n1 + n2 < 3) |> pull(!!.factor_2), + quo_name(.factor_2), + cd |> filter(n1 + n2 < 3) |> pull(se_data) |> map(colnames) |> unlist(), + dummy_factor_2 + )) + + # Replace .factor_2 with dummy_factor_2 where n1 + n2 is less than 3 and unnest + cd = cd |> + mutate(!!.factor_2 := if_else(n1 + n2 < 3, dummy_factor_2, !!.factor_2)) + } + + colData(se)[,c(quo_name(.factor_1), quo_name(.factor_2))] = + cd |> + unnest(se_data) |> + arrange(rowid) |> + select(!!.factor_1, !!.factor_2) |> + DataFrame() + + se + +} + diff --git a/R/methods.R b/R/methods.R index 75f8d598..c43a2817 100755 --- a/R/methods.R +++ b/R/methods.R @@ -4956,3 +4956,30 @@ as_matrix <- function(tbl, } +#' Resolve Complete Confounders of Non-Interest +#' +#' This generic function processes a SummarizedExperiment object to handle confounders +#' that are not of interest in the analysis. It dynamically handles combinations +#' of provided factors, adjusting the data by nesting and summarizing over these factors. +#' +#' +#' @param se A SummarizedExperiment object that contains the data to be processed. +#' @param ... Arbitrary number of factor variables represented as symbols or quosures +#' to be considered for resolving confounders. These factors are processed +#' in combinations of two. +#' +#' @rdname resolve_complete_confounders_of_non_interest-methods +#' +#' @return A modified SummarizedExperiment object with confounders resolved. +#' +#' @examples +#' # Not run: +#' # se is a SummarizedExperiment object +#' # resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3) +#' @export +setGeneric("resolve_complete_confounders_of_non_interest", function(se, ...) { + standardGeneric("resolve_complete_confounders_of_non_interest") +}) + + + diff --git a/R/methods_SE.R b/R/methods_SE.R index b6ba071b..09949834 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -2884,3 +2884,56 @@ setMethod("describe_transcript", "SummarizedExperiment", .describe_transcript_SE #' #' @return A consistent object (to the input) including additional columns for transcript symbol setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcript_SE) + + +#' @importFrom dplyr select +#' @importFrom rlang set_names +#' @importFrom tibble as_tibble +.resolve_complete_confounders_of_non_interest <- function(se, ...){ + + combination_of_factors_of_NON_interest = + # Factors + se[1,1, drop=FALSE] |> + select(...) |> + suppressWarnings() |> + colnames() |> + + # Combinations + combn(2) |> + t() |> + as_tibble() |> + set_names(c("factor_1", "factor_2")) + + for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){ + se = + se |> + resolve_complete_confounders_of_non_interest_pair_SE( + !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1), + !!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2) + ) + } + + se +} + +#' resolve_complete_confounders_of_non_interest +#' @inheritParams resolve_complete_confounders_of_non_interest +#' +#' @docType methods +#' @rdname resolve_complete_confounders_of_non_interest-methods +#' +#' @return A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). +setMethod("resolve_complete_confounders_of_non_interest", + "SummarizedExperiment", + .resolve_complete_confounders_of_non_interest) + +#' resolve_complete_confounders_of_non_interest +#' @inheritParams resolve_complete_confounders_of_non_interest +#' +#' @docType methods +#' @rdname resolve_complete_confounders_of_non_interest-methods +#' +#' @return A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). +setMethod("resolve_complete_confounders_of_non_interest", + "RangedSummarizedExperiment", + .resolve_complete_confounders_of_non_interest) diff --git a/man/resolve_complete_confounders_of_non_interest-methods.Rd b/man/resolve_complete_confounders_of_non_interest-methods.Rd new file mode 100644 index 00000000..b828aaf8 --- /dev/null +++ b/man/resolve_complete_confounders_of_non_interest-methods.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R, R/methods_SE.R +\docType{methods} +\name{resolve_complete_confounders_of_non_interest} +\alias{resolve_complete_confounders_of_non_interest} +\alias{resolve_complete_confounders_of_non_interest,SummarizedExperiment-method} +\alias{resolve_complete_confounders_of_non_interest,RangedSummarizedExperiment-method} +\title{Resolve Complete Confounders of Non-Interest} +\usage{ +resolve_complete_confounders_of_non_interest(se, ...) + +\S4method{resolve_complete_confounders_of_non_interest}{SummarizedExperiment}(se, ...) + +\S4method{resolve_complete_confounders_of_non_interest}{RangedSummarizedExperiment}(se, ...) +} +\arguments{ +\item{se}{A SummarizedExperiment object that contains the data to be processed.} + +\item{...}{Arbitrary number of factor variables represented as symbols or quosures +to be considered for resolving confounders. These factors are processed +in combinations of two.} +} +\value{ +A modified SummarizedExperiment object with confounders resolved. + +A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). + +A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). +} +\description{ +This generic function processes a SummarizedExperiment object to handle confounders +that are not of interest in the analysis. It dynamically handles combinations +of provided factors, adjusting the data by nesting and summarizing over these factors. +} +\examples{ +# Not run: +# se is a SummarizedExperiment object +# resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3) +} diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 5230714e..d6fc3fba 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -825,3 +825,49 @@ test_that("Only reduced dimensions UMAP - no object",{ expect_equal( class(attr(res, "internals")$UMAP[[1]])[1], "numeric" ) }) + + +test_that("resolve_complete_confounders_of_non_interest",{ + + + library(tidySummarizedExperiment) + library(tidybulk) + + # Sample annotations + sample_annotations <- data.frame( + sample_id = paste0("Sample", seq(1, 9)), + factor_of_interest = c(rep("treated", 4), rep("untreated", 5)), + A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"), + B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"), + C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"), + stringsAsFactors = FALSE + ) + + # Simulated assay data (e.g., gene expression data) + # Let's assume we have 100 genes (rows) and 9 samples (columns) + assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9) + + # Row data (e.g., gene annotations) + # For simplicity, we'll just use a sequence of gene IDs + row_data <- data.frame(gene_id = paste0("Gene", seq_len(100))) + + # Create SummarizedExperiment object + se <- SummarizedExperiment(assays = list(counts = assay_data), + rowData = row_data, + colData = DataFrame(sample_annotations)) + + expected_tibble = + tibble( + .sample = c("1", "5", "2", "4", "6", "8", "9", "3", "7"), + A = c("a1", "a1", "a2", "a2", "a2", "a2", "a3", "a1", "a1"), + B = c("b1", "b1", "b1", "b1", "b1", "b1", "b1", "b2", "b2"), + C = rep("c1", 9) + ) |> arrange(.sample) + + se |> + resolve_complete_confounders_of_non_interest(A, B, C) |> + distinct(.sample, A, B, C) |> + expect_identical(expected_tibble ) + + +})