From 3042f189f205617d9c9db42de65e1687f07ff0e7 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 7 Dec 2023 16:04:03 +1100 Subject: [PATCH 1/3] add design to dispersion estimation --- R/functions.R | 9 ++++++++- R/utilities.R | 30 +++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/R/functions.R b/R/functions.R index 09937d08..0ee421de 100755 --- a/R/functions.R +++ b/R/functions.R @@ -758,10 +758,17 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, # Reorder counts counts = counts[,rownames(metadata),drop=FALSE] + # Create design matrix for dispersion, removing random effects + design = + model.matrix( + object = .formula |> eliminate_random_effects(), + data = metadata + ) + if(quo_is_symbolic(.dispersion)) dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe() else - dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)) + dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) # # Check dispersion # if(!names(dispersion) |> sort() |> identical( diff --git a/R/utilities.R b/R/utilities.R index 205933a7..b59a58e0 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1524,4 +1524,32 @@ feature__ = get_special_column_name_symbol(".feature") sample__ = get_special_column_name_symbol(".sample") - +#' Produced using ChatGPT - Eliminate Random Effects from a Formula +#' +#' This function takes a mixed-effects model formula and returns a modified +#' formula with all random effects removed, leaving only the fixed effects. +#' +#' @param formula An object of class \code{formula}, representing a mixed-effects model formula. +#' @return A formula object with random effects parts removed. +#' @examples +#' eliminate_random_effects(~ age_days * sex + (1 | file_id) + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 + age_days * sex | tissue)) +#' @noRd +#' @importFrom stats as.formula +eliminate_random_effects <- function(formula) { + # Convert the formula to a string + formula_str <- deparse(formula) + + # Split the string by '+' while keeping the brackets content together + split_str <- unlist(strsplit(formula_str, "(?<=\\))\\s*\\+\\s*|\\+\\s*(?=\\()", perl = TRUE)) + + # Filter out the random effects parts + fixed_effects <- grep("\\|", split_str, value = TRUE, invert = TRUE) + + # Combine the fixed effects parts back into a single string + modified_str <- paste(fixed_effects, collapse = " + ") + + # Convert the modified string back to a formula + modified_formula <- as.formula(modified_str) + + return(modified_formula) +} \ No newline at end of file From 33c5dcdcb0dc7d015f03a5feb1c91a9935ceb563 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 7 Dec 2023 16:23:22 +1100 Subject: [PATCH 2/3] fixed messaging --- DESCRIPTION | 2 +- R/functions_SE.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2e25694a..86beeea2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.15.2 +Version: 1.15.3 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", diff --git a/R/functions_SE.R b/R/functions_SE.R index 1e671c03..325c751b 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1171,11 +1171,11 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, # # Add raw object # attach_to_internals(glmmSeq_object, "glmmSeq") %>% - # Communicate the attribute added - { - rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once") - (.) - } %>% + # # Communicate the attribute added + # { + # rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once") + # (.) + # } %>% # Attach prefix setNames(c( From 0693ee1d725d6bb39aad40fc61489e7b3718ee0a Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Thu, 7 Dec 2023 17:57:16 +1100 Subject: [PATCH 3/3] edit dispersion for SE methods --- R/functions_SE.R | 9 +++++- R/methods.R | 29 +++++++++++++++++++ man/test_differential_abundance-methods.Rd | 29 +++++++++++++++++++ tests/testthat/test-bulk_methods.R | 2 +- .../test-bulk_methods_SummarizedExperiment.R | 2 +- 5 files changed, 68 insertions(+), 3 deletions(-) diff --git a/R/functions_SE.R b/R/functions_SE.R index 325c751b..23374aee 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1133,10 +1133,17 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, .data %>% assay(my_assay) + # Create design matrix for dispersion, removing random effects + design = + model.matrix( + object = .formula |> eliminate_random_effects(), + data = metadata + ) + if(quo_is_symbolic(.dispersion)) dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe() else - dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)) + dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) # # Check dispersion # if(!names(dispersion) |> sort() |> identical( diff --git a/R/methods.R b/R/methods.R index cec1e7f3..75f8d598 100755 --- a/R/methods.R +++ b/R/methods.R @@ -2600,6 +2600,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula. #' #' Underlying method for edgeR framework: +#' #' .data |> #' #' # Filter @@ -2623,7 +2624,10 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' edgeR::glmQLFit(design) |> // or glmFit according to choice #' edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice #' +#' +#' #' Underlying method for DESeq2 framework: +#' #' keep_abundant( #' factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]), #' minimum_counts = minimum_counts, @@ -2636,6 +2640,31 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' DESeq2::results() #' #' +#' +#' Underlying method for glmmSeq framework: +#' +#' counts = +#' .data %>% +#' assay(my_assay) +#' +#' # Create design matrix for dispersion, removing random effects +#' design = +#' model.matrix( +#' object = .formula |> eliminate_random_effects(), +#' data = metadata +#' ) +#' +#' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) +#' +#' glmmSeq( .formula, +#' countdata = counts , +#' metadata = metadata |> as.data.frame(), +#' dispersion = dispersion, +#' progress = TRUE, +#' method = method |> str_remove("(?i)^glmmSeq_" ), +#' ) +#' +#' #' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate). #' #' diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd index 9ad6e796..d21594b9 100755 --- a/man/test_differential_abundance-methods.Rd +++ b/man/test_differential_abundance-methods.Rd @@ -180,6 +180,7 @@ This function provides the option to use edgeR \url{https://doi.org/10.1093/bioi All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula. Underlying method for edgeR framework: + .data |> # Filter @@ -203,7 +204,10 @@ keep_abundant( edgeR::glmQLFit(design) |> // or glmFit according to choice edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice + + Underlying method for DESeq2 framework: + keep_abundant( factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]), minimum_counts = minimum_counts, @@ -214,6 +218,31 @@ keep_abundant( DESeq2::DESeqDataSet(design = .formula) |> DESeq2::DESeq() |> DESeq2::results() + + + +Underlying method for glmmSeq framework: + +counts = +.data %>% + assay(my_assay) + +# Create design matrix for dispersion, removing random effects +design = + model.matrix( + object = .formula |> eliminate_random_effects(), + data = metadata + ) + +dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts)) + + glmmSeq( .formula, + countdata = counts , + metadata = metadata |> as.data.frame(), + dispersion = dispersion, + progress = TRUE, + method = method |> str_remove("(?i)^glmmSeq_" ), + ) } \examples{ diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index d7c2d545..8ea1506d 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -855,7 +855,7 @@ test_that("differential trancript abundance - random effects",{ pull(P_condition_adjusted) |> head(4) |> expect_equal( - c(0.02381167, 0.01097209, 0.01056741, 0.02381167), + c(0.03643414, 0.02938584, 0.02938584, 0.03643414), tolerance=1e-3 ) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 49366aed..5230714e 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -493,7 +493,7 @@ test_that("differential trancript abundance - random effects SE",{ rowData(res)[,"P_condition_adjusted"] |> head(4) |> expect_equal( - c(0.1065866, 0.1109067, 0.1116562 , NA), + c(0.03394914, 0.03394914, 0.03394914, NA), tolerance=1e-2 )