Merge branch 'master' of github.com:stemangiola/tidybulk

stemangiola · Dec 8, 2023 · 4b27518 · 4b27518
2 parents b3f1af6 + 8c17496
commit 4b27518
Show file tree

Hide file tree

Showing 8 changed files with 111 additions and 11 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 1.15.2
+Version: 1.15.3
 Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "[email protected]",

diff --git a/R/functions.R b/R/functions.R
@@ -758,10 +758,17 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
   # Reorder counts
   counts = counts[,rownames(metadata),drop=FALSE]
 
+  # Create design matrix for dispersion, removing random effects
+  design =
+    model.matrix(
+      object = .formula |> eliminate_random_effects(),
+      data = metadata
+    )
+
   if(quo_is_symbolic(.dispersion))
     dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
   else
-    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+    dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
 
   # # Check dispersion
   # if(!names(dispersion) |> sort() |> identical(

diff --git a/R/functions_SE.R b/R/functions_SE.R
@@ -1133,10 +1133,17 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
     .data %>%
     assay(my_assay)
 
+  # Create design matrix for dispersion, removing random effects
+  design =
+    model.matrix(
+      object = .formula |> eliminate_random_effects(),
+      data = metadata
+    )
+
   if(quo_is_symbolic(.dispersion))
     dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
   else
-    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+    dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
 
   # # Check dispersion
   # if(!names(dispersion) |> sort() |> identical(
@@ -1171,11 +1178,11 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
     # # Add raw object
     # attach_to_internals(glmmSeq_object, "glmmSeq") %>%
 
-    # Communicate the attribute added
-    {
-      rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
-      (.)
-    }  %>%
+    # # Communicate the attribute added
+    # {
+    #   rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
+    #   (.)
+    # }  %>%
 
     # Attach prefix
     setNames(c(

diff --git a/R/methods.R b/R/methods.R
@@ -2600,6 +2600,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.
 #'
 #' Underlying method for edgeR framework:
+#' 
 #' 	.data |>
 #'
 #' 	# Filter
@@ -2623,7 +2624,10 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #'			edgeR::glmQLFit(design) |> // or glmFit according to choice
 #'			edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice
 #'
+#'
+#'
 #'	Underlying method for DESeq2 framework:
+#'	
 #'	keep_abundant(
 #'			factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]),
 #'			minimum_counts = minimum_counts,
@@ -2636,6 +2640,31 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #'	DESeq2::results()
 #'
 #'
+#'
+#' Underlying method for glmmSeq framework:
+#'
+#' counts =
+#' .data %>%
+#'   assay(my_assay)
+#' 
+#' # Create design matrix for dispersion, removing random effects
+#' design =
+#'   model.matrix(
+#'     object = .formula |> eliminate_random_effects(),
+#'     data = metadata
+#'   )
+#' 
+#' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
+#' 
+#'   glmmSeq( .formula,
+#'            countdata = counts ,
+#'            metadata =   metadata |> as.data.frame(),
+#'            dispersion = dispersion,
+#'            progress = TRUE,
+#'            method = method |> str_remove("(?i)^glmmSeq_" ),
+#'   )
+#'   
+#' 
 #' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g.,  log fold change, p-value and false discovery rate).
 #'
 #'

diff --git a/R/utilities.R b/R/utilities.R
@@ -1524,4 +1524,32 @@ feature__ =  get_special_column_name_symbol(".feature")
 sample__ = get_special_column_name_symbol(".sample")
 
 
-
+#' Produced using ChatGPT - Eliminate Random Effects from a Formula
+#'
+#' This function takes a mixed-effects model formula and returns a modified
+#' formula with all random effects removed, leaving only the fixed effects.
+#'
+#' @param formula An object of class \code{formula}, representing a mixed-effects model formula.
+#' @return A formula object with random effects parts removed.
+#' @examples
+#' eliminate_random_effects(~ age_days * sex + (1 | file_id) + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 + age_days * sex | tissue))
+#' @noRd
+#' @importFrom stats as.formula
+eliminate_random_effects <- function(formula) {
+  # Convert the formula to a string
+  formula_str <- deparse(formula)
+
+  # Split the string by '+' while keeping the brackets content together
+  split_str <- unlist(strsplit(formula_str, "(?<=\\))\\s*\\+\\s*|\\+\\s*(?=\\()", perl = TRUE))
+
+  # Filter out the random effects parts
+  fixed_effects <- grep("\\|", split_str, value = TRUE, invert = TRUE)
+
+  # Combine the fixed effects parts back into a single string
+  modified_str <- paste(fixed_effects, collapse = " + ")
+
+  # Convert the modified string back to a formula
+  modified_formula <- as.formula(modified_str)
+
+  return(modified_formula)
+}
diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd
diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R
@@ -855,7 +855,7 @@ test_that("differential trancript abundance - random effects",{
     pull(P_condition_adjusted) |>
     head(4) |>
     expect_equal(
-      c(0.02381167, 0.01097209, 0.01056741, 0.02381167),
+      c(0.03643414, 0.02938584, 0.02938584, 0.03643414),
       tolerance=1e-3
     )
 

diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -493,7 +493,7 @@ test_that("differential trancript abundance - random effects SE",{
  rowData(res)[,"P_condition_adjusted"] |>
     head(4) |>
     expect_equal(
-      c(0.1065866, 0.1109067, 0.1116562 , NA),
+      c(0.03394914, 0.03394914, 0.03394914,  NA),
       tolerance=1e-2
     )