From 3042f189f205617d9c9db42de65e1687f07ff0e7 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Thu, 7 Dec 2023 16:04:03 +1100
Subject: [PATCH 1/3] add design to dispersion estimation

---
 R/functions.R |  9 ++++++++-
 R/utilities.R | 30 +++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 09937d08..0ee421de 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -758,10 +758,17 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
   # Reorder counts
   counts = counts[,rownames(metadata),drop=FALSE]
 
+  # Create design matrix for dispersion, removing random effects
+  design =
+    model.matrix(
+      object = .formula |> eliminate_random_effects(),
+      data = metadata
+    )
+  
   if(quo_is_symbolic(.dispersion))
     dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
   else
-    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+    dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
 
   # # Check dispersion
   # if(!names(dispersion) |> sort() |> identical(
diff --git a/R/utilities.R b/R/utilities.R
index 205933a7..b59a58e0 100755
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1524,4 +1524,32 @@ feature__ =  get_special_column_name_symbol(".feature")
 sample__ = get_special_column_name_symbol(".sample")
 
 
-
+#' Produced using ChatGPT - Eliminate Random Effects from a Formula
+#'
+#' This function takes a mixed-effects model formula and returns a modified
+#' formula with all random effects removed, leaving only the fixed effects.
+#'
+#' @param formula An object of class \code{formula}, representing a mixed-effects model formula.
+#' @return A formula object with random effects parts removed.
+#' @examples
+#' eliminate_random_effects(~ age_days * sex + (1 | file_id) + ethnicity_simplified + assay_simplified + .aggregated_cells + (1 + age_days * sex | tissue))
+#' @noRd
+#' @importFrom stats as.formula
+eliminate_random_effects <- function(formula) {
+  # Convert the formula to a string
+  formula_str <- deparse(formula)
+  
+  # Split the string by '+' while keeping the brackets content together
+  split_str <- unlist(strsplit(formula_str, "(?<=\\))\\s*\\+\\s*|\\+\\s*(?=\\()", perl = TRUE))
+  
+  # Filter out the random effects parts
+  fixed_effects <- grep("\\|", split_str, value = TRUE, invert = TRUE)
+  
+  # Combine the fixed effects parts back into a single string
+  modified_str <- paste(fixed_effects, collapse = " + ")
+  
+  # Convert the modified string back to a formula
+  modified_formula <- as.formula(modified_str)
+  
+  return(modified_formula)
+}
\ No newline at end of file

From 33c5dcdcb0dc7d015f03a5feb1c91a9935ceb563 Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Thu, 7 Dec 2023 16:23:22 +1100
Subject: [PATCH 2/3] fixed messaging

---
 DESCRIPTION      |  2 +-
 R/functions_SE.R | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2e25694a..86beeea2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 1.15.2
+Version: 1.15.3
 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",
diff --git a/R/functions_SE.R b/R/functions_SE.R
index 1e671c03..325c751b 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -1171,11 +1171,11 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
     # # Add raw object
     # attach_to_internals(glmmSeq_object, "glmmSeq") %>%
 
-    # Communicate the attribute added
-    {
-      rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
-      (.)
-    }  %>%
+    # # Communicate the attribute added
+    # {
+    #   rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
+    #   (.)
+    # }  %>%
 
     # Attach prefix
     setNames(c(

From 0693ee1d725d6bb39aad40fc61489e7b3718ee0a Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Thu, 7 Dec 2023 17:57:16 +1100
Subject: [PATCH 3/3] edit dispersion for SE methods

---
 R/functions_SE.R                              |  9 +++++-
 R/methods.R                                   | 29 +++++++++++++++++++
 man/test_differential_abundance-methods.Rd    | 29 +++++++++++++++++++
 tests/testthat/test-bulk_methods.R            |  2 +-
 .../test-bulk_methods_SummarizedExperiment.R  |  2 +-
 5 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/R/functions_SE.R b/R/functions_SE.R
index 325c751b..23374aee 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -1133,10 +1133,17 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
     .data %>%
     assay(my_assay)
 
+  # Create design matrix for dispersion, removing random effects
+  design =
+    model.matrix(
+      object = .formula |> eliminate_random_effects(),
+      data = metadata
+    )
+  
   if(quo_is_symbolic(.dispersion))
     dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
   else
-    dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
+    dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
 
   # # Check dispersion
   # if(!names(dispersion) |> sort() |> identical(
diff --git a/R/methods.R b/R/methods.R
index cec1e7f3..75f8d598 100755
--- a/R/methods.R
+++ b/R/methods.R
@@ -2600,6 +2600,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.
 #'
 #' Underlying method for edgeR framework:
+#' 
 #' 	.data |>
 #'
 #' 	# Filter
@@ -2623,7 +2624,10 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #'			edgeR::glmQLFit(design) |> // or glmFit according to choice
 #'			edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice
 #'
+#'
+#'
 #'	Underlying method for DESeq2 framework:
+#'	
 #'	keep_abundant(
 #'			factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]),
 #'			minimum_counts = minimum_counts,
@@ -2636,6 +2640,31 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
 #'	DESeq2::results()
 #'
 #'
+#'
+#' Underlying method for glmmSeq framework:
+#'
+#' counts =
+#' .data %>%
+#'   assay(my_assay)
+#' 
+#' # Create design matrix for dispersion, removing random effects
+#' design =
+#'   model.matrix(
+#'     object = .formula |> eliminate_random_effects(),
+#'     data = metadata
+#'   )
+#' 
+#' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
+#' 
+#'   glmmSeq( .formula,
+#'            countdata = counts ,
+#'            metadata =   metadata |> as.data.frame(),
+#'            dispersion = dispersion,
+#'            progress = TRUE,
+#'            method = method |> str_remove("(?i)^glmmSeq_" ),
+#'   )
+#'   
+#' 
 #' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g.,  log fold change, p-value and false discovery rate).
 #'
 #'
diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd
index 9ad6e796..d21594b9 100755
--- a/man/test_differential_abundance-methods.Rd
+++ b/man/test_differential_abundance-methods.Rd
@@ -180,6 +180,7 @@ This function provides the option to use edgeR \url{https://doi.org/10.1093/bioi
 All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.
 
 Underlying method for edgeR framework:
+
 	.data |>
 
 	# Filter
@@ -203,7 +204,10 @@ keep_abundant(
 		edgeR::glmQLFit(design) |> // or glmFit according to choice
 		edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) // or glmLRT according to choice
 
+
+
 Underlying method for DESeq2 framework:
+
 keep_abundant(
 		factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]),
 		minimum_counts = minimum_counts,
@@ -214,6 +218,31 @@ keep_abundant(
 DESeq2::DESeqDataSet(design = .formula) |>
 DESeq2::DESeq() |>
 DESeq2::results()
+
+
+
+Underlying method for glmmSeq framework:
+
+counts =
+.data %>%
+  assay(my_assay)
+
+# Create design matrix for dispersion, removing random effects
+design =
+  model.matrix(
+    object = .formula |> eliminate_random_effects(),
+    data = metadata
+  )
+
+dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
+
+  glmmSeq( .formula,
+           countdata = counts ,
+           metadata =   metadata |> as.data.frame(),
+           dispersion = dispersion,
+           progress = TRUE,
+           method = method |> str_remove("(?i)^glmmSeq_" ),
+  )
 }
 \examples{
 
diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R
index d7c2d545..8ea1506d 100755
--- a/tests/testthat/test-bulk_methods.R
+++ b/tests/testthat/test-bulk_methods.R
@@ -855,7 +855,7 @@ test_that("differential trancript abundance - random effects",{
     pull(P_condition_adjusted) |>
     head(4) |>
     expect_equal(
-      c(0.02381167, 0.01097209, 0.01056741, 0.02381167),
+      c(0.03643414, 0.02938584, 0.02938584, 0.03643414),
       tolerance=1e-3
     )
 
diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
index 49366aed..5230714e 100755
--- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R
+++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -493,7 +493,7 @@ test_that("differential trancript abundance - random effects SE",{
  rowData(res)[,"P_condition_adjusted"] |>
     head(4) |>
     expect_equal(
-      c(0.1065866, 0.1109067, 0.1116562 , NA),
+      c(0.03394914, 0.03394914, 0.03394914,  NA),
       tolerance=1e-2
     )