From be1f5c61233c6c5f06c40b5fb6e3cba6949b52ae Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Sun, 17 Sep 2023 20:56:19 +1000
Subject: [PATCH] add summary method

---
 R/functions.R    | 16 +++++-----
 R/functions_SE.R |  2 +-
 R/glmmSeq.R      | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index 82f9fc2a..e0e5c561 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -693,7 +693,7 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
   .abundance = enquo(.abundance)
   .sample_total_read_count = enquo(.sample_total_read_count)
   .dispersion = enquo(.dispersion)
-  
+
   # Check if omit_contrast_in_colnames is correctly setup
   if(omit_contrast_in_colnames & length(.contrasts) > 1){
     warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
@@ -757,33 +757,33 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
 
   # Reorder counts
   counts = counts[,rownames(metadata),drop=FALSE]
-  
+
   if(quo_is_symbolic(.dispersion))
     dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
   else
     dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts))
-  
+
   # # Check dispersion
   # if(!names(dispersion) |> sort() |> identical(
   #   rownames(counts) |>
   #   sort()
   # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay")
-  
+
   # Make sure the order matches the counts
   dispersion = dispersion[rownames(counts)]
-  
-  glmmSeq_object = 
+
+  glmmSeq_object =
     glmmSeq( .formula,
           countdata = counts ,
           metadata =   metadata,
           dispersion = dispersion,
-          progress = TRUE, 
+          progress = TRUE,
           method = method |> str_remove("(?i)^glmmSeq_"),
           ...
   )
 
   glmmSeq_object |>
-    summary() |>
+    summary_lmmSeq() |>
     as_tibble(rownames = "gene") |>
     mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
 
diff --git a/R/functions_SE.R b/R/functions_SE.R
index fcee85d0..5f067aec 100755
--- a/R/functions_SE.R
+++ b/R/functions_SE.R
@@ -1158,7 +1158,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
     )
 
   glmmSeq_object |>
-    summary() |>
+    summary_lmmSeq() |>
     as_tibble(rownames = "transcript") |>
     mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>
 
diff --git a/R/glmmSeq.R b/R/glmmSeq.R
index 4ef8699f..268d6907 100644
--- a/R/glmmSeq.R
+++ b/R/glmmSeq.R
@@ -802,3 +802,81 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N
       metadata = subsetMetadata, modelData = modelData, optInfo = optInfo,
       errors = outputErrors, vars = list(id = id, removeSingles = removeSingles))
 }
+
+# From GlmmSeq
+
+#' Summarise a 'glmmSeq'/'lmmSeq' object
+#'
+#' Summarise results from [glmmSeq] or [lmmSeq] analysis
+#'
+#' @keywords internal
+#' @noRd
+#'
+#' @param object an object of class `"GlmmSeq"` or `"lmmSeq"`
+#' @param gene an optional character value specifying a single gene whose
+#'   results are summarised
+#' @param digits integer, used for number formatting
+#' @param ... arguments to be passed to other methods
+#' @return
+#' If `gene=NULL` a dataframe of results for all genes is returned. Otherwise
+#' the output of GLMM or LMM model results for a single gene including
+#' coefficients, test statistics, p-values is printed and the dataframe for all
+#' genes is returned invisibly.
+#' @seealso [glmmSeq()], [lmmSeq()]
+summary_lmmSeq <- function(object,
+                           gene = NULL,
+                           digits = max(3L, getOption("digits") - 3L), ...) {
+  if (is.null(gene)) {
+    statSet <- names(object@stats)
+    gp <- lapply(statSet, function(i) {
+      out <- object@stats[[i]]
+      if (i %in% c("Chisq", "Fval", "Df", "NumDF", "DenDF")) colnames(out) <- paste(i, colnames(out), sep = "_")
+      if (i == "pvals") colnames(out) <- paste0("P_", colnames(out))
+      if (i == "stdErr") colnames(out) <- paste0("se_", colnames(out))
+      out
+    })
+    do.call(cbind, gp)
+  } else {
+    out <- lapply(object@stats, function(i) i[gene, ])
+    if (is(object, "GlmmSeq")) {
+      cat("Generalised linear mixed model\n")
+      cat(paste0("Method: ", object@info$method, "\n"))
+      if (object@info$method == "lme4") {
+        cat("Family: Negative Binomial\n")
+      } else {
+        cat(paste0("Family: ", object@info$family, "\n"))
+      }
+    } else {
+      cat("Linear mixed model\n")
+    }
+    cat("Formula: ")
+    print(object@formula)
+    print(out$res)
+    cat("\nFixed effects:\n")
+    cfdf <- data.frame(Estimate = out$coef,
+                       `Std. Error` = out$stdErr, check.names = FALSE)
+    print(cfdf, digits = digits)
+    if (object@info$test.stat == "Wald") {
+      cat("\nAnalysis of Deviance Table (Type II Wald chisquare tests)\n")
+      testdf <- data.frame(Chisq = out$Chisq, Df = out$Df,
+                           `Pr(>Chisq)` = out$pvals,
+                           row.names = colnames(object@stats$Chisq),
+                           check.names = FALSE)
+    } else if (object@info$test.stat == "LRT") {
+      cat("\nLikelihood ratio test\nReduced formula: ")
+      print(object@reduced)
+      testdf <- data.frame(Chisq = out$Chisq, Df = out$Df,
+                           `Pr(>Chisq)` = out$pvals,
+                           row.names = " ",
+                           check.names = FALSE)
+    } else {
+      cat("\nType III Analysis of Variance Table with Satterthwaite's method\n")
+      testdf <- data.frame(NumDF = out$NumDF, DenDF = out$DenDF,
+                           `F value` = out$Fval, `Pr(>F)` = out$pvals,
+                           row.names = colnames(object@stats$Fval),
+                           check.names = FALSE)
+    }
+    print(testdf, digits = digits)
+    invisible(out)
+  }
+}