From be1f5c61233c6c5f06c40b5fb6e3cba6949b52ae Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 20:56:19 +1000 Subject: [PATCH] add summary method --- R/functions.R | 16 +++++----- R/functions_SE.R | 2 +- R/glmmSeq.R | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/R/functions.R b/R/functions.R index 82f9fc2a..e0e5c561 100755 --- a/R/functions.R +++ b/R/functions.R @@ -693,7 +693,7 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, .abundance = enquo(.abundance) .sample_total_read_count = enquo(.sample_total_read_count) .dispersion = enquo(.dispersion) - + # Check if omit_contrast_in_colnames is correctly setup if(omit_contrast_in_colnames & length(.contrasts) > 1){ warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present") @@ -757,33 +757,33 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, # Reorder counts counts = counts[,rownames(metadata),drop=FALSE] - + if(quo_is_symbolic(.dispersion)) dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe() else dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)) - + # # Check dispersion # if(!names(dispersion) |> sort() |> identical( # rownames(counts) |> # sort() # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay") - + # Make sure the order matches the counts dispersion = dispersion[rownames(counts)] - - glmmSeq_object = + + glmmSeq_object = glmmSeq( .formula, countdata = counts , metadata = metadata, dispersion = dispersion, - progress = TRUE, + progress = TRUE, method = method |> str_remove("(?i)^glmmSeq_"), ... ) glmmSeq_object |> - summary() |> + summary_lmmSeq() |> as_tibble(rownames = "gene") |> mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> diff --git a/R/functions_SE.R b/R/functions_SE.R index fcee85d0..5f067aec 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1158,7 +1158,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, ) glmmSeq_object |> - summary() |> + summary_lmmSeq() |> as_tibble(rownames = "transcript") |> mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 4ef8699f..268d6907 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -802,3 +802,81 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N metadata = subsetMetadata, modelData = modelData, optInfo = optInfo, errors = outputErrors, vars = list(id = id, removeSingles = removeSingles)) } + +# From GlmmSeq + +#' Summarise a 'glmmSeq'/'lmmSeq' object +#' +#' Summarise results from [glmmSeq] or [lmmSeq] analysis +#' +#' @keywords internal +#' @noRd +#' +#' @param object an object of class `"GlmmSeq"` or `"lmmSeq"` +#' @param gene an optional character value specifying a single gene whose +#' results are summarised +#' @param digits integer, used for number formatting +#' @param ... arguments to be passed to other methods +#' @return +#' If `gene=NULL` a dataframe of results for all genes is returned. Otherwise +#' the output of GLMM or LMM model results for a single gene including +#' coefficients, test statistics, p-values is printed and the dataframe for all +#' genes is returned invisibly. +#' @seealso [glmmSeq()], [lmmSeq()] +summary_lmmSeq <- function(object, + gene = NULL, + digits = max(3L, getOption("digits") - 3L), ...) { + if (is.null(gene)) { + statSet <- names(object@stats) + gp <- lapply(statSet, function(i) { + out <- object@stats[[i]] + if (i %in% c("Chisq", "Fval", "Df", "NumDF", "DenDF")) colnames(out) <- paste(i, colnames(out), sep = "_") + if (i == "pvals") colnames(out) <- paste0("P_", colnames(out)) + if (i == "stdErr") colnames(out) <- paste0("se_", colnames(out)) + out + }) + do.call(cbind, gp) + } else { + out <- lapply(object@stats, function(i) i[gene, ]) + if (is(object, "GlmmSeq")) { + cat("Generalised linear mixed model\n") + cat(paste0("Method: ", object@info$method, "\n")) + if (object@info$method == "lme4") { + cat("Family: Negative Binomial\n") + } else { + cat(paste0("Family: ", object@info$family, "\n")) + } + } else { + cat("Linear mixed model\n") + } + cat("Formula: ") + print(object@formula) + print(out$res) + cat("\nFixed effects:\n") + cfdf <- data.frame(Estimate = out$coef, + `Std. Error` = out$stdErr, check.names = FALSE) + print(cfdf, digits = digits) + if (object@info$test.stat == "Wald") { + cat("\nAnalysis of Deviance Table (Type II Wald chisquare tests)\n") + testdf <- data.frame(Chisq = out$Chisq, Df = out$Df, + `Pr(>Chisq)` = out$pvals, + row.names = colnames(object@stats$Chisq), + check.names = FALSE) + } else if (object@info$test.stat == "LRT") { + cat("\nLikelihood ratio test\nReduced formula: ") + print(object@reduced) + testdf <- data.frame(Chisq = out$Chisq, Df = out$Df, + `Pr(>Chisq)` = out$pvals, + row.names = " ", + check.names = FALSE) + } else { + cat("\nType III Analysis of Variance Table with Satterthwaite's method\n") + testdf <- data.frame(NumDF = out$NumDF, DenDF = out$DenDF, + `F value` = out$Fval, `Pr(>F)` = out$pvals, + row.names = colnames(object@stats$Fval), + check.names = FALSE) + } + print(testdf, digits = digits) + invisible(out) + } +}