From ea09c1348bb7fbb49840f5b3c584d4552b51208f Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 14 Sep 2023 12:39:26 +1000 Subject: [PATCH 01/13] add method glmmTMB --- R/glmmSeq.R | 173 +++++++++++++++++++++++++++++++++++++++++++++---- R/methods.R | 4 +- R/methods_SE.R | 6 +- 3 files changed, 165 insertions(+), 18 deletions(-) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 4e30737c..019699ed 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -1,6 +1,6 @@ # From https://github.com/easystats/parameters -standard_error = function (model) +lme4_standard_error = function (model) { rand.se <- lme4::ranef(model, condVar = TRUE) n.groupings <- length(rand.se) @@ -19,6 +19,69 @@ standard_error = function (model) rand.se } +glmmTMB_standard_error = function (model){ + rand.se <- glmmTMB::ranef(model, condVar = TRUE)$cond + n.groupings <- length(rand.se) + for (m in 1:n.groupings) { + vars.m <- attr(rand.se[[m]], "condVar") + K <- dim(vars.m)[1] + J <- dim(vars.m)[3] + names.full <- dimnames(rand.se[[m]]) + rand.se[[m]] <- array(NA, c(J, K)) + for (j in 1:J) { + rand.se[[m]][j, ] <- sqrt(diag(as.matrix(vars.m[, + , j]))) + } + dimnames(rand.se[[m]]) <- list(names.full[[1]], names.full[[2]]) + } + rand.se +} + +#' Get matrix from tibble +#' +#' @importFrom purrr map2_dfc +#' @importFrom tidyr pivot_longer +#' @importFrom tidyr pivot_wider +#' @importFrom dplyr join_by +#' +#' @keywords internal +#' @noRd +#' +glmmTMB_to_confidence_intervals_random_effects = function(fit){ + + + ster = glmmTMB_standard_error(fit) + ster = map2_dfr( + ster, names(ster), + ~ .x |> + as_tibble(rownames = "group_id") |> + setNames( + "group_id" |> + c(sprintf("%s__%s", .y, colnames(.x))) + ) |> + pivot_longer(-group_id, names_to = "parameter", values_to = "CI") + ) + + mod = glmmTMB::ranef(fit, condVar=T)$cond + mod = map2_dfr( + mod, names(mod), + ~ .x |> + as_tibble(rownames = "group_id") |> + setNames( + "group_id" |> + c(sprintf("%s__%s", .y, colnames(.x))) + ) |> + pivot_longer(-group_id, names_to = "parameter", values_to = "mode") + ) + + mod |> + left_join(ster, join_by(group_id, parameter)) |> + mutate(lower = mode - CI, upper = mode + CI) |> + select(-CI) |> + tidyr::unite("parameter", c(group_id, parameter), sep="_") |> + pivot_wider(names_from = parameter, values_from = c(lower, mode, upper), names_glue = "{parameter}__{.value}") +} + #' Get matrix from tibble #' @@ -33,7 +96,7 @@ standard_error = function (model) lmer_to_confidence_intervals_random_effects = function(fit){ - ster = standard_error(fit) + ster = lme4_standard_error(fit) ster = map2_dfr( ster, names(ster), ~ .x |> @@ -65,6 +128,67 @@ lmer_to_confidence_intervals_random_effects = function(fit){ pivot_wider(names_from = parameter, values_from = c(lower, mode, upper), names_glue = "{parameter}__{.value}") } +glmmTMBcore = function (geneList, fullFormula, reduced, data, family, control, + offset, modelData, designMatrix, hyp.matrix, ...) +{ + data[, "count"] <- geneList + fit <- try(suppressMessages(suppressWarnings(glmmTMB::glmmTMB(fullFormula, + data, family, control = control, offset = offset, ...))), + silent = TRUE) + if (!inherits(fit, "try-error")) { + singular <- conv <- NA + stdErr <- suppressWarnings(coef(summary(fit))$cond[, 2]) + vcov. <- vcov(fit)$cond + fixedEffects <- glmmTMB::fixef(fit)$cond + disp <- glmmTMB::sigma(fit) + msg <- fit$fit$message + stats <- setNames(c(disp, AIC(fit), as.numeric(logLik(fit))), + c("Dispersion", "AIC", "logLik")) + if (is.null(reduced)) { + test <- glmmSeq:::lmer_wald(fixedEffects, hyp.matrix, vcov.) + } + else { + fit2 <- try(suppressMessages(suppressWarnings(glmmTMB::glmmTMB(reduced, + data, family, control = control, offset = offset, + ...))), silent = TRUE) + if (!inherits(fit2, "try-error")) { + lrt <- anova(fit, fit2) + test <- list(chisq = setNames(lrt$Chisq[2], "LRT"), + df = lrt[2, "Chi Df"]) + } + else { + test <- list(chisq = NA, df = NA) + } + } + newY <- predict(fit, newdata = modelData, re.form = NA) + a <- designMatrix %*% vcov. + b <- as.matrix(a %*% t(designMatrix)) + predVar <- diag(b) + newSE <- suppressWarnings(sqrt(predVar)) + newLCI <- exp(newY - newSE * 1.96) + newUCI <- exp(newY + newSE * 1.96) + predictdf <- c(exp(newY), newLCI, newUCI) + + ci_random_effect_df = glmmTMB_to_confidence_intervals_random_effects(fit) + + + return(list( + stats = stats, + coef = fixedEffects, + stdErr = stdErr, + chisq = test$chisq, + df = test$df, + predict = predictdf, + optinfo = c(singular, conv), + ci_random_effect_df = ci_random_effect_df, + message = msg, + tryErrors = "")) + } + else { + return(list(stats = NA, coef = NA, stdErr = NA, chisq = NA, + df = NA, predict = NA, optinfo = NA, ci_random_effect_df = NA, tryErrors = fit[1])) + } +} glmerCore = function (geneList, fullFormula, reduced, data, control, offset, modelData, designMatrix, hyp.matrix, max_rows_for_matrix_multiplication = Inf, return_fit =FALSE, ...) @@ -156,7 +280,7 @@ glmerCore = function (geneList, fullFormula, reduced, data, control, offset, glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = NA, sizeFactors = NULL, reduced = NULL, modelData = NULL, designMatrix = NULL, - method = c("lme4", "glmmTMB"), control = NULL, family = nbinom2, + method = c("lme4", "glmmTMB"), control = NULL, family = glmmTMB::nbinom2, cores = 1, removeSingles = FALSE, zeroCount = 0.125, verbose = TRUE, returnList = FALSE, progress = FALSE, max_rows_for_matrix_multiplication = Inf, ...) { @@ -164,7 +288,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N method <- match.arg(method) if (is.null(control)) { control <- switch(method, lme4 = lme4::glmerControl(optimizer = "bobyqa"), - glmmTMB = glmmTMBControl()) + glmmTMB = glmmTMB::glmmTMBControl()) } countdata <- as.matrix(countdata) if (length(lme4::findbars(modelFormula)) == 0) { @@ -220,9 +344,9 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N modelData <- expand.grid(varLevels) colnames(modelData) <- reducedVars if (method == "glmmTMB") { - modelData <- cbind(modelData, .id = NA) - colnames(modelData)[which(colnames(modelData) == - ".id")] <- id + na_matrix = rep(NA, nrow(modelData) * length(id)) |> matrix(ncol = length(id)) + colnames(na_matrix) = id + modelData <- modelData |> cbind(na_matrix) } } if (is.null(designMatrix)) { @@ -258,13 +382,24 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N if (method == "lme4") { + + # FASTER - Stefano + control$calc.derivs = FALSE + if (!all(rownames(countdata) %in% names(dispersion))) { stop("Some dispersion values are missing") } fullList <- lapply(rownames(countdata), function(i) { list(y = countdata[i, ], dispersion = dispersion[i]) }) - if (Sys.info()["sysname"] == "Windows" & cores > 1) { + if(cores == 1){ + resultList <- lapply(fullList, function(geneList) { + glmerCore(geneList, fullFormula, reduced, + subsetMetadata, control, offset, modelData, + designMatrix, hyp.matrix,, max_rows_for_matrix_multiplication = max_rows_for_matrix_multiplication, ...) + }) + } + else if (Sys.info()["sysname"] == "Windows" & cores > 1) { cl <- parallel::makeCluster(cores) on.exit(stopCluster(cl)) dots <- list(...) @@ -340,10 +475,22 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N } } else { + + # FASTER - Stefano + control$profile=TRUE + fullList <- lapply(rownames(countdata), function(i) { countdata[i, ] }) - if (Sys.info()["sysname"] == "Windows" & cores > 1) { + + if(cores == 1){ + resultList <- lapply(fullList, function(geneList) { + glmmTMBcore(geneList, fullFormula, reduced, + subsetMetadata, family, control, offset, + modelData, designMatrix, hyp.matrix, ...) + }) + } + else if (Sys.info()["sysname"] == "Windows" & cores > 1) { cl <- makeCluster(cores) on.exit(stopCluster(cl)) dots <- list(...) @@ -368,7 +515,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N family = family, control = control, offset = offset, modelData = modelData, designMatrix = designMatrix, hyp.matrix = hyp.matrix), dots) - do.call(glmmSeq:::glmmTMBcore, args) + do.call(glmmTMBcore, args) }, cl = cl) } @@ -379,7 +526,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N family = family, control = control, offset = offset, modelData = modelData, designMatrix = designMatrix, hyp.matrix = hyp.matrix), dots) - do.call(glmmSeq:::glmmTMBcore, args) + do.call(glmmTMBcore, args) }) } @@ -396,7 +543,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N } resultList <- pbmcapply::pbmclapply(fullList, function(geneList) { - glmmSeq:::glmmTMBcore(geneList, fullFormula, reduced, + glmmTMBcore(geneList, fullFormula, reduced, subsetMetadata, family, control, offset, modelData, designMatrix, hyp.matrix, ...) }, mc.cores = cores) @@ -405,7 +552,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N } else { resultList <- mclapply(fullList, function(geneList) { - glmmSeq:::glmmTMBcore(geneList, fullFormula, reduced, + glmmTMBcore(geneList, fullFormula, reduced, subsetMetadata, family, control, offset, modelData, designMatrix, hyp.matrix, ...) }, mc.cores = cores) diff --git a/R/methods.R b/R/methods.R index e0086b77..af70fc74 100755 --- a/R/methods.R +++ b/R/methods.R @@ -2860,7 +2860,7 @@ such as batch effects (if applicable) in the formula. ), # glmmseq - tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmTMB") ~ get_differential_transcript_abundance_glmmSeq( + tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmtmb") ~ get_differential_transcript_abundance_glmmSeq( ., .formula, .sample = !!.sample, @@ -2876,7 +2876,7 @@ such as batch effects (if applicable) in the formula. ), # Else error - TRUE ~ stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\"") + TRUE ~ stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\", \"glmmseq_lme4\", \"glmmseq_glmmTMB\"") ) diff --git a/R/methods_SE.R b/R/methods_SE.R index 7c86345d..1a2e443e 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -1448,7 +1448,7 @@ such as batch effects (if applicable) in the formula. ), # glmmseq - tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmTMB") ~ get_differential_transcript_abundance_glmmSeq_SE( + tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmtmb") ~ get_differential_transcript_abundance_glmmSeq_SE( ., .formula, .abundance = !!.abundance, @@ -1463,7 +1463,7 @@ such as batch effects (if applicable) in the formula. ), # Else error - TRUE ~ stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\"") + TRUE ~ stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\", \"glmmseq_lme4\", \"glmmseq_glmmTMB\"") ) # Add results @@ -1485,7 +1485,7 @@ such as batch effects (if applicable) in the formula. tolower(method) == "limma_voom" ~ (.) %>% memorise_methods_used("voom"), tolower(method) == "limma_voom_sample_weights" ~ (.) %>% memorise_methods_used("voom_sample_weights"), tolower(method) == "deseq2" ~ (.) %>% memorise_methods_used("deseq2"), - tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmTMB") ~ (.) %>% memorise_methods_used("glmmseq"), + tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmtmb") ~ (.) %>% memorise_methods_used("glmmseq"), ~ stop("tidybulk says: method not supported") ) %>% From 846a2016efba767c38e8a0b1f796b4f5b1d3cc5c Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 14 Sep 2023 13:41:57 +1000 Subject: [PATCH 02/13] relax tests --- tests/testthat/test-bulk_methods.R | 26 +++++++++---------- .../test-bulk_methods_SummarizedExperiment.R | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 417c5827..d7c2d545 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -660,12 +660,12 @@ test_that("DESeq2 differential trancript abundance - no object",{ install.packages("BiocManager", repos = "https://cloud.r-project.org") BiocManager::install("DESeq2", ask = FALSE) } - + test_deseq2_df = DESeq2::DESeqDataSet(se_mini,design=~condition) colData(test_deseq2_df)$condition = factor(colData(test_deseq2_df)$condition) - + res_deseq2 = - test_deseq2_df |> + test_deseq2_df |> DESeq2::DESeq() |> DESeq2::results() @@ -835,13 +835,13 @@ test_that("DESeq2 differential trancript abundance - no object",{ test_that("differential trancript abundance - random effects",{ - my_input = + my_input = input_df |> identify_abundant(a, b, c, factor_of_interest = condition) |> mutate(time = time |> stringr::str_replace_all(" ", "_")) |> - + filter(b %in% c("ABCB4" , "ABCB9" , "ACAP1", "ACHE", "ACP5" , "ADAM28")) - + my_input |> test_differential_abundance( ~ condition + (1 + condition | time), @@ -849,7 +849,7 @@ test_that("differential trancript abundance - random effects",{ .transcript = b, .abundance = c, method = "glmmseq_lme4", - action="only", + action="only", cores = 1 ) |> pull(P_condition_adjusted) |> @@ -860,13 +860,13 @@ test_that("differential trancript abundance - random effects",{ ) # Custom dispersion - my_input = + my_input = my_input |> left_join( - my_input |> pivot_transcript(b) |> mutate(disp_ = 2 ), + my_input |> pivot_transcript(b) |> mutate(disp_ = 2 ), by = join_by(b, entrez, .abundant) ) - + my_input |> test_differential_abundance( @@ -875,15 +875,15 @@ test_that("differential trancript abundance - random effects",{ .transcript = b, .abundance = c, method = "glmmseq_lme4", - action="only", - cores = 1, + action="only", + cores = 1, .dispersion = disp_ ) |> pull(P_condition_adjusted) |> head(4) |> expect_equal( c(0.1081176, 0.1303558, 0.1303558, 0.1693276), - tolerance=1e-3 + tolerance=1e-2 ) }) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 067bd6bc..49366aed 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -518,7 +518,7 @@ test_that("differential trancript abundance - random effects SE",{ head(4) |> expect_equal( c(0.1153254, 0.1668555, 0.1668555 , NA), - tolerance=1e-3 + tolerance=1e-2 ) }) From 8bf8dba59c050e4192f77ba6bfc126aa9a6565ca Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 14 Sep 2023 16:03:16 +1000 Subject: [PATCH 03/13] fix ::: --- R/glmmSeq.R | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 019699ed..da9454b4 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -1,3 +1,104 @@ +#' @importFrom stats pchisq +organiseStats = function (resultList, test.stat) +{ + statsList <- lapply(resultList, "[[", "stats") + s <- do.call(rbind, statsList) + coefList <- lapply(resultList, "[[", "coef") + cf <- do.call(rbind, coefList) + SEList <- lapply(resultList, "[[", "stdErr") + stdErr <- do.call(rbind, SEList) + if (test.stat == "Wald") { + chisqList <- lapply(resultList, "[[", "chisq") + chisq <- do.call(rbind, chisqList) + dfList <- lapply(resultList, "[[", "df") + df <- do.call(rbind, dfList) + pvals <- pchisq(chisq, df = df, lower.tail = FALSE) + colnames(df) <- colnames(chisq) + colnames(pvals) <- colnames(chisq) + s <- list(res = s, coef = cf, stdErr = stdErr, Chisq = chisq, + Df = df, pvals = pvals) + } + else if (test.stat == "F") { + NumDF <- lapply(resultList, function(x) x$test[, 1]) + NumDF <- do.call(rbind, NumDF) + DenDF <- lapply(resultList, function(x) x$test[, 2]) + DenDF <- do.call(rbind, DenDF) + Fval <- lapply(resultList, function(x) x$test[, 3]) + Fval <- do.call(rbind, Fval) + pvals <- lapply(resultList, function(x) x$test[, 4]) + pvals <- do.call(rbind, pvals) + if (ncol(NumDF) == 1) { + colnames(NumDF) <- colnames(DenDF) <- colnames(Fval) <- colnames(pvals) <- rownames(resultList[[1]]$test) + } + s <- list(res = s, coef = cf, stdErr = stdErr, NumDF = NumDF, + DenDF = DenDF, Fval = Fval, pvals = pvals) + } + else { + LRT <- lapply(resultList, "[[", "test") + LRT <- do.call(rbind, LRT) + chisq <- LRT[, 1, drop = FALSE] + df <- LRT[, 2, drop = FALSE] + pvals <- LRT[, 3, drop = FALSE] + colnames(chisq) <- colnames(df) <- colnames(pvals) <- "LRT" + s <- list(res = s, coef = cf, stdErr = stdErr, Chisq = chisq, + Df = df, pvals = pvals) + } +} + +hyp_matrix = function (fullFormula, metadata, LHS) +{ + reduced2 <- nobars(fullFormula) + fac <- attr(terms(reduced2), "factors") + data2 <- metadata + data2[, LHS] <- rep(0, nrow(data2)) + dm2 <- model.matrix(reduced2, data2) + assign <- attr(dm2, "assign") + term.labels <- attr(terms(reduced2), "term.labels") + p <- length(assign) + I.p <- diag(p) + n.terms <- length(term.labels) + hyp.matrix.1 <- hyp.matrix.2 <- list() + for (i in seq_len(n.terms)) { + which.term <- i + subs.term <- which(assign == which.term) + relatives <- car_relatives(term.labels[i], term.labels, + fac) + subs.relatives <- NULL + for (relative in relatives) subs.relatives <- c(subs.relatives, + which(assign == relative)) + hyp.matrix.1[[i]] <- I.p[subs.relatives, , drop = FALSE] + hyp.matrix.2[[i]] <- I.p[c(subs.relatives, subs.term), + , drop = FALSE] + } + names(hyp.matrix.1) <- term.labels + return(list(hyp.matrix.1 = hyp.matrix.1, hyp.matrix.2 = hyp.matrix.2)) +} + +lmer_wald = function (fixef, hyp.matrix, vcov.) +{ + hyp.matrix.1 <- hyp.matrix[[1]] + hyp.matrix.2 <- hyp.matrix[[2]] + hyp.list <- lapply(seq_along(hyp.matrix.1), function(i) { + hyp.matrix.term <- if (nrow(hyp.matrix.1[[i]]) == 0) { + hyp.matrix.2[[i]] + } + else t(car_ConjComp(t(hyp.matrix.1[[i]]), t(hyp.matrix.2[[i]]), + vcov.)) + hyp.matrix.term <- hyp.matrix.term[!apply(hyp.matrix.term, + 1, function(x) all(x == 0)), , drop = FALSE] + hyp.matrix.term + }) + b <- fixef + V <- vcov. + chi_val <- lapply(hyp.list, function(L) { + as.vector(t(L %*% b) %*% solve(L %*% V %*% t(L)) %*% + (L %*% b)) + }) + df <- unlist(lapply(hyp.list, NROW)) + list(chisq = setNames(unlist(chi_val), names(hyp.matrix.1)), + df = df) +} + # From https://github.com/easystats/parameters lme4_standard_error = function (model) @@ -145,7 +246,7 @@ glmmTMBcore = function (geneList, fullFormula, reduced, data, family, control, stats <- setNames(c(disp, AIC(fit), as.numeric(logLik(fit))), c("Dispersion", "AIC", "logLik")) if (is.null(reduced)) { - test <- glmmSeq:::lmer_wald(fixedEffects, hyp.matrix, vcov.) + test <- lmer_wald(fixedEffects, hyp.matrix, vcov.) } else { fit2 <- try(suppressMessages(suppressWarnings(glmmTMB::glmmTMB(reduced, @@ -218,7 +319,7 @@ glmerCore = function (geneList, fullFormula, reduced, data, control, offset, stats <- setNames(c(disp, AIC(fit), as.numeric(logLik(fit))), c("Dispersion", "AIC", "logLik")) if (is.null(reduced)) { - test <- glmmSeq:::lmer_wald(fixedEffects, hyp.matrix, vcov.) + test <- lmer_wald(fixedEffects, hyp.matrix, vcov.) } else { fit2 <- try(suppressMessages(suppressWarnings(lme4::glmer(reduced, @@ -354,7 +455,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N } if (is.null(reduced)) { test.stat <- "Wald" - hyp.matrix <- glmmSeq:::hyp_matrix(fullFormula, metadata, "count") + hyp.matrix <- hyp_matrix(fullFormula, metadata, "count") } else { if (length(lme4::findbars(reduced)) == 0) { @@ -586,7 +687,7 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N optInfo <- t(vapply(resultList[noErr], function(x) { setNames(x$optinfo, c("Singular", "Conv")) }, FUN.VALUE = c(1, 1))) - s <- glmmSeq:::organiseStats(resultList[noErr], "Wald") + s <- organiseStats(resultList[noErr], "Wald") meanExp <- rowMeans(log2(countdata[noErr, , drop = FALSE] + 1)) # Add coeff for random effects From e41a9529be34a250563955d2d73004fa206c2f6d Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 14 Sep 2023 20:29:32 +1000 Subject: [PATCH 04/13] add dependency --- R/glmmSeq.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index da9454b4..664fc045 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -47,7 +47,7 @@ organiseStats = function (resultList, test.stat) hyp_matrix = function (fullFormula, metadata, LHS) { - reduced2 <- nobars(fullFormula) + reduced2 <- lme4::nobars(fullFormula) fac <- attr(terms(reduced2), "factors") data2 <- metadata data2[, LHS] <- rep(0, nrow(data2)) From 24673bd27d9d304f1476c5b1e430a724fc74dbc5 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 14 Sep 2023 20:34:40 +1000 Subject: [PATCH 05/13] add internal function --- R/glmmSeq.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 664fc045..8d142ed4 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -1,3 +1,16 @@ +## Source car:::relatives +car_relatives <- function (term, names, factors) +{ + is.relative <- function(term1, term2) { + all(!(factors[, term1] & (!factors[, term2]))) + } + if (length(names) == 1) + return(NULL) + which.term <- which(term == names) + (1:length(names))[-which.term][sapply(names[-which.term], + function(term2) is.relative(term, term2))] +} + #' @importFrom stats pchisq organiseStats = function (resultList, test.stat) { From f65d233ff3ebef63b01503404e20a16d4adce195 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 15 Sep 2023 20:12:48 +1000 Subject: [PATCH 06/13] fix bug for tmb for gene4s with error --- R/glmmSeq.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 8d142ed4..852d2568 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -1,3 +1,11 @@ +## Source car:::ConjComp +car_ConjComp <- function (X, Z = diag(nrow(X)), ip = diag(nrow(X))){ + xq <- qr(t(Z) %*% ip %*% X) + if (xq$rank == 0) + return(Z) + Z %*% qr.Q(xq, complete = TRUE)[, -(1:xq$rank)] +} + ## Source car:::relatives car_relatives <- function (term, names, factors) { @@ -300,7 +308,7 @@ glmmTMBcore = function (geneList, fullFormula, reduced, data, family, control, } else { return(list(stats = NA, coef = NA, stdErr = NA, chisq = NA, - df = NA, predict = NA, optinfo = NA, ci_random_effect_df = NA, tryErrors = fit[1])) + df = NA, predict = NA, optinfo = NA, ci_random_effect_df = NA, message = "COMPLETE FIT ERROR", tryErrors = fit[1])) } } From 0b395c4200b85c808e2f7d70d12fca0876866149 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 16 Sep 2023 14:54:21 +1000 Subject: [PATCH 07/13] avoid complete failure if vcov is NaN --- R/glmmSeq.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 852d2568..18e73b74 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -260,7 +260,14 @@ glmmTMBcore = function (geneList, fullFormula, reduced, data, family, control, if (!inherits(fit, "try-error")) { singular <- conv <- NA stdErr <- suppressWarnings(coef(summary(fit))$cond[, 2]) + vcov. <- vcov(fit)$cond + + if(vcov. |> as.numeric() |> is.nan() |> all()) + return(list(stats = NA, coef = NA, stdErr = NA, chisq = NA, + df = NA, predict = NA, optinfo = NA, ci_random_effect_df = NA, + message = "vcov. failed to calculate", tryErrors = fit[1])) + fixedEffects <- glmmTMB::fixef(fit)$cond disp <- glmmTMB::sigma(fit) msg <- fit$fit$message From 8bcd08ad5c24f84d31ff5d0654c880d59b832fc8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 17:55:37 +1000 Subject: [PATCH 08/13] unrelated details, use abundance for reduce dimension --- R/methods_SE.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 1a2e443e..11fd4c04 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -449,6 +449,8 @@ setMethod("cluster_elements", .reduce_dimensions_se = function(.data, + .abundance = NULL, + method, .dims = 2, top = 500, @@ -460,15 +462,18 @@ setMethod("cluster_elements", # Fix NOTEs . = NULL + .abundance = enquo(.abundance) + + if(.abundance |> quo_is_symbolic()) my_assay = quo_name(.abundance) + else my_assay = get_assay_scaled_if_exists_SE(.data) + my_assay = .data %>% # Filter abundant if performed filter_if_abundant_were_identified() %>% - assays() %>% - as.list() %>% - .[[get_assay_scaled_if_exists_SE(.data)]] %>% + assay(my_assay) %>% # Filter most variable genes keep_variable_transcripts_SE(top = top, transform = transform) %>% @@ -941,7 +946,7 @@ setMethod("remove_redundancy", apply(2, pmax, 0) } else { - stop("tidybulk says: the argument \"method\" must be combat_seq, combat, or limma_remove_batch_effect") + stop("tidybulk says: the argument \"method\" must be \"combat_seq\", \"combat\", or \"limma_remove_batch_effect\"") } From 69d6d33d2b0fe721f84a871580ba9e6585acae96 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 18:01:21 +1000 Subject: [PATCH 09/13] add dependency --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 58ce8456..e75cee39 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -172,6 +172,7 @@ importFrom(stats,median) importFrom(stats,model.matrix) importFrom(stats,na.omit) importFrom(stats,p.adjust) +importFrom(stats,pchisq) importFrom(stats,plogis) importFrom(stats,prcomp) importFrom(stats,qlogis) From 20c8fb55d2ad684a9f62c60d34fe3b5d5357ed2a Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 18:26:13 +1000 Subject: [PATCH 10/13] add glmmSeq class --- R/glmmSeq.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 18e73b74..634ac8fc 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -407,6 +407,20 @@ glmerCore = function (geneList, fullFormula, reduced, data, control, offset, } +setClass("GlmmSeq", slots = list( + info = "list", + formula = "formula", + stats = "list_or_matrix", + predict = "df_or_matrix", + reduced = "formulaOrNULL", + countdata = "df_or_matrix", + metadata = "df_or_matrix", + modelData = "df_or_matrix", + optInfo = "matrix", + errors = "character_or_list", + vars = "list" +)) + glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = NA, sizeFactors = NULL, reduced = NULL, modelData = NULL, designMatrix = NULL, method = c("lme4", "glmmTMB"), control = NULL, family = glmmTMB::nbinom2, From a276b5a3755881c6501c578e98f74e2c6777b727 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 18:30:02 +1000 Subject: [PATCH 11/13] add class detals --- R/glmmSeq.R | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 634ac8fc..4ef8699f 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -407,6 +407,34 @@ glmerCore = function (geneList, fullFormula, reduced, data, control, offset, } +# From glmmSeq + +# Class definitions + +setClassUnion("character_or_list", c("character", "list")) +setClassUnion("df_or_matrix", c("data.frame", "matrix")) +setClassUnion("list_or_matrix", c("list", "matrix")) +setClassUnion("formulaOrNULL", c("formula", "NULL")) + +#' An S4 class to define the glmmSeq output +#' +#' @keywords internal +#' @noRd +#' +#' @slot info List including the matched call, dispersions, offset, designMatrix +#' @slot formula The model formula +#' @slot stats Statistics from fitted models +#' @slot predict Predicted values +#' @slot reduced Optional reduced formula for LRT +#' @slot countdata The input expression data with count data in rows +#' @slot metadata The input metadata +#' @slot modelData Model data for predictions +#' @slot optInfo Information on whether the model was singular or converged +#' @slot errors Any errors +#' @slot vars List of variables stored from the original call, including the +#' `id` variable (by default automatically identified from the random effect +#' term in the model) and `removeSingles` argument + setClass("GlmmSeq", slots = list( info = "list", formula = "formula", From be1f5c61233c6c5f06c40b5fb6e3cba6949b52ae Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 20:56:19 +1000 Subject: [PATCH 12/13] add summary method --- R/functions.R | 16 +++++----- R/functions_SE.R | 2 +- R/glmmSeq.R | 78 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/R/functions.R b/R/functions.R index 82f9fc2a..e0e5c561 100755 --- a/R/functions.R +++ b/R/functions.R @@ -693,7 +693,7 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, .abundance = enquo(.abundance) .sample_total_read_count = enquo(.sample_total_read_count) .dispersion = enquo(.dispersion) - + # Check if omit_contrast_in_colnames is correctly setup if(omit_contrast_in_colnames & length(.contrasts) > 1){ warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present") @@ -757,33 +757,33 @@ get_differential_transcript_abundance_glmmSeq <- function(.data, # Reorder counts counts = counts[,rownames(metadata),drop=FALSE] - + if(quo_is_symbolic(.dispersion)) dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe() else dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)) - + # # Check dispersion # if(!names(dispersion) |> sort() |> identical( # rownames(counts) |> # sort() # )) stop("tidybulk says: The features in the dispersion vector do not overlap with the feature in the assay") - + # Make sure the order matches the counts dispersion = dispersion[rownames(counts)] - - glmmSeq_object = + + glmmSeq_object = glmmSeq( .formula, countdata = counts , metadata = metadata, dispersion = dispersion, - progress = TRUE, + progress = TRUE, method = method |> str_remove("(?i)^glmmSeq_"), ... ) glmmSeq_object |> - summary() |> + summary_lmmSeq() |> as_tibble(rownames = "gene") |> mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> diff --git a/R/functions_SE.R b/R/functions_SE.R index fcee85d0..5f067aec 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -1158,7 +1158,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data, ) glmmSeq_object |> - summary() |> + summary_lmmSeq() |> as_tibble(rownames = "transcript") |> mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> diff --git a/R/glmmSeq.R b/R/glmmSeq.R index 4ef8699f..268d6907 100644 --- a/R/glmmSeq.R +++ b/R/glmmSeq.R @@ -802,3 +802,81 @@ glmmSeq = function (modelFormula, countdata, metadata, id = NULL, dispersion = N metadata = subsetMetadata, modelData = modelData, optInfo = optInfo, errors = outputErrors, vars = list(id = id, removeSingles = removeSingles)) } + +# From GlmmSeq + +#' Summarise a 'glmmSeq'/'lmmSeq' object +#' +#' Summarise results from [glmmSeq] or [lmmSeq] analysis +#' +#' @keywords internal +#' @noRd +#' +#' @param object an object of class `"GlmmSeq"` or `"lmmSeq"` +#' @param gene an optional character value specifying a single gene whose +#' results are summarised +#' @param digits integer, used for number formatting +#' @param ... arguments to be passed to other methods +#' @return +#' If `gene=NULL` a dataframe of results for all genes is returned. Otherwise +#' the output of GLMM or LMM model results for a single gene including +#' coefficients, test statistics, p-values is printed and the dataframe for all +#' genes is returned invisibly. +#' @seealso [glmmSeq()], [lmmSeq()] +summary_lmmSeq <- function(object, + gene = NULL, + digits = max(3L, getOption("digits") - 3L), ...) { + if (is.null(gene)) { + statSet <- names(object@stats) + gp <- lapply(statSet, function(i) { + out <- object@stats[[i]] + if (i %in% c("Chisq", "Fval", "Df", "NumDF", "DenDF")) colnames(out) <- paste(i, colnames(out), sep = "_") + if (i == "pvals") colnames(out) <- paste0("P_", colnames(out)) + if (i == "stdErr") colnames(out) <- paste0("se_", colnames(out)) + out + }) + do.call(cbind, gp) + } else { + out <- lapply(object@stats, function(i) i[gene, ]) + if (is(object, "GlmmSeq")) { + cat("Generalised linear mixed model\n") + cat(paste0("Method: ", object@info$method, "\n")) + if (object@info$method == "lme4") { + cat("Family: Negative Binomial\n") + } else { + cat(paste0("Family: ", object@info$family, "\n")) + } + } else { + cat("Linear mixed model\n") + } + cat("Formula: ") + print(object@formula) + print(out$res) + cat("\nFixed effects:\n") + cfdf <- data.frame(Estimate = out$coef, + `Std. Error` = out$stdErr, check.names = FALSE) + print(cfdf, digits = digits) + if (object@info$test.stat == "Wald") { + cat("\nAnalysis of Deviance Table (Type II Wald chisquare tests)\n") + testdf <- data.frame(Chisq = out$Chisq, Df = out$Df, + `Pr(>Chisq)` = out$pvals, + row.names = colnames(object@stats$Chisq), + check.names = FALSE) + } else if (object@info$test.stat == "LRT") { + cat("\nLikelihood ratio test\nReduced formula: ") + print(object@reduced) + testdf <- data.frame(Chisq = out$Chisq, Df = out$Df, + `Pr(>Chisq)` = out$pvals, + row.names = " ", + check.names = FALSE) + } else { + cat("\nType III Analysis of Variance Table with Satterthwaite's method\n") + testdf <- data.frame(NumDF = out$NumDF, DenDF = out$DenDF, + `F value` = out$Fval, `Pr(>F)` = out$pvals, + row.names = colnames(object@stats$Fval), + check.names = FALSE) + } + print(testdf, digits = digits) + invisible(out) + } +} From a02224b4fe76f875185c4de77c1265bcbd3171b5 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 17 Sep 2023 21:31:36 +1000 Subject: [PATCH 13/13] fix dependencies --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 1c97a1ae..f76747cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,6 +83,7 @@ Suggests: pbapply, pbmcapply, lme4, + glmmTMB, MASS VignetteBuilder: knitr