Merge branch 'master' into solve-conflicts-tidyverse

stemangiola · Jul 27, 2023 · 43a8341 · 43a8341
2 parents 06f3e7c + 381c6e0
commit 43a8341
Show file tree

Hide file tree

Showing 16 changed files with 1,738 additions and 278 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,5 @@ egsea_report_*
 /doc*
 /Meta/
 /doc/
+_targets.R
+_targets*
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,8 +17,8 @@ Depends:
 Imports:
     tibble,
     readr,
-    dplyr,
     ggplot2,
+    dplyr (>= 1.1.0),
     magrittr,
     tidyr,
     stringi,
@@ -78,7 +78,12 @@ Suggests:
     igraph,
     EGSEA,
     IRanges,
-    here
+    here,
+    glmmSeq,
+    pbapply,
+    pbmcapply,
+    lme4,
+    MASS
 VignetteBuilder: 
     knitr
 RdMacros:

diff --git a/NAMESPACE b/NAMESPACE
@@ -39,6 +39,7 @@ export(log10_reverse_trans)
 export(logit_trans)
 export(pivot_sample)
 export(pivot_transcript)
+export(quantile_normalise_abundance)
 export(reduce_dimensions)
 export(remove_redundancy)
 export(rotate_dimensions)
@@ -54,6 +55,7 @@ export(tibble)
 export(tidybulk)
 export(tidybulk_SAM_BAM)
 exportMethods(as_SummarizedExperiment)
+exportMethods(quantile_normalise_abundance)
 exportMethods(scale_abundance)
 exportMethods(tidybulk)
 exportMethods(tidybulk_SAM_BAM)
@@ -87,6 +89,7 @@ importFrom(dplyr,group_by_drop_default)
 importFrom(dplyr,group_cols)
 importFrom(dplyr,if_else)
 importFrom(dplyr,inner_join)
+importFrom(dplyr,join_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,mutate_if)
@@ -109,13 +112,15 @@ importFrom(magrittr,"%$%")
 importFrom(magrittr,"%>%")
 importFrom(magrittr,divide_by)
 importFrom(magrittr,equals)
+importFrom(magrittr,extract2)
 importFrom(magrittr,multiply_by)
 importFrom(magrittr,set_colnames)
 importFrom(magrittr,set_rownames)
 importFrom(parallel,mclapply)
 importFrom(purrr,as_mapper)
 importFrom(purrr,map)
 importFrom(purrr,map2)
+importFrom(purrr,map2_dfc)
 importFrom(purrr,map2_dfr)
 importFrom(purrr,map_chr)
 importFrom(purrr,map_dfr)

diff --git a/R/functions.R b/R/functions.R
@@ -355,8 +355,6 @@ get_scaled_counts_bulk <- function(.data,
 
 }
 
-
-
 #' Get differential transcription information to a tibble using edgeR.
 #'
 #' @keywords internal
@@ -643,6 +641,158 @@ get_differential_transcript_abundance_bulk <- function(.data,
 		}
 }
 
+#' Get differential transcription information to a tibble using glmmSeq
+#'
+#' @keywords internal
+#' @noRd
+#'
+#' 
+#' 
+#' @import tibble
+#' @importFrom magrittr set_colnames
+#' @importFrom stats model.matrix
+#' @importFrom utils install.packages
+#' @importFrom purrr when
+#' @importFrom rlang inform
+#' @importFrom tidyr spread
+#' @importFrom tidyr pivot_wider
+#' @importFrom dplyr slice
+#'
+#'
+#' @param .data A tibble
+#' @param .formula a formula with no response variable, referring only to numeric variables
+#' @param .sample The name of the sample column
+#' @param .transcript The name of the transcript/gene column
+#' @param .abundance The name of the transcript/gene abundance column
+#' @param .contrasts A character vector. Not used for this method
+#' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)
+#' @param test_above_log2_fold_change A positive real value. This works for edgeR and limma_voom methods. It uses the `treat` function, which tests that the difference in abundance is bigger than this threshold rather than zero \url{https://pubmed.ncbi.nlm.nih.gov/19176553}.
+#' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
+#' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames.
+#' @param .sample_total_read_count
+#'
+#' @return A tibble with glmmSeq results
+#'
+get_differential_transcript_abundance_glmmSeq <- function(.data,
+                                                               .formula,
+                                                               .sample = NULL,
+                                                               .transcript = NULL,
+                                                               .abundance = NULL,
+                                                               .contrasts = NULL,
+                                                               method ,
+                                                               test_above_log2_fold_change = NULL,
+                                                               scaling_method = NULL,
+                                                               omit_contrast_in_colnames = FALSE,
+                                                               prefix = "",
+                                                               .sample_total_read_count = NULL,
+                                                          ...) {
+  # Get column names
+  .sample = enquo(.sample)
+  .transcript = enquo(.transcript)
+  .abundance = enquo(.abundance)
+  .sample_total_read_count = enquo(.sample_total_read_count)
+
+  # Check if omit_contrast_in_colnames is correctly setup
+  if(omit_contrast_in_colnames & length(.contrasts) > 1){
+    warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
+    omit_contrast_in_colnames = FALSE
+  }
+
+  # # Specify the design column tested
+  # if(is.null(.contrasts))
+  #   message(
+  #     sprintf(
+  #       "tidybulk says: The design column being tested is %s",
+  #       design %>% colnames %>% .[1]
+  #     )
+  #   )
+
+  # # If I don't have intercept or I have categorical factor of interest BUT I don't have contrasts
+  # if(
+  #   is.null(.contrasts) &
+  #   (
+  #     (
+  #       ! .data |> pull(parse_formula(.formula)[1]) |> is("numeric") &
+  #       .data |> pull(parse_formula(.formula)[1]) |> unique() |> length() |> gt(2)
+  #     ) |
+  #     colnames(design)[1] != "(Intercept)"
+  #   )
+  # )
+  #   warning("tidybulk says: If you have (i) an intercept-free design (i.e. ~ 0 + factor) or you have a categorical factor of interest with more than 2 values you should use the `contrasts` argument.")
+  # 
+  # my_contrasts =
+  #   .contrasts %>%
+  #   ifelse_pipe(length(.) > 0,
+  #               ~ limma::makeContrasts(contrasts = .x, levels = design),
+  #               ~ NULL)
+
+  # Check if package is installed, otherwise install
+  if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
+    message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
+    if (!requireNamespace("BiocManager", quietly = TRUE))
+      install.packages("BiocManager", repos = "https://cloud.r-project.org")
+    BiocManager::install("edgeR", ask = FALSE)
+  }
+
+  # Check if package is installed, otherwise install
+  if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
+    message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
+    if (!requireNamespace("BiocManager", quietly = TRUE))
+      install.packages("BiocManager", repos = "https://cloud.r-project.org")
+    BiocManager::install("glmmSeq", ask = FALSE)
+  }
+
+  metadata = 
+    .data |> 
+    pivot_sample(!!.sample) |> 
+    as_data_frame(rownames = !!.sample)
+
+  counts = 
+    .data %>%
+    select(!!.transcript,!!.sample,!!.abundance) %>%
+    spread(!!.sample,!!.abundance) %>%
+    as_matrix(rownames = !!.transcript)
+
+  # Reorder counts
+  counts = counts[,rownames(metadata),drop=FALSE]
+
+  glmmSeq_object = 
+    glmmSeq( .formula,
+          countdata = counts ,
+          metadata =   metadata,
+          dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
+          progress = TRUE, 
+          method = method |> str_remove("(?i)^glmmSeq_"),
+          ...
+  ) 
+
+  glmmSeq_object |> 
+    summary() |> 
+    as_tibble(rownames = "gene") |>
+    mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |> 
+
+    # Attach attributes
+    reattach_internals(.data) %>%
+
+    # select method
+    memorise_methods_used("glmmSeq") |> 
+
+    # Add raw object
+    attach_to_internals(glmmSeq_object, "glmmSeq") %>%
+    # Communicate the attribute added
+    {
+
+      rlang::inform("\ntidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq",  .frequency = "once")
+
+      (.)
+    }  %>%
+
+    # Attach prefix
+    setNames(c(
+      colnames(.)[1],
+      sprintf("%s%s", prefix, colnames(.)[2:ncol(.)])
+    ))
+}
 
 #' Get differential transcription information to a tibble using voom.
 #'