stemangiola · stemangiola · May 16, 2024 · Apr 29, 2024 · Apr 29, 2024 · May 10, 2024
diff --git a/.gitignore b/.gitignore
@@ -22,3 +22,4 @@ _targets.R
 _targets*
 .DS_Store
 ._.DS_Store
+counts_SE.rda
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 1.17.0
+Version: 1.17.2
 Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "[email protected]",

diff --git a/R/methods.R b/R/methods.R
@@ -586,19 +586,25 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance)
 #' @param .sample The name of the sample column
 #' @param .transcript The name of the transcript/gene column
 #' @param .abundance The name of the transcript/gene abundance column
-#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible.
+#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale datasets.
+#' @param target_distribution A numeric vector. If NULL, the target distribution will be calculated by preprocessCore. This argument only affects the "preprocesscore_normalize_quantiles_use_target" method.
 #' @param action A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information.
 #'
 #'
-#' @details Scales transcript abundance compensating for sequencing depth
-#' (e.g., with TMM algorithm, Robinson and Oshlack doi.org/10.1186/gb-2010-11-3-r25).
-#' Lowly transcribed transcripts/genes (defined with minimum_counts and minimum_proportion parameters)
-#' are filtered out from the scaling procedure.
-#' The scaling inference is then applied back to all unfiltered data.
+#' @details Transform the feature abundance across samples so that all samples have the same quantile distribution (using limma or preprocessCore, depending on `method`).
 #'
 #' Underlying method
-#' edgeR::calcNormFactors(.data, method = c("TMM","TMMwsp","RLE","upperquartile"))
-#'
+#' 
+#' If `limma_normalize_quantiles` is chosen
+#' 
+#' .data |>limma::normalizeQuantiles()
+#'  
+#' If `preprocesscore_normalize_quantiles_use_target` is chosen
+#'  
+#' .data |> 
+#'    preprocessCore::normalize.quantiles.use.target(
+#'       target = preprocessCore::normalize.quantiles.determine.target(.data)
+#'    )
 #'
 #'
 #' @return A tbl object with additional columns with scaled data as `<NAME OF COUNT COLUMN>_scaled`
@@ -621,6 +627,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
                                                     .transcript = NULL,
                                                     .abundance = NULL,
                                                     method = "limma_normalize_quantiles",
+                                                    target_distribution = NULL,
                                                     action = "add")
   standardGeneric("quantile_normalise_abundance"))
 
@@ -630,6 +637,8 @@ setGeneric("quantile_normalise_abundance", function(.data,
                                           .transcript = NULL,
                                           .abundance = NULL,
                                           method = "limma_normalize_quantiles",
+                                          target_distribution = NULL,
+
                                           action = "add")
 {
 
@@ -685,10 +694,12 @@ setGeneric("quantile_normalise_abundance", function(.data,
       BiocManager::install("preprocessCore", ask = FALSE)
     }
 
+    if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+
     .data_norm_quant =
       .data_norm |>
       preprocessCore::normalize.quantiles.use.target(
-        target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+        target = target_distribution
       )
 
     colnames(.data_norm_quant) = .data_norm |> colnames()

diff --git a/R/methods_SE.R b/R/methods_SE.R
@@ -248,6 +248,7 @@ setMethod("scale_abundance",
                                .transcript = NULL,
                                .abundance = NULL,
                                method = "limma_normalize_quantiles",
+                               target_distribution = NULL,
                                action = NULL) {
 
 
@@ -311,10 +312,12 @@ setMethod("scale_abundance",
       assay(my_assay) |>
       as.matrix()
 
+    if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+
     .data_norm =
       .data_norm |>
       preprocessCore::normalize.quantiles.use.target(
-        target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+        target = target_distribution
       )
 
     colnames(.data_norm) = .data |> assay(my_assay) |> colnames()
@@ -1476,22 +1479,21 @@ such as batch effects (if applicable) in the formula.
   else
     stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\", \"glmmseq_lme4\", \"glmmseq_glmmTMB\"")
 
-
-  statistics =
-    my_differential_abundance$result %>%
-    as_matrix(rownames = "transcript") %>%
-    .[match(rownames(rowData(.data)), rownames(.)),,drop=FALSE]
-
   # If action is get just return the statistics
-  if(action == "get") return(statistics)
-
+  if(action == "get") return(my_differential_abundance$result)
+  
 	# Add results
-	rowData(.data) = rowData(.data) %>% cbind(statistics)
+	rowData(.data) = rowData(.data) %>% cbind(
+
+	  # Parse the statistics
+	  my_differential_abundance$result %>%
+	    as_matrix(rownames = "transcript") %>%
+	    .[match(rownames(rowData(.data)), rownames(.)),,drop=FALSE]
+	)
 
 
 	.data %>%
 
-
 		# Add bibliography
 		when(
 			tolower(method) ==  "edger_likelihood_ratio" ~ (.) %>% memorise_methods_used(c("edger", "edgeR_likelihood_ratio")),