Merge pull request #313 from stemangiola/quantile-normalise-custom-ta…

…rget Quantile normalise custom target
stemangiola · May 15, 2024 · dfbd885 · dfbd885
2 parents 87b2d87 + d64967f
commit dfbd885
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 18 deletions.
diff --git a/R/methods.R b/R/methods.R
@@ -586,19 +586,25 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance)
 #' @param .sample The name of the sample column
 #' @param .transcript The name of the transcript/gene column
 #' @param .abundance The name of the transcript/gene abundance column
-#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible.
+#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale datasets.
+#' @param target_distribution A numeric vector. If NULL the target distribution will be calculated by preprocessCore. This argument only affects the "preprocesscore_normalize_quantiles_use_target" method.
 #' @param action A character string, either "add" (default) or "only". "add" joins the new information to the input tbl; "only" returns a non-redundant tbl with just the new information.
 #'
 #'
-#' @details Scales transcript abundance compensating for sequencing depth
-#' (e.g., with TMM algorithm, Robinson and Oshlack doi.org/10.1186/gb-2010-11-3-r25).
-#' Lowly transcribed transcripts/genes (defined with minimum_counts and minimum_proportion parameters)
-#' are filtered out from the scaling procedure.
-#' The scaling inference is then applied back to all unfiltered data.
+#' @details Transform the feature abundance across samples so that all samples have the same quantile distribution (using limma or preprocessCore, depending on `method`).
 #'
 #' Underlying method
-#' edgeR::calcNormFactors(.data, method = c("TMM","TMMwsp","RLE","upperquartile"))
-#'
+#' 
+#' If `limma_normalize_quantiles` is chosen
+#' 
+#' .data |> limma::normalizeQuantiles()
+#'  
+#'  If `preprocesscore_normalize_quantiles_use_target` is chosen
+#'  
+#' .data |> 
+#'    preprocessCore::normalize.quantiles.use.target(
+#'       target = preprocessCore::normalize.quantiles.determine.target(.data)
+#'    )
 #'
 #'
 #' @return A tbl object with additional columns with scaled data as `<NAME OF COUNT COLUMN>_scaled`
@@ -621,6 +627,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
                                                     .transcript = NULL,
                                                     .abundance = NULL,
                                                     method = "limma_normalize_quantiles",
+                                                    target_distribution = NULL,
                                                     action = "add")
   standardGeneric("quantile_normalise_abundance"))
 
@@ -630,6 +637,8 @@ setGeneric("quantile_normalise_abundance", function(.data,
                                           .transcript = NULL,
                                           .abundance = NULL,
                                           method = "limma_normalize_quantiles",
+                                          target_distribution = NULL,
+
                                           action = "add")
 {
 
@@ -685,10 +694,12 @@ setGeneric("quantile_normalise_abundance", function(.data,
       BiocManager::install("preprocessCore", ask = FALSE)
     }
 
+    if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+
     .data_norm_quant =
       .data_norm |>
       preprocessCore::normalize.quantiles.use.target(
-        target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+        target = target_distribution
       )
 
     colnames(.data_norm_quant) = .data_norm |> colnames()

diff --git a/R/methods_SE.R b/R/methods_SE.R
@@ -248,6 +248,7 @@ setMethod("scale_abundance",
                                .transcript = NULL,
                                .abundance = NULL,
                                method = "limma_normalize_quantiles",
+                               target_distribution = NULL,
                                action = NULL) {
 
 
@@ -311,10 +312,12 @@ setMethod("scale_abundance",
       assay(my_assay) |>
       as.matrix()
 
+    if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+
     .data_norm =
       .data_norm |>
       preprocessCore::normalize.quantiles.use.target(
-        target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
+        target = target_distribution
       )
 
     colnames(.data_norm) = .data |> assay(my_assay) |> colnames()

diff --git a/man/quantile_normalise_abundance-methods.Rd b/man/quantile_normalise_abundance-methods.Rd
diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R
@@ -105,7 +105,21 @@ test_that("quantile normalisation",{
           filter(a=="SRR1740035" & b=="ABCB9") |>
           dplyr::pull(c_scaled)
       )
-
+
+    target_distribution = 
+      se_mini |> 
+      assay( "count") |> 
+      as.matrix() |> 
+      preprocessCore::normalize.quantiles.determine.target() 
+
+    se_mini |> 
+      quantile_normalise_abundance(
+        method = "preprocesscore_normalize_quantiles_use_target", 
+        target_distribution = target_distribution
+      ) |> 
+      expect_no_error()
+
+
 })
 
 test_that("tidybulk SummarizedExperiment normalisation subset",{