diff --git a/R/methods.R b/R/methods.R index f03fea94..4bf025e5 100755 --- a/R/methods.R +++ b/R/methods.R @@ -586,19 +586,25 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' @param .sample The name of the sample column #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column -#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible. +#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale datasets. +#' @param target_distribution A numeric vector. If NULL the target distribution will be calculated by preprocessCore. This argument only affects the "preprocesscore_normalize_quantiles_use_target" method. #' @param action A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information. #' #' -#' @details Scales transcript abundance compensating for sequencing depth -#' (e.g., with TMM algorithm, Robinson and Oshlack doi.org/10.1186/gb-2010-11-3-r25). -#' Lowly transcribed transcripts/genes (defined with minimum_counts and minimum_proportion parameters) -#' are filtered out from the scaling procedure. -#' The scaling inference is then applied back to all unfiltered data. +#' @details Transform the feature abundance across samples so that they have the same quantile distribution (using limma or preprocessCore, depending on the chosen method). 
#' #' Underlying method -#' edgeR::calcNormFactors(.data, method = c("TMM","TMMwsp","RLE","upperquartile")) -#' +#' +#' If `limma_normalize_quantiles` is chosen +#' +#' .data |>limma::normalizeQuantiles() +#' +#' If `preprocesscore_normalize_quantiles_use_target` is chosen +#' +#' .data |> +#' preprocessCore::normalize.quantiles.use.target( +#' target = preprocessCore::normalize.quantiles.determine.target(.data) +#' ) #' #' #' @return A tbl object with additional columns with scaled data as `_scaled` @@ -621,6 +627,7 @@ setGeneric("quantile_normalise_abundance", function(.data, .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = "add") standardGeneric("quantile_normalise_abundance")) @@ -630,6 +637,8 @@ setGeneric("quantile_normalise_abundance", function(.data, .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, + action = "add") { @@ -685,10 +694,12 @@ setGeneric("quantile_normalise_abundance", function(.data, BiocManager::install("preprocessCore", ask = FALSE) } + if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm) + .data_norm_quant = .data_norm |> preprocessCore::normalize.quantiles.use.target( - target = preprocessCore::normalize.quantiles.determine.target(.data_norm) + target = target_distribution ) colnames(.data_norm_quant) = .data_norm |> colnames() diff --git a/R/methods_SE.R b/R/methods_SE.R index 8aa478b5..89edcddf 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -248,6 +248,7 @@ setMethod("scale_abundance", .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = NULL) { @@ -311,10 +312,12 @@ setMethod("scale_abundance", assay(my_assay) |> as.matrix() + if(is.null(target_distribution)) target_distribution = preprocessCore::normalize.quantiles.determine.target(.data_norm) + .data_norm = .data_norm |> 
preprocessCore::normalize.quantiles.use.target( - target = preprocessCore::normalize.quantiles.determine.target(.data_norm) + target = target_distribution ) colnames(.data_norm) = .data |> assay(my_assay) |> colnames() diff --git a/man/quantile_normalise_abundance-methods.Rd b/man/quantile_normalise_abundance-methods.Rd index fdaf6e46..5c527cd8 100644 --- a/man/quantile_normalise_abundance-methods.Rd +++ b/man/quantile_normalise_abundance-methods.Rd @@ -16,6 +16,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = "add" ) @@ -25,6 +26,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = "add" ) @@ -34,6 +36,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = "add" ) @@ -43,6 +46,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = "add" ) @@ -52,6 +56,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = NULL ) @@ -61,6 +66,7 @@ quantile_normalise_abundance( .transcript = NULL, .abundance = NULL, method = "limma_normalize_quantiles", + target_distribution = NULL, action = NULL ) } @@ -73,7 +79,9 @@ quantile_normalise_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{method}{A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible.} +\item{method}{A character string. 
Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale datasets.} + +\item{target_distribution}{A numeric vector. If NULL the target distribution will be calculated by preprocessCore. This argument only affects the "preprocesscore_normalize_quantiles_use_target" method.} \item{action}{A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information.} } @@ -96,14 +104,20 @@ quantile_normalise_abundance() takes as input A `tbl` (with at least three colum \details{ `r lifecycle::badge("maturing")` -Scales transcript abundance compensating for sequencing depth -(e.g., with TMM algorithm, Robinson and Oshlack doi.org/10.1186/gb-2010-11-3-r25). -Lowly transcribed transcripts/genes (defined with minimum_counts and minimum_proportion parameters) -are filtered out from the scaling procedure. -The scaling inference is then applied back to all unfiltered data. +Transform the feature abundance across samples so that they have the same quantile distribution (using limma or preprocessCore, depending on the chosen method). 
Underlying method -edgeR::calcNormFactors(.data, method = c("TMM","TMMwsp","RLE","upperquartile")) + +If `limma_normalize_quantiles` is chosen + +.data |>limma::normalizeQuantiles() + + If `preprocesscore_normalize_quantiles_use_target` is chosen + +.data |> + preprocessCore::normalize.quantiles.use.target( + target = preprocessCore::normalize.quantiles.determine.target(.data) + ) } \examples{ diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 26df1262..3f9ed97e 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -105,7 +105,21 @@ test_that("quantile normalisation",{ filter(a=="SRR1740035" & b=="ABCB9") |> dplyr::pull(c_scaled) ) - + + target_distribution = + se_mini |> + assay( "count") |> + as.matrix() |> + preprocessCore::normalize.quantiles.determine.target() + + se_mini |> + quantile_normalise_abundance( + method = "preprocesscore_normalize_quantiles_use_target", + target_distribution = target_distribution + ) |> + expect_no_error() + + }) test_that("tidybulk SummarizedExperiment normalisation subset",{