Skip to content

Commit

Permalink
add method
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed Dec 13, 2023
1 parent 915117b commit a22afaa
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 3 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export(quantile_normalise_abundance)
export(reduce_dimensions)
export(remove_redundancy)
export(rename)
export(resolve_complete_confounders_of_non_interest)
export(right_join)
export(rotate_dimensions)
export(rowwise)
Expand Down Expand Up @@ -141,9 +142,11 @@ importFrom(purrr,map2_dfc)
importFrom(purrr,map2_dfr)
importFrom(purrr,map_chr)
importFrom(purrr,map_dfr)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,reduce)
importFrom(purrr,when)
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,dots_list)
importFrom(rlang,dots_values)
Expand All @@ -159,6 +162,7 @@ importFrom(rlang,quo_is_symbol)
importFrom(rlang,quo_is_symbolic)
importFrom(rlang,quo_name)
importFrom(rlang,quo_squash)
importFrom(rlang,set_names)
importFrom(scales,extended_breaks)
importFrom(scales,label_scientific)
importFrom(scales,log_breaks)
Expand Down
89 changes: 86 additions & 3 deletions R/functions_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,7 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,
object = .formula |> lme4::nobars(),
data = metadata
)

if(quo_is_symbolic(.dispersion))
dispersion = rowData(.data)[,quo_name(.dispersion),drop=FALSE] |> as_tibble(rownames = feature__$name) |> deframe()
else
Expand All @@ -1156,8 +1156,8 @@ get_differential_transcript_abundance_glmmSeq_SE <- function(.data,

# Scaling
sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method)


glmmSeq_object =
glmmSeq( .formula,
countdata = counts ,
Expand Down Expand Up @@ -1490,3 +1490,86 @@ univariable_differential_tissue_composition_SE = function(

unnest(surv_test, keep_empty = TRUE)
}


#' Resolve Complete Confounders of Non-Interest
#'
#' This function processes a SummarizedExperiment object to handle confounders
#' that are not of interest in the analysis. It deals with two factors, adjusting
#' the data by nesting and summarizing over these factors.
#'
#' @importFrom rlang enquo
#' @importFrom rlang quo_name
#' @importFrom rlang !!
#' @importFrom tidyr unnest
#' @importFrom tidyr nest
#' @importFrom dplyr mutate
#' @importFrom dplyr arrange
#' @importFrom dplyr slice
#' @importFrom dplyr pull
#' @importFrom dplyr select
#' @importFrom purrr map_int
#' @importFrom dplyr if_else
#'
#' @param se A SummarizedExperiment object that contains the data to be processed.
#' @param .factor_1 A symbol or quosure representing the first factor variable in `se`.
#' @param .factor_2 A symbol or quosure representing the second factor variable in `se`.
#' @return A modified SummarizedExperiment object with confounders resolved.
#' @examples
#' # Not run:
#' # se is a SummarizedExperiment object
#' resolve_complete_confounders_of_non_interest(se, .factor_1 = factor1, .factor_2 = factor2)
#' @noRd
resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, .factor_2){

.factor_1 <- enquo(.factor_1)
.factor_2 <- enquo(.factor_2)

cd =
colData(se) |>
as_tibble() |>
rowid_to_column() |>
distinct(rowid, !!.factor_1, !!.factor_2) |>

nest(se_data = -c(!!.factor_1, !!.factor_2)) |>
nest(data = -!!.factor_1) |>
mutate(n1 = map_int(data, ~ .x |> distinct(!!.factor_2) |> nrow())) |>
unnest(data) |>

# Nest data excluding .factor_2 and count distinct .factor_1 values
nest(data = -!!.factor_2) |>
mutate(n2 = map_int(data, ~ .x |> distinct(!!.factor_1) |> nrow())) |>
unnest(data)

# Choose a dummy value for .factor_2 based on sorting by n1 + n2
dummy_factor_2 <- cd |> arrange(desc(n1 + n2)) |> slice(1) |> pull(!!.factor_2)

# Messages if I have confounders
if(cd |> filter(n1 + n2 < 3) |> nrow() > 0){

message(sprintf("tidybulk says: IMPORTANT! the columns %s and %s, have been corrected for complete confounders and now are NOT interpretable, and cannot be used in hypothesis testing.", quo_name(.factor_1), quo_name(.factor_2)))

message(sprintf(
"tidybulk says: The value(s) %s in column %s from sample(s) %s, has been changed to %s.",
cd |> filter(n1 + n2 < 3) |> pull(!!.factor_2),
quo_name(.factor_2),
cd |> filter(n1 + n2 < 3) |> pull(se_data) |> map(colnames) |> unlist(),
dummy_factor_2
))

# Replace .factor_2 with dummy_factor_2 where n1 + n2 is less than 3 and unnest
cd = cd |>
mutate(!!.factor_2 := if_else(n1 + n2 < 3, dummy_factor_2, !!.factor_2))
}

colData(se)[,c(quo_name(.factor_1), quo_name(.factor_2))] =
cd |>
unnest(se_data) |>
arrange(rowid) |>
select(!!.factor_1, !!.factor_2) |>
DataFrame()

se

}

27 changes: 27 additions & 0 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -4956,3 +4956,30 @@ as_matrix <- function(tbl,
}


#' Resolve Complete Confounders of Non-Interest
#'
#' This generic function processes a SummarizedExperiment object to handle confounders
#' that are not of interest in the analysis. It dynamically handles combinations
#' of provided factors, adjusting the data by nesting and summarizing over these factors.
#'
#'
#' @param se A SummarizedExperiment object that contains the data to be processed.
#' @param ... Arbitrary number of factor variables represented as symbols or quosures
#' to be considered for resolving confounders. These factors are processed
#' in combinations of two.
#'
#' @rdname resolve_complete_confounders_of_non_interest-methods
#'
#' @return A modified SummarizedExperiment object with confounders resolved.
#'
#' @examples
#' # Not run:
#' # se is a SummarizedExperiment object
#' # resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3)
#' @export
setGeneric("resolve_complete_confounders_of_non_interest", function(se, ...) {
standardGeneric("resolve_complete_confounders_of_non_interest")
})



53 changes: 53 additions & 0 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -2884,3 +2884,56 @@ setMethod("describe_transcript", "SummarizedExperiment", .describe_transcript_SE
#'
#' @return A consistent object (to the input) including additional columns for transcript symbol
setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcript_SE)


#' @importFrom dplyr select
#' @importFrom rlang set_names
#' @importFrom tibble as_tibble
.resolve_complete_confounders_of_non_interest <- function(se, ...){

combination_of_factors_of_NON_interest =
# Factors
se[1,1, drop=FALSE] |>
select(...) |>
suppressWarnings() |>
colnames() |>

# Combinations
combn(2) |>
t() |>
as_tibble() |>
set_names(c("factor_1", "factor_2"))

for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){
se =
se |>
resolve_complete_confounders_of_non_interest_pair_SE(
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1),
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2)
)
}

se
}

#' resolve_complete_confounders_of_non_interest
#' @inheritParams resolve_complete_confounders_of_non_interest
#'
#' @docType methods
#' @rdname resolve_complete_confounders_of_non_interest-methods
#'
#' @return A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate).
setMethod("resolve_complete_confounders_of_non_interest",
"SummarizedExperiment",
.resolve_complete_confounders_of_non_interest)

#' resolve_complete_confounders_of_non_interest
#' @inheritParams resolve_complete_confounders_of_non_interest
#'
#' @docType methods
#' @rdname resolve_complete_confounders_of_non_interest-methods
#'
#' @return A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate).
setMethod("resolve_complete_confounders_of_non_interest",
"RangedSummarizedExperiment",
.resolve_complete_confounders_of_non_interest)
39 changes: 39 additions & 0 deletions man/resolve_complete_confounders_of_non_interest-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions tests/testthat/test-bulk_methods_SummarizedExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -825,3 +825,49 @@ test_that("Only reduced dimensions UMAP - no object",{
expect_equal( class(attr(res, "internals")$UMAP[[1]])[1], "numeric" )

})


test_that("resolve_complete_confounders_of_non_interest",{


library(tidySummarizedExperiment)
library(tidybulk)

# Sample annotations
sample_annotations <- data.frame(
sample_id = paste0("Sample", seq(1, 9)),
factor_of_interest = c(rep("treated", 4), rep("untreated", 5)),
A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"),
B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"),
C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"),
stringsAsFactors = FALSE
)

# Simulated assay data (e.g., gene expression data)
# Let's assume we have 100 genes (rows) and 9 samples (columns)
assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9)

# Row data (e.g., gene annotations)
# For simplicity, we'll just use a sequence of gene IDs
row_data <- data.frame(gene_id = paste0("Gene", seq_len(100)))

# Create SummarizedExperiment object
se <- SummarizedExperiment(assays = list(counts = assay_data),
rowData = row_data,
colData = DataFrame(sample_annotations))

expected_tibble =
tibble(
.sample = c("1", "5", "2", "4", "6", "8", "9", "3", "7"),
A = c("a1", "a1", "a2", "a2", "a2", "a2", "a3", "a1", "a1"),
B = c("b1", "b1", "b1", "b1", "b1", "b1", "b1", "b2", "b2"),
C = rep("c1", 9)
) |> arrange(.sample)

se |>
resolve_complete_confounders_of_non_interest(A, B, C) |>
distinct(.sample, A, B, C) |>
expect_identical(expected_tibble )


})

0 comments on commit a22afaa

Please sign in to comment.