Skip to content

Commit

Permalink
more improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed May 15, 2024
1 parent a22afaa commit 7aad89f
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 31 deletions.
22 changes: 11 additions & 11 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -465,20 +465,20 @@ get_differential_transcript_abundance_bulk <- function(.data,
data = df_for_edgeR %>% select(!!.sample, one_of(parse_formula(.formula))) %>% distinct %>% arrange(!!.sample)
)

# Print the design column names in case I want contrasts
message(
sprintf(
"tidybulk says: The design column names are \"%s\"",
design %>% colnames %>% paste(collapse = ", ")
)
)
# # Print the design column names in case I want contrasts
# message(
# sprintf(
# "tidybulk says: The design column names are \"%s\"",
# design %>% colnames %>% paste(collapse = ", ")
# )
# )

# Specify the design column tested
if(is.null(.contrasts))
message(
sprintf(
"tidybulk says: The design column being tested is %s",
design %>% colnames %>% .[1]
design %>% colnames %>% .[2]
)
)

Expand Down Expand Up @@ -764,7 +764,7 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,
object = .formula |> eliminate_random_effects(),
data = metadata
)

if(quo_is_symbolic(.dispersion))
dispersion = .data |> pivot_transcript(!!.transcript) |> select(!!.transcript, !!.dispersion) |> deframe()
else
Expand All @@ -781,8 +781,8 @@ get_differential_transcript_abundance_glmmSeq <- function(.data,

# Scaling
sizeFactors <- counts |> edgeR::calcNormFactors(method = scaling_method)


glmmSeq_object =
glmmSeq( .formula,
countdata = counts ,
Expand Down
2 changes: 1 addition & 1 deletion R/functions_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ get_reduced_dimensions_TSNE_bulk_SE <-
#' @param of_samples A boolean
#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity
#' @param calculate_for_pca_dimensions An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered
#' @param ... Further parameters passed to the function uwot
#' @param ... Further parameters passed to the function uwot::tumap
#'
#' @return A tibble with additional columns
#'
Expand Down
38 changes: 19 additions & 19 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ setMethod("cluster_elements", "tidybulk", .cluster_elements)
#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity
#' @param scale A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE.
#' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).
#' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"
#' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE", or uwot::tumap if you choose method="umap"
#'
#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)
#'
Expand Down Expand Up @@ -1143,14 +1143,14 @@ setGeneric("reduce_dimensions", function(.data,
# adjust top for the max number of features I have
if(top > .data |> distinct(!!.feature) |> nrow()){
warning(sprintf(
"tidybulk says: the \"top\" argument %s is higher than the number of features %s",
top,
"tidybulk says: the \"top\" argument %s is higher than the number of features %s",
top,
.data |> distinct(!!.feature) |> nrow()
))

top = min(top, .data |> distinct(!!.feature) |> nrow())
}

# Validate data frame
if(do_validate()) {
validation(.data, !!.element, !!.feature, !!.abundance)
Expand Down Expand Up @@ -2600,7 +2600,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.
#'
#' Underlying method for edgeR framework:
#'
#'
#' .data |>
#'
#' # Filter
Expand All @@ -2627,7 +2627,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#'
#'
#' Underlying method for DESeq2 framework:
#'
#'
#' keep_abundant(
#' factor_of_interest = !!as.symbol(parse_formula(.formula)[[1]]),
#' minimum_counts = minimum_counts,
Expand All @@ -2646,25 +2646,25 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol)
#' counts =
#' .data %>%
#' assay(my_assay)
#'
#'
#' # Create design matrix for dispersion, removing random effects
#' design =
#' model.matrix(
#' object = .formula |> eliminate_random_effects(),
#' data = metadata
#' )
#'
#'
#' dispersion = counts |> edgeR::estimateDisp(design = design) %$% tagwise.dispersion |> setNames(rownames(counts))
#'
#'
#' glmmSeq( .formula,
#' countdata = counts ,
#' metadata = metadata |> as.data.frame(),
#' dispersion = dispersion,
#' progress = TRUE,
#' method = method |> str_remove("(?i)^glmmSeq_" ),
#' )
#'
#'
#'
#'
#' @return A consistent object (to the input) with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate).
#'
#'
Expand Down Expand Up @@ -3969,12 +3969,12 @@ setGeneric("test_gene_rank", function(.data,

# DEPRECATION OF reference function
if (is_present(.sample) & !is.null(.sample)) {

# Signal the deprecation to the user
deprecate_warn("1.13.2", "tidybulk::test_gene_rank(.sample = )", details = "The argument .sample is now deprecated and not needed anymore.")

}

# Get column names
.arrange_desc = enquo(.arrange_desc)
.entrez = enquo(.entrez)
Expand Down Expand Up @@ -4012,14 +4012,14 @@ setGeneric("test_gene_rank", function(.data,

.data |>
select(!!.entrez, !!.arrange_desc) |>
distinct() |>
distinct() |>

# Select one entrez - NEEDED?
with_groups(c(!!.entrez,!!.arrange_desc ), slice, 1) |>
with_groups(c(!!.entrez,!!.arrange_desc ), slice, 1) |>

# arrange
# arrange
arrange(desc(!!.arrange_desc)) |>

# Format
deframe() |>
entrez_rank_to_gsea(species, gene_collections = gene_sets ) |>
Expand Down
38 changes: 38 additions & 0 deletions dev/solve_complete_confounders.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@






library(tidySummarizedExperiment)
library(tidybulk)

# Sample annotations
sample_annotations <- data.frame(
sample_id = paste0("Sample", seq(1, 9)),
factor_of_interest = c(rep("treated", 4), rep("untreated", 5)),
A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"),
B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"),
C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"),
stringsAsFactors = FALSE
)

# Simulated assay data (e.g., gene expression data)
# Let's assume we have 100 genes (rows) and 9 samples (columns)
assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9)

# Row data (e.g., gene annotations)
# For simplicity, we'll just use a sequence of gene IDs
row_data <- data.frame(gene_id = paste0("Gene", seq_len(100)))

# Create SummarizedExperiment object
se <- SummarizedExperiment(assays = list(counts = assay_data),
rowData = row_data,
colData = DataFrame(sample_annotations))

se |>
resolve_complete_confounders_of_non_interest(A, B, C) |>
distinct(.sample, A, B, C)



0 comments on commit 7aad89f

Please sign in to comment.