Skip to content

Commit

Permalink
Merge pull request #32 from sigven/rev1
Browse files Browse the repository at this point in the history
Rev1
  • Loading branch information
sigven committed Jun 17, 2023
2 parents e6dabcd + 0ad7da9 commit 751ba53
Show file tree
Hide file tree
Showing 40 changed files with 5,006 additions and 1,995 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: oncoEnrichR
Type: Package
Title: Cancer-dedicated gene set interpretation
Version: 1.4.0
Date: 2023-02-21
Date: 2023-06-17
Authors@R: person(given = "Sigve", family = "Nakken", role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8468-2050"))
Expand All @@ -16,7 +16,8 @@ Description: oncoEnrichR performs annotation and prioritization of genesets
(comprehensive target-drug associations, cancer hallmark evidence and
target-tumor type associations), TCGA (genetic aberrations and gene co-expression
patterns in human tumor samples), STRING/BioGRID (protein-protein interaction networks),
curated protein complexes (CORUM/Compleat/hu.MAP), COMPARTMENTS (subcellular compartments),
curated protein complexes (CORUM/Compleat/ComplexPortal/hu.MAP2),
COMPARTMENTS (subcellular compartments),
Human Protein Atlas (tissue and cell-type specific expression patterns and
prognostic associations), CellChatDB (ligand-receptor interactions),
DoRothEA (regulatory interactions), synthetic lethality predictions,
Expand All @@ -35,6 +36,7 @@ Imports: dplyr,
stringi,
stringr,
tidyr,
glue,
googledrive,
visNetwork (>= 2.0.9),
SummarizedExperiment (>= 1.22.0),
Expand Down
47 changes: 22 additions & 25 deletions R/cell_tissue.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ gene_tissue_cell_spec_cat <-
dbtype = "enrichment_db_hpa_tissue")
}
lgr::lgr$info(
paste0(edb, ": retrieving ", etype,
" specificity category of target genes")
glue::glue(
"{edb}: Retrieving {etype} specificity category ",
"of target genes")
)

specificity_groups_target <- as.data.frame(
Expand All @@ -84,22 +85,19 @@ gene_tissue_cell_spec_cat <-
specificity_groups_target <-
specificity_groups_target |>
dplyr::mutate(
genename = paste0(
genename = glue::glue(
"<a href='https://gtexportal.org/home/gene/",
.data$ensembl_gene_id,"' target='_blank'>",
.data$name,"</a>")
"{.data$ensembl_gene_id}' target='_blank'>",
"{.data$name}</a>")
)
} else {
specificity_groups_target <-
specificity_groups_target |>
dplyr::mutate(
genename = paste0(
genename = glue::glue(
"<a href='https://www.proteinatlas.org/",
.data$ensembl_gene_id,
"-",
.data$symbol,
"/celltype' target='_blank'>",
.data$name,"</a>")
"{.data$ensembl_gene_id}-{.data$symbol}",
"/celltype' target='_blank'>{.data$name}</a>")
)
}
specificity_groups_target <- as.data.frame(
Expand All @@ -122,9 +120,10 @@ gene_tissue_cell_spec_cat <-
'pct' = 0,
'n' = 0,
'tot' = 0,
group = paste0("Target set (n = ",
formatC(tot, format="f",
big.mark = ",", digits=0),")"))
group = paste0(
"Target set (n = ",
formatC(tot, format="f",
big.mark = ",", digits=0),")"))
if (nrow(dplyr::inner_join(
df,
specificity_groups_target,
Expand Down Expand Up @@ -234,8 +233,9 @@ gene_tissue_cell_enrichment <-
dbtype = "enrichment_db_hpa_tissue")
}
lgr::lgr$info(
paste0(edb, ": estimating enrichment of ", etype,
" in target set with TissueEnrich"))
glue::glue(
"{edb}: Estimating enrichment of {etype}",
" in target set with TissueEnrich"))

df <- data.frame('entrezgene' = as.integer(qgenes_entrez),
stringsAsFactors = F) |>
Expand All @@ -251,21 +251,18 @@ gene_tissue_cell_enrichment <-
if (resolution == "tissue") {
df <- df |>
dplyr::mutate(
genename = paste0(
genename = glue::glue(
"<a href='https://gtexportal.org/home/gene/",
.data$ensembl_gene_id,"' target='_blank'>",
.data$name,"</a>")
"{.data$ensembl_gene_id}' target='_blank'>",
"{.data$name}</a>")
)
} else {
df <- df |>
dplyr::mutate(
genename = paste0(
genename = glue::glue(
"<a href='https://www.proteinatlas.org/",
.data$ensembl_gene_id,
"-",
.data$symbol,
"/celltype' target='_blank'>",
.data$name,"</a>")
"{.data$ensembl_gene_id}-{.data$symbol}",
"/celltype' target='_blank'>{.data$name}</a>")
)
}

Expand Down
28 changes: 14 additions & 14 deletions R/enrich.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ get_go_enrichment <- function(query_entrez,

lgr::lgr$info(
paste0("GO - Enrichment/ORA: performing gene enrichment analysis of target set (subontology ",ontology,")"))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: p_value_adjustment_method = ",p_value_adjustment_method))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: minGSSize = ",min_geneset_size))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: maxGSSize = ",max_geneset_size))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: remove redundancy of enriched GO terms = ",simplify))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: Background geneset: '",bgset_description,"'"))
lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: Background geneset size = ",length(background_entrez)))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: p_value_adjustment_method = ",p_value_adjustment_method))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: minGSSize = ",min_geneset_size))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: maxGSSize = ",max_geneset_size))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: remove redundancy of enriched GO terms = ",simplify))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: Background geneset: '",bgset_description,"'"))
lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: Background geneset size = ",length(background_entrez)))


stopifnot(p_value_adjustment_method %in%
Expand Down Expand Up @@ -188,13 +188,13 @@ get_universal_enrichment <- function(query_entrez,
lgr::lgr$appenders$console$set_layout(
lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))

lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: performing gene enrichment analysis of target set"))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: p_value_adjustment_method = ",p_value_adjustment_method))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: minGSSize = ",min_geneset_size))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: maxGSSize = ",max_geneset_size))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: Background geneset: '",bgset_description,"'"))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: Background geneset size = ",length(background_entrez)))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: performing gene enrichment analysis of target set with clusterProfiler"))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: p_value_adjustment_method = ",p_value_adjustment_method))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: minGSSize = ",min_geneset_size))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: maxGSSize = ",max_geneset_size))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: Background geneset: '",bgset_description,"'"))
lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: Background geneset size = ",length(background_entrez)))

stopifnot(is.character(query_entrez))
stopifnot(!is.null(background_entrez))
Expand Down
36 changes: 23 additions & 13 deletions R/onco_enrichr.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ load_db <- function(cache_dir = NA,
)

lgr::lgr$info("Downloading remote oncoEnrichR dataset from UiO server to cache_dir")
download.file(
utils::download.file(
url = fname_uio,
destfile = fname_local,
quiet = T
Expand Down Expand Up @@ -884,7 +884,7 @@ init_report <- function(oeDB,
#' interactions, subcellular compartment enrichment, pathway enrichment,
#' synthetic lethality interactions, prognostic associations, and more.
#'
#' @param query character vector with gene/query identifiers
#' @param query character vector with gene/query identifiers (minimum 2, maximum 1000)
#' @param oeDB oncoEnrichR data repository object - as returned from `load_db()`
#' @param query_id_type character indicating source of query (one of
#' "uniprot_acc", "symbol","entrezgene", or "ensembl_gene", "ensembl_mrna",
Expand Down Expand Up @@ -1789,7 +1789,7 @@ onco_enrich <- function(query = NULL,
dplyr::left_join(
cosmic_variants,
by = c("VAR_ID"),
multiple = "all")
relationship = "many-to-many")
}

onc_rep[["data"]][["tcga"]][["recurrent_variants"]] <-
Expand Down Expand Up @@ -1833,12 +1833,32 @@ onco_enrich <- function(query = NULL,
.data$ENSEMBL_TRANSCRIPT_ID,"</a>")) |>
dplyr::select(-c("VAR_ID")) |>
dplyr::rename(CONSEQUENCE_ALTERNATE = "VEP_ALL_CSQ") |>
dplyr::mutate(MUTATION_HOTSPOT = dplyr::if_else(
stringr::str_detect(.data$MUTATION_HOTSPOT, "exonic") &
stringr::str_detect(.data$MUTATION_HOTSPOT, "[0-9]-[0-9]"),
as.character(NA),
as.character(.data$MUTATION_HOTSPOT)
)) |>
tidyr::separate(
MUTATION_HOTSPOT,
c("tmp1","tmp2","tmp3","tmp4","tmp5","tmp6"),
sep = "\\|", remove = T, fill = "right") |>
dplyr::mutate(MUTATION_HOTSPOT = paste(
.data$tmp2, .data$tmp4, .data$tmp5, .data$tmp6, sep="|"
)) |>
dplyr::mutate(MUTATION_HOTSPOT = dplyr::if_else(
!is.na(.data$MUTATION_HOTSPOT) &
stringr::str_detect(.data$MUTATION_HOTSPOT, "NA\\|NA"),
as.character(NA),
as.character(.data$MUTATION_HOTSPOT)
)) |>
dplyr::select(c("SYMBOL",
"CONSEQUENCE",
"PROTEIN_CHANGE",
"MUTATION_HOTSPOT",
"PROTEIN_DOMAIN",
"LOSS_OF_FUNCTION",
"MUTATION_HOTSPOT_MATCH",
"ENSEMBL_GENE_ID",
"ENSEMBL_TRANSCRIPT_ID",
"PRIMARY_SITE",
Expand Down Expand Up @@ -2120,12 +2140,9 @@ write <- function(report,
## Assign to env
pos <- 1
envir = as.environment(pos)
#for (e in export) assign(e, get(e), envir = envir)

## TODO: check that report parameter is a valid oncoEnrichR result object
if (!is.null(report)) {
# assign("onc_enrich_report",
# report, envir = .GlobalEnv)
assign("onc_enrich_report",
report,
envir = envir)
Expand All @@ -2138,8 +2155,6 @@ write <- function(report,


if (!is.null(oeDB[['tcgadb']][['maf']])) {
# assign("tcga_maf_datasets",
# oeDB[['tcgadb']][['maf']], envir = .GlobalEnv)
assign("tcga_maf_datasets",
oeDB[['tcgadb']][['maf']],
envir = envir)
Expand Down Expand Up @@ -2170,7 +2185,6 @@ write <- function(report,
)
)
dir.create(tmpdir)
#system(paste0('mkdir ', tmpdir))
system(paste0('cp ',
oe_rmarkdown_template_dir,
.Platform$file.sep,
Expand Down Expand Up @@ -2243,10 +2257,6 @@ write <- function(report,
)
)

# target_html <- file.path(output_directory, paste0(
# file_basename_prefix, ".html")
# )

if (file.exists(rmdown_html) & dir.exists(rmdown_supporting1) &
dir.exists(rmdown_supporting2)) {
system(paste0('mv ', rmdown_html, ' ',
Expand Down
8 changes: 4 additions & 4 deletions R/ppi.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,12 +272,12 @@ get_biogrid_network_nodes_edges <-
## support/confidence in the protein-protein interaction network
genedb <- genedb |>
dplyr::mutate(oncogene = dplyr::if_else(
.data$oncogene_confidence_level == "WEAK",
.data$oncogene_confidence_level == "MODERATE",
FALSE,
as.logical(.data$oncogene)
)) |>
dplyr::mutate(tumor_suppressor = dplyr::if_else(
.data$tsg_confidence_level == "WEAK",
.data$tsg_confidence_level == "MODERATE",
FALSE,
as.logical(.data$tumor_suppressor)
))
Expand Down Expand Up @@ -391,12 +391,12 @@ get_string_network_nodes_edges <-
## support/confidence in the protein-protein interaction network
genedb <- genedb |>
dplyr::mutate(oncogene = dplyr::if_else(
.data$oncogene_confidence_level == "WEAK",
.data$oncogene_confidence_level == "MODERATE",
FALSE,
as.logical(.data$oncogene)
)) |>
dplyr::mutate(tumor_suppressor = dplyr::if_else(
.data$tsg_confidence_level == "WEAK",
.data$tsg_confidence_level == "MODERATE",
FALSE,
as.logical(.data$tumor_suppressor)
))
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
12 changes: 8 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1296,11 +1296,15 @@ add_excel_sheet <- function(
if (is.data.frame(report$data$protein_complex[[c]])) {
if (NROW(report$data$protein_complex[[c]]) > 0) {

res_name <- c
if(c == 'omnipath'){
res_name <- 'omnipathr'
}

df <- report$data$protein_complex[[c]] |>
dplyr::mutate(
annotation_source = report$config$resources[[c]]$name,
version = report$config$resources[[c]]$version) |>
annotation_source = report$config$resources[[res_name]]$name,
version = report$config$resources[[res_name]]$version) |>
dplyr::select(c("annotation_source",
"version"),
dplyr::everything()) |>
Expand Down Expand Up @@ -1406,8 +1410,8 @@ add_excel_sheet <- function(
if (NROW(report$data$cancer_prognosis$km_cshl$assocs[[t]]) > 0) {
df <- report$data$cancer_prognosis$km_cshl$assocs[[t]] |>
dplyr::mutate(
annotation_source = "Smith et al., Cell Reports, 2022 (tcga-survival.com)",
version = "v2") |>
annotation_source = report$config$resources$tcga_survival$name,
version = report$config$resources$tcga_survival$version) |>
dplyr::mutate(feature_type = t) |>
dplyr::arrange(.data$feature_type, .data$z_score) |>
dplyr::select(c("annotation_source", "version"),
Expand Down
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ Web-based access to **oncoEnrichR** is available at [**https://oncotools.elixir.

## News

- June 18th 2023: [**1.4.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-4-1)
- February 21st 2023: [**1.4.0 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-4-0)
- September 27th 2022: [**1.3.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-2)
- September 2nd 2022: [**1.2.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-2)
- July 13th 2022: [**1.2.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-1)

## Example report

<a href="https://doi.org/10.5281/zenodo.7657930"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.7657930.svg" alt="DOI"/></a>
<a href="https://doi.org/10.5281/zenodo.8051153"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.8051153.svg" alt="DOI"/></a>

### Contact

Expand Down
Loading

0 comments on commit 751ba53

Please sign in to comment.