Merge pull request #32 from sigven/rev1

Rev1
sigven · Jun 17, 2023 · 751ba53 · 751ba53
2 parents e6dabcd + 0ad7da9
commit 751ba53
Show file tree

Hide file tree

Showing 40 changed files with 5,006 additions and 1,995 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: oncoEnrichR
 Type: Package
 Title: Cancer-dedicated gene set interpretation
 Version: 1.4.0
-Date: 2023-02-21
+Date: 2023-06-17
 Authors@R: person(given = "Sigve", family = "Nakken", role = c("aut", "cre"),
              email = "[email protected]",
              comment = c(ORCID = "0000-0001-8468-2050"))
@@ -16,7 +16,8 @@ Description: oncoEnrichR performs annotation and prioritization of genesets
     (comprehensive target-drug associations, cancer hallmark evidence and 
     target-tumor type associations), TCGA (genetic aberrations and gene co-expression 
     patterns in human tumor samples), STRING/BioGRID (protein-protein interaction networks), 
-    curated protein complexes (CORUM/Compleat/hu.MAP), COMPARTMENTS (subcellular compartments), 
+    curated protein complexes (CORUM/Compleat/ComplexPortal/hu.MAP2), 
+    COMPARTMENTS (subcellular compartments), 
     Human Protein Atlas (tissue and cell-type specific expression patterns and 
     prognostic associations), CellChatDB (ligand-receptor interactions),
     DoRothEA (regulatory interactions), synthetic lethality predictions, 
@@ -35,6 +36,7 @@ Imports: dplyr,
          stringi,
          stringr,
          tidyr,
+         glue,
          googledrive,
          visNetwork (>= 2.0.9), 
          SummarizedExperiment (>= 1.22.0),

diff --git a/R/cell_tissue.R b/R/cell_tissue.R
@@ -65,8 +65,9 @@ gene_tissue_cell_spec_cat <-
                      dbtype = "enrichment_db_hpa_tissue")
     }
     lgr::lgr$info(
-      paste0(edb, ": retrieving ", etype,
-             " specificity category of target genes")
+      glue::glue(
+        "{edb}: Retrieving {etype} specificity category ",
+        "of target genes")
     )
 
     specificity_groups_target <- as.data.frame(
@@ -84,22 +85,19 @@ gene_tissue_cell_spec_cat <-
       specificity_groups_target <-
         specificity_groups_target |>
         dplyr::mutate(
-          genename = paste0(
+          genename = glue::glue(
             "<a href='https://gtexportal.org/home/gene/",
-            .data$ensembl_gene_id,"' target='_blank'>",
-            .data$name,"</a>")
+            "{.data$ensembl_gene_id}' target='_blank'>",
+            "{.data$name}</a>")
         )
     } else {
       specificity_groups_target <-
         specificity_groups_target |>
         dplyr::mutate(
-          genename = paste0(
+          genename = glue::glue(
             "<a href='https://www.proteinatlas.org/",
-            .data$ensembl_gene_id,
-            "-",
-            .data$symbol,
-            "/celltype' target='_blank'>",
-            .data$name,"</a>")
+            "{.data$ensembl_gene_id}-{.data$symbol}",
+            "/celltype' target='_blank'>{.data$name}</a>")
         )
     }
     specificity_groups_target <- as.data.frame(
@@ -122,9 +120,10 @@ gene_tissue_cell_spec_cat <-
         'pct' = 0,
         'n' = 0,
         'tot' = 0,
-        group = paste0("Target set (n = ",
-                       formatC(tot, format="f",
-                               big.mark = ",", digits=0),")"))
+        group = paste0(
+          "Target set (n = ",
+          formatC(tot, format="f",
+                  big.mark = ",", digits=0),")"))
       if (nrow(dplyr::inner_join(
         df,
         specificity_groups_target,
@@ -234,8 +233,9 @@ gene_tissue_cell_enrichment <-
                      dbtype = "enrichment_db_hpa_tissue")
     }
     lgr::lgr$info(
-      paste0(edb, ": estimating enrichment of ", etype,
-             " in target set with TissueEnrich"))
+      glue::glue(
+        "{edb}: Estimating enrichment of {etype}",
+        " in target set with TissueEnrich"))
 
     df <- data.frame('entrezgene' = as.integer(qgenes_entrez),
                      stringsAsFactors = F) |>
@@ -251,21 +251,18 @@ gene_tissue_cell_enrichment <-
     if (resolution == "tissue") {
       df <- df |>
         dplyr::mutate(
-          genename = paste0(
+          genename = glue::glue(
             "<a href='https://gtexportal.org/home/gene/",
-            .data$ensembl_gene_id,"' target='_blank'>",
-            .data$name,"</a>")
+            "{.data$ensembl_gene_id}' target='_blank'>",
+            "{.data$name}</a>")
         )
     } else {
       df <- df |>
         dplyr::mutate(
-          genename = paste0(
+          genename = glue::glue(
             "<a href='https://www.proteinatlas.org/",
-            .data$ensembl_gene_id,
-            "-",
-            .data$symbol,
-            "/celltype' target='_blank'>",
-            .data$name,"</a>")
+            "{.data$ensembl_gene_id}-{.data$symbol}",
+            "/celltype' target='_blank'>{.data$name}</a>")
         )
     }
 

diff --git a/R/enrich.R b/R/enrich.R
@@ -16,13 +16,13 @@ get_go_enrichment <- function(query_entrez,
 
   lgr::lgr$info(
     paste0("GO - Enrichment/ORA: performing gene enrichment analysis of target set (subontology ",ontology,")"))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: p_value_adjustment_method = ",p_value_adjustment_method))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: minGSSize = ",min_geneset_size))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: maxGSSize = ",max_geneset_size))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: remove redundancy of enriched GO terms = ",simplify))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: Background geneset: '",bgset_description,"'"))
-  lgr::lgr$info( paste0("GO - Enrichment/ORA: settings: Background geneset size = ",length(background_entrez)))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: p_value_adjustment_method = ",p_value_adjustment_method))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: minGSSize = ",min_geneset_size))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: maxGSSize = ",max_geneset_size))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: remove redundancy of enriched GO terms = ",simplify))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: Background geneset: '",bgset_description,"'"))
+  lgr::lgr$info( paste0("GO - Enrichment/ORA clusterProfiler settings: Background geneset size = ",length(background_entrez)))
 
 
   stopifnot(p_value_adjustment_method %in%
@@ -188,13 +188,13 @@ get_universal_enrichment <- function(query_entrez,
   lgr::lgr$appenders$console$set_layout(
     lgr::LayoutFormat$new(timestamp_fmt = "%Y-%m-%d %T"))
 
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: performing gene enrichment analysis of target set"))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: p_value_adjustment_method = ",p_value_adjustment_method))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: minGSSize = ",min_geneset_size))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: maxGSSize = ",max_geneset_size))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: Background geneset: '",bgset_description,"'"))
-  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: settings: Background geneset size = ",length(background_entrez)))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA: performing gene enrichment analysis of target set with clusterProfiler"))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: p_value_cutoff = ",p_value_cutoff,", q_value_cutoff = ",q_value_cutoff))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: p_value_adjustment_method = ",p_value_adjustment_method))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: minGSSize = ",min_geneset_size))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: maxGSSize = ",max_geneset_size))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: Background geneset: '",bgset_description,"'"))
+  lgr::lgr$info( paste0(dbsource, " - Enrichment/ORA clusterProfiler settings: Background geneset size = ",length(background_entrez)))
 
   stopifnot(is.character(query_entrez))
   stopifnot(!is.null(background_entrez))

diff --git a/R/onco_enrichr.R b/R/onco_enrichr.R
@@ -120,7 +120,7 @@ load_db <- function(cache_dir = NA,
         )
 
         lgr::lgr$info("Downloading remote oncoEnrichR dataset from UiO server to cache_dir")
-        download.file(
+        utils::download.file(
           url = fname_uio,
           destfile = fname_local,
           quiet = T
@@ -884,7 +884,7 @@ init_report <- function(oeDB,
 #' interactions, subcellular compartment enrichment, pathway enrichment,
 #' synthetic lethality interactions, prognostic associations, and more.
 #'
-#' @param query character vector with gene/query identifiers
+#' @param query character vector with gene/query identifiers (minimum 2, maximum 1000)
 #' @param oeDB oncoEnrichR data repository object - as returned from `load_db()`
 #' @param query_id_type character indicating source of query (one of
 #' "uniprot_acc", "symbol","entrezgene", or "ensembl_gene", "ensembl_mrna",
@@ -1789,7 +1789,7 @@ onco_enrich <- function(query = NULL,
           dplyr::left_join(
             cosmic_variants,
             by = c("VAR_ID"),
-            multiple = "all")
+            relationship = "many-to-many")
       }
 
       onc_rep[["data"]][["tcga"]][["recurrent_variants"]] <-
@@ -1833,12 +1833,32 @@ onco_enrich <- function(query = NULL,
               .data$ENSEMBL_TRANSCRIPT_ID,"</a>")) |>
         dplyr::select(-c("VAR_ID")) |>
         dplyr::rename(CONSEQUENCE_ALTERNATE = "VEP_ALL_CSQ") |>
+        dplyr::mutate(MUTATION_HOTSPOT = dplyr::if_else(
+          stringr::str_detect(.data$MUTATION_HOTSPOT, "exonic") &
+            stringr::str_detect(.data$MUTATION_HOTSPOT, "[0-9]-[0-9]"),
+          as.character(NA),
+          as.character(.data$MUTATION_HOTSPOT)
+        )) |>
+        tidyr::separate(
+          MUTATION_HOTSPOT,
+          c("tmp1","tmp2","tmp3","tmp4","tmp5","tmp6"),
+          sep = "\\|", remove = T, fill = "right") |>
+        dplyr::mutate(MUTATION_HOTSPOT = paste(
+          .data$tmp2, .data$tmp4, .data$tmp5, .data$tmp6, sep="|"
+        )) |>
+        dplyr::mutate(MUTATION_HOTSPOT = dplyr::if_else(
+          !is.na(.data$MUTATION_HOTSPOT) &
+            stringr::str_detect(.data$MUTATION_HOTSPOT, "NA\\|NA"),
+          as.character(NA),
+          as.character(.data$MUTATION_HOTSPOT)
+        )) |>
         dplyr::select(c("SYMBOL",
                       "CONSEQUENCE",
                       "PROTEIN_CHANGE",
                       "MUTATION_HOTSPOT",
                       "PROTEIN_DOMAIN",
                       "LOSS_OF_FUNCTION",
+                      "MUTATION_HOTSPOT_MATCH",
                       "ENSEMBL_GENE_ID",
                       "ENSEMBL_TRANSCRIPT_ID",
                       "PRIMARY_SITE",
@@ -2120,12 +2140,9 @@ write <- function(report,
   ## Assign to env
   pos <- 1
   envir = as.environment(pos)
-  #for (e in export) assign(e, get(e), envir = envir)
 
   ## TODO: check that report parameter is a valid oncoEnrichR result object
   if (!is.null(report)) {
-    # assign("onc_enrich_report",
-    #        report, envir = .GlobalEnv)
     assign("onc_enrich_report",
            report,
            envir = envir)
@@ -2138,8 +2155,6 @@ write <- function(report,
 
 
   if (!is.null(oeDB[['tcgadb']][['maf']])) {
-    # assign("tcga_maf_datasets",
-    #        oeDB[['tcgadb']][['maf']], envir = .GlobalEnv)
     assign("tcga_maf_datasets",
            oeDB[['tcgadb']][['maf']],
            envir = envir)
@@ -2170,7 +2185,6 @@ write <- function(report,
           )
         )
       dir.create(tmpdir)
-      #system(paste0('mkdir ', tmpdir))
       system(paste0('cp ',
                     oe_rmarkdown_template_dir,
                     .Platform$file.sep,
@@ -2243,10 +2257,6 @@ write <- function(report,
           )
         )
 
-        # target_html <- file.path(output_directory, paste0(
-        #   file_basename_prefix, ".html")
-        # )
-
         if (file.exists(rmdown_html) & dir.exists(rmdown_supporting1) &
            dir.exists(rmdown_supporting2)) {
           system(paste0('mv ', rmdown_html, ' ',

diff --git a/R/ppi.R b/R/ppi.R
@@ -272,12 +272,12 @@ get_biogrid_network_nodes_edges <-
     ## support/confidence in the protein-protein interaction network
     genedb <- genedb |>
       dplyr::mutate(oncogene = dplyr::if_else(
-        .data$oncogene_confidence_level == "WEAK",
+        .data$oncogene_confidence_level == "MODERATE",
         FALSE,
         as.logical(.data$oncogene)
       )) |>
       dplyr::mutate(tumor_suppressor = dplyr::if_else(
-        .data$tsg_confidence_level == "WEAK",
+        .data$tsg_confidence_level == "MODERATE",
         FALSE,
         as.logical(.data$tumor_suppressor)
       ))
@@ -391,12 +391,12 @@ get_string_network_nodes_edges <-
   ## support/confidence in the protein-protein interaction network
   genedb <- genedb |>
     dplyr::mutate(oncogene = dplyr::if_else(
-      .data$oncogene_confidence_level == "WEAK",
+      .data$oncogene_confidence_level == "MODERATE",
       FALSE,
       as.logical(.data$oncogene)
     )) |>
     dplyr::mutate(tumor_suppressor = dplyr::if_else(
-      .data$tsg_confidence_level == "WEAK",
+      .data$tsg_confidence_level == "MODERATE",
       FALSE,
       as.logical(.data$tumor_suppressor)
     ))

diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/utils.R b/R/utils.R
@@ -1296,11 +1296,15 @@ add_excel_sheet <- function(
       if (is.data.frame(report$data$protein_complex[[c]])) {
         if (NROW(report$data$protein_complex[[c]]) > 0) {
 
+          res_name <- c
+          if(c == 'omnipath'){
+            res_name <- 'omnipathr'
+          }
 
           df <- report$data$protein_complex[[c]] |>
             dplyr::mutate(
-              annotation_source = report$config$resources[[c]]$name,
-              version = report$config$resources[[c]]$version) |>
+              annotation_source = report$config$resources[[res_name]]$name,
+              version = report$config$resources[[res_name]]$version) |>
             dplyr::select(c("annotation_source",
                             "version"),
                           dplyr::everything()) |>
@@ -1406,8 +1410,8 @@ add_excel_sheet <- function(
         if (NROW(report$data$cancer_prognosis$km_cshl$assocs[[t]]) > 0) {
           df <- report$data$cancer_prognosis$km_cshl$assocs[[t]] |>
             dplyr::mutate(
-              annotation_source = "Smith et al., Cell Reports, 2022 (tcga-survival.com)",
-              version = "v2") |>
+              annotation_source = report$config$resources$tcga_survival$name,
+              version = report$config$resources$tcga_survival$version) |>
             dplyr::mutate(feature_type = t) |>
             dplyr::arrange(.data$feature_type, .data$z_score) |>
             dplyr::select(c("annotation_source", "version"),

diff --git a/README.md b/README.md
@@ -20,14 +20,13 @@ Web-based access to **oncoEnrichR** is available at [**https://oncotools.elixir.
 
 ## News
 
+-   June 18th 2023: [**1.4.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-4-1)
 -   February 21st 2023: [**1.4.0 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-4-0)
 -   September 27th 2022: [**1.3.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-2)
--   September 2nd 2022: [**1.2.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-2)
--   July 13th 2022: [**1.2.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-1)
 
 ## Example report
 
-<a href="https://doi.org/10.5281/zenodo.7657930"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.7657930.svg" alt="DOI"/></a>
+<a href="https://doi.org/10.5281/zenodo.8051153"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.8051153.svg" alt="DOI"/></a>
 
 ### Contact