Skip to content

Commit

Permalink
Merge pull request #26 from sigven/dev
Browse files Browse the repository at this point in the history
Dev update
  • Loading branch information
sigven authored Sep 23, 2022
2 parents 6f74a86 + 859026a commit 8db4d94
Show file tree
Hide file tree
Showing 21 changed files with 285 additions and 70 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: oncoEnrichR
Type: Package
Title: Cancer-dedicated gene set interpretation
Version: 1.3.0
Date: 2022-09-12
Date: 2022-09-23
Authors@R: person(given = "Sigve", family = "Nakken", role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8468-2050"))
Expand Down
6 changes: 5 additions & 1 deletion R/disease_drug.R
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,11 @@ target_drug_associations <- function(qgenes,
.data$targeted_cancer_drugs_ep,
.data$approved_drugs) |>
dplyr::filter(!is.na(.data$targeted_cancer_drugs_lp) |
!is.na(.data$targeted_cancer_drugs_ep))
!is.na(.data$targeted_cancer_drugs_ep)) |>
dplyr::rename(
drugs_late_phase = .data$targeted_cancer_drugs_lp,
drugs_early_phase = .data$targeted_cancer_drugs_ep
)

lgr::lgr$info( paste0("Open Targets Platform: annotation of target tractabilities (druggability)"))

Expand Down
46 changes: 33 additions & 13 deletions R/onco_enrichr.R
Original file line number Diff line number Diff line change
Expand Up @@ -1610,25 +1610,36 @@ onco_enrich <- function(query = NULL,
PROTEIN_DOMAIN = dplyr::if_else(
!is.na(.data$PFAM_ID),
paste0(
"<a href=\"http://pfam.xfam.org/family/", .data$PFAM_ID,
"<a href=\"http://pfam.xfam.org/family/",
.data$PFAM_ID,
"\" target='_blank'>",
.data$PFAM_DOMAIN_NAME,
"</a>"),
as.character(NA)
)
) |>
dplyr::select(-c(.data$PFAM_DOMAIN_NAME, .data$PFAM_ID)) |>
dplyr::left_join(dplyr::select(oeDB[['genedb']][['all']],
.data$symbol, .data$ensembl_gene_id),
by = c("SYMBOL" = "symbol")) |>
dplyr::rename(ENSEMBL_GENE_ID = .data$ensembl_gene_id) |>
dplyr::mutate(ENSEMBL_TRANSCRIPT_ID =
paste0("<a href='https://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;g=",
.data$ENSEMBL_GENE_ID,
";t=",
.data$ENSEMBL_TRANSCRIPT_ID,"' target='_blank'>",
.data$ENSEMBL_TRANSCRIPT_ID,"</a>")) |>
dplyr::select(
-c(.data$PFAM_DOMAIN_NAME, .data$PFAM_ID)) |>
dplyr::left_join(
dplyr::select(oeDB[['genedb']][['all']],
.data$symbol, .data$ensembl_gene_id),
by = c("SYMBOL" = "symbol")) |>
dplyr::mutate(
ENSEMBL_GENE_ID =
paste0(
"<a href='https://www.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=",
.data$ensembl_gene_id,"' target='_blank'>",
.data$ensembl_gene_id,"</a>")) |>
dplyr::mutate(
ENSEMBL_TRANSCRIPT_ID =
paste0(
"<a href='https://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;g=",
.data$ensembl_gene_id,
";t=",
.data$ENSEMBL_TRANSCRIPT_ID,"' target='_blank'>",
.data$ENSEMBL_TRANSCRIPT_ID,"</a>")) |>
dplyr::select(-.data$VAR_ID) |>
dplyr::rename(CONSEQUENCE_ALTERNATE = .data$VEP_ALL_CSQ) |>
dplyr::select(.data$SYMBOL,
.data$CONSEQUENCE,
.data$PROTEIN_CHANGE,
Expand All @@ -1637,7 +1648,11 @@ onco_enrich <- function(query = NULL,
.data$LOSS_OF_FUNCTION,
.data$ENSEMBL_GENE_ID,
.data$ENSEMBL_TRANSCRIPT_ID,
dplyr::everything())
.data$PRIMARY_SITE,
.data$SITE_RECURRENCE,
.data$TOTAL_RECURRENCE,
.data$COSMIC_MUTATION_ID,
.data$CONSEQUENCE_ALTERNATE)
}

for(psite in names(onc_rep[["data"]][["tcga"]][["aberration"]][["table"]][["snv_indel"]])){
Expand Down Expand Up @@ -2106,6 +2121,7 @@ write <- function(report,
"subcellcomp",
"cell_tissue",
"aberration",
"recurrent_variants",
"coexpression",
"prognostic_association_I",
"prognostic_association_II"
Expand All @@ -2115,6 +2131,10 @@ write <- function(report,
if(elem == "cancer_association"){
show_elem <- "disease"
}

if(elem == "recurrent_variants"){
show_elem <- "aberration"
}
if(elem == "prognostic_association_I"){
show_elem <- "cancer_prognosis"
}
Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
80 changes: 76 additions & 4 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1077,23 +1077,23 @@ add_excel_sheet <- function(
)
) |>
dplyr::mutate(
targeted_cancer_drugs_lp =
drugs_late_phase =
stringr::str_replace_all(
stringr::str_squish(
stringr::str_trim(
textclean::replace_html(.data$targeted_cancer_drugs_lp)
textclean::replace_html(.data$drugs_late_phase)
)
),
" , ",
", "
)
) |>
dplyr::mutate(
targeted_cancer_drugs_ep =
drugs_early_phase =
stringr::str_replace_all(
stringr::str_squish(
stringr::str_trim(
textclean::replace_html(.data$targeted_cancer_drugs_ep)
textclean::replace_html(.data$drugs_early_phase)
)
),
" , ",
Expand Down Expand Up @@ -1420,6 +1420,78 @@ add_excel_sheet <- function(

}

if (analysis_output == "recurrent_variants") {

  ## Recurrent somatic variants (TCGA): export one row per variant, with
  ## the per-site recurrence counts collapsed into a single
  ## "site:count, site:count, ..." string.
  ## Nothing is appended when the report element is not a data frame
  ## or has no rows.
  if (is.data.frame(report$data$tcga$recurrent_variants) &&
      NROW(report$data$tcga$recurrent_variants) > 0) {

    df <- report$data$tcga$recurrent_variants

    ## Sheet columns are lower-case; the report table uses upper-case names
    colnames(df) <- tolower(colnames(df))

    df <- as.data.frame(
      df |>
        dplyr::mutate(
          site_recurrence = as.numeric(.data$site_recurrence)
        ) |>
        ## Sort before aggregating: summarise() keeps the within-group
        ## row order, so sites are listed by decreasing recurrence
        dplyr::arrange(
          dplyr::desc(.data$total_recurrence),
          dplyr::desc(.data$site_recurrence)
        ) |>
        ## Strip HTML markup so that cells hold plain text
        dplyr::mutate(
          ensembl_gene_id =
            stringr::str_trim(
              textclean::replace_html(.data$ensembl_gene_id)
            ),
          ensembl_transcript_id =
            stringr::str_trim(
              textclean::replace_html(.data$ensembl_transcript_id)
            ),
          protein_domain =
            stringr::str_trim(
              textclean::replace_html(.data$protein_domain)
            ),
          cosmic_mutation_id =
            stringr::str_trim(
              textclean::replace_html(.data$cosmic_mutation_id)
            )
        ) |>
        dplyr::mutate(
          site_recurrence = paste(
            .data$primary_site,
            .data$site_recurrence, sep=":")
        ) |>
        dplyr::group_by(
          .data$symbol,
          .data$consequence,
          .data$protein_change,
          .data$protein_domain,
          .data$mutation_hotspot,
          .data$loss_of_function,
          .data$ensembl_gene_id,
          .data$ensembl_transcript_id,
          .data$total_recurrence,
          .data$cosmic_mutation_id
        ) |>
        dplyr::summarise(
          site_recurrence = paste(
            .data$site_recurrence, collapse=", "
          ),
          .groups = "drop"
        ) |>
        dplyr::arrange(
          dplyr::desc(.data$total_recurrence)
        ) |>
        ## Provenance columns are attached AFTER the aggregation:
        ## summarise() retains only the grouping columns and the summary
        ## column, so adding them earlier would silently drop them
        dplyr::mutate(
          annotation_source = report$config$resources$tcga$name,
          version = report$config$resources$tcga$version
        )
    )

    target_df <- target_df |>
      dplyr::bind_rows(df)
  }
}

if(analysis_output == "aberration"){

## cna aberrations
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,22 @@ Web-based access to **oncoEnrichR** is available at [**https://oncotools.elixir.

## News

- September 23rd 2022: [**1.3.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-1)
- September 12th 2022: [**1.3.0 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-3-0)
- September 2nd 2022: [**1.2.2 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-2)
- July 13th 2022: [**1.2.1 release**](https://sigven.github.io/oncoEnrichR/articles/CHANGELOG.html#version-1-2-1)

## Example report

<a href="https://doi.org/10.5281/zenodo.7070557"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.7070557.svg" alt="DOI"/></a>
<a href="https://doi.org/10.5281/zenodo.7104355"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.7104355.svg" alt="DOI"/></a>

### Contact

sigven AT ifi.uio.no

### Funding and Collaboration

OncoEnrichR is supported by the [Centre for Cancer Cell Reprogramming](https://www.med.uio.no/cancell/english/) at the [University of Oslo](https://www.uio.no)/[Oslo University Hospital](https://radium.no), and [Elixir Norway (Oslo node)](https://elixir.no/organization/organisation/elixir-uio).
oncoEnrichR is supported by the [Centre for Cancer Cell Reprogramming](https://www.med.uio.no/cancell/english/) at the [University of Oslo](https://www.uio.no)/[Oslo University Hospital](https://radium.no), and [Elixir Norway (Oslo node)](https://elixir.no/organization/organisation/elixir-uio).

<br> <br>

Expand Down
16 changes: 8 additions & 8 deletions data_processing_code/RELEASE_NOTES.txt
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
##ONCOENRICHR_DB_VERSION = 20220910
oncoEnrichR https://github.com/sigven/oncoEnrichR R package for functional interrogation of genesets in the context of cancer v1.3.0 oncoEnrichR software
##ONCOENRICHR_DB_VERSION = 20220921
oncoEnrichR https://github.com/sigven/oncoEnrichR R package for functional interrogation of genesets in the context of cancer v1.3.1 oncoEnrichR software
Omnipath https://omnipathdb.org/ Database of molecular biology prior knowledge: gene regulatory interactions, enzyme-PTM relationships, protein complexes, protein annotations etc. v3.4.0/OmnipathR omnipath db
hu.MAP http://humap2.proteincomplexes.org/ Human Protein Complex Map v2.0 humap2 db
dorothea https://saezlab.github.io/dorothea/ Gene set resource containing signed transcription factor (TF) - target interactions v1.8.0 dorothea db
tissueEnrich https://www.bioconductor.org/packages/release/bioc/vignettes/TissueEnrich/inst/doc/TissueEnrich.html R package used to calculate enrichment of tissue-specific genes in a set of input genes v1.16.0 tissueenrich software
oncoPhenoMap https://github.com/sigven/oncoPhenoMap Crossmapped phenotype ontologies for the oncology domain v0.3.8 oncophenomap software
oncoPhenoMap https://github.com/sigven/oncoPhenoMap Crossmapped phenotype ontologies for the oncology domain v0.4.0 oncophenomap software
STRING https://string-db.org Protein-protein interaction database v11.5 string db
GENCODE https://www.gencodegenes.org/ High quality reference gene annotation and experimental validation v41 gencode db
TCGA https://cancergenome.nih.gov The Cancer Genome Atlas - Tumor gene expression and somatic DNA aberrations v32.0 (March 29th 2022) tcga db
TCGA https://cancergenome.nih.gov The Cancer Genome Atlas - Tumor gene expression and somatic DNA aberrations v34.0 (July 27th 2022) tcga db
UniProtKB http://www.uniprot.org Comprehensive resource of protein sequence and functional information v2022_03 uniprot db
NetPath http://www.netpath.org Manually curated resource of signal transduction pathways in humans v1 (2010) netpath db
EFO https://github.com/EBISPOT/efo Experimental Factor Ontology v3.43.0 efo db
DiseaseOntology https://github.com/DiseaseOntology Human Disease Ontology 2022-06-07 do db
EFO https://github.com/EBISPOT/efo Experimental Factor Ontology v3.46.0 efo db
DiseaseOntology https://github.com/DiseaseOntology Human Disease Ontology 2022-08-29 do db
COMPPI https://comppi.linkgroup.hu/ Compartmentalized protein-protein interaction database v2.1.1 (October 2018) comppi db
WikiPathways https://www.wikipathways.org A database of biological pathways maintained by and for the scientific community 20220810 wikipathway db
WikiPathways https://www.wikipathways.org A database of biological pathways maintained by and for the scientific community 20220910 wikipathway db
MSigDB http://software.broadinstitute.org/gsea/msigdb/index.jsp Molecular Signatures Database - collection of annotated gene sets August 2022 (MSigDB v2022.1) msigdb db
REACTOME https://reactome.org Manually curated and peer-reviewed pathway database v81 (MSigDB v2022.1) reactome db
CellChatDB http://www.cellchat.org/cellchatdb/ Multimeric ligand-receptor complexes v1 (2021) cellchatdb db
CellTalkDB http://tcm.zju.edu.cn/celltalkdb/ A manually curated database of literature-supported ligand-receptor interactions in human and mouse Nov 2020 celltalkdb db
GeneOntology https://geneontology.org Knowledgebase that contains the largest structural source of information on the functions of genes August 2022 (MSigDB v2022.1) go db
KEGG https://www.genome.jp/kegg/pathway.html Collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks 20220809 kegg db
CancerMine http://bionlp.bcgsc.ca/cancermine/ Literature-mined database of tumor suppressor genes/proto-oncogenes v47 - 20220708 cancermine db
CancerMine http://bionlp.bcgsc.ca/cancermine/ Literature-mined database of tumor suppressor genes/proto-oncogenes v48 - 20220920 cancermine db
NCG http://ncg.kcl.ac.uk/index.php Network of cancer genes - a web resource to analyze duplicability, orthology and network properties of cancer genes v7.0 ncg db
CGC https://cancer.sanger.ac.uk/census Cancer Gene Census v96 cgc db
Pfam http://pfam.xfam.org Collection of protein families/domains 2021_11 (v35.0) pfam db
Expand Down
10 changes: 5 additions & 5 deletions data_processing_code/data_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ library(gganatogram)
source('data_processing_code/data_utility_functions.R')

msigdb_version <- '2022.1'
wikipathways_version <- "20220810"
wikipathways_version <- "20220910"
netpath_version <- "2010"
opentargets_version <- "2022.06"
kegg_version <- "20220809"
Expand All @@ -14,17 +14,17 @@ uniprot_release <- "2022_03"
## Which databases to update or retrieve from last updated state
update_omnipathdb <- F
update_hpa <- F
update_ncbi_gene_summary <- T
update_ncbi_gene_summary <- F
update_project_score <- F
update_project_survival <- F
update_tcga <- F
update_tcga <- T
update_cancer_hallmarks <- F
update_omnipath_regulatory <- F
update_omnipath_complexdb <- F
update_gencode <- F
update_ligand_receptor_db <- T

oe_version <- "1.3.0"
oe_version <- "1.3.1"

data_raw_dir <- "/Users/sigven/project_data/package__oncoEnrichR/db/raw"
data_output_dir <- "/Users/sigven/project_data/package__oncoEnrichR/db/output"
Expand Down Expand Up @@ -99,7 +99,7 @@ ts_oncogene_annotations <-
get_ts_oncogene_annotations(
raw_db_dir = data_raw_dir,
gene_info = gene_info,
version = "47") |>
version = "48") |>
dplyr::select(
entrezgene, tumor_suppressor,
oncogene, citation_links_oncogene,
Expand Down
Loading

0 comments on commit 8db4d94

Please sign in to comment.