Skip to content

Commit

Permalink
improved PPI retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
sigven committed Aug 11, 2020
1 parent 1b3ba71 commit 788489b
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 43 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: oncoEnrichR
Type: Package
Title: Functional annotation and enrichment analysis of genesets in the context of cancer
Version: 0.7.0
Version: 0.7.2
Author: Sigve Nakken
Maintainer: Sigve Nakken <[email protected]>
URL: https://github.com/sigven/oncoEnrichR
Expand All @@ -23,5 +23,5 @@ License: MIT
Encoding: UTF-8
Remotes: mjkallen/rlogging, jespermaag/gganatogram, molsysbio/speed2
LazyData: true
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
Roxygen: list(markdown = TRUE)
6 changes: 4 additions & 2 deletions R/onco_enrichr.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,17 @@ onco_enrich <- function(query,
#gtex_atlasassay_groups = c("g32","g9","g29","g10","g28","g44","g33","g50","g37","g38","g42","g35")) {
stopifnot(is.character(query))
stopifnot(p_value_adjustment_method %in% c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
if (length(query) > 1000 | length(query) < 20) {
rlogging::message(paste0("ERROR: oncoEnrichR needs minimum 20 query identifiers, and accepts a maximum of 1000. Query contained n = ",length(query), " identifiers"))
if (length(query) > 800 | length(query) < 20) {
rlogging::message(paste0("ERROR: oncoEnrichR needs minimum 20 query identifiers, and accepts a maximum of 800. Query contained n = ",length(query), " identifiers"))
return(NULL)
}
stopifnot(query_source == "symbol" | query_source == "entrezgene" |
query_source == "uniprot_acc" | query_source == "ensembl_gene_id")
stopifnot(ppi_score_threshold > 0 & ppi_score_threshold <= 1000)
stopifnot(p_value_cutoff_enrichment > 0 & p_value_cutoff_enrichment < 1)
stopifnot(q_value_cutoff_enrichment > 0 & q_value_cutoff_enrichment < 1)
stopifnot(ppi_add_nodes <= 50)


qgenes_match <-
oncoEnrichR::verify_query_genes(query,
Expand Down
159 changes: 126 additions & 33 deletions R/ppi.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,34 +68,16 @@ get_network_communities <- function(edges = NULL, nodes = NULL){
return(community_structure)
}

get_ppi_network <- function(qgenes, ppi_source = "STRING", genedb = NULL,
cancerdrugdb = NULL, settings = NULL){

stopifnot(!is.null(settings))
stopifnot(!is.null(genedb))
stopifnot(!is.null(cancerdrugdb))
stopifnot(settings$query_type == "interaction_partners" | settings$query_type == "network")
oncoEnrichR::validate_db_df(genedb, dbtype = "genedb")
get_string_network_nodes_edges <- function(qgenes, all_query_nodes = NULL, settings = NULL, genedb = NULL){

query_list <- paste(qgenes, collapse="%0d")

query_nodes <- data.frame("entrezgene" = qgenes, stringsAsFactors = F) %>%
dplyr::distinct() %>%
dplyr::left_join(genedb, by = c("entrezgene" = "entrezgene")) %>%
dplyr::mutate(id = paste0("s",entrezgene)) %>%
dplyr::select(-ensembl_gene_id) %>%
dplyr::mutate(query_node = T) %>%
dplyr::distinct()

rlogging::message("STRINGdb: retrieving protein-protein interaction network from (v11)")
rlogging::message(paste0("STRINGdb: Settings - required_score = ",
settings$minimum_score,", add_nodes = ",settings$add_nodes))

all_edges <- jsonlite::fromJSON(paste0("https://string-db.org/api/json/",
settings$query_type,"?species=9606&identifiers=",
query_list,"&required_score=",
settings$minimum_score,"&add_nodes=",settings$add_nodes)) %>%
dplyr::left_join(dplyr::select(genedb,entrezgene,symbol),by=c("preferredName_A" = "symbol")) %>%
edges <- jsonlite::fromJSON(paste0("https://string-db.org/api/json/",
settings$query_type, "?species=9606&identifiers=",
query_list, "&required_score=",
settings$minimum_score, "&add_nodes=", settings$add_nodes)) %>%
dplyr::left_join(dplyr::select(genedb,entrezgene,symbol), by = c("preferredName_A" = "symbol")) %>%
dplyr::filter(!is.na(entrezgene)) %>%
dplyr::rename(entrezgene_a = entrezgene) %>%
dplyr::mutate(entrezgene_a = as.character(entrezgene_a)) %>%
Expand All @@ -104,7 +86,7 @@ get_ppi_network <- function(qgenes, ppi_source = "STRING", genedb = NULL,
cancer_driver, tcga_driver),by=c("entrezgene_a" = "entrezgene")) %>%
dplyr::rename(oncogene_A = oncogene, tsgene_A = tumor_suppressor,
cdriver_A = cancer_driver, tcga_driver_A = tcga_driver) %>%
dplyr::left_join(dplyr::select(genedb,entrezgene,symbol),by=c("preferredName_B" = "symbol")) %>%
dplyr::left_join(dplyr::select(genedb,entrezgene,symbol), by = c("preferredName_B" = "symbol")) %>%
dplyr::filter(!is.na(entrezgene)) %>%
dplyr::rename(entrezgene_b = entrezgene) %>%
dplyr::mutate(entrezgene_b = as.character(entrezgene_b)) %>%
Expand All @@ -114,29 +96,141 @@ get_ppi_network <- function(qgenes, ppi_source = "STRING", genedb = NULL,
dplyr::rename(oncogene_B = oncogene, tsgene_B = tumor_suppressor, cdriver_B = cancer_driver,
tcga_driver_B = tcga_driver) %>%
dplyr::mutate(interaction_symbol = paste0(preferredName_A,"_",preferredName_B)) %>%
dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("preferredName_A" = "symbol")) %>%
dplyr::left_join(dplyr::select(all_query_nodes, symbol, query_node), by = c("preferredName_A" = "symbol")) %>%
dplyr::rename(query_node_A = query_node) %>%
dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("preferredName_B" = "symbol")) %>%
dplyr::left_join(dplyr::select(all_query_nodes, symbol, query_node), by = c("preferredName_B" = "symbol")) %>%
dplyr::rename(query_node_B = query_node) %>%
dplyr::mutate(weight = score) %>%
dplyr::distinct() %>%
dplyr::select(-c(ncbiTaxonId,stringId_A,stringId_B))

network_nodes <- data.frame("symbol" = unique(c(all_edges$preferredName_A, all_edges$preferredName_B)),
nodes <- data.frame("symbol" = unique(c(edges$preferredName_A, edges$preferredName_B)),
stringsAsFactors = F) %>%
dplyr::distinct() %>%
dplyr::left_join(genedb, by = c("symbol" = "symbol")) %>%
dplyr::select(-ensembl_gene_id) %>%
dplyr::filter(!is.na(entrezgene)) %>%
dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("symbol")) %>%
dplyr::left_join(dplyr::select(all_query_nodes, symbol, query_node), by = c("symbol")) %>%
dplyr::mutate(id = paste0("s",entrezgene)) %>%
dplyr::distinct()

all_nodes <- dplyr::bind_rows(query_nodes, network_nodes) %>%
dplyr::distinct() %>%
dplyr::bind_rows(all_query_nodes) %>%
dplyr::distinct() %>%
dplyr::mutate(query_node = dplyr::if_else(is.na(query_node), FALSE, as.logical(query_node)))


#all_nodes <- dplyr::bind_rows(query_nodes, network_nodes) %>%
#dplyr::distinct() %>%
#dplyr::mutate(query_node = dplyr::if_else(is.na(query_node), FALSE, as.logical(query_node)))


network <- list()
network$edges <- edges
network$nodes <- nodes

return(network)


}

get_ppi_network <- function(qgenes, ppi_source = "STRING", genedb = NULL,
cancerdrugdb = NULL, settings = NULL){

stopifnot(!is.null(settings))
stopifnot(!is.null(genedb))
stopifnot(!is.null(cancerdrugdb))
stopifnot(settings$query_type == "interaction_partners" | settings$query_type == "network")
oncoEnrichR::validate_db_df(genedb, dbtype = "genedb")

query_nodes <- data.frame("entrezgene" = qgenes, stringsAsFactors = F) %>%
dplyr::distinct() %>%
dplyr::left_join(genedb, by = c("entrezgene" = "entrezgene")) %>%
dplyr::mutate(id = paste0("s",entrezgene)) %>%
dplyr::select(-ensembl_gene_id) %>%
dplyr::mutate(query_node = T) %>%
dplyr::distinct()

rlogging::message("STRINGdb: retrieving protein-protein interaction network from (v11)")
rlogging::message(paste0("STRINGdb: Settings - required_score = ",
settings$minimum_score,", add_nodes = ",settings$add_nodes))

# Retrieve the network from STRING in batches of at most `chunk_size` query
# genes, then merge and de-duplicate the per-batch edges and nodes.
#
# The batches are non-overlapping. The earlier loop took windows of 200 genes
# but advanced the start index by only 199, so the gene at every batch
# boundary was submitted twice, and inputs of length 1 + 199 * k (e.g. 399)
# triggered an additional request for a single, already-queried gene.
#
# NOTE(review): each batch is queried independently, so interactions between
# genes that land in different batches are presumably not returned when
# settings$query_type == "network" -- confirm this is acceptable.
chunk_size <- 200
gene_chunks <- unname(
  split(qgenes, ceiling(seq_along(qgenes) / chunk_size))
)
if (length(gene_chunks) == 0) {
  # Empty input: keep issuing the single (empty) request, as before.
  gene_chunks <- list(qgenes)
}

chunk_networks <- lapply(gene_chunks, function(qgenes_set) {
  get_string_network_nodes_edges(qgenes = qgenes_set,
                                 all_query_nodes = query_nodes,
                                 settings = settings,
                                 genedb = genedb)
})

# Start from an empty data.frame (as before) so the result type is unchanged;
# a single distinct() over the row-bound batches yields the same rows, in the
# same order, as de-duplicating after every batch.
all_edges <- data.frame() %>%
  dplyr::bind_rows(lapply(chunk_networks, function(x) x[["edges"]])) %>%
  dplyr::distinct()
all_nodes <- data.frame() %>%
  dplyr::bind_rows(lapply(chunk_networks, function(x) x[["nodes"]])) %>%
  dplyr::distinct()



# query_list <- paste(qgenes, collapse="%0d")
#
# all_edges <- jsonlite::fromJSON(paste0("https://string-db.org/api/json/",
# settings$query_type, "?species=9606&identifiers=",
# query_list, "&required_score=",
# settings$minimum_score, "&add_nodes=", settings$add_nodes)) %>%
# dplyr::left_join(dplyr::select(genedb,entrezgene,symbol), by = c("preferredName_A" = "symbol")) %>%
# dplyr::filter(!is.na(entrezgene)) %>%
# dplyr::rename(entrezgene_a = entrezgene) %>%
# dplyr::mutate(entrezgene_a = as.character(entrezgene_a)) %>%
# dplyr::mutate(from = paste0("s",entrezgene_a)) %>%
# dplyr::left_join(dplyr::select(genedb, entrezgene, oncogene, tumor_suppressor,
# cancer_driver, tcga_driver),by=c("entrezgene_a" = "entrezgene")) %>%
# dplyr::rename(oncogene_A = oncogene, tsgene_A = tumor_suppressor,
# cdriver_A = cancer_driver, tcga_driver_A = tcga_driver) %>%
# dplyr::left_join(dplyr::select(genedb,entrezgene,symbol), by = c("preferredName_B" = "symbol")) %>%
# dplyr::filter(!is.na(entrezgene)) %>%
# dplyr::rename(entrezgene_b = entrezgene) %>%
# dplyr::mutate(entrezgene_b = as.character(entrezgene_b)) %>%
# dplyr::mutate(to = paste0("s",entrezgene_b)) %>%
# dplyr::left_join(dplyr::select(genedb, entrezgene, oncogene, tumor_suppressor,
# cancer_driver, tcga_driver), by = c("entrezgene_a" = "entrezgene")) %>%
# dplyr::rename(oncogene_B = oncogene, tsgene_B = tumor_suppressor, cdriver_B = cancer_driver,
# tcga_driver_B = tcga_driver) %>%
# dplyr::mutate(interaction_symbol = paste0(preferredName_A,"_",preferredName_B)) %>%
# dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("preferredName_A" = "symbol")) %>%
# dplyr::rename(query_node_A = query_node) %>%
# dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("preferredName_B" = "symbol")) %>%
# dplyr::rename(query_node_B = query_node) %>%
# dplyr::mutate(weight = score) %>%
# dplyr::distinct() %>%
# dplyr::select(-c(ncbiTaxonId,stringId_A,stringId_B))
#
# network_nodes <- data.frame("symbol" = unique(c(all_edges$preferredName_A, all_edges$preferredName_B)),
# stringsAsFactors = F) %>%
# dplyr::distinct() %>%
# dplyr::left_join(genedb, by = c("symbol" = "symbol")) %>%
# dplyr::select(-ensembl_gene_id) %>%
# dplyr::filter(!is.na(entrezgene)) %>%
# dplyr::left_join(dplyr::select(query_nodes, symbol, query_node), by = c("symbol")) %>%
# dplyr::mutate(id = paste0("s",entrezgene)) %>%
# dplyr::distinct()
#
# all_nodes <- dplyr::bind_rows(query_nodes, network_nodes) %>%
# dplyr::distinct() %>%
# dplyr::mutate(query_node = dplyr::if_else(is.na(query_node), FALSE, as.logical(query_node)))


all_nodes <- all_nodes %>%
dplyr::mutate(shape = dplyr::if_else(query_node == T,settings$visnetwork_shape, as.character("box")))
all_nodes$shadow <- settings$visnetwork_shadow
Expand All @@ -151,7 +245,6 @@ get_ppi_network <- function(qgenes, ppi_source = "STRING", genedb = NULL,
all_nodes$size <- 25
all_nodes <- all_nodes %>%
dplyr::mutate(color.background = dplyr::if_else(query_node == T, "lightblue", "mistyrose")) %>%
#dplyr::mutate(tumor_suppressor = )
dplyr::mutate(color.background = dplyr::if_else(tumor_suppressor == T & oncogene == F,
"firebrick", as.character(color.background),
as.character(color.background))) %>%
Expand Down
Binary file modified data/alias2primary.rda
Binary file not shown.
Binary file modified data/otdb.rda
Binary file not shown.
Binary file modified data/release_notes.rda
Binary file not shown.
Binary file modified data/wikipathwaydb.rda
Binary file not shown.
13 changes: 7 additions & 6 deletions man/onco_enrich.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file renamed oncoEnrichR_0.7.1.pdf → oncoEnrichR_0.7.2.pdf
Binary file not shown.

0 comments on commit 788489b

Please sign in to comment.