-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1449 from bgyori/rgd
Add client for RGD database
- Loading branch information
Showing
5 changed files
with
61,502 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
"""A client for accessing RGD rat gene data.""" | ||
|
||
from collections import defaultdict | ||
from typing import List, Union | ||
|
||
from indra.util import read_unicode_csv | ||
from indra.resources import get_resource_path | ||
|
||
|
||
def get_id_from_name(name: str) -> Union[str, None]: | ||
"""Return an RGD ID from an RGD gene symbol. | ||
Parameters | ||
---------- | ||
name : | ||
The RGD gene symbol whose ID will be returned. | ||
Returns | ||
------- | ||
: | ||
The RGD ID (without prefix) or None if not available. | ||
""" | ||
return rgd_name_to_id.get(name) | ||
|
||
|
||
def get_name_from_id(rgd_id: str) -> Union[str, None]: | ||
"""Return the RGD gene symbol for a given RGD ID. | ||
Parameters | ||
---------- | ||
rgd_id : | ||
The RGD ID (without prefix) whose symbol will be returned. | ||
Returns | ||
------- | ||
: | ||
The RGD symbol for the given ID or None if not available. | ||
""" | ||
return rgd_id_to_name.get(rgd_id) | ||
|
||
|
||
def get_synonyms(rgd_id: str) -> List[str]: | ||
"""Return the synonyms for an RGD ID. | ||
Parameters | ||
---------- | ||
rgd_id : | ||
An RGD ID, without prefix. | ||
Returns | ||
------- | ||
: | ||
The list of synonyms corresponding to the RGD ID, or an empty list | ||
if not available. | ||
""" | ||
return rgd_synonyms.get(rgd_id, []) | ||
|
||
|
||
def get_id_from_name_synonym(name_synonym: str) -> Union[None, str, List[str]]: | ||
"""Return an RGD ID from an RGD gene symbol or synonym. | ||
If the given name or synonym is the official symbol of a gene, its | ||
ID is returned. If the input is a synonym, it can correspond to | ||
one or more genes. If there is a single gene whose synonym matches | ||
the input, the ID is returned as a string. If multiple genes share | ||
the given synonym, their IDs are returned in a list. If the input | ||
doesn't match any names or synonyms, None is returned. | ||
Parameters | ||
---------- | ||
name_synonym : | ||
The RGD gene symbol or synonym whose ID will be returned. | ||
Returns | ||
------- | ||
: | ||
The RGD ID (without prefix) of a single gene, a list of RGD IDs, | ||
or None. | ||
""" | ||
rgd_id = rgd_name_to_id.get(name_synonym) | ||
if rgd_id: | ||
return rgd_id | ||
rgd_ids = rgd_synonyms_reverse.get(name_synonym) | ||
if rgd_ids: | ||
if len(rgd_ids) == 1: | ||
return rgd_ids[0] | ||
else: | ||
return rgd_ids | ||
return None | ||
|
||
|
||
def get_ensembl_id(rgd_id: str) -> Union[str, None]: | ||
"""Return the Ensembl ID for an RGD ID. | ||
Parameters | ||
---------- | ||
rgd_id : | ||
An RGD ID, without prefix. | ||
Returns | ||
------- | ||
: | ||
A list of Ensembl IDs corresponding to the RGD ID, | ||
or None if not available. | ||
""" | ||
return rgd_id_to_ensembl.get(rgd_id) | ||
|
||
|
||
def _read_rgd(): | ||
fname = get_resource_path('rgd_entries.tsv') | ||
rgd_id_to_name = {} | ||
rgd_name_to_id = {} | ||
rgd_synonyms = {} | ||
rgd_synonyms_reverse = defaultdict(list) | ||
rgd_id_to_ensembl = {} | ||
ensemble_id_to_rgd = {} | ||
for rgd_id, name, synonyms_str, ensembl_id in \ | ||
read_unicode_csv(fname, '\t'): | ||
if name: | ||
rgd_id_to_name[rgd_id] = name | ||
rgd_name_to_id[name] = rgd_id | ||
if synonyms_str: | ||
synonyms = synonyms_str.split(';') | ||
rgd_synonyms[rgd_id] = synonyms | ||
for synonym in synonyms: | ||
rgd_synonyms_reverse[synonym].append(rgd_id) | ||
if ensembl_id: | ||
ensemble_ids = ensembl_id.split(';') | ||
rgd_id_to_ensembl[rgd_id] = ensemble_ids | ||
for ensemble_id in ensemble_ids: | ||
ensemble_id_to_rgd[ensemble_id] = rgd_id | ||
|
||
return rgd_id_to_name, rgd_name_to_id, rgd_synonyms, \ | ||
dict(rgd_synonyms_reverse), rgd_id_to_ensembl, ensemble_id_to_rgd | ||
|
||
|
||
rgd_id_to_name, rgd_name_to_id, rgd_synonyms, rgd_synonyms_reverse, \ | ||
rgd_id_to_ensembl, ensemble_id_to_rgd = _read_rgd() |
Oops, something went wrong.