Skip to content

Commit

Permalink
feat: additional species-specific ontologies for cxg 5.3 multispecies…
Browse files Browse the repository at this point in the history
… schema (#255)

## Reason for Change

- Part of the CxG 5.3 release to support multiple species, some of which
require species-specific ontology queries

## Changes
- add: BUILDER - support for referencing `cross_ontology_term` e.g. SSSOM
files
- add: BUILDER - support for prefix mapping e.g. map terms from one
ontology (prefix) to another
- add: BUILDER - updated species ontologies: FBbt, FBdv, ZFA/ZFS, WBls,
WBbt, XAO
- add: API - API query to lookup bridge terms between ontological
namespaces
- add: ARTIFACT - contain cross-ontology mappings
- modify: Updated EFO to v69.0

## Testing steps

- all tests pass
- builder task works
- descendant artifact generator works

---------

Co-authored-by: github-actions <[email protected]>
Co-authored-by: Joyce Yan <[email protected]>
Co-authored-by: Nayib Gloria <[email protected]>
Co-authored-by: Evan Molinelli <[email protected]>
  • Loading branch information
5 people authored Jan 28, 2025
1 parent 2f1b91b commit 64c32fe
Show file tree
Hide file tree
Showing 22 changed files with 987 additions and 314 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@
.coverage*
/htmlcov/
/api/python/docs/
/.vscode
**/__pycache__/
*.owl
*.sssom.tsv
2 changes: 1 addition & 1 deletion api/python/ontology-assets-version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b525a902031034abe2d73ec0d10d71e71fa65365
70aba3af2a4b938d8bb54d650225c217fe9648fa
5 changes: 5 additions & 0 deletions api/python/src/cellxgene_ontology_guide/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ class Ontology(Enum):
MmusDv = "mmusdv"
PATO = "pato"
NCBITaxon = "ncbitaxon"
FBbt = "fbbt"
FBdv = "fbdv"
ZFA = "zfa"
WBbt = "wbbt"
WBls = "wbls"


class CuratedOntologyTermList(Enum):
Expand Down
61 changes: 61 additions & 0 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,3 +662,64 @@ def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[
"""
ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
return ontology_term_label_to_id_map.get(term_label)

def get_bridge_term_id(self, term_id: str, cross_ontology: str) -> Optional[str]:
    """
    Look up the exact equivalent of a term in another ontology.

    Only a bridge mapping recorded directly on the term itself is returned; ancestors of the term are
    not consulted. When the term has no mapping to the requested ontology, None is returned.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_bridge_term_id("FBbt:00000001", "UBERON")
    'UBERON:0000468'

    :param term_id: str ontology term to find the equivalent term for
    :param cross_ontology: str name of the ontology to look for the equivalent term in
    :return: Optional[str] equivalent term ID from the cross_ontology, or None if there is no exact match
    :raises ValueError: if cross_ontology is not one of the supported cross ontology mappings; validation of
        term_id itself is delegated to _parse_ontology_name
    """
    if cross_ontology not in self.cxg_schema.cross_ontology_mappings:
        raise ValueError(
            f"{cross_ontology} is not in the set of supported cross ontology mappings "
            f"{self.cxg_schema.cross_ontology_mappings}."
        )
    source_ontology = self._parse_ontology_name(term_id)
    term_record = self.cxg_schema.ontology(source_ontology)[term_id]
    bridge_terms = term_record.get("cross_ontology_terms")
    # A missing or empty mapping means no bridge term is recorded for this term.
    if not bridge_terms:
        return None
    return bridge_terms.get(cross_ontology)

def get_closest_bridge_term_ids(self, term_id: str, cross_ontology: str) -> List[str]:
    """
    Find the closest equivalent term(s) for a term in another ontology.

    If the term itself has an exact match, a single-element list with that match is returned. Otherwise the
    term's ancestors are searched one generation at a time, and the matches found in the nearest generation
    that has any are returned. If several ancestors at that distance have matches, all of them are returned,
    in first-seen order and without duplicates (the same bridge term reached through different ancestors is
    reported once). If no applicable match is found, an empty list is returned.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_closest_bridge_term_ids("FBbt:00000039", "UBERON")
    ['UBERON:0000476', 'UBERON:0000920']

    :param term_id: str ontology term to find closest term for
    :param cross_ontology: str name of ontology to search for closest term in
    :return: List[str] list of closest term IDs from the cross_ontology
    :raises ValueError: propagated from get_bridge_term_id, e.g. when cross_ontology is not a supported
        cross ontology mapping
    """
    closest_bridge_terms: List[str] = []
    terms_to_match = [term_id]
    # Walk up the ancestor graph level by level; stop at the first level that yields any match.
    while terms_to_match and not closest_bridge_terms:
        for term in terms_to_match:
            closest_bridge_term = self.get_bridge_term_id(term, cross_ontology)
            # Several ancestors may map to the same bridge term; report it only once.
            if closest_bridge_term and closest_bridge_term not in closest_bridge_terms:
                closest_bridge_terms.append(closest_bridge_term)
        # dict.fromkeys de-duplicates parents shared by several children while keeping their order,
        # so a shared ancestor is not looked up (and expanded) repeatedly.
        terms_to_match = list(
            dict.fromkeys(parent for child in terms_to_match for parent in self.get_term_parents(child))
        )
    return closest_bridge_terms
3 changes: 3 additions & 0 deletions api/python/src/cellxgene_ontology_guide/supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ def __init__(self, version: Optional[str] = None):
for ontology, info in self.supported_ontologies.items()
for imported_ontology in info.get("additional_ontologies", [])
}
self.cross_ontology_mappings = {
ontology for ontology, info in self.supported_ontologies.items() if info.get("cross_ontology_mapping")
}
self.ontology_file_names: Dict[str, str] = {}
self.deprecated_on = ontology_info[_version].get("deprecated_on")
if self.deprecated_on:
Expand Down
115 changes: 113 additions & 2 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,31 +72,118 @@ def ontology_dict_with_imports():


@pytest.fixture
def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supported_versions, mock_load_ontology_file):
def ontology_dict_with_cross_ontology_terms():
# Mock ZFA ontology contents used to exercise the bridge-term lookups.
# Each entry maps a term ID to its "ancestors" (ancestor ID -> integer, which the
# case comments below treat as the number of edges to that ancestor) and, for terms
# that have an exact match, a "cross_ontology_terms" map giving the equivalent CL term.
return {
# test cases: terms with exact matches + ancestors of terms without exact matches
"ZFA:0000000": {
"ancestors": {},
"cross_ontology_terms": {
"CL": "CL:0000000",
},
},
"ZFA:0000001": {
"ancestors": {
"ZFA:0000000": 1,
},
"cross_ontology_terms": {
"CL": "CL:0000001",
},
},
"ZFA:0000002": {
"ancestors": {
"ZFA:0000000": 1,
},
"cross_ontology_terms": {
"CL": "CL:0000002",
},
},
"ZFA:0000003": {
"ancestors": {
"ZFA:0000000": 1,
},
"cross_ontology_terms": {
"CL": "CL:0000003",
},
},
# test case: term with no exact term and multiple closest terms 1 edge away
"ZFA:0000004": {
"ancestors": {
"ZFA:0000001": 1,
"ZFA:0000002": 1,
"ZFA:0000000": 2,
},
},
# test case: term with no exact term and 1 closest term, 1 edge away
"ZFA:0000005": {
"ancestors": {
"ZFA:0000003": 1,
"ZFA:0000000": 2,
},
},
# test case: term with no exact term and multiple closest terms 2 edges away
"ZFA:0000006": {
"ancestors": {
"ZFA:0000004": 1,
"ZFA:0000005": 1,
"ZFA:0000001": 2,
"ZFA:0000002": 2,
"ZFA:0000003": 2,
"ZFA:0000000": 3,
},
},
# test case: term with no exact or closest term
"ZFA:0000007": {
"ancestors": {},
},
}


@pytest.fixture
def mock_CXGSchema(
ontology_dict,
ontology_dict_with_imports,
ontology_dict_with_cross_ontology_terms,
mock_load_supported_versions,
mock_load_ontology_file,
):
mock_load_supported_versions.return_value = {
"5.0.0": {
"ontologies": {
"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
"CL": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"cross_ontology_mapping": "cl.sssom",
},
"HANCESTRO": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"additional_ontologies": ["AfPO"],
},
"ZFA": {
"version": "2024-01-01",
"source": "http://example.com",
"filename": "zfa.owl",
"map_to": ["CL"],
},
}
}
}
cxg_schema = CXGSchema()
cxg_schema.ontology_file_names = {
"CL": "CL-ontology-2024-01-01.json.gz",
"HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz",
"ZFA": "ZFA-ontology-2024-01-01.json.gz",
}

def get_mock_ontology_dict(file_name):
if "CL" in file_name:
return ontology_dict
if "HANCESTRO" in file_name:
return ontology_dict_with_imports
if "ZFA" in file_name:
return ontology_dict_with_cross_ontology_terms
return None

mock_load_ontology_file.side_effect = get_mock_ontology_dict
Expand Down Expand Up @@ -584,3 +671,27 @@ def test_get_term_id_by_label(ontology_parser, label, ontology_name, expected):
def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser):
with pytest.raises(ValueError):
ontology_parser.get_term_id_by_label("gene A", "GO")


# Exact-match lookup against the mocked ZFA ontology: ZFA:0000000 carries a CL entry in its
# "cross_ontology_terms", while ZFA:0000004 has no "cross_ontology_terms" at all, so None is expected.
@pytest.mark.parametrize("term_id,expected", [("ZFA:0000000", "CL:0000000"), ("ZFA:0000004", None)])
def test_get_bridge_term_id(ontology_parser, term_id, expected):
assert ontology_parser.get_bridge_term_id(term_id, "CL") == expected


# HANCESTRO has no "cross_ontology_mapping" entry in the mocked supported-versions data,
# so requesting it as the target ontology must be rejected with ValueError.
def test_get_bridge_term_id__unsupported_cross_ontology(ontology_parser):
with pytest.raises(ValueError):
ontology_parser.get_bridge_term_id("ZFA:0000000", "HANCESTRO")


# Expected values follow from the mocked ZFA ontology: a term with no mapping and no ancestors
# yields [], a term with its own CL mapping yields only that mapping, and any other term yields
# the CL mappings of its nearest mapped ancestors (all of them when several are equally near).
@pytest.mark.parametrize(
"term_id,expected",
[
("ZFA:0000007", []),
("ZFA:0000006", ["CL:0000001", "CL:0000002", "CL:0000003"]),
("ZFA:0000005", ["CL:0000003"]),
("ZFA:0000004", ["CL:0000001", "CL:0000002"]),
("ZFA:0000000", ["CL:0000000"]),
],
)
def test_get_closest_bridge_term_ids(ontology_parser, term_id, expected):
assert ontology_parser.get_closest_bridge_term_ids(term_id, "CL") == expected
8 changes: 7 additions & 1 deletion api/python/tests/test_supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ def ontology_info_content():
return {
"5.0.0": {
"ontologies": {
"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"},
"CL": {
"version": "v2024-01-01",
"source": "http://example.com",
"filename": "cl.owl",
"cross_ontology_mapping": "cl.sssom",
},
"HANCESTRO": {
"version": "v2024-01-01",
"source": "http://example.com",
Expand Down Expand Up @@ -94,6 +99,7 @@ def test__init__defaults(self, ontology_info_content, initialized_CXGSchemaInfo)
assert initialized_CXGSchemaInfo.version == "5.0.0"
assert initialized_CXGSchemaInfo.supported_ontologies == ontology_info_content["5.0.0"]["ontologies"]
assert initialized_CXGSchemaInfo.imported_ontologies == {"FOO": "HANCESTRO", "OOF": "HANCESTRO"}
assert initialized_CXGSchemaInfo.cross_ontology_mappings == {"CL"}

@pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"])
def test__init__specific_version(self, version, mock_load_supported_versions):
Expand Down
10 changes: 10 additions & 0 deletions asset-schemas/all_ontology_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@
"type": "integer"
}
},
"cross_ontology_terms": {
"type": "object",
"description": "Map of bridge terms that connect this ontology term to other ontologies.",
"patternProperties": {
"^[A-Za-z0-9]+$": {
"$ref": "ontology_term_id_schema.json#/definitions/supported_term_id"
}
},
"additionalProperties": false
},
"comments": {
"type": "array",
"items": {
Expand Down
17 changes: 13 additions & 4 deletions asset-schemas/ontology_info_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "A schema for the set of valid ontology reference files mapping to a CZ CellXGene Dataset Schema Versions",
"type": "object",
"patternProperties": {
"^[0-9]+\\.[0-9]+\\.[0-9]+$": {
"^[0-9]+\\.[0-9]+\\.[0-9]+(-.+)?$": {
"description": "The version of CellxGene schema that maps to this set of ontology versions",
"type": "object",
"properties": {
Expand Down Expand Up @@ -51,15 +51,24 @@
"type": "string"
},
"description": "List of additional term id prefixes to be extracted from the source ontology file."
},
"cross_ontology_mapping": {
"type": "string",
"description": "name of SSSOM file mapping this ontology's terms to cross-species equivalent ontology terms."
},
"map_to": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of ontologies to map equivalent terms to this ontology"
}
},
"required": [
"version",
"source",
"filename"
],
"additionalProperties": false
]
}
}
}

44 changes: 44 additions & 0 deletions asset-schemas/ontology_term_id_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,30 @@
"type": "string",
"pattern": "^UBERON+:[0-9]+$"
},
"FBbt_term_id": {
"type": "string",
"pattern": "^FBbt+:[0-9]+$"
},
"FBdv_term_id": {
"type": "string",
"pattern": "^FBdv+:[0-9]+$"
},
"ZFA_term_id": {
"type": "string",
"pattern": "^ZFA+:[0-9]+$"
},
"ZFS_term_id": {
"type": "string",
"pattern": "^ZFS+:[0-9]+$"
},
"WBls_term_id": {
"type": "string",
"pattern": "^WBls+:[0-9]+$"
},
"WBbt_term_id": {
"type": "string",
"pattern": "^WBbt+:[0-9]+$"
},
"supported_term_id": {
"anyOf": [
{
Expand Down Expand Up @@ -67,6 +91,26 @@
{
"$ref": "#/definitions/UBERON_term_id"
}
,
{
"$ref": "#/definitions/FBbt_term_id"
}
,
{
"$ref": "#/definitions/FBdv_term_id"
},
{
"$ref": "#/definitions/ZFA_term_id"
},
{
"$ref": "#/definitions/ZFS_term_id"
},
{
"$ref": "#/definitions/WBls_term_id"
},
{
"$ref": "#/definitions/WBbt_term_id"
}
]
}
}
Expand Down
Binary file added ontology-assets/FBbt-ontology-v2024-10-17.json.gz
Binary file not shown.
Binary file added ontology-assets/FBbt-ontology-v2024-12-05.json.gz
Binary file not shown.
Binary file added ontology-assets/FBdv-ontology-v2024-10-17.json.gz
Binary file not shown.
Binary file added ontology-assets/FBdv-ontology-v2024-12-04.json.gz
Binary file not shown.
Binary file not shown.
Binary file added ontology-assets/WBls-ontology-vWS295.json.gz
Binary file not shown.
Binary file added ontology-assets/ZFA-ontology-v2022-12-09.json.gz
Binary file not shown.
Loading

0 comments on commit 64c32fe

Please sign in to comment.