-
Notifications
You must be signed in to change notification settings - Fork 0
/
chebi_parser.py
138 lines (90 loc) · 3.53 KB
/
chebi_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import sys
from collections import deque

import libchebipy
"""
A set of functions to assist in substrate identification and classification,
using ChEBI as the reference.
"""
def get_substrate_name(id):
    """
    Look up the ChEBI entity for the given ID and return that entity's name.
    """
    return libchebipy.ChebiEntity(id).get_name()
def get_primary(id):
    """
    Get the primary ChEBI ID, since the primary ID is not guaranteed to be the
    ID that is reported.

    The parent ID of the entity is returned when the entity has one;
    otherwise the ID that was passed in is returned unchanged.
    """
    parent_id = libchebipy.ChebiEntity(id).get_parent_id()
    # Identity, not equality, is the correct test against None.
    return id if parent_id is None else parent_id
def find_predecessor(id, classes=None):
    """
    Search upward through the ChEBI 'is_a' relations for a classification.

    The primary ID of ``id`` is checked first. If it is not one of
    ``classes``, the 'is_a' relations are followed breadth-first and the
    first target whose primary ID is in ``classes`` is reported.

    Parameters:
        id: the ChEBI ID to classify.
        classes: container of ChEBI IDs that count as a classification.
            When None or empty nothing can match, so (None, None) is
            returned without querying ChEBI.

    Returns:
        A (name, chebi_id) tuple for the first matching class, or
        (None, None) when no class is reached.
    """
    no_match = (None, None)
    # Guard the default: `x in None` would raise TypeError, and an empty
    # container can never produce a match.
    if not classes:
        return no_match

    primary = get_primary(id)
    if primary in classes:
        return (get_substrate_name(primary), primary)

    # Copy the relations into our own queue. get_outgoings() may hand back a
    # list owned by the library (TODO confirm); the copy guarantees we never
    # mutate it. A deque also gives O(1) pops from the left.
    pending = deque(libchebipy.ChebiEntity(primary).get_outgoings())
    # IDs whose relations are already queued; avoids re-walking ancestors
    # that are reachable along several paths.
    expanded = {primary}
    while pending:
        relation = pending.popleft()
        if relation.get_type() != 'is_a':
            continue
        target_id = get_primary(relation.get_target_chebi_id())
        if target_id in classes:
            return (get_substrate_name(target_id), target_id)
        if target_id in expanded:
            continue
        expanded.add(target_id)
        pending.extend(libchebipy.ChebiEntity(target_id).get_outgoings())
    return no_match
def find_role(id, classes=None):
    """
    Find a role classification for a ChEBI ID.

    Works like find_predecessor, but starts from the 'has_role' relations
    of the entity: each role target is checked against ``classes`` and, if
    it does not match directly, its own 'is_a' ancestry is searched with
    find_predecessor.

    Parameters:
        id: the ChEBI ID to classify.
        classes: container of ChEBI IDs that count as a classification.
            When None or empty nothing can match, so (None, None) is
            returned without querying ChEBI.

    Returns:
        A (name, chebi_id) tuple for the first matching role class, or
        (None, None) when none of the roles leads to a class.
    """
    no_match = (None, None)
    # Guard the default: `x in None` would raise TypeError, and an empty
    # container can never produce a match.
    if not classes:
        return no_match

    primary = get_primary(id)
    if primary in classes:
        return (get_substrate_name(primary), primary)

    # Walk the relations of the primary ID, consistent with find_predecessor.
    for relation in libchebipy.ChebiEntity(primary).get_outgoings():
        if relation.get_type() != 'has_role':
            continue
        target_id = get_primary(relation.get_target_chebi_id())
        if target_id in classes:
            return (get_substrate_name(target_id), target_id)
        role = find_predecessor(target_id, classes=classes)
        # Stop at the first role that resolves; otherwise a later role with
        # no match would overwrite a result that was already found.
        if role != no_match:
            return role
    return no_match
def ce_categorization(id, primary_ce=None, secondary_ce=None):
    """
    Find the chemical entity classification, if possible.

    The primary classes are searched first; the secondary classes are only
    consulted when the primary search yields nothing.

    Parameters:
        id: the ChEBI ID to classify.
        primary_ce: preferred container of class IDs (may be None or empty).
        secondary_ce: fallback container of class IDs (may be None or empty).

    Returns:
        The (name, chebi_id) tuple from find_predecessor, or (None, None)
        when neither set of classes produces a match.
    """
    no_match = (None, None)
    for classes in (primary_ce, secondary_ce):
        # A missing or empty class set cannot match; skipping it also keeps
        # None from being handed to the membership tests downstream.
        if not classes:
            continue
        ce = find_predecessor(id, classes=classes)
        if ce != no_match:
            return ce
    return no_match
def role_categorization(id, primary_role=None, secondary_role=None):
    """
    Find the role classification, if possible.

    The primary role classes are searched first; the secondary role classes
    are only consulted when the primary search yields nothing.

    Parameters:
        id: the ChEBI ID to classify.
        primary_role: preferred container of role class IDs (may be None or
            empty).
        secondary_role: fallback container of role class IDs (may be None or
            empty).

    Returns:
        The (name, chebi_id) tuple from find_role, or (None, None) when
        neither set of classes produces a match.
    """
    no_match = (None, None)
    for classes in (primary_role, secondary_role):
        # A missing or empty class set cannot match; skipping it also keeps
        # None from being handed to the membership tests downstream.
        if not classes:
            continue
        role = find_role(id, classes=classes)
        if role != no_match:
            return role
    return no_match
if __name__ == "__main__":
    # Command-line entry point: classify the ChEBI ID given as the first
    # argument and print the chemical-entity and role results.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of a bare IndexError.
        sys.exit("usage: {} CHEBI_ID".format(sys.argv[0]))
    chebi_id = sys.argv[1]
    # Class IDs accepted as a chemical-entity classification.
    ce_classes = {
        'CHEBI:33696', 'CHEBI:33838', 'CHEBI:36976', 'CHEBI:23888',
        'CHEBI:33281', 'CHEBI:18059', 'CHEBI:33229', 'CHEBI:25696',
        'CHEBI:33575', 'CHEBI:24834', 'CHEBI:25697', 'CHEBI:36915',
        'CHEBI:33709', 'CHEBI:16670', 'CHEBI:26672', 'CHEBI:31432',
        'CHEBI:35381', 'CHEBI:50699', 'CHEBI:18154', 'CHEBI:72813',
        'CHEBI:88061', 'CHEBI:10545', 'CHEBI:25367', 'CHEBI:24403',
        'CHEBI:23357', 'CHEBI:17627', 'CHEBI:83821', 'CHEBI:17237',
        'CHEBI:24870',
    }
    # Class IDs accepted as a role classification.
    role_classes = {
        'CHEBI:23888', 'CHEBI:33281', 'CHEBI:26672', 'CHEBI:31432',
        'CHEBI:33229', 'CHEBI:23357', 'CHEBI:25212',
    }
    print(find_predecessor(chebi_id, classes=ce_classes))
    print(find_role(chebi_id, classes=role_classes))