77import logging
88from collections import defaultdict
99from indra .ontology .ontology_graph import IndraOntology
10- from indra .ontology .bio .ontology import CACHE_DIR
11- from indra .ontology .bio import bio_ontology
10+ from indra .ontology .bio .ontology import CACHE_DIR , BioOntology
1211
1312
1413logger = logging .getLogger (__name__ )
@@ -24,6 +23,10 @@ def __init__(self, db_path=DEFAULT_SQLITE_ONTOLOGY):
2423 build_sqlite_ontology (db_path )
2524 conn = sqlite3 .connect (db_path )
2625 self .cur = conn .cursor ()
26+ self ._initialized = True
27+
def initialize(self):
    """No-op: perform no work and return None."""
    return None
2730
2831 def isa_or_partof (self , ns1 , id1 , ns2 , id2 ):
2932 q = """SELECT 1 FROM relationships
def child_rel(self, ns, id, rel_types):
    """Yield the children of a node that are linked by given relation types.

    The children are read from the ``child_lookup`` table, in which each
    row stores a comma-separated list of ``<ns>:<id>|<rel_type>`` entries.

    Parameters
    ----------
    ns : str
        Name space of the node, matched against ``parent_ns``.
    id : str
        Identifier of the node, matched against ``parent_id``.
    rel_types : iterable of str
        Relation types to keep. Asking for 'isa' or 'partof' also accepts
        entries stored as 'isa_or_partof'. A falsy value (None or an empty
        collection) matches nothing.

    Yields
    ------
    tuple of str
        A (name space, identifier) pair for each matching child.
    """
    # Work on a private copy: updating the argument in place with ``|=``
    # would leak 'isa_or_partof' into the caller's set, and would fail
    # for None or for non-set iterables such as lists.
    accepted = set(rel_types or ())
    if 'isa' in accepted or 'partof' in accepted:
        accepted.add('isa_or_partof')
    q = """SELECT children FROM child_lookup
           WHERE parent_id=? AND parent_ns=?
           LIMIT 1;"""
    self.cur.execute(q, (id, ns))
    res = self.cur.fetchone()
    if res is None:
        return
    for child in res[0].split(','):
        # Split off the relation type first, then split the CURIE only
        # once since identifiers may themselves contain a colon.
        curie, rel_type = child.split('|', 1)
        if rel_type in accepted:
            yield tuple(curie.split(':', 1))
4554
def get_parents(self, ns, id):
    """Return the isa/partof parents of a node as a list of (ns, id) tuples.

    Note that for isa/partof the ontological child/parent is the opposite
    of the graph-based child/parent, which is why the lookup goes through
    ``child_rel``.
    """
    graph_children = self.child_rel(ns, id, {'isa_or_partof'})
    return list(graph_children)
4859
4960 def get_children (self , ns , id , ns_filter = None ):
50- children = list (self .child_rel (ns , id , {'isa' , 'partof' }))
61+ # Note that for isa/partof ontological child/parent is the
62+ # opposite of the graph-based child/parent
63+ children = list (self .parent_rel (ns , id , {'isa_or_partof' }))
5164 if ns_filter :
5265 children = [(cns , cid ) for cns , cid in children
5366 if cns in ns_filter ]
def parent_rel(self, ns, id, rel_types):
    """Yield the parents of a node that are linked by given relation types.

    The parents are read from the ``parent_lookup`` table, in which each
    row stores a comma-separated list of ``<ns>:<id>|<rel_type>`` entries.

    Parameters
    ----------
    ns : str
        Name space of the node, matched against ``child_ns``.
    id : str
        Identifier of the node, matched against ``child_id``.
    rel_types : iterable of str
        Relation types to keep. Asking for 'isa' or 'partof' also accepts
        entries stored as 'isa_or_partof'. A falsy value (None or an empty
        collection) matches nothing.

    Yields
    ------
    tuple of str
        A (name space, identifier) pair for each matching parent.
    """
    # Work on a private copy: updating the argument in place with ``|=``
    # would leak 'isa_or_partof' into the caller's set, and would fail
    # for None or for non-set iterables such as lists.
    accepted = set(rel_types or ())
    if 'isa' in accepted or 'partof' in accepted:
        accepted.add('isa_or_partof')
    q = """SELECT parents FROM parent_lookup
           WHERE child_id=? AND child_ns=?
           LIMIT 1;"""
    self.cur.execute(q, (id, ns))
    res = self.cur.fetchone()
    if res is None:
        return
    for parent in res[0].split(','):
        # Split off the relation type first, then split the CURIE only
        # once since identifiers may themselves contain a colon.
        curie, rel_type = parent.split('|', 1)
        if rel_type in accepted:
            yield tuple(curie.split(':', 1))
6685
6786 def get_node_property (self , ns , id , property ):
6887 q = """SELECT properties FROM node_properties
@@ -92,6 +111,7 @@ def build_sqlite_ontology(db_path=DEFAULT_SQLITE_ONTOLOGY, force=False):
92111 pass
93112
94113 # Initialize the bio ontology and build the transitive closure
114+ bio_ontology = BioOntology ()
95115 bio_ontology .initialize ()
96116 bio_ontology ._build_transitive_closure ()
97117
@@ -107,26 +127,51 @@ def build_sqlite_ontology(db_path=DEFAULT_SQLITE_ONTOLOGY, force=False):
107127 child_ns TEXT NOT NULL,
108128 parent_id TEXT NOT NULL,
109129 parent_ns TEXT NOT NULL,
130+ rel_type TEXT NOT NULL,
110131 UNIQUE (child_id, child_ns, parent_id, parent_ns)
111132 );"""
112133 cur .execute (q )
113134
114135 # Insert into the database in chunks
115136 chunk_size = 10000
137+ # Note: the transitive closure consists of pairs with the first element
138+ # being the ontological child and the second the parent. However,
139+ # in a graph representation isa/partof edges point from the ontological
140+ # child to the ontological parent. Here, we need to follow the graph-based
141+ # parent->child relationships, not the ontological ones.
116142 tc = sorted (bio_ontology .transitive_closure )
117143 all_children = defaultdict (set )
118144 all_parents = defaultdict (set )
119145 for i in range (0 , len (tc ), chunk_size ):
120146 chunk = tc [i :i + chunk_size ]
121- chunk_values = [(child .split (':' , 1 )[1 ], child .split (':' )[0 ],
122- parent .split (':' , 1 )[1 ], parent .split (':' )[0 ])
147+ chunk_values = [(parent .split (':' , 1 )[1 ], parent .split (':' )[0 ],
148+ child .split (':' , 1 )[1 ], child .split (':' )[0 ])
123149 for child , parent in chunk ]
124150 for cid , cns , pid , pns in chunk_values :
125- all_children [(pid , pns )].add ('%s:%s' % (cns , cid ))
126- all_parents [(cid , cns )].add ('%s:%s' % (pns , pid ))
127- cur .executemany ("""INSERT INTO relationships (child_id,
128- child_ns, parent_id, parent_ns)
129- VALUES (?, ?, ?, ?);""" , chunk_values )
151+ all_children [(pid , pns )].add (
152+ '%s:%s|%s' % (cns , cid , 'isa_or_partof' ))
153+ all_parents [(cid , cns )].add (
154+ '%s:%s|%s' % (pns , pid , 'isa_or_partof' ))
155+ cur .executemany ("""INSERT INTO relationships (parent_id,
156+ parent_ns, child_id, child_ns, rel_type)
157+ VALUES (?, ?, ?, ?, 'isa_or_partof');""" ,
158+ chunk_values )
159+
160+ for parent , child , data in bio_ontology .edges (data = True ):
161+ parent_ns , parent_id = bio_ontology .get_ns_id (parent )
162+ child_ns , child_id = bio_ontology .get_ns_id (child )
163+ rel_type = data .get ('type' )
164+ if rel_type in {'isa' , 'partof' }:
165+ continue
166+ all_children [(parent_id , parent_ns )].add (
167+ '%s:%s|%s' % (child_ns , child_id , rel_type ))
168+ all_parents [(child_id , child_ns )].add (
169+ '%s:%s|%s' % (parent_ns , parent_id , rel_type ))
170+ cur .execute ("""INSERT INTO relationships (parent_id,
171+ parent_ns, child_id, child_ns, rel_type)
172+ VALUES (?, ?, ?, ?, ?);""" ,
173+ (parent_id , parent_ns , child_id , child_ns , rel_type ))
174+
130175 q = """CREATE INDEX idx_child_parent ON relationships
131176 (child_id, child_ns, parent_id, parent_ns);"""
132177 cur .execute (q )
0 commit comments