Skip to content

Commit f52a02d

Browse files
authored
Merge branch 'master' into search_page
2 parents 99e74a2 + ed26006 commit f52a02d

File tree

3 files changed

+67
-16
lines changed

3 files changed

+67
-16
lines changed

indra/ontology/bio/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@
44

55
from indra.config import get_config
66
from .ontology import BioOntology
7+
from .sqlite_ontology import SqliteOntology, DEFAULT_SQLITE_ONTOLOGY
78
from ..virtual import VirtualOntology
89

910
indra_ontology_url = get_config('INDRA_ONTOLOGY_URL')
10-
bio_ontology = BioOntology() if not indra_ontology_url else \
11-
VirtualOntology(url=indra_ontology_url)
11+
if indra_ontology_url is None:
12+
bio_ontology = BioOntology()
13+
elif indra_ontology_url == "sqlite":
14+
sqlite_ontology_path = get_config("SQLITE_ONTOLOGY_PATH") or DEFAULT_SQLITE_ONTOLOGY
15+
bio_ontology = SqliteOntology(db_path=sqlite_ontology_path)
16+
else:
17+
bio_ontology = VirtualOntology(url=indra_ontology_url)

indra/ontology/bio/sqlite_ontology.py

Lines changed: 58 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@
77
import logging
88
from collections import defaultdict
99
from indra.ontology.ontology_graph import IndraOntology
10-
from indra.ontology.bio.ontology import CACHE_DIR
11-
from indra.ontology.bio import bio_ontology
10+
from indra.ontology.bio.ontology import CACHE_DIR, BioOntology
1211

1312

1413
logger = logging.getLogger(__name__)
@@ -24,6 +23,10 @@ def __init__(self, db_path=DEFAULT_SQLITE_ONTOLOGY):
2423
build_sqlite_ontology(db_path)
2524
conn = sqlite3.connect(db_path)
2625
self.cur = conn.cursor()
26+
self._initialized = True
27+
28+
def initialize(self):
29+
pass
2730

2831
def isa_or_partof(self, ns1, id1, ns2, id2):
2932
q = """SELECT 1 FROM relationships
@@ -36,18 +39,28 @@ def child_rel(self, ns, id, rel_types):
3639
q = """SELECT children FROM child_lookup
3740
WHERE parent_id=? AND parent_ns=?
3841
LIMIT 1;"""
42+
if rel_types and 'isa' in rel_types or 'partof' in rel_types:
43+
rel_types |= {'isa_or_partof'}
3944
self.cur.execute(q, (id, ns))
4045
res = self.cur.fetchone()
4146
if res is None:
4247
yield from []
4348
else:
44-
yield from [tuple(x.split(':', 1)) for x in res[0].split(',')]
49+
children = res[0].split(',')
50+
for child in children:
51+
curie, rel_type = child.split('|', 1)
52+
if rel_type in rel_types:
53+
yield tuple(curie.split(':', 1))
4554

4655
def get_parents(self, ns, id):
47-
return list(self.parent_rel(ns, id, {'isa', 'partof'}))
56+
# Note that for isa/partof ontological child/parent is the
57+
# opposite of the graph-based child/parent
58+
return list(self.child_rel(ns, id, {'isa_or_partof'}))
4859

4960
def get_children(self, ns, id, ns_filter=None):
50-
children = list(self.child_rel(ns, id, {'isa', 'partof'}))
61+
# Note that for isa/partof ontological child/parent is the
62+
# opposite of the graph-based child/parent
63+
children = list(self.parent_rel(ns, id, {'isa_or_partof'}))
5164
if ns_filter:
5265
children = [(cns, cid) for cns, cid in children
5366
if cns in ns_filter]
@@ -57,12 +70,18 @@ def parent_rel(self, ns, id, rel_types):
5770
q = """SELECT parents FROM parent_lookup
5871
WHERE child_id=? AND child_ns=?
5972
LIMIT 1;"""
73+
if rel_types and 'isa' in rel_types or 'partof' in rel_types:
74+
rel_types |= {'isa_or_partof'}
6075
self.cur.execute(q, (id, ns))
6176
res = self.cur.fetchone()
6277
if res is None:
6378
yield from []
6479
else:
65-
yield from [tuple(x.split(':', 1)) for x in res[0].split(',')]
80+
parents = res[0].split(',')
81+
for parent in parents:
82+
curie, rel_type = parent.split('|', 1)
83+
if rel_type in rel_types:
84+
yield tuple(curie.split(':', 1))
6685

6786
def get_node_property(self, ns, id, property):
6887
q = """SELECT properties FROM node_properties
@@ -92,6 +111,7 @@ def build_sqlite_ontology(db_path=DEFAULT_SQLITE_ONTOLOGY, force=False):
92111
pass
93112

94113
# Initialize the bio ontology and build the transitive closure
114+
bio_ontology = BioOntology()
95115
bio_ontology.initialize()
96116
bio_ontology._build_transitive_closure()
97117

@@ -107,26 +127,51 @@ def build_sqlite_ontology(db_path=DEFAULT_SQLITE_ONTOLOGY, force=False):
107127
child_ns TEXT NOT NULL,
108128
parent_id TEXT NOT NULL,
109129
parent_ns TEXT NOT NULL,
130+
rel_type TEXT NOT NULL,
110131
UNIQUE (child_id, child_ns, parent_id, parent_ns)
111132
);"""
112133
cur.execute(q)
113134

114135
# Insert into the database in chunks
115136
chunk_size = 10000
137+
# Note: the transitive closure consists of pairs with the first element
138+
# being the ontological child and the second the parent. However,
139+
# in a graph representation isa/partof edges point from the ontological
140+
# child to the ontological parent. Here, we need to follow the graph-based
141+
# parent->child relationships, not the ontological ones.
116142
tc = sorted(bio_ontology.transitive_closure)
117143
all_children = defaultdict(set)
118144
all_parents = defaultdict(set)
119145
for i in range(0, len(tc), chunk_size):
120146
chunk = tc[i:i+chunk_size]
121-
chunk_values = [(child.split(':', 1)[1], child.split(':')[0],
122-
parent.split(':', 1)[1], parent.split(':')[0])
147+
chunk_values = [(parent.split(':', 1)[1], parent.split(':')[0],
148+
child.split(':', 1)[1], child.split(':')[0])
123149
for child, parent in chunk]
124150
for cid, cns, pid, pns in chunk_values:
125-
all_children[(pid, pns)].add('%s:%s' % (cns, cid))
126-
all_parents[(cid, cns)].add('%s:%s' % (pns, pid))
127-
cur.executemany("""INSERT INTO relationships (child_id,
128-
child_ns, parent_id, parent_ns)
129-
VALUES (?, ?, ?, ?);""", chunk_values)
151+
all_children[(pid, pns)].add(
152+
'%s:%s|%s' % (cns, cid, 'isa_or_partof'))
153+
all_parents[(cid, cns)].add(
154+
'%s:%s|%s' % (pns, pid, 'isa_or_partof'))
155+
cur.executemany("""INSERT INTO relationships (parent_id,
156+
parent_ns, child_id, child_ns, rel_type)
157+
VALUES (?, ?, ?, ?, 'isa_or_partof');""",
158+
chunk_values)
159+
160+
for parent, child, data in bio_ontology.edges(data=True):
161+
parent_ns, parent_id = bio_ontology.get_ns_id(parent)
162+
child_ns, child_id = bio_ontology.get_ns_id(child)
163+
rel_type = data.get('type')
164+
if rel_type in {'isa', 'partof'}:
165+
continue
166+
all_children[(parent_id, parent_ns)].add(
167+
'%s:%s|%s' % (child_ns, child_id, rel_type))
168+
all_parents[(child_id, child_ns)].add(
169+
'%s:%s|%s' % (parent_ns, parent_id, rel_type))
170+
cur.execute("""INSERT INTO relationships (parent_id,
171+
parent_ns, child_id, child_ns, rel_type)
172+
VALUES (?, ?, ?, ?, ?);""",
173+
(parent_id, parent_ns, child_id, child_ns, rel_type))
174+
130175
q = """CREATE INDEX idx_child_parent ON relationships
131176
(child_id, child_ns, parent_id, parent_ns);"""
132177
cur.execute(q)

indra/tests/test_pmc_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def test_get_xml_PMC():
9595

9696
@pytest.mark.webservice
9797
def test_get_xml_invalid():
98-
pmc_id = '9999999'
98+
pmc_id = '123456789000'
9999
xml_str = pmc_client.get_xml(pmc_id)
100100
assert xml_str is None
101101

0 commit comments

Comments
 (0)