Skip to content

Commit df4ad3e

Browse files
committed
Fixed SMILES reader to handle bonds
1 parent ab69a9a commit df4ad3e

File tree

2 files changed

+207
-1
lines changed

2 files changed

+207
-1
lines changed

src/indigo/molecule/model/molecule.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ impl Molecule {
3838
return self.graph.edge_weight(edge);
3939
}
4040

41+
pub fn get_bond_by_atoms(&self, ni1: NodeIndex, ni2: NodeIndex) -> Option<&Bond> {
42+
let e = self.graph.find_edge_undirected(ni1, ni2);
43+
if e.is_none() {
44+
return None;
45+
}
46+
return self.graph.edge_weight(e.unwrap().0);
47+
}
48+
49+
pub fn has_bond(&self, ni1: NodeIndex, ni2: NodeIndex) -> bool {
50+
return self.graph.find_edge_undirected(ni1, ni2).is_some();
51+
}
52+
4153
pub fn count_atoms(&self) -> usize {
4254
return self.graph.node_count();
4355
}

src/indigo/molecule/smiles/reader/molecule.rs

Lines changed: 195 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
1919
let mut molecule = Molecule::new();
2020
let mut open_cycles: HashMap<u8, NodeIndex> = HashMap::new();
2121
let mut stack: Vec<(NodeIndex, BondOrder)> = Vec::new();
22+
let mut pending_bonds: Vec<(NodeIndex, NodeIndex, Bond)> = Vec::new();
2223

2324
let mut parse_atoms_and_bonds = many0(alt((
2425
map(parse_atom, |atom| (Some(atom), None, None, None)),
@@ -33,7 +34,6 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
3334
let mut prev_node = NodeIndex::end();
3435
let mut prev_bond = BondOrder::Single;
3536

36-
let mut pending_bonds: Vec<(NodeIndex, NodeIndex, Bond)> = Vec::new();
3737

3838
for (atom, bond, cycle_digit, open_paren) in atoms_and_bonds {
3939
if let Some(open) = open_paren {
@@ -54,6 +54,7 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
5454
} else if let Some(digit) = cycle_digit {
5555
if let Some(open_node) = open_cycles.remove(&digit) {
5656
pending_bonds.push((prev_node, open_node, Bond { order: prev_bond }));
57+
prev_bond = BondOrder::Single;
5758
} else {
5859
open_cycles.insert(digit, prev_node);
5960
}
@@ -131,6 +132,33 @@ mod tests {
131132
);
132133
}
133134

135+
/// `C(O)N`: three atoms, two bonds, both bonds single.
#[test]
fn parse_molecule_branch() {
    let (_, mol) = parse_molecule("C(O)N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // Both edges, by edge index, must be single bonds.
    for idx in 0..2 {
        assert_eq!(
            mol.get_bond(EdgeIndex::new(idx)).unwrap().order,
            BondOrder::Single
        );
    }
}
161+
134162
#[test]
135163
fn parse_molecule_c1cc1() {
136164
let m = parse_molecule("C1P=N#1").unwrap().1;
@@ -177,6 +205,172 @@ mod tests {
177205
);
178206
}
179207

208+
/// `C(=O)N`: the branch carries a double bond, the continuation a single bond.
#[test]
fn parse_molecule_branch_double_bond() {
    let (_, mol) = parse_molecule("C(=O)N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [(0, 1, BondOrder::Double), (0, 2, BondOrder::Single)];
    for (a, b, order) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *order
        );
    }
}
234+
235+
/// `C(=O)=N`: both the branch and the continuation are double bonds.
#[test]
fn parse_molecule_branch_double_bonds() {
    let (_, mol) = parse_molecule("C(=O)=N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [(0, 1, BondOrder::Double), (0, 2, BondOrder::Double)];
    for (a, b, order) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *order
        );
    }
}
261+
262+
/// `C(=S(=O)P)N`: a branch nested inside another branch.
#[test]
fn parse_molecule_branch_recursive() {
    let (_, mol) = parse_molecule("C(=S(=O)P)N").unwrap();
    assert_eq!(mol.count_atoms(), 5);
    assert_eq!(mol.count_bonds(), 4);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [6, 16, 8, 15, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [
        (0, 1, BondOrder::Double),
        (1, 2, BondOrder::Double),
        (1, 3, BondOrder::Single),
        (0, 4, BondOrder::Single),
    ];
    for (a, b, order) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *order
        );
    }
}
304+
305+
/// `N1OC=1S`: the ring closure is a double bond, and the bond that follows
/// the closing digit is expected to be single again.
#[test]
fn parse_molecule_cycle_double() {
    let (_, mol) = parse_molecule("N1OC=1S").unwrap();
    assert_eq!(mol.count_atoms(), 4);
    assert_eq!(mol.count_bonds(), 4);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [7, 8, 6, 16].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [
        (0, 1, BondOrder::Single),
        (1, 2, BondOrder::Single),
        (0, 2, BondOrder::Double),
        (2, 3, BondOrder::Single),
    ];
    for (a, b, order) in expected_bonds.iter() {
        let (from, to) = (NodeIndex::new(*a), NodeIndex::new(*b));
        assert!(mol.has_bond(from, to));
        assert!(mol.get_bond_by_atoms(from, to).unwrap().order == *order);
    }
}
335+
336+
337+
/// `N1C(=P)S=1O`: a ring that contains a branch and closes with a double bond.
#[test]
fn parse_molecule_cycle_branch() {
    let (_, mol) = parse_molecule("N1C(=P)S=1O").unwrap();
    assert_eq!(mol.count_atoms(), 5);
    assert_eq!(mol.count_bonds(), 5);

    // Expected atomic numbers, listed by node index.
    for (idx, &expected) in [7, 6, 15, 16, 8].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            expected
        );
    }

    // (first atom, second atom, expected bond order); the last pair is
    // deliberately queried as (3, 0) rather than (0, 3).
    let expected_bonds = [
        (0, 1, BondOrder::Single),
        (1, 2, BondOrder::Double),
        (1, 3, BondOrder::Single),
        (3, 4, BondOrder::Single),
        (3, 0, BondOrder::Double),
    ];
    for (a, b, order) in expected_bonds.iter() {
        let (from, to) = (NodeIndex::new(*a), NodeIndex::new(*b));
        assert!(mol.has_bond(from, to));
        assert!(mol.get_bond_by_atoms(from, to).unwrap().order == *order);
    }
}
373+
180374
#[test]
181375
fn parse_molecule_c1cc() {
182376
assert!(parse_molecule("C1CC").is_err())

0 commit comments

Comments
 (0)