Skip to content

Commit

Permalink
Fixed SMILES reader to handle bonds
Browse files Browse the repository at this point in the history
  • Loading branch information
mkviatkovskii committed Apr 7, 2024
1 parent ab69a9a commit df4ad3e
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 1 deletion.
12 changes: 12 additions & 0 deletions src/indigo/molecule/model/molecule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,18 @@ impl Molecule {
return self.graph.edge_weight(edge);
}

/// Looks up the bond connecting two atoms, ignoring edge direction.
///
/// Returns `None` when the graph holds no edge between `ni1` and `ni2`.
pub fn get_bond_by_atoms(&self, ni1: NodeIndex, ni2: NodeIndex) -> Option<&Bond> {
    // `?` propagates the "no such edge" case directly, so no `unwrap` is needed.
    let found = self.graph.find_edge_undirected(ni1, ni2)?;
    // Only the first tuple component (the edge index) is needed for the weight lookup.
    self.graph.edge_weight(found.0)
}

/// Reports whether an edge exists between `ni1` and `ni2` in either direction.
pub fn has_bond(&self, ni1: NodeIndex, ni2: NodeIndex) -> bool {
    let edge = self.graph.find_edge_undirected(ni1, ni2);
    edge.is_some()
}

/// Number of atoms in the molecule, i.e. the node count of the underlying graph.
pub fn count_atoms(&self) -> usize {
    self.graph.node_count()
}
Expand Down
196 changes: 195 additions & 1 deletion src/indigo/molecule/smiles/reader/molecule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
let mut molecule = Molecule::new();
let mut open_cycles: HashMap<u8, NodeIndex> = HashMap::new();
let mut stack: Vec<(NodeIndex, BondOrder)> = Vec::new();
let mut pending_bonds: Vec<(NodeIndex, NodeIndex, Bond)> = Vec::new();

let mut parse_atoms_and_bonds = many0(alt((
map(parse_atom, |atom| (Some(atom), None, None, None)),
Expand All @@ -33,7 +34,6 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
let mut prev_node = NodeIndex::end();
let mut prev_bond = BondOrder::Single;

let mut pending_bonds: Vec<(NodeIndex, NodeIndex, Bond)> = Vec::new();

for (atom, bond, cycle_digit, open_paren) in atoms_and_bonds {
if let Some(open) = open_paren {
Expand All @@ -54,6 +54,7 @@ fn parse_molecule(input: &str) -> IResult<&str, Molecule> {
} else if let Some(digit) = cycle_digit {
if let Some(open_node) = open_cycles.remove(&digit) {
pending_bonds.push((prev_node, open_node, Bond { order: prev_bond }));
prev_bond = BondOrder::Single;
} else {
open_cycles.insert(digit, prev_node);
}
Expand Down Expand Up @@ -131,6 +132,33 @@ mod tests {
);
}

#[test]
fn parse_molecule_branch() {
    // SMILES with a single parenthesised branch and only implicit bonds.
    let (_, mol) = parse_molecule("C(O)N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // Both edges are expected to be single bonds.
    for edge in 0..2 {
        assert_eq!(
            mol.get_bond(EdgeIndex::new(edge)).unwrap().order,
            BondOrder::Single
        );
    }
}

#[test]
fn parse_molecule_c1cc1() {
let m = parse_molecule("C1P=N#1").unwrap().1;
Expand Down Expand Up @@ -177,6 +205,172 @@ mod tests {
);
}

#[test]
fn parse_molecule_branch_double_bond() {
    // The branch carries an explicit double bond; the atom after it is bonded implicitly.
    let (_, mol) = parse_molecule("C(=O)N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [(0, 1, BondOrder::Double), (0, 2, BondOrder::Single)];
    for (a, b, expected) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *expected
        );
    }
}

#[test]
fn parse_molecule_branch_double_bonds() {
    // Explicit double bonds both inside the branch and after it.
    let (_, mol) = parse_molecule("C(=O)=N").unwrap();
    assert_eq!(mol.count_atoms(), 3);
    assert_eq!(mol.count_bonds(), 2);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [6, 8, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [(0, 1, BondOrder::Double), (0, 2, BondOrder::Double)];
    for (a, b, expected) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *expected
        );
    }
}

#[test]
fn parse_molecule_branch_recursive() {
    // A branch nested inside another branch.
    let (_, mol) = parse_molecule("C(=S(=O)P)N").unwrap();
    assert_eq!(mol.count_atoms(), 5);
    assert_eq!(mol.count_bonds(), 4);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [6, 16, 8, 15, 7].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [
        (0, 1, BondOrder::Double),
        (1, 2, BondOrder::Double),
        (1, 3, BondOrder::Single),
        (0, 4, BondOrder::Single),
    ];
    for (a, b, expected) in expected_bonds.iter() {
        assert_eq!(
            mol.get_bond_by_atoms(NodeIndex::new(*a), NodeIndex::new(*b)).unwrap().order,
            *expected
        );
    }
}

#[test]
fn parse_molecule_cycle_double() {
    // Ring closure whose closing digit is preceded by an explicit double bond.
    let (_, mol) = parse_molecule("N1OC=1S").unwrap();
    assert_eq!(mol.count_atoms(), 4);
    assert_eq!(mol.count_bonds(), 4);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [7, 8, 6, 16].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [
        (0, 1, BondOrder::Single),
        (1, 2, BondOrder::Single),
        (0, 2, BondOrder::Double),
        (2, 3, BondOrder::Single),
    ];
    for (a, b, expected) in expected_bonds.iter() {
        let (first, second) = (NodeIndex::new(*a), NodeIndex::new(*b));
        assert!(mol.has_bond(first, second));
        // `assert_eq!` (as in the sibling tests) prints both orders when the check fails.
        assert_eq!(
            mol.get_bond_by_atoms(first, second).unwrap().order,
            *expected
        );
    }
}


#[test]
fn parse_molecule_cycle_branch() {
    // Ring closure combined with a branch carrying an explicit double bond.
    let (_, mol) = parse_molecule("N1C(=P)S=1O").unwrap();
    assert_eq!(mol.count_atoms(), 5);
    assert_eq!(mol.count_bonds(), 5);

    // Expected atomic numbers, listed in node-index order.
    for (idx, expected) in [7, 6, 15, 16, 8].iter().enumerate() {
        assert_eq!(
            mol.get_atom(NodeIndex::new(idx)).unwrap().element.atomic_number,
            *expected
        );
    }

    // (first atom, second atom, expected bond order)
    let expected_bonds = [
        (0, 1, BondOrder::Single),
        (1, 2, BondOrder::Double),
        (1, 3, BondOrder::Single),
        (3, 4, BondOrder::Single),
        (3, 0, BondOrder::Double),
    ];
    for (a, b, expected) in expected_bonds.iter() {
        let (first, second) = (NodeIndex::new(*a), NodeIndex::new(*b));
        assert!(mol.has_bond(first, second));
        // `assert_eq!` (as in the sibling tests) prints both orders when the check fails.
        assert_eq!(
            mol.get_bond_by_atoms(first, second).unwrap().order,
            *expected
        );
    }
}

#[test]
fn parse_molecule_c1cc() {
assert!(parse_molecule("C1CC").is_err())
Expand Down

0 comments on commit df4ad3e

Please sign in to comment.