diff --git a/src/ringo/math.rs b/src/ringo/math.rs index b2dfa31..4974a69 100644 --- a/src/ringo/math.rs +++ b/src/ringo/math.rs @@ -1 +1 @@ -pub mod similarity; \ No newline at end of file +pub mod similarity; diff --git a/src/ringo/math/similarity/tanimoto.rs b/src/ringo/math/similarity/tanimoto.rs index a224cb6..33480df 100644 --- a/src/ringo/math/similarity/tanimoto.rs +++ b/src/ringo/math/similarity/tanimoto.rs @@ -3,13 +3,14 @@ use fixedbitset::FixedBitSet; pub fn tanimoto_bitset(a: &FixedBitSet, b: &FixedBitSet) -> f32 { let mut and_ = a.clone(); and_.intersect_with(b); - return and_.count_ones(..) as f32 / (a.count_ones(..) + b.count_ones(..) - and_.count_ones(..)) as f32; + return and_.count_ones(..) as f32 + / (a.count_ones(..) + b.count_ones(..) - and_.count_ones(..)) as f32; } #[cfg(test)] mod tests { + use crate::ringo::math::similarity::tanimoto::tanimoto_bitset; use fixedbitset::FixedBitSet; - use crate::ringo::math::similarity::tanimoto::{tanimoto_bitset}; #[test] fn test_tanimoto_bitset_033() { diff --git a/src/ringo/molecule/model/molecule.rs b/src/ringo/molecule/model/molecule.rs index f2ded08..18572ab 100644 --- a/src/ringo/molecule/model/molecule.rs +++ b/src/ringo/molecule/model/molecule.rs @@ -1,17 +1,17 @@ +use crate::ringo::math::similarity::tanimoto::tanimoto_bitset; use crate::ringo::molecule::model::atom::Atom; use crate::ringo::molecule::model::bond::Bond; use crate::ringo::molecule::model::element::atomic_weight; +use crate::ringo::molecule::smiles::reader::molecule::parse_molecule; use crate::ringo::ringo::fingerprint::Fingerprint; +use fixedbitset::FixedBitSet; use petgraph::stable_graph::{EdgeIndex, NodeIndex, StableGraph}; use petgraph::visit::EdgeRef; use petgraph::Undirected; use std::borrow::Borrow; -use std::collections::{BTreeSet}; use std::collections::hash_map::DefaultHasher; +use std::collections::BTreeSet; use std::hash::Hasher; -use fixedbitset::FixedBitSet; -use crate::ringo::math::similarity::tanimoto::tanimoto_bitset; -use crate::ringo::molecule::smiles::reader::molecule::parse_molecule; pub struct Molecule { graph: StableGraph, @@ -92,7 +92,15 @@ impl Molecule { let mut fp = FixedBitSet::new(); for node in self.graph.node_indices() { - ecfp_recursive(&self.graph, radius, 1, node, &mut fp, fp_length, &mut DefaultHasher::new()); + ecfp_recursive( + &self.graph, + radius, + 1, + node, + &mut fp, + fp_length, + &mut DefaultHasher::new(), + ); } Fingerprint(fp) @@ -108,7 +116,6 @@ fn ecfp_recursive( fp_length: usize, hasher: &mut DefaultHasher, ) { - if depth > radius { return; } @@ -136,11 +143,16 @@ fn ecfp_recursive( } } - #[test] fn test_ecfp() { - let ecfp_ibuprofen = parse_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O").unwrap().1.ecfp(2, 128); - let ecfp_naproxen = parse_molecule("CC(C1=CC2=C(C=C1)C=C(C=C2)OC)C(=O)O").unwrap().1.ecfp(2, 128); + let ecfp_ibuprofen = parse_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O") + .unwrap() + .1 + .ecfp(2, 128); + let ecfp_naproxen = parse_molecule("CC(C1=CC2=C(C=C1)C=C(C=C2)OC)C(=O)O") + .unwrap() + .1 + .ecfp(2, 128); let sim = tanimoto_bitset(&ecfp_ibuprofen.0, &ecfp_naproxen.0); assert!(0.53 < sim && sim < 0.54); } diff --git a/src/ringo/molecule/smiles/reader/molecule.rs b/src/ringo/molecule/smiles/reader/molecule.rs index 573f446..3e15b68 100644 --- a/src/ringo/molecule/smiles/reader/molecule.rs +++ b/src/ringo/molecule/smiles/reader/molecule.rs @@ -34,7 +34,6 @@ pub(crate) fn parse_molecule(input: &str) -> IResult<&str, Molecule> { let mut prev_node = NodeIndex::end(); let mut prev_bond = BondOrder::Single; - for (atom, bond, cycle_digit, open_paren) in atoms_and_bonds { if let Some(open) = open_paren { if open { @@ -223,11 +222,15 @@ mod tests { 7 ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)) + .unwrap() + .order, BondOrder::Double ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)) + .unwrap() + .order, BondOrder::Single ); } @@ -250,11 +253,15 @@ mod tests { 7 ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)) + .unwrap() + .order, BondOrder::Double ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)) + .unwrap() + .order, BondOrder::Double ); } @@ -285,19 +292,27 @@ mod tests { 7 ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)) + .unwrap() + .order, BondOrder::Double ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)) + .unwrap() + .order, BondOrder::Double ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(3)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(3)) + .unwrap() + .order, BondOrder::Single ); assert_eq!( - m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(4)).unwrap().order, + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(4)) + .unwrap() + .order, BondOrder::Single ); } @@ -324,16 +339,35 @@ mod tests { 16 ); assert!(m.has_bond(NodeIndex::new(0), NodeIndex::new(1))); - assert!(m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)) + .unwrap() + .order + == BondOrder::Single + ); assert!(m.has_bond(NodeIndex::new(1), NodeIndex::new(2))); - assert!(m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)) + .unwrap() + .order + == BondOrder::Single + ); assert!(m.has_bond(NodeIndex::new(0), NodeIndex::new(2))); - assert!(m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)).unwrap().order == BondOrder::Double); + assert!( + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(2)) + .unwrap() + .order + == BondOrder::Double + ); assert!(m.has_bond(NodeIndex::new(2), NodeIndex::new(3))); - assert!(m.get_bond_by_atoms(NodeIndex::new(2), NodeIndex::new(3)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(2), NodeIndex::new(3)) + .unwrap() + .order + == BondOrder::Single + ); } - #[test] fn parse_molecule_cycle_branch() { let m = parse_molecule("N1C(=P)S=1O").unwrap().1; @@ -360,15 +394,40 @@ mod tests { 8 ); assert!(m.has_bond(NodeIndex::new(0), NodeIndex::new(1))); - assert!(m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(0), NodeIndex::new(1)) + .unwrap() + .order + == BondOrder::Single + ); assert!(m.has_bond(NodeIndex::new(1), NodeIndex::new(2))); - assert!(m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)).unwrap().order == BondOrder::Double); + assert!( + m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(2)) + .unwrap() + .order + == BondOrder::Double + ); assert!(m.has_bond(NodeIndex::new(1), NodeIndex::new(3))); - assert!(m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(3)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(1), NodeIndex::new(3)) + .unwrap() + .order + == BondOrder::Single + ); assert!(m.has_bond(NodeIndex::new(3), NodeIndex::new(4))); - assert!(m.get_bond_by_atoms(NodeIndex::new(3), NodeIndex::new(4)).unwrap().order == BondOrder::Single); + assert!( + m.get_bond_by_atoms(NodeIndex::new(3), NodeIndex::new(4)) + .unwrap() + .order + == BondOrder::Single + ); assert!(m.has_bond(NodeIndex::new(3), NodeIndex::new(0))); - assert!(m.get_bond_by_atoms(NodeIndex::new(3), NodeIndex::new(0)).unwrap().order == BondOrder::Double); + assert!( + m.get_bond_by_atoms(NodeIndex::new(3), NodeIndex::new(0)) + .unwrap() + .order + == BondOrder::Double + ); } #[test] diff --git a/src/ringo/ringo.rs b/src/ringo/ringo.rs index 559227b..df87b4d 100644 --- a/src/ringo/ringo.rs +++ b/src/ringo/ringo.rs @@ -1,4 +1,4 @@ +pub(crate) mod fingerprint; mod index; -mod search; mod index_item; -pub(crate) mod fingerprint; +mod search; diff --git a/src/ringo/ringo/fingerprint.rs b/src/ringo/ringo/fingerprint.rs index 55dc0d0..14997b6 100644 --- a/src/ringo/ringo/fingerprint.rs +++ b/src/ringo/ringo/fingerprint.rs @@ -32,8 +32,8 @@ impl<'de> bincode::BorrowDecode<'de> for Fingerprint { #[cfg(test)] mod tests { - use fixedbitset::{FixedBitSet}; use crate::ringo::ringo::fingerprint::{Fingerprint, FINGERPRINT_SIZE}; + use fixedbitset::FixedBitSet; #[test] fn test_fingerprint_encode_decode() { @@ -42,7 +42,10 @@ mod tests { fp.0.set(17, true); let encoded = bincode::encode_to_vec(&fp, bincode::config::standard()).unwrap(); - let decoded: Fingerprint = bincode::decode_from_slice(&encoded, bincode::config::standard()).unwrap().0; + let decoded: Fingerprint = + bincode::decode_from_slice(&encoded, bincode::config::standard()) + .unwrap() + .0; assert_eq!(decoded.0.ones().collect::>(), vec![1, 17]); } } diff --git a/src/ringo/ringo/index.rs b/src/ringo/ringo/index.rs index 9efb2d3..6ea8108 100644 --- a/src/ringo/ringo/index.rs +++ b/src/ringo/ringo/index.rs @@ -1,7 +1,7 @@ -use std::fs::File; -use std::io::{BufRead}; use crate::ringo::molecule::smiles::reader::molecule::parse_molecule; use crate::ringo::ringo::index_item::IndexItem; +use std::fs::File; +use std::io::BufRead; fn index(smiles_file: &str) { // open file for reading @@ -13,12 +13,14 @@ fn index(smiles_file: &str) { for line in std::io::BufReader::new(fi).lines() { let line = line.unwrap(); let molecule = parse_molecule(&line).unwrap().1; - IndexItem{position: offset, fingerprint: molecule.ecfp(2, 512)}; + IndexItem { + position: offset, + fingerprint: molecule.ecfp(2, 512), + }; offset += line.len() + 1; } } - #[test] fn test_index() { index("molecules.smi"); diff --git a/src/ringo/ringo/index_item.rs b/src/ringo/ringo/index_item.rs index f4c25ea..ad33538 100644 --- a/src/ringo/ringo/index_item.rs +++ b/src/ringo/ringo/index_item.rs @@ -1,30 +1,36 @@ -use bincode::{Decode, Encode}; use crate::ringo::ringo::fingerprint::Fingerprint; +use bincode::{Decode, Encode}; #[derive(Debug, Encode, Decode)] pub struct IndexItem { pub position: usize, - pub fingerprint: Fingerprint + pub fingerprint: Fingerprint, } #[cfg(test)] mod tests { + use crate::ringo::ringo::fingerprint::Fingerprint; + use crate::ringo::ringo::index_item::IndexItem; use bincode::config::standard; use bincode::{decode_from_slice, encode_to_vec}; use fixedbitset::FixedBitSet; - use crate::ringo::ringo::index_item::IndexItem; - use crate::ringo::ringo::fingerprint::Fingerprint; #[test] fn test_index_item_encode_decode() { let fp = Fingerprint(FixedBitSet::with_capacity(512)); - let mut ii = IndexItem {position: 0, fingerprint: fp}; + let mut ii = IndexItem { + position: 0, + fingerprint: fp, + }; ii.position = 0; ii.fingerprint.0.set(1, true); ii.fingerprint.0.set(17, true); let encoded = encode_to_vec(&ii, standard()).unwrap(); let decoded: IndexItem = decode_from_slice(&encoded, standard()).unwrap().0; - assert_eq!(decoded.fingerprint.0.ones().collect::>(), vec![1, 17]); + assert_eq!( + decoded.fingerprint.0.ones().collect::>(), + vec![1, 17] + ); } -} \ No newline at end of file +} diff --git a/src/ringo/ringo/search.rs b/src/ringo/ringo/search.rs index e69de29..8b13789 100644 --- a/src/ringo/ringo/search.rs +++ b/src/ringo/ringo/search.rs @@ -0,0 +1 @@ +