From cfcf8a4773447ab82de68d731cc45cbc6700629e Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 28 Nov 2023 11:51:34 -0600 Subject: [PATCH 1/2] Enumerate input molecule with tautomers --- docs/releasehistory.md | 2 ++ openff/toolkit/_tests/test_molecule.py | 21 ++++++++++----- openff/toolkit/topology/molecule.py | 10 ++++--- openff/toolkit/utils/openeye_wrapper.py | 35 +++++++++++++------------ openff/toolkit/utils/rdkit_wrapper.py | 21 ++++++++------- 5 files changed, 52 insertions(+), 37 deletions(-) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index e41a93848..ef0ec69ea 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -12,6 +12,8 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Behavior changes +- [PR #17XX](https://github.com/openforcefield/openff-toolkit/pull/17XX): `Molecule.enumerate_tautomers` now includes the input molecule in the returned list. + ### Bugfixes ### New features diff --git a/openff/toolkit/_tests/test_molecule.py b/openff/toolkit/_tests/test_molecule.py index 35677d945..1fe1b984e 100644 --- a/openff/toolkit/_tests/test_molecule.py +++ b/openff/toolkit/_tests/test_molecule.py @@ -2103,12 +2103,18 @@ def test_enumerating_tautomers(self, molecule_data, toolkit_class): tautomers = mol.enumerate_tautomers(toolkit_registry=toolkit) - assert len(tautomers) == molecule_data["tautomers"] - assert mol not in tautomers - # check that the molecules are not isomorphic of the input + assert len(tautomers) == molecule_data["tautomers"] + 1 + assert mol in tautomers + + # check that only one molecule (literally the input) is isomorphic of the input + + number_isomorphic = 0 for taut in tautomers: assert taut.n_conformers == 0 - assert mol.is_isomorphic_with(taut) is False + if mol.is_isomorphic_with(taut): + number_isomorphic += 1 + + assert number_isomorphic == 1 else: pytest.skip("Required toolkit is unavailable") @@ -2132,8 +2138,8 @@ def 
test_enumerating_tautomers_options(self, toolkit_class): tautomers = mol.enumerate_tautomers( max_states=tauts_no, toolkit_registry=toolkit ) - assert len(tautomers) <= tauts_no - assert mol not in tautomers + assert len(tautomers) <= tauts_no + 1 + assert mol in tautomers @pytest.mark.parametrize( "toolkit_class", [RDKitToolkitWrapper, OpenEyeToolkitWrapper] @@ -2146,7 +2152,8 @@ def test_enumerating_no_tautomers(self, toolkit_class): mol = Molecule.from_smiles("CC", toolkit_registry=toolkit) tautomers = mol.enumerate_tautomers(toolkit_registry=toolkit) - assert tautomers == [] + assert len(tautomers) == 1 + assert tautomers[0] == mol else: pytest.skip("Required toolkit is unavailable") diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index a59b292ab..0b8877869 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -4145,8 +4145,10 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST toolkit.to_file_obj(self, file_path, file_format) def enumerate_tautomers( - self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY - ): + self, + max_states=20, + toolkit_registry=GLOBAL_TOOLKIT_REGISTRY, + ) -> List["Molecule"]: """ Enumerate the possible tautomers of the current molecule @@ -4161,8 +4163,8 @@ def enumerate_tautomers( Returns ------- - molecules: List[openff.toolkit.topology.Molecule] - A list of openff.toolkit.topology.Molecule instances not including the input molecule. + molecules: list[openff.toolkit.Molecule] + A list of openff.toolkit.Molecule instances including the input molecule. 
""" if isinstance(toolkit_registry, ToolkitRegistry): diff --git a/openff/toolkit/utils/openeye_wrapper.py b/openff/toolkit/utils/openeye_wrapper.py index dc618c431..7779c230a 100644 --- a/openff/toolkit/utils/openeye_wrapper.py +++ b/openff/toolkit/utils/openeye_wrapper.py @@ -811,14 +811,16 @@ def _assign_aromaticity_and_stereo_from_3d(self, offmol): return offmol_w_stereo_and_aro def enumerate_protomers( - self, molecule: "Molecule", max_states: int = 10 - ) -> List["Molecule"]: + self, + molecule: "Molecule", + max_states: int = 10, + ) -> list["Molecule"]: """ Enumerate the formal charges of a molecule to generate different protomoers. Parameters ---------- - molecule: openff.toolkit.topology.Molecule + molecule: openff.toolkit.Molecule The molecule whose state we should enumerate max_states: int optional, default=10, @@ -826,7 +828,7 @@ def enumerate_protomers( Returns ------- - molecules: List[openff.toolkit.topology.Molecule], + molecules: list[openff.toolkit.Molecule], A list of the protomers of the input molecules not including the input. """ @@ -912,8 +914,10 @@ def enumerate_stereoisomers( return molecules[:max_isomers] def enumerate_tautomers( - self, molecule: "Molecule", max_states: int = 20 - ) -> List["Molecule"]: + self, + molecule: "Molecule", + max_states: int = 20, + ) -> list["Molecule"]: """ Enumerate the possible tautomers of the current molecule @@ -927,8 +931,8 @@ def enumerate_tautomers( Returns ------- - molecules: List[openff.toolkit.topology.Molecule] - A list of openff.toolkit.topology.Molecule instances excluding the input molecule. + molecules: list[openff.toolkit.Molecule] + A list of openff.toolkit.Molecule instances including the input molecule. 
""" from openeye import oequacpac @@ -945,16 +949,13 @@ def enumerate_tautomers( tautomer_options.SetCarbonHybridization(False) for tautomer in oequacpac.OEEnumerateTautomers(oemol, tautomer_options): - # remove the input tautomer from the output - taut = self.from_openeye( - tautomer, allow_undefined_stereo=True, _cls=molecule.__class__ - ) - if taut != molecule: - tautomers.append( - self.from_openeye( - tautomer, allow_undefined_stereo=True, _cls=molecule.__class__ - ) + tautomers.append( + self.from_openeye( + tautomer, + allow_undefined_stereo=True, + _cls=molecule.__class__, ) + ) return tautomers diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index 4e2fb3b2c..04efbc341 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -1339,14 +1339,16 @@ def enumerate_stereoisomers( return molecules def enumerate_tautomers( - self, molecule: "Molecule", max_states: int = 20 - ) -> List["Molecule"]: + self, + molecule: "Molecule", + max_states: int = 20, + ) -> list["Molecule"]: """ Enumerate the possible tautomers of the current molecule. Parameters ---------- - molecule: openff.toolkit.topology.Molecule + molecule: openff.toolkit.Molecule The molecule whose state we should enumerate max_states: int optional, default=20 @@ -1354,8 +1356,8 @@ def enumerate_tautomers( Returns ------- - molecules: List[openff.toolkit.topology.Molecule] - A list of openff.toolkit.topology.Molecule instances not including the input molecule. + molecules: list[openff.toolkit.Molecule] + A list of openff.toolkit.Molecule instances including the input molecule. 
""" from rdkit import Chem @@ -1371,11 +1373,12 @@ def enumerate_tautomers( molecules = [] for taut in tautomers: taut_hs = Chem.AddHs(taut) - mol = self.from_smiles( - Chem.MolToSmiles(taut_hs), allow_undefined_stereo=True + molecules.append( + self.from_smiles( + Chem.MolToSmiles(taut_hs), + allow_undefined_stereo=True, + ) ) - if mol != molecule: - molecules.append(mol) return molecules[:max_states] From e933385cc603b7083488b764e4a510c05b2a4009 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Wed, 29 Nov 2023 11:41:28 -0600 Subject: [PATCH 2/2] Drop annotation --- openff/toolkit/topology/molecule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 0b8877869..d55493da1 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -4144,11 +4144,12 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST else: toolkit.to_file_obj(self, file_path, file_format) + # TODO: This should probably be a class method def enumerate_tautomers( self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY, - ) -> List["Molecule"]: + ): """ Enumerate the possible tautomers of the current molecule