Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enumerate input molecule with tautomers #1780

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/releasehistory.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w

### Behavior changes

- [PR #1780](https://github.com/openforcefield/openff-toolkit/pull/1780): `Molecule.enumerate_tautomers` now includes the input molecule in the returned list.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- [PR #17XX](https://github.com/openforcefield/openff-toolkit/pull/17XX): `Molecule.enumerate_tautomers` now includes the input molecule in the returned list.
- [PR #1780](https://github.com/openforcefield/openff-toolkit/pull/1780): `Molecule.enumerate_tautomers` now includes the input molecule in the returned list.


### Bugfixes

### New features
Expand Down
21 changes: 14 additions & 7 deletions openff/toolkit/_tests/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -2103,12 +2103,18 @@ def test_enumerating_tautomers(self, molecule_data, toolkit_class):

tautomers = mol.enumerate_tautomers(toolkit_registry=toolkit)

assert len(tautomers) == molecule_data["tautomers"]
assert mol not in tautomers
# check that the molecules are not isomorphic of the input
assert len(tautomers) == molecule_data["tautomers"] + 1
assert mol in tautomers

# check that only one molecule (literally the input) is isomorphic to the input

number_isomorphic = 0
for taut in tautomers:
assert taut.n_conformers == 0
assert mol.is_isomorphic_with(taut) is False
if mol.is_isomorphic_with(taut):
number_isomorphic += 1

assert number_isomorphic == 1

else:
pytest.skip("Required toolkit is unavailable")
Expand All @@ -2132,8 +2138,8 @@ def test_enumerating_tautomers_options(self, toolkit_class):
tautomers = mol.enumerate_tautomers(
max_states=tauts_no, toolkit_registry=toolkit
)
assert len(tautomers) <= tauts_no
assert mol not in tautomers
assert len(tautomers) <= tauts_no + 1
assert mol in tautomers

@pytest.mark.parametrize(
"toolkit_class", [RDKitToolkitWrapper, OpenEyeToolkitWrapper]
Expand All @@ -2146,7 +2152,8 @@ def test_enumerating_no_tautomers(self, toolkit_class):
mol = Molecule.from_smiles("CC", toolkit_registry=toolkit)

tautomers = mol.enumerate_tautomers(toolkit_registry=toolkit)
assert tautomers == []
assert len(tautomers) == 1
assert tautomers[0] == mol

else:
pytest.skip("Required toolkit is unavailable")
Expand Down
9 changes: 6 additions & 3 deletions openff/toolkit/topology/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -4144,8 +4144,11 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST
else:
toolkit.to_file_obj(self, file_path, file_format)

# TODO: This should probably be a class method
def enumerate_tautomers(
self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
self,
max_states=20,
toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
):
"""
Enumerate the possible tautomers of the current molecule
Expand All @@ -4161,8 +4164,8 @@ def enumerate_tautomers(

Returns
-------
molecules: List[openff.toolkit.topology.Molecule]
A list of openff.toolkit.topology.Molecule instances not including the input molecule.
molecules: list[openff.toolkit.Molecule]
A list of openff.toolkit.Molecule instances including the input molecule.
"""

if isinstance(toolkit_registry, ToolkitRegistry):
Expand Down
35 changes: 18 additions & 17 deletions openff/toolkit/utils/openeye_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,22 +811,24 @@ def _assign_aromaticity_and_stereo_from_3d(self, offmol):
return offmol_w_stereo_and_aro

def enumerate_protomers(
self, molecule: "Molecule", max_states: int = 10
) -> List["Molecule"]:
self,
molecule: "Molecule",
max_states: int = 10,
) -> list["Molecule"]:
"""
Enumerate the formal charges of a molecule to generate different protomers.

Parameters
----------
molecule: openff.toolkit.topology.Molecule
molecule: openff.toolkit.Molecule
The molecule whose state we should enumerate

max_states: int optional, default=10,
The maximum number of protomer states to be returned.

Returns
-------
molecules: List[openff.toolkit.topology.Molecule],
molecules: list[openff.toolkit.Molecule],
A list of the protomers of the input molecules not including the input.
"""

Expand Down Expand Up @@ -912,8 +914,10 @@ def enumerate_stereoisomers(
return molecules[:max_isomers]

def enumerate_tautomers(
self, molecule: "Molecule", max_states: int = 20
) -> List["Molecule"]:
self,
molecule: "Molecule",
max_states: int = 20,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could go bigger than 20 if we wanted!

) -> list["Molecule"]:
"""
Enumerate the possible tautomers of the current molecule
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Enumerate the possible tautomers of the current molecule
Enumerate some possible tautomers of the current molecule


Expand All @@ -927,8 +931,8 @@ def enumerate_tautomers(

Returns
-------
molecules: List[openff.toolkit.topology.Molecule]
A list of openff.toolkit.topology.Molecule instances excluding the input molecule.
molecules: list[openff.toolkit.Molecule]
A list of openff.toolkit.Molecule instances including the input molecule.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
A list of openff.toolkit.Molecule instances excluding the input molecule.
A list of openff.toolkit.Molecule instances, including the input molecule unless it is pruned by the ``max_states`` argument.

"""
from openeye import oequacpac

Expand All @@ -945,16 +949,13 @@ def enumerate_tautomers(
tautomer_options.SetCarbonHybridization(False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

- tautomer_options.SetMaxTautomersGenerated(max_states + 1)
+ tautomer_options.SetMaxTautomersGenerated(max_states)

No longer need to generate an extra state so we can drop the input.


for tautomer in oequacpac.OEEnumerateTautomers(oemol, tautomer_options):
# remove the input tautomer from the output
taut = self.from_openeye(
tautomer, allow_undefined_stereo=True, _cls=molecule.__class__
)
if taut != molecule:
tautomers.append(
self.from_openeye(
tautomer, allow_undefined_stereo=True, _cls=molecule.__class__
)
tautomers.append(
self.from_openeye(
tautomer,
allow_undefined_stereo=True,
_cls=molecule.__class__,
)
)

return tautomers

Expand Down
21 changes: 12 additions & 9 deletions openff/toolkit/utils/rdkit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,23 +1339,25 @@ def enumerate_stereoisomers(
return molecules

def enumerate_tautomers(
self, molecule: "Molecule", max_states: int = 20
) -> List["Molecule"]:
self,
molecule: "Molecule",
max_states: int = 20,
) -> list["Molecule"]:
"""
Enumerate the possible tautomers of the current molecule.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Enumerate the possible tautomers of the current molecule.
Enumerate some possible tautomers of the current molecule.


Parameters
----------
molecule: openff.toolkit.topology.Molecule
molecule: openff.toolkit.Molecule
The molecule whose state we should enumerate

max_states: int optional, default=20
The maximum amount of molecules that should be returned

Returns
-------
molecules: List[openff.toolkit.topology.Molecule]
A list of openff.toolkit.topology.Molecule instances not including the input molecule.
molecules: list[openff.toolkit.Molecule]
A list of openff.toolkit.Molecule instances including the input molecule.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
A list of openff.toolkit.Molecule instances including the input molecule.
A list of openff.toolkit.Molecule instances including the input molecule unless it has been pruned by the ``max_states`` argument.

"""

from rdkit import Chem
Expand All @@ -1371,11 +1373,12 @@ def enumerate_tautomers(
molecules = []
for taut in tautomers:
taut_hs = Chem.AddHs(taut)
mol = self.from_smiles(
Chem.MolToSmiles(taut_hs), allow_undefined_stereo=True
molecules.append(
self.from_smiles(
Chem.MolToSmiles(taut_hs),
allow_undefined_stereo=True,
)
)
if mol != molecule:
molecules.append(mol)

return molecules[:max_states]

Expand Down