Skip to content

Commit

Permalink
Merge pull request #1024 from openforcefield/more-gromacs-residue-id
Browse files Browse the repository at this point in the history
Test more cases of handling GROMACS residue IDs
  • Loading branch information
mattwthompson committed Aug 30, 2024
2 parents 9947373 + 197240d commit 57dd53a
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 58 deletions.
4 changes: 3 additions & 1 deletion devtools/conda-envs/dev_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- openeye
dependencies:
# Core
- python =3.10
- python
- pip
- numpy
- pydantic =2
Expand Down Expand Up @@ -34,6 +34,8 @@ dependencies:
# Drivers
- gromacs
- lammps >=2023.08.02
# https://github.com/conda-forge/lammps-feedstock/issues/207
- openmpi =4
- panedr
# Typing
- mypy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,57 +98,6 @@ def test_vaccum_warning(self, sage):
with pytest.warns(UserWarning, match="gitlab"):
out.to_gro("tmp.gro")

@pytest.mark.slow
@skip_if_missing("openmm")
def test_residue_info(self, sage):
"""Test that residue information is passed through to .gro files."""
import mdtraj

protein = Molecule.from_polymer_pdb(
get_data_file_path(
"proteins/MainChain_HIE.pdb",
"openff.toolkit",
),
)

ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml")

out = Interchange.from_smirnoff(
force_field=ff14sb,
topology=[protein],
)

out.to_gro("tmp.gro")

mdtraj_topology = mdtraj.load("tmp.gro").topology

for found_residue, original_residue in zip(
mdtraj_topology.residues,
out.topology.hierarchy_iterator("residues"),
):
assert found_residue.name == original_residue.residue_name
assert str(found_residue.resSeq) == original_residue.residue_number

@pytest.mark.slow
def test_atom_names_pdb(self):
peptide = Molecule.from_polymer_pdb(
get_data_file_path("proteins/MainChain_ALA_ALA.pdb", "openff.toolkit"),
)
ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml")

Interchange.from_smirnoff(ff14sb, peptide.to_topology()).to_gro(
"atom_names.gro",
)

pdb_object = openmm.app.PDBFile(
get_data_file_path("proteins/MainChain_ALA_ALA.pdb", "openff.toolkit"),
)
pdb_atom_names = [atom.name for atom in pdb_object.topology.atoms()]

openmm_atom_names = openmm.app.GromacsGroFile("atom_names.gro").atomNames

assert openmm_atom_names == pdb_atom_names


@needs_gmx
class TestGROMACS:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
from importlib import resources
from math import exp

Expand Down Expand Up @@ -41,6 +42,14 @@ class _NeedsGROMACS:
pass


def parse_residue_ids(file) -> list[str]:
"""Parse residue IDs, and only the residue IDs, from a GROMACS .gro file."""
with open(file) as f:
ids = [line[:5].strip() for line in f.readlines()[2:-1]]

return ids


class TestToGro(_NeedsGROMACS):
def test_residue_names(self, sage):
"""Reproduce issue #642."""
Expand Down Expand Up @@ -143,8 +152,9 @@ def test_vaccum_warning(self, sage):
out.to_gro("tmp.gro")

@pytest.mark.slow
@pytest.mark.parametrize("offset_residue_ids", [True, False])
@skip_if_missing("openmm")
def test_residue_info(self, sage):
def test_residue_info(self, offset_residue_ids):
"""Test that residue information is passed through to .gro files."""
import mdtraj

Expand All @@ -155,6 +165,19 @@ def test_residue_info(self, sage):
),
)

if offset_residue_ids:
offset = random.randint(10, 20)

for atom in protein.atoms:
atom.metadata["residue_number"] = str(
int(atom.metadata["residue_number"]) + offset,
)

# Need to manually update residues _and_ atoms
# https://github.com/openforcefield/openff-toolkit/issues/1921
for residue in protein.residues:
residue.residue_number = str(int(residue.residue_number) + offset)

ff14sb = ForceField("ff14sb_off_impropers_0.0.3.offxml")

out = Interchange.from_smirnoff(
Expand All @@ -171,7 +194,54 @@ def test_residue_info(self, sage):
out.topology.hierarchy_iterator("residues"),
):
assert found_residue.name == original_residue.residue_name
assert str(found_residue.resSeq) == original_residue.residue_number
found_index = [*original_residue.atoms][0].metadata["residue_number"]
assert str(found_residue.resSeq) == found_index

def test_fill_in_residue_ids(self, sage):
"""Ensure that, if inputs have no residue_number, they are generated on-the-fly."""
sage.create_interchange(
Topology.from_molecules(
[MoleculeWithConformer.from_smiles(smi) for smi in ["CC", "O", "C"]],
),
).to_gro("fill.gro")

expected_residue_ids = 8 * ["1"] + 3 * ["2"] + 5 * ["3"]

found_residue_ids = parse_residue_ids("fill.gro")

for expected, found in zip(expected_residue_ids, found_residue_ids):
assert expected == found

def test_atom_index_gt_100_000(self, water, sage):
"""Ensure that atom indices are written correctly for large numbers."""
water.add_hierarchy_scheme(
("residue_name", "residue_number"),
"residues",
)

topology = Topology.from_molecules(2 * [water])

topology.box_vectors = unit.Quantity([4, 4, 4], unit.nanometer)

# Can't just update atoms' metadata, neeed to create these scheme/element objects
# and need to also modify the residue objects themselves
for molecule_index, molecule in enumerate(topology.molecules):
for atom in molecule.atoms:
atom.metadata["residue_number"] = str(molecule_index + 123_456)

for residue_index, residue in enumerate(topology.residues):
residue.residue_number = str(residue_index + 123_456)

interchange = sage.create_interchange(topology)

interchange.to_gro("large.gro")

expected_residue_ids = 3 * ["23456"] + 3 * ["23457"]

found_residue_ids = parse_residue_ids("large.gro")

for expected, found in zip(expected_residue_ids, found_residue_ids):
assert expected == found

@pytest.mark.slow
def test_atom_names_pdb(self):
Expand Down
12 changes: 12 additions & 0 deletions openff/interchange/components/interchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,8 @@ def to_gromacs(
Molecule names in written files are not guaranteed to match the `Moleclue.name` attribute of the
molecules in the topology, especially if they are empty strings or not unique.
See `to_gro` and `to_top` for more details.
"""
from openff.interchange.interop.gromacs.export._export import GROMACSWriter
from openff.interchange.smirnoff._gromacs import _convert
Expand Down Expand Up @@ -505,6 +507,16 @@ def to_gro(self, file_path: Path | str, decimal: int = 3):
decimal: int, default=3
The number of decimal places to use when writing the GROMACS coordinate file.
Residue IDs must be positive integers (or string representations thereof).
Residue IDs greater than 99,999 are reduced modulo 100,000 in line with common GROMACS practice.
Residue names and IDs from the topology are used, if present, and otherwise are generated internally.
Behavior when some, but not all, residue names and IDs are present in the topology is undefined.
If residue IDs are generated internally, they are assigned sequentially to each molecule starting at 1.
"""
from openff.interchange.interop.gromacs.export._export import GROMACSWriter
from openff.interchange.smirnoff._gromacs import _convert
Expand Down
10 changes: 6 additions & 4 deletions openff/interchange/interop/gromacs/export/_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def _write_atoms(
top.write(
f"{atom.index :6d} "
f"{mapping_to_reduced_atom_types[atom.atom_type] :6s}"
f"{atom.residue_index :8d} "
f"{atom.residue_index % 100_000 :8d} "
f"{atom.residue_name :8s} "
f"{atom.name :6s}"
f"{atom.charge_group_number :6d}"
Expand All @@ -211,7 +211,7 @@ def _write_atoms(
top.write(
f"{atom.index :6d} "
f"{atom.atom_type :6s}"
f"{atom.residue_index :8d} "
f"{atom.residue_index % 100_000 :8d} "
f"{atom.residue_name :8s} "
f"{atom.name :6s}"
f"{atom.charge_group_number :6d}"
Expand Down Expand Up @@ -448,15 +448,17 @@ def _write_gro(self, gro, decimal: int):
for molecule_name, molecule in self.system.molecule_types.items():
n_copies = self.system.molecules[molecule_name]

for _ in range(n_copies):
for copy_index in range(n_copies):

for atom in molecule.atoms:

gro.write(
f"%5d%-5s%5s%5d"
f"%{decimal + 5}.{decimal}f"
f"%{decimal + 5}.{decimal}f"
f"%{decimal + 5}.{decimal}f\n"
% (
atom.residue_index, # This needs to be looked up from a different data structure
(atom.residue_index + copy_index) % 100_000,
atom.residue_name[:5],
atom.name[:5],
(count + 1) % 100000,
Expand Down

0 comments on commit 57dd53a

Please sign in to comment.