Skip to content

Commit

Permalink
LAMMPS non-unique molecule ID hotfix (#1000)
Browse files Browse the repository at this point in the history
* Salted hash in _write_atoms() with molecule ID to guarantee unique IDs for Molecules with identical chemical tables

* Wrote unit test to check unique molecule IDs in Interchange.to_lammps() output

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Apply suggestions from code review

Incorporated suggestions for unique mol ID unit test improvements

Co-authored-by: Matt Thompson <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Removed unnecessary typing imports

* Consolidated pilot_mol init and conformer generation using MoleculeWithConformer

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Updated sidelen default, added assertion to enforce test for multiple Molecules

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Wrapped mol ID + CTAB hash with hash() to produce more literal value for molecule_hash

* Re-implemented LAMMPS file molecule ID extraction with in-Python lammps interface

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Added pytest parametrization of auxiliary args in test_unique_lammps_mol_ids()

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* MAINT: Streamline, skip generating positions

* FIX: Use benzene, not fulvene

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Matt Thompson <[email protected]>
  • Loading branch information
3 people authored Jul 16, 2024
1 parent 88e57e7 commit 2a4c999
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from pathlib import Path

import lammps
import numpy
import pytest
from openff.toolkit import Topology, unit
from openff.toolkit import ForceField, Quantity, Topology, unit
from openff.utilities import temporary_cd

from openff.interchange import Interchange
from openff.interchange._tests import MoleculeWithConformer, needs_lmp
from openff.interchange.components.mdconfig import MDConfig
from openff.interchange.drivers import get_lammps_energies, get_openmm_energies

rng = numpy.random.default_rng(821)


@needs_lmp
class TestLammps:
Expand All @@ -21,7 +28,12 @@ class TestLammps:
"C1COC(=O)O1", # This adds an improper, i2
],
)
def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
def test_to_lammps_single_mols(
self,
mol: str,
sage_unconstrained: ForceField,
n_mols: int,
) -> None:
"""
Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies.
Expand Down Expand Up @@ -62,3 +74,65 @@ def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
"Torsion": 0.02 * unit.kilojoule_per_mole,
},
)

@pytest.mark.parametrize("n_mols", [2, 3])
@pytest.mark.parametrize(
"smiles",
[
"CCO",
"N1CCCC1",
"c1ccccc1",
],
)
def test_unique_lammps_mol_ids(
self,
smiles,
sage_unconstrained,
n_mols,
) -> bool:
"""Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""

molecule = MoleculeWithConformer.from_smiles(smiles)
topology = Topology.from_molecules(n_mols * [molecule])

# Just use random positions since we're testing molecule IDs, not physics
topology.set_positions(
Quantity(
rng.random((topology.n_atoms, 3)),
"nanometer",
),
)
topology.box_vectors = Quantity([4, 4, 4], "nanometer")

with temporary_cd():
lammps_input_path = Path.cwd() / "temp.in"
lammps_data_path = Path.cwd() / "out.lmp"

interchange = sage_unconstrained.create_interchange(topology)
interchange.to_lammps(lammps_data_path)

mdconfig = MDConfig.from_interchange(interchange)
mdconfig.write_lammps_input(
interchange=interchange,
input_file=lammps_input_path,
)

# Extract molecule IDs from data file
with lammps.lammps(
cmdargs=["-screen", "none", "-log", "none"],
) as lmp:
lmp.file(
"temp.in",
)
written_mol_ids = {
mol_id
for _, mol_id in zip(
range(lmp.get_natoms()),
lmp.extract_atom("molecule"),
)
}

# these are expected to be [1...N] for each of N molecules
expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)}

assert expected_mol_ids == written_mol_ids
8 changes: 7 additions & 1 deletion openff/interchange/interop/lammps/export/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,13 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
atom_map[atom_index] = atom

for molecule_index, molecule in enumerate(interchange.topology.molecules):
molecule_hash = molecule.ordered_connection_table_hash()
# inject mol ID into hash to allow chemically-identical Molecules
# to be labelled with distinct IDs
molecule_hash = hash(
f"{molecule_index}_{molecule.ordered_connection_table_hash()}",
)
# TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers

molecule_map[molecule_hash] = molecule_index
for atom in molecule.atoms:
atom_molecule_map[atom] = molecule_hash
Expand Down

0 comments on commit 2a4c999

Please sign in to comment.