From 2a4c999b5be700c20f583b154a65a30cbde4864a Mon Sep 17 00:00:00 2001 From: Timotej Bernat <52677403+timbernat@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:32:04 -0600 Subject: [PATCH] LAMMPS non-unique molecule ID hotfix (#1000) * Salted hash in _write_atoms() with molecule ID to guarantee unique IDs for Molecules with identical chemical tables * Wrote unit test to check unique molecule IDs in Interchange.to_lammps() output * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply suggestions from code review Incorporated suggestions for unique mol ID unit test improvements Co-authored-by: Matt Thompson * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Removed unnecessary typing imports * Consolidated pilot_mol init and conformer generation using MoleculeWithConformer * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Updated sidelen default, added assertion to enforce test for multiple Molecules * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Wrapped mol ID + CTAB hash with hash() to produce more literal value for molecule_hash * Re-implemented LAMMPS file molecule ID extraction with in-Python lammps interface * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added pytest parametrization of auxiliary args in test_unique_lammps_mol_ids() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * MAINT: Streamline, skip generating positions * FIX: Use benzene, not fulvene --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Matt Thompson --- .../interop/lammps/export/test_lammps.py | 78 ++++++++++++++++++- .../interop/lammps/export/export.py | 8 +- 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index a179aaa3c..ad5fa3330 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,11 +1,18 @@ +from pathlib import Path + +import lammps import numpy import pytest -from openff.toolkit import Topology, unit +from openff.toolkit import ForceField, Quantity, Topology, unit +from openff.utilities import temporary_cd from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp +from openff.interchange.components.mdconfig import MDConfig from openff.interchange.drivers import get_lammps_energies, get_openmm_energies +rng = numpy.random.default_rng(821) + @needs_lmp class TestLammps: @@ -21,7 +28,12 @@ class TestLammps: "C1COC(=O)O1", # This adds an improper, i2 ], ) - def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols): + def test_to_lammps_single_mols( + self, + mol: str, + sage_unconstrained: ForceField, + n_mols: int, + ) -> None: """ Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies. @@ -62,3 +74,65 @@ def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols): "Torsion": 0.02 * unit.kilojoule_per_mole, }, ) + + @pytest.mark.parametrize("n_mols", [2, 3]) + @pytest.mark.parametrize( + "smiles", + [ + "CCO", + "N1CCCC1", + "c1ccccc1", + ], + ) + def test_unique_lammps_mol_ids( + self, + smiles, + sage_unconstrained, + n_mols, + ) -> bool: + """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" + + molecule = MoleculeWithConformer.from_smiles(smiles) + topology = Topology.from_molecules(n_mols * [molecule]) + + # Just use random positions since we're testing molecule IDs, not physics + topology.set_positions( + Quantity( + rng.random((topology.n_atoms, 3)), + "nanometer", + ), + ) + topology.box_vectors = Quantity([4, 4, 4], "nanometer") + + with temporary_cd(): + lammps_input_path = Path.cwd() / "temp.in" + lammps_data_path = Path.cwd() / "out.lmp" + + interchange = sage_unconstrained.create_interchange(topology) + interchange.to_lammps(lammps_data_path) + + mdconfig = MDConfig.from_interchange(interchange) + mdconfig.write_lammps_input( + interchange=interchange, + input_file=lammps_input_path, + ) + + # Extract molecule IDs from data file + with lammps.lammps( + cmdargs=["-screen", "none", "-log", "none"], + ) as lmp: + lmp.file( + "temp.in", + ) + written_mol_ids = { + mol_id + for _, mol_id in zip( + range(lmp.get_natoms()), + lmp.extract_atom("molecule"), + ) + } + + # these are expected to be [1...N] for each of N molecules + expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)} + + assert expected_mol_ids == written_mol_ids diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index e633a9681..8d2b5927c 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -294,7 +294,13 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): atom_map[atom_index] = atom for molecule_index, molecule in enumerate(interchange.topology.molecules): - molecule_hash = molecule.ordered_connection_table_hash() + # inject mol ID into hash to allow chemically-identical Molecules + # to be labelled with distinct IDs + molecule_hash = hash( + f"{molecule_index}_{molecule.ordered_connection_table_hash()}", + ) + # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers + molecule_map[molecule_hash] = molecule_index for atom in molecule.atoms: atom_molecule_map[atom] = molecule_hash