From ab2b4efade71bce4d0fd21d977cdb9304a92b3dd Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 9 Jul 2024 13:20:38 -0600 Subject: [PATCH 01/17] Salted hash in _write_atoms() with molecule ID to guarantee unique IDs for Molecules with identical chemical tables --- openff/interchange/interop/lammps/export/export.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index e633a9681..015b6b2a6 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -294,7 +294,10 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): atom_map[atom_index] = atom for molecule_index, molecule in enumerate(interchange.topology.molecules): - molecule_hash = molecule.ordered_connection_table_hash() + # molecule_hash = molecule.ordered_connection_table_hash() + molecule_hash = f'{molecule_index}_{molecule.ordered_connection_table_hash()}' # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs) + # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers + molecule_map[molecule_hash] = molecule_index for atom in molecule.atoms: atom_molecule_map[atom] = molecule_hash From 2b4bc0c151ef60ca441c367c5a0a8656da422613 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 9 Jul 2024 13:21:10 -0600 Subject: [PATCH 02/17] Wrote unit test to check unique molecule IDs in Interchange.to_lammps() output --- .../interop/lammps/export/test_lammps.py | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index a179aaa3c..a97c09c5a 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,6 +1,11 @@ +from typing import Any, Type, Dict + import numpy import pytest -from openff.toolkit import Topology, unit + +from pathlib import Path +from openff.toolkit import Molecule, Topology, unit +from openff.toolkit.typing.engines.smirnoff import ForceField from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp @@ -21,7 +26,7 @@ class TestLammps: "C1COC(=O)O1", # This adds an improper, i2 ], ) - def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols): + def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField, n_mols : int) -> None: """ Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies. @@ -62,3 +67,80 @@ def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols): "Torsion": 0.02 * unit.kilojoule_per_mole, }, ) + + + def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceField, sidelen : int=0) -> bool: + '''Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule''' + temp_lammps_path = Path.cwd() / 'temp.lmp' # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file + + # BUILD TOPOLOGY + ## 0) generate pilot Molecule and conformer + pilot_mol = Molecule.from_smiles(smiles) + pilot_mol.generate_conformers(n_conformers=1) + # pilot_mol.assign_partial_charges(partial_charge_method='gasteiger') # generate dummy charges for testing + + ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) + conf = pilot_mol.conformers[0] + COM = conf.mean(axis=0) + conf_centered = conf - COM + + radii = numpy.linalg.norm(conf_centered, axis=1) + r_eff = radii.max() + + ## 2) generate 3D integer lattice to tile mols onto + xyz_offsets = numpy.column_stack([ # integral offsets from 0...(sidelen - 1) along 3 axes + axis_offsets.ravel() for axis_offsets in numpy.meshgrid( + *[numpy.arange(sidelen) for _ in range(3)] # the 3 here is for 3-dimensions + ) + ]) + + ## 3) build topology by tiling + tiled_top = Topology() + for int_offset in xyz_offsets: + mol = Molecule.from_smiles(smiles) + mol.add_conformer((conf_centered + 2*r_eff*int_offset).to('nm')) # space copied by effective diameter + tiled_top.add_molecule(mol) + + ## 3a) set periodic box tightly around extremem positions in filled topology + box_dims = tiled_top.get_positions().ptp(axis=0) + box_vectors = numpy.eye(3) * box_dims # convert to diagonal matrix to get proper shape + + # EXPORT TO LAMMPS + interchange = Interchange.from_smirnoff(sage_unconstrained, tiled_top)#, charge_from_molecules=[pilot_mol]) + # UNRELATED QUESTION: why are waters not correctly recognized? + # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml) + interchange.box = box_vectors + interchange.to_lammps(temp_lammps_path) + + # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY + with temp_lammps_path.open('r') as lmp_file: # pull out text from temporary lammps file ... + all_lines = [line for line in lmp_file.read().split('\n') if line] # ... separate by newlines, remove empty lines ... + atom_lines = all_lines[all_lines.index('Atoms')+1:all_lines.index('Bonds')] #... extract atoms block ... + temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it + + ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info + ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps + def extract_info_from_lammps_atom_lines(atom_line : str) -> Dict[str, Any]: + '''Parse atom info from a single atom-block linds in a .lmp/.lammps files''' + KEYWORDS : dict[str, Type]= { # reference for the distinct + 'atom_index' : int, + 'molecule_index' : int, + 'atom_type' : int, + 'charge' : float, + 'x-pos' : float, + 'y-pos' : float, + 'z-pos' : float, + } + + return { + field_label : FieldType(str_val) + for str_val, (field_label, FieldType) in zip(atom_line.split('\t'), KEYWORDS.items()) + } + + written_mol_ids : set[int] = set() + for atom_line in atom_lines: + atom_info = extract_info_from_lammps_atom_lines(atom_line) + written_mol_ids.add(atom_info['molecule_index']) + expected_mol_ids = set(i + 1 for i in range(interchange.topology.n_molecules)) # we'd like for each of the N molecules to have ids from [1...N] + + return (expected_mol_ids == written_mol_ids) \ No newline at end of file From 48ef64b60e0fc7395a2c8e309a900afceb4d99f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 19:27:10 +0000 Subject: [PATCH 03/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../interop/lammps/export/test_lammps.py | 101 +++++++++++------- .../interop/lammps/export/export.py | 4 +- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index a97c09c5a..bdef4d438 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,9 +1,8 @@ -from typing import Any, Type, Dict +from pathlib import Path +from typing import Any, Dict, Type import numpy import pytest - -from pathlib import Path from openff.toolkit import Molecule, Topology, unit from openff.toolkit.typing.engines.smirnoff import ForceField @@ -26,7 +25,9 @@ class TestLammps: "C1COC(=O)O1", # This adds an improper, i2 ], ) - def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField, n_mols : int) -> None: + def test_to_lammps_single_mols( + self, mol: str, sage_unconstrained: ForceField, n_mols: int + ) -> None: """ Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies. @@ -68,10 +69,13 @@ def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField, }, ) - - def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceField, sidelen : int=0) -> bool: - '''Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule''' - temp_lammps_path = Path.cwd() / 'temp.lmp' # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file + def test_unique_lammps_mol_ids( + self, smiles: str, sage_unconstrained: ForceField, sidelen: int = 0 + ) -> bool: + """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" + temp_lammps_path = ( + Path.cwd() / "temp.lmp" + ) # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file # BUILD TOPOLOGY ## 0) generate pilot Molecule and conformer @@ -88,59 +92,80 @@ def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceFie r_eff = radii.max() ## 2) generate 3D integer lattice to tile mols onto - xyz_offsets = numpy.column_stack([ # integral offsets from 0...(sidelen - 1) along 3 axes - axis_offsets.ravel() for axis_offsets in numpy.meshgrid( - *[numpy.arange(sidelen) for _ in range(3)] # the 3 here is for 3-dimensions - ) - ]) + xyz_offsets = numpy.column_stack( + [ # integral offsets from 0...(sidelen - 1) along 3 axes + axis_offsets.ravel() + for axis_offsets in numpy.meshgrid( + *[ + numpy.arange(sidelen) for _ in range(3) + ], # the 3 here is for 3-dimensions + ) + ] + ) ## 3) build topology by tiling tiled_top = Topology() for int_offset in xyz_offsets: mol = Molecule.from_smiles(smiles) - mol.add_conformer((conf_centered + 2*r_eff*int_offset).to('nm')) # space copied by effective diameter + mol.add_conformer( + (conf_centered + 2 * r_eff * int_offset).to("nm") + ) # space copied by effective diameter tiled_top.add_molecule(mol) ## 3a) set periodic box tightly around extremem positions in filled topology box_dims = tiled_top.get_positions().ptp(axis=0) - box_vectors = numpy.eye(3) * box_dims # convert to diagonal matrix to get proper shape + box_vectors = ( + numpy.eye(3) * box_dims + ) # convert to diagonal matrix to get proper shape # EXPORT TO LAMMPS - interchange = Interchange.from_smirnoff(sage_unconstrained, tiled_top)#, charge_from_molecules=[pilot_mol]) + interchange = Interchange.from_smirnoff( + sage_unconstrained, tiled_top + ) # , charge_from_molecules=[pilot_mol]) # UNRELATED QUESTION: why are waters not correctly recognized? # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml) interchange.box = box_vectors interchange.to_lammps(temp_lammps_path) # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY - with temp_lammps_path.open('r') as lmp_file: # pull out text from temporary lammps file ... - all_lines = [line for line in lmp_file.read().split('\n') if line] # ... separate by newlines, remove empty lines ... - atom_lines = all_lines[all_lines.index('Atoms')+1:all_lines.index('Bonds')] #... extract atoms block ... - temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it - - ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info + with temp_lammps_path.open( + "r" + ) as lmp_file: # pull out text from temporary lammps file ... + all_lines = [ + line for line in lmp_file.read().split("\n") if line + ] # ... separate by newlines, remove empty lines ... + atom_lines = all_lines[ + all_lines.index("Atoms") + 1 : all_lines.index("Bonds") + ] # ... extract atoms block ... + temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it + + ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps - def extract_info_from_lammps_atom_lines(atom_line : str) -> Dict[str, Any]: - '''Parse atom info from a single atom-block linds in a .lmp/.lammps files''' - KEYWORDS : dict[str, Type]= { # reference for the distinct - 'atom_index' : int, - 'molecule_index' : int, - 'atom_type' : int, - 'charge' : float, - 'x-pos' : float, - 'y-pos' : float, - 'z-pos' : float, + def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]: + """Parse atom info from a single atom-block linds in a .lmp/.lammps files""" + KEYWORDS: dict[str, type] = { # reference for the distinct + "atom_index": int, + "molecule_index": int, + "atom_type": int, + "charge": float, + "x-pos": float, + "y-pos": float, + "z-pos": float, } return { - field_label : FieldType(str_val) - for str_val, (field_label, FieldType) in zip(atom_line.split('\t'), KEYWORDS.items()) + field_label: FieldType(str_val) + for str_val, (field_label, FieldType) in zip( + atom_line.split("\t"), KEYWORDS.items() + ) } - written_mol_ids : set[int] = set() + written_mol_ids: set[int] = set() for atom_line in atom_lines: atom_info = extract_info_from_lammps_atom_lines(atom_line) - written_mol_ids.add(atom_info['molecule_index']) - expected_mol_ids = set(i + 1 for i in range(interchange.topology.n_molecules)) # we'd like for each of the N molecules to have ids from [1...N] + written_mol_ids.add(atom_info["molecule_index"]) + expected_mol_ids = { + i + 1 for i in range(interchange.topology.n_molecules) + } # we'd like for each of the N molecules to have ids from [1...N] - return (expected_mol_ids == written_mol_ids) \ No newline at end of file + return expected_mol_ids == written_mol_ids diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index 015b6b2a6..f62dcc89e 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -295,9 +295,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): for molecule_index, molecule in enumerate(interchange.topology.molecules): # molecule_hash = molecule.ordered_connection_table_hash() - molecule_hash = f'{molecule_index}_{molecule.ordered_connection_table_hash()}' # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs) + molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}" # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs) # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers - + molecule_map[molecule_hash] = molecule_index for atom in molecule.atoms: atom_molecule_map[atom] = molecule_hash From d988cbf7b102eabd0a7dbe70bce511c3296c69c8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat <52677403+timbernat@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:09:22 -0600 Subject: [PATCH 04/17] Apply suggestions from code review Incorporated suggestions for unique mol ID unit test improvements Co-authored-by: Matt Thompson --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 7 +++---- openff/interchange/interop/lammps/export/export.py | 5 +++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index bdef4d438..dd311ac4e 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -3,8 +3,7 @@ import numpy import pytest -from openff.toolkit import Molecule, Topology, unit -from openff.toolkit.typing.engines.smirnoff import ForceField +from openff.toolkit import Molecule, Topology, unit, ForceField from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp @@ -75,7 +74,7 @@ def test_unique_lammps_mol_ids( """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" temp_lammps_path = ( Path.cwd() / "temp.lmp" - ) # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file + ) # BUILD TOPOLOGY ## 0) generate pilot Molecule and conformer @@ -168,4 +167,4 @@ def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]: i + 1 for i in range(interchange.topology.n_molecules) } # we'd like for each of the N molecules to have ids from [1...N] - return expected_mol_ids == written_mol_ids + assert expected_mol_ids == written_mol_ids diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index f62dcc89e..5978d83fc 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -294,8 +294,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): atom_map[atom_index] = atom for molecule_index, molecule in enumerate(interchange.topology.molecules): - # molecule_hash = molecule.ordered_connection_table_hash() - molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}" # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs) + # inject mol ID into hash to allow chemically-identical Molecules + # to be labelled with distinct IDs + molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}" # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers molecule_map[molecule_hash] = molecule_index From 2e3de08f256c259890f0f92f225b8bb9ab235604 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:14:09 +0000 Subject: [PATCH 05/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../interop/lammps/export/test_lammps.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index dd311ac4e..6f5e5ebd9 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -3,7 +3,7 @@ import numpy import pytest -from openff.toolkit import Molecule, Topology, unit, ForceField +from openff.toolkit import ForceField, Molecule, Topology, unit from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp @@ -25,7 +25,10 @@ class TestLammps: ], ) def test_to_lammps_single_mols( - self, mol: str, sage_unconstrained: ForceField, n_mols: int + self, + mol: str, + sage_unconstrained: ForceField, + n_mols: int, ) -> None: """ Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies. @@ -69,12 +72,13 @@ def test_to_lammps_single_mols( ) def test_unique_lammps_mol_ids( - self, smiles: str, sage_unconstrained: ForceField, sidelen: int = 0 + self, + smiles: str, + sage_unconstrained: ForceField, + sidelen: int = 0, ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" - temp_lammps_path = ( - Path.cwd() / "temp.lmp" - ) + temp_lammps_path = Path.cwd() / "temp.lmp" # BUILD TOPOLOGY ## 0) generate pilot Molecule and conformer @@ -99,7 +103,7 @@ def test_unique_lammps_mol_ids( numpy.arange(sidelen) for _ in range(3) ], # the 3 here is for 3-dimensions ) - ] + ], ) ## 3) build topology by tiling @@ -107,7 +111,7 @@ def test_unique_lammps_mol_ids( for int_offset in xyz_offsets: mol = Molecule.from_smiles(smiles) mol.add_conformer( - (conf_centered + 2 * r_eff * int_offset).to("nm") + (conf_centered + 2 * r_eff * int_offset).to("nm"), ) # space copied by effective diameter tiled_top.add_molecule(mol) @@ -119,7 +123,8 @@ def test_unique_lammps_mol_ids( # EXPORT TO LAMMPS interchange = Interchange.from_smirnoff( - sage_unconstrained, tiled_top + sage_unconstrained, + tiled_top, ) # , charge_from_molecules=[pilot_mol]) # UNRELATED QUESTION: why are waters not correctly recognized? # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml) @@ -128,7 +133,7 @@ def test_unique_lammps_mol_ids( # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY with temp_lammps_path.open( - "r" + "r", ) as lmp_file: # pull out text from temporary lammps file ... all_lines = [ line for line in lmp_file.read().split("\n") if line @@ -155,7 +160,8 @@ def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]: return { field_label: FieldType(str_val) for str_val, (field_label, FieldType) in zip( - atom_line.split("\t"), KEYWORDS.items() + atom_line.split("\t"), + KEYWORDS.items(), ) } From 424de3ba75509b5b290cad53d741deacfe6d0ab6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 18:15:05 -0600 Subject: [PATCH 06/17] Removed unnecessary typing imports --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 6f5e5ebd9..c893a785b 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Dict, Type +from typing import Any import numpy import pytest From 4c50cd337dbe432b916be50644354ae7abcf41b6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 18:22:42 -0600 Subject: [PATCH 07/17] Consolidated pilot_mol init and conformer generation using MoleculeWithConformer --- .../unit_tests/interop/lammps/export/test_lammps.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index c893a785b..4fd325de9 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -81,12 +81,9 @@ def test_unique_lammps_mol_ids( temp_lammps_path = Path.cwd() / "temp.lmp" # BUILD TOPOLOGY - ## 0) generate pilot Molecule and conformer - pilot_mol = Molecule.from_smiles(smiles) - pilot_mol.generate_conformers(n_conformers=1) - # pilot_mol.assign_partial_charges(partial_charge_method='gasteiger') # generate dummy charges for testing - ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) + pilot_mol = MoleculeWithConformer.from_smiles(smiles) # this will serve as a prototype for all other Molecule copies in the Topology + conf = pilot_mol.conformers[0] COM = conf.mean(axis=0) conf_centered = conf - COM @@ -125,9 +122,7 @@ def test_unique_lammps_mol_ids( interchange = Interchange.from_smirnoff( sage_unconstrained, tiled_top, - ) # , charge_from_molecules=[pilot_mol]) - # UNRELATED QUESTION: why are waters not correctly recognized? - # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml) + ) interchange.box = box_vectors interchange.to_lammps(temp_lammps_path) From 46505da0cceee3b8c36c1da6910792d75af0ae4a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:23:09 +0000 Subject: [PATCH 08/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 4fd325de9..006a198fd 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -82,7 +82,9 @@ def test_unique_lammps_mol_ids( # BUILD TOPOLOGY ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) - pilot_mol = MoleculeWithConformer.from_smiles(smiles) # this will serve as a prototype for all other Molecule copies in the Topology + pilot_mol = MoleculeWithConformer.from_smiles( + smiles + ) # this will serve as a prototype for all other Molecule copies in the Topology conf = pilot_mol.conformers[0] COM = conf.mean(axis=0) From 8c1fc7bb2dc0061e73bcb596bdac683579833c16 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 18:27:48 -0600 Subject: [PATCH 09/17] Updated sidelen default, added assertion to enforce test for multiple Molecules --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 006a198fd..2a1f5cc62 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -75,10 +75,11 @@ def test_unique_lammps_mol_ids( self, smiles: str, sage_unconstrained: ForceField, - sidelen: int = 0, + sidelen: int = 2, ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" temp_lammps_path = Path.cwd() / "temp.lmp" + assert(isinstance(sidelen, int) and sidelen > 1) # BUILD TOPOLOGY ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) From 99d3fdf16b341800e6e4810f448d787ee238d4e9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:30:51 +0000 Subject: [PATCH 10/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 2a1f5cc62..60d4e2edc 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -79,12 +79,12 @@ def test_unique_lammps_mol_ids( ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" temp_lammps_path = Path.cwd() / "temp.lmp" - assert(isinstance(sidelen, int) and sidelen > 1) + assert isinstance(sidelen, int) and sidelen > 1 # BUILD TOPOLOGY ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) pilot_mol = MoleculeWithConformer.from_smiles( - smiles + smiles, ) # this will serve as a prototype for all other Molecule copies in the Topology conf = pilot_mol.conformers[0] From 5266a28e7786f88cec155d5b8e253bfbd06a7fb8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 20:02:01 -0600 Subject: [PATCH 11/17] Wrapped mol ID + CTAB hash with hash() to produce more literal value for molecule_hash --- openff/interchange/interop/lammps/export/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index 5978d83fc..ff5036b75 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -296,7 +296,7 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): for molecule_index, molecule in enumerate(interchange.topology.molecules): # inject mol ID into hash to allow chemically-identical Molecules # to be labelled with distinct IDs - molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}" + molecule_hash = hash(f"{molecule_index}_{molecule.ordered_connection_table_hash()}") # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers molecule_map[molecule_hash] = molecule_index From 8b72acde8e0c70cded0a81ad5d54b4e68be3660e Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 20:04:34 -0600 Subject: [PATCH 12/17] Re-implemented LAMMPS file molecule ID extraction with in-Python lammps interface --- .../interop/lammps/export/test_lammps.py | 80 ++++++++----------- 1 file changed, 32 insertions(+), 48 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 2a1f5cc62..27d2e524e 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,13 +1,14 @@ -from pathlib import Path -from typing import Any - import numpy import pytest +import lammps +from pathlib import Path + from openff.toolkit import ForceField, Molecule, Topology, unit from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp from openff.interchange.drivers import get_lammps_energies, get_openmm_energies +from openff.interchange.components.mdconfig import MDConfig @needs_lmp @@ -78,9 +79,16 @@ def test_unique_lammps_mol_ids( sidelen: int = 2, ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" - temp_lammps_path = Path.cwd() / "temp.lmp" assert(isinstance(sidelen, int) and sidelen > 1) + # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is + # the filename hard-coded into the read_data output of MDConfig.write_lammps_input() + # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?) + cwd = Path.cwd() + lmp_file_name : str = 'temp' + lammps_input_path = cwd / f'{lmp_file_name}.in' + lammps_data_path = cwd / 'out.lmp' + # BUILD TOPOLOGY ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) pilot_mol = MoleculeWithConformer.from_smiles( @@ -112,7 +120,7 @@ def test_unique_lammps_mol_ids( mol = Molecule.from_smiles(smiles) mol.add_conformer( (conf_centered + 2 * r_eff * int_offset).to("nm"), - ) # space copied by effective diameter + ) # space copies by effective diameter tiled_top.add_molecule(mol) ## 3a) set periodic box tightly around extremem positions in filled topology @@ -127,48 +135,24 @@ def test_unique_lammps_mol_ids( tiled_top, ) interchange.box = box_vectors - interchange.to_lammps(temp_lammps_path) - - # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY - with temp_lammps_path.open( - "r", - ) as lmp_file: # pull out text from temporary lammps file ... - all_lines = [ - line for line in lmp_file.read().split("\n") if line - ] # ... separate by newlines, remove empty lines ... - atom_lines = all_lines[ - all_lines.index("Atoms") + 1 : all_lines.index("Bonds") - ] # ... extract atoms block ... - temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it - - ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info - ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps - def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]: - """Parse atom info from a single atom-block linds in a .lmp/.lammps files""" - KEYWORDS: dict[str, type] = { # reference for the distinct - "atom_index": int, - "molecule_index": int, - "atom_type": int, - "charge": float, - "x-pos": float, - "y-pos": float, - "z-pos": float, - } - - return { - field_label: FieldType(str_val) - for str_val, (field_label, FieldType) in zip( - atom_line.split("\t"), - KEYWORDS.items(), - ) - } - - written_mol_ids: set[int] = set() - for atom_line in atom_lines: - atom_info = extract_info_from_lammps_atom_lines(atom_line) - written_mol_ids.add(atom_info["molecule_index"]) - expected_mol_ids = { - i + 1 for i in range(interchange.topology.n_molecules) - } # we'd like for each of the N molecules to have ids from [1...N] + interchange.to_lammps(lammps_data_path) + + mdconfig = MDConfig.from_interchange(interchange) + mdconfig.write_lammps_input(interchange=interchange, input_file=lammps_input_path) + + ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY + with lammps.lammps(cmdargs=['-screen', 'none', '-log', 'none']) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files + lmp.file('temp.in') # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file + written_mol_ids = set( + mol_id + for _, mol_id in zip(range(lmp.get_natoms()), lmp.extract_atom('molecule')) + ) # need to zip with range capped at n_atoms, otherwise will iterate forever + + # dispose of the lammps files once we've read them to leave no trace on disc + lammps_input_path.unlink() + lammps_data_path.unlink() + + # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N] + expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)} assert expected_mol_ids == written_mol_ids From e3f1d27416f150a9799d25b07401a35f46fe4fc5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:14:18 +0000 Subject: [PATCH 13/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../interop/lammps/export/test_lammps.py | 44 +++++++++++-------- .../interop/lammps/export/export.py | 4 +- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 4decb2657..1b46068b2 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -1,14 +1,14 @@ -import numpy -import pytest -import lammps from pathlib import Path +import lammps +import numpy +import pytest from openff.toolkit import ForceField, Molecule, Topology, unit from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp -from openff.interchange.drivers import get_lammps_energies, get_openmm_energies from openff.interchange.components.mdconfig import MDConfig +from openff.interchange.drivers import get_lammps_energies, get_openmm_energies @needs_lmp @@ -79,15 +79,15 @@ def test_unique_lammps_mol_ids( sidelen: int = 2, ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" - assert(isinstance(sidelen, int) and sidelen > 1) + assert isinstance(sidelen, int) and sidelen > 1 # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is # the filename hard-coded into the read_data output of MDConfig.write_lammps_input() # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?) cwd = Path.cwd() - lmp_file_name : str = 'temp' - lammps_input_path = cwd / f'{lmp_file_name}.in' - lammps_data_path = cwd / 'out.lmp' + lmp_file_name: str = "temp" + lammps_input_path = cwd / f"{lmp_file_name}.in" + lammps_data_path = cwd / "out.lmp" # BUILD TOPOLOGY ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) @@ -138,21 +138,29 @@ def test_unique_lammps_mol_ids( interchange.to_lammps(lammps_data_path) mdconfig = MDConfig.from_interchange(interchange) - mdconfig.write_lammps_input(interchange=interchange, input_file=lammps_input_path) + mdconfig.write_lammps_input( + interchange=interchange, input_file=lammps_input_path + ) ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY - with lammps.lammps(cmdargs=['-screen', 'none', '-log', 'none']) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files - lmp.file('temp.in') # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file - written_mol_ids = set( + with lammps.lammps( + cmdargs=["-screen", "none", "-log", "none"] + ) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files + lmp.file( + "temp.in" + ) # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file + written_mol_ids = { mol_id - for _, mol_id in zip(range(lmp.get_natoms()), lmp.extract_atom('molecule')) - ) # need to zip with range capped at n_atoms, otherwise will iterate forever - + for _, mol_id in zip( + range(lmp.get_natoms()), lmp.extract_atom("molecule") + ) + } # need to zip with range capped at n_atoms, otherwise will iterate forever + # dispose of the lammps files once we've read them to leave no trace on disc - lammps_input_path.unlink() - lammps_data_path.unlink() + lammps_input_path.unlink() + lammps_data_path.unlink() # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N] - expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)} + expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)} assert expected_mol_ids == written_mol_ids diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index ff5036b75..97ec71517 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -296,7 +296,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): for molecule_index, molecule in enumerate(interchange.topology.molecules): # inject mol ID into hash to allow chemically-identical Molecules # to be labelled with distinct IDs - molecule_hash = hash(f"{molecule_index}_{molecule.ordered_connection_table_hash()}") + molecule_hash = hash( + f"{molecule_index}_{molecule.ordered_connection_table_hash()}" + ) # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers molecule_map[molecule_hash] = molecule_index From 652ad4359a48bd03aed9f9a06bbdfb05cfc5a25a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 15 Jul 2024 20:26:18 -0600 Subject: [PATCH 14/17] Added pytest parametrization of auxiliary args in test_unique_lammps_mol_ids() --- .../unit_tests/interop/lammps/export/test_lammps.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 1b46068b2..3a191d8ce 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -72,6 +72,17 @@ def test_to_lammps_single_mols( }, ) + @pytest.mark.parametrize("sidelen", [2, 3]) + @pytest.mark.parametrize( + "smiles", + [ + "O", # nothing particularly special about these molecules + "CCO", # just testing that unique IDs hold for diverse chemistries + "N1CCCC1", + "c1cccc1c", + + ] + ) def test_unique_lammps_mol_ids( self, smiles: str, From e4ffb11be294d7a7f2d1b1966366accdbc7eef32 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 02:26:47 +0000 Subject: [PATCH 15/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../interop/lammps/export/test_lammps.py | 19 ++++++++++--------- .../interop/lammps/export/export.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index 3a191d8ce..e00a979ae 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -75,13 +75,12 @@ def test_to_lammps_single_mols( @pytest.mark.parametrize("sidelen", [2, 3]) @pytest.mark.parametrize( "smiles", - [ - "O", # nothing particularly special about these molecules - "CCO", # just testing that unique IDs hold for diverse chemistries + [ + "O", # nothing particularly special about these molecules + "CCO", # just testing that unique IDs hold for diverse chemistries "N1CCCC1", "c1cccc1c", - - ] + ], ) def test_unique_lammps_mol_ids( self, @@ -150,20 +149,22 @@ def test_unique_lammps_mol_ids( mdconfig = MDConfig.from_interchange(interchange) mdconfig.write_lammps_input( - interchange=interchange, input_file=lammps_input_path + interchange=interchange, + input_file=lammps_input_path, ) ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY with lammps.lammps( - cmdargs=["-screen", "none", "-log", "none"] + cmdargs=["-screen", "none", "-log", "none"], ) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files lmp.file( - "temp.in" + "temp.in", ) # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file written_mol_ids = { mol_id for _, mol_id in zip( - range(lmp.get_natoms()), lmp.extract_atom("molecule") + range(lmp.get_natoms()), + lmp.extract_atom("molecule"), ) } # need to zip with range capped at n_atoms, otherwise will iterate forever diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py index 97ec71517..8d2b5927c 100644 --- a/openff/interchange/interop/lammps/export/export.py +++ b/openff/interchange/interop/lammps/export/export.py @@ -297,7 +297,7 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict): # inject mol ID into hash to allow chemically-identical Molecules # to be labelled with distinct IDs molecule_hash = hash( - f"{molecule_index}_{molecule.ordered_connection_table_hash()}" + f"{molecule_index}_{molecule.ordered_connection_table_hash()}", ) # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers From 04bfc1dfe6fc0ab55307ee41652db4e3f6cc993c Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 16 Jul 2024 11:53:18 -0500 Subject: [PATCH 16/17] MAINT: Streamline, skip generating positions --- .../interop/lammps/export/test_lammps.py | 130 ++++++------------ 1 file changed, 45 insertions(+), 85 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index e00a979ae..aa1071bed 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -3,13 +3,16 @@ import lammps import numpy import pytest -from openff.toolkit import ForceField, Molecule, Topology, unit +from openff.toolkit import ForceField, Quantity, Topology, unit +from openff.utilities import temporary_cd from openff.interchange import Interchange from openff.interchange._tests import MoleculeWithConformer, needs_lmp from openff.interchange.components.mdconfig import MDConfig from openff.interchange.drivers import get_lammps_energies, get_openmm_energies +rng = numpy.random.default_rng(821) + @needs_lmp class TestLammps: @@ -72,107 +75,64 @@ def test_to_lammps_single_mols( }, ) - @pytest.mark.parametrize("sidelen", [2, 3]) + @pytest.mark.parametrize("n_mols", [2, 3]) @pytest.mark.parametrize( "smiles", [ - "O", # nothing particularly special about these molecules - "CCO", # just testing that unique IDs hold for diverse chemistries + "CCO", "N1CCCC1", "c1cccc1c", ], ) def test_unique_lammps_mol_ids( self, - smiles: str, - sage_unconstrained: ForceField, - sidelen: int = 2, + smiles, + sage_unconstrained, + n_mols, ) -> bool: """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule""" - assert isinstance(sidelen, int) and sidelen > 1 - - # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is - # the filename hard-coded into the read_data output of MDConfig.write_lammps_input() - # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?) - cwd = Path.cwd() - lmp_file_name: str = "temp" - lammps_input_path = cwd / f"{lmp_file_name}.in" - lammps_data_path = cwd / "out.lmp" - - # BUILD TOPOLOGY - ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling) - pilot_mol = MoleculeWithConformer.from_smiles( - smiles, - ) # this will serve as a prototype for all other Molecule copies in the Topology - - conf = pilot_mol.conformers[0] - COM = conf.mean(axis=0) - conf_centered = conf - COM - - radii = numpy.linalg.norm(conf_centered, axis=1) - r_eff = radii.max() - - ## 2) generate 3D integer lattice to tile mols onto - xyz_offsets = numpy.column_stack( - [ # integral offsets from 0...(sidelen - 1) along 3 axes - axis_offsets.ravel() - for axis_offsets in numpy.meshgrid( - *[ - numpy.arange(sidelen) for _ in range(3) - ], # the 3 here is for 3-dimensions - ) - ], - ) - ## 3) build topology by tiling - tiled_top = Topology() - for int_offset in xyz_offsets: - mol = Molecule.from_smiles(smiles) - mol.add_conformer( - (conf_centered + 2 * r_eff * int_offset).to("nm"), - ) # space copies by effective diameter - tiled_top.add_molecule(mol) - - ## 3a) set periodic box tightly around extremem positions in filled topology - box_dims = tiled_top.get_positions().ptp(axis=0) - box_vectors = ( - numpy.eye(3) * box_dims - ) # convert to diagonal matrix to get proper shape - - # EXPORT TO LAMMPS - interchange = Interchange.from_smirnoff( - sage_unconstrained, - tiled_top, - ) - interchange.box = box_vectors - interchange.to_lammps(lammps_data_path) + molecule = MoleculeWithConformer.from_smiles(smiles) + topology = Topology.from_molecules(n_mols * [molecule]) - mdconfig = MDConfig.from_interchange(interchange) - mdconfig.write_lammps_input( - interchange=interchange, - input_file=lammps_input_path, + # Just use random positions since we're testing molecule IDs, not physics + topology.set_positions( + Quantity( + rng.random((topology.n_atoms, 3)), + "nanometer", + ), ) + topology.box_vectors = Quantity([4, 4, 4], "nanometer") - ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY - with lammps.lammps( - cmdargs=["-screen", "none", "-log", "none"], - ) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files - lmp.file( - "temp.in", - ) # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file - written_mol_ids = { - mol_id - for _, mol_id in zip( - range(lmp.get_natoms()), - lmp.extract_atom("molecule"), - ) - } # need to zip with range capped at n_atoms, otherwise will iterate forever + with temporary_cd(): + lammps_input_path = Path.cwd() / "temp.in" + lammps_data_path = Path.cwd() / "out.lmp" - # dispose of the lammps files once we've read them to leave no trace on disc - lammps_input_path.unlink() - lammps_data_path.unlink() + interchange = sage_unconstrained.create_interchange(topology) + interchange.to_lammps(lammps_data_path) - # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N] + mdconfig = MDConfig.from_interchange(interchange) + mdconfig.write_lammps_input( + interchange=interchange, + input_file=lammps_input_path, + ) + + # Extract molecule IDs from data file + with lammps.lammps( + cmdargs=["-screen", "none", "-log", "none"], + ) as lmp: + lmp.file( + "temp.in", + ) + written_mol_ids = { + mol_id + for _, mol_id in zip( + range(lmp.get_natoms()), + lmp.extract_atom("molecule"), + ) + } + + # these are expected to be [1...N] for each of N molecules expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)} assert expected_mol_ids == written_mol_ids From 2911b38a859ff70681e008cfb107a19f28164421 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 16 Jul 2024 12:11:21 -0500 Subject: [PATCH 17/17] FIX: Use benzene, not fulvene --- .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py index aa1071bed..ad5fa3330 100644 --- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py +++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py @@ -81,7 +81,7 @@ def test_to_lammps_single_mols( [ "CCO", "N1CCCC1", - "c1cccc1c", + "c1ccccc1", ], ) def test_unique_lammps_mol_ids(