Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LAMMPS non-unique molecule ID hotfix #1000

Merged
merged 18 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ab2b4ef
Salted hash in _write_atoms() with molecule ID to guarantee unique ID…
timbernat Jul 9, 2024
2b4bc0c
Wrote unit test to check unique molecule IDs in Interchange.to_lammps…
timbernat Jul 9, 2024
48ef64b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 9, 2024
d988cbf
Apply suggestions from code review
timbernat Jul 16, 2024
2e3de08
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2024
424de3b
Removed unnecessary typing imports
timbernat Jul 16, 2024
4c50cd3
Consolidated pilot_mol init and conformer generation using MoleculeWi…
timbernat Jul 16, 2024
46505da
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2024
8c1fc7b
Updated sidelen default, added assertion to enforce test for multiple…
timbernat Jul 16, 2024
99d3fdf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2024
5266a28
Wrapped mol ID + CTAB hash with hash() to produce more literal value …
timbernat Jul 16, 2024
8b72acd
Re-implemented LAMMPS file molecule ID extraction with in-Python lamm…
timbernat Jul 16, 2024
5d49deb
Merge remote-tracking branch 'refs/remotes/origin/lammps-hotfix' into…
timbernat Jul 16, 2024
e3f1d27
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2024
652ad43
Added pytest parametrization of auxiliary args in test_unique_lammps_…
timbernat Jul 16, 2024
e4ffb11
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2024
04bfc1d
MAINT: Streamline, skip generating positions
mattwthompson Jul 16, 2024
2911b38
FIX: Use benzene, not fulvene
mattwthompson Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from pathlib import Path
from typing import Any, Dict, Type
timbernat marked this conversation as resolved.
Show resolved Hide resolved

import numpy
import pytest
from openff.toolkit import Topology, unit
from openff.toolkit import Molecule, Topology, unit
from openff.toolkit.typing.engines.smirnoff import ForceField
timbernat marked this conversation as resolved.
Show resolved Hide resolved

from openff.interchange import Interchange
from openff.interchange._tests import MoleculeWithConformer, needs_lmp
Expand All @@ -21,7 +25,9 @@ class TestLammps:
"C1COC(=O)O1", # This adds an improper, i2
],
)
def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
def test_to_lammps_single_mols(
self, mol: str, sage_unconstrained: ForceField, n_mols: int
) -> None:
"""
Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies.

Expand Down Expand Up @@ -62,3 +68,104 @@ def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
"Torsion": 0.02 * unit.kilojoule_per_mole,
},
)

def test_unique_lammps_mol_ids(
self, smiles: str, sage_unconstrained: ForceField, sidelen: int = 0
timbernat marked this conversation as resolved.
Show resolved Hide resolved
) -> bool:
"""Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
temp_lammps_path = (
Path.cwd() / "temp.lmp"
) # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file
timbernat marked this conversation as resolved.
Show resolved Hide resolved

# BUILD TOPOLOGY
## 0) generate pilot Molecule and conformer
pilot_mol = Molecule.from_smiles(smiles)
pilot_mol.generate_conformers(n_conformers=1)
timbernat marked this conversation as resolved.
Show resolved Hide resolved
# pilot_mol.assign_partial_charges(partial_charge_method='gasteiger') # generate dummy charges for testing

## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
conf = pilot_mol.conformers[0]
COM = conf.mean(axis=0)
conf_centered = conf - COM

radii = numpy.linalg.norm(conf_centered, axis=1)
r_eff = radii.max()

## 2) generate 3D integer lattice to tile mols onto
xyz_offsets = numpy.column_stack(
[ # integral offsets from 0...(sidelen - 1) along 3 axes
axis_offsets.ravel()
for axis_offsets in numpy.meshgrid(
*[
numpy.arange(sidelen) for _ in range(3)
], # the 3 here is for 3-dimensions
)
]
)

## 3) build topology by tiling
tiled_top = Topology()
for int_offset in xyz_offsets:
mol = Molecule.from_smiles(smiles)
mol.add_conformer(
(conf_centered + 2 * r_eff * int_offset).to("nm")
) # space copied by effective diameter
tiled_top.add_molecule(mol)

## 3a) set periodic box tightly around extremem positions in filled topology
box_dims = tiled_top.get_positions().ptp(axis=0)
box_vectors = (
numpy.eye(3) * box_dims
) # convert to diagonal matrix to get proper shape

# EXPORT TO LAMMPS
interchange = Interchange.from_smirnoff(
sage_unconstrained, tiled_top
) # , charge_from_molecules=[pilot_mol])
# UNRELATED QUESTION: why are waters not correctly recognized?
# PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml)
timbernat marked this conversation as resolved.
Show resolved Hide resolved
interchange.box = box_vectors
interchange.to_lammps(temp_lammps_path)

# EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
timbernat marked this conversation as resolved.
Show resolved Hide resolved
with temp_lammps_path.open(
"r"
) as lmp_file: # pull out text from temporary lammps file ...
all_lines = [
line for line in lmp_file.read().split("\n") if line
] # ... separate by newlines, remove empty lines ...
atom_lines = all_lines[
all_lines.index("Atoms") + 1 : all_lines.index("Bonds")
] # ... extract atoms block ...
temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it

## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info
## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps
def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]:
"""Parse atom info from a single atom-block linds in a .lmp/.lammps files"""
KEYWORDS: dict[str, type] = { # reference for the distinct
"atom_index": int,
"molecule_index": int,
"atom_type": int,
"charge": float,
"x-pos": float,
"y-pos": float,
"z-pos": float,
}

return {
field_label: FieldType(str_val)
for str_val, (field_label, FieldType) in zip(
atom_line.split("\t"), KEYWORDS.items()
)
}

written_mol_ids: set[int] = set()
for atom_line in atom_lines:
atom_info = extract_info_from_lammps_atom_lines(atom_line)
written_mol_ids.add(atom_info["molecule_index"])
expected_mol_ids = {
i + 1 for i in range(interchange.topology.n_molecules)
} # we'd like for each of the N molecules to have ids from [1...N]

return expected_mol_ids == written_mol_ids
timbernat marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 4 additions & 1 deletion openff/interchange/interop/lammps/export/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,10 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
atom_map[atom_index] = atom

for molecule_index, molecule in enumerate(interchange.topology.molecules):
molecule_hash = molecule.ordered_connection_table_hash()
# molecule_hash = molecule.ordered_connection_table_hash()
molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}" # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs)
timbernat marked this conversation as resolved.
Show resolved Hide resolved
# TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers

molecule_map[molecule_hash] = molecule_index
for atom in molecule.atoms:
atom_molecule_map[atom] = molecule_hash
Expand Down
Loading