From ab2b4efade71bce4d0fd21d977cdb9304a92b3dd Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Tue, 9 Jul 2024 13:20:38 -0600
Subject: [PATCH 01/17] Salted hash in _write_atoms() with molecule ID to
 guarantee unique IDs for Molecules with identical chemical tables

---
 openff/interchange/interop/lammps/export/export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index e633a9681..015b6b2a6 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -294,7 +294,10 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
         atom_map[atom_index] = atom
 
     for molecule_index, molecule in enumerate(interchange.topology.molecules):
-        molecule_hash = molecule.ordered_connection_table_hash()
+        # molecule_hash = molecule.ordered_connection_table_hash()
+        molecule_hash = f'{molecule_index}_{molecule.ordered_connection_table_hash()}' # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs)
+        # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
+        
         molecule_map[molecule_hash] = molecule_index
         for atom in molecule.atoms:
             atom_molecule_map[atom] = molecule_hash

From 2b4bc0c151ef60ca441c367c5a0a8656da422613 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Tue, 9 Jul 2024 13:21:10 -0600
Subject: [PATCH 02/17] Wrote unit test to check unique molecule IDs in
 Interchange.to_lammps() output

---
 .../interop/lammps/export/test_lammps.py      | 86 ++++++++++++++++++-
 1 file changed, 84 insertions(+), 2 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index a179aaa3c..a97c09c5a 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -1,6 +1,11 @@
+from typing import Any, Type, Dict
+
 import numpy
 import pytest
-from openff.toolkit import Topology, unit
+
+from pathlib import Path
+from openff.toolkit import Molecule, Topology, unit
+from openff.toolkit.typing.engines.smirnoff import ForceField
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
@@ -21,7 +26,7 @@ class TestLammps:
             "C1COC(=O)O1",  # This adds an improper, i2
         ],
     )
-    def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
+    def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField, n_mols : int) -> None:
         """
         Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies.
 
@@ -62,3 +67,80 @@ def test_to_lammps_single_mols(self, mol, sage_unconstrained, n_mols):
                 "Torsion": 0.02 * unit.kilojoule_per_mole,
             },
         )
+
+
+    def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceField, sidelen : int=0) -> bool:
+        '''Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule'''
+        temp_lammps_path = Path.cwd() / 'temp.lmp' # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file
+
+        # BUILD TOPOLOGY
+        ## 0) generate pilot Molecule and conformer
+        pilot_mol = Molecule.from_smiles(smiles)
+        pilot_mol.generate_conformers(n_conformers=1)
+        # pilot_mol.assign_partial_charges(partial_charge_method='gasteiger') # generate dummy charges for testing
+
+        ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
+        conf = pilot_mol.conformers[0]
+        COM = conf.mean(axis=0)
+        conf_centered = conf - COM
+
+        radii = numpy.linalg.norm(conf_centered, axis=1)
+        r_eff = radii.max()
+
+        ## 2) generate 3D integer lattice to tile mols onto
+        xyz_offsets = numpy.column_stack([ # integral offsets from 0...(sidelen - 1) along 3 axes
+            axis_offsets.ravel() for axis_offsets in numpy.meshgrid(
+                *[numpy.arange(sidelen) for _ in range(3)] # the 3 here is for 3-dimensions
+            )
+        ])
+
+        ## 3) build topology by tiling
+        tiled_top = Topology()
+        for int_offset in xyz_offsets:
+            mol = Molecule.from_smiles(smiles)
+            mol.add_conformer((conf_centered + 2*r_eff*int_offset).to('nm')) # space copied by effective diameter
+            tiled_top.add_molecule(mol)
+
+        ## 3a) set periodic box tightly around extremem positions in filled topology
+        box_dims = tiled_top.get_positions().ptp(axis=0)
+        box_vectors = numpy.eye(3) * box_dims # convert to diagonal matrix to get proper shape
+
+        # EXPORT TO LAMMPS
+        interchange = Interchange.from_smirnoff(sage_unconstrained, tiled_top)#, charge_from_molecules=[pilot_mol])
+        # UNRELATED QUESTION: why are waters not correctly recognized?
+        # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml)
+        interchange.box = box_vectors
+        interchange.to_lammps(temp_lammps_path)
+
+        # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
+        with temp_lammps_path.open('r') as lmp_file: # pull out text from temporary lammps file ...
+            all_lines = [line for line in lmp_file.read().split('\n') if line]          # ... separate by newlines, remove empty lines ...
+            atom_lines = all_lines[all_lines.index('Atoms')+1:all_lines.index('Bonds')] #... extract atoms block ...
+        temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it
+            
+        ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info 
+        ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps
+        def extract_info_from_lammps_atom_lines(atom_line : str) -> Dict[str, Any]:
+            '''Parse atom info from a single atom-block linds in a .lmp/.lammps files'''
+            KEYWORDS : dict[str, Type]= {  # reference for the distinct 
+                'atom_index' : int, 
+                'molecule_index' : int, 
+                'atom_type' : int, 
+                'charge' : float,
+                'x-pos'  : float,
+                'y-pos'  : float,
+                'z-pos'  : float,
+            }
+
+            return {
+                field_label : FieldType(str_val)
+                    for str_val, (field_label, FieldType) in zip(atom_line.split('\t'), KEYWORDS.items())
+            }
+
+        written_mol_ids : set[int] = set()
+        for atom_line in atom_lines:
+            atom_info = extract_info_from_lammps_atom_lines(atom_line)
+            written_mol_ids.add(atom_info['molecule_index'])
+        expected_mol_ids = set(i + 1 for i in range(interchange.topology.n_molecules)) # we'd like for each of the N molecules to have ids from [1...N]
+
+        return (expected_mol_ids == written_mol_ids)
\ No newline at end of file

From 48ef64b60e0fc7395a2c8e309a900afceb4d99f4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 9 Jul 2024 19:27:10 +0000
Subject: [PATCH 03/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../interop/lammps/export/test_lammps.py      | 101 +++++++++++-------
 .../interop/lammps/export/export.py           |   4 +-
 2 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index a97c09c5a..bdef4d438 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -1,9 +1,8 @@
-from typing import Any, Type, Dict
+from pathlib import Path
+from typing import Any, Dict, Type
 
 import numpy
 import pytest
-
-from pathlib import Path
 from openff.toolkit import Molecule, Topology, unit
 from openff.toolkit.typing.engines.smirnoff import ForceField
 
@@ -26,7 +25,9 @@ class TestLammps:
             "C1COC(=O)O1",  # This adds an improper, i2
         ],
     )
-    def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField, n_mols : int) -> None:
+    def test_to_lammps_single_mols(
+        self, mol: str, sage_unconstrained: ForceField, n_mols: int
+    ) -> None:
         """
         Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies.
 
@@ -68,10 +69,13 @@ def test_to_lammps_single_mols(self, mol : str, sage_unconstrained : ForceField,
             },
         )
 
-
-    def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceField, sidelen : int=0) -> bool:
-        '''Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule'''
-        temp_lammps_path = Path.cwd() / 'temp.lmp' # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file
+    def test_unique_lammps_mol_ids(
+        self, smiles: str, sage_unconstrained: ForceField, sidelen: int = 0
+    ) -> bool:
+        """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
+        temp_lammps_path = (
+            Path.cwd() / "temp.lmp"
+        )  # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file
 
         # BUILD TOPOLOGY
         ## 0) generate pilot Molecule and conformer
@@ -88,59 +92,80 @@ def test_unique_lammps_mol_ids(self, smiles : str, sage_unconstrained : ForceFie
         r_eff = radii.max()
 
         ## 2) generate 3D integer lattice to tile mols onto
-        xyz_offsets = numpy.column_stack([ # integral offsets from 0...(sidelen - 1) along 3 axes
-            axis_offsets.ravel() for axis_offsets in numpy.meshgrid(
-                *[numpy.arange(sidelen) for _ in range(3)] # the 3 here is for 3-dimensions
-            )
-        ])
+        xyz_offsets = numpy.column_stack(
+            [  # integral offsets from 0...(sidelen - 1) along 3 axes
+                axis_offsets.ravel()
+                for axis_offsets in numpy.meshgrid(
+                    *[
+                        numpy.arange(sidelen) for _ in range(3)
+                    ],  # the 3 here is for 3-dimensions
+                )
+            ]
+        )
 
         ## 3) build topology by tiling
         tiled_top = Topology()
         for int_offset in xyz_offsets:
             mol = Molecule.from_smiles(smiles)
-            mol.add_conformer((conf_centered + 2*r_eff*int_offset).to('nm')) # space copied by effective diameter
+            mol.add_conformer(
+                (conf_centered + 2 * r_eff * int_offset).to("nm")
+            )  # space copied by effective diameter
             tiled_top.add_molecule(mol)
 
         ## 3a) set periodic box tightly around extremem positions in filled topology
         box_dims = tiled_top.get_positions().ptp(axis=0)
-        box_vectors = numpy.eye(3) * box_dims # convert to diagonal matrix to get proper shape
+        box_vectors = (
+            numpy.eye(3) * box_dims
+        )  # convert to diagonal matrix to get proper shape
 
         # EXPORT TO LAMMPS
-        interchange = Interchange.from_smirnoff(sage_unconstrained, tiled_top)#, charge_from_molecules=[pilot_mol])
+        interchange = Interchange.from_smirnoff(
+            sage_unconstrained, tiled_top
+        )  # , charge_from_molecules=[pilot_mol])
         # UNRELATED QUESTION: why are waters not correctly recognized?
         # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml)
         interchange.box = box_vectors
         interchange.to_lammps(temp_lammps_path)
 
         # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
-        with temp_lammps_path.open('r') as lmp_file: # pull out text from temporary lammps file ...
-            all_lines = [line for line in lmp_file.read().split('\n') if line]          # ... separate by newlines, remove empty lines ...
-            atom_lines = all_lines[all_lines.index('Atoms')+1:all_lines.index('Bonds')] #... extract atoms block ...
-        temp_lammps_path.unlink() # ...and finally, unceremoniously kill the file once we're done with it
-            
-        ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info 
+        with temp_lammps_path.open(
+            "r"
+        ) as lmp_file:  # pull out text from temporary lammps file ...
+            all_lines = [
+                line for line in lmp_file.read().split("\n") if line
+            ]  # ... separate by newlines, remove empty lines ...
+            atom_lines = all_lines[
+                all_lines.index("Atoms") + 1 : all_lines.index("Bonds")
+            ]  # ... extract atoms block ...
+        temp_lammps_path.unlink()  # ...and finally, unceremoniously kill the file once we're done with it
+
+        ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info
         ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps
-        def extract_info_from_lammps_atom_lines(atom_line : str) -> Dict[str, Any]:
-            '''Parse atom info from a single atom-block linds in a .lmp/.lammps files'''
-            KEYWORDS : dict[str, Type]= {  # reference for the distinct 
-                'atom_index' : int, 
-                'molecule_index' : int, 
-                'atom_type' : int, 
-                'charge' : float,
-                'x-pos'  : float,
-                'y-pos'  : float,
-                'z-pos'  : float,
+        def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]:
+            """Parse atom info from a single atom-block linds in a .lmp/.lammps files"""
+            KEYWORDS: dict[str, type] = {  # reference for the distinct
+                "atom_index": int,
+                "molecule_index": int,
+                "atom_type": int,
+                "charge": float,
+                "x-pos": float,
+                "y-pos": float,
+                "z-pos": float,
             }
 
             return {
-                field_label : FieldType(str_val)
-                    for str_val, (field_label, FieldType) in zip(atom_line.split('\t'), KEYWORDS.items())
+                field_label: FieldType(str_val)
+                for str_val, (field_label, FieldType) in zip(
+                    atom_line.split("\t"), KEYWORDS.items()
+                )
             }
 
-        written_mol_ids : set[int] = set()
+        written_mol_ids: set[int] = set()
         for atom_line in atom_lines:
             atom_info = extract_info_from_lammps_atom_lines(atom_line)
-            written_mol_ids.add(atom_info['molecule_index'])
-        expected_mol_ids = set(i + 1 for i in range(interchange.topology.n_molecules)) # we'd like for each of the N molecules to have ids from [1...N]
+            written_mol_ids.add(atom_info["molecule_index"])
+        expected_mol_ids = {
+            i + 1 for i in range(interchange.topology.n_molecules)
+        }  # we'd like for each of the N molecules to have ids from [1...N]
 
-        return (expected_mol_ids == written_mol_ids)
\ No newline at end of file
+        return expected_mol_ids == written_mol_ids
diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index 015b6b2a6..f62dcc89e 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -295,9 +295,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
 
     for molecule_index, molecule in enumerate(interchange.topology.molecules):
         # molecule_hash = molecule.ordered_connection_table_hash()
-        molecule_hash = f'{molecule_index}_{molecule.ordered_connection_table_hash()}' # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs)
+        molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}"  # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs)
         # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
-        
+
         molecule_map[molecule_hash] = molecule_index
         for atom in molecule.atoms:
             atom_molecule_map[atom] = molecule_hash

From d988cbf7b102eabd0a7dbe70bce511c3296c69c8 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <52677403+timbernat@users.noreply.github.com>
Date: Mon, 15 Jul 2024 18:09:22 -0600
Subject: [PATCH 04/17] Apply suggestions from code review

Incorporated suggestions for unique mol ID unit test improvements

Co-authored-by: Matt Thompson <mattwthompson@protonmail.com>
---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py | 7 +++----
 openff/interchange/interop/lammps/export/export.py         | 5 +++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index bdef4d438..dd311ac4e 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -3,8 +3,7 @@
 
 import numpy
 import pytest
-from openff.toolkit import Molecule, Topology, unit
-from openff.toolkit.typing.engines.smirnoff import ForceField
+from openff.toolkit import Molecule, Topology, unit, ForceField
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
@@ -75,7 +74,7 @@ def test_unique_lammps_mol_ids(
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
         temp_lammps_path = (
             Path.cwd() / "temp.lmp"
-        )  # NOTE: lammps writer doesn't currently support IO-like object passing, so need to intercept output with temporary file
+        )
 
         # BUILD TOPOLOGY
         ## 0) generate pilot Molecule and conformer
@@ -168,4 +167,4 @@ def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]:
             i + 1 for i in range(interchange.topology.n_molecules)
         }  # we'd like for each of the N molecules to have ids from [1...N]
 
-        return expected_mol_ids == written_mol_ids
+        assert expected_mol_ids == written_mol_ids
diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index f62dcc89e..5978d83fc 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -294,8 +294,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
         atom_map[atom_index] = atom
 
     for molecule_index, molecule in enumerate(interchange.topology.molecules):
-        # molecule_hash = molecule.ordered_connection_table_hash()
-        molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}"  # salt with mol ID to allow chemically-identical Molecules to be labelled with distinct IDs)
+        # inject mol ID into hash to allow chemically-identical Molecules
+        # to be labelled with distinct IDs
+        molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}"
         # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
 
         molecule_map[molecule_hash] = molecule_index

From 2e3de08f256c259890f0f92f225b8bb9ab235604 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 00:14:09 +0000
Subject: [PATCH 05/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../interop/lammps/export/test_lammps.py      | 28 +++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index dd311ac4e..6f5e5ebd9 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -3,7 +3,7 @@
 
 import numpy
 import pytest
-from openff.toolkit import Molecule, Topology, unit, ForceField
+from openff.toolkit import ForceField, Molecule, Topology, unit
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
@@ -25,7 +25,10 @@ class TestLammps:
         ],
     )
     def test_to_lammps_single_mols(
-        self, mol: str, sage_unconstrained: ForceField, n_mols: int
+        self,
+        mol: str,
+        sage_unconstrained: ForceField,
+        n_mols: int,
     ) -> None:
         """
         Test that Interchange.to_openmm Interchange.to_lammps report sufficiently similar energies.
@@ -69,12 +72,13 @@ def test_to_lammps_single_mols(
         )
 
     def test_unique_lammps_mol_ids(
-        self, smiles: str, sage_unconstrained: ForceField, sidelen: int = 0
+        self,
+        smiles: str,
+        sage_unconstrained: ForceField,
+        sidelen: int = 0,
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
-        temp_lammps_path = (
-            Path.cwd() / "temp.lmp"
-        )
+        temp_lammps_path = Path.cwd() / "temp.lmp"
 
         # BUILD TOPOLOGY
         ## 0) generate pilot Molecule and conformer
@@ -99,7 +103,7 @@ def test_unique_lammps_mol_ids(
                         numpy.arange(sidelen) for _ in range(3)
                     ],  # the 3 here is for 3-dimensions
                 )
-            ]
+            ],
         )
 
         ## 3) build topology by tiling
@@ -107,7 +111,7 @@ def test_unique_lammps_mol_ids(
         for int_offset in xyz_offsets:
             mol = Molecule.from_smiles(smiles)
             mol.add_conformer(
-                (conf_centered + 2 * r_eff * int_offset).to("nm")
+                (conf_centered + 2 * r_eff * int_offset).to("nm"),
             )  # space copied by effective diameter
             tiled_top.add_molecule(mol)
 
@@ -119,7 +123,8 @@ def test_unique_lammps_mol_ids(
 
         # EXPORT TO LAMMPS
         interchange = Interchange.from_smirnoff(
-            sage_unconstrained, tiled_top
+            sage_unconstrained,
+            tiled_top,
         )  # , charge_from_molecules=[pilot_mol])
         # UNRELATED QUESTION: why are waters not correctly recognized?
         # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml)
@@ -128,7 +133,7 @@ def test_unique_lammps_mol_ids(
 
         # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
         with temp_lammps_path.open(
-            "r"
+            "r",
         ) as lmp_file:  # pull out text from temporary lammps file ...
             all_lines = [
                 line for line in lmp_file.read().split("\n") if line
@@ -155,7 +160,8 @@ def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]:
             return {
                 field_label: FieldType(str_val)
                 for str_val, (field_label, FieldType) in zip(
-                    atom_line.split("\t"), KEYWORDS.items()
+                    atom_line.split("\t"),
+                    KEYWORDS.items(),
                 )
             }
 

From 424de3ba75509b5b290cad53d741deacfe6d0ab6 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 18:15:05 -0600
Subject: [PATCH 06/17] Removed unnecessary typing imports

---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 6f5e5ebd9..c893a785b 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Any, Dict, Type
+from typing import Any
 
 import numpy
 import pytest

From 4c50cd337dbe432b916be50644354ae7abcf41b6 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 18:22:42 -0600
Subject: [PATCH 07/17] Consolidated pilot_mol init and conformer generation
 using MoleculeWithConformer

---
 .../unit_tests/interop/lammps/export/test_lammps.py   | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index c893a785b..4fd325de9 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -81,12 +81,9 @@ def test_unique_lammps_mol_ids(
         temp_lammps_path = Path.cwd() / "temp.lmp"
 
         # BUILD TOPOLOGY
-        ## 0) generate pilot Molecule and conformer
-        pilot_mol = Molecule.from_smiles(smiles)
-        pilot_mol.generate_conformers(n_conformers=1)
-        # pilot_mol.assign_partial_charges(partial_charge_method='gasteiger') # generate dummy charges for testing
-
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
+        pilot_mol = MoleculeWithConformer.from_smiles(smiles) # this will serve as a prototype for all other Molecule copies in the Topology
+
         conf = pilot_mol.conformers[0]
         COM = conf.mean(axis=0)
         conf_centered = conf - COM
@@ -125,9 +122,7 @@ def test_unique_lammps_mol_ids(
         interchange = Interchange.from_smirnoff(
             sage_unconstrained,
             tiled_top,
-        )  # , charge_from_molecules=[pilot_mol])
-        # UNRELATED QUESTION: why are waters not correctly recognized?
-        # PDB residue name on output is always UNK for Molecule.from_smiles("O"), EVEN when using tip3p.offxml)
+        )
         interchange.box = box_vectors
         interchange.to_lammps(temp_lammps_path)
 

From 46505da0cceee3b8c36c1da6910792d75af0ae4a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 00:23:09 +0000
Subject: [PATCH 08/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py    | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 4fd325de9..006a198fd 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -82,7 +82,9 @@ def test_unique_lammps_mol_ids(
 
         # BUILD TOPOLOGY
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
-        pilot_mol = MoleculeWithConformer.from_smiles(smiles) # this will serve as a prototype for all other Molecule copies in the Topology
+        pilot_mol = MoleculeWithConformer.from_smiles(
+            smiles
+        )  # this will serve as a prototype for all other Molecule copies in the Topology
 
         conf = pilot_mol.conformers[0]
         COM = conf.mean(axis=0)

From 8c1fc7bb2dc0061e73bcb596bdac683579833c16 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 18:27:48 -0600
Subject: [PATCH 09/17] Updated sidelen default, added assertion to enforce
 test for multiple Molecules

---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 006a198fd..2a1f5cc62 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -75,10 +75,11 @@ def test_unique_lammps_mol_ids(
         self,
         smiles: str,
         sage_unconstrained: ForceField,
-        sidelen: int = 0,
+        sidelen: int = 2,
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
         temp_lammps_path = Path.cwd() / "temp.lmp"
+        assert(isinstance(sidelen, int) and sidelen > 1)
 
         # BUILD TOPOLOGY
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)

From 99d3fdf16b341800e6e4810f448d787ee238d4e9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 00:30:51 +0000
Subject: [PATCH 10/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 2a1f5cc62..60d4e2edc 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -79,12 +79,12 @@ def test_unique_lammps_mol_ids(
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
         temp_lammps_path = Path.cwd() / "temp.lmp"
-        assert(isinstance(sidelen, int) and sidelen > 1)
+        assert isinstance(sidelen, int) and sidelen > 1
 
         # BUILD TOPOLOGY
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
         pilot_mol = MoleculeWithConformer.from_smiles(
-            smiles
+            smiles,
         )  # this will serve as a prototype for all other Molecule copies in the Topology
 
         conf = pilot_mol.conformers[0]

From 5266a28e7786f88cec155d5b8e253bfbd06a7fb8 Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 20:02:01 -0600
Subject: [PATCH 11/17] Wrapped mol ID + CTAB hash with hash() to produce more
 literal value for molecule_hash

---
 openff/interchange/interop/lammps/export/export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index 5978d83fc..ff5036b75 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -296,7 +296,7 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
     for molecule_index, molecule in enumerate(interchange.topology.molecules):
         # inject mol ID into hash to allow chemically-identical Molecules
         # to be labelled with distinct IDs
-        molecule_hash = f"{molecule_index}_{molecule.ordered_connection_table_hash()}"
+        molecule_hash = hash(f"{molecule_index}_{molecule.ordered_connection_table_hash()}")
         # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
 
         molecule_map[molecule_hash] = molecule_index

From 8b72acde8e0c70cded0a81ad5d54b4e68be3660e Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 20:04:34 -0600
Subject: [PATCH 12/17] Re-implemented LAMMPS file molecule ID extraction with
 in-Python lammps interface

---
 .../interop/lammps/export/test_lammps.py      | 80 ++++++++-----------
 1 file changed, 32 insertions(+), 48 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 2a1f5cc62..27d2e524e 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -1,13 +1,14 @@
-from pathlib import Path
-from typing import Any
-
 import numpy
 import pytest
+import lammps
+from pathlib import Path
+
 from openff.toolkit import ForceField, Molecule, Topology, unit
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
 from openff.interchange.drivers import get_lammps_energies, get_openmm_energies
+from openff.interchange.components.mdconfig import MDConfig
 
 
 @needs_lmp
@@ -78,9 +79,16 @@ def test_unique_lammps_mol_ids(
         sidelen: int = 2,
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
-        temp_lammps_path = Path.cwd() / "temp.lmp"
         assert(isinstance(sidelen, int) and sidelen > 1)
 
+        # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is
+        # the filename hard-coded into the read_data output of MDConfig.write_lammps_input()
+        # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?)
+        cwd = Path.cwd()
+        lmp_file_name : str = 'temp'
+        lammps_input_path = cwd / f'{lmp_file_name}.in'  
+        lammps_data_path  = cwd / 'out.lmp' 
+
         # BUILD TOPOLOGY
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
         pilot_mol = MoleculeWithConformer.from_smiles(
@@ -112,7 +120,7 @@ def test_unique_lammps_mol_ids(
             mol = Molecule.from_smiles(smiles)
             mol.add_conformer(
                 (conf_centered + 2 * r_eff * int_offset).to("nm"),
-            )  # space copied by effective diameter
+            )  # space copies by effective diameter
             tiled_top.add_molecule(mol)
 
         ## 3a) set periodic box tightly around extremem positions in filled topology
@@ -127,48 +135,24 @@ def test_unique_lammps_mol_ids(
             tiled_top,
         )
         interchange.box = box_vectors
-        interchange.to_lammps(temp_lammps_path)
-
-        # EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
-        with temp_lammps_path.open(
-            "r",
-        ) as lmp_file:  # pull out text from temporary lammps file ...
-            all_lines = [
-                line for line in lmp_file.read().split("\n") if line
-            ]  # ... separate by newlines, remove empty lines ...
-            atom_lines = all_lines[
-                all_lines.index("Atoms") + 1 : all_lines.index("Bonds")
-            ]  # ... extract atoms block ...
-        temp_lammps_path.unlink()  # ...and finally, unceremoniously kill the file once we're done with it
-
-        ## SIDENOTE: would be nice if there was an easier, string-parsing-free way to extract this info
-        ## Could enable some kind of Interchange.from_lammps() functionality in the future, perhaps
-        def extract_info_from_lammps_atom_lines(atom_line: str) -> dict[str, Any]:
-            """Parse atom info from a single atom-block linds in a .lmp/.lammps files"""
-            KEYWORDS: dict[str, type] = {  # reference for the distinct
-                "atom_index": int,
-                "molecule_index": int,
-                "atom_type": int,
-                "charge": float,
-                "x-pos": float,
-                "y-pos": float,
-                "z-pos": float,
-            }
-
-            return {
-                field_label: FieldType(str_val)
-                for str_val, (field_label, FieldType) in zip(
-                    atom_line.split("\t"),
-                    KEYWORDS.items(),
-                )
-            }
-
-        written_mol_ids: set[int] = set()
-        for atom_line in atom_lines:
-            atom_info = extract_info_from_lammps_atom_lines(atom_line)
-            written_mol_ids.add(atom_info["molecule_index"])
-        expected_mol_ids = {
-            i + 1 for i in range(interchange.topology.n_molecules)
-        }  # we'd like for each of the N molecules to have ids from [1...N]
+        interchange.to_lammps(lammps_data_path)
+
+        mdconfig = MDConfig.from_interchange(interchange)
+        mdconfig.write_lammps_input(interchange=interchange, input_file=lammps_input_path)
+
+        ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
+        with lammps.lammps(cmdargs=['-screen', 'none', '-log', 'none']) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files
+            lmp.file('temp.in') # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file
+            written_mol_ids = set(
+                mol_id
+                    for _, mol_id in zip(range(lmp.get_natoms()), lmp.extract_atom('molecule')) 
+            )  # need to zip with range capped at n_atoms, otherwise will iterate forever
+        
+        # dispose of the lammps files once we've read them to leave no trace on disc
+        lammps_input_path.unlink() 
+        lammps_data_path.unlink() 
+
+        # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N]
+        expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)}  
 
         assert expected_mol_ids == written_mol_ids

From e3f1d27416f150a9799d25b07401a35f46fe4fc5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 02:14:18 +0000
Subject: [PATCH 13/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../interop/lammps/export/test_lammps.py      | 44 +++++++++++--------
 .../interop/lammps/export/export.py           |  4 +-
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 4decb2657..1b46068b2 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -1,14 +1,14 @@
-import numpy
-import pytest
-import lammps
 from pathlib import Path
 
+import lammps
+import numpy
+import pytest
 from openff.toolkit import ForceField, Molecule, Topology, unit
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
-from openff.interchange.drivers import get_lammps_energies, get_openmm_energies
 from openff.interchange.components.mdconfig import MDConfig
+from openff.interchange.drivers import get_lammps_energies, get_openmm_energies
 
 
 @needs_lmp
@@ -79,15 +79,15 @@ def test_unique_lammps_mol_ids(
         sidelen: int = 2,
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
-        assert(isinstance(sidelen, int) and sidelen > 1)
+        assert isinstance(sidelen, int) and sidelen > 1
 
         # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is
         # the filename hard-coded into the read_data output of MDConfig.write_lammps_input()
         # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?)
         cwd = Path.cwd()
-        lmp_file_name : str = 'temp'
-        lammps_input_path = cwd / f'{lmp_file_name}.in'  
-        lammps_data_path  = cwd / 'out.lmp' 
+        lmp_file_name: str = "temp"
+        lammps_input_path = cwd / f"{lmp_file_name}.in"
+        lammps_data_path = cwd / "out.lmp"
 
         # BUILD TOPOLOGY
         ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
@@ -138,21 +138,29 @@ def test_unique_lammps_mol_ids(
         interchange.to_lammps(lammps_data_path)
 
         mdconfig = MDConfig.from_interchange(interchange)
-        mdconfig.write_lammps_input(interchange=interchange, input_file=lammps_input_path)
+        mdconfig.write_lammps_input(
+            interchange=interchange, input_file=lammps_input_path
+        )
 
         ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
-        with lammps.lammps(cmdargs=['-screen', 'none', '-log', 'none']) as lmp: # Ask LAMMPS nicely not to spam console or produce stray log files
-            lmp.file('temp.in') # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file
-            written_mol_ids = set(
+        with lammps.lammps(
+            cmdargs=["-screen", "none", "-log", "none"]
+        ) as lmp:  # Ask LAMMPS nicely not to spam console or produce stray log files
+            lmp.file(
+                "temp.in"
+            )  # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file
+            written_mol_ids = {
                 mol_id
-                    for _, mol_id in zip(range(lmp.get_natoms()), lmp.extract_atom('molecule')) 
-            )  # need to zip with range capped at n_atoms, otherwise will iterate forever
-        
+                for _, mol_id in zip(
+                    range(lmp.get_natoms()), lmp.extract_atom("molecule")
+                )
+            }  # need to zip with range capped at n_atoms, otherwise will iterate forever
+
         # dispose of the lammps files once we've read them to leave no trace on disc
-        lammps_input_path.unlink() 
-        lammps_data_path.unlink() 
+        lammps_input_path.unlink()
+        lammps_data_path.unlink()
 
         # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N]
-        expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)}  
+        expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)}
 
         assert expected_mol_ids == written_mol_ids
diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index ff5036b75..97ec71517 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -296,7 +296,9 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
     for molecule_index, molecule in enumerate(interchange.topology.molecules):
         # inject mol ID into hash to allow chemically-identical Molecules
         # to be labelled with distinct IDs
-        molecule_hash = hash(f"{molecule_index}_{molecule.ordered_connection_table_hash()}")
+        molecule_hash = hash(
+            f"{molecule_index}_{molecule.ordered_connection_table_hash()}"
+        )
         # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
 
         molecule_map[molecule_hash] = molecule_index

From 652ad4359a48bd03aed9f9a06bbdfb05cfc5a25a Mon Sep 17 00:00:00 2001
From: Timotej Bernat <tibe3324@colorado.edu>
Date: Mon, 15 Jul 2024 20:26:18 -0600
Subject: [PATCH 14/17] Added pytest parametrization of auxiliary args in
 test_unique_lammps_mol_ids()

---
 .../unit_tests/interop/lammps/export/test_lammps.py   | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 1b46068b2..3a191d8ce 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -72,6 +72,17 @@ def test_to_lammps_single_mols(
             },
         )
 
+    @pytest.mark.parametrize("sidelen", [2, 3])
+    @pytest.mark.parametrize(
+        "smiles",
+        [ 
+            "O",   # nothing particularly special about these molecules
+            "CCO", # just testing that unique IDs hold for diverse chemistries
+            "N1CCCC1",
+            "c1cccc1c",
+
+        ]
+    )
     def test_unique_lammps_mol_ids(
         self,
         smiles: str,

From e4ffb11be294d7a7f2d1b1966366accdbc7eef32 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 16 Jul 2024 02:26:47 +0000
Subject: [PATCH 15/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../interop/lammps/export/test_lammps.py      | 19 ++++++++++---------
 .../interop/lammps/export/export.py           |  2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index 3a191d8ce..e00a979ae 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -75,13 +75,12 @@ def test_to_lammps_single_mols(
     @pytest.mark.parametrize("sidelen", [2, 3])
     @pytest.mark.parametrize(
         "smiles",
-        [ 
-            "O",   # nothing particularly special about these molecules
-            "CCO", # just testing that unique IDs hold for diverse chemistries
+        [
+            "O",  # nothing particularly special about these molecules
+            "CCO",  # just testing that unique IDs hold for diverse chemistries
             "N1CCCC1",
             "c1cccc1c",
-
-        ]
+        ],
     )
     def test_unique_lammps_mol_ids(
         self,
@@ -150,20 +149,22 @@ def test_unique_lammps_mol_ids(
 
         mdconfig = MDConfig.from_interchange(interchange)
         mdconfig.write_lammps_input(
-            interchange=interchange, input_file=lammps_input_path
+            interchange=interchange,
+            input_file=lammps_input_path,
         )
 
         ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
         with lammps.lammps(
-            cmdargs=["-screen", "none", "-log", "none"]
+            cmdargs=["-screen", "none", "-log", "none"],
         ) as lmp:  # Ask LAMMPS nicely not to spam console or produce stray log files
             lmp.file(
-                "temp.in"
+                "temp.in",
             )  # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file
             written_mol_ids = {
                 mol_id
                 for _, mol_id in zip(
-                    range(lmp.get_natoms()), lmp.extract_atom("molecule")
+                    range(lmp.get_natoms()),
+                    lmp.extract_atom("molecule"),
                 )
             }  # need to zip with range capped at n_atoms, otherwise will iterate forever
 
diff --git a/openff/interchange/interop/lammps/export/export.py b/openff/interchange/interop/lammps/export/export.py
index 97ec71517..8d2b5927c 100644
--- a/openff/interchange/interop/lammps/export/export.py
+++ b/openff/interchange/interop/lammps/export/export.py
@@ -297,7 +297,7 @@ def _write_atoms(lmp_file: IO, interchange: Interchange, atom_type_map: dict):
         # inject mol ID into hash to allow chemically-identical Molecules
         # to be labelled with distinct IDs
         molecule_hash = hash(
-            f"{molecule_index}_{molecule.ordered_connection_table_hash()}"
+            f"{molecule_index}_{molecule.ordered_connection_table_hash()}",
         )
         # TODO: see if this fix also applied for the same issue in GROMACS/AMBER writers
 

From 04bfc1dfe6fc0ab55307ee41652db4e3f6cc993c Mon Sep 17 00:00:00 2001
From: "Matthew W. Thompson" <mattwthompson@protonmail.com>
Date: Tue, 16 Jul 2024 11:53:18 -0500
Subject: [PATCH 16/17] MAINT: Streamline, skip generating positions

---
 .../interop/lammps/export/test_lammps.py      | 130 ++++++------------
 1 file changed, 45 insertions(+), 85 deletions(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index e00a979ae..aa1071bed 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -3,13 +3,16 @@
 import lammps
 import numpy
 import pytest
-from openff.toolkit import ForceField, Molecule, Topology, unit
+from openff.toolkit import ForceField, Quantity, Topology, unit
+from openff.utilities import temporary_cd
 
 from openff.interchange import Interchange
 from openff.interchange._tests import MoleculeWithConformer, needs_lmp
 from openff.interchange.components.mdconfig import MDConfig
 from openff.interchange.drivers import get_lammps_energies, get_openmm_energies
 
+rng = numpy.random.default_rng(821)
+
 
 @needs_lmp
 class TestLammps:
@@ -72,107 +75,64 @@ def test_to_lammps_single_mols(
             },
         )
 
-    @pytest.mark.parametrize("sidelen", [2, 3])
+    @pytest.mark.parametrize("n_mols", [2, 3])
     @pytest.mark.parametrize(
         "smiles",
         [
-            "O",  # nothing particularly special about these molecules
-            "CCO",  # just testing that unique IDs hold for diverse chemistries
+            "CCO",
             "N1CCCC1",
             "c1cccc1c",
         ],
     )
     def test_unique_lammps_mol_ids(
         self,
-        smiles: str,
-        sage_unconstrained: ForceField,
-        sidelen: int = 2,
+        smiles,
+        sage_unconstrained,
+        n_mols,
     ) -> bool:
         """Test to see if interop.lammps.export._write_atoms() writes unique ids for each distinct Molecule"""
-        assert isinstance(sidelen, int) and sidelen > 1
-
-        # NOTE: the input file name can have an arbitrary name, but the data file !MUST! be named "out.lmp", as this is
-        # the filename hard-coded into the read_data output of MDConfig.write_lammps_input()
-        # Would be really nice to have more control over this programmatically in the future (perhaps have optional "data_file" kwarg?)
-        cwd = Path.cwd()
-        lmp_file_name: str = "temp"
-        lammps_input_path = cwd / f"{lmp_file_name}.in"
-        lammps_data_path = cwd / "out.lmp"
-
-        # BUILD TOPOLOGY
-        ## 1) compute effective radius as the greatest atomic distance from barycenter (avoids collisions when tiling)
-        pilot_mol = MoleculeWithConformer.from_smiles(
-            smiles,
-        )  # this will serve as a prototype for all other Molecule copies in the Topology
-
-        conf = pilot_mol.conformers[0]
-        COM = conf.mean(axis=0)
-        conf_centered = conf - COM
-
-        radii = numpy.linalg.norm(conf_centered, axis=1)
-        r_eff = radii.max()
-
-        ## 2) generate 3D integer lattice to tile mols onto
-        xyz_offsets = numpy.column_stack(
-            [  # integral offsets from 0...(sidelen - 1) along 3 axes
-                axis_offsets.ravel()
-                for axis_offsets in numpy.meshgrid(
-                    *[
-                        numpy.arange(sidelen) for _ in range(3)
-                    ],  # the 3 here is for 3-dimensions
-                )
-            ],
-        )
 
-        ## 3) build topology by tiling
-        tiled_top = Topology()
-        for int_offset in xyz_offsets:
-            mol = Molecule.from_smiles(smiles)
-            mol.add_conformer(
-                (conf_centered + 2 * r_eff * int_offset).to("nm"),
-            )  # space copies by effective diameter
-            tiled_top.add_molecule(mol)
-
-        ## 3a) set periodic box tightly around extremem positions in filled topology
-        box_dims = tiled_top.get_positions().ptp(axis=0)
-        box_vectors = (
-            numpy.eye(3) * box_dims
-        )  # convert to diagonal matrix to get proper shape
-
-        # EXPORT TO LAMMPS
-        interchange = Interchange.from_smirnoff(
-            sage_unconstrained,
-            tiled_top,
-        )
-        interchange.box = box_vectors
-        interchange.to_lammps(lammps_data_path)
+        molecule = MoleculeWithConformer.from_smiles(smiles)
+        topology = Topology.from_molecules(n_mols * [molecule])
 
-        mdconfig = MDConfig.from_interchange(interchange)
-        mdconfig.write_lammps_input(
-            interchange=interchange,
-            input_file=lammps_input_path,
+        # Just use random positions since we're testing molecule IDs, not physics
+        topology.set_positions(
+            Quantity(
+                rng.random((topology.n_atoms, 3)),
+                "nanometer",
+            ),
         )
+        topology.box_vectors = Quantity([4, 4, 4], "nanometer")
 
-        ## 4) EXTRACT ATOM INFO FROM WRITTEN LAMMPS FILE TO TEST IF MOLEUCLE IDS ARE BEING WRITTEN CORRECTLY
-        with lammps.lammps(
-            cmdargs=["-screen", "none", "-log", "none"],
-        ) as lmp:  # Ask LAMMPS nicely not to spam console or produce stray log files
-            lmp.file(
-                "temp.in",
-            )  # can't use lmp.command('read_data ...'), as atom/bond/pair/dihedral styles are not set in the Interchange-generated LAMMPS data file
-            written_mol_ids = {
-                mol_id
-                for _, mol_id in zip(
-                    range(lmp.get_natoms()),
-                    lmp.extract_atom("molecule"),
-                )
-            }  # need to zip with range capped at n_atoms, otherwise will iterate forever
+        with temporary_cd():
+            lammps_input_path = Path.cwd() / "temp.in"
+            lammps_data_path = Path.cwd() / "out.lmp"
 
-        # dispose of the lammps files once we've read them to leave no trace on disc
-        lammps_input_path.unlink()
-        lammps_data_path.unlink()
+            interchange = sage_unconstrained.create_interchange(topology)
+            interchange.to_lammps(lammps_data_path)
 
-        # if all has gone well, we'd expect for each of the N molecules to have ids from [1...N]
+            mdconfig = MDConfig.from_interchange(interchange)
+            mdconfig.write_lammps_input(
+                interchange=interchange,
+                input_file=lammps_input_path,
+            )
+
+            # Extract molecule IDs from data file
+            with lammps.lammps(
+                cmdargs=["-screen", "none", "-log", "none"],
+            ) as lmp:
+                lmp.file(
+                    "temp.in",
+                )
+                written_mol_ids = {
+                    mol_id
+                    for _, mol_id in zip(
+                        range(lmp.get_natoms()),
+                        lmp.extract_atom("molecule"),
+                    )
+                }
+
+        # these are expected to be [1...N] for each of N molecules
         expected_mol_ids = {i + 1 for i in range(interchange.topology.n_molecules)}
 
         assert expected_mol_ids == written_mol_ids

From 2911b38a859ff70681e008cfb107a19f28164421 Mon Sep 17 00:00:00 2001
From: "Matthew W. Thompson" <mattwthompson@protonmail.com>
Date: Tue, 16 Jul 2024 12:11:21 -0500
Subject: [PATCH 17/17] FIX: Use benzene, not fulvene

---
 .../_tests/unit_tests/interop/lammps/export/test_lammps.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
index aa1071bed..ad5fa3330 100644
--- a/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
+++ b/openff/interchange/_tests/unit_tests/interop/lammps/export/test_lammps.py
@@ -81,7 +81,7 @@ def test_to_lammps_single_mols(
         [
             "CCO",
             "N1CCCC1",
-            "c1cccc1c",
+            "c1ccccc1",
         ],
     )
     def test_unique_lammps_mol_ids(