diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f73c256a..b2a67b5a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -125,7 +125,7 @@ jobs: python devtools/scripts/molecule-regressions.py - name: Run mypy - if: ${{ matrix.python-version == '3.10' }} + if: ${{ matrix.python-version == '3.11' }} run: | # As of 01/23, JAX with mypy is too slow to use without a pre-built cache # https://github.com/openforcefield/openff-interchange/pull/578#issuecomment-1369979875 diff --git a/docs/releasehistory.md b/docs/releasehistory.md index e41babb1..0291d588 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -23,6 +23,8 @@ Please note that all releases prior to a version 1.0.0 are considered pre-releas * Previously-deprecated examples are removed. * `ProperTorsionKey` no longer accepts an empty tuple as atom indices. * Fixes a regression in which some `ElectrostaticsCollection.charges` properties did not return cached values. +* The `charge_from_molecules` argument must include only molecules that contain partial charges and are non-isomorphic with each other. +* The `charge_from_molecules` argument as used by the OpenFF Toolkit is handled internally as `molecules_with_preset_charges`. ## 0.3.30 - 2024-08 diff --git a/docs/using/edges.md b/docs/using/edges.md index 06211d4a..b7795b9b 100644 --- a/docs/using/edges.md +++ b/docs/using/edges.md @@ -1,5 +1,34 @@ # Sharp edges +## Quirks of charge assignment + +### Charge assignment hierarchy + +Interchange, following the [SMIRNOFF specification](https://openforcefield.github.io/standards/standards/smirnoff/#partial-charge-and-electrostatics-models), assigns charges to (heavy) atoms with the following priority: + +1. **Preset charges**: Look for molecule matches in the `charge_from_molecules` argument +2. **Library charges**: Look for chemical environment matches in library charges +3. **Charge increments**: Look for chemical environment matches in charge increments +4. **AM1-BCC**: Try to run some variant of AM1-BCC (presumably this is were graph charges go) + +If charges are successfully assigned using a method, no lower-priority methods in this list are attempted. For example: + +* A force field with library charge parameters for peptides (i.e. a biopolymer force field, covering all appropriate residues) will NOT try to call AM1-BCC on a biopolymers. +* If a ligand is successfully assigned preset charges, chemical environment matching of library charges and charge increments will be skipped, as will AM1-BCC. +* If a variant of AM1-BCC (i.e. using something other than AM1 and/or using custom BCCs) is encoded in a `` section, other AM1-BCC implementations will not be called. +* If preset charges are not provided, a force field like OpenFF's Parsley or Sage lines without (many) library charges or charge increments will attempt AM1-BCC on all molecules (except water and monoatomic ions). + +After all of these steps are complete and all heavy atoms given partial charges, virtual sites are assigned charges using the values of `charge_increment`s in the virtual site parameters. Because virtual site charge are only described by the force field, using preset charges with virtual sites is discouraged. + +### Preset charges + +The following restrictions are in place when using preset charges: + +* All molecules in the the `charge_from_molecules` list must be non-isomorphic with each other. +* All molecules in the the `charge_from_molecules` list must have partial charges. + +Using preset charges with virtual sites is discouraged as it can provide surprising results. + ## Quirks of `from_openmm` ### Modified masses are ignored diff --git a/openff/interchange/_tests/conftest.py b/openff/interchange/_tests/conftest.py index e9fa9cbe..25a2ff70 100644 --- a/openff/interchange/_tests/conftest.py +++ b/openff/interchange/_tests/conftest.py @@ -84,7 +84,7 @@ def sage_with_sigma_hole(sage): smirks="[#6:1]-[#17:2]", distance=1.4 * unit.angstrom, type="BondCharge", - match="once", + match="all_permutations", charge_increment1=0.1 * unit.elementary_charge, charge_increment2=0.2 * unit.elementary_charge, ), diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_create.py b/openff/interchange/_tests/unit_tests/smirnoff/test_create.py index 58e70516..84959447 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_create.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_create.py @@ -7,6 +7,7 @@ from openff.interchange._tests import get_test_file_path from openff.interchange.constants import kcal_mol, kcal_mol_a2 from openff.interchange.exceptions import ( + PresetChargesError, UnassignedAngleError, UnassignedBondError, UnassignedTorsionError, @@ -21,6 +22,7 @@ _upconvert_vdw_handler, library_charge_from_molecule, ) +from openff.interchange.warnings import PresetChargesAndVirtualSitesWarning def _get_interpolated_bond_k(bond_handler) -> float: @@ -160,7 +162,8 @@ def test_library_charges_from_molecule(): assert library_charges.charge == [*mol.partial_charges] -class TestChargeFromMolecules: +class TestPresetCharges: + @pytest.mark.slow def test_charge_from_molecules_basic(self, sage): molecule = Molecule.from_smiles("CCO") molecule.assign_partial_charges(partial_charge_method="am1bcc") @@ -231,6 +234,98 @@ def test_charges_from_molecule_reordered( assert numpy.allclose(expected_charges, found_charges) + def test_error_when_any_missing_partial_charges(self, sage): + topology = Topology.from_molecules( + [ + Molecule.from_smiles("C"), + Molecule.from_smiles("CCO"), + ], + ) + + with pytest.raises( + PresetChargesError, + match="All.*must have partial charges", + ): + sage.create_interchange( + topology, + charge_from_molecules=[Molecule.from_smiles("C")], + ) + + def test_error_multiple_isomorphic_molecules(self, sage, ethanol, reversed_ethanol): + # these ethanol fixtures *do* have charges, if they didn't have charges it would be + # ambiguous which error should be raised + topology = Topology.from_molecules([ethanol, reversed_ethanol, ethanol]) + + with pytest.raises( + PresetChargesError, + match="All molecules.*isomorphic.*unique", + ): + sage.create_interchange( + topology, + charge_from_molecules=[ethanol, reversed_ethanol], + ) + + def test_virtual_site_charge_increments_applied_after_preset_charges_water( + self, + tip4p, + water, + ): + """ + Test that charge increments to/from virtual sites are still applied after preset charges are set. + + This is funky user input, since preset charges override library charges, which are important for water. + """ + water.partial_charges = Quantity( + [-2.0, 1.0, 1.0], + "elementary_charge", + ) + + with pytest.warns(PresetChargesAndVirtualSitesWarning): + # This dict is probably ordered O H H VS + charges = tip4p.create_interchange( + water.to_topology(), + charge_from_molecules=[water], + )["Electrostatics"].charges + + # tip4p_fb.offxml is meant to result in charges of -1.0517362213526 (VS) 0 (O) and 0.5258681106763 (H) + # the force field has 0.0 for all library charges (skipping AM1-BCC), so preset charges screw this up. + # the resulting charges, if using preset charges of [-2, 1, 1], should be the result of adding together + # [-2, 1, 1, 0] (preset charges) and + # [0, 1.5258681106763001, 1.5258681106763001, -1.0517362213526] (charge increments) + + numpy.testing.assert_allclose( + [charge.m for charge in charges.values()], + [-2.0, 1.5258681106763001, 1.5258681106763001, -1.0517362213526], + ) + + def test_virtual_site_charge_increments_applied_after_preset_charges_ligand( + self, + sage_with_sigma_hole, + ): + ligand = Molecule.from_mapped_smiles("[C:1]([Cl:2])([Cl:3])([Cl:4])[Cl:5]") + + ligand.partial_charges = Quantity( + [1.2, -0.3, -0.3, -0.3, -0.3], + "elementary_charge", + ) + + with pytest.warns(PresetChargesAndVirtualSitesWarning): + # This dict is probably ordered C Cl Cl Cl Cl VS VS VS VS + charges = sage_with_sigma_hole.create_interchange( + ligand.to_topology(), + charge_from_molecules=[ligand], + )["Electrostatics"].charges + + # this fake sigma hole parameter shifts 0.1 e from the carbon and 0.2 e from the chlorine, so the + # resulting charges should be like + # [1.2, -0.3, -0.3, -0.3, -0.3] + + # [0.4, 0.2, 0.2, 0.2, 0.2, -0.3, -0.3, -0.3, -0.3] + + numpy.testing.assert_allclose( + [charge.m for charge in charges.values()], + [1.6, -0.1, -0.1, -0.1, -0.1, -0.3, -0.3, -0.3, -0.3], + ) + @skip_if_missing("openmm") class TestPartialBondOrdersFromMolecules: diff --git a/openff/interchange/common/_nonbonded.py b/openff/interchange/common/_nonbonded.py index d55b0de6..be2b5202 100644 --- a/openff/interchange/common/_nonbonded.py +++ b/openff/interchange/common/_nonbonded.py @@ -101,7 +101,7 @@ class ElectrostaticsCollection(_NonbondedCollection): nonperiodic_potential: Literal["Coulomb", "cutoff", "no-cutoff"] = Field("Coulomb") exception_potential: Literal["Coulomb"] = Field("Coulomb") - _charges: dict[Any, _ElementaryChargeQuantity] = PrivateAttr(default_factory=dict) + _charges: dict[Any, _ElementaryChargeQuantity] = PrivateAttr() _charges_cached: bool = PrivateAttr(default=False) @computed_field # type: ignore[misc] @@ -110,7 +110,7 @@ def charges( self, ) -> dict[TopologyKey | LibraryChargeTopologyKey | VirtualSiteKey, _ElementaryChargeQuantity]: """Get the total partial charge on each atom, including virtual sites.""" - if len(self._charges) == 0 or self._charges_cached is False: # type: ignore[has-type] + if len(self._charges) == 0 or self._charges_cached is False: self._charges.update(self._get_charges(include_virtual_sites=False)) self._charges_cached = True diff --git a/openff/interchange/components/interchange.py b/openff/interchange/components/interchange.py index 4b3fd94e..f7261d4e 100644 --- a/openff/interchange/components/interchange.py +++ b/openff/interchange/components/interchange.py @@ -109,7 +109,9 @@ def from_smirnoff( positions are taken from the molecules in topology, if present on all molecules. charge_from_molecules : `List[openff.toolkit.molecule.Molecule]`, optional If specified, partial charges will be taken from the given molecules - instead of being determined by the force field. + instead of being determined by the force field. All molecules in this list + must have partial charges assigned and must not be isomorphic with any other + molecules in the list. partial_bond_orders_from_molecules : List[openff.toolkit.molecule.Molecule], optional If specified, partial bond orders will be taken from the given molecules instead of being determined by the force field. @@ -144,7 +146,7 @@ def from_smirnoff( topology=topology, box=box, positions=positions, - charge_from_molecules=charge_from_molecules, + molecules_with_preset_charges=charge_from_molecules, partial_bond_orders_from_molecules=partial_bond_orders_from_molecules, allow_nonintegral_charges=allow_nonintegral_charges, ) diff --git a/openff/interchange/exceptions.py b/openff/interchange/exceptions.py index be0967d9..6cf325d8 100644 --- a/openff/interchange/exceptions.py +++ b/openff/interchange/exceptions.py @@ -314,6 +314,12 @@ class MissingPartialChargesError(InterchangeException): """ +class PresetChargesError(InterchangeException): + """ + The `charge_from_molecules` is structured in an undefined way. + """ + + class UnassignedValenceError(InterchangeException): """Exception raised when there are valence terms for which a ParameterHandler did not find matches.""" diff --git a/openff/interchange/foyer/_nonbonded.py b/openff/interchange/foyer/_nonbonded.py index bfc94310..e3f180a4 100644 --- a/openff/interchange/foyer/_nonbonded.py +++ b/openff/interchange/foyer/_nonbonded.py @@ -60,7 +60,7 @@ class FoyerElectrostaticsHandler(ElectrostaticsCollection): force_field_key: str = "atoms" cutoff: _DistanceQuantity = 9.0 * unit.angstrom - _charges: dict[TopologyKey, Quantity] = PrivateAttr(default_factory=dict) # type: ignore[assignment] + _charges: dict[TopologyKey, Quantity] = PrivateAttr(dict()) def store_charges( self, diff --git a/openff/interchange/smirnoff/_create.py b/openff/interchange/smirnoff/_create.py index 56dfab7d..6411eac9 100644 --- a/openff/interchange/smirnoff/_create.py +++ b/openff/interchange/smirnoff/_create.py @@ -1,3 +1,5 @@ +import warnings + from openff.toolkit import ForceField, Molecule, Quantity, Topology from openff.toolkit.typing.engines.smirnoff import ParameterHandler from openff.toolkit.typing.engines.smirnoff.plugins import load_handler_plugins @@ -8,6 +10,7 @@ from openff.interchange.components.toolkit import _check_electrostatics_handlers from openff.interchange.exceptions import ( MissingParameterHandlerError, + PresetChargesError, SMIRNOFFHandlersNotImplementedError, ) from openff.interchange.plugins import load_smirnoff_plugins @@ -25,6 +28,7 @@ SMIRNOFFProperTorsionCollection, ) from openff.interchange.smirnoff._virtual_sites import SMIRNOFFVirtualSiteCollection +from openff.interchange.warnings import PresetChargesAndVirtualSitesWarning _SUPPORTED_PARAMETER_HANDLERS: set[str] = { "Constraints", @@ -93,17 +97,61 @@ def validate_topology(value): ) +def _preprocess_preset_charges( + molecules_with_preset_charges: list[Molecule] | None, +) -> list[Molecule] | None: + """ + Pre-process the molecules_with_preset_charges argument. + + If molecules_with_preset_charges is None, return None. + + If molecules_with_preset_charges is list[Molecule], ensure that + + 1. The input is a list of Molecules + 2. Each molecule has assign partial charges + 3. Ensure no molecules are isomorphic with another in the list + + """ + if molecules_with_preset_charges is None: + return None + + molecule_set = {molecule.to_smiles() for molecule in molecules_with_preset_charges} + + if len(molecule_set) != len(molecules_with_preset_charges): + raise PresetChargesError( + "All molecules in the molecules_with_preset_charges list must be isomorphically unique from each other", + ) + + for molecule in molecules_with_preset_charges: + if molecule.partial_charges is None: + raise PresetChargesError( + "All molecules in the molecules_with_preset_charges list must have partial charges assigned.", + ) + + return molecules_with_preset_charges + + def _create_interchange( force_field: ForceField, topology: Topology | list[Molecule], box: Quantity | None = None, positions: Quantity | None = None, - charge_from_molecules: list[Molecule] | None = None, + molecules_with_preset_charges: list[Molecule] | None = None, partial_bond_orders_from_molecules: list[Molecule] | None = None, allow_nonintegral_charges: bool = False, ) -> Interchange: + molecules_with_preset_charges = _preprocess_preset_charges(molecules_with_preset_charges) + _check_supported_handlers(force_field) + if molecules_with_preset_charges is not None and "VirtualSites" in force_field.registered_parameter_handlers: + warnings.warn( + "Preset charges were provided (via `charge_from_molecules`) alongside a force field that includes " + "virtual site parameters. Note that virtual sites will be applied charges from the force field and " + "cannot be given preset charges.", + PresetChargesAndVirtualSitesWarning, + ) + # interchange = Interchange(topology=topology) # or maybe interchange = Interchange(topology=validate_topology(topology)) @@ -138,7 +186,7 @@ def _create_interchange( interchange, force_field, interchange.topology, - charge_from_molecules, + molecules_with_preset_charges, allow_nonintegral_charges, ) _plugins(interchange, force_field, interchange.topology) @@ -274,7 +322,7 @@ def _electrostatics( interchange: Interchange, force_field: ForceField, topology: Topology, - charge_from_molecules: list[Molecule] | None = None, + molecules_with_preset_charges: list[Molecule] | None = None, allow_nonintegral_charges: bool = False, ): if "Electrostatics" not in force_field.registered_parameter_handlers: @@ -304,7 +352,7 @@ def _electrostatics( if handler is not None ], topology=topology, - charge_from_molecules=charge_from_molecules, + molecules_with_preset_charges=molecules_with_preset_charges, allow_nonintegral_charges=allow_nonintegral_charges, ), }, diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index bde6bc3d..f95e5ce2 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -273,7 +273,7 @@ class SMIRNOFFElectrostaticsCollection(ElectrostaticsCollection, SMIRNOFFCollect ) # type: ignore[assignment] exception_potential: Literal["Coulomb"] = Field("Coulomb") - _charges: dict[Any, _ElementaryChargeQuantity] = PrivateAttr(default_factory=dict) + _charges: dict[Any, _ElementaryChargeQuantity] = PrivateAttr(dict()) _charges_cached: bool = PrivateAttr(default=False) @classmethod @@ -304,6 +304,19 @@ def charges( ) -> dict[TopologyKey | LibraryChargeTopologyKey | VirtualSiteKey, _ElementaryChargeQuantity]: """Get the total partial charge on each atom, including virtual sites.""" if len(self._charges) == 0 or self._charges_cached is False: + # TODO: Clearing this dict **should not be necessary** but in some cases in appears + # that this class attribute persists in some cases, possibly only on a single + # thread. Ideas to try include + # * Drop @computed_field ? + # * Don't handle caching ourselves and let Pydantic do it (i.e. have this + # function simply retrun ._get_charges(...) + # + # Hopefully this isn't a major issue - caches for large systems should still + # only be build once + # + # Some more context: https://github.com/openforcefield/openff-interchange/issues/842#issuecomment-2394211357 + self._charges.clear() + self._charges.update(self._get_charges(include_virtual_sites=True)) self._charges_cached = True @@ -349,7 +362,7 @@ def _get_charges( if potential_key.associated_handler in ( "LibraryCharges", "ToolkitAM1BCCHandler", - "charge_from_molecules", + "molecules_with_preset_charges", "ExternalSource", ): charges[atom_index] = _add_charges( @@ -416,7 +429,7 @@ def create( cls, parameter_handler: Any, topology: Topology, - charge_from_molecules=None, + molecules_with_preset_charges=None, allow_nonintegral_charges: bool = False, ) -> Self: """ @@ -455,7 +468,7 @@ def create( handler.store_matches( parameter_handlers, topology, - charge_from_molecules=charge_from_molecules, + molecules_with_preset_charges=molecules_with_preset_charges, allow_nonintegral_charges=allow_nonintegral_charges, ) handler._charges = dict() @@ -777,12 +790,12 @@ def _assign_charges_from_molecules( cls, topology: Topology, unique_molecule: Molecule, - charge_from_molecules=Optional[list[Molecule]], + molecules_with_preset_charges=Optional[list[Molecule]], ) -> tuple[bool, dict, dict]: - if charge_from_molecules is None: + if molecules_with_preset_charges is None: return False, dict(), dict() - for molecule_with_charges in charge_from_molecules: + for molecule_with_charges in molecules_with_preset_charges: if molecule_with_charges.is_isomorphic_with(unique_molecule): break else: @@ -810,7 +823,7 @@ def _assign_charges_from_molecules( potential_key = PotentialKey( id=mapped_smiles, mult=index_in_molecule_with_charges, # Not sure this prevents clashes in some corner cases - associated_handler="charge_from_molecules", + associated_handler="molecules_with_preset_charges", bond_order=None, ) potential = Potential(parameters={"charge": partial_charge}) @@ -823,7 +836,7 @@ def store_matches( self, parameter_handler: ElectrostaticsHandlerType | list[ElectrostaticsHandlerType], topology: Topology, - charge_from_molecules=None, + molecules_with_preset_charges=None, allow_nonintegral_charges: bool = False, ) -> None: """ @@ -846,10 +859,10 @@ def store_matches( flag, matches, potentials = self._assign_charges_from_molecules( topology, unique_molecule, - charge_from_molecules, + molecules_with_preset_charges, ) # TODO: Here is where the toolkit calls self.check_charges_assigned(). Do we skip this - # entirely given that we are not accepting `charge_from_molecules`? + # entirely given that we are not accepting `molecules_with_preset_charges`? if not flag: # TODO: Rename this method to something like `_find_matches` diff --git a/openff/interchange/warnings.py b/openff/interchange/warnings.py index 67ca825b..3a3dc4aa 100644 --- a/openff/interchange/warnings.py +++ b/openff/interchange/warnings.py @@ -21,5 +21,9 @@ class ForceFieldModificationWarning(UserWarning): """Warning for when a ForceField is modified.""" +class PresetChargesAndVirtualSitesWarning(UserWarning): + """Warning when possibly using preset charges and virtual sites together.""" + + class InterchangeCombinationWarning(UserWarning): """Warning for when combining Interchange objects."""