From 032028205ed3a9b952d773723856395f0988442f Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 19 Sep 2024 10:37:30 -0500 Subject: [PATCH 01/10] ENH: Initial implementation of charge assignment logging --- openff/interchange/smirnoff/_nonbonded.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index ee751f53..0f5f8dc6 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -1,5 +1,6 @@ import copy import functools +import logging import warnings from collections.abc import Iterable from typing import Any, Literal, Optional, Union @@ -41,6 +42,8 @@ from openff.interchange.smirnoff._base import SMIRNOFFCollection from openff.interchange.warnings import ForceFieldModificationWarning +logger = logging.getLogger(__name__) + ElectrostaticsHandlerType = Union[ ElectrostaticsHandler, ToolkitAM1BCCHandler, @@ -501,6 +504,10 @@ def _library_charge_to_potentials( matches[topology_key] = potential_key potentials[potential_key] = potential + logger.info( + f"Charge section LibraryCharges applied {charge.m} to atom index {atom_index}", + ) + return matches, potentials @classmethod @@ -678,6 +685,11 @@ def _find_charge_model_matches( matches[SingleAtomChargeTopologyKey(this_atom_index=atom_index)] = potential_key + logger.info( + f"Charge section {handler_name}, method {partial_charge_method}, applied {partial_charge.m} " + f"to atom {atom_index}", + ) + return partial_charge_method, matches, potentials @classmethod @@ -815,6 +827,11 @@ def _assign_charges_from_molecules( matches[topology_key] = potential_key potentials[potential_key] = potential + logger.info( + f"Atom with topology index {index_in_topology} getting prespecified charge {partial_charge} " + f"from a molecule with mapped smiles {mapped_smiles}", + ) + return True, matches, potentials def store_matches( From 754cbe5985cea5a23b5c542690981c661f33bea4 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 19 Sep 2024 17:02:52 -0500 Subject: [PATCH 02/10] TST: Test some cases of charge assignment logging --- .../test_charge_assignment_logging.py | 223 ++++++++++++++++++ openff/interchange/models.py | 2 + openff/interchange/smirnoff/_nonbonded.py | 50 ++-- 3 files changed, 260 insertions(+), 15 deletions(-) create mode 100644 openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py new file mode 100644 index 00000000..eabc586c --- /dev/null +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -0,0 +1,223 @@ +"""Test charge assignment logging.""" + +import logging +from collections import defaultdict + +import pytest +from openff.toolkit import Molecule, Topology +from openff.toolkit.utils import OPENEYE_AVAILABLE + +""" +Hierarchy is +1. Match molceules with preset charges +2. Match chemical environment against library charges +3. Match chemical environment against charge increments +4. Run AM1-BCC (or a variant) on remaining molecules + +Test cases +---------- + +0. Sage with a ligand in vacuum +* Ligand gets AM1-BCC + +1. Sage with a ligand in water/ions +* Water gets library charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +2. Sage with a ligand in non-water solvent +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +2. Sage with a ligand in mixed solvent +* Water gets library charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +4. ff14SB with Sage +* Protein gets library charges +* Water gets library charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +5. ff14SB with Sage and preset charges on Protein A +* Protein A gets preset charges +* Other proteins get library charges +* Water gets library charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +6. Sage with ligand and OPC water +* Water gets library charges +* Water virtual sites get ?? +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +7. Sage with preset charges on ligand A +* Water gets library charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligand A gets preset charges +* Other ligands get AM1-BCC + +8. Sage with preset charges on water +* Water gets preset charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligands get AM1-BCC + +9. Sage with (ligand) virtual site parameters +* Water gets preset charges +* Ions get library charges +* Non-water solvent gets AM1-BCC +* Ligand heavy atoms get AM1-BCC and charge increments +* Virtual sites get charge increments + +10. AM1-with-custom-BCCs Sage with ligand and ions water +* Water gets library charges +* Ions get library charges +* Ligand gets charge increments + +Other details +* Specifics of charge method (AM1-BCC, AM1-BCC-ELF10, AM1-BCC via NAGL) +* Molecules with preset charges can be similar but not exact enough +* Preset charges and virtual sites is underspecified and cannot be tested +""" + +# TODO: Handle whether or not NAGL/graph charges are applied +AM1BCC_KEY = "am1bccelf10" if OPENEYE_AVAILABLE else "am1bcc" + + +def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: + """ + Map partial charge assignment methods to atom indices. + """ + info = defaultdict(list) + + for record in caplog.records: + assert record.levelname == "INFO", "Only INFO logs are expected." + + message = record.msg + + if message.startswith("Charge section LibraryCharges"): + info["library"].append(int(message.split("atom index ")[-1])) + + elif message.startswith("Charge section ToolkitAM1BCC"): + info[caplog.records[0].msg.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) + + elif message.startswith("Preset charges"): + info["preset"].append(int(message.split("atom index")[-1])) + + else: + raise ValueError(f"Unexpected log message {message}") + + return info + + +def check_method( + atom_indices: tuple[int], + method: str, +): + """ + Check logs that a given set of atom indices was assigned by a given partial charge assignment method. + """ + pass + + +@pytest.fixture +def ions() -> Topology: + return Topology.from_molecules( + [ + Molecule.from_smiles(smiles) + for smiles in [ + "[Na+]", + "[Cl-]", + "[Na+]", + "[Cl-]", + ] + ], + ) + + +@pytest.fixture +def ligand() -> Molecule: + return Molecule.from_mapped_smiles("[C:1]([C:2]([O:3][H:9])([H:7])[H:8])([H:4])([H:5])[H:6]") + + +@pytest.fixture +def water_and_ions(water, ions) -> Topology: + topology = Topology.from_molecules(3 * [water]) + for molecule in ions.molecules: + topology.add_molecule(molecule) + + return topology + + +@pytest.fixture +def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: + topology = ligand.to_topology() + + for molecule in water_and_ions.molecules: + topology.add_molecule(molecule) + + return topology + + +""" +1. Sage with a ligand in water/ions +2. Sage with a ligand in non-water solvent +3. Sage with a ligand in mixed solvent +4. ff14SB with Sage +5. ff14SB with Sage and preset charges on Protein A +6. Sage with ligand and OPC water +7. Sage with preset charges on ligand A +8. Sage with preset charges on water +9. Sage with (ligand) virtual site parameters +10. AM1-with-custom-BCCs Sage with ligand and ions water +""" + + +def test_case0(caplog, sage, ligand): + with caplog.at_level(logging.INFO): + sage.create_interchange(ligand.to_topology()) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are ethanol, getting AM1-BCC + assert sorted(info[AM1BCC_KEY]) == [*range(0, 9)] + + +def test_case1(caplog, sage, ligand_and_water_and_ions): + with caplog.at_level(logging.INFO): + sage.create_interchange(ligand_and_water_and_ions) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are ethanol, getting AM1-BCC + assert sorted(info[AM1BCC_KEY]) == [*range(0, 9)] + + # atoms 9 through 21 are water + ions, getting library charges + assert sorted(info["library"]) == [*range(9, 22)] + + +def test_case7(caplog, sage, ligand_and_water_and_ions): + ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") + + with caplog.at_level(logging.INFO): + sage.create_interchange( + ligand_and_water_and_ions, + charge_from_molecules=[ligand_and_water_and_ions.molecule(0)], + ) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are ethanol, getting preset charges + assert sorted(info["preset"]) == [*range(0, 9)] + + # atoms 9 through 21 are water + ions, getting library charges + assert sorted(info["library"]) == [*range(9, 22)] diff --git a/openff/interchange/models.py b/openff/interchange/models.py index 68b43f45..4ca9421a 100644 --- a/openff/interchange/models.py +++ b/openff/interchange/models.py @@ -298,6 +298,8 @@ class SingleAtomChargeTopologyKey(LibraryChargeTopologyKey): Shim class for storing the result of charge_from_molecules. """ + extras: dict = dict() # noqa: RUF012 + class ChargeModelTopologyKey(_BaseModel): """Subclass of `TopologyKey` for use with charge models only.""" diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index 0f5f8dc6..c8747458 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -504,10 +504,6 @@ def _library_charge_to_potentials( matches[topology_key] = potential_key potentials[potential_key] = potential - logger.info( - f"Charge section LibraryCharges applied {charge.m} to atom index {atom_index}", - ) - return matches, potentials @classmethod @@ -683,12 +679,15 @@ def _find_charge_model_matches( ) potentials[potential_key] = Potential(parameters={"charge": partial_charge}) - matches[SingleAtomChargeTopologyKey(this_atom_index=atom_index)] = potential_key - - logger.info( - f"Charge section {handler_name}, method {partial_charge_method}, applied {partial_charge.m} " - f"to atom {atom_index}", - ) + matches[ + SingleAtomChargeTopologyKey( + this_atom_index=atom_index, + extras={ + "handler": handler_name, + "partial_charge_method": partial_charge_method, + }, + ) + ] = potential_key return partial_charge_method, matches, potentials @@ -816,6 +815,7 @@ def _assign_charges_from_molecules( index_in_topology = atom_map[index_in_molecule_with_charges] topology_key = SingleAtomChargeTopologyKey( this_atom_index=index_in_topology, + extras={"handler": "preset"}, ) potential_key = PotentialKey( id=mapped_smiles, @@ -827,11 +827,6 @@ def _assign_charges_from_molecules( matches[topology_key] = potential_key potentials[potential_key] = potential - logger.info( - f"Atom with topology index {index_in_topology} getting prespecified charge {partial_charge} " - f"from a molecule with mapped smiles {mapped_smiles}", - ) - return True, matches, potentials def store_matches( @@ -896,6 +891,31 @@ def store_matches( # Have this new key (on a duplicate molecule) point to the same potential # as the old key (on a unique/reference molecule) + if type(new_key) is LibraryChargeTopologyKey: + logger.info( + "Charge section LibraryCharges applied to (topology) atom index " + f"{topology_atom_index}", + ) + + elif type(new_key) is SingleAtomChargeTopologyKey: + if new_key.extras["handler"] == "ToolkitAM1BCCHandler": + logger.info( + "Charge section ToolkitAM1BCC, using charge method " + f"{new_key.extras['partial_charge_method']}, " + f"applied to (topology) atom index {topology_atom_index}", + ) + + elif new_key.extras["handler"] == "preset": + logger.info( + f"Preset charges applied to atom index {topology_atom_index}", + ) + + else: + raise ValueError(f"Unhandled handler {new_key.extras['handler']}") + + else: + raise ValueError(f"Unhandled key type {type(new_key)}") + self.key_map[new_key] = matches[key] topology_charges = [0.0] * topology.n_atoms From 3eef5daf28c1c3868a384b09a8fa3ae9505149dd Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 10:37:42 -0500 Subject: [PATCH 03/10] TST: Add more test cases --- .../test_charge_assignment_logging.py | 78 +++++++++++++++++-- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index eabc586c..566b3d97 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -149,6 +149,22 @@ def ligand() -> Molecule: return Molecule.from_mapped_smiles("[C:1]([C:2]([O:3][H:9])([H:7])[H:8])([H:4])([H:5])[H:6]") +@pytest.fixture +def solvent() -> Molecule: + return Molecule.from_mapped_smiles("[H:3][C:1]([H:4])([H:5])[N:2]([H:6])[H:7]") + + +def ligand_in_solvent(ligand, solvent) -> Topology: + return Topology.from_molecules( + [ + ligand, + solvent, + solvent, + solvent, + ], + ) + + @pytest.fixture def water_and_ions(water, ions) -> Topology: topology = Topology.from_molecules(3 * [water]) @@ -169,14 +185,15 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: """ -1. Sage with a ligand in water/ions -2. Sage with a ligand in non-water solvent -3. Sage with a ligand in mixed solvent +0.xSage with just a ligand +1.xSage with a ligand in water/ions +2.xSage with a ligand in non-water solvent +3.xSage with a ligand in mixed solvent 4. ff14SB with Sage 5. ff14SB with Sage and preset charges on Protein A 6. Sage with ligand and OPC water -7. Sage with preset charges on ligand A -8. Sage with preset charges on water +7.xSage with preset charges on ligand A +8.xSage with preset charges on water 9. Sage with (ligand) virtual site parameters 10. AM1-with-custom-BCCs Sage with ligand and ions water """ @@ -205,6 +222,39 @@ def test_case1(caplog, sage, ligand_and_water_and_ions): assert sorted(info["library"]) == [*range(9, 22)] +def test_case2(caplog, sage, ligand, solvent): + topology = Topology.from_molecules([ligand, solvent, solvent, solvent]) + + with caplog.at_level(logging.INFO): + sage.create_interchange(topology) + + info = map_methods_to_atom_indices(caplog) + + # everything should get AM1-BCC charges + assert sorted(info[AM1BCC_KEY]) == [*range(0, topology.n_atoms)] + + +def test_case3(caplog, sage, ligand_and_water_and_ions, solvent): + for index in range(3): + ligand_and_water_and_ions.add_molecule(solvent) + + ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") + + with caplog.at_level(logging.INFO): + sage.create_interchange( + ligand_and_water_and_ions, + ) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are ethanol, getting AM1-BCC, + # and also solvent molecules (starting at index 22) + assert sorted(info[AM1BCC_KEY]) == [*range(0, 9), *range(22, 22 + 3 * 7)] + + # atoms 9 through 21 are water + ions, getting library charges + assert sorted(info["library"]) == [*range(9, 22)] + + def test_case7(caplog, sage, ligand_and_water_and_ions): ligand_and_water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") @@ -221,3 +271,21 @@ def test_case7(caplog, sage, ligand_and_water_and_ions): # atoms 9 through 21 are water + ions, getting library charges assert sorted(info["library"]) == [*range(9, 22)] + + +def test_case8(caplog, sage, water_and_ions): + water_and_ions.molecule(0).assign_partial_charges(partial_charge_method="gasteiger") + + with caplog.at_level(logging.INFO): + sage.create_interchange( + water_and_ions, + charge_from_molecules=[water_and_ions.molecule(0)], + ) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are water, getting preset charges + assert sorted(info["preset"]) == [*range(0, 9)] + + # atoms 9 through 12 are ions, getting library charges + assert sorted(info["library"]) == [*range(9, 13)] From bd9762ac3500954b787d08714735948e9ff916b8 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 11:13:28 -0500 Subject: [PATCH 04/10] ENH: Log virtual site charge increments --- openff/interchange/smirnoff/_nonbonded.py | 1 + openff/interchange/smirnoff/_virtual_sites.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index c8747458..fa85a18b 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -331,6 +331,7 @@ def _get_charges( total_charge: Quantity = numpy.sum(parameter_value) # assumes virtual sites can only have charges determined in one step + charges[topology_key] = -1.0 * total_charge # Apply increments to "orientation" atoms diff --git a/openff/interchange/smirnoff/_virtual_sites.py b/openff/interchange/smirnoff/_virtual_sites.py index 08b4aacf..19688fe6 100644 --- a/openff/interchange/smirnoff/_virtual_sites.py +++ b/openff/interchange/smirnoff/_virtual_sites.py @@ -1,3 +1,4 @@ +import logging import math from typing import Literal @@ -23,6 +24,8 @@ SMIRNOFFvdWCollection, ) +logger = logging.getLogger(__name__) + _DEGREES_TO_RADIANS = numpy.pi / 180.0 @@ -178,6 +181,12 @@ def store_potentials( # type: ignore[override] ), }, ) + + logger.info( + "Charge section VirtualSites applied to virtual site with orientation atoms " + f"{virtual_site_key.orientation_atom_indices}", + ) + electrostatics_collection.key_map[virtual_site_key] = electrostatics_key electrostatics_collection.potentials[electrostatics_key] = electrostatics_potential From e32ed5f1dc0fdddf3e13e6f6123ceeeb36cfc1cb Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 11:27:06 -0500 Subject: [PATCH 05/10] TST: Test virtual site charge assignment logging --- .../test_charge_assignment_logging.py | 83 +++++++++++++++---- 1 file changed, 68 insertions(+), 15 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 566b3d97..ad777e2a 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -1,10 +1,11 @@ """Test charge assignment logging.""" import logging +import re from collections import defaultdict import pytest -from openff.toolkit import Molecule, Topology +from openff.toolkit import ForceField, Molecule, Topology from openff.toolkit.utils import OPENEYE_AVAILABLE """ @@ -95,7 +96,7 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: """ - Map partial charge assignment methods to atom indices. + Map partial charge assignment methods to (sorted) atom indices. """ info = defaultdict(list) @@ -110,13 +111,24 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l elif message.startswith("Charge section ToolkitAM1BCC"): info[caplog.records[0].msg.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) + # without also pulling the virtual site - particle mapping (which is different for each engine) + # it's hard to store more information than the orientation atoms that are affected by each + # virtual site's charges + elif message.startswith("Charge section VirtualSites"): + orientation_atoms: list[int] = [ + int(value.strip()) for value in re.findall(r"\((.*?)\)", message)[0].split(",") + ] + + for atom in orientation_atoms: + info["orientation"].append(atom) + elif message.startswith("Preset charges"): info["preset"].append(int(message.split("atom index")[-1])) else: raise ValueError(f"Unexpected log message {message}") - return info + return {key: sorted(val) for key, val in info.items()} def check_method( @@ -191,10 +203,10 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: 3.xSage with a ligand in mixed solvent 4. ff14SB with Sage 5. ff14SB with Sage and preset charges on Protein A -6. Sage with ligand and OPC water +6.xSage with ligand and OPC water 7.xSage with preset charges on ligand A 8.xSage with preset charges on water -9. Sage with (ligand) virtual site parameters +9.xSage with (ligand) virtual site parameters 10. AM1-with-custom-BCCs Sage with ligand and ions water """ @@ -206,7 +218,7 @@ def test_case0(caplog, sage, ligand): info = map_methods_to_atom_indices(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert sorted(info[AM1BCC_KEY]) == [*range(0, 9)] + assert info[AM1BCC_KEY] == [*range(0, 9)] def test_case1(caplog, sage, ligand_and_water_and_ions): @@ -216,10 +228,10 @@ def test_case1(caplog, sage, ligand_and_water_and_ions): info = map_methods_to_atom_indices(caplog) # atoms 0 through 8 are ethanol, getting AM1-BCC - assert sorted(info[AM1BCC_KEY]) == [*range(0, 9)] + assert info[AM1BCC_KEY] == [*range(0, 9)] # atoms 9 through 21 are water + ions, getting library charges - assert sorted(info["library"]) == [*range(9, 22)] + assert info["library"] == [*range(9, 22)] def test_case2(caplog, sage, ligand, solvent): @@ -231,7 +243,7 @@ def test_case2(caplog, sage, ligand, solvent): info = map_methods_to_atom_indices(caplog) # everything should get AM1-BCC charges - assert sorted(info[AM1BCC_KEY]) == [*range(0, topology.n_atoms)] + assert info[AM1BCC_KEY] == [*range(0, topology.n_atoms)] def test_case3(caplog, sage, ligand_and_water_and_ions, solvent): @@ -249,10 +261,34 @@ def test_case3(caplog, sage, ligand_and_water_and_ions, solvent): # atoms 0 through 8 are ethanol, getting AM1-BCC, # and also solvent molecules (starting at index 22) - assert sorted(info[AM1BCC_KEY]) == [*range(0, 9), *range(22, 22 + 3 * 7)] + assert info[AM1BCC_KEY] == [*range(0, 9), *range(22, 22 + 3 * 7)] # atoms 9 through 21 are water + ions, getting library charges - assert sorted(info["library"]) == [*range(9, 22)] + assert info["library"] == [*range(9, 22)] + + +def test_case6(caplog, ligand, water): + force_field = ForceField("openff-2.0.0.offxml", "opc.offxml") + + topology = Topology.from_molecules([ligand, water, water, water]) + + with caplog.at_level(logging.INFO): + force_field.create_interchange(topology) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 8 are ethanol, getting AM1-BCC charges + assert info[AM1BCC_KEY] == [*range(0, 9)] + + # atoms 9 through 17 are water atoms, getting library charges + assert info["library"] == [*range(9, 18)] + + # particles 18 through 20 are water virtual sites, but the current logging strategy + # makes it hard to match these up (and the particle indices are different OpenMM/GROMACS/etc) + + # can still check that orientation atoms are subject to virtual site + # charge increments (even if the increment is +0.0 e) + assert info["orientation"] == [*range(9, 18)] def test_case7(caplog, sage, ligand_and_water_and_ions): @@ -267,10 +303,10 @@ def test_case7(caplog, sage, ligand_and_water_and_ions): info = map_methods_to_atom_indices(caplog) # atoms 0 through 8 are ethanol, getting preset charges - assert sorted(info["preset"]) == [*range(0, 9)] + assert info["preset"] == [*range(0, 9)] # atoms 9 through 21 are water + ions, getting library charges - assert sorted(info["library"]) == [*range(9, 22)] + assert info["library"] == [*range(9, 22)] def test_case8(caplog, sage, water_and_ions): @@ -285,7 +321,24 @@ def test_case8(caplog, sage, water_and_ions): info = map_methods_to_atom_indices(caplog) # atoms 0 through 8 are water, getting preset charges - assert sorted(info["preset"]) == [*range(0, 9)] + assert info["preset"] == [*range(0, 9)] # atoms 9 through 12 are ions, getting library charges - assert sorted(info["library"]) == [*range(9, 13)] + assert info["library"] == [*range(9, 13)] + + +def test_case9(caplog, sage_with_bond_charge): + with caplog.at_level(logging.INFO): + sage_with_bond_charge.create_interchange( + Molecule.from_mapped_smiles( + "[H:3][C:1]([H:4])([H:5])[Cl:2]", + ).to_topology(), + ) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 5 are ligand, getting AM1-BCC charges + assert info[AM1BCC_KEY] == [*range(0, 5)] + + # atoms 0 and 1 are the orientation atoms of the sigma hole virtual site + assert info["orientation"] == [0, 1] From 716eac34db977eb7e1dd65f7b1e147b787e77455 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 13:31:08 -0500 Subject: [PATCH 06/10] ENH: Log charge increment model charge assignment --- .../test_charge_assignment_logging.py | 76 ++++++++++++++++--- openff/interchange/smirnoff/_nonbonded.py | 16 ++++ 2 files changed, 83 insertions(+), 9 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index ad777e2a..08dea99b 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -92,6 +92,7 @@ # TODO: Handle whether or not NAGL/graph charges are applied AM1BCC_KEY = "am1bccelf10" if OPENEYE_AVAILABLE else "am1bcc" +NAGL_KEY = "openff-gnn-am1bcc-0.1.0-rc.2.pt" def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, list[int]]: @@ -125,20 +126,46 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l elif message.startswith("Preset charges"): info["preset"].append(int(message.split("atom index")[-1])) + elif message.startswith("Charge section ChargeIncrementModel"): + if "using charge method" in message: + info[f"chargeincrements_{message.split(',')[1].split(' ')[-1]}"].append( + int(message.split("atom index ")[-1]), + ) + + elif "applying charge increment" in message: + # TODO: Store the "other" atoms? + info["chargeincrements"].append(int(message.split("atom ")[1].split(" ")[0])) + else: raise ValueError(f"Unexpected log message {message}") return {key: sorted(val) for key, val in info.items()} -def check_method( - atom_indices: tuple[int], - method: str, -): - """ - Check logs that a given set of atom indices was assigned by a given partial charge assignment method. - """ - pass +@pytest.fixture +def sage_with_nagl(sage): + from openff.toolkit.typing.engines.smirnoff.parameters import ChargeIncrementModelHandler, ChargeIncrementType + + sage.register_parameter_handler( + parameter_handler=ChargeIncrementModelHandler( + version=0.3, + partial_charge_method=NAGL_KEY, + ), + ) + + # Add dummy "BCCs" for testing, even though this model has BCCs built into + # the partial charge assignment method + sage["ChargeIncrementModel"].add_parameter( + parameter=ChargeIncrementType( + smirks="[#6:1]-[#1:2]", + charge_increment=[ + "0.1 elementary_charge", + "-0.1 elementary_charge", + ], + ), + ) + + return sage @pytest.fixture @@ -207,7 +234,7 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: 7.xSage with preset charges on ligand A 8.xSage with preset charges on water 9.xSage with (ligand) virtual site parameters -10. AM1-with-custom-BCCs Sage with ligand and ions water +10.xAM1-with-custom-BCCs Sage with ligand and ions water """ @@ -342,3 +369,34 @@ def test_case9(caplog, sage_with_bond_charge): # atoms 0 and 1 are the orientation atoms of the sigma hole virtual site assert info["orientation"] == [0, 1] + + +def test_case10(caplog, sage_with_nagl, ligand): + from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper + from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper + from openff.toolkit.utils.toolkit_registry import ToolkitRegistry, toolkit_registry_manager + + pytest.importorskip("openff.nagl") + pytest.importorskip("rdkit") + + with caplog.at_level(logging.INFO): + with toolkit_registry_manager( + toolkit_registry=ToolkitRegistry( + toolkit_precedence=[NAGLToolkitWrapper, RDKitToolkitWrapper], + ), + ): + sage_with_nagl.create_interchange(ligand.to_topology()) + + info = map_methods_to_atom_indices(caplog) + + # atoms 0 through 5 are ligand, getting NAGL charges + assert info[f"chargeincrements_{NAGL_KEY}"] == [*range(0, 9)] + + # TODO: These are logged symmetrically (i.e. hydrogens are listed) + # even though the charges appear to be correct, assert should + # simply by == [0, 1] since the hydrogens shouldn't be caught + assert 0 in info["chargeincrements"] + assert 1 in info["chargeincrements"] + + # the standard AM1-BCC should not have ran + assert AM1BCC_KEY not in info diff --git a/openff/interchange/smirnoff/_nonbonded.py b/openff/interchange/smirnoff/_nonbonded.py index fa85a18b..650dcaaf 100644 --- a/openff/interchange/smirnoff/_nonbonded.py +++ b/openff/interchange/smirnoff/_nonbonded.py @@ -386,6 +386,11 @@ def _get_charges( parameter_value, ) + logger.info( + "Charge section ChargeIncrementModel, applying charge increment from atom " + f"{topology_key.this_atom_index} to atoms {topology_key.other_atom_indices}", + ) + else: raise NotImplementedError() @@ -914,6 +919,17 @@ def store_matches( else: raise ValueError(f"Unhandled handler {new_key.extras['handler']}") + elif type(new_key) is ChargeModelTopologyKey: + logger.info( + "Charge section ChargeIncrementModel, using charge method " + f"{new_key.partial_charge_method}, " + f"applied to (topology) atom index {new_key.this_atom_index}", + ) + + elif type(new_key) is ChargeIncrementTopologyKey: + # here is where the actual increments could be logged + pass + else: raise ValueError(f"Unhandled key type {type(new_key)}") From 452a7f10816c3e54fb60d86ab770353a3bb19170 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 13:46:23 -0500 Subject: [PATCH 07/10] TST: Test cases with ff14SB and protein --- .../test_charge_assignment_logging.py | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 08dea99b..54a9f9e4 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -8,6 +8,8 @@ from openff.toolkit import ForceField, Molecule, Topology from openff.toolkit.utils import OPENEYE_AVAILABLE +from openff.interchange._tests import get_protein + """ Hierarchy is 1. Match molceules with preset charges @@ -110,7 +112,7 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l info["library"].append(int(message.split("atom index ")[-1])) elif message.startswith("Charge section ToolkitAM1BCC"): - info[caplog.records[0].msg.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) + info[message.split(", ")[1].split(" ")[-1]].append(int(message.split("atom index ")[-1])) # without also pulling the virtual site - particle mapping (which is different for each engine) # it's hard to store more information than the orientation atoms that are affected by each @@ -294,6 +296,45 @@ def test_case3(caplog, sage, ligand_and_water_and_ions, solvent): assert info["library"] == [*range(9, 22)] +@pytest.mark.slow +@pytest.mark.parametrize("preset_on_protein", [True, False]) +def test_cases4_5(caplog, ligand_and_water_and_ions, preset_on_protein): + ff = ForceField("ff14sb_off_impropers_0.0.3.offxml", "openff-2.0.0.offxml") + + complex = get_protein("MainChain_ALA_ALA").to_topology() + + for molecule in ligand_and_water_and_ions.molecules: + complex.add_molecule(molecule) + + if preset_on_protein: + complex.molecule(0).assign_partial_charges(partial_charge_method="zeros") + + with caplog.at_level(logging.INFO): + if preset_on_protein: + ff.create_interchange(complex, charge_from_molecules=[complex.molecule(0)]) + else: + ff.create_interchange(complex) + + info = map_methods_to_atom_indices(caplog) + + assert info[AM1BCC_KEY] == [*range(complex.molecule(0).n_atoms, complex.molecule(0).n_atoms + 9)] + + if preset_on_protein: + # protein gets preset charges + assert info["preset"] == [*range(0, complex.molecule(0).n_atoms)] + + # everything after the protein and ligand should get library charges + assert info["library"] == [ + *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + ] + else: + # the protein and everything after the ligand should get library charges + assert info["library"] == [ + *range(0, complex.molecule(0).n_atoms), + *range(complex.molecule(0).n_atoms + 9, complex.n_atoms), + ] + + def test_case6(caplog, ligand, water): force_field = ForceField("openff-2.0.0.offxml", "opc.offxml") From 9f7d004a2c86dad91783aaf3d19c45790f3ab9b5 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 14:25:26 -0500 Subject: [PATCH 08/10] FIX: Fix test --- .../unit_tests/smirnoff/test_charge_assignment_logging.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index 54a9f9e4..ff071ef3 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -104,7 +104,9 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l info = defaultdict(list) for record in caplog.records: - assert record.levelname == "INFO", "Only INFO logs are expected." + # skip logged warnings from upstreams/other packages + if record.name.startswith("openff.interchange"): + assert record.levelname == "INFO", "Only INFO logs are expected." message = record.msg @@ -230,8 +232,8 @@ def ligand_and_water_and_ions(ligand, water_and_ions) -> Topology: 1.xSage with a ligand in water/ions 2.xSage with a ligand in non-water solvent 3.xSage with a ligand in mixed solvent -4. ff14SB with Sage -5. ff14SB with Sage and preset charges on Protein A +4.xff14SB with Sage +5.xff14SB with Sage and preset charges on Protein A 6.xSage with ligand and OPC water 7.xSage with preset charges on ligand A 8.xSage with preset charges on water From 609993375f30b28bb5647a3a27bf5465d72c19f5 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 14:51:16 -0500 Subject: [PATCH 09/10] DOC: Document charge assignment logging --- docs/index.md | 1 + docs/releasehistory.md | 4 ++++ docs/using/advanced.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 docs/using/advanced.md diff --git a/docs/index.md b/docs/index.md index 15bc0ffb..c9b3249e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,6 +31,7 @@ using/plugins.md using/status.md using/edges.md using/experimental.md +using/advanced.md ``` diff --git a/docs/releasehistory.md b/docs/releasehistory.md index 3a73dfcc..ebd6abf0 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -11,6 +11,10 @@ Dates are given in YYYY-MM-DD format. Please note that all releases prior to a version 1.0.0 are considered pre-releases and many API changes will come before a stable release. +## Current development + +* #1053 Logs, at the level of `logging.INFO`, how charges are assigned by SMIRNOFF force fields to each atom and virtual site. + ## 0.4.0 - 2024 * Pydantic v2 is now used at runtime. As a consequence, models containing `Interchange`s cannot also use models from the v1 API. diff --git a/docs/using/advanced.md b/docs/using/advanced.md new file mode 100644 index 00000000..4fe85b1f --- /dev/null +++ b/docs/using/advanced.md @@ -0,0 +1,42 @@ +# Advanced features + +## Charge assignment logging + +SMIRNOFF force fields support several different partial charge assignment methods. These are applied, in the following order + +1. Look for preset charges from the `charge_from_molecules` argument +1. Look for chemical environment matches within the `` section +1. Look for chemical environment matches within the `` section +1. Try to run AM1-BCC or some variant + +If a molecule gets charges from one method, attempts to match charges for later methods are skipped. For more on how SMIRNOFF defines this behavior, see [this issue](https://github.com/openforcefield/standards/issues/68) and linked discussions. + +Given this complexity, it may be useful to track how each atom actually got charges assigned. Interchange has opt-in logging to track this behavior. This uses the [standard library `logging` module](https://docs.python.org/3/library/logging.html) at the `INFO` level. The easiest way to get started is by adding something like `logging.basicConfig(level=logging.INFO)` to the beginning of a script or program. For example, this script: + +```python +import logging + +from openff.toolkit import ForceField, Molecule + +logging.basicConfig(level=logging.INFO) + +ForceField("openff-2.2.0.offxml").create_interchange( + Molecule.from_smiles("CCO").to_topology() +) +``` + +will produce output including something like + +```shell +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 0 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 1 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 2 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 3 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 4 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 5 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 6 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 7 +INFO:openff.interchange.smirnoff._nonbonded:Charge section ToolkitAM1BCC, using charge method am1bcc, applied to (topology) atom index 8 +``` + +This functionality is only available with SMIRNOFF force fields. From a3679db05bbb843378913d1c0cabf1e1d143dae4 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Fri, 20 Sep 2024 14:53:33 -0500 Subject: [PATCH 10/10] FIX: Fix test --- .../unit_tests/smirnoff/test_charge_assignment_logging.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py index ff071ef3..3e8bedda 100644 --- a/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py +++ b/openff/interchange/_tests/unit_tests/smirnoff/test_charge_assignment_logging.py @@ -107,6 +107,8 @@ def map_methods_to_atom_indices(caplog: pytest.LogCaptureFixture) -> dict[str, l # skip logged warnings from upstreams/other packages if record.name.startswith("openff.interchange"): assert record.levelname == "INFO", "Only INFO logs are expected." + else: + continue message = record.msg