Skip to content

Commit 8cb3e59

Browse files
authored
Add verbose options for mol2 and element (#729)
* add verbose options to reduce warnings for parsing elements and mol2 file * adjust unit tests
1 parent f5aa91b commit 8cb3e59

File tree

4 files changed

+64
-41
lines changed

4 files changed

+64
-41
lines changed

gmso/core/element.py

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class Config:
6464
allow_mutation = False
6565

6666

67-
def element_by_symbol(symbol):
67+
def element_by_symbol(symbol, verbose=False):
6868
"""Search for an element by its symbol.
6969
7070
Look up an element from a list of known elements by symbol.
@@ -73,7 +73,9 @@ def element_by_symbol(symbol):
7373
Parameters
7474
----------
7575
symbol : str
76-
Element symbol to look for, digits and spaces are removed before search
76+
Element symbol to look for, digits and spaces are removed before search.
77+
verbose : bool, optional, default=False
78+
If True, raise warnings if symbol has been trimmed before search.
7779
7880
Returns
7981
-------
@@ -83,7 +85,7 @@ def element_by_symbol(symbol):
8385
"""
8486
symbol_trimmed = sub(r"[0-9 -]", "", symbol).capitalize()
8587

86-
if symbol_trimmed != symbol:
88+
if symbol_trimmed != symbol and verbose:
8789
msg = (
8890
f"Numbers and spaces are not considered when searching by element symbol.\n"
8991
f"{symbol} became {symbol_trimmed}"
@@ -94,7 +96,7 @@ def element_by_symbol(symbol):
9496
return matched_element
9597

9698

97-
def element_by_name(name):
99+
def element_by_name(name, verbose=False):
98100
"""Search for an element by its name.
99101
100102
Look up an element from a list of known elements by name.
@@ -103,7 +105,9 @@ def element_by_name(name):
103105
Parameters
104106
----------
105107
name : str
106-
Element name to look for, digits and spaces are removed before search
108+
Element name to look for, digits and spaces are removed before search.
109+
verbose : bool, optional, default=False
110+
If True, raise warnings if name has been trimmed before search.
107111
108112
Returns
109113
-------
@@ -113,7 +117,7 @@ def element_by_name(name):
113117
"""
114118
name_trimmed = sub(r"[0-9 -]", "", name).lower()
115119

116-
if name_trimmed != name:
120+
if name_trimmed != name and verbose:
117121
msg = (
118122
"Numbers and spaces are not considered when searching by element name. \n"
119123
f"{name} became {name_trimmed}"
@@ -124,7 +128,7 @@ def element_by_name(name):
124128
return matched_element
125129

126130

127-
def element_by_atomic_number(atomic_number):
131+
def element_by_atomic_number(atomic_number, verbose=False):
128132
"""Search for an element by its atomic number.
129133
130134
Look up an element from a list of known elements by atomic number.
@@ -134,7 +138,9 @@ def element_by_atomic_number(atomic_number):
134138
----------
135139
atomic_number : int
136140
Element atomic number to look for;
137-
if a string is provided, only numbers are considered during the search
141+
if a string is provided, only numbers are considered during the search.
142+
verbose : bool, optional, default=False
143+
If True, raise warnings if atomic_number has been trimmed before search.
138144
139145
Returns
140146
-------
@@ -146,7 +152,7 @@ def element_by_atomic_number(atomic_number):
146152
atomic_number_trimmed = int(
147153
sub("[a-z -]", "", atomic_number.lower()).lstrip("0")
148154
)
149-
if str(atomic_number_trimmed) != atomic_number:
155+
if str(atomic_number_trimmed) != atomic_number and verbose:
150156
msg = (
151157
f"Letters and spaces are not considered when searching by element atomic number. \n "
152158
f"{atomic_number} became {atomic_number_trimmed}"
@@ -162,7 +168,7 @@ def element_by_atomic_number(atomic_number):
162168
return matched_element
163169

164170

165-
def element_by_mass(mass, exact=True):
171+
def element_by_mass(mass, exact=True, verbose=False):
166172
"""Search for an element by its mass.
167173
168174
Look up an element from a list of known elements by mass.
@@ -173,11 +179,13 @@ def element_by_mass(mass, exact=True):
173179
----------
174180
mass : int, float
175181
Element mass to look for; if a string is provided,
176-
only numbers are considered during the search
177-
Mass unyt is assumed to be u.amu, unless specfied (which will be converted to u.amu)
182+
only numbers are considered during the search.
183+
Mass unyt is assumed to be u.amu, unless specified (which will be converted to u.amu).
178184
exact : bool, optional, default=True
179185
This method can be used to search for an exact mass (up to the first decimal place)
180-
or search for an element mass closest to the mass entered
186+
or search for an element mass closest to the mass entered.
187+
verbose : bool, optional, default=False
188+
If True, raise warnings if mass has been trimmed before search.
181189
182190
Returns
183191
-------
@@ -188,7 +196,7 @@ def element_by_mass(mass, exact=True):
188196
if isinstance(mass, str):
189197
# Convert to float if a string is provided
190198
mass_trimmed = np.round(float(sub(r"[a-z -]", "", mass.lower())))
191-
if str(mass_trimmed) != mass:
199+
if str(mass_trimmed) != mass and verbose:
192200
msg1 = (
193201
f"Letters and spaces are not considered when searching by element mass.\n"
194202
f"{mass} became {mass_trimmed}"
@@ -208,13 +216,14 @@ def element_by_mass(mass, exact=True):
208216
mass_closest = min(
209217
mass_dict.keys(), key=lambda k: abs(k - mass_trimmed)
210218
)
211-
msg2 = f"Closest mass to {mass_trimmed}: {mass_closest}"
212-
warnings.warn(msg2)
219+
if verbose:
220+
msg2 = f"Closest mass to {mass_trimmed}: {mass_closest}"
221+
warnings.warn(msg2)
213222
matched_element = mass_dict.get(mass_closest)
214223
return matched_element
215224

216225

217-
def element_by_smarts_string(smarts_string):
226+
def element_by_smarts_string(smarts_string, verbose=False):
218227
"""Search for an element by a given SMARTS string.
219228
220229
Look up an element from a list of known elements by SMARTS string.
@@ -228,6 +237,8 @@ def element_by_smarts_string(smarts_string):
228237
and look up an Element. Note that this means some SMARTS grammar may
229238
not be parsed properly. For details, see
230239
https://github.com/mosdef-hub/foyer/issues/63
240+
verbose : bool, optional, default=False
241+
If True, raise warnings if smarts_string has been trimmed before search.
231242
232243
Returns
233244
-------
@@ -267,7 +278,7 @@ def element_by_smarts_string(smarts_string):
267278
return matched_element
268279

269280

270-
def element_by_atom_type(atom_type):
281+
def element_by_atom_type(atom_type, verbose=False):
271282
"""Search for an element by a given gmso AtomType object.
272283
273284
Look up an element from a list of known elements by atom type.
@@ -280,6 +291,8 @@ def element_by_atom_type(atom_type):
280291
looked up in the order of mass, name, and finally definition (the
281292
SMARTS string). Because of the loose structure of this class, a
282293
successful lookup is not guaranteed.
294+
verbose : bool, optional, default=False
295+
If True, raise warnings if atom_type has been trimmed before search.
283296
284297
Returns
285298
-------
@@ -291,11 +304,15 @@ def element_by_atom_type(atom_type):
291304
matched_element = None
292305

293306
if matched_element is None and atom_type.mass:
294-
matched_element = element_by_mass(atom_type.mass, exact=False)
307+
matched_element = element_by_mass(
308+
atom_type.mass, exact=False, verbose=verbose
309+
)
295310
if matched_element is None and atom_type.name:
296-
matched_element = element_by_symbol(atom_type.name)
311+
matched_element = element_by_symbol(atom_type.name, verbose=verbose)
297312
if matched_element is None and atom_type.definition:
298-
matched_element = element_by_smarts_string(atom_type.definition)
313+
matched_element = element_by_smarts_string(
314+
atom_type.definition, verbose=verbose
315+
)
299316

300317
if matched_element is None:
301318
raise GMSOError(

gmso/formats/mol2.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313

1414
@loads_as(".mol2")
15-
def read_mol2(filename, site_type="atom"):
15+
def read_mol2(filename, site_type="atom", verbose=False):
1616
"""Read in a TRIPOS mol2 file format into a gmso topology object.
1717
1818
Creates a Topology from a mol2 file structure. This will read in the
@@ -28,6 +28,8 @@ def read_mol2(filename, site_type="atom"):
2828
tells the reader to consider the elements saved in the mol2 file, and
2929
if the type is 'lj', to not try to identify the element of the site,
3030
instead saving the site name.
31+
verbose : bool, optional, default=False
32+
If True, raise warnings for any assumptions made during the parsing.
3133
3234
Returns
3335
-------
@@ -79,13 +81,13 @@ def read_mol2(filename, site_type="atom"):
7981
"Skipping current section and moving to the next RTI header."
8082
)
8183
else:
82-
supported_rti[section](topology, sections[section])
84+
supported_rti[section](topology, sections[section], verbose)
8385

8486
# TODO: read in parameters to correct attribute as well. This can be saved in various rti sections.
8587
return topology
8688

8789

88-
def _parse_lj(top, section):
90+
def _parse_lj(top, section, verbose):
8991
"""Parse atom of lj style from mol2 file."""
9092
for line in section:
9193
if line.strip():
@@ -95,9 +97,10 @@ def _parse_lj(top, section):
9597
try:
9698
charge = float(content[8])
9799
except IndexError:
98-
warnings.warn(
99-
f"No charge was detected for site {content[1]} with index {content[0]}"
100-
)
100+
if verbose:
101+
warnings.warn(
102+
f"No charge was detected for site {content[1]} with index {content[0]}"
103+
)
101104
charge = None
102105

103106
atom = Atom(
@@ -109,7 +112,7 @@ def _parse_lj(top, section):
109112
top.add_site(atom)
110113

111114

112-
def _parse_atom(top, section):
115+
def _parse_atom(top, section, verbose):
113116
"""Parse atom information from the mol2 file."""
114117

115118
def parse_ele(*symbols):
@@ -127,7 +130,7 @@ def parse_ele(*symbols):
127130
position = [float(x) for x in content[2:5]] * u.Å
128131
element = parse_ele(content[5], content[1])
129132

130-
if not element:
133+
if not element and verbose:
131134
warnings.warn(
132135
f"No element detected for site {content[1]} with index {content[0]}, "
133136
"consider manually adding the element to the topology"
@@ -136,9 +139,10 @@ def parse_ele(*symbols):
136139
try:
137140
charge = float(content[8])
138141
except IndexError:
139-
warnings.warn(
140-
f"No charge was detected for site {content[1]} with index {content[0]}"
141-
)
142+
if verbose:
143+
warnings.warn(
144+
f"No charge was detected for site {content[1]} with index {content[0]}"
145+
)
142146
charge = None
143147
molecule = top.label if top.__dict__.get("label") else top.name
144148
atom = Atom(
@@ -152,7 +156,7 @@ def parse_ele(*symbols):
152156
top.add_site(atom)
153157

154158

155-
def _parse_bond(top, section):
159+
def _parse_bond(top, section, verbose):
156160
"""Parse bond information from the mol2 file."""
157161
for line in section:
158162
if line.strip():
@@ -166,7 +170,7 @@ def _parse_bond(top, section):
166170
top.add_connection(bond)
167171

168172

169-
def _parse_box(top, section):
173+
def _parse_box(top, section, verbose):
170174
"""Parse box information from the mol2 file."""
171175
if top.box:
172176
warnings.warn(
@@ -182,6 +186,6 @@ def _parse_box(top, section):
182186
)
183187

184188

185-
def _parse_molecule(top, section):
189+
def _parse_molecule(top, section, verbose):
186190
"""Parse molecule information from the mol2 file."""
187191
top.label = str(section[0].strip())

gmso/tests/test_element.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_element(self):
2020
def test_element_by_name(self):
2121
for idx, name in enumerate(["Carbon", "carbon", " CarBon 12 "]):
2222
with pytest.warns(UserWarning if idx != 1 else None):
23-
carbon = element.element_by_name(name)
23+
carbon = element.element_by_name(name, verbose=True)
2424

2525
assert carbon.name == element.Carbon.name
2626
assert carbon.symbol == element.Carbon.symbol
@@ -29,7 +29,7 @@ def test_element_by_name(self):
2929
def test_element_by_symbol(self):
3030
for idx, symbol in enumerate(["N", "n", " N7"]):
3131
with pytest.warns(UserWarning if idx != 0 else None):
32-
nitrogen = element.element_by_symbol(symbol)
32+
nitrogen = element.element_by_symbol(symbol, verbose=True)
3333

3434
assert nitrogen.name == element.Nitrogen.name
3535
assert nitrogen.symbol == element.Nitrogen.symbol

gmso/tests/test_mol2.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def test_read_mol2(self):
5858
UserWarning,
5959
match=r"No charge was detected for site C with index 1",
6060
):
61-
top = Topology.load(get_fn("ethane.mol2"))
61+
top = Topology.load(get_fn("ethane.mol2"), verbose=True)
6262
assert list(top.sites)[0].charge is None
6363

6464
def test_residue(self):
@@ -96,7 +96,9 @@ def test_no_charge_lj(self):
9696
match=r"No charge was detected for site .* with index \d+$",
9797
):
9898
top = Topology.load(
99-
get_path("methane_missing_charge.mol2"), site_type="lj"
99+
get_path("methane_missing_charge.mol2"),
100+
site_type="lj",
101+
verbose=True,
100102
)
101103

102104
def test_wrong_path(self):
@@ -118,7 +120,7 @@ def test_broken_files(self):
118120
UserWarning,
119121
match=r"This mol2 file has two boxes to be read in, only reading in one with dimensions Box\(a=0.72",
120122
):
121-
Topology.load(get_fn("broken.mol2"))
123+
Topology.load(get_fn("broken.mol2"), verbose=True)
122124

123125
def test_benzene_mol2_elements(self):
124126
top = Topology.load(get_fn("benzene.mol2"))
@@ -132,7 +134,7 @@ def test_neopentane_mol2_elements(self):
132134
match=r"No element detected for site .+ with index \d+, "
133135
r"consider manually adding the element to the topology$",
134136
):
135-
top = Topology.load(get_fn("neopentane.mol2"))
137+
top = Topology.load(get_fn("neopentane.mol2"), verbose=True)
136138

137139
def test_mol2_residues(self):
138140
top = Topology.load(get_fn("parmed.mol2"))

0 commit comments

Comments
 (0)