Skip to content

Commit 4f55339

Browse files
TallJimbotimj
authored and committed
Add logic to recognize non-standard keys in DataCoordinate.
This reimplements some of the special handling of non-standard keys from Butler._findDatasetRef in the hopes of being able to move it all down to Registry (and thus work on many more interfaces). But it's just a start at that; I realized while trying to make Butler._findDatasetRef use the new code that we really need to make queryDatasets work on CALIBRATION collections first. But I think what I've done so far will still be useful eventually, so I'm keeping it.
1 parent 66c40c0 commit 4f55339

File tree

1 file changed

+113
-9
lines changed

1 file changed

+113
-9
lines changed

python/lsst/daf/butler/core/dimensions/_coordinate.py

Lines changed: 113 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
)
3737

3838
from abc import abstractmethod
39+
import logging
3940
import numbers
4041
from typing import (
4142
AbstractSet,
@@ -63,6 +64,8 @@
6364
from .._containers import HeterogeneousDimensionRecordAbstractSet
6465
from ...registry import Registry
6566

67+
log = logging.getLogger(__name__)
68+
6669
DataIdKey = Union[str, Dimension]
6770
"""Type annotation alias for the keys that can be used to index a
6871
DataCoordinate.
@@ -164,6 +167,9 @@ def standardize(
164167
universe: Optional[DimensionUniverse] = None,
165168
defaults: Optional[DataCoordinate] = None,
166169
records: Optional[HeterogeneousDimensionRecordAbstractSet] = None,
170+
unused_dimensions: Optional[Dict[str, DataIdValue]] = None,
171+
unused_constraints: Optional[NameLookupMapping[DimensionElement, Dict[str, Any]]] = None,
172+
check_types: Optional[bool] = None,
167173
**kwargs: Any
168174
) -> DataCoordinate:
169175
"""Standardize the supplied dataId.
@@ -192,6 +198,44 @@ def standardize(
192198
Container of `DimensionRecord` instances that may be used to
193199
fill in missing keys and/or attach records. If provided, the
194200
returned object is guaranteed to have `hasRecords` return `True`.
201+
unused_dimensions : `dict`, optional
202+
A mapping that will be populated with any given key-value pairs
203+
that identify unrelated dimensions or implied dimensions that could
204+
not be returned (i.e. when some implied dimensions are missing, and
205+
hence `hasFull` cannot be `True` on the returned object).
206+
unused_constraints : `dict`, optional
207+
A mapping that will be populated with any given key-value pairs
208+
that cannot be included in the returned `DataCoordinate`. These
209+
fall into three categories:
210+
211+
- Fully-qualified constraints on `DimensionRecord` fields (e.g.
212+
``exposure.day_obs=20250101``).
213+
214+
- Unqualified constraints on `DimensionRecord` fields (e.g.
215+
``day_obs=20250101``). These are expanded to constraints on
216+
all matching elements in the dimension universe.
217+
218+
- Values for dimensions that have the wrong type for the primary
219+
key, but can be transformed into constraints on an alternate key
220+
(e.g. ``detector='S11R11'`` ->
221+
``detector.full_name='S11R11'``). These are only considered if
222+
``check_types`` is `True`.
223+
224+
If this argument is not `None`, it should be a mapping that
225+
supports lookups for all `DimensionElement` names in the universe,
226+
returning a `dict` to be populated with ``field: value``
227+
constraints. ``defaultdict(dict)`` is a convenient way to
228+
construct such a mapping.
229+
230+
If this argument is `None` (the default), it is assumed that
231+
calling code cannot make use of attribute constraints, and
232+
`ValueError` will be raised if they appear. Key-value pairs for
233+
dimensions that are not part of the graph are silently ignored,
234+
as are those for implied dimensions when some implied dimensions
235+
are missing.
236+
check_types : `bool`, optional
237+
If `True` (default) check value types against the expected types
238+
for that key, and transform ``numpy`` integer types to `int`.
195239
**kwargs
196240
Additional keyword arguments are treated like additional key-value
197241
pairs in ``mapping``.
@@ -204,7 +248,9 @@ def standardize(
204248
Raises
205249
------
206250
TypeError
207-
Raised if the set of optional arguments provided is not supported.
251+
Raised if the set of optional arguments provided is not supported,
252+
or if a value has the wrong type and cannot be transformed to an
253+
alternate constraint.
208254
KeyError
209255
Raised if a key-value pair for a required dimension is missing.
210256
"""
@@ -234,15 +280,75 @@ def standardize(
234280
universe = defaults.universe
235281
else:
236282
raise TypeError("universe must be provided if graph and defaults are not.")
237-
if not (d.keys() <= universe.getStaticDimensions().names):
283+
non_dimension_keys = set(d.keys() - universe.getStaticDimensions().names)
284+
if non_dimension_keys and unused_constraints is not None:
285+
for key in non_dimension_keys:
286+
element_name, sep, attr_name = key.partition(".")
287+
if sep:
288+
try:
289+
element = universe[element_name]
290+
if attr_name in element.RecordClass.fields.names:
291+
unused_constraints[element_name][attr_name] = d.pop(key)
292+
except LookupError:
293+
# If this doesn't work, we just leave this key in
294+
# non_dimension_keys, and later exception-raising code
295+
# will take care of it.
296+
pass
297+
else:
298+
# This isn't a dimension name, and it isn't something like
299+
# 'element.attribute'; maybe it's an element attribute
300+
# where we have to infer the element(s).
301+
value = d[key]
302+
for element in universe.getStaticElements():
303+
if key in element.RecordClass.fields.names:
304+
unused_constraints[element.name][key] = value
305+
log.debug("Creating constraint %s.%s=%s from data ID key %s.",
306+
element.name, key, value, key)
307+
d.pop(key, None) # drop from dict the first time we use it.
308+
# Drop keys that we put into `unused` and dropped from `d`.
309+
non_dimension_keys.intersection_update(d.keys())
310+
if non_dimension_keys:
311+
# We still have some keys we don't recognize.
238312
# We silently ignore keys that aren't relevant for this particular
239313
# data ID, but keys that aren't relevant for any possible data ID
240314
# are a bug that we want to report to the user. This is especially
241315
# important because other code frequently forwards unrecognized
242316
# kwargs here.
243317
raise ValueError(
244-
f"Unrecognized key(s) for data ID: {d.keys() - universe.getStaticDimensions().names}."
318+
f"Unrecognized key(s) for data ID: {non_dimension_keys}. "
319+
"Note that non-dimension column constraints may only appear "
320+
"without the table/dimension name if it can be inferred from "
321+
"the set of dimensions to be constrained."
245322
)
323+
if check_types:
324+
for key, value in list(d.items()): # copy so we can remove in loop
325+
if isinstance(value, numbers.Integral): # type: ignore
326+
d[key] = value # type: ignore
327+
dimension = universe.getStaticDimensions()[key]
328+
if not isinstance(value, dimension.primaryKey.getPythonType()):
329+
if unused_constraints is not None:
330+
for alternate in dimension.alternateKeys:
331+
if isinstance(value, alternate.getPythonType()):
332+
unused_constraints[key][alternate.name] = value
333+
del d[key]
334+
log.debug("Converting dimension %s to %s.%s=%s",
335+
key, key, alternate.name, value)
336+
break
337+
else:
338+
expected = [str(dimension.primaryKey.getPythonType())]
339+
expected.extend(
340+
f"{alternate.getPythonType()} ({alternate.name})"
341+
for alternate in dimension.alternateKeys
342+
)
343+
raise TypeError(
344+
f"Wrong type for {key}={value}; expected one of "
345+
f"{expected}, got {type(value)}."
346+
)
347+
else:
348+
raise TypeError(
349+
f"Wrong type for {key}={value}; expected "
350+
f"{dimension.primaryKey.getPythonType()}, got {type(value)}."
351+
)
246352
if graph is None:
247353
graph = DimensionGraph(universe, names=d.keys())
248354
if not graph.dimensions:
@@ -295,16 +401,14 @@ def standardize(
295401
d.setdefault(dimension.name, None)
296402
r.setdefault(dimension.name, None)
297403
if d.keys() >= graph.dimensions.names:
298-
values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
404+
values = tuple(d.pop(name) for name in graph._dataCoordinateIndices.keys())
299405
else:
300406
try:
301-
values = tuple(d[name] for name in graph.required.names)
407+
values = tuple(d.pop(name) for name in graph.required.names)
302408
except KeyError as err:
303409
raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
304-
# Some backends cannot handle numpy.int64 type which is a subclass of
305-
# numbers.Integral; convert that to int.
306-
values = tuple(int(val) if isinstance(val, numbers.Integral) # type: ignore
307-
else val for val in values)
410+
if unused_dimensions is not None:
411+
unused_dimensions.update(d)
308412
result: DataCoordinate = _BasicTupleDataCoordinate(graph, values)
309413
if r.keys() >= graph.elements.names:
310414
result = result.expanded(r)

0 commit comments

Comments
 (0)