3636)
3737
3838from abc import abstractmethod
39+ import logging
3940import numbers
4041from typing import (
4142 AbstractSet ,
6364 from .._containers import HeterogeneousDimensionRecordAbstractSet
6465 from ...registry import Registry
6566
67+ log = logging .getLogger (__name__ )
68+
6669DataIdKey = Union [str , Dimension ]
6770"""Type annotation alias for the keys that can be used to index a
6871DataCoordinate.
@@ -164,6 +167,9 @@ def standardize(
164167 universe : Optional [DimensionUniverse ] = None ,
165168 defaults : Optional [DataCoordinate ] = None ,
166169 records : Optional [HeterogeneousDimensionRecordAbstractSet ] = None ,
170+ unused_dimensions : Optional [Dict [str , DataIdValue ]] = None ,
171+ unused_constraints : Optional [NameLookupMapping [DimensionElement , Dict [str , Any ]]] = None ,
172+ check_types : Optional [bool ] = None ,
167173 ** kwargs : Any
168174 ) -> DataCoordinate :
169175 """Standardize the supplied dataId.
@@ -192,6 +198,44 @@ def standardize(
192198 Container of `DimensionRecord` instances that may be used to
193199 fill in missing keys and/or attach records. If provided, the
194200 returned object is guaranteed to have `hasRecords` return `True`.
201+ unused_dimensions : `dict`, optional
202+ A mapping that will be populated with any given key-value pairs
203+ that identify unrelated dimensions or implied dimensions that could
204+ not be returned (i.e. when some implied dimensions are missing, and
205+ hence `hasFull` cannot be `True` on the returned object).
206+ unused_constraints : `dict`, optional
207+ A mapping that will be populated with any given key-value pairs
208+ that cannot be included in the returned `DataCoordinate`. These
209+ fall into three categories:
210+
211+ - Fully-qualified constraints on `DimensionRecord` fields (e.g.
212+ ``exposure.day_obs=20250101``).
213+
214+ - Unqualified constraints on `DimensionRecord` fields (e.g.
215+ ``day_obs=20250101``). These are expanded to constraints on
216+ all matching elements in the dimension universe.
217+
218+ - Values for dimensions that have the wrong type for the primary
219+ key, but can be transformed into constraints on an alternate key
220+ (e.g. ``detector='S11R11'`` ->
221+ ``detector.full_name='S11R11'``). These are only considered if
222+ ``check_types`` is `True`.
223+
224+ If this argument is not `None`, it should be a mapping that
225+ supports lookups for all `DimensionElement` names in the universe,
226+ returning a `dict` to be populated with ``field: value``
227+ constraints. ``defaultdict(dict)`` is a convenient way to
228+ construct such a mapping.
229+
230+ If this argument is `None` (the default), it is assumed that
231+ calling code cannot make use of attribute constraints, and
232+ `ValueError` will be raised if they appear. Key-value pairs for
233+ dimensions that are not part of the graph are silently ignored,
234+ as are those for implied dimensions when some implied dimensions
235+ are missing.
236+ check_types : `bool`, optional
237+ If `True` (default) check value types against the expected types
238+ for that key, and transform ``numpy`` integer types to `int`.
195239 **kwargs
196240 Additional keyword arguments are treated like additional key-value
197241 pairs in ``mapping``.
@@ -204,7 +248,9 @@ def standardize(
204248 Raises
205249 ------
206250 TypeError
207- Raised if the set of optional arguments provided is not supported.
251+ Raised if the set of optional arguments provided is not supported,
252+ or if a value has the wrong type and cannot be transformed to an
253+ alternate constraint.
208254 KeyError
209255 Raised if a key-value pair for a required dimension is missing.
210256 """
@@ -234,15 +280,75 @@ def standardize(
234280 universe = defaults .universe
235281 else :
236282 raise TypeError ("universe must be provided if graph and defaults are not." )
237- if not (d .keys () <= universe .getStaticDimensions ().names ):
283+ non_dimension_keys = set (d .keys () - universe .getStaticDimensions ().names )
284+ if non_dimension_keys and unused_constraints is not None :
285+ for key in non_dimension_keys :
286+ element_name , sep , attr_name = key .partition ("." )
287+ if sep :
288+ try :
289+ element = universe [element_name ]
290+ if attr_name in element .RecordClass .fields .names :
291+ unused_constraints [element_name ][attr_name ] = d .pop (key )
292+ except LookupError :
293+ # If this doesn't work, we just leave this key in
294+ # non_dimension_keys, and later exception-raising code
295+ # will take care of it.
296+ pass
297+ else :
298+ # This isn't a dimension name, and it isn't something like
299+ # 'element.attribute'; maybe it's an element attribute
300+ # where we have to infer the element(s).
301+ value = d [key ]
302+ for element in universe .getStaticElements ():
303+ if key in element .RecordClass .fields .names :
304+ unused_constraints [element .name ][key ] = value
305+ log .debug ("Creating constraint %s.%s=%s from data ID key %s." ,
306+ element .name , key , value , key )
307+ d .pop (key , None ) # drop from dict the first time we use it.
308+ # Drop keys that we moved into `unused_constraints` and removed from `d`.
309+ non_dimension_keys .intersection_update (d .keys ())
310+ if non_dimension_keys :
311+ # We still have some keys we don't recognize.
238312 # We silently ignore keys that aren't relevant for this particular
239313 # data ID, but keys that aren't relevant for any possible data ID
240314 # are a bug that we want to report to the user. This is especially
241315 # important because other code frequently forwards unrecognized
242316 # kwargs here.
243317 raise ValueError (
244- f"Unrecognized key(s) for data ID: { d .keys () - universe .getStaticDimensions ().names } ."
318+ f"Unrecognized key(s) for data ID: { non_dimension_keys } . "
319+ "Note that non-dimension column constraints may only appear "
320+ "without the table/dimension name if it can be inferred from "
321+ "the set of dimensions to be constrained."
245322 )
323+ if check_types :
324+ for key , value in list (d .items ()): # copy so we can remove in loop
325+ if isinstance (value , numbers .Integral ): # type: ignore
326+ d [key ] = value # type: ignore
327+ dimension = universe .getStaticDimensions ()[key ]
328+ if not isinstance (value , dimension .primaryKey .getPythonType ()):
329+ if unused_constraints is not None :
330+ for alternate in dimension .alternateKeys :
331+ if isinstance (value , alternate .getPythonType ()):
332+ unused_constraints [key ][alternate .name ] = value
333+ del d [key ]
334+ log .debug ("Converting dimension %s to %s.%s=%s" ,
335+ key , key , alternate .name , value )
336+ break
337+ else :
338+ expected = [str (dimension .primaryKey .getPythonType ())]
339+ expected .extend (
340+ f"{ alternate .getPythonType ()} ({ alternate .name } )"
341+ for alternate in dimension .alternateKeys
342+ )
343+ raise TypeError (
344+ f"Wrong type for { key } ={ value } ; expected one of "
345+ f"{ expected } , got { type (value )} ."
346+ )
347+ else :
348+ raise TypeError (
349+ f"Wrong type for { key } ={ value } ; expected "
350+ f"{ dimension .primaryKey .getPythonType ()} , got { type (value )} ."
351+ )
246352 if graph is None :
247353 graph = DimensionGraph (universe , names = d .keys ())
248354 if not graph .dimensions :
@@ -295,16 +401,14 @@ def standardize(
295401 d .setdefault (dimension .name , None )
296402 r .setdefault (dimension .name , None )
297403 if d .keys () >= graph .dimensions .names :
298- values = tuple (d [ name ] for name in graph ._dataCoordinateIndices .keys ())
404+ values = tuple (d . pop ( name ) for name in graph ._dataCoordinateIndices .keys ())
299405 else :
300406 try :
301- values = tuple (d [ name ] for name in graph .required .names )
407+ values = tuple (d . pop ( name ) for name in graph .required .names )
302408 except KeyError as err :
303409 raise KeyError (f"No value in data ID ({ mapping } ) for required dimension { err } ." ) from err
304- # Some backends cannot handle numpy.int64 type which is a subclass of
305- # numbers.Integral; convert that to int.
306- values = tuple (int (val ) if isinstance (val , numbers .Integral ) # type: ignore
307- else val for val in values )
410+ if unused_dimensions is not None :
411+ unused_dimensions .update (d )
308412 result : DataCoordinate = _BasicTupleDataCoordinate (graph , values )
309413 if r .keys () >= graph .elements .names :
310414 result = result .expanded (r )
0 commit comments