Commit
Merge pull request #630 from davidhassell/cfa-0.6
Changes to implement CFA-0.6
davidhassell authored Apr 24, 2023
2 parents c5e5201 + 31a53c4 commit 2b05275
Showing 65 changed files with 6,022 additions and 1,631 deletions.
12 changes: 11 additions & 1 deletion Changelog.rst
@@ -1,15 +1,23 @@
version 3.14.2
version 3.15.0
--------------

**2023-04-??**

* Re-introduction of CFA-netCDF functionality for CFA-0.6
(https://github.com/NCAS-CMS/cf-python/issues/451,
https://github.com/NCAS-CMS/cf-python/issues/475,
https://github.com/NCAS-CMS/cf-python/issues/637)
* New function: `cf.CFA`
* New method: `cf.Data.get_cfa_write`
* New method: `cf.Data.set_cfa_write`
* Fix excessive memory use arising from `cf.Field.regrids` and
`cf.Field.regridc`
(https://github.com/NCAS-CMS/cf-python/issues/623)
* Fixed bug in `cf.Field.read` when reading UM/PP data that are
runlength encoded (https://github.com/NCAS-CMS/cf-python/issues/621)
* Removed benign UserWarning from `cf.Field.percentile`
(https://github.com/NCAS-CMS/cf-python/issues/619)
* Changed dependency: ``1.10.1.0<=cfdm<1.10.2.0``

----

@@ -49,6 +57,8 @@ version 3.14.0
https://github.com/NCAS-CMS/cf-python/issues/428)
* Backwards incompatible API changes to facilitate the use of Dask
(https://github.com/NCAS-CMS/cf-python/issues/579)
* Removal of CFA-0.4 functionality (CFA-0.6 will be introduced at a later
  version).
* New method: `cf.Field.get_original_filenames`
(https://github.com/NCAS-CMS/cf-python/issues/448)
* New method: `cf.Field.to_dask_array`
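The changelog above names the new CFA write controls `cf.Data.get_cfa_write` and `cf.Data.set_cfa_write`. The following is a rough usage sketch only: it assumes that `get_cfa_write` takes no arguments and returns a boolean and that `set_cfa_write` accepts a single boolean, which the diff itself does not confirm.

    import cf

    # Illustrative sketch of the new CFA write controls (assumed
    # signatures: get_cfa_write() -> bool, set_cfa_write(bool)).
    f = cf.example_field(0)

    # Query whether the field's data would be written as a CFA
    # aggregation variable.
    print(f.data.get_cfa_write())

    # Request that the data be written as a CFA aggregation variable.
    f.data.set_cfa_write(True)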
9 changes: 5 additions & 4 deletions cf/__init__.py
@@ -97,6 +97,7 @@
raise ImportError(_error0 + str(error1))

__cf_version__ = cfdm.core.__cf_version__
__cfa_version__ = "0.6.2"

from packaging.version import Version
import importlib.util
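For orientation, the new `__cfa_version__` string can be compared with `packaging.version.Version` in the same way as the cfdm and dask checks below. This is an illustrative sketch, not code from the commit.

    from packaging.version import Version

    import cf

    # Illustrative guard: require CFA-0.6 support, assuming
    # cf.__cfa_version__ holds a PEP 440-style version string.
    if Version(cf.__cfa_version__) < Version("0.6"):
        raise RuntimeError("CFA-0.6 support is required")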
@@ -188,8 +189,8 @@
)

# Check the version of cfdm
_minimum_vn = "1.10.0.3"
_maximum_vn = "1.10.1.0"
_minimum_vn = "1.10.1.0"
_maximum_vn = "1.10.2.0"
_cfdm_version = Version(cfdm.__version__)
if not Version(_minimum_vn) <= _cfdm_version < Version(_maximum_vn):
raise RuntimeError(
@@ -198,7 +199,7 @@
)

# Check the version of dask
_minimum_vn = "2022.12.1"
_minimum_vn = "2022.02.1"
if Version(dask.__version__) < Version(_minimum_vn):
raise RuntimeError(
f"Bad dask version: cf requires dask>={_minimum_vn}. "
@@ -262,7 +263,7 @@
)

from .data.fragment import (
MissingFragmentArray,
FullFragmentArray,
NetCDFFragmentArray,
UMFragmentArray,
)
195 changes: 154 additions & 41 deletions cf/aggregate.py
@@ -2,19 +2,19 @@
from collections import namedtuple
from operator import itemgetter

import numpy as np
from cfdm import is_log_level_debug, is_log_level_detail, is_log_level_info
from numpy import argsort as numpy_argsort
from numpy import dtype as numpy_dtype
from numpy import sort as numpy_sort

from .auxiliarycoordinate import AuxiliaryCoordinate
from .data.data import Data
from .data import Data
from .data.array import FullArray
from .decorators import (
_manage_log_level_via_verbose_attr,
_manage_log_level_via_verbosity,
_reset_log_emergence_level,
)
from .domainaxis import DomainAxis
from .fieldancillary import FieldAncillary
from .fieldlist import FieldList
from .functions import _DEPRECATION_ERROR_FUNCTION_KWARGS, _numpy_allclose
from .functions import atol as cf_atol
@@ -26,7 +26,7 @@
logger = logging.getLogger(__name__)


_dtype_float = numpy_dtype(float)
_dtype_float = np.dtype(float)

# # --------------------------------------------------------------------
# # Global properties, as defined in Appendix A of the CF conventions.
@@ -134,10 +134,11 @@ def __init__(
equal=None,
exist=None,
ignore=None,
dimension=(),
dimension=None,
relaxed_identities=False,
ncvar_identities=False,
field_identity=None,
field_ancillaries=(),
copy=True,
):
"""**initialisation**
@@ -207,6 +208,11 @@ def __init__(
coordinate whose datum is the property's value and the
property itself is deleted from that field.
field_ancillaries: (sequence of) `str`, optional
See `cf.aggregate` for details.
.. versionadded:: TODOCFAVER
copy: `bool` optional
If False then do not copy fields prior to aggregation.
Setting this option to False may change input fields in
@@ -289,41 +295,22 @@ def __init__(
"no identity; consider setting " "relaxed_identities"
)
return
# elif not self.has_data:
# self.message = "{} has no data".format(f.__class__.__name__)
# return

# ------------------------------------------------------------
# Promote selected properties to 1-d, size 1 auxiliary
# coordinates
# coordinates with new independent domain axes
# ------------------------------------------------------------
_copy = copy
for prop in dimension:
value = f.get_property(prop, None)
if value is None:
continue

aux_coord = AuxiliaryCoordinate(
properties={"long_name": prop},
data=Data([value], units=""),
copy=False,
)
aux_coord.nc_set_variable(prop)
aux_coord.id = prop

if _copy:
# Copy the field, as we're about to change it.
f = f.copy()
self.field = f
_copy = False

axis = f.set_construct(DomainAxis(1))
f.set_construct(aux_coord, axes=[axis], copy=False)
if dimension:
f = self.promote_to_auxiliary_coordinate(dimension)

f.del_property(prop)
# ------------------------------------------------------------
# Promote selected properties to field ancillaries that span
# the same domain axes as the field
# ------------------------------------------------------------
if field_ancillaries:
f = self.promote_to_field_ancillary(field_ancillaries)

if dimension:
construct_axes = f.constructs.data_axes()
construct_axes = f.constructs.data_axes()

self.units = self.canonical_units(
f, self.identity, relaxed_units=relaxed_units
@@ -400,7 +387,6 @@ def __init__(
"coordrefs": self.find_coordrefs(axis),
}
)
# 'size' : None})

# Find the 1-d auxiliary coordinates which span this axis
aux_coords = {
@@ -546,10 +532,10 @@ def __init__(
# Field ancillaries
# ------------------------------------------------------------
self.field_anc = {}
field_ancillaries = f.constructs.filter_by_type(
field_ancs = f.constructs.filter_by_type(
"field_ancillary", todict=True
)
for key, field_anc in field_ancillaries.items():
for key, field_anc in field_ancs.items():
# Find this field ancillary's identity
identity = self.field_ancillary_has_identity_and_data(field_anc)
if identity is None:
@@ -1395,6 +1381,117 @@ def find_coordrefs(self, key):

return tuple(sorted(names))

def promote_to_auxiliary_coordinate(self, properties):
"""Promote properties to auxiliary coordinate constructs.
Each property is converted to a 1-d auxiliary coordinate
construct that spans a new independent size 1 domain axis of
the field, and the property is deleted.

.. versionadded:: TODOCFAVER

:Parameters:

    properties: sequence of `str`
        The names of the properties to be promoted.

:Returns:

    `Field` or `Domain`
        The field or domain with the new auxiliary coordinate
        constructs.
"""
f = self.field

copy = True
for prop in properties:
value = f.get_property(prop, None)
if value is None:
continue

aux_coord = AuxiliaryCoordinate(
properties={"long_name": prop},
data=Data([value]),
copy=False,
)
aux_coord.nc_set_variable(prop)
aux_coord.id = prop

if copy:
# Copy the field as we're about to change it
f = f.copy()
copy = False

axis = f.set_construct(DomainAxis(1))
f.set_construct(aux_coord, axes=[axis], copy=False)
f.del_property(prop)

self.field = f
return f
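As a usage sketch (not part of this commit), the `dimension` keyword of `cf.aggregate` is what drives this promotion; the file pattern and property name below are hypothetical.

    import cf

    # Hypothetical input files that differ only in their "experiment"
    # file property.
    fields = cf.read("run_*.nc")

    # Promote the "experiment" property of each input field to a 1-d,
    # size 1 auxiliary coordinate on a new domain axis, allowing the
    # fields to be aggregated along that axis.
    combined = cf.aggregate(fields, dimension="experiment")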

def promote_to_field_ancillary(self, properties):
"""Promote properties to field ancillary constructs.
For each input field, each property is converted to a field
ancillary construct that spans the entire domain, with the
constant value of the property.

The `Data` of any new field ancillary construct is marked
as a CFA term, meaning that it will only be written to disk if
the parent field construct is written as a CFA aggregation
variable, and in that case the field ancillary is written as a
non-standard CFA aggregation instruction variable, rather than
a CF-netCDF ancillary variable.

If a domain construct is being aggregated then it is always
returned unchanged.

.. versionadded:: TODOCFAVER

:Parameters:

    properties: sequence of `str`
        The names of the properties to be promoted.

:Returns:

    `Field` or `Domain`
        The field or domain with the new field ancillary
        constructs.
"""
f = self.field
if f.construct_type != "field":
return f

copy = True
for prop in properties:
value = f.get_property(prop, None)
if value is None:
continue

data = Data(
FullArray(value, shape=f.shape, dtype=np.array(value).dtype)
)
data._cfa_set_term(True)

field_anc = FieldAncillary(
data=data, properties={"long_name": prop}, copy=False
)
field_anc.id = prop

if copy:
# Copy the field as we're about to change it
f = f.copy()
copy = False

f.set_construct(field_anc, axes=f.get_data_axes(), copy=False)
f.del_property(prop)

self.field = f
return f
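A corresponding usage sketch for the new `field_ancillaries` option of `cf.aggregate`, documented further down in this diff; the file pattern and property name are hypothetical.

    import cf

    # Hypothetical input files that each carry a "model_id" property.
    fields = cf.read("member_*.nc")

    # Promote the "model_id" property of each input field to a field
    # ancillary construct spanning the whole domain, holding the
    # constant value of the property; the property itself is deleted.
    combined = cf.aggregate(fields, field_ancillaries="model_id")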


@_manage_log_level_via_verbosity
def aggregate(
@@ -1423,6 +1520,7 @@ def aggregate(
no_overlap=False,
shared_nc_domain=False,
field_identity=None,
field_ancillaries=None,
info=False,
):
"""Aggregate field constructs into as few field constructs as
@@ -1649,6 +1747,16 @@ def aggregate(
numbers. The default value is set by the
`cf.rtol` function.
field_ancillaries: (sequence of) `str`, optional
Create new field ancillary constructs for each input field
which has one or more of the given properties. For each
input field, each property is converted to a field
ancillary construct that spans the entire domain, with the
constant value of the property, and the property itself is
deleted.
.. versionadded:: TODOCFAVER
no_overlap:
Use the *overlap* parameter instead.
@@ -1705,6 +1813,7 @@ def aggregate(
"\ninfo=2 maps to verbose=3"
"\ninfo=3 maps to verbose=-1",
version="3.5.0",
removed_at="4.0.0",
) # pragma: no cover

# Initialise the cache for coordinate and cell measure hashes,
@@ -1738,6 +1847,9 @@ def aggregate(
if isinstance(dimension, str):
dimension = (dimension,)

if isinstance(field_ancillaries, str):
field_ancillaries = (field_ancillaries,)

if exist_all and equal_all:
raise ValueError(
"Only one of 'exist_all' and 'equal_all' can be True, since "
@@ -1808,6 +1920,7 @@ def aggregate(
ncvar_identities=ncvar_identities,
field_identity=field_identity,
respect_valid=respect_valid,
field_ancillaries=field_ancillaries,
copy=copy,
)

@@ -2220,7 +2333,7 @@ def _create_hash_and_first_values(
# ... or which doesn't have a dimension coordinate but
# does have one or more 1-d auxiliary coordinates
aux = m_axis_identity["keys"][0]
sort_indices = numpy_argsort(field.constructs[aux].array)
sort_indices = np.argsort(field.constructs[aux].array)
m_sort_keys[axis] = aux
null_sort = False

@@ -2662,8 +2775,8 @@ def _get_hfl(

if create_flb:
# Record the bounds of the first and last (sorted) cells
first = numpy_sort(array[0, ...])
last = numpy_sort(array[-1, ...])
first = np.sort(array[0, ...])
last = np.sort(array[-1, ...])
hfl_cache.flb[key] = (first, last)

if first_and_last_values or first_and_last_bounds:

0 comments on commit 2b05275
