diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index 65947d33..00000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: pre-commit - -on: - pull_request: - push: - branches: [main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: pre-commit/action@v3.0.1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc5c1d05..5952a6f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,14 +5,34 @@ repos: - id: trailing-whitespace - id: check-ast - id: debug-statements + - id: end-of-file-fixer + - id: check-docstring-first - id: check-added-large-files - id: requirements-txt-fixer - id: file-contents-sorter + files: requirements-dev.txt -- repo: https://github.com/PyCQA/flake8 - rev: 7.1.0 +- repo: https://github.com/asottile/add-trailing-comma + rev: v3.1.0 hooks: - - id: flake8 + - id: add-trailing-comma + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.9 + hooks: + - id: ruff + args: ["--fix", "--show-fixes"] + - id: ruff-format + +- repo: https://github.com/bdice/nb-strip-paths + rev: v0.1.0 + hooks: + - id: nb-strip-paths + +- repo: https://github.com/tox-dev/pyproject-fmt + rev: 2.1.3 + hooks: + - id: pyproject-fmt - repo: https://github.com/codespell-project/codespell rev: v2.3.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 47ec88b3..18f3991b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -245,5 +245,3 @@ importance of writing tested and maintainable software. * [How to Contribute to Open Source Guide](https://opensource.guide/how-to-contribute/) * [Zen of Scientific Software Maintenance](https://jrleeman.github.io/ScientificSoftwareMaintenance/) - - diff --git a/LICENSE.txt b/LICENSE.txt index e06d2081..5c304d1a 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -199,4 +199,3 @@ Apache License WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - diff --git a/docs/source/conf.py b/docs/source/conf.py index d4930282..8fc88b2e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # # ioos_qc documentation build configuration file, created by -# sphinx-quickstart on Thu Aug 18 10:56:41 2016. +# sphinx-quickstart on Mon Oct 9 21:28:42 2017. # # This file is execfile()d with the current directory set to its # containing dir. @@ -16,13 +16,10 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -import os -import sys +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) -p = os.path.abspath( - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), -) -sys.path.insert(0, p) # -- General configuration ------------------------------------------------ @@ -74,19 +71,20 @@ master_doc = "index" # General information about the project. +import datetime + +year = datetime.datetime.now(tz=datetime.timezone.utc).date().year project = "ioos_qc" -copyright = "2022, IOOS" +copyright = f"2022-{year}, IOOS" author = "IOOS" # The version info for the project yo're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -import ioos_qc # noqa -# The short X.Y version. 
-version = ioos_qc.__version__ -# The full version, including alpha/beta/rc tags. -release = ioos_qc.__version__ +import ioos_qc + +version = release = ioos_qc.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -274,15 +272,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -292,8 +287,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "ioos_qc.tex", "ioos_qc Documentation", - "IOOS", "manual"), + ( + master_doc, + "ioos_qc.tex", + "ioos_qc Documentation", + "IOOS", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/docs/source/examples/QartodTestExample_SalinityAttenuation.ipynb b/docs/source/examples/QartodTestExample_SalinityAttenuation.ipynb index 7a320cba..285e3987 100644 --- a/docs/source/examples/QartodTestExample_SalinityAttenuation.ipynb +++ b/docs/source/examples/QartodTestExample_SalinityAttenuation.ipynb @@ -12,7 +12,7 @@ "The selected time period shows the tidal influence on salinity over a spring-neap time period.\n", "Near the end of the selected period, there is a decrease in the range of salinity corresponding with biofouling.\n", "\n", - "The data was downloaded from the [Center for Coastal Margin and Prediction (CMOP) Data Explorer](http://amb6400b.stccmop.org/ws/product/offeringplot_ctime.py?handlegaps=true&series=time,sandi.790.A.CTD.salt.PD0&width=8.54&height=2.92&starttime=2001-7-1%200:00&endtime=2001-09-5%2023:59)." + "The data was downloaded from the [Center for Coastal Margin and Prediction (CMOP) Data Explorer](https://amb6400b.stccmop.org/ws/product/offeringplot_ctime.py?handlegaps=true&series=time,sandi.790.A.CTD.salt.PD0&width=8.54&height=2.92&starttime=2001-7-1%200:00&endtime=2001-09-5%2023:59)." 
] }, { @@ -531,7 +531,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.12.4" }, "nbsphinx": { "orphan": true diff --git a/docs/source/examples/QartodTestExample_netCDF.ipynb b/docs/source/examples/QartodTestExample_netCDF.ipynb index f6ce35d9..2a6a1f34 100644 --- a/docs/source/examples/QartodTestExample_netCDF.ipynb +++ b/docs/source/examples/QartodTestExample_netCDF.ipynb @@ -118,7 +118,7 @@ "outputs": [], "source": [ "import xarray as xr\n", - "from erddapy.url_handling import urlopen\n", + "from erddapy.core.url import urlopen\n", "from netCDF4 import Dataset\n", "\n", "\n", @@ -549,7 +549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.12.4" }, "nbsphinx": { "orphan": true diff --git a/docs/source/examples/usage/Config.ipynb b/docs/source/examples/usage/Config.ipynb index 6e1f4af0..1cc941ff 100644 --- a/docs/source/examples/usage/Config.ipynb +++ b/docs/source/examples/usage/Config.ipynb @@ -111,4 +111,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/examples/usage/ContextConfig.ipynb b/docs/source/examples/usage/ContextConfig.ipynb index 03b28aaf..13c757b7 100644 --- a/docs/source/examples/usage/ContextConfig.ipynb +++ b/docs/source/examples/usage/ContextConfig.ipynb @@ -97,4 +97,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/examples/usage/NetcdfStream.ipynb b/docs/source/examples/usage/NetcdfStream.ipynb index 8351c3dc..aa27bdf8 100644 --- a/docs/source/examples/usage/NetcdfStream.ipynb +++ b/docs/source/examples/usage/NetcdfStream.ipynb @@ -171,4 +171,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/examples/usage/NumpyStream.ipynb b/docs/source/examples/usage/NumpyStream.ipynb index 7047d2b3..ae6542ca 100644 --- a/docs/source/examples/usage/NumpyStream.ipynb +++ b/docs/source/examples/usage/NumpyStream.ipynb @@ -163,4 +163,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/examples/usage/PandasStream.ipynb b/docs/source/examples/usage/PandasStream.ipynb index a3ad2210..9785849d 100644 --- a/docs/source/examples/usage/PandasStream.ipynb +++ b/docs/source/examples/usage/PandasStream.ipynb @@ -207,4 +207,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/examples/usage/XarrayStream.ipynb b/docs/source/examples/usage/XarrayStream.ipynb index 271d30da..fdfe2842 100644 --- a/docs/source/examples/usage/XarrayStream.ipynb +++ b/docs/source/examples/usage/XarrayStream.ipynb @@ -177,4 +177,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 0ce8ca48..c4bd08f3 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -4,4 +4,3 @@ Installation Install with `conda `_:: $ conda install -c conda-forge ioos_qc - diff --git a/docs/source/releases.rst b/docs/source/releases.rst index 5b4acdd9..9b979d77 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -139,4 +139,3 @@ Specific changes to be aware of include: #################### See https://github.com/ioos/qartod and https://ioos.github.io/qartod/ - diff --git a/ioos_qc/__init__.py b/ioos_qc/__init__.py index 2e8b85f5..78dd2795 100644 --- a/ioos_qc/__init__.py +++ b/ioos_qc/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +"""IOOS QARTOD and other Quality Control tests implemented in Python.""" try: from ._version import __version__ diff --git a/ioos_qc/argo.py b/ioos_qc/argo.py index 
451ceed3..f7fad14d 100644 --- a/ioos_qc/argo.py +++ b/ioos_qc/argo.py @@ -1,5 +1,6 @@ #!/usr/bin/env python """Tests based on the ARGO QC manual.""" + import logging import warnings from numbers import Real as N @@ -13,11 +14,13 @@ L = logging.getLogger(__name__) -@add_flag_metadata(stanard_name="pressure_increasing_test_quality_flag", - long_name="Pressure Increasing Test Quality Flag") +@add_flag_metadata( + stanard_name="pressure_increasing_test_quality_flag", + long_name="Pressure Increasing Test Quality Flag", +) def pressure_increasing_test(inp): """Returns an array of flag values where each input is flagged with SUSPECT if - it does not monotonically increase + it does not monotonically increase. Ref: ARGO QC Manual: 8. Pressure increasing test @@ -44,14 +47,17 @@ def pressure_increasing_test(inp): return flags -@add_flag_metadata(standard_name="speed_test_quality_flag", - long_name="Speed Test Quality Flag") -def speed_test(lon: Sequence[N], - lat: Sequence[N], - tinp: Sequence[N], - suspect_threshold: float, - fail_threshold: float, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="speed_test_quality_flag", + long_name="Speed Test Quality Flag", +) +def speed_test( + lon: Sequence[N], + lat: Sequence[N], + tinp: Sequence[N], + suspect_threshold: float, + fail_threshold: float, +) -> np.ma.core.MaskedArray: """Checks that the calculated speed between two points is within reasonable bounds. This test calculates a speed between subsequent points by @@ -91,7 +97,8 @@ def speed_test(lon: Sequence[N], tinp = mapdates(tinp) if lon.shape != lat.shape or lon.shape != tinp.shape: - raise ValueError(f"Lon ({lon.shape}) and lat ({lat.shape}) and tinp ({tinp.shape}) must be the same shape") + msg = f"Lon ({lon.shape}) and lat ({lat.shape}) and tinp ({tinp.shape}) must be the same shape" + raise ValueError(msg) # Save original shape original_shape = lon.shape @@ -120,7 +127,9 @@ def speed_test(lon: Sequence[N], # calculate speed in m/s speed = np.ma.zeros(tinp.size, dtype="float") - speed[1:] = np.abs(dist[1:] / np.diff(tinp).astype("timedelta64[s]").astype(float)) + speed[1:] = np.abs( + dist[1:] / np.diff(tinp).astype("timedelta64[s]").astype(float), + ) with np.errstate(invalid="ignore"): flag_arr[speed > suspect_threshold] = QartodFlags.SUSPECT diff --git a/ioos_qc/axds.py b/ioos_qc/axds.py index 8b18ba66..f3d6baa4 100644 --- a/ioos_qc/axds.py +++ b/ioos_qc/axds.py @@ -1,5 +1,6 @@ #!/usr/bin/env python """Tests based on the IOOS QARTOD manuals.""" + import logging from collections import namedtuple from typing import Sequence, Tuple @@ -17,14 +18,17 @@ span = namedtuple("Span", "minv maxv") -@add_flag_metadata(standard_name="gross_range_test_quality_flag", - long_name="Gross Range Test Quality Flag") -def valid_range_test(inp : Sequence[any], - valid_span : Tuple[any, any], - dtype : np.dtype = None, - start_inclusive : bool = True, - end_inclusive : bool = False, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="gross_range_test_quality_flag", + long_name="Gross Range Test Quality Flag", +) +def valid_range_test( + inp: Sequence[any], + valid_span: Tuple[any, any], + dtype: np.dtype = None, + start_inclusive: bool = True, + end_inclusive: bool = False, +) -> np.ma.core.MaskedArray: """Checks that values are within a min/max range. This is not unlike a `qartod.gross_range_test` with fail and suspect bounds being equal, except that here we specify the inclusive range that should pass instead of the exclusive bounds which should fail. 
This also supports datetime-like @@ -60,14 +64,20 @@ def valid_range_test(inp : Sequence[any], # This is required because we don't want to restrict a user from using a pd.Series # directly with this function. If the data was coming from a Store, it would # always be a numpy array. - elif dtype is None and hasattr(inp, "values") and hasattr(inp.values, "dtype"): + elif ( + dtype is None + and hasattr(inp, "values") + and hasattr(inp.values, "dtype") + ): dtype = inp.values.dtype # Save original shape original_shape = inp.shape if dtype is None: - L.warning("Trying to guess data input type, try specifying the dtype parameter") + L.warning( + "Trying to guess data input type, try specifying the dtype parameter", + ) # Try to figure out the dtype so masked values can be calculated try: # Try datetime-like objects @@ -77,11 +87,15 @@ def valid_range_test(inp : Sequence[any], try: # Try floating point numbers inp = np.ma.masked_invalid(np.array(inp).astype(np.floating)) - valid_span = np.ma.masked_invalid(np.array(valid_span).astype(np.floating)) + valid_span = np.ma.masked_invalid( + np.array(valid_span).astype(np.floating), + ) except BaseException: # Well, we tried. + msg = "Could not determine the type of input, try using the dtype parameter" raise ValueError( - "Could not determine the type of input, try using the dtype parameter") + msg, + ) else: inp = np.ma.masked_invalid(np.array(inp, dtype=dtype)) valid_span = np.ma.masked_invalid(np.array(valid_span, dtype=dtype)) diff --git a/ioos_qc/config.py b/ioos_qc/config.py index 465a984d..7b98395a 100644 --- a/ioos_qc/config.py +++ b/ioos_qc/config.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -"""QC Config objects +"""QC Config objects. Module to store the different QC modules in ioos_qc @@ -8,6 +8,7 @@ tw (namedtuple): The TimeWindow namedtuple definition """ + import io import logging import warnings @@ -53,7 +54,7 @@ def __key__(self): def __hash__(self): return hash(self.__key__()) - def __repr__(self): + def __repr__(self) -> str: return f"" @@ -125,7 +126,7 @@ def __eq__(self, other): return self.__key__() == other.__key__() return NotImplemented - def __repr__(self): + def __repr__(self) -> str: ret = f" List[Call]: - """Extracts call objects from a source object + """Extracts call objects from a source object. Args: ---- @@ -201,13 +200,12 @@ def extract_calls(source) -> List[Call]: return [source] elif isinstance(source, (tuple, list)): # list of Call objects - calls = [ c for c in source if isinstance(c, Call) ] + calls = [c for c in source if isinstance(c, Call)] # list of objects with the 'calls' attribute [ - calls.extend([ - x for x in c.calls if isinstance(x, Call) - ]) - for c in source if hasattr(c, "calls") + calls.extend([x for x in c.calls if isinstance(x, Call)]) + for c in source + if hasattr(c, "calls") ] return calls elif isinstance(source, Config): @@ -230,7 +228,12 @@ class Config: class only pairs various formats and versions of a config into a list of Call objects. 
""" - def __init__(self, source, version=None, default_stream_key="_stream"): + def __init__( + self, + source, + version=None, + default_stream_key="_stream", + ) -> None: """Args: ---- source: The QC configuration representation in one of the following formats: @@ -266,16 +269,21 @@ def __init__(self, source, version=None, default_stream_key="_stream"): self._calls += list(ContextConfig(self.config).calls) elif dict_depth(self.config) >= 4: # This is a Config - self._calls += list(ContextConfig(odict(streams=self.config)).calls) + self._calls += list( + ContextConfig(odict(streams=self.config)).calls, + ) else: # This is a QcConfig - self._calls += list(ContextConfig(odict(streams={default_stream_key: self.config})).calls) - #raise ValueError("Can not add context to a QC Config object. Create it manually.") + self._calls += list( + ContextConfig( + odict(streams={default_stream_key: self.config}), + ).calls, + ) + # raise ValueError("Can not add context to a QC Config object. Create it manually.") @property def contexts(self): - """Group the calls into context groups and return them - """ + """Group the calls into context groups and return them.""" contexts = {} for c in self._calls: if c.context in contexts: @@ -286,8 +294,7 @@ def contexts(self): @property def stream_ids(self): - """Return a list of unique stream_ids for the Config - """ + """Return a list of unique stream_ids for the Config.""" streams = [] stream_map = {} @@ -310,21 +317,23 @@ def calls(self): @property def aggregate_calls(self): return [ - c for c in self._calls + c + for c in self._calls if hasattr(c.func, "aggregate") and c.func.aggregate is True ] - def has(self, stream_id : str, method: Union[callable, str]): + def has(self, stream_id: str, method: Union[callable, str]): if isinstance(method, str): for c in self._calls: if c.stream_id == stream_id and c.method_path == method: return c elif isinstance(method, callable): for c in self._calls: - if (c.stream_id == stream_id and - c.method == method.__module__ and - c.method == method.__name__ - ): + if ( + c.stream_id == stream_id + and c.method == method.__module__ + and c.method == method.__name__ + ): return c return False @@ -355,7 +364,7 @@ def add(self, source) -> None: class ContextConfig: - """A collection of a Region, a TimeWindow and a list of Config objects + """A collection of a Region, a TimeWindow and a list of Config objects. Defines a set of quality checks to run against multiple input streams. This can include a region and a time window to subset any DataStreams by before running checks. 
@@ -389,7 +398,7 @@ class ContextConfig: """ - def __init__(self, source: ConfigTypes): + def __init__(self, source: ConfigTypes) -> None: self.config = load_config_as_dict(source) self._calls = [] @@ -403,16 +412,23 @@ def __init__(self, source: ConfigTypes): self.region = self.config["region"] elif self.config["region"] and "features" in self.config["region"]: # Feature based GeoJSON - self.region = GeometryCollection([ - shape(feature["geometry"]) for feature in self.config["region"]["features"] - ]) + self.region = GeometryCollection( + [ + shape(feature["geometry"]) + for feature in self.config["region"]["features"] + ], + ) elif self.config["region"] and "geometry" in self.config["region"]: # Geometry based GeoJSON - self.region = GeometryCollection([ - shape(self.config["region"]["geometry"]), - ]) + self.region = GeometryCollection( + [ + shape(self.config["region"]["geometry"]), + ], + ) else: - L.warning("Ignoring region because it could not be parsed, is it valid GeoJSON?") + L.warning( + "Ignoring region because it could not be parsed, is it valid GeoJSON?", + ) # Window if "window" in self.config and isinstance(self.config["window"], tw): @@ -436,18 +452,21 @@ def __init__(self, source: ConfigTypes): run later by iterating over the configs. """ for stream_id, sc in self.config["streams"].items(): - for package, modules in sc.items(): try: testpackage = import_module(f"ioos_qc.{package}") except ImportError: - L.warning(f'No ioos_qc package "{package}" was found, skipping.') + L.warning( + f'No ioos_qc package "{package}" was found, skipping.', + ) continue for testname, kwargs in modules.items(): kwargs = kwargs or {} if not hasattr(testpackage, testname): - L.warning(f'No ioos_qc method "{package}.{testname}" was found, skipping') + L.warning( + f'No ioos_qc method "{package}.{testname}" was found, skipping', + ) continue else: runfunc = getattr(testpackage, testname) @@ -481,9 +500,9 @@ def add(self, source) -> None: """ extracted = extract_calls(source) - self._calls.extend([ e for e in extracted if e.context == self.context ]) + self._calls.extend([e for e in extracted if e.context == self.context]) - def __str__(self): + def __str__(self) -> str: # sc = list(self.streams.keys()) return ( f"" ) - def __repr__(self): + def __repr__(self) -> str: return self.__str__() class QcConfig(Config): - def __init__(self, source, default_stream_key="_stream"): + def __init__(self, source, default_stream_key="_stream") -> None: """A Config objects with no concept of a Stream ID. Typically used when running QC on a single stream. This just sets up a stream with the name passed in as the "default_stream_key" parameter. 
@@ -525,6 +544,7 @@ def __init__(self, source, default_stream_key="_stream"): def run(self, **passedkwargs): from ioos_qc.streams import NumpyStream + # Cleanup kwarg names passedkwargs["time"] = passedkwargs.pop("tinp", None) passedkwargs["z"] = passedkwargs.pop("zinp", None) @@ -543,10 +563,11 @@ def run(self, **passedkwargs): class NcQcConfig(Config): - def __init__(self, *args, **kwargs): + def __init__(self, *args, **kwargs) -> None: + msg = ( + "The NcQcConfig object has been replaced by ioos_qc.config.Config " + "and ioos_qc.streams.XarrayStream" + ) raise NotImplementedError( - - "The NcQcConfig object has been replaced by ioos_qc.config.Config " - "and ioos_qc.streams.XarrayStream", - + msg, ) diff --git a/ioos_qc/config_creator/config_creator.py b/ioos_qc/config_creator/config_creator.py index 12727c8f..fb7cbb95 100644 --- a/ioos_qc/config_creator/config_creator.py +++ b/ioos_qc/config_creator/config_creator.py @@ -145,14 +145,15 @@ class CreatorConfig(dict): :param dict: JSON schema for CreatorConfig """ - def __init__(self, path_or_dict, schema=QC_CONFIG_CREATOR_SCHEMA): + def __init__(self, path_or_dict, schema=QC_CONFIG_CREATOR_SCHEMA) -> None: if isinstance(path_or_dict, (Path, str)): with open(path_or_dict) as f: config = json.load(f) elif isinstance(path_or_dict, dict): config = path_or_dict else: - raise ValueError("Input is not valid file path or dict") + msg = "Input is not valid file path or dict" + raise ValueError(msg) validate(instance=config, schema=schema) datasets = {} @@ -170,7 +171,7 @@ def __init__(self, path_or_dict, schema=QC_CONFIG_CREATOR_SCHEMA): } self.update(datasets) - def __str__(self): + def __str__(self) -> str: return json.dumps(self, indent=4, sort_keys=True) @@ -203,20 +204,21 @@ class QcVariableConfig(dict): ")", ] - def __init__(self, path_or_dict, schema=VARIABLE_CONFIG_SCHEMA): + def __init__(self, path_or_dict, schema=VARIABLE_CONFIG_SCHEMA) -> None: if isinstance(path_or_dict, (Path, str)): with open(path_or_dict) as f: config = json.load(f) elif isinstance(path_or_dict, dict): config = path_or_dict else: - raise ValueError("Input is not valid file path or dict") + msg = "Input is not valid file path or dict" + raise ValueError(msg) L.debug("Validating schema...") validate(instance=config, schema=schema) # validate test specifications only contain allowed stats and operators - for test in config["tests"].keys(): + for test in config["tests"]: for test_name, test_def in config["tests"][test].items(): if test_name == "bbox": continue @@ -224,25 +226,27 @@ def __init__(self, path_or_dict, schema=VARIABLE_CONFIG_SCHEMA): self.update(config) - def _validate_fx(self, input_fx, test_name): - """Throws exception if input_fx contains tokens not specifically allowed""" + def _validate_fx(self, input_fx, test_name) -> None: + """Throws exception if input_fx contains tokens not specifically allowed.""" tokens = input_fx.split(" ") for token in tokens: try: _ = float(token) except ValueError: - if token not in self.allowed_stats and \ - token not in self.allowed_operators and \ - token not in self.allowed_groupings: + if ( + token not in self.allowed_stats + and token not in self.allowed_operators + and token not in self.allowed_groupings + ): msg = ( f"{token} not allowed in min/max specification in config of {test_name}.\n" - f"Allowable stats are: {[s for s in self.allowed_stats]}.\n" - f"Allowable operators are: {[o for o in self.allowed_operators]}." - f"Allowable groupings are: {[o for o in self.allowed_groupings]}." 
+ f"Allowable stats are: {list(self.allowed_stats)}.\n" + f"Allowable operators are: {list(self.allowed_operators)}." + f"Allowable groupings are: {list(self.allowed_groupings)}." ) raise ValueError(msg) - def __str__(self): + def __str__(self) -> str: return json.dumps(self, indent=4, sort_keys=True) @@ -260,7 +264,7 @@ class QcConfigCreator: """ - def __init__(self, creator_config): + def __init__(self, creator_config) -> None: self.config = creator_config self.datasets = self._load_datasets() self.dataset_years = self._determine_dataset_years() @@ -280,7 +284,8 @@ def create_config(self, variable_config): stats = self._get_stats(variable_config) L.debug("Creating config...") test_configs = { - name: self._create_test_section(name, variable_config, stats) for name in variable_config["tests"].keys() + name: self._create_test_section(name, variable_config, stats) + for name in variable_config["tests"] } return { @@ -290,9 +295,12 @@ def create_config(self, variable_config): } def _load_datasets(self): - """Load datasets""" + """Load datasets.""" L.debug(f"Loading {len(self.config)} datasets...") - return {name: xr.load_dataset(self.config[name]["file_path"]) for name in self.config.keys()} + return { + name: xr.load_dataset(self.config[name]["file_path"]) + for name in self.config + } def _determine_dataset_years(self): """Determine year used in datasets, return as dict {dataset_name, year}. @@ -310,13 +318,14 @@ def _determine_dataset_years(self): return years def _var2var_in_file(self, var): - """Return variable name used in the dataset and dataset name""" + """Return variable name used in the dataset and dataset name.""" for dataset_name, dataset in self.config.items(): - if var in dataset["variables"].keys(): + if var in dataset["variables"]: return dataset["variables"][var], dataset_name + return None def var2dataset(self, var): - """Return dataset name and dataset for given variable (as named in qc_config, not in the file)""" + """Return dataset name and dataset for given variable (as named in qc_config, not in the file).""" _, dataset_name = self._var2var_in_file(var) return dataset_name, self.datasets[dataset_name] @@ -324,21 +333,49 @@ def var2dataset(self, var): def _create_test_section(self, test_name, variable_config, test_limits): """Given test_name, QcVariableConfig and test_limits, return qc_config section for that test.""" if test_name == "spike_test": - return self.__create_spike_section(test_name, variable_config, test_limits) + return self.__create_spike_section( + test_name, + variable_config, + test_limits, + ) elif test_name == "location_test": return self.__create_location_section(test_name, variable_config) elif test_name == "rate_of_change_test": - return self.__create_rate_of_change_section(test_name, variable_config, test_limits) + return self.__create_rate_of_change_section( + test_name, + variable_config, + test_limits, + ) elif test_name == "flat_line_test": - return self.__create_flat_line_section(test_name, variable_config, test_limits) + return self.__create_flat_line_section( + test_name, + variable_config, + test_limits, + ) else: - return self.__create_span_section(test_name, variable_config, test_limits) + return self.__create_span_section( + test_name, + variable_config, + test_limits, + ) def __create_span_section(self, test_name, variable_config, stats): - suspect_min = fx_parser.eval_fx(variable_config["tests"][test_name]["suspect_min"], stats) - suspect_max = fx_parser.eval_fx(variable_config["tests"][test_name]["suspect_max"], stats) - fail_min = 
fx_parser.eval_fx(variable_config["tests"][test_name]["fail_min"], stats) - fail_max = fx_parser.eval_fx(variable_config["tests"][test_name]["fail_max"], stats) + suspect_min = fx_parser.eval_fx( + variable_config["tests"][test_name]["suspect_min"], + stats, + ) + suspect_max = fx_parser.eval_fx( + variable_config["tests"][test_name]["suspect_max"], + stats, + ) + fail_min = fx_parser.eval_fx( + variable_config["tests"][test_name]["fail_min"], + stats, + ) + fail_max = fx_parser.eval_fx( + variable_config["tests"][test_name]["fail_max"], + stats, + ) return { "suspect_span": [suspect_min, suspect_max], @@ -346,8 +383,14 @@ def __create_span_section(self, test_name, variable_config, stats): } def __create_spike_section(self, test_name, variable_config, stats): - suspect_threshold = fx_parser.eval_fx(variable_config["tests"][test_name]["suspect_threshold"], stats) - fail_threshold = fx_parser.eval_fx(variable_config["tests"][test_name]["fail_threshold"], stats) + suspect_threshold = fx_parser.eval_fx( + variable_config["tests"][test_name]["suspect_threshold"], + stats, + ) + fail_threshold = fx_parser.eval_fx( + variable_config["tests"][test_name]["fail_threshold"], + stats, + ) return { "suspect_threshold": suspect_threshold, @@ -355,9 +398,18 @@ def __create_spike_section(self, test_name, variable_config, stats): } def __create_flat_line_section(self, test_name, variable_config, stats): - suspect_threshold = fx_parser.eval_fx(variable_config["tests"][test_name]["suspect_threshold"], stats) - fail_threshold = fx_parser.eval_fx(variable_config["tests"][test_name]["fail_threshold"], stats) - tolerance = fx_parser.eval_fx(variable_config["tests"][test_name]["tolerance"], stats) + suspect_threshold = fx_parser.eval_fx( + variable_config["tests"][test_name]["suspect_threshold"], + stats, + ) + fail_threshold = fx_parser.eval_fx( + variable_config["tests"][test_name]["fail_threshold"], + stats, + ) + tolerance = fx_parser.eval_fx( + variable_config["tests"][test_name]["tolerance"], + stats, + ) return { "suspect_threshold": suspect_threshold, @@ -370,16 +422,30 @@ def __create_location_section(self, test_name, variable_config): "bbox": variable_config["tests"][test_name]["bbox"], } - def __create_rate_of_change_section(self, test_name, variable_config, stats): - threshold = fx_parser.eval_fx(variable_config["tests"][test_name]["threshold"], stats) + def __create_rate_of_change_section( + self, + test_name, + variable_config, + stats, + ): + threshold = fx_parser.eval_fx( + variable_config["tests"][test_name]["threshold"], + stats, + ) return { "threshold": threshold, } def _get_stats(self, variable_config): - """Return dict of stats (min, max, mean, std) for given config""" - start_time = datetime.datetime.strptime(variable_config["start_time"], "%Y-%m-%d") - end_time = datetime.datetime.strptime(variable_config["end_time"], "%Y-%m-%d") + """Return dict of stats (min, max, mean, std) for given config.""" + start_time = datetime.datetime.strptime( + variable_config["start_time"], + "%Y-%m-%d", + ) + end_time = datetime.datetime.strptime( + variable_config["end_time"], + "%Y-%m-%d", + ) time_range = slice(start_time, end_time) subset = self._get_subset( variable_config["variable"], @@ -395,7 +461,7 @@ def _get_stats(self, variable_config): } def _get_subset(self, var, bbox, time_slice, depth=0, pad_delta=0.5): - """Get subset of data""" + """Get subset of data.""" _, ds = self.var2dataset(var) lat_mask = np.logical_and( @@ -410,7 +476,13 @@ def _get_subset(self, var, bbox, time_slice, depth=0, 
pad_delta=0.5): # if there is no data in the subset, increase bounding box in an iterative fashion # - both are interpolated to daily values L.debug(f"Subsetting {var} by depth={depth} and {bbox}...") - subset = self.__get_daily_interp_subset(var, time_slice, depth, lat_mask, lon_mask) + subset = self.__get_daily_interp_subset( + var, + time_slice, + depth, + lat_mask, + lon_mask, + ) padded = 0 while np.nansum(subset) == 0: @@ -431,7 +503,13 @@ def _get_subset(self, var, bbox, time_slice, depth=0, pad_delta=0.5): ) padded += 1 - subset = self.__get_daily_interp_subset(var, time_slice, depth, lat_mask, lon_mask) + subset = self.__get_daily_interp_subset( + var, + time_slice, + depth, + lat_mask, + lon_mask, + ) L.info(f"Used bounding box: {bbox}") return subset @@ -439,7 +517,8 @@ def _get_subset(self, var, bbox, time_slice, depth=0, pad_delta=0.5): def __apply_bbox_pad(self, bbox, pad): # Prevent infinite attempts of expanding bounding box for valid data if bbox == ["-180", "-90", "180", "90"]: - raise RuntimeError(f"No valid data found in maximum bounding box {bbox}") + msg = f"No valid data found in maximum bounding box {bbox}" + raise RuntimeError(msg) def apply_pad(val, lat_or_lon, min_or_max): if lat_or_lon == "lat": @@ -466,7 +545,14 @@ def apply_pad(val, lat_or_lon, min_or_max): return new_bbox - def __get_daily_interp_subset(self, var, time_slice, depth, lat_mask, lon_mask): + def __get_daily_interp_subset( + self, + var, + time_slice, + depth, + lat_mask, + lon_mask, + ): ds_name, ds = self.var2dataset(var) var_in_file, _ = self._var2var_in_file(var) @@ -491,7 +577,8 @@ def __daily_cubic_interp(self, var, time_slice): """ if (time_slice.stop - time_slice.start).days > 365: - raise NotImplementedError("Maximum of 365 days available for config_creator") + msg = "Maximum of 365 days available for config_creator" + raise NotImplementedError(msg) x = var.time.dt.dayofyear y = var.data @@ -516,7 +603,8 @@ def __daily_cubic_interp(self, var, time_slice): # - assume that NaNs are same shape through time (i.e. will not work with wetting-drying) y_no_nans = y[~np.isnan(y)].reshape(y.shape[0], -1) if y_no_nans.size == 0: - raise ValueError("CubicSpline require y to the finite.") + msg = "CubicSpline require y to the finite." + raise ValueError(msg) spline = CubicSpline(x, y_no_nans, bc_type="periodic") # Get days of year for independent variable @@ -532,7 +620,7 @@ def __daily_cubic_interp(self, var, time_slice): return spline(days) - def __str__(self): + def __str__(self) -> str: return json.dumps(self.config, indent=4, sort_keys=True) def __rpr__(self): @@ -540,9 +628,10 @@ def __rpr__(self): def to_json(qc_config, out_file=None): - """Given qc_config return json""" + """Given qc_config return json.""" if out_file: with open(out_file, "w") as outfile: json.dump(outfile, qc_config) + return None else: return json.dumps(qc_config) diff --git a/ioos_qc/config_creator/fx_parser.py b/ioos_qc/config_creator/fx_parser.py index 736e1ab4..50738da9 100644 --- a/ioos_qc/config_creator/fx_parser.py +++ b/ioos_qc/config_creator/fx_parser.py @@ -107,7 +107,7 @@ def BNF(): expr_list = delimitedList(Group(expr)) # add parse action that replaces the function identifier with a (name, number of args) tuple fn_call = (ident + lpar - Group(expr_list) + rpar).setParseAction( - lambda t: t.insert(0, (t.pop(0), len(t[0]))) + lambda t: t.insert(0, (t.pop(0), len(t[0]))), ) atom = ( addop[...] 
@@ -144,13 +144,13 @@ def evaluate_stack(s, stats): elif op == "E": return math.e # 2.718281828 elif op == "mean": - return stats['mean'] + return stats["mean"] elif op == "min": - return stats['min'] + return stats["min"] elif op == "max": - return stats['max'] + return stats["max"] elif op == "std": - return stats['std'] + return stats["std"] elif op in fn: # note: args are pushed onto the stack in reverse order args = reversed([evaluate_stack(s, stats) for _ in range(num_args)]) diff --git a/ioos_qc/config_creator/get_assets.py b/ioos_qc/config_creator/get_assets.py index 8bdcf982..42da7826 100644 --- a/ioos_qc/config_creator/get_assets.py +++ b/ioos_qc/config_creator/get_assets.py @@ -1,5 +1,6 @@ #!python -"""Download and process source data used for ConfigCreator""" +"""Download and process source data used for ConfigCreator.""" + import logging import shutil from pathlib import Path @@ -23,7 +24,7 @@ } -def ocean_atlas_download(output_dir, month, sources=SOURCES): +def ocean_atlas_download(output_dir, month, sources=SOURCES) -> None: variable_map = { "temperature": "t", "salinity": "s", @@ -31,9 +32,17 @@ def ocean_atlas_download(output_dir, month, sources=SOURCES): } for name, name_in_file in variable_map.items(): if name in ["temperature", "salinity"]: - url = sources["OCEAN_ATLAS"]["ts_url"].format(name, name_in_file, month.month) + url = sources["OCEAN_ATLAS"]["ts_url"].format( + name, + name_in_file, + month.month, + ) else: - url = sources["OCEAN_ATLAS"]["other_url"].format(name, name_in_file, month.month) + url = sources["OCEAN_ATLAS"]["other_url"].format( + name, + name_in_file, + month.month, + ) r = request.urlopen(url) data = r.read() @@ -43,7 +52,7 @@ def ocean_atlas_download(output_dir, month, sources=SOURCES): f.write(data) -def ocean_atlas_merge_variables(output_dir, month): +def ocean_atlas_merge_variables(output_dir, month) -> None: ocean_atlas_files = output_dir.glob(f"ocean_atlas_*_{month.month:02}.nc") ocean_atlas_files = list(ocean_atlas_files) ocean_atlas_files.sort() @@ -60,12 +69,12 @@ def ocean_atlas_merge_variables(output_dir, month): nco.ncks(input=str(f), output=str(outfile), options=options) -def ocean_atlas_variable_enhance(output_dir, month): +def ocean_atlas_variable_enhance(output_dir, month) -> None: fname = output_dir / f"ocean_atlas_{month.month:02}.nc" # only keep variables needed nco = Nco() - vars_to_keep = ",".join(["s_an", "t_an", "o_an"]) + vars_to_keep = "s_an,t_an,o_an" options = [ "-h", f"-v {vars_to_keep}", @@ -91,9 +100,11 @@ def ocean_atlas_variable_enhance(output_dir, month): nco.ncks(input=str(fname), output=str(fname), options=options) -def ocean_atlas_merge_time(output_dir): +def ocean_atlas_merge_time(output_dir) -> None: variable_merged_files = output_dir.glob("ocean_atlas_??.nc") - variable_merged_files = [str(merged_file) for merged_file in list(variable_merged_files)] + variable_merged_files = [ + str(merged_file) for merged_file in list(variable_merged_files) + ] variable_merged_files.sort() output_file = output_dir.parent / "ocean_atlas.nc" @@ -101,10 +112,14 @@ def ocean_atlas_merge_time(output_dir): options = [ "-A", ] - nco.ncrcat(input=variable_merged_files, output=str(output_file), options=options) + nco.ncrcat( + input=variable_merged_files, + output=str(output_file), + options=options, + ) -def ocean_atlas_enhance(output_dir): +def ocean_atlas_enhance(output_dir) -> None: output_file = output_dir.parent / "ocean_atlas.nc" output_tmp_file = output_dir.parent / "ocean_atlas_tmp.nc" @@ -114,7 +129,11 @@ def 
ocean_atlas_enhance(output_dir): "-O", "-a _FillValue,,o,f,-127", ] - nco.ncatted(input=str(output_file), output=str(output_tmp_file), options=options) + nco.ncatted( + input=str(output_file), + output=str(output_tmp_file), + options=options, + ) # pack to use bytes # - requires output file defined with -o option @@ -123,10 +142,14 @@ def ocean_atlas_enhance(output_dir): "-M flt_byt", f"-o {output_file!s}", ] - nco.ncpdq(input=str(output_tmp_file), output=str(output_file), options=options) + nco.ncpdq( + input=str(output_tmp_file), + output=str(output_file), + options=options, + ) -def get_ocean_atlas(output_dir): +def get_ocean_atlas(output_dir) -> None: time_range = xr.cftime_range(start="2018", end="2018-12-31", freq="MS") for month in time_range: logger.info(f"downloading Ocean Atlas for {month}") @@ -137,7 +160,7 @@ def get_ocean_atlas(output_dir): ocean_atlas_enhance(output_dir) -def narr_download(output_dir, sources=SOURCES): +def narr_download(output_dir, sources=SOURCES) -> None: variables = { "air": "air.sig995.mon.ltm.nc", "rhum": "rhum.sig995.mon.ltm.nc", @@ -156,7 +179,7 @@ def narr_download(output_dir, sources=SOURCES): f.write(data) -def narr_merge_variables(output_dir): +def narr_merge_variables(output_dir) -> None: narr_files = output_dir.glob("narr_*.nc") narr_files = list(narr_files) narr_files.sort() @@ -173,7 +196,7 @@ def narr_merge_variables(output_dir): nco.ncks(input=str(f), output=str(outfile), options=options) -def narr_enhance(output_dir): +def narr_enhance(output_dir) -> None: outfile = output_dir.parent / "narr.nc" outtmp = output_dir.parent / "narr_tmp.nc" @@ -214,25 +237,27 @@ def narr_enhance(output_dir): time.units = new_units -def get_narr(output_dir): +def get_narr(output_dir) -> None: logger.info("downloading NARR") narr_download(output_dir) narr_merge_variables(output_dir) narr_enhance(output_dir) -def remove_tmp_files(dirs_to_delete): +def remove_tmp_files(dirs_to_delete) -> None: logger.info("removing tmp files") for dir in dirs_to_delete: logger.info(f"removing {dir}") shutil.rmtree(str(dir)) -def main(output_dir, remove_tmp_files=False): +def main(output_dir, remove_tmp_files=False) -> None: output_dir = Path(output_dir).resolve() output_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Downloading and saving data for QcConfigCreator to {output_dir}") + logger.info( + f"Downloading and saving data for QcConfigCreator to {output_dir}", + ) logger.info("Downloading Ocean Atlas") ocean_atlas_dir = output_dir / "ocean_atlas" ocean_atlas_dir.mkdir(exist_ok=True) diff --git a/ioos_qc/config_creator/make_config.py b/ioos_qc/config_creator/make_config.py index 9a798e74..ae0741a4 100644 --- a/ioos_qc/config_creator/make_config.py +++ b/ioos_qc/config_creator/make_config.py @@ -23,7 +23,13 @@ ) args = parser.parse_args() - creator_config = CreatorConfig(args.creator_config_file, QC_CONFIG_CREATOR_SCHEMA) - variable_config = QcVariableConfig(args.variable_config_file, VARIABLE_CONFIG_SCHEMA) + creator_config = CreatorConfig( + args.creator_config_file, + QC_CONFIG_CREATOR_SCHEMA, + ) + variable_config = QcVariableConfig( + args.variable_config_file, + VARIABLE_CONFIG_SCHEMA, + ) qc = QcConfigCreator(creator_config) qc.create_config(variable_config) diff --git a/ioos_qc/gliders.py b/ioos_qc/gliders.py index 0c3e1d9a..b2d4a862 100644 --- a/ioos_qc/gliders.py +++ b/ioos_qc/gliders.py @@ -1,10 +1,14 @@ #!/usr/bin/env python """Deprecated module. 
Consider using ARGO instead.""" + import warnings from ioos_qc import argo def pressure_check(inp): - warnings.warn("gliders.pressure_check has been replaced by argo.pressure_increasing_test", DeprecationWarning) + warnings.warn( + "gliders.pressure_check has been replaced by argo.pressure_increasing_test", + DeprecationWarning, + ) return argo.pressure_increasing_test(inp) diff --git a/ioos_qc/plotting.py b/ioos_qc/plotting.py index 2753c2e4..f3b30ebb 100644 --- a/ioos_qc/plotting.py +++ b/ioos_qc/plotting.py @@ -13,7 +13,7 @@ def bokeh_plot(data, var_name, results, title, module, test_name): def bokeh_plot_var(time, data, var_name, results, title, module, test_name): - """Method to plot QC results using Bokeh""" + """Method to plot QC results using Bokeh.""" if module not in results or test_name not in results[module]: L.warning(f"No results for test {module}.{test_name} found") return None @@ -25,32 +25,67 @@ def bokeh_plot_var(time, data, var_name, results, title, module, test_name): qc_fail = np.ma.masked_where(qc_test != 4, data) qc_notrun = np.ma.masked_where(qc_test != 2, data) - p1 = plotting.figure(x_axis_type="datetime", title=test_name + " : " + title) + p1 = plotting.figure( + x_axis_type="datetime", + title=test_name + " : " + title, + ) p1.grid.grid_line_alpha = 0.3 p1.xaxis.axis_label = "Time" p1.yaxis.axis_label = "Data" - p1.line(time, data, legend_label="data", color="#A6CEE3") - p1.circle(time, qc_notrun, size=2, legend_label="qc not run", color="gray", alpha=0.2) - p1.circle(time, qc_pass, size=4, legend_label="qc pass", color="green", alpha=0.5) - p1.circle(time, qc_suspect, size=4, legend_label="qc suspect", color="orange", alpha=0.7) - p1.circle(time, qc_fail, size=6, legend_label="qc fail", color="red", alpha=1.0) - p1.circle(time, qc_notrun, size=6, legend_label="qc not eval", color="gray", alpha=1.0) + p1.line(time, data, legend_label="data", color="#A6CEE3") + p1.circle( + time, + qc_notrun, + size=2, + legend_label="qc not run", + color="gray", + alpha=0.2, + ) + p1.circle( + time, + qc_pass, + size=4, + legend_label="qc pass", + color="green", + alpha=0.5, + ) + p1.circle( + time, + qc_suspect, + size=4, + legend_label="qc suspect", + color="orange", + alpha=0.7, + ) + p1.circle( + time, + qc_fail, + size=6, + legend_label="qc fail", + color="red", + alpha=1.0, + ) + p1.circle( + time, + qc_notrun, + size=6, + legend_label="qc not eval", + color="gray", + alpha=1.0, + ) return p1 def bokeh_multi_plot(stream, results, title, **kwargs): - kwargs = { - - "merge_tools": True, - "toolbar_location": "below", - "sizing_mode": "scale_width", - "plot_width": 600, - "plot_height": 200, - "ncols": 2 - , + "merge_tools": True, + "toolbar_location": "below", + "sizing_mode": "scale_width", + "plot_width": 600, + "plot_height": 200, + "ncols": 2, **kwargs, } @@ -61,21 +96,27 @@ def bokeh_multi_plot(stream, results, title, **kwargs): def bokeh_multi_var(stream, results, title): for vname, qcobj in results.items(): for modu, tests in qcobj.items(): - for testname, testresults in tests.items(): - plt = bokeh_plot_var(stream.time(), stream.data(vname), vname, qcobj, title, modu, testname) + for testname in tests: + plt = bokeh_plot_var( + stream.time(), + stream.data(vname), + vname, + qcobj, + title, + modu, + testname, + ) yield plt def bokeh_plot_collected_results(results, **kwargs): kwargs = { - - "merge_tools": True, - "toolbar_location": "below", - "sizing_mode": "scale_width", - "plot_width": 600, - "plot_height": 200, - "ncols": 2 - , + "merge_tools": True, + 
"toolbar_location": "below", + "sizing_mode": "scale_width", + "plot_width": 600, + "plot_height": 200, + "ncols": 2, **kwargs, } @@ -97,11 +138,46 @@ def bokeh_plot_collected_result(cr): qc_fail = np.ma.masked_where(cr.results != 4, cr.data) qc_notrun = np.ma.masked_where(cr.results != 2, cr.data) - p1.line(cr.tinp, cr.data, legend_label="data", color="#A6CEE3") - p1.circle(cr.tinp, qc_notrun, size=3, legend_label="qc not run", color="gray", alpha=0.2) - p1.circle(cr.tinp, qc_pass, size=4, legend_label="qc pass", color="green", alpha=0.5) - p1.circle(cr.tinp, qc_suspect, size=4, legend_label="qc suspect", color="orange", alpha=0.7) - p1.circle(cr.tinp, qc_fail, size=6, legend_label="qc fail", color="red", alpha=1.0) - p1.circle(cr.tinp, qc_notrun, size=3, legend_label="qc not eval", color="gray", alpha=1.0) + p1.line(cr.tinp, cr.data, legend_label="data", color="#A6CEE3") + p1.circle( + cr.tinp, + qc_notrun, + size=3, + legend_label="qc not run", + color="gray", + alpha=0.2, + ) + p1.circle( + cr.tinp, + qc_pass, + size=4, + legend_label="qc pass", + color="green", + alpha=0.5, + ) + p1.circle( + cr.tinp, + qc_suspect, + size=4, + legend_label="qc suspect", + color="orange", + alpha=0.7, + ) + p1.circle( + cr.tinp, + qc_fail, + size=6, + legend_label="qc fail", + color="red", + alpha=1.0, + ) + p1.circle( + cr.tinp, + qc_notrun, + size=3, + legend_label="qc not eval", + color="gray", + alpha=1.0, + ) return p1 diff --git a/ioos_qc/qartod.py b/ioos_qc/qartod.py index 0df28199..8d931d4b 100644 --- a/ioos_qc/qartod.py +++ b/ioos_qc/qartod.py @@ -1,10 +1,11 @@ #!/usr/bin/env python """Tests based on the IOOS QARTOD manuals.""" + import logging import warnings from collections import namedtuple from numbers import Real as N -from typing import Dict, List, Sequence, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -46,18 +47,20 @@ class QartodFlags: span = namedtuple("Span", "minv maxv") -@add_flag_metadata(standard_name="aggregate_quality_flag", - long_name="Aggregate Quality Flag", - aggregate=True) +@add_flag_metadata( + standard_name="aggregate_quality_flag", + long_name="Aggregate Quality Flag", + aggregate=True, +) def aggregate(results: List) -> np.ma.MaskedArray: - """Runs qartod_compare against all other qartod tests in results. - """ - all_tests = [ r.results for r in results ] + """Runs qartod_compare against all other qartod tests in results.""" + all_tests = [r.results for r in results] return qartod_compare(all_tests) -def qartod_compare(vectors : Sequence[Sequence[N]], - ) -> np.ma.MaskedArray: +def qartod_compare( + vectors: Sequence[Sequence[N]], +) -> np.ma.MaskedArray: """Aggregates an array of flags by precedence into a single array. Args: @@ -71,8 +74,8 @@ def qartod_compare(vectors : Sequence[Sequence[N]], """ shapes = [v.shape[0] for v in vectors] # Assert that all of the vectors are the same size. 
- assert all([s == shapes[0] for s in shapes]) - assert all([v.ndim == 1 for v in vectors]) + assert all(s == shapes[0] for s in shapes) + assert all(v.ndim == 1 for v in vectors) result = np.ma.empty(shapes[0]) result.fill(QartodFlags.MISSING) @@ -93,13 +96,16 @@ def qartod_compare(vectors : Sequence[Sequence[N]], return result.astype("uint8") -@add_flag_metadata(standard_name="location_test_quality_flag", - long_name="Location Test Quality Flag") -def location_test(lon : Sequence[N], - lat : Sequence[N], - bbox : Tuple[N, N, N, N] = (-180, -90, 180, 90), - range_max : N = None, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="location_test_quality_flag", + long_name="Location Test Quality Flag", +) +def location_test( + lon: Sequence[N], + lat: Sequence[N], + bbox: Tuple[N, N, N, N] = (-180, -90, 180, 90), + range_max: Optional[N] = None, +) -> np.ma.core.MaskedArray: """Checks that a location is within reasonable bounds. Checks that longitude and latitude are within reasonable bounds defaulting @@ -131,8 +137,9 @@ def location_test(lon : Sequence[N], lon = np.ma.masked_invalid(np.array(lon).astype(np.float64)) if lon.shape != lat.shape: + msg = f"Lon ({lon.shape}) and lat ({lat.shape}) are different shapes" raise ValueError( - f"Lon ({lon.shape}) and lat ({lat.shape}) are different shapes", + msg, ) # Save original shape @@ -160,18 +167,25 @@ def location_test(lon : Sequence[N], # Ignore warnings when comparing NaN values even though they are masked # https://github.com/numpy/numpy/blob/master/doc/release/1.8.0-notes.rst#runtime-warnings-when-comparing-nan-numbers with np.errstate(invalid="ignore"): - flag_arr[(lon < bbox.minx) | (lat < bbox.miny) | - (lon > bbox.maxx) | (lat > bbox.maxy)] = QartodFlags.FAIL + flag_arr[ + (lon < bbox.minx) + | (lat < bbox.miny) + | (lon > bbox.maxx) + | (lat > bbox.maxy) + ] = QartodFlags.FAIL return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="gross_range_test_quality_flag", - long_name="Gross Range Test Quality Flag") -def gross_range_test(inp : Sequence[N], - fail_span : Tuple[N, N], - suspect_span : Tuple[N, N] = None, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="gross_range_test_quality_flag", + long_name="Gross Range Test Quality Flag", +) +def gross_range_test( + inp: Sequence[N], + fail_span: Tuple[N, N], + suspect_span: Optional[Tuple[N, N]] = None, +) -> np.ma.core.MaskedArray: """Checks that values are within reasonable range bounds. Given a 2-tuple of minimum/maximum values, flag data outside of the given @@ -209,10 +223,13 @@ def gross_range_test(inp : Sequence[N], assert isfixedlength(suspect_span, 2) uspan = span(*sorted(suspect_span)) if uspan.minv < sspan.minv or uspan.maxv > sspan.maxv: - raise ValueError(f"Suspect {uspan} must fall within the Fail {sspan}") + msg = f"Suspect {uspan} must fall within the Fail {sspan}" + raise ValueError(msg) # Flag suspect outside of user span with np.errstate(invalid="ignore"): - flag_arr[(inp < uspan.minv) | (inp > uspan.maxv)] = QartodFlags.SUSPECT + flag_arr[(inp < uspan.minv) | (inp > uspan.maxv)] = ( + QartodFlags.SUSPECT + ) # Flag suspect outside of sensor span with np.errstate(invalid="ignore"): @@ -222,7 +239,7 @@ def gross_range_test(inp : Sequence[N], class ClimatologyConfig: - """Objects to hold the config for a Climatology test + """Objects to hold the config for a Climatology test. 
Args: ---- @@ -246,15 +263,18 @@ class ClimatologyConfig: """ - mem = namedtuple("window", [ - "tspan", - "fspan", - "vspan", - "zspan", - "period", - ]) - - def __init__(self, members=None): + mem = namedtuple( + "window", + [ + "tspan", + "fspan", + "vspan", + "zspan", + "period", + ], + ) + + def __init__(self, members=None) -> None: members = members or [] self._members = members @@ -262,7 +282,7 @@ def __init__(self, members=None): def members(self): return self._members - def values(self, tind : pd.Timestamp, zind=None): + def values(self, tind: pd.Timestamp, zind=None): """Args: ---- tind: Value to test for inclusion between time bounds @@ -270,7 +290,6 @@ def values(self, tind : pd.Timestamp, zind=None): """ span = (None, None) for m in self._members: - if m.period is not None: # If a period is defined, extract the attribute from the # pd.Timestamp object before comparison. The min and max @@ -291,24 +310,28 @@ def values(self, tind : pd.Timestamp, zind=None): span = m.vspan return span - def add(self, - tspan : Tuple[N, N], - vspan : Tuple[N, N], - fspan : Tuple[N, N] = None, - zspan : Tuple[N, N] = None, - period : str = None, - ) -> None: - + def add( + self, + tspan: Tuple[N, N], + vspan: Tuple[N, N], + fspan: Optional[Tuple[N, N]] = None, + zspan: Optional[Tuple[N, N]] = None, + period: Optional[str] = None, + ) -> None: assert isfixedlength(tspan, 2) # If period is defined, tspan is a numeric # if it isn't defined, its a parsable date if period is not None: tspan = span(*sorted(tspan)) else: - tspan = span(*sorted([ - pd.Timestamp(tspan[0]), - pd.Timestamp(tspan[1]), - ])) + tspan = span( + *sorted( + [ + pd.Timestamp(tspan[0]), + pd.Timestamp(tspan[1]), + ], + ), + ) assert isfixedlength(vspan, 2) vspan = span(*sorted(vspan)) @@ -326,7 +349,8 @@ def add(self, try: getattr(pd.Timestamp.now(), period) except AttributeError: - raise ValueError('The period "{period}" is not recognized') + msg = 'The period "{period}" is not recognized' + raise ValueError(msg) self._members.append( self.mem( @@ -339,7 +363,6 @@ def add(self, ) def check(self, tinp, inp, zinp): - # Start with everything as UNKNOWN (2) flag_arr = np.ma.empty(inp.size, dtype="uint8") flag_arr.fill(QartodFlags.UNKNOWN) @@ -351,14 +374,16 @@ def check(self, tinp, inp, zinp): # Member spans are applied in order and any data points that fall into # more than one member are flagged by each one. for m in self._members: - if m.period is not None: # If a period is defined, extract the attribute from the # pd.DatetimeIndex object before comparison. The min and max # values are in this period unit already. if m.period in WEEK_PERIODS: # The weekofyear accessor was depreacated - tinp_copy = pd.Index(tinp.isocalendar().week, dtype="int64") + tinp_copy = pd.Index( + tinp.isocalendar().week, + dtype="int64", + ) else: tinp_copy = getattr(tinp, m.period).to_series() else: @@ -379,15 +404,23 @@ def check(self, tinp, inp, zinp): # Only test non-masked values between the min and max. # Ignore warnings about comparing masked values with np.errstate(invalid="ignore"): - z_idx = (~zinp.mask) & (zinp >= m.zspan.minv) & (zinp <= m.zspan.maxv) + z_idx = ( + (~zinp.mask) + & (zinp >= m.zspan.minv) + & (zinp <= m.zspan.maxv) + ) else: # If there is no z data in the config, don't try to filter by depth! 
# Set z_idx to all True to prevent filtering # Must use inp.data to create masked array so that the masked value is ignored when we assign the FAIL, SUSPECT, and GOOD flags - z_idx = np.ma.array(data=~np.isnan(inp.data), mask=inp.mask, fill_value=999999) + z_idx = np.ma.array( + data=~np.isnan(inp.data), + mask=inp.mask, + fill_value=999999, + ) # Combine the T and Z indexes - values_idx = (t_idx & z_idx) + values_idx = t_idx & z_idx # Failed and suspect data for this value span. Combining fail_idx or # suspect_idx with values_idx represents the subsets of data that should be @@ -401,8 +434,12 @@ def check(self, tinp, inp, zinp): with np.errstate(invalid="ignore"): flag_arr[(values_idx & fail_idx)] = QartodFlags.FAIL - flag_arr[(values_idx & ~fail_idx & suspect_idx)] = QartodFlags.SUSPECT - flag_arr[(values_idx & ~fail_idx & ~suspect_idx)] = QartodFlags.GOOD + flag_arr[(values_idx & ~fail_idx & suspect_idx)] = ( + QartodFlags.SUSPECT + ) + flag_arr[(values_idx & ~fail_idx & ~suspect_idx)] = ( + QartodFlags.GOOD + ) return flag_arr @@ -418,13 +455,16 @@ def convert(config): return c -@add_flag_metadata(standard_name="climatology_test_quality_flag", - long_name="Climatology Test Quality Flag") -def climatology_test(config : Union[ClimatologyConfig, Sequence[Dict[str, Tuple]]], - inp : Sequence[N], - tinp : Sequence[N], - zinp : Sequence[N], - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="climatology_test_quality_flag", + long_name="Climatology Test Quality Flag", +) +def climatology_test( + config: Union[ClimatologyConfig, Sequence[Dict[str, Tuple]]], + inp: Sequence[N], + tinp: Sequence[N], + zinp: Sequence[N], +) -> np.ma.core.MaskedArray: """Checks that values are within reasonable range bounds and flags as SUSPECT. Data for which no ClimatologyConfig member exists is marked as UNKNOWN. @@ -469,14 +509,17 @@ def climatology_test(config : Union[ClimatologyConfig, Sequence[Dict[str, Tuple] return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="spike_test_quality_flag", - long_name="Spike Test Quality Flag") -def spike_test(inp: Sequence[N], - suspect_threshold: N = None, - fail_threshold: N = None, - method: str = "average", - ) -> np.ma.core.MaskedArray: - """Check for spikes by checking neighboring data against thresholds +@add_flag_metadata( + standard_name="spike_test_quality_flag", + long_name="Spike Test Quality Flag", +) +def spike_test( + inp: Sequence[N], + suspect_threshold: Optional[N] = None, + fail_threshold: Optional[N] = None, + method: str = "average", +) -> np.ma.core.MaskedArray: + """Check for spikes by checking neighboring data against thresholds. 
Determine if there is a spike at data point n-1 by subtracting the midpoint of n and n-2 and taking the absolute value of this @@ -531,11 +574,11 @@ def spike_test(inp: Sequence[N], # Make sure that only the record (n) where the difference prior and after are opposite are considered with np.errstate(invalid="ignore"): - diff[1:-1][ref[:-1]*ref[1:] >= 0] = 0 + diff[1:-1][ref[:-1] * ref[1:] >= 0] = 0 else: + msg = f'Unknown method: "{method}", only "average" and "differential" methods are available' raise ValueError( - f'Unknown method: "{method}", only "average" and "differential" methods are available', - + msg, ) # Start with everything as passing (1) @@ -561,12 +604,15 @@ def spike_test(inp: Sequence[N], return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="rate_of_change_test_quality_flag", - long_name="Rate of Change Test Quality Flag") -def rate_of_change_test(inp : Sequence[N], - tinp : Sequence[N], - threshold : float, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="rate_of_change_test_quality_flag", + long_name="Rate of Change Test Quality Flag", +) +def rate_of_change_test( + inp: Sequence[N], + tinp: Sequence[N], + threshold: float, +) -> np.ma.core.MaskedArray: """Checks the first order difference of a series of values to see if there are any values exceeding a threshold defined by the inputs. These are then marked as SUSPECT. It is up to the test operator @@ -604,7 +650,9 @@ def rate_of_change_test(inp : Sequence[N], roc = np.ma.zeros(inp.size, dtype="float") tinp = mapdates(tinp).flatten() - roc[1:] = np.abs(np.diff(inp) / np.diff(tinp).astype("timedelta64[s]").astype(float)) + roc[1:] = np.abs( + np.diff(inp) / np.diff(tinp).astype("timedelta64[s]").astype(float), + ) with np.errstate(invalid="ignore"): flag_arr[roc > threshold] = QartodFlags.SUSPECT @@ -615,17 +663,20 @@ def rate_of_change_test(inp : Sequence[N], return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="flat_line_test_quality_flag", - long_name="Flat Line Test Quality Flag") -def flat_line_test(inp: Sequence[N], - tinp: Sequence[N], - suspect_threshold: int, - fail_threshold: int, - tolerance: N = 0, - ) -> np.ma.MaskedArray: +@add_flag_metadata( + standard_name="flat_line_test_quality_flag", + long_name="Flat Line Test Quality Flag", +) +def flat_line_test( + inp: Sequence[N], + tinp: Sequence[N], + suspect_threshold: int, + fail_threshold: int, + tolerance: N = 0, +) -> np.ma.MaskedArray: """Check for consecutively repeated values within a tolerance. Missing and masked data is flagged as UNKNOWN. - More information: https://github.com/ioos/ioos_qc/pull/11 + More information: https://github.com/ioos/ioos_qc/pull/11. 
Args: ---- @@ -668,19 +719,20 @@ def flat_line_test(inp: Sequence[N], tinp = mapdates(tinp).flatten() # The thresholds are in seconds so we round make sure the interval is also in seconds - time_interval = np.median(np.diff(tinp)).astype("timedelta64[s]").astype(float) + time_interval = ( + np.median(np.diff(tinp)).astype("timedelta64[s]").astype(float) + ) def rolling_window(a, window): - """https://rigtorp.se/2011/01/01/rolling-statistics-numpy.html - """ + """https://rigtorp.se/2011/01/01/rolling-statistics-numpy.html.""" if len(a) < window: return np.ma.MaskedArray(np.empty((0, window + 1))) shape = a.shape[:-1] + (a.shape[-1] - window + 1, window + 1) - strides = a.strides + (a.strides[-1],) + strides = (*a.strides, a.strides[-1]) arr = np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) return np.ma.masked_invalid(arr[:-1, :]) - def run_test(test_threshold, flag_value): + def run_test(test_threshold, flag_value) -> None: # convert time thresholds to number of observations count = (int(test_threshold) / time_interval).astype(int) @@ -706,19 +758,22 @@ def run_test(test_threshold, flag_value): return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="attenuated_signal_test_quality_flag", - long_name="Attenuated Signal Test Quality Flag") -def attenuated_signal_test(inp : Sequence[N], - tinp : Sequence[N], - suspect_threshold: N, - fail_threshold: N, - test_period: N = None, - min_obs: N = None, - min_period: int = None, - check_type : str = "std", - *args, - **kwargs, - ) -> np.ma.MaskedArray: +@add_flag_metadata( + standard_name="attenuated_signal_test_quality_flag", + long_name="Attenuated Signal Test Quality Flag", +) +def attenuated_signal_test( + inp: Sequence[N], + tinp: Sequence[N], + suspect_threshold: N, + fail_threshold: N, + test_period: Optional[N] = None, + min_obs: Optional[N] = None, + min_period: Optional[int] = None, + check_type: str = "std", + *args, + **kwargs, +) -> np.ma.MaskedArray: """Check for near-flat-line conditions using a range or standard deviation. Missing and masked data is flagged as UNKNOWN. 
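# Editor's sketch (illustrative only, not part of the patch): a minimal call against the
# attenuated_signal_test signature reformatted above. It assumes the function lives in
# ioos_qc.qartod; the sample values and thresholds below are hypothetical, chosen only to
# exercise the std-based check (values whose spread falls below the thresholds are
# assumed to be flagged SUSPECT/FAIL).
import numpy as np
from ioos_qc import qartod

times = np.arange(
    "2015-01-01 00:00:00",
    "2015-01-01 06:00:00",
    step=np.timedelta64(1, "h"),
    dtype=np.datetime64,
)
signal = np.array([10.0, 10.1, 9.9, 10.2, 9.8, 10.0])  # very little variance
flags = qartod.attenuated_signal_test(
    inp=signal,
    tinp=times,
    suspect_threshold=1.0,
    fail_threshold=0.1,
    check_type="std",
)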
@@ -756,15 +811,18 @@ def attenuated_signal_test(inp : Sequence[N], window_func = lambda x: x.std() # noqa check_func = np.std elif check_type == "range": + def window_func(w): # When pandas>=1.0 and numba are installed, this is about twice as fast try: return w.apply(np.ptp, raw=True, engine="numba") except (ImportError, TypeError, NumbaTypeError): return w.apply(np.ptp, raw=True) + check_func = np.ptp else: - raise ValueError(f'Check type "{check_type}" is not one of ["std", "range"]') + msg = f'Check type "{check_type}" is not one of ["std", "range"]' + raise ValueError(msg) tinp = mapdates(tinp) with warnings.catch_warnings(): @@ -781,7 +839,9 @@ def window_func(w): if min_obs is not None: min_periods = min_obs elif min_period is not None: - time_interval = np.median(np.diff(tinp)).astype("timedelta64[s]").astype(float) + time_interval = ( + np.median(np.diff(tinp)).astype("timedelta64[s]").astype(float) + ) min_periods = (min_period / time_interval).astype(int) else: min_periods = None @@ -802,13 +862,16 @@ def window_func(w): return flag_arr.reshape(original_shape) -@add_flag_metadata(standard_name="density_inversion_test_flag", - long_name="Density Inversion Test Flag") -def density_inversion_test(inp: Sequence[N], - zinp: Sequence[N], - suspect_threshold: float = None, - fail_threshold: float = None, - ) -> np.ma.core.MaskedArray: +@add_flag_metadata( + standard_name="density_inversion_test_flag", + long_name="Density Inversion Test Flag", +) +def density_inversion_test( + inp: Sequence[N], + zinp: Sequence[N], + suspect_threshold: Optional[float] = None, + fail_threshold: Optional[float] = None, +) -> np.ma.core.MaskedArray: """With few exceptions, potential water density will increase with increasing pressure. When vertical profile data is obtained, this test is used to flag as failed T, C, and SP observations, which yield densities that do not sufficiently increase with pressure. A small operator-selected density @@ -840,7 +903,8 @@ def density_inversion_test(inp: Sequence[N], # Make sure both inputs are the same size. 
if inp.shape != zinp.shape: - raise ValueError(f"Density ({inp.shape}) and depth ({zinp.shape}) must be the same shape") + msg = f"Density ({inp.shape}) and depth ({zinp.shape}) must be the same shape" + raise ValueError(msg) # Start with everything as passing flag_arr = QartodFlags.GOOD * np.ma.ones(inp.size, dtype="uint8") diff --git a/ioos_qc/results.py b/ioos_qc/results.py index 24733f52..2ca0a866 100644 --- a/ioos_qc/results.py +++ b/ioos_qc/results.py @@ -18,7 +18,7 @@ class CallResult(NamedTuple): function: callable results: np.ndarray - def __repr__(self): + def __repr__(self) -> str: return f"" @@ -32,7 +32,7 @@ class ContextResult(NamedTuple): lat: np.ndarray = None lon: np.ndarray = None - def __repr__(self): + def __repr__(self) -> str: return f"" @@ -49,7 +49,7 @@ class CollectedResult: lat: np.ndarray = None lon: np.ndarray = None - def __repr__(self): + def __repr__(self) -> str: return f"" def function_name(self) -> str: @@ -65,6 +65,7 @@ def collect_results(results, how="list"): return collect_results_list(results) elif how in ["dict", dict]: return collect_results_dict(results) + return None def collect_results_list(results): @@ -76,7 +77,6 @@ def collect_results_list(results): # ContextResults for r in results: - cr = None # Shortcut for CallResult objects when someone uses QcConfig.run() directly # and doesn't go through a Stream object @@ -93,7 +93,6 @@ def collect_results_list(results): # CallResults for tr in r.results: - cr = CollectedResult( stream_id=r.stream_id, package=tr.package, @@ -103,12 +102,30 @@ def collect_results_list(results): if cr.hash_key not in collected: # Set the initial values - cr.results = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=tr.results.dtype) - cr.data = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=r.data.dtype) - cr.tinp = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=r.tinp.dtype) - cr.zinp = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=r.zinp.dtype) - cr.lat = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=r.lat.dtype) - cr.lon = np.ma.masked_all(shape=r.subset_indexes.shape, dtype=r.lon.dtype) + cr.results = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=tr.results.dtype, + ) + cr.data = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=r.data.dtype, + ) + cr.tinp = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=r.tinp.dtype, + ) + cr.zinp = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=r.zinp.dtype, + ) + cr.lat = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=r.lat.dtype, + ) + cr.lon = np.ma.masked_all( + shape=r.subset_indexes.shape, + dtype=r.lon.dtype, + ) collected[cr.hash_key] = cr collected[cr.hash_key].results[r.subset_indexes] = tr.results @@ -142,7 +159,6 @@ def collect_results_dict(results): # ContextResults for r in results: - # Shortcut for CallResult objects when someone uses QcConfig.run() directly # and doesn't go through a Stream object if isinstance(r, CallResult): @@ -159,7 +175,11 @@ def collect_results_dict(results): testresults = tr.results if testname not in collected[r.stream_id][testpackage]: - collected[r.stream_id][testpackage][testname] = np.copy(flag_arr) - collected[r.stream_id][testpackage][testname][r.subset_indexes] = testresults + collected[r.stream_id][testpackage][testname] = np.copy( + flag_arr, + ) + collected[r.stream_id][testpackage][testname][r.subset_indexes] = ( + testresults + ) return collected diff --git a/ioos_qc/stores.py b/ioos_qc/stores.py index 469d0e8b..9574f695 100644 --- 
a/ioos_qc/stores.py +++ b/ioos_qc/stores.py @@ -4,7 +4,7 @@ import logging from importlib import import_module from pathlib import Path -from typing import List +from typing import List, Optional import h5netcdf.legacyapi as nc4 import numpy as np @@ -26,25 +26,22 @@ def column_from_collected_result(cr): class BaseStore: - - def save(self, *args, **kwargs): - """Serialize results to a store. This could save a file or publish messages. - """ + def save(self, *args, **kwargs) -> None: + """Serialize results to a store. This could save a file or publish messages.""" @property def stream_ids(self) -> List[str]: - """A list of stream_ids to save to the store - """ + """A list of stream_ids to save to the store.""" class PandasStore(BaseStore): - """Store results in a dataframe""" + """Store results in a dataframe.""" - def __init__(self, results, axes: dict = None): + def __init__(self, results, axes: Optional[dict] = None) -> None: # OK, time to evaluate the actual tests now that we need the results self.results = list(results) self.collected_results = collect_results(self.results, how="list") - self._stream_ids = [ cr.stream_id for cr in self.collected_results ] + self._stream_ids = [cr.stream_id for cr in self.collected_results] self.axes = axes or { "t": "time", "z": "z", @@ -56,9 +53,8 @@ def __init__(self, results, axes: dict = None): def stream_ids(self) -> List[str]: return self._stream_ids - def compute_aggregate(self, name="rollup"): - """Internally compute the total aggregate and add it to the results - """ + def compute_aggregate(self, name="rollup") -> None: + """Internally compute the total aggregate and add it to the results.""" agg = CollectedResult( stream_id="", package="qartod", @@ -68,42 +64,78 @@ def compute_aggregate(self, name="rollup"): ) self.collected_results.append(agg) - def save(self, - write_data: bool = False, - write_axes: bool = True, - include: list = None, - exclude: list = None) -> pd.DataFrame: - + def save( + self, + write_data: bool = False, + write_axes: bool = True, + include: Optional[list] = None, + exclude: Optional[list] = None, + ) -> pd.DataFrame: df = pd.DataFrame() for cr in self.collected_results: - # Add time axis - if write_axes is True and self.axes["t"] not in df and cr.tinp is not None and cr.tinp.size != 0: - L.info(f"Adding column {self.axes['t']} from stream {cr.stream_id}") + if ( + write_axes is True + and self.axes["t"] not in df + and cr.tinp is not None + and cr.tinp.size != 0 + ): + L.info( + f"Adding column {self.axes['t']} from stream {cr.stream_id}", + ) df[self.axes["t"]] = cr.tinp # Add z axis - if write_axes is True and self.axes["z"] not in df and cr.zinp is not None and cr.zinp.size != 0: - L.info(f"Adding column {self.axes['z']} from stream {cr.stream_id}") + if ( + write_axes is True + and self.axes["z"] not in df + and cr.zinp is not None + and cr.zinp.size != 0 + ): + L.info( + f"Adding column {self.axes['z']} from stream {cr.stream_id}", + ) df[self.axes["z"]] = cr.zinp # Add x axis - if write_axes is True and self.axes["x"] not in df and cr.lon is not None and cr.lon.size != 0: - L.info(f"Adding column {self.axes['x']} from stream {cr.stream_id}") + if ( + write_axes is True + and self.axes["x"] not in df + and cr.lon is not None + and cr.lon.size != 0 + ): + L.info( + f"Adding column {self.axes['x']} from stream {cr.stream_id}", + ) df[self.axes["x"]] = cr.lon # Add x axis - if write_axes is True and self.axes["y"] not in df and cr.lat is not None and cr.lat.size != 0: - L.info(f"Adding column {self.axes['y']} from 
stream {cr.stream_id}") + if ( + write_axes is True + and self.axes["y"] not in df + and cr.lat is not None + and cr.lat.size != 0 + ): + L.info( + f"Adding column {self.axes['y']} from stream {cr.stream_id}", + ) df[self.axes["y"]] = cr.lat # Inclusion list, skip everything not defined - if include is not None and (cr.function not in include and cr.stream_id not in include and cr.test not in include): + if include is not None and ( + cr.function not in include + and cr.stream_id not in include + and cr.test not in include + ): continue # Exclusion list, skip everything defined - if exclude is not None and (cr.function in exclude or cr.stream_id in exclude or cr.test in cr.test in include): + if exclude is not None and ( + cr.function in exclude + or cr.stream_id in exclude + or cr.test in cr.test in include + ): continue # Add data column @@ -117,18 +149,19 @@ def save(self, if column_name not in df: df[column_name] = cr.results else: - L.warning(f"Found duplicate QC results column: {column_name}, skipping.") + L.warning( + f"Found duplicate QC results column: {column_name}, skipping.", + ) return df class CFNetCDFStore(BaseStore): - - def __init__(self, results, axes=None, **kwargs): + def __init__(self, results, axes=None, **kwargs) -> None: # OK, time to evaluate the actual tests now that we need the results self.results = list(results) self.collected_results = collect_results(self.results, how="list") - self._stream_ids = [ cr.stream_id for cr in self.collected_results ] + self._stream_ids = [cr.stream_id for cr in self.collected_results] self.axes = axes or { "t": "time", "z": "z", @@ -140,7 +173,19 @@ def __init__(self, results, axes=None, **kwargs): def stream_ids(self) -> List[str]: return self._stream_ids - def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_data: bool = False, include: list = None, exclude: list = None, compute_aggregate: bool = False): + def save( + self, + path_or_ncd, + dsg, + config: Config, + dsg_kwargs: Optional[dict] = None, + write_data: bool = False, + include: Optional[list] = None, + exclude: Optional[list] = None, + compute_aggregate: bool = False, + ): + if dsg_kwargs is None: + dsg_kwargs = {} ps = PandasStore(self.results, self.axes) if compute_aggregate is True: ps.compute_aggregate(name="qc_rollup") @@ -150,7 +195,6 @@ def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_da # Write a new file attrs = {} for cr in ps.collected_results: - column_name = column_from_collected_result(cr) # Set the ancillary variables @@ -160,19 +204,29 @@ def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_da } else: # Update the source ancillary_variables - existing = getattr(attrs[cr.stream_id], "ancillary_variables", "").split(" ") + existing = getattr( + attrs[cr.stream_id], + "ancillary_variables", + "", + ).split(" ") existing += [column_name] attrs[cr.stream_id] = " ".join(list(set(existing))).strip() # determine standard name and long name. 
These should be defined on each test function # https://github.com/cf-convention/cf-conventions/issues/216 - standard_name = getattr(cr.function, "standard_name", "quality_flag") + standard_name = getattr( + cr.function, + "standard_name", + "quality_flag", + ) long_name = getattr(cr.function, "long_name", "Quality Flag") # Get flags from module attribute called FLAGS flags = inspect.getmodule(cr.function).FLAGS - varflagnames = [ d for d in flags.__dict__ if not d.startswith("__") ] - varflagvalues = [ getattr(flags, d) for d in varflagnames ] + varflagnames = [ + d for d in flags.__dict__ if not d.startswith("__") + ] + varflagvalues = [getattr(flags, d) for d in varflagnames] # Set QC variable attributes if column_name not in attrs: @@ -194,9 +248,9 @@ def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_da calls = config.calls_by_stream_id(cr.stream_id) calls = [ - c for c in calls + c + for c in calls if c.module == cr.package and c.method == cr.test - ] if not calls: # No stream_id found! @@ -208,31 +262,38 @@ def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_da if call.region: attrs[column_name]["ioos_qc_region"] = json.dumps( call.region, - cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, ) if call.window.starting or call.window.ending: attrs[column_name]["ioos_qc_window"] = json.dumps( call.window, - cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, ) qc_varconfig = json.dumps( call.kwargs, - cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, ) attrs[column_name]["ioos_qc_config"] = qc_varconfig if len(config.contexts) > 1: # We represent the config as one global config JSON object attrs["ioos_qc_config"] = json.dumps( - config.config, cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True, + config.config, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, ) dsg_kwargs = { **dsg_kwargs, - - "attributes": attrs, - + "attributes": attrs, } # pocean requires these default columns, which should be removed as a requirement @@ -242,12 +303,15 @@ def save(self, path_or_ncd, dsg, config: Config, dsg_kwargs: dict = {}, write_da df["profile"] = 0 if "z" not in df: df["z"] = 0 - ncd = dsg.from_dataframe(df, path_or_ncd, axes=self.axes, **dsg_kwargs) - return ncd + return dsg.from_dataframe( + df, + path_or_ncd, + axes=self.axes, + **dsg_kwargs, + ) class NetcdfStore: - def save(self, path_or_ncd, config, results): """Updates the given netcdf with test configuration and results. If there is already a variable for a given test, it will update that variable with the latest results. 
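# Editor's sketch (illustrative only, not part of the patch): a minimal end-to-end flow
# feeding the stores refactored above. The dataframe, its column names, and the
# spike_test thresholds are hypothetical; it assumes Config (ioos_qc.config) accepts a
# stream-level YAML string like the ones exercised in the test suite further down.
import pandas as pd
from ioos_qc.config import Config
from ioos_qc.streams import PandasStream
from ioos_qc.stores import PandasStore

df = pd.DataFrame({
    "time": pd.date_range("2020-01-01", periods=6, freq="h"),
    "variable1": [1.0, 2.0, 30.0, 3.0, 4.0, 5.0],  # one obvious spike
})
config = Config(
    """
    variable1:
        qartod:
            spike_test:
                suspect_threshold: 3.0
                fail_threshold: 10.0
    """,
)
stream = PandasStream(df, time="time")
results = stream.run(config)         # generator of ContextResult objects
store = PandasStore(results)
qc_df = store.save(write_data=True)  # one QC column per stream/package/test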
@@ -268,7 +332,6 @@ def save(self, path_or_ncd, config, results): return ValueError("Input is not a valid file path or Dataset") for vname, qcobj in results.items(): - if vname not in ncd.variables: L.warning(f"{vname} not found in the Dataset, skipping") continue @@ -279,15 +342,15 @@ def save(self, path_or_ncd, config, results): qcvar_names = [] for modu, tests in qcobj.items(): - try: testpackage = import_module(f"ioos_qc.{modu}") except ImportError: - L.error(f'No ioos_qc test package "{modu}" was found, skipping.') + L.error( + f'No ioos_qc test package "{modu}" was found, skipping.', + ) continue for testname, testresults in tests.items(): - # Try to find a qc variable that matches this config qcvars = ncd.get_variables_by_attributes( ioos_qc_module=modu, @@ -295,21 +358,33 @@ def save(self, path_or_ncd, config, results): ioos_qc_target=vname, ) if not qcvars: - qcvarname = cf_safe_name(vname + "." + modu + "." + testname) + qcvarname = cf_safe_name( + vname + "." + modu + "." + testname, + ) else: if len(qcvars) > 1: - names = [ v.name for v in qcvars ] - L.warning(f"Found more than one QC variable match: {names}") + names = [v.name for v in qcvars] + L.warning( + f"Found more than one QC variable match: {names}", + ) # Use the last one found qcvarname = qcvars[-1].name # Get flags from module attribute called FLAGS flags = testpackage.FLAGS - varflagnames = [ d for d in flags.__dict__ if not d.startswith("__") ] - varflagvalues = [ getattr(flags, d) for d in varflagnames ] + varflagnames = [ + d for d in flags.__dict__ if not d.startswith("__") + ] + varflagvalues = [ + getattr(flags, d) for d in varflagnames + ] if qcvarname not in ncd.variables: - v = ncd.createVariable(qcvarname, np.byte, source_var.dimensions) + v = ncd.createVariable( + qcvarname, + np.byte, + source_var.dimensions, + ) else: v = ncd[qcvarname] qcvar_names.append(qcvarname) @@ -337,23 +412,59 @@ def save(self, path_or_ncd, config, results): v.setncattr("ioos_qc_target", vname) # If there is only one context we can write variable specific configs if len(config.contexts) == 1: - varconfig = config.contexts[0].streams[vname].config[modu][testname] - varconfig = json.dumps(varconfig, cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True) + varconfig = ( + config.contexts[0] + .streams[vname] + .config[modu][testname] + ) + varconfig = json.dumps( + varconfig, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, + ) v.setncattr("ioos_qc_config", varconfig) - v.setncattr("ioos_qc_region", json.dumps(config.contexts[0].region, cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True)) - v.setncattr("ioos_qc_window", json.dumps(config.contexts[0].window, cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True)) + v.setncattr( + "ioos_qc_region", + json.dumps( + config.contexts[0].region, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, + ), + ) + v.setncattr( + "ioos_qc_window", + json.dumps( + config.contexts[0].window, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, + ), + ) # Update the source ancillary_variables - existing = getattr(source_var, "ancillary_variables", "").split(" ") + existing = getattr( + source_var, + "ancillary_variables", + "", + ).split(" ") if qcvar_names: existing += qcvar_names - source_var.ancillary_variables = " ".join(list(set(existing))).strip() + source_var.ancillary_variables = " ".join( + list(set(existing)), + ).strip() if len(config.contexts) > 1: # We can't represent these at the variable level, so make one global config 
ncd.setncattr( "ioos_qc_config", - json.dumps(config.config, cls=GeoNumpyDateEncoder, allow_nan=False, ignore_nan=True), + json.dumps( + config.config, + cls=GeoNumpyDateEncoder, + allow_nan=False, + ignore_nan=True, + ), ) finally: diff --git a/ioos_qc/streams.py index ac2fa5d6..65938976 100644 --- a/ioos_qc/streams.py +++ b/ioos_qc/streams.py @@ -22,36 +22,42 @@ class BaseStream: """Each stream should define how to return a list of datastreams along with their time and depth association. Each of these streams will be passed through quality control configurations and returned back to it. Each stream - needs to also define what to do with the resulting results (how to store them.) + needs to also define what to do with the results it produces (how to store them). """ - def __init__(self, *args, **kwargs): - """df: the dataframe - """ + def __init__(self, *args, **kwargs) -> None: + """df: the dataframe.""" - def time(self): + def time(self) -> None: """Return the time array from the source dataset. This is useful when plotting QC results.""" - def data(self, stream_id): + def data(self, stream_id) -> None: """Return the data array from the source dataset based on stream_id. This is useful when plotting QC results. """ - def run(self, config : Config): + def run(self, config: Config) -> None: """Iterate over the configs, splitting the streams up by geographic and time window before applying the individual config using QcConfig.run(). Store results for future usage. """ class PandasStream: - - def __init__(self, df, time=None, z=None, lat=None, lon=None, geom=None): + def __init__( + self, + df, + time=None, + z=None, + lat=None, + lon=None, + geom=None, + ) -> None: """df: the dataframe time: the column to use for time z: the column to use for depth lat: the column to use for latitude, this or geom is required if using regional subsets lon: the column to use for longitude, this or geom is required if using regional subsets - geom: the column containing the geometry, this or lat and lon are required if using regional subsets + geom: the column containing the geometry, this or lat and lon are required if using regional subsets.
""" self.df = df self.time_column = time or "time" @@ -67,7 +73,7 @@ def __init__(self, df, time=None, z=None, lat=None, lon=None, geom=None): self.lon_column, self.geom_column, ] - self.axis_columns = [ x for x in axis_columns if x in df ] + self.axis_columns = [x for x in axis_columns if x in df] def time(self): return self.df[self.time_column] @@ -75,15 +81,15 @@ def time(self): def data(self, stream_id): return self.df[stream_id] - def run(self, config : Config): - + def run(self, config: Config): for context, calls in config.contexts.items(): - # Subset first by the stream id in each call stream_ids = [] for call in calls: if call.stream_id not in self.df: - L.warning(f"{call.stream_id} is not a column in the dataframe, skipping") + L.warning( + f"{call.stream_id} is not a column in the dataframe, skipping", + ) continue stream_ids.append(call.stream_id) subset = self.df.loc[:, list(set(stream_ids + self.axis_columns))] @@ -98,14 +104,26 @@ def run(self, config : Config): # subset = subset[[ subset[self.geom_column].within(context.region) ]] pass - if context.window.starting is not None or context.window.ending is not None: + if ( + context.window.starting is not None + or context.window.ending is not None + ): if self.time_column in self.axis_columns: if context.window.starting: - subset = subset.loc[subset[self.time_column] >= context.window.starting, :] + subset = subset.loc[ + subset[self.time_column] + >= context.window.starting, + :, + ] if context.window.ending: - subset = subset.loc[subset[self.time_column] < context.window.ending, :] + subset = subset.loc[ + subset[self.time_column] < context.window.ending, + :, + ] else: - L.warning(f"Skipping window subset, {self.time_column} not in columns") + L.warning( + f"Skipping window subset, {self.time_column} not in columns", + ) # This is a boolean array of what was subset and tested based on the initial data feed # Take the index of the subset and set those to true @@ -126,44 +144,66 @@ def run(self, config : Config): # Perform the "run" function on each Call for call in calls: - # if call.is_aggregate: # # We compute aggregates using the results # continue if call.stream_id not in subset: - L.warning(f"{call.stream_id} not a column in the input dataframe, skipping") + L.warning( + f"{call.stream_id} not a column in the input dataframe, skipping", + ) continue data_input = subset.loc[:, call.stream_id] # This evaluates the generator test results - run_result = list(call.run( - inp=data_input, - **subset_kwargs, - )) + run_result = list( + call.run( + inp=data_input, + **subset_kwargs, + ), + ) yield ContextResult( results=run_result, stream_id=call.stream_id, subset_indexes=subset_indexes.values, data=data_input.values, - tinp=subset_kwargs.get("tinp", pd.Series(dtype="datetime64[ns]")).values, - zinp=subset_kwargs.get("zinp", pd.Series(dtype="float64")).values, - lat=subset_kwargs.get("lat", pd.Series(dtype="float64")).values, - lon=subset_kwargs.get("lon", pd.Series(dtype="float64")).values, + tinp=subset_kwargs.get( + "tinp", + pd.Series(dtype="datetime64[ns]"), + ).values, + zinp=subset_kwargs.get( + "zinp", + pd.Series(dtype="float64"), + ).values, + lat=subset_kwargs.get( + "lat", + pd.Series(dtype="float64"), + ).values, + lon=subset_kwargs.get( + "lon", + pd.Series(dtype="float64"), + ).values, ) class NumpyStream: - - def __init__(self, inp=None, time=None, z=None, lat=None, lon=None, geom=None): + def __init__( + self, + inp=None, + time=None, + z=None, + lat=None, + lon=None, + geom=None, + ) -> None: """inp: a numpy 
array or a dictionary of numpy arrays where the keys are the stream ids time: numpy array of date-like objects. z: numpy array of z lat: numpy array of latitude, this or geom is required if using regional subsets lon: numpy array of longitude, this or geom is required if using regional subsets - geom: numpy array of geometry, this or lat and lon are required if using regional subsets + geom: numpy array of geometry, this or lat and lon are required if using regional subsets. """ self.inp = inp try: @@ -183,9 +223,7 @@ def data(self, stream_id=None): return self.inp def run(self, config: Config): - for context, calls in config.contexts.items(): - # This is a boolean array of what was subset and tested based on the initial data feed # Take the index of the subset and set those to true subset_indexes = np.full_like(self.inp, 1, dtype=bool) @@ -196,16 +234,27 @@ def run(self, config: Config): if self.lat is not None and self.lon is not None: pass else: - L.warning('Skipping region subset, "lat" and "lon" must be passed into NumpySource') - - if context.window.starting is not None or context.window.ending is not None: + L.warning( + 'Skipping region subset, "lat" and "lon" must be passed into NumpySource', + ) + + if ( + context.window.starting is not None + or context.window.ending is not None + ): if self.tinp is not None: if context.window.starting: - subset_indexes = (subset_indexes) & (self.tinp >= context.window.starting) + subset_indexes = (subset_indexes) & ( + self.tinp >= context.window.starting + ) if context.window.ending: - subset_indexes = (subset_indexes) & (self.tinp < context.window.ending) + subset_indexes = (subset_indexes) & ( + self.tinp < context.window.ending + ) else: - L.warning('Skipping window subset, "time" array must be passed into "run"') + L.warning( + 'Skipping window subset, "time" array must be passed into "run"', + ) subset_kwargs = {} if self.tinp is not None: @@ -218,7 +267,6 @@ def run(self, config: Config): subset_kwargs["lat"] = self.lat[subset_indexes] for call in calls: - # If the input was passed in the config. # This is here for backwards compatibility and doesn't support # being a different size than what the subset/context size is. @@ -234,10 +282,14 @@ def run(self, config: Config): if call.stream_id in self.inp: runinput = self.inp[call.stream_id] else: - L.warning(f"{call.stream_id} not in input dict, skipping") + L.warning( + f"{call.stream_id} not in input dict, skipping", + ) continue else: - L.error(f"Input is not a dict or np.ndarray, skipping {call.stream_id}") + L.error( + f"Input is not a dict or np.ndarray, skipping {call.stream_id}", + ) continue # Slicing with [True] changes the shape of an array so always re-shape. 
That @@ -247,26 +299,47 @@ def run(self, config: Config): data_input = runinput[subset_indexes].reshape(original_shape) # This evaluates the generator test results - run_result = list(call.run( - inp=data_input, - **subset_kwargs, - )) + run_result = list( + call.run( + inp=data_input, + **subset_kwargs, + ), + ) yield ContextResult( results=run_result, stream_id=call.stream_id, subset_indexes=subset_indexes, data=data_input, - tinp=subset_kwargs.get("tinp", pd.Series(dtype="datetime64[ns]")).values, - zinp=subset_kwargs.get("zinp", pd.Series(dtype="float64").values), - lat=subset_kwargs.get("lat", pd.Series(dtype="float64").values), - lon=subset_kwargs.get("lon", pd.Series(dtype="float64").values), + tinp=subset_kwargs.get( + "tinp", + pd.Series(dtype="datetime64[ns]"), + ).values, + zinp=subset_kwargs.get( + "zinp", + pd.Series(dtype="float64").values, + ), + lat=subset_kwargs.get( + "lat", + pd.Series(dtype="float64").values, + ), + lon=subset_kwargs.get( + "lon", + pd.Series(dtype="float64").values, + ), ) class NetcdfStream: - - def __init__(self, path_or_ncd, time=None, z=None, lat=None, lon=None, geom=None): + def __init__( + self, + path_or_ncd, + time=None, + z=None, + lat=None, + lon=None, + geom=None, + ) -> None: self.path_or_ncd = path_or_ncd self.time_var = time or "time" @@ -299,21 +372,24 @@ def _open(self): return do_close, ds def run(self, config: Config): - do_close, ds = self._open() stream_ids = [] - for context, calls in config.contexts.items(): + for calls in config.contexts.values(): for call in calls: if call.stream_id not in ds.variables: - L.warning(f"{call.stream_id} is not a variable in the netCDF dataset, skipping") + L.warning( + f"{call.stream_id} is not a variable in the netCDF dataset, skipping", + ) continue stream_ids.append(call.stream_id) # Find any var specific kwargs to pass onto the run - varkwargs = { "inp": {} } + varkwargs = {"inp": {}} if self.time_var in ds.variables: - varkwargs["time"] = pd.DatetimeIndex(mapdates(ds.variables[self.time_var].values)) + varkwargs["time"] = pd.DatetimeIndex( + mapdates(ds.variables[self.time_var].values), + ) if self.z_var in ds.variables: varkwargs["z"] = ds.variables[self.z_var].values if self.lat_var in ds.variables: @@ -334,8 +410,14 @@ def run(self, config: Config): class XarrayStream: - - def __init__(self, path_or_ncd, time=None, z=None, lat=None, lon=None): + def __init__( + self, + path_or_ncd, + time=None, + z=None, + lat=None, + lon=None, + ) -> None: self.path_or_ncd = path_or_ncd self.time_var = time or "time" @@ -374,7 +456,6 @@ def _open(self): return do_close, ds def run(self, config: Config): - # Magic for nested key generation # https://stackoverflow.com/a/27809959 results = defaultdict(lambda: defaultdict(odict)) @@ -382,12 +463,12 @@ def run(self, config: Config): do_close, ds = self._open() for context, calls in config.contexts.items(): - for call in calls: - # Find any var specific kwargs to pass onto the run if call.stream_id not in ds.variables: - L.warning(f"{call.stream_id} is not a variable in the xarray dataset, skipping") + L.warning( + f"{call.stream_id} is not a variable in the xarray dataset, skipping", + ) continue # Because the variables could have different dimensions @@ -407,54 +488,105 @@ def run(self, config: Config): # Time subset if self.time_var in ds[call.stream_id].coords: if context.window.starting and context.window.ending: - label_indexes[self.time_var] = slice(context.window.starting, context.window.ending) + label_indexes[self.time_var] = slice( + 
context.window.starting, + context.window.ending, + ) subset_stream = ds[call.stream_id].sel(**label_indexes) if self.time_var in subset_stream.coords: # Already subset with the stream, best case. Good netCDF file. - subset_kwargs["tinp"] = subset_stream.coords[self.time_var].values - elif self.time_var in ds.variables and ds[self.time_var].dims == ds[call.stream_id].dims: + subset_kwargs["tinp"] = subset_stream.coords[ + self.time_var + ].values + elif ( + self.time_var in ds.variables + and ds[self.time_var].dims == ds[call.stream_id].dims + ): # Same dimensions as the stream, so use the same subset - subset_kwargs["tinp"] = ds[self.time_var].sel(**label_indexes).values - elif self.time_var in ds.variables and ds[self.time_var].size == ds[call.stream_id].size: + subset_kwargs["tinp"] = ( + ds[self.time_var].sel(**label_indexes).values + ) + elif ( + self.time_var in ds.variables + and ds[self.time_var].size == ds[call.stream_id].size + ): # Not specifically connected, but hey, the user asked for it - subset_kwargs["tinp"] = ds[self.time_var].sel(**label_indexes).values + subset_kwargs["tinp"] = ( + ds[self.time_var].sel(**label_indexes).values + ) if self.z_var in subset_stream.coords: # Already subset with the stream, best case. Good netCDF file. - subset_kwargs["zinp"] = subset_stream.coords[self.z_var].values - elif self.z_var in ds.variables and ds[self.z_var].dims == ds[call.stream_id].dims: + subset_kwargs["zinp"] = subset_stream.coords[ + self.z_var + ].values + elif ( + self.z_var in ds.variables + and ds[self.z_var].dims == ds[call.stream_id].dims + ): # Same dimensions as the stream, so use the same subset - subset_kwargs["zinp"] = ds[self.z_var].sel(**label_indexes).values - elif self.z_var in ds.variables and ds[self.z_var].size == ds[call.stream_id].size: + subset_kwargs["zinp"] = ( + ds[self.z_var].sel(**label_indexes).values + ) + elif ( + self.z_var in ds.variables + and ds[self.z_var].size == ds[call.stream_id].size + ): # Not specifically connected, but hey, the user asked for it - subset_kwargs["zinp"] = ds[self.z_var].sel(**label_indexes).values + subset_kwargs["zinp"] = ( + ds[self.z_var].sel(**label_indexes).values + ) if self.lat_var in subset_stream.coords: # Already subset with the stream, best case. Good netCDF file. - subset_kwargs["lat"] = subset_stream.coords[self.lat_var].values - elif self.lat_var in ds.variables and ds[self.lat_var].dims == ds[call.stream_id].dims: + subset_kwargs["lat"] = subset_stream.coords[ + self.lat_var + ].values + elif ( + self.lat_var in ds.variables + and ds[self.lat_var].dims == ds[call.stream_id].dims + ): # Same dimensions as the stream, so use the same subset - subset_kwargs["lat"] = ds[self.lat_var].sel(**label_indexes).values - elif self.lat_var in ds.variables and ds[self.lat_var].size == ds[call.stream_id].size: + subset_kwargs["lat"] = ( + ds[self.lat_var].sel(**label_indexes).values + ) + elif ( + self.lat_var in ds.variables + and ds[self.lat_var].size == ds[call.stream_id].size + ): # Not specifically connected, but hey, the user asked for it - subset_kwargs["lat"] = ds[self.lat_var].sel(**label_indexes).values + subset_kwargs["lat"] = ( + ds[self.lat_var].sel(**label_indexes).values + ) if self.lon_var in subset_stream.coords: # Already subset with the stream, best case. Good netCDF file. 
- subset_kwargs["lon"] = subset_stream.coords[self.lon_var].values - elif self.lon_var in ds.variables and ds[self.lon_var].dims == ds[call.stream_id].dims: + subset_kwargs["lon"] = subset_stream.coords[ + self.lon_var + ].values + elif ( + self.lon_var in ds.variables + and ds[self.lon_var].dims == ds[call.stream_id].dims + ): # Same dimensions as the stream, so use the same subset - subset_kwargs["lon"] = ds[self.lon_var].sel(**label_indexes).values - elif self.lon_var in ds.variables and ds[self.lon_var].size == ds[call.stream_id].size: + subset_kwargs["lon"] = ( + ds[self.lon_var].sel(**label_indexes).values + ) + elif ( + self.lon_var in ds.variables + and ds[self.lon_var].size == ds[call.stream_id].size + ): # Not specifically connected, but hey, the user asked for it - subset_kwargs["lon"] = ds[self.lon_var].sel(**label_indexes).values + subset_kwargs["lon"] = ( + ds[self.lon_var].sel(**label_indexes).values + ) data_input = subset_stream.values run_result = call.run( **subset_kwargs, - **dict(inp=data_input), + inp=data_input, ) # Here we turn the labeled xarray indexes into boolean index arrays that numpy @@ -464,8 +596,15 @@ def run(self, config: Config): # able to be used on the original data feed AS IS using a direct subset notation # data[subset_indexes]. I'm pretty sure this works and if it doesn't blame my cat. # We start by subsetting nothing - subset_indexes = np.full_like(ds[call.stream_id].values, 0, dtype=bool) - int_indexes = map_index_queries(ds[call.stream_id], label_indexes) + subset_indexes = np.full_like( + ds[call.stream_id].values, + 0, + dtype=bool, + ) + int_indexes = map_index_queries( + ds[call.stream_id], + label_indexes, + ) # This if-else clause is required only to support Python <3.8. # we can remove it when ioos_qc drops support for Python <=3.7. if isinstance(int_indexes, tuple): @@ -474,10 +613,12 @@ def run(self, config: Config): int_indexes = int_indexes.dim_indexers # Initial slicer will select everything. This selects all values in a dimension # if there are no labeled indexes for it. - slicers = [ slice(None) for x in range(ds[call.stream_id].ndim) ] + slicers = [slice(None) for x in range(ds[call.stream_id].ndim)] for index_key, index_value in int_indexes.items(): if index_key in ds[call.stream_id].dims: - slicers[ds[call.stream_id].dims.index(index_key)] = index_value + slicers[ds[call.stream_id].dims.index(index_key)] = ( + index_value + ) # We started with an empty subset_indexes, not set to True what we actually subset # using the labeled dimensions. 
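# Editor's sketch (illustrative only, not part of the patch): a minimal run through the
# XarrayStream coordinate-subsetting logic reformatted above. The file name, variable
# name, and thresholds are hypothetical; it assumes a netCDF file with a "time"
# coordinate and that Config accepts a stream-level YAML string.
from ioos_qc.config import Config
from ioos_qc.results import collect_results
from ioos_qc.streams import XarrayStream

config = Config(
    """
    air_temperature:
        qartod:
            spike_test:
                suspect_threshold: 5.0
                fail_threshold: 15.0
    """,
)
xs = XarrayStream("observations.nc", time="time")
results = xs.run(config)                          # generator of ContextResult objects
collected = collect_results(results, how="list")  # one CollectedResult per stream/test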
@@ -493,10 +634,22 @@ def run(self, config: Config): stream_id=call.stream_id, subset_indexes=subset_indexes, data=data_input, - tinp=subset_kwargs.get("tinp", pd.Series(dtype="datetime64[ns]").values), - zinp=subset_kwargs.get("zinp", pd.Series(dtype="float64").values), - lat=subset_kwargs.get("lat", pd.Series(dtype="float64").values), - lon=subset_kwargs.get("lon", pd.Series(dtype="float64").values), + tinp=subset_kwargs.get( + "tinp", + pd.Series(dtype="datetime64[ns]").values, + ), + zinp=subset_kwargs.get( + "zinp", + pd.Series(dtype="float64").values, + ), + lat=subset_kwargs.get( + "lat", + pd.Series(dtype="float64").values, + ), + lon=subset_kwargs.get( + "lon", + pd.Series(dtype="float64").values, + ), ) if do_close is True: diff --git a/ioos_qc/utils.py b/ioos_qc/utils.py index 4e86143e..e665dbe6 100644 --- a/ioos_qc/utils.py +++ b/ioos_qc/utils.py @@ -21,23 +21,22 @@ def add_flag_metadata(**kwargs): - def wrapper(func : callable): + def wrapper(func: callable): for k, v in kwargs.items(): setattr(func, k, v) return func + return wrapper def openf(p, **kwargs): - """Helper to allow one-line-lambdas to read file contents - """ + """Helper to allow one-line-lambdas to read file contents.""" with open(p, **kwargs) as f: return f.read() def load_config_from_xarray(source): - """Load an xarray dataset as a config dict - """ + """Load an xarray dataset as a config dict.""" to_close = False if not isinstance(source, xr.Dataset): source = xr.open_dataset(source, decode_cf=False) @@ -59,7 +58,6 @@ def load_config_from_xarray(source): ) for dv in qc_dataset.data_vars: - if dv in qc_dataset.dims: continue @@ -69,11 +67,17 @@ def load_config_from_xarray(source): # Because a data variables can have more than one check # associated with it we need to merge any existing configs # for this variable - newdict = odict({ - vobj.ioos_qc_module: odict({ - vobj.ioos_qc_test: odict(json.loads(vobj.ioos_qc_config)), - }), - }) + newdict = odict( + { + vobj.ioos_qc_module: odict( + { + vobj.ioos_qc_test: odict( + json.loads(vobj.ioos_qc_config), + ), + }, + ), + }, + ) merged = dict_update( y.get(vobj.ioos_qc_target, {}), newdict, @@ -90,8 +94,9 @@ def load_config_from_xarray(source): return y -def load_config_as_dict(source : Union[str, dict, odict, Path, io.StringIO], - ) -> odict: +def load_config_as_dict( + source: Union[str, dict, odict, Path, io.StringIO], +) -> odict: """Load an object as a config dict. The source can be a dict, odict, YAML string, JSON string, a StringIO, or a file path to a valid YAML or JSON file. """ @@ -131,29 +136,29 @@ def load_config_as_dict(source : Union[str, dict, odict, Path, io.StringIO], except BaseException: continue - raise ValueError("Config source is not valid!") + msg = "Config source is not valid!" + raise ValueError(msg) -def isfixedlength(lst : Union[list, tuple], - length : int, - ) -> bool: +def isfixedlength( + lst: Union[list, tuple], + length: int, +) -> bool: if not isinstance(lst, (list, tuple)): - raise ValueError(f"Required: list/tuple, Got: {type(lst)}") + msg = f"Required: list/tuple, Got: {type(lst)}" + raise ValueError(msg) if len(lst) != length: + msg = f"Incorrect list/tuple length for {lst}. Required: {length}, Got: {len(lst)}" raise ValueError( - f"Incorrect list/tuple length for {lst}. 
Required: {length}, Got: {len(lst)}", + msg, ) return True -def isnan(v : Any) -> bool: - return ( - v is None or - v is np.nan or - v is np.ma.masked - ) +def isnan(v: Any) -> bool: + return v is None or v is np.nan or v is np.ma.masked def mapdates(dates): @@ -169,16 +174,19 @@ def mapdates(dates): else: try: # Finally try unix epoch seconds - return pd.to_datetime(dates, unit="s").values.astype("datetime64[ns]") + return pd.to_datetime(dates, unit="s").values.astype( + "datetime64[ns]", + ) except Exception: # strings work here but we don't advertise that return np.array(dates, dtype="datetime64[ns]") -def check_timestamps(times : np.ndarray, - max_time_interval : N = None, - ) -> bool: - """Sanity checks for timestamp arrays +def check_timestamps( + times: np.ndarray, + max_time_interval: N = None, +) -> bool: + """Sanity checks for timestamp arrays. Checks that the times supplied are in monotonically increasing chronological order, and optionally that time intervals between @@ -200,14 +208,15 @@ def check_timestamps(times : np.ndarray, # see if if there are any duplicate times. Then check that none of the # diffs exceeds the sorted time. zero = np.array(0, dtype=time_diff.dtype) - if not np.array_equal(time_diff, sort_diff) or np.any(sort_diff == zero) or (max_time_interval is not None and - np.any(sort_diff > max_time_interval)): - return False - else: - return True + return not ( + not np.array_equal(time_diff, sort_diff) + or np.any(sort_diff == zero) + or max_time_interval is not None + and np.any(sort_diff > max_time_interval) + ) -def dict_update(d : Mapping, u : Mapping) -> Mapping: +def dict_update(d: Mapping, u: Mapping) -> Mapping: # http://stackoverflow.com/a/3233356 for k, v in u.items(): if isinstance(d, Mapping): @@ -217,35 +226,34 @@ def dict_update(d : Mapping, u : Mapping) -> Mapping: else: d[k] = u[k] else: - d = { k: u[k] } + d = {k: u[k]} return d def dict_depth(d): - """Get the depth of a dict - """ + """Get the depth of a dict.""" # https://stackoverflow.com/a/23499101 if isinstance(d, dict): return 1 + (max(map(dict_depth, d.values())) if d else 0) return 0 -def cf_safe_name(name : str) -> str: +def cf_safe_name(name: str) -> str: import re + if isinstance(name, str): if re.match("^[0-9_]", name): # Add a letter to the front name = f"v_{name}" return re.sub(r"[^_a-zA-Z0-9]", "_", name) - raise ValueError(f'Could not convert "{name}" to a safe name') + msg = f'Could not convert "{name}" to a safe name' + raise ValueError(msg) class GeoNumpyDateEncoder(geojson.GeoJSONEncoder): - - def default(self, obj : Any) -> Any: - """If input object is an ndarray it will be converted into a list - """ + def default(self, obj: Any) -> Any: + """If input object is an ndarray it will be converted into a list.""" if isinstance(obj, np.ndarray): return obj.tolist() elif isinstance(obj, np.generic): @@ -263,6 +271,7 @@ def default(self, obj : Any) -> Any: def great_circle_distance(lat_arr, lon_arr): def gc(y1, x1, y2, x2): return Geodesic.WGS84.Inverse(y1, x1, y2, x2)["s12"] + dist = np.ma.zeros(lon_arr.size, dtype=np.float64) dv = np.vectorize(gc) dist[1:] = dv(lat_arr[:-1], lon_arr[:-1], lat_arr[1:], lon_arr[1:]) diff --git a/pyproject.toml b/pyproject.toml index 7cc2f67e..774212be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires = [ ] [project] -name = "ioos_qc" +name = "ioos-qc" description = "IOOS QARTOD and Quality Control tests implemented in Python" readme = "README.md" license = { file = "LICENSE.txt" } @@ -28,14 +28,20 @@ dynamic = [ 
"dependencies", "version", ] +optional-dependencies.extras = [ + "bokeh", + "nco", + "numba", +] +optional-dependencies.test = [ + "dask", + "pytest", +] + urls.documentation = "https://ioos.github.io/ioos_qc" urls.homepage = "https://github.com/ioos/ioos_qc" urls.repository = "https://github.com/ioos/ioos_qc" -[project.optional-dependencies] -test = ["dask", "pytest"] -extras = ["bokeh", "nco", "numba"] - [tool.setuptools] packages = [ "ioos_qc", diff --git a/ruff.toml b/ruff.toml index 3f04dbc7..1f3c2e5e 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,10 +3,10 @@ line-length = 79 lint.select = ["ALL"] lint.ignore = [ - "D203", # 1 blank line required before class docstring - "D205", # 1 blank line required between summary line and description - "D213", # incompatible. Ignoring `multi-line-summary-second-line` - "TRY003", # Avoid specifying long messages outside the exception class + "D203", # 1 blank line required before class docstring + "D205", # 1 blank line required between summary line and description + "D213", # incompatible. Ignoring `multi-line-summary-second-line` + "TRY003", # Avoid specifying long messages outside the exception class ] [lint.extend-per-file-ignores] @@ -14,31 +14,42 @@ lint.ignore = [ "A001", # builtin-variable-shadowing "D100", # Missing docstring in public module "E402", # Module level import not at top of file + "E501", # Line too long "ERA001", # Found commented-out code "ERA001", # Found commented-out code "EXE001", # Shebang is present but file is not executable ] "test_*.py" = [ - "ANN001", # Missing type annotation for function argument - "ANN201", # Missing return type annotation for public function - "ANN202", # Missing return type annotation for private function - "INP001", # File is part of an implicit namespace package - "PD901", # Avoid using the generic variable name `df` for DataFrames - "S101", # Use of assert detected + "ANN001", # Missing type annotation for function argument + "ANN101", # Missing type annotation for `self` in method + "ANN201", # Missing return type annotation for public function + "ANN202", # Missing return type annotation for private function + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D104", # Missing docstring in public package + "E501", # Line too long (113 > 79) + "ERA001", # Found commented-out code + "INP001", # File is part of an implicit namespace package + "PD901", # Avoid using the generic variable name `df` for DataFrames + "PLR0913", # Too many arguments in function definition + "PLR2004", # Magic value used in comparison + "S101", # Use of assert detected + "SLF001", # Private member accessed ] # nbqa-ruff acts on converted .py so we cannot glob .ipynb :-/ # https://github.com/nbQA-dev/nbQA/issues/823 "notebooks/*" = [ - "ANN001", # Missing type annotation for function argument - "ANN201", # Missing return type annotation for public function - "B018", # Found useless expression. 
Either assign it to a variable or remove it - "D100", # Missing docstring in public module - "D103", # Missing docstring in public function - "E402", # Module level import not at top of file - "FBT003", # Boolean positional value in function call - "INP001", # File is part of an implicit namespace package - "PD901", # Avoid using the generic variable name `df` for DataFrames - "T201", # `print` found" + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "B018", # Found useless expression. Either assign it to a variable or remove it + "D100", # Missing docstring in public module + "D103", # Missing docstring in public function + "E402", # Module level import not at top of file + "FBT003", # Boolean positional value in function call + "INP001", # File is part of an implicit namespace package + "PD901", # Avoid using the generic variable name `df` for DataFrames + "T201", # `print` found" ] [lint.pycodestyle] max-doc-length = 180 diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/data/creator_config.json b/tests/data/creator_config.json index 68a290e5..37cccf80 100644 --- a/tests/data/creator_config.json +++ b/tests/data/creator_config.json @@ -22,4 +22,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/data/qc_variable_config.json b/tests/data/qc_variable_config.json index e0c81259..eb36f16a 100644 --- a/tests/data/qc_variable_config.json +++ b/tests/data/qc_variable_config.json @@ -17,4 +17,4 @@ "fail_max": "mean / std" } } -} \ No newline at end of file +} diff --git a/tests/test_argo.py b/tests/test_argo.py index ac48673e..7f404f91 100644 --- a/tests/test_argo.py +++ b/tests/test_argo.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import logging import unittest @@ -15,26 +14,32 @@ class ArgoSpeedTest(unittest.TestCase): - def setUp(self): - self.times = np.arange("2015-01-01 00:00:00", "2015-01-01 06:00:00", - step=np.timedelta64(1, "h"), dtype=np.datetime64) + self.times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 06:00:00", + step=np.timedelta64(1, "h"), + dtype=np.datetime64, + ) self.times_epoch_secs = [t.astype(int) for t in self.times] self.suspect_threshold = 1 # 1 m/s or 0.06 km/min or 3.6 km/hr self.fail_threshold = 3 # 3 m/s or 0.18 km/min or 10.8 km/hr def test_speed_test(self): - """Happy path: some pass, fail and suspect - """ + """Happy path: some pass, fail and suspect.""" # all pass lon = np.array([-71.05, -71.05, -71.05, -71.05, -71.05, -71.05]) lat = np.array([41.01, 41.02, 41.03, 41.04, 41.05, 41.05]) # 0km 1.1km 1.1km 1.1km 1.1km 0km npt.assert_array_equal( - argo.speed_test(lon, lat, self.times, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold), + argo.speed_test( + lon, + lat, + self.times, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ), np.array([2, 1, 1, 1, 1, 1]), ) @@ -42,18 +47,26 @@ def test_speed_test(self): lat = np.array([41.01, 41.02, 41.06, 41.50, 41.05, 41.05]) # 0km 1.1km 4.4km 48km 50km 0km npt.assert_array_equal( - argo.speed_test(lon, lat, self.times, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold), + argo.speed_test( + lon, + lat, + self.times, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ), np.array([2, 1, 3, 4, 4, 1]), ) def test_speed_test_edge_cases(self): # size 0 arr npt.assert_array_equal( - argo.speed_test([], [], [], - 
suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold), + argo.speed_test( + [], + [], + [], + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ), np.array([]), ) @@ -62,9 +75,13 @@ def test_speed_test_edge_cases(self): lon = np.array([-71.05]) tinp = self.times[0:1] npt.assert_array_equal( - argo.speed_test(lon, lat, tinp, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold), + argo.speed_test( + lon, + lat, + tinp, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ), np.array([2]), ) @@ -73,58 +90,71 @@ def test_speed_test_edge_cases(self): lon = np.array([-71.05, -71.05]) tinp = self.times[0:2] npt.assert_array_equal( - argo.speed_test(lon, lat, tinp, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold), + argo.speed_test( + lon, + lat, + tinp, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ), np.array([2, 1]), ) def test_speed_test_error_scenario(self): - """Different shapes for lon/lat/tinp should error - """ + """Different shapes for lon/lat/tinp should error.""" tinp = self.times[0:2] # different tinp shape lat = np.array([41.01]) lon = np.array([-71.05]) - try: - argo.speed_test(lon, lat, tinp, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold) - pytest.fail("should throw exception for mismatched arrays") - except ValueError as expected: - assert "shape" in str(expected) + with pytest.raises(ValueError, match="must be the same shape"): + argo.speed_test( + lon, + lat, + tinp, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ) # different lat shape lat = np.array([41.01]) lon = np.array([-71.05, -71.05]) - try: - argo.speed_test(lon, lat, tinp, - suspect_threshold=self.suspect_threshold, - fail_threshold=self.fail_threshold) - pytest.fail("should throw exception for mismatched arrays") - except ValueError as expected: - assert "shape" in str(expected) + with pytest.raises(ValueError, match="must be the same shape"): + argo.speed_test( + lon, + lat, + tinp, + suspect_threshold=self.suspect_threshold, + fail_threshold=self.fail_threshold, + ) class ArgoPressureIncreasingTest(unittest.TestCase): - def test_pressure_downcast(self): # Standard downcast - pressure = np.array([0.0, 2.0, 2.1, 2.12, 2.3, 4.0, 14.2, 20.0], dtype="float32") + pressure = np.array( + [0.0, 2.0, 2.1, 2.12, 2.3, 4.0, 14.2, 20.0], + dtype="float32", + ) flags = argo.pressure_increasing_test(pressure) npt.assert_array_equal(flags, np.array([1, 1, 1, 1, 1, 1, 1, 1])) def test_pressure_upcast(self): # Standard upcast - pressure = np.array([0.0, 2.0, 2.1, 2.12, 2.3, 4.0, 14.2, 20.0], dtype="float32") + pressure = np.array( + [0.0, 2.0, 2.1, 2.12, 2.3, 4.0, 14.2, 20.0], + dtype="float32", + ) pressure = pressure[::-1] flags = argo.pressure_increasing_test(pressure) npt.assert_array_equal(flags, np.array([1, 1, 1, 1, 1, 1, 1, 1])) def test_pressure_shallow(self): # Shallow profiles should be flagged if it's stuck or decreasing - pressure = np.array([0.0, 2.0, 2.0, 1.99, 2.3, 2.4, 2.4, 2.5], dtype="float32") + pressure = np.array( + [0.0, 2.0, 2.0, 1.99, 2.3, 2.4, 2.4, 2.5], + dtype="float32", + ) flags = argo.pressure_increasing_test(pressure) npt.assert_array_equal(flags, np.array([1, 1, 3, 3, 1, 1, 3, 1])) @@ -137,7 +167,10 @@ def test_using_config(self): qc = QcConfig(config) r = qc.run( - inp=np.array([0.0, 2.0, 2.0, 1.99, 2.3, 2.4, 2.4, 2.5], dtype="float32"), + 
inp=np.array( + [0.0, 2.0, 2.0, 1.99, 2.3, 2.4, 2.4, 2.5], + dtype="float32", + ), ) expected = np.array([1, 1, 3, 3, 1, 1, 3, 1]) diff --git a/tests/test_axds.py b/tests/test_axds.py index 0853bbd8..3d093cbb 100644 --- a/tests/test_axds.py +++ b/tests/test_axds.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import logging import unittest from datetime import datetime @@ -19,10 +18,13 @@ class AxdsValidTimeBoundsTest(unittest.TestCase): - def setUp(self): - self.times = np.arange("2015-01-01 00:00:00", "2015-01-01 06:00:00", - step=np.timedelta64(1, "h"), dtype=np.datetime64) + self.times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 06:00:00", + step=np.timedelta64(1, "h"), + dtype=np.datetime64, + ) def test_no_bounds(self): valid_spans = [ @@ -38,8 +40,10 @@ def test_no_bounds(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([1, 1, 1, 1, 1, 1]), ) @@ -57,8 +61,10 @@ def test_chop_start(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([4, 4, 1, 1, 1, 1]), ) @@ -76,8 +82,10 @@ def test_chop_end(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([1, 1, 1, 1, 4, 4]), ) @@ -95,8 +103,10 @@ def test_chop_ends(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([4, 4, 1, 1, 4, 4]), ) @@ -114,8 +124,10 @@ def test_chop_all(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([4, 4, 4, 4, 4, 4]), ) @@ -125,13 +137,19 @@ def test_empty_chop_ends(self): np.datetime64("2015-01-01T04:00:00"), ) - times = np.arange("2015-01-01 00:00:00", "2015-01-01 06:00:00", - step=np.timedelta64(1, "h"), dtype=np.datetime64) + times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 06:00:00", + step=np.timedelta64(1, "h"), + dtype=np.datetime64, + ) times[0:2] = np.datetime64("NaT") npt.assert_array_equal( - axds.valid_range_test(times, - valid_span=valid_span), + axds.valid_range_test( + times, + valid_span=valid_span, + ), np.array([9, 9, 1, 1, 4, 4]), ) @@ -141,13 +159,19 @@ def test_all_empty(self): np.datetime64("NaT"), ) - times = np.arange("2015-01-01 00:00:00", "2015-01-01 06:00:00", - step=np.timedelta64(1, "h"), dtype=np.datetime64) + times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 06:00:00", + step=np.timedelta64(1, "h"), + dtype=np.datetime64, + ) times[:] = np.datetime64("NaT") npt.assert_array_equal( - axds.valid_range_test(times, - valid_span=valid_span), + axds.valid_range_test( + times, + valid_span=valid_span, + ), np.array([9, 9, 9, 9, 9, 9]), ) @@ -165,49 +189,58 @@ def test_inclusive_exclusive(self): for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span), + axds.valid_range_test( + self.times, + valid_span=valid_span, + ), np.array([4, 4, 1, 1, 4, 4]), ) for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span, - 
start_inclusive=True, - end_inclusive=False), + axds.valid_range_test( + self.times, + valid_span=valid_span, + start_inclusive=True, + end_inclusive=False, + ), np.array([4, 4, 1, 1, 4, 4]), ) for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span, - start_inclusive=True, - end_inclusive=True), + axds.valid_range_test( + self.times, + valid_span=valid_span, + start_inclusive=True, + end_inclusive=True, + ), np.array([4, 4, 1, 1, 1, 4]), ) for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span, - start_inclusive=False, - end_inclusive=True), + axds.valid_range_test( + self.times, + valid_span=valid_span, + start_inclusive=False, + end_inclusive=True, + ), np.array([4, 4, 4, 1, 1, 4]), ) for valid_span in valid_spans: npt.assert_array_equal( - axds.valid_range_test(self.times, - valid_span=valid_span, - start_inclusive=False, - end_inclusive=False), + axds.valid_range_test( + self.times, + valid_span=valid_span, + start_inclusive=False, + end_inclusive=False, + ), np.array([4, 4, 4, 1, 4, 4]), ) def test_with_config(self): - config_str = """ variable1: axds: diff --git a/tests/test_config.py b/tests/test_config.py index 631abd85..b5ba7a5e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,9 +1,10 @@ -#!/usr/bin/env python +import datetime import logging +import sys import unittest -from datetime import datetime from functools import partial +import packaging from shapely.geometry import GeometryCollection, Point import ioos_qc @@ -13,6 +14,13 @@ L.setLevel(logging.INFO) L.handlers = [logging.StreamHandler()] +if packaging.version.parse( + f"{sys.version_info.major}.{sys.version_info.minor}", +) < packaging.version.parse( + "3.11", +): + datetime.UTC = datetime.timezone.utc + class StreamConfigLoadTest(unittest.TestCase): def setUp(self): @@ -111,8 +119,8 @@ def setUp(self): config_str = """ region: something window: - starting: 2020-01-01T00:00:00 - ending: 2020-04-01T00:00:00 + starting: 2020-01-01T00:00:00+00:00 + ending: 2020-04-01T00:00:00+00:00 streams: variable1: qartod: @@ -127,8 +135,24 @@ def setUp(self): self.config = Config(config_str) self.context = Context( window=tw( - starting=datetime(2020, 1, 1, 0, 0, 0), - ending=datetime(2020, 4, 1, 0, 0, 0), + starting=datetime.datetime( + 2020, + 1, + 1, + 0, + 0, + 0, + tzinfo=datetime.UTC, + ), + ending=datetime.datetime( + 2020, + 4, + 1, + 0, + 0, + 0, + tzinfo=datetime.UTC, + ), ), ) self.calls = [ @@ -170,8 +194,8 @@ def setUp(self): type: Point coordinates: [-72, 34] window: - starting: 2020-01-01T00:00:00 - ending: 2020-04-01T00:00:00 + starting: 2020-01-01T00:00:00+00:00 + ending: 2020-04-01T00:00:00+00:00 streams: variable1: qartod: @@ -187,8 +211,8 @@ def setUp(self): type: Point coordinates: [-80,40] window: - starting: 2020-01-01T00:00:00 - ending: 2020-04-01T00:00:00 + starting: 2020-01-01T00:00:00+00:00 + ending: 2020-04-01T00:00:00+00:00 streams: variable1: qartod: @@ -201,8 +225,16 @@ def setUp(self): fail_span: [0, 12] """ window = tw( - starting=datetime(2020, 1, 1, 0, 0, 0), - ending=datetime(2020, 4, 1, 0, 0, 0), + starting=datetime.datetime( + 2020, + 1, + 1, + 0, + 0, + 0, + tzinfo=datetime.UTC, + ), + ending=datetime.datetime(2020, 4, 1, 0, 0, 0, tzinfo=datetime.UTC), ) self.config = Config(config_str) self.context1 = Context( @@ -256,7 +288,7 @@ def setUp(self): def test_load(self): assert len(self.config.contexts) == 2 - for _, calls in self.config.contexts.items(): + for calls 
in self.config.contexts.values(): assert len(calls) == 2 for c in calls: assert c in self.calls diff --git a/tests/test_config_creator.py b/tests/test_config_creator.py index 25a9c7fe..eadc87ea 100644 --- a/tests/test_config_creator.py +++ b/tests/test_config_creator.py @@ -1,9 +1,11 @@ import datetime import logging +import sys import unittest from pathlib import Path import numpy as np +import packaging import pytest import xarray as xr @@ -17,57 +19,64 @@ L.setLevel(logging.INFO) L.addHandler(logging.StreamHandler()) +if packaging.version.parse( + f"{sys.version_info.major}.{sys.version_info.minor}", +) < packaging.version.parse( + "3.11", +): + datetime.UTC = datetime.timezone.utc -class TestCreatorConfig(unittest.TestCase): +class TestCreatorConfig(unittest.TestCase): def test_creator_config(self): creator_config_file = Path().parent / "tests/data/creator_config.json" creator_config = CreatorConfig(creator_config_file) - self.assertTrue("ocean_atlas" in creator_config.keys()) + assert "ocean_atlas" in creator_config ocean_atlas = creator_config["ocean_atlas"] - self.assertEqual(ocean_atlas["file_path"], "resources/ocean_atlas.nc") - self.assertEqual(len(ocean_atlas["variables"].keys()), 3) + assert ocean_atlas["file_path"] == "resources/ocean_atlas.nc" + assert len(ocean_atlas["variables"].keys()) == 3 vars_names = ["o2", "salinity", "temperature"] vars_in_files = ["o_an", "s_an", "t_an"] for var_name, var_in_file in zip(vars_names, vars_in_files): - self.assertEqual(ocean_atlas["variables"][var_name], var_in_file) + assert ocean_atlas["variables"][var_name] == var_in_file - self.assertTrue("narr" in creator_config.keys()) + assert "narr" in creator_config narr = creator_config["narr"] - self.assertEqual(narr["file_path"], "resources/narr.nc") - self.assertEqual(len(narr["variables"].keys()), 5) + assert narr["file_path"] == "resources/narr.nc" + assert len(narr["variables"].keys()) == 5 vars_names = ["air", "pres", "rhum", "uwnd", "vwnd"] vars_in_files = ["air", "slp", "rhum", "uwnd", "vwnd"] for var_name, var_in_file in zip(vars_names, vars_in_files): - self.assertEqual(narr["variables"][var_name], var_in_file) + assert narr["variables"][var_name] == var_in_file class TestQcVariableConfig(unittest.TestCase): - def test_init(self): - qc_variable_config_file = Path().parent / "tests/data/qc_variable_config.json" + qc_variable_config_file = ( + Path().parent / "tests/data/qc_variable_config.json" + ) config = QcVariableConfig(qc_variable_config_file) - self.assertEqual(config["variable"], "air") - self.assertEqual(config["bbox"], [-165, 70, 160, 80]) - self.assertEqual(config["start_time"], "2020-01-01") - self.assertEqual(config["end_time"], "2020-01-08") + assert config["variable"] == "air" + assert config["bbox"] == [-165, 70, 160, 80] + assert config["start_time"] == "2020-01-01" + assert config["end_time"] == "2020-01-08" - self.assertTrue("tests" in config) - self.assertTrue(len(config["tests"]), 2) + assert "tests" in config + assert len(config["tests"]), 2 spike_test = config["tests"]["spike_test"] - self.assertEqual(spike_test["suspect_min"], "3") - self.assertEqual(spike_test["suspect_max"], "( 1 + 2 )") - self.assertEqual(spike_test["fail_min"], "3 * 2 - 6") - self.assertEqual(spike_test["fail_max"], "3 * mean + std / ( max * min )") + assert spike_test["suspect_min"] == "3" + assert spike_test["suspect_max"] == "( 1 + 2 )" + assert spike_test["fail_min"] == "3 * 2 - 6" + assert spike_test["fail_max"] == "3 * mean + std / ( max * min )" gross_range_test = 
config["tests"]["gross_range_test"] - self.assertEqual(gross_range_test["suspect_min"], "min - std * 2") - self.assertEqual(gross_range_test["suspect_max"], "max + std / 2") - self.assertEqual(gross_range_test["fail_min"], "mean * std") - self.assertEqual(gross_range_test["fail_max"], "mean / std") + assert gross_range_test["suspect_min"] == "min - std * 2" + assert gross_range_test["suspect_max"] == "max + std / 2" + assert gross_range_test["fail_min"] == "mean * std" + assert gross_range_test["fail_max"] == "mean / std" def test_fail_config(self): input_config = { @@ -84,7 +93,7 @@ def test_fail_config(self): }, }, } - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match="kurtosis not allowed"): QcVariableConfig(input_config) input_config = { @@ -101,7 +110,7 @@ def test_fail_config(self): }, }, } - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match="% not allowed"): QcVariableConfig(input_config) input_config = { @@ -118,7 +127,7 @@ def test_fail_config(self): }, }, } - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match="import not allowed"): QcVariableConfig(input_config) @@ -130,25 +139,29 @@ def assets_exist(): return narr.exists() and ocean_atlas.exists() -@pytest.mark.skipif(assets_exist() is False, reason="NARR and Ocean Atlas not available. (Download via get_assets.py to test)") +@pytest.mark.skipif( + assets_exist() is False, + reason="NARR and Ocean Atlas not available. (Download via get_assets.py to test)", +) class TestQartodConfigurator(unittest.TestCase): - def setUp(self): creator_config_file = Path().parent / "tests/data/creator_config.json" self.creator_config = CreatorConfig(creator_config_file) self.config_creator = QcConfigCreator(self.creator_config) - qc_variable_config_file = Path().parent / "tests/data/qc_variable_config.json" + qc_variable_config_file = ( + Path().parent / "tests/data/qc_variable_config.json" + ) self.variable_config = QcVariableConfig(qc_variable_config_file) def test_file_load(self): config_creator = QcConfigCreator(self.creator_config) - for name, dataset in config_creator.datasets.items(): - self.assertIsInstance(dataset, xr.Dataset) + for dataset in config_creator.datasets.values(): + assert isinstance(dataset, xr.Dataset) def test_narr_datasets(self): - vars = [ + variables = [ "air", "rhum", "uwnd", @@ -156,29 +169,29 @@ def test_narr_datasets(self): "pres", ] - for var in vars: + for var in variables: _, ds = self.config_creator.var2dataset(var) # need to make sure the variable requested for qc is mapped to name in file for test var_in_file, _ = self.config_creator._var2var_in_file(var) - self.assertTrue(var_in_file in ds) + assert var_in_file in ds def test_ocean_atlas_get_dataset(self): - vars = [ + variables = [ "o2", "salinity", "temperature", ] - for var in vars: + for var in variables: _, ds = self.config_creator.var2dataset(var) # need to make sure the variable requested for qc is mapped to name in file for test var_in_file, _ = self.config_creator._var2var_in_file(var) - self.assertTrue(var_in_file in ds) + assert var_in_file in ds def test_narr_subset(self): var = "air" - start_time = datetime.datetime(2020, 1, 29) - end_time = datetime.datetime(2020, 2, 3) + start_time = datetime.datetime(2020, 1, 29, tzinfo=datetime.UTC) + end_time = datetime.datetime(2020, 2, 3, tzinfo=datetime.UTC) time_slice = slice(start_time, end_time) bbox = [ -165, @@ -188,14 +201,14 @@ def test_narr_subset(self): ] subset = self.config_creator._get_subset(var, bbox, time_slice) - 
self.assertIsInstance(subset, np.ndarray) - self.assertEqual(subset.shape, (5, 15)) - self.assertTrue(np.isclose(np.sum(subset), -2128.5109301700854)) + assert isinstance(subset, np.ndarray) + assert subset.shape == (5, 15) + assert np.isclose(np.sum(subset), -2128.5109301700854) def test_ocean_atlas_subset(self): var = "salinity" - start_time = datetime.datetime(2021, 9, 29) - end_time = datetime.datetime(2021, 10, 3) + start_time = datetime.datetime(2021, 9, 29, tzinfo=datetime.UTC) + end_time = datetime.datetime(2021, 10, 3, tzinfo=datetime.UTC) time_slice = slice(start_time, end_time) bbox = [ -165, @@ -205,8 +218,8 @@ def test_ocean_atlas_subset(self): ] subset = self.config_creator._get_subset(var, bbox, time_slice) - self.assertIsInstance(subset, np.ndarray) - self.assertTrue(np.equal(np.sum(subset), 5408.317574769495)) + assert isinstance(subset, np.ndarray) + assert np.equal(np.sum(subset), 5408.317574769495) def test_get_stats_config(self): var = "air" @@ -225,10 +238,10 @@ def test_get_stats_config(self): "end_time": end_time, } stats = self.config_creator._get_stats(config) - self.assertTrue(np.isclose(stats["min"], -30.7973671854256)) - self.assertTrue(np.isclose(stats["max"], -25.590733697168506)) - self.assertTrue(np.isclose(stats["mean"], -28.69111076703269)) - self.assertTrue(np.isclose(stats["std"], 1.8436437522010403)) + assert np.isclose(stats["min"], -30.7973671854256) + assert np.isclose(stats["max"], -25.590733697168506) + assert np.isclose(stats["mean"], -28.69111076703269) + assert np.isclose(stats["std"], 1.8436437522010403) def test_data(self): # use middle of bounding box and nearest neighbor as backup @@ -259,10 +272,22 @@ def test_data(self): }, } grt = config[var]["qartod"]["gross_range_test"] - self.assertEqual(grt["suspect_span"][0], ref["qartod"]["gross_range_test"]["suspect_span"][0]) - self.assertEqual(grt["suspect_span"][1], ref["qartod"]["gross_range_test"]["suspect_span"][1]) - self.assertEqual(grt["fail_span"][0], ref["qartod"]["gross_range_test"]["fail_span"][0]) - self.assertEqual(grt["fail_span"][1], ref["qartod"]["gross_range_test"]["fail_span"][1]) + assert ( + grt["suspect_span"][0] + == ref["qartod"]["gross_range_test"]["suspect_span"][0] + ) + assert ( + grt["suspect_span"][1] + == ref["qartod"]["gross_range_test"]["suspect_span"][1] + ) + assert ( + grt["fail_span"][0] + == ref["qartod"]["gross_range_test"]["fail_span"][0] + ) + assert ( + grt["fail_span"][1] + == ref["qartod"]["gross_range_test"]["fail_span"][1] + ) def test_no_data(self): # data not available for given box, so code expands box until it gets something @@ -291,4 +316,4 @@ def test_no_data(self): }, }, } - self.assertEqual(config[var], ref) + assert config[var] == ref diff --git a/tests/test_config_deprecated.py b/tests/test_config_deprecated.py index ef861929..90eb28b8 100644 --- a/tests/test_config_deprecated.py +++ b/tests/test_config_deprecated.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import io import json import logging @@ -23,7 +22,6 @@ class ConfigLoadTest(unittest.TestCase): - def setUp(self): template = """ qartod: @@ -36,7 +34,7 @@ def setUp(self): foo: [1, null] """ self.handle, self.yamlfile = tempfile.mkstemp(suffix=".yaml") - with open(self.yamlfile, "w") as f: + with Path(self.yamlfile).open("w") as f: f.write(template) self.expected_dict = { @@ -53,21 +51,21 @@ def setUp(self): def tearDown(self): os.close(self.handle) - os.remove(self.yamlfile) + Path(self.yamlfile).unlink() def test_load_yaml_dict_object(self): - with open(self.yamlfile) as f: + with 
Path(self.yamlfile).open() as f: y = yaml.load(f.read()) qc = QcConfig(y) assert qc.config == self.expected_dict def test_load_yaml_str(self): - with open(self.yamlfile) as f: + with Path(self.yamlfile).open() as f: qc = QcConfig(f.read()) assert qc.config == self.expected_dict def test_load_json_str(self): - with open(self.yamlfile) as f: + with Path(self.yamlfile).open() as f: js = json.dumps(yaml.load(f.read())) qc = QcConfig(js) assert qc.config == self.expected_dict @@ -83,7 +81,7 @@ def test_load_yaml_path_object(self): def test_load_json_stringio(self): st = io.StringIO() qc = QcConfig(self.yamlfile) - with open(self.yamlfile) as f: + with Path(self.yamlfile).open() as f: js = json.dumps(yaml.load(f.read())) st.write(js) qc = QcConfig(st) @@ -92,7 +90,7 @@ def test_load_json_stringio(self): def test_load_yaml_stringio(self): st = io.StringIO() - with open(self.yamlfile) as f: + with Path(self.yamlfile).open() as f: st.write(f.read()) qc = QcConfig(st) st.close() @@ -100,7 +98,6 @@ def test_load_yaml_stringio(self): class ConfigRunTest(unittest.TestCase): - def setUp(self): self.config = { "qartod": { @@ -125,15 +122,19 @@ def test_run(self): assert "aggregate" not in r["qartod"] def test_run_with_agg(self): - qc = QcConfig({"qartod": { - "gross_range_test": { - "fail_span": [0, 12], - }, - "spike_test": { - "suspect_threshold": 3, - "fail_threshold": 10, + qc = QcConfig( + { + "qartod": { + "gross_range_test": { + "fail_span": [0, 12], + }, + "spike_test": { + "suspect_threshold": 3, + "fail_threshold": 10, + }, + }, }, - }}) + ) inp = [-1, 0, 1, 2, 10, 3] expected_gross_range = np.array([4, 1, 1, 1, 1, 1]) expected_spike = np.array([2, 1, 1, 3, 3, 2]) @@ -142,18 +143,20 @@ def test_run_with_agg(self): inp=inp, ) - npt.assert_array_equal(r["qartod"]["gross_range_test"], expected_gross_range) + npt.assert_array_equal( + r["qartod"]["gross_range_test"], + expected_gross_range, + ) npt.assert_array_equal(r["qartod"]["spike_test"], expected_spike) def test_different_kwargs_run(self): - config = deepcopy(self.config) config["qartod"]["location_test"] = { "bbox": [-100, -40, 100, 40], } - xs = [ -101, -100, -99, 0, 99, 100, 101 ] - ys = [ -41, -40, -39, 0, 39, 40, 41 ] + xs = [-101, -100, -99, 0, 99, 100, 101] + ys = [-41, -40, -39, 0, 39, 40, 41] qc = QcConfig(config) r = qc.run( inp=list(range(7)), @@ -173,12 +176,11 @@ def test_different_kwargs_run(self): ) def test_with_values_in_config(self): - config = deepcopy(self.config) config["qartod"]["location_test"] = { "bbox": [-100, -40, 100, 40], - "lat": [ -41, -40, -39, 0, 39, 40, 41 ], - "lon": [ -101, -100, -99, 0, 99, 100, 101 ], + "lat": [-41, -40, -39, 0, 39, 40, 41], + "lon": [-101, -100, -99, 0, 99, 100, 101], } config["qartod"]["gross_range_test"]["inp"] = list(range(7)) @@ -211,7 +213,6 @@ class ClimatologyConfigConversionTest(unittest.TestCase): # Verify that we can parse and convert configs into a ClimatologyConfig object def setUp(self): - # Explicitly defined config self.cc = ClimatologyConfig() self.cc.add( @@ -240,16 +241,25 @@ def setUp(self): "config": [ { "vspan": (10, 20), - "tspan": (np.datetime64("2011-01"), np.datetime64("2011-07")), + "tspan": ( + np.datetime64("2011-01"), + np.datetime64("2011-07"), + ), }, { "vspan": (30, 40), - "tspan": (np.datetime64("2011-07"), np.datetime64("2012-01")), + "tspan": ( + np.datetime64("2011-07"), + np.datetime64("2012-01"), + ), }, { "vspan": (50, 60), "zspan": (0, 10), - "tspan": (np.datetime64("2012-01"), np.datetime64("2013-01")), + "tspan": ( + np.datetime64("2012-01"), + 
np.datetime64("2013-01"), + ), }, { "vspan": (10, 20), @@ -284,24 +294,33 @@ def setUp(self): period: month """ self.handle, self.yamlfile = tempfile.mkstemp(suffix=".yaml") - with open(self.yamlfile, "w") as f: + p = Path(self.yamlfile) + with p.open("w") as f: f.write(template) def tearDown(self): os.close(self.handle) - os.remove(self.yamlfile) + Path(self.yamlfile).unlink() def test_climatology_config_yaml_conversion(self): qc = QcConfig(self.yamlfile) - yaml_climatology_config = ClimatologyConfig.convert(qc.config["qartod"]["climatology_test"]["config"]) + yaml_climatology_config = ClimatologyConfig.convert( + qc.config["qartod"]["climatology_test"]["config"], + ) self._assert_cc_configs_equal(self.cc, yaml_climatology_config) def test_climatology_json_conversion(self): qc = QcConfig(self.json_config) - json_climatology_config = ClimatologyConfig.convert(qc.config["qartod"]["climatology_test"]["config"]) + json_climatology_config = ClimatologyConfig.convert( + qc.config["qartod"]["climatology_test"]["config"], + ) self._assert_cc_configs_equal(self.cc, json_climatology_config) - def _assert_cc_configs_equal(self, c1: ClimatologyConfig, c2: ClimatologyConfig): + def _assert_cc_configs_equal( + self, + c1: ClimatologyConfig, + c2: ClimatologyConfig, + ): assert len(c1.members) == len(c2.members) for idx in range(len(c1.members)): m1 = c1.members[idx] diff --git a/tests/test_performance.py b/tests/test_performance.py index 5efc41db..1006f954 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import logging import unittest @@ -11,11 +10,11 @@ class PerformanceTest(unittest.TestCase): - def setUp(self): from pathlib import Path import pandas as pd + data = pd.read_csv(Path(__file__).parent / "data/20363_1000427.csv.gz") self.times = data["time_epoch"] self.inp = data["value"] @@ -25,12 +24,12 @@ def setUp(self): self.n = 10 def perf_test(self, qc, method_name=None, run_fn=None): - method_name = method_name if method_name is None and "argo" in qc.config: - method_name = list(qc.config["argo"])[0] + method_name = next(iter(qc.config["argo"])) if method_name is None: - method_name = list(qc.config["qartod"])[0] + method_name = next(iter(qc.config["qartod"])) if run_fn is None: + def run_fn(): qc.run( inp=self.inp, @@ -39,6 +38,7 @@ def run_fn(): ) import time + start = time.time() L.debug(f"running {method_name}...") @@ -49,184 +49,217 @@ def run_fn(): end = time.time() elapsed = end - start avg_elapsed = elapsed / self.n - L.info(f"results for {method_name}:\t\t{self.n} runs\n\t{elapsed}s total\n\t{avg_elapsed}s avg") + L.info( + f"results for {method_name}:\t\t{self.n} runs\n\t{elapsed}s total\n\t{avg_elapsed}s avg", + ) def test_location_test(self): - qc = QcConfig({ - "qartod": { - "location_test": { - "lon": self.lon, - "lat": self.lat, + qc = QcConfig( + { + "qartod": { + "location_test": { + "lon": self.lon, + "lat": self.lat, + }, }, }, - }) + ) self.perf_test(qc) def test_location_test__with_range_max(self): - qc = QcConfig({ - "qartod": { - "location_test": { - "lon": self.lon, - "lat": self.lat, - "range_max": 1, + qc = QcConfig( + { + "qartod": { + "location_test": { + "lon": self.lon, + "lat": self.lat, + "range_max": 1, + }, }, }, - }) + ) self.perf_test(qc) def test_speed_test(self): - qc = QcConfig({ - "argo": { - "speed_test": { - "tinp": self.times, - "lon": self.lon, - "lat": self.lat, - "suspect_threshold": 1, - "fail_threshold": 3, + qc = QcConfig( + { + "argo": { + "speed_test": { + "tinp": self.times, + "lon": 
self.lon, + "lat": self.lat, + "suspect_threshold": 1, + "fail_threshold": 3, + }, }, }, - }) + ) self.perf_test(qc) def test_pressure_increasing_test(self): - qc = QcConfig({ - "argo": { - "pressure_increasing_test": {}, + qc = QcConfig( + { + "argo": { + "pressure_increasing_test": {}, + }, }, - }) + ) self.perf_test(qc) def test_gross_range(self): - qc = QcConfig({ - "qartod": { - "gross_range_test": { - "suspect_span": [1, 11], - "fail_span": [0, 12], + qc = QcConfig( + { + "qartod": { + "gross_range_test": { + "suspect_span": [1, 11], + "fail_span": [0, 12], + }, }, }, - }) + ) self.perf_test(qc) def test_climatology_test(self): - qc = QcConfig({ - "qartod": { - "climatology_test": { - "config": [ - { - "vspan": (10, 20), - "tspan": (0, 1), - "period": "quarter", - }, - ], + qc = QcConfig( + { + "qartod": { + "climatology_test": { + "config": [ + { + "vspan": (10, 20), + "tspan": (0, 1), + "period": "quarter", + }, + ], + }, }, }, - }) + ) self.perf_test(qc) def test_spike_test(self): - qc = QcConfig({ - "qartod": { - "spike_test": { - "suspect_threshold": 3, - "fail_threshold": 6, + qc = QcConfig( + { + "qartod": { + "spike_test": { + "suspect_threshold": 3, + "fail_threshold": 6, + }, }, }, - }) + ) self.perf_test(qc) def test_rate_of_change_test(self): - qc = QcConfig({ - "qartod": { - "rate_of_change_test": { - "threshold": 2.5, + qc = QcConfig( + { + "qartod": { + "rate_of_change_test": { + "threshold": 2.5, + }, }, }, - }) + ) self.perf_test(qc) def test_flat_line_test(self): - qc = QcConfig({ - "qartod": { - "flat_line_test": { - "suspect_threshold": 43200, - "fail_threshold": 86400, - "tolerance": 1, + qc = QcConfig( + { + "qartod": { + "flat_line_test": { + "suspect_threshold": 43200, + "fail_threshold": 86400, + "tolerance": 1, + }, }, }, - }) + ) self.perf_test(qc) def test_attenuated_signal_test(self): - qc = QcConfig({ - "qartod": { - "attenuated_signal_test": { - "suspect_threshold": 5, - "fail_threshold": 2.5, + qc = QcConfig( + { + "qartod": { + "attenuated_signal_test": { + "suspect_threshold": 5, + "fail_threshold": 2.5, + }, }, }, - }) + ) self.perf_test(qc) def test_attenuated_signal_with_time_period_test_std(self): - qc = QcConfig({ - "qartod": { - "attenuated_signal_test": { - "suspect_threshold": 5, - "fail_threshold": 2.5, - "test_period": 86400, - "check_type": "std", + qc = QcConfig( + { + "qartod": { + "attenuated_signal_test": { + "suspect_threshold": 5, + "fail_threshold": 2.5, + "test_period": 86400, + "check_type": "std", + }, }, }, - }) + ) self.perf_test(qc) def test_attenuated_signal_with_time_period_test_range(self): - qc = QcConfig({ - "qartod": { - "attenuated_signal_test": { - "suspect_threshold": 5, - "fail_threshold": 2.5, - "test_period": 86400, - "check_type": "range", + qc = QcConfig( + { + "qartod": { + "attenuated_signal_test": { + "suspect_threshold": 5, + "fail_threshold": 2.5, + "test_period": 86400, + "check_type": "range", + }, }, }, - }) + ) self.perf_test(qc) def test_attenuated_signal_with_time_period_test(self): - qc = QcConfig({ - "qartod": { - "attenuated_signal_test": { - "suspect_threshold": 5, - "fail_threshold": 2.5, - "test_period": 86400, + qc = QcConfig( + { + "qartod": { + "attenuated_signal_test": { + "suspect_threshold": 5, + "fail_threshold": 2.5, + "test_period": 86400, + }, }, }, - }) + ) self.perf_test(qc) def test_qartod_compare(self): - qc = QcConfig({ - "qartod": { - "gross_range_test": { - "suspect_span": [1, 11], - "fail_span": [0, 12], - }, - "spike_test": { - "suspect_threshold": 3, - "fail_threshold": 6, - 
}, - "rate_of_change_test": { - "threshold": 2.5, + qc = QcConfig( + { + "qartod": { + "gross_range_test": { + "suspect_span": [1, 11], + "fail_span": [0, 12], + }, + "spike_test": { + "suspect_threshold": 3, + "fail_threshold": 6, + }, + "rate_of_change_test": { + "threshold": 2.5, + }, }, }, - }) + ) results = qc.run( inp=self.inp, tinp=self.times, zinp=self.zinp, ) - all_tests = [results["qartod"][test_name] for test_name in list(results["qartod"])] + all_tests = [ + results["qartod"][test_name] + for test_name in list(results["qartod"]) + ] def run_fn(): qartod.qartod_compare(all_tests) diff --git a/tests/test_qartod.py b/tests/test_qartod.py index 36bdceb4..77888acc 100644 --- a/tests/test_qartod.py +++ b/tests/test_qartod.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import logging import unittest import warnings @@ -6,8 +5,9 @@ import numpy as np import numpy.testing as npt import pandas as pd +import pytest -from ioos_qc import qartod as qartod +from ioos_qc import qartod L = logging.getLogger("ioos_qc") L.setLevel(logging.INFO) @@ -15,22 +15,20 @@ def dask_arr(vals): - """If dask is enabled for this environment, return dask array of values. Otherwise, return values. - """ + """If dask is enabled for this environment, return dask array of values. Otherwise, return values.""" try: import dask.array as da + return da.from_array(vals, chunks=2) except ImportError: return vals class QartodLocationTest(unittest.TestCase): - def test_location(self): - """Ensure that longitudes and latitudes are within reasonable bounds. - """ - lon = [ 80.0, -78.5, 500.500] - lat = [np.nan, 50.0, -60.0] + """Ensure that longitudes and latitudes are within reasonable bounds.""" + lon = [80.0, -78.5, 500.500] + lat = [np.nan, 50.0, -60.0] npt.assert_array_equal( qartod.location_test(lon=lon, lat=lat), @@ -98,47 +96,50 @@ def test_single_location_nan(self): ) def test_location_bad_input(self): + match = "could not convert string to float:" # Wrong type lon - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match=match): qartod.location_test(lon="hello", lat=70) # Wrong type lat - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match=match): qartod.location_test(lon=70, lat="foo") # Wrong type bbox - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match="Required: list/tuple, Got:"): qartod.location_test(lon=70, lat=70, bbox="hi") # Wrong size bbox - with self.assertRaises(ValueError): + with pytest.raises( + ValueError, + match="Incorrect list/tuple length for", + ): qartod.location_test(lon=70, lat=70, bbox=(1, 2)) def test_location_bbox(self): - lon = [80, -78, -71, -79, 500] - lat = [None, 50, 59, 10, -60] + lon = [80, -78, -71, -79, 500] + lat = [None, 50, 59, 10, -60] npt.assert_array_equal( qartod.location_test(lon=lon, lat=lat, bbox=[-80, 40, -70, 60]), np.ma.array([4, 1, 1, 4, 4]), ) - lon = np.asarray([80, -78, -71, -79, 500], dtype=np.float64) - lat = np.asarray([None, 50, 59, 10, -60], dtype=np.float64) + lon = np.asarray([80, -78, -71, -79, 500], dtype=np.float64) + lat = np.asarray([None, 50, 59, 10, -60], dtype=np.float64) npt.assert_array_equal( qartod.location_test(lon=lon, lat=lat, bbox=[-80, 40, -70, 60]), np.ma.array([4, 1, 1, 4, 4]), ) - lon = dask_arr(np.asarray([80, -78, -71, -79, 500], dtype=np.float64)) - lat = dask_arr(np.asarray([None, 50, 59, 10, -60], dtype=np.float64)) + lon = dask_arr(np.asarray([80, -78, -71, -79, 500], dtype=np.float64)) + lat = dask_arr(np.asarray([None, 50, 59, 10, -60], dtype=np.float64)) 
npt.assert_array_equal( qartod.location_test(lon=lon, lat=lat, bbox=[-80, 40, -70, 60]), np.ma.array([4, 1, 1, 4, 4]), ) def test_location_distance_threshold(self): - """Tests a user defined distance threshold between successive points. - """ + """Tests a user defined distance threshold between successive points.""" lon = np.array([-71.05, -71.06, -80.0]) lat = np.array([41.0, 41.02, 45.05]) @@ -153,25 +154,36 @@ def test_location_distance_threshold(self): class QartodGrossRangeTest(unittest.TestCase): - def test_gross_range_check(self): """See if user and sensor ranges are picked up.""" fail_span = (10, 50) suspect_span = (20, 40) vals = [ - 5, 10, # Sensor range. - 15, # User range. - 20, 25, 30, 35, 40, # Valid - 45, # User range. - 51, # Sensor range. + 5, + 10, # Sensor range. + 15, # User range. + 20, + 25, + 30, + 35, + 40, # Valid + 45, # User range. + 51, # Sensor range. ] - result = np.ma.array([ - 4, 3, - 3, - 1, 1, 1, 1, 1, - 3, - 4, - ]) + result = np.ma.array( + [ + 4, + 3, + 3, + 1, + 1, + 1, + 1, + 1, + 3, + 4, + ], + ) with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -194,21 +206,22 @@ def test_gross_range_check(self): ) def test_gross_range_bad_input(self): - with self.assertRaises(ValueError): + match = "Required: list/tuple" + with pytest.raises(ValueError, match=match): qartod.gross_range_test( inp=np.array([5]), fail_span=10, suspect_span=(1, 1), ) - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match=match): qartod.gross_range_test( inp=np.array([5]), fail_span=(1, 1), suspect_span=10, ) - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match="Suspect Span"): qartod.gross_range_test( inp=np.array([5]), fail_span=(1, 1), @@ -220,23 +233,33 @@ def test_gross_range_check_masked(self): fail_span = (10, 50) suspect_span = (20, 40) vals = [ - None, # None - 10, # Sensor range. - 15, # User range. - 20, 25, 30, 35, 40, # Valid - np.nan, # np.nan - 51, # Sensor range. - np.ma.masked, # np.ma.masked + None, # None + 10, # Sensor range. + 15, # User range. + 20, + 25, + 30, + 35, + 40, # Valid + np.nan, # np.nan + 51, # Sensor range. 
+ np.ma.masked, # np.ma.masked ] - result = np.ma.array([ - 9, - 3, - 3, - 1, 1, 1, 1, 1, - 9, - 4, - 9, - ]) + result = np.ma.array( + [ + 9, + 3, + 3, + 1, + 1, + 1, + 1, + 1, + 9, + 4, + 9, + ], + ) with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -246,7 +269,7 @@ def test_gross_range_check_masked(self): dask_arr(np.array(vals, dtype=np.float64)), ] - for i in inputs: + for _i in inputs: npt.assert_array_equal( qartod.gross_range_test( vals, @@ -254,12 +277,10 @@ def test_gross_range_check_masked(self): suspect_span, ), result, - ) class QartodClimatologyPeriodTest(unittest.TestCase): - def _run_test(self, tspan, period): cc = qartod.ClimatologyConfig() cc.add( @@ -336,7 +357,9 @@ class QartodClimatologyPeriodFullCoverageTest(unittest.TestCase): # and test data ranges across several years def setUp(self): - self.tinp = list(pd.date_range(start="2018-01-01", end="2020-12-31", freq="D")) + self.tinp = list( + pd.date_range(start="2018-01-01", end="2020-12-31", freq="D"), + ) self.values = np.ones(len(self.tinp)) self.zinp = np.zeros(len(self.tinp)) @@ -353,17 +376,17 @@ def test_quarterly_periods(self): vspan = (10, 20) cc = qartod.ClimatologyConfig() cc.add( - tspan=(0, 1), # Q1 + tspan=(0, 1), # Q1 period="quarter", vspan=vspan, ) cc.add( - tspan=(1, 3), # Q2-Q3 + tspan=(1, 3), # Q2-Q3 period="quarter", vspan=vspan, ) cc.add( - tspan=(3, 4), # Q4 + tspan=(3, 4), # Q4 period="quarter", vspan=vspan, ) @@ -373,27 +396,27 @@ def test_monthly_periods(self): vspan = (10, 20) cc = qartod.ClimatologyConfig() cc.add( - tspan=(0, 1), # jan + tspan=(0, 1), # jan period="month", vspan=vspan, ) cc.add( - tspan=(1, 2), # feb + tspan=(1, 2), # feb period="month", vspan=vspan, ) cc.add( - tspan=(2, 3), # mar + tspan=(2, 3), # mar period="month", vspan=vspan, ) cc.add( - tspan=(3, 10), # apr-nov + tspan=(3, 10), # apr-nov period="month", vspan=vspan, ) cc.add( - tspan=(10, 11), # dec + tspan=(10, 11), # dec period="month", vspan=vspan, ) @@ -403,17 +426,17 @@ def test_dayofyear_periods(self): vspan = (10, 20) cc = qartod.ClimatologyConfig() cc.add( - tspan=(0, 1), # first day of year + tspan=(0, 1), # first day of year period="dayofyear", vspan=vspan, ) cc.add( - tspan=(1, 363), # jan 2 thru dec 30 + tspan=(1, 363), # jan 2 thru dec 30 period="dayofyear", vspan=vspan, ) cc.add( - tspan=(363, 364), # last day of year + tspan=(363, 364), # last day of year period="dayofyear", vspan=vspan, ) @@ -423,17 +446,17 @@ def test_weekofyear_periods(self): vspan = (10, 20) cc = qartod.ClimatologyConfig() cc.add( - tspan=(0, 1), # first week of year + tspan=(0, 1), # first week of year period="weekofyear", vspan=vspan, ) cc.add( - tspan=(1, 50), # 2nd thru 51st week + tspan=(1, 50), # 2nd thru 51st week period="weekofyear", vspan=vspan, ) cc.add( - tspan=(50, 51), # last week of year + tspan=(50, 51), # last week of year period="weekofyear", vspan=vspan, ) @@ -650,7 +673,6 @@ def test_zspan_out_of_range_high(self): class QartodClimatologyDepthTest(unittest.TestCase): - def setUp(self): self.cc = qartod.ClimatologyConfig() # with depths @@ -771,7 +793,6 @@ def test_climatology_missing_values(self): class QartodClimatologyTest(unittest.TestCase): - def setUp(self): self.cc = qartod.ClimatologyConfig() self.cc.add( @@ -912,14 +933,12 @@ def test_climatology_test_depths(self): class QartodSpikeTest(unittest.TestCase): - def setUp(self): self.suspect_threshold = 25 self.fail_threshold = 50 def test_spike(self): - """Test to make ensure single value spike detection works properly. 
- """ + """Test to make ensure single value spike detection works properly.""" arr = [10, 12, 999.99, 13, 15, 40, 9, 9] # First and last elements should always be good data, unless someone @@ -942,8 +961,7 @@ def test_spike(self): ) def test_spike_negative_vals(self): - """Test to make spike detection works properly for negative values. - """ + """Test to make spike detection works properly for negative values.""" arr = [-10, -12, -999.99, -13, -15, -40, -9, -9] # First and last elements should always be good data, unless someone @@ -966,8 +984,7 @@ def test_spike_negative_vals(self): ) def test_spike_initial_final_values(self): - """The test is not defined for the initial and final values in the array - """ + """The test is not defined for the initial and final values in the array.""" arr = [-100, -99, -99, -98] expected = [2, 1, 1, 2] @@ -981,9 +998,23 @@ def test_spike_initial_final_values(self): ) def test_spike_masked(self): - """Test with missing data. - """ - arr = [10, 12, 999.99, 13, 15, 40, 9, 9, None, 10, 10, 999.99, 10, None] + """Test with missing data.""" + arr = [ + 10, + 12, + 999.99, + 13, + 15, + 40, + 9, + 9, + None, + 10, + 10, + 999.99, + 10, + None, + ] # First and last elements should always be good data, unless someone # has set a threshold to zero. @@ -1005,16 +1036,75 @@ def test_spike_masked(self): ) def test_spike_realdata(self): - """Test with real-world data. - """ + """Test with real-world data.""" suspect_threshold = 0.5 fail_threshold = 1 - arr = [-0.189, -0.0792, -0.0122, 0.0457, 0.0671, 0.0213, -0.0488, -0.1463, -0.2438, -0.3261, -0.3871, -0.4054, - -0.3932, -0.3383, -0.2804, -0.2347, -0.2134, -0.2347, -0.2926, -0.3597, -0.442, -0.509, 0, -0.5944, - -0.57, -0.4267, -0.2926, -0.1585, -0.0945, -0.0762] + arr = [ + -0.189, + -0.0792, + -0.0122, + 0.0457, + 0.0671, + 0.0213, + -0.0488, + -0.1463, + -0.2438, + -0.3261, + -0.3871, + -0.4054, + -0.3932, + -0.3383, + -0.2804, + -0.2347, + -0.2134, + -0.2347, + -0.2926, + -0.3597, + -0.442, + -0.509, + 0, + -0.5944, + -0.57, + -0.4267, + -0.2926, + -0.1585, + -0.0945, + -0.0762, + ] - expected = [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 2] + expected = [ + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + ] inputs = [ arr, @@ -1032,13 +1122,50 @@ def test_spike_realdata(self): ) def test_spike_methods(self): - """Test the different input methods and review the different flags expected. 
- """ + """Test the different input methods and review the different flags expected.""" inp = [3, 4.99, 5, 6, 8, 6, 6, 6.75, 6, 6, 5.3, 6, 6, 9, 5, None, 4, 4] - suspect_threshold = .5 + suspect_threshold = 0.5 fail_threshold = 1 - average_method_expected = [2, 3, 1, 1, 4, 3, 1, 3, 1, 1, 3, 1, 4, 4, 9, 9, 9, 2] - diff_method_expected = [2, 1, 1, 1, 4, 1, 1, 3, 1, 1, 3, 1, 1, 4, 9, 9, 9, 2] + average_method_expected = [ + 2, + 3, + 1, + 1, + 4, + 3, + 1, + 3, + 1, + 1, + 3, + 1, + 4, + 4, + 9, + 9, + 9, + 2, + ] + diff_method_expected = [ + 2, + 1, + 1, + 1, + 4, + 1, + 1, + 3, + 1, + 1, + 3, + 1, + 1, + 4, + 9, + 9, + 9, + 2, + ] # Test average method npt.assert_array_equal( @@ -1074,17 +1201,18 @@ def test_spike_methods(self): def test_spike_test_bad_method(self): inp = [3, 4.99, 5, 6, 8, 6, 6, 6.75, 6, 6, 5.3, 6, 6, 9, 5, None, 4, 4] - suspect_threshold = .5 + suspect_threshold = 0.5 fail_threshold = 1 - with self.assertRaises(ValueError): + match = "Unknown method:" + with pytest.raises(ValueError, match=match): qartod.spike_test( inp=inp, suspect_threshold=suspect_threshold, fail_threshold=fail_threshold, method="bad", ) - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match=match): qartod.spike_test( inp=inp, suspect_threshold=suspect_threshold, @@ -1094,9 +1222,47 @@ def test_spike_test_bad_method(self): def test_spike_test_inputs(self): inp = [3, 4.99, 5, 6, 8, 6, 6, 6.75, 6, 6, 5.3, 6, 6, 9, 5, None, 4, 4] - expected_suspect_only = [2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 1, 3, 3, 9, 9, 9, 2] - expected_fail_only = [2, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 4, 4, 9, 9, 9, 2] - suspect_threshold = .5 + expected_suspect_only = [ + 2, + 3, + 1, + 1, + 3, + 3, + 1, + 3, + 1, + 1, + 3, + 1, + 3, + 3, + 9, + 9, + 9, + 2, + ] + expected_fail_only = [ + 2, + 1, + 1, + 1, + 4, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 4, + 4, + 9, + 9, + 9, + 2, + ] + suspect_threshold = 0.5 fail_threshold = 1 npt.assert_array_equal( @@ -1104,27 +1270,84 @@ def test_spike_test_inputs(self): inp=inp, suspect_threshold=suspect_threshold, ), - expected_suspect_only) + expected_suspect_only, + ) npt.assert_array_equal( qartod.spike_test( inp=inp, fail_threshold=fail_threshold, ), - expected_fail_only) + expected_fail_only, + ) class QartodRateOfChangeTest(unittest.TestCase): - def setUp(self): - self.times = np.arange("2015-01-01 00:00:00", "2015-01-01 06:00:00", - step=np.timedelta64(15, "m"), dtype=np.datetime64) + self.times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 06:00:00", + step=np.timedelta64(15, "m"), + dtype=np.datetime64, + ) self.times_epoch_secs = [t.astype(int) for t in self.times] - self.threshold = 5 / 15 / 60 # 5 units per 15 minutes --> 5/15/60 units per second + self.threshold = ( + 5 / 15 / 60 + ) # 5 units per 15 minutes --> 5/15/60 units per second def test_rate_of_change(self): times = self.times - arr = [2, 10, 2.1, 3, 4, 5, 7, 10, 0, 2, 2.2, 2, 1, 2, 3, 90, 91, 92, 93, 1, 2, 3, 4, 5] - expected = [1, 3, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1] + arr = [ + 2, + 10, + 2.1, + 3, + 4, + 5, + 7, + 10, + 0, + 2, + 2.2, + 2, + 1, + 2, + 3, + 90, + 91, + 92, + 93, + 1, + 2, + 3, + 4, + 5, + ] + expected = [ + 1, + 3, + 3, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + ] inputs = [ arr, np.asarray(arr, dtype=np.float64), @@ -1172,17 +1395,35 @@ def test_rate_of_change_negative_values(self): class QartodFlatLineTest(unittest.TestCase): - def setUp(self): - self.times = np.arange("2015-01-01 
00:00:00", "2015-01-01 03:30:00", - step=np.timedelta64(15, "m"), dtype=np.datetime64) + self.times = np.arange( + "2015-01-01 00:00:00", + "2015-01-01 03:30:00", + step=np.timedelta64(15, "m"), + dtype=np.datetime64, + ) self.times_epoch_secs = [t.astype(int) for t in self.times] - self.suspect_threshold = 3000 # 50 mins, or count of 3 + self.suspect_threshold = 3000 # 50 mins, or count of 3 self.fail_threshold = 4800 # 80 mins, or count of 5 self.tolerance = 0.01 def test_flat_line(self): - arr = [1, 2, 2.0001, 2, 2.0001, 2, 2.0001, 2, 4, 5, 3, 3.0001, 3.0005, 3.00001] + arr = [ + 1, + 2, + 2.0001, + 2, + 2.0001, + 2, + 2.0001, + 2, + 4, + 5, + 3, + 3.0001, + 3.0005, + 3.00001, + ] expected = [1, 1, 1, 1, 3, 3, 4, 4, 1, 1, 1, 1, 1, 3] inputs = [ arr, @@ -1239,7 +1480,8 @@ def test_flat_line(self): ) # test nothing fails - arr = np.random.random(len(self.times)) + rng = np.random.default_rng() + arr = rng.normal(size=len(self.times)) expected = np.ones_like(arr) npt.assert_array_equal( qartod.flat_line_test( @@ -1253,7 +1495,21 @@ def test_flat_line(self): ) def test_flat_line_starting_from_beginning(self): - arr = [2, 2.0001, 2, 2.0001, 2, 2.0001, 2, 4, 5, 3, 3.0001, 3.0005, 3.00001] + arr = [ + 2, + 2.0001, + 2, + 2.0001, + 2, + 2.0001, + 2, + 4, + 5, + 3, + 3.0001, + 3.0005, + 3.00001, + ] expected = [1, 1, 1, 3, 3, 4, 4, 1, 1, 1, 1, 1, 3] with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -1273,7 +1529,6 @@ def test_flat_line_starting_from_beginning(self): npt.assert_array_equal(result, expected) def test_flat_line_short_timeseries(self): - def check(time, arr, expected): result = qartod.flat_line_test( inp=arr, @@ -1284,13 +1539,21 @@ def check(time, arr, expected): ) npt.assert_array_equal(result, expected) - check(time=[], arr=[], expected=[]) - check(time=[0], arr=[5], expected=[1]) - check(time=[0, 1], arr=[5, 5], expected=[1, 1]) - check(time=[0, 1, 2], arr=[5, 5, 5], expected=[1, 1, 1]) - check(time=[0, 1, 2, 3], arr=[5, 5, 5, 5], expected=[1, 1, 1, 3]) - check(time=[0, 1, 2, 3, 4], arr=[5, 5, 5, 5, 5], expected=[1, 1, 1, 3, 3]) - check(time=[0, 1, 2, 3, 4, 5], arr=[5, 5, 5, 5, 5, 5], expected=[1, 1, 1, 3, 3, 4]) + check(time=[], arr=[], expected=[]) + check(time=[0], arr=[5], expected=[1]) + check(time=[0, 1], arr=[5, 5], expected=[1, 1]) + check(time=[0, 1, 2], arr=[5, 5, 5], expected=[1, 1, 1]) + check(time=[0, 1, 2, 3], arr=[5, 5, 5, 5], expected=[1, 1, 1, 3]) + check( + time=[0, 1, 2, 3, 4], + arr=[5, 5, 5, 5, 5], + expected=[1, 1, 1, 3, 3], + ) + check( + time=[0, 1, 2, 3, 4, 5], + arr=[5, 5, 5, 5, 5, 5], + expected=[1, 1, 1, 3, 3, 4], + ) def test_flat_line_with_spike(self): tolerance = 4 @@ -1309,7 +1572,22 @@ def test_flat_line_with_spike(self): npt.assert_array_equal(result, expected) def test_flat_line_missing_values(self): - arr = [1, None, np.ma.masked, 2, 2.0001, 2, 2.0001, 2, 4, None, 3, None, None, 3.00001] + arr = [ + 1, + None, + np.ma.masked, + 2, + 2.0001, + 2, + 2.0001, + 2, + 4, + None, + 3, + None, + None, + 3.00001, + ] expected = [1, 9, 9, 1, 3, 3, 4, 4, 1, 9, 1, 9, 9, 3] with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -1330,9 +1608,18 @@ def test_flat_line_missing_values(self): class QartodAttenuatedSignalTest(unittest.TestCase): - - def _run_test(self, times, signal, suspect_threshold, fail_threshold, check_type, expected, - test_period=None, min_obs=None, min_period=None): + def _run_test( + self, + times, + signal, + suspect_threshold, + fail_threshold, + check_type, + expected, + test_period=None, + min_obs=None, 
+ min_period=None, + ): npt.assert_array_equal( qartod.attenuated_signal_test( inp=signal, @@ -1366,89 +1653,153 @@ def _run_test(self, times, signal, suspect_threshold, fail_threshold, check_type def test_attenuated_signal(self): # good signal, all pass signal = np.array([1, 2, 3, 4]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = np.array([1, 1, 1, 1]) - self._run_test(times=times, signal=signal, - suspect_threshold=0.75, fail_threshold=0.5, check_type="std", - expected=expected) + self._run_test( + times=times, + signal=signal, + suspect_threshold=0.75, + fail_threshold=0.5, + check_type="std", + expected=expected, + ) # Only suspect signal = np.array([1, 2, 3, 4]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = np.array([3, 3, 3, 3]) - self._run_test(times=times, signal=signal, - suspect_threshold=5, fail_threshold=0, check_type="std", - expected=expected) + self._run_test( + times=times, + signal=signal, + suspect_threshold=5, + fail_threshold=0, + check_type="std", + expected=expected, + ) # Not changing should fail signal = np.array([1, 1, 1, 1]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = np.array([4, 4, 4, 4]) - self._run_test(times=times, signal=signal, - suspect_threshold=10, fail_threshold=8, check_type="std", - expected=expected) + self._run_test( + times=times, + signal=signal, + suspect_threshold=10, + fail_threshold=8, + check_type="std", + expected=expected, + ) # std deviation less than fail threshold signal = np.array([10, 20, 30, 40]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = np.array([4, 4, 4, 4]) - self._run_test(times=times, signal=signal, - suspect_threshold=100000, fail_threshold=1000, check_type="std", - expected=expected) + self._run_test( + times=times, + signal=signal, + suspect_threshold=100000, + fail_threshold=1000, + check_type="std", + expected=expected, + ) def test_attenuated_signal_range(self): # range less than fail threshold signal = np.array([10, 20, 30, 40]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = np.array([4, 4, 4, 4]) - self._run_test(times=times, signal=signal, - suspect_threshold=50, fail_threshold=31, check_type="range", - expected=expected) + self._run_test( + times=times, + signal=signal, + suspect_threshold=50, + fail_threshold=31, + check_type="range", + expected=expected, + ) # range less than suspect threshold signal = np.array([10, 20, 30, 40]) - times = np.array([ - np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size) - ]) + times = np.array( + [ + np.datetime64("2019-01-01") + np.timedelta64(i, "D") + for i in range(signal.size) + ], + ) expected = 
np.array([3, 3, 3, 3])
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=31, fail_threshold=10, check_type="range",
-                       expected=expected)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=31,
+            fail_threshold=10,
+            check_type="range",
+            expected=expected,
+        )

         signal = np.array([3, 4, 5, 8.1, 9, 8.5, 8.7, 8.4, 8.2, 8.35, 2, 1])
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size)
-        ])
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(signal.size)
+            ],
+        )
         expected = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=0.15, fail_threshold=0.1, check_type="range",
-                       expected=expected)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=0.15,
+            fail_threshold=0.1,
+            check_type="range",
+            expected=expected,
+        )

     def test_attenuated_signal_time_window(self):
         # test time windowed range
         signal = [1, 2, 3, 100, 1000]
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(len(signal))
-        ])
-        time_window = 2 * 86400 # 2 days
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(len(signal))
+            ],
+        )
+        time_window = 2 * 86400  # 2 days

         def _run_test_time_window(min_obs, min_period, expected):
-            self._run_test(times=times, signal=signal,
-                           suspect_threshold=100, fail_threshold=10, check_type="range",
-                           expected=expected,
-                           test_period=time_window,
-                           min_obs=min_obs,
-                           min_period=min_period)
+            self._run_test(
+                times=times,
+                signal=signal,
+                suspect_threshold=100,
+                fail_threshold=10,
+                check_type="range",
+                expected=expected,
+                test_period=time_window,
+                min_obs=min_obs,
+                min_period=min_period,
+            )

         # zero min_obs -- initial values should fail
         min_obs = 0
@@ -1463,7 +1814,7 @@ def _run_test_time_window(min_obs, min_period, expected):
         _run_test_time_window(min_obs, min_period, expected)

         # min_obs the same size as time_window -- first window should be UNKNOWN
-        min_obs = 2 # 2 days (since 1 obs per day)
+        min_obs = 2  # 2 days (since 1 obs per day)
         min_period = None
         expected = [2, 4, 4, 3, 1]
         _run_test_time_window(min_obs, min_period, expected)
@@ -1476,105 +1827,208 @@ def _run_test_time_window(min_obs, min_period, expected):

     def test_attenuated_signal_missing(self):
         signal = np.array([None, 2, 3, 4])
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size)
-        ])
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(signal.size)
+            ],
+        )
         expected = np.array([9, 1, 1, 1])
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=0.75, fail_threshold=0.5, check_type="std",
-                       expected=expected)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=0.75,
+            fail_threshold=0.5,
+            check_type="std",
+            expected=expected,
+        )

         signal = np.array([None, None, None, None])
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(signal.size)
-        ])
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(signal.size)
+            ],
+        )
         expected = np.array([9, 9, 9, 9])
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=0.75, fail_threshold=0.5, check_type="std",
-                       expected=expected)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=0.75,
+            fail_threshold=0.5,
+            check_type="std",
+            expected=expected,
+        )

         # range less than 30
         signal = [10, None, None, 40]
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(len(signal))
-        ])
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(len(signal))
+            ],
+        )
         expected = np.array([4, 9, 9, 4])
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=50, fail_threshold=31, check_type="range",
-                       expected=expected)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=50,
+            fail_threshold=31,
+            check_type="range",
+            expected=expected,
+        )

     def test_attenuated_signal_missing_time_window(self):
         # test time windowed range with missing values
         signal = [1, None, 10, 100, 1000]
-        times = np.array([
-            np.datetime64("2019-01-01") + np.timedelta64(i, "D") for i in range(len(signal))
-        ])
-        time_window = 2 * 86400 # 2 days
-        min_obs = 2 # 2 days (since 1 obs per day)
+        times = np.array(
+            [
+                np.datetime64("2019-01-01") + np.timedelta64(i, "D")
+                for i in range(len(signal))
+            ],
+        )
+        time_window = 2 * 86400  # 2 days
+        min_obs = 2  # 2 days (since 1 obs per day)

         # test time windowed range
         expected = [2, 9, 2, 3, 1]
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=100, fail_threshold=50, check_type="range",
-                       expected=expected,
-                       test_period=time_window,
-                       min_obs=min_obs)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=100,
+            fail_threshold=50,
+            check_type="range",
+            expected=expected,
+            test_period=time_window,
+            min_obs=min_obs,
+        )

         # test time windowed std
         expected = [2, 9, 2, 3, 1]
         time_window = 2 * 86400
-        self._run_test(times=times, signal=signal,
-                       suspect_threshold=150, fail_threshold=40, check_type="std",
-                       expected=expected,
-                       test_period=time_window,
-                       min_obs=min_obs)
+        self._run_test(
+            times=times,
+            signal=signal,
+            suspect_threshold=150,
+            fail_threshold=40,
+            check_type="std",
+            expected=expected,
+            test_period=time_window,
+            min_obs=min_obs,
+        )


 class QartodDensityInversionTest(unittest.TestCase):
-
-    def _run_density_inversion_tests(self, density, depth, result,
-                                     suspect_threshold=-0.01,
-                                     fail_threshold=-.03):
+    def _run_density_inversion_tests(
+        self,
+        density,
+        depth,
+        result,
+        suspect_threshold=-0.01,
+        fail_threshold=-0.03,
+    ):
         # Try every possible input format combinations
         dens_inputs = [
             density,
             np.asarray(density, dtype=np.float64),
-            dask_arr(np.asarray(density, dtype=np.float64))]
+            dask_arr(np.asarray(density, dtype=np.float64)),
+        ]
         depth_inputs = [
             depth,
             np.asarray(depth, dtype=np.float64),
-            dask_arr(np.asarray(depth, dtype=np.float64))]
+            dask_arr(np.asarray(depth, dtype=np.float64)),
+        ]
         for rho in dens_inputs:
             for z in depth_inputs:
-                npt.assert_array_equal(qartod.density_inversion_test(inp=rho, zinp=z,
-                                                                     suspect_threshold=suspect_threshold,
-                                                                     fail_threshold=fail_threshold),
-                                       result)
+                npt.assert_array_equal(
+                    qartod.density_inversion_test(
+                        inp=rho,
+                        zinp=z,
+                        suspect_threshold=suspect_threshold,
+                        fail_threshold=fail_threshold,
+                    ),
+                    result,
+                )

     def test_density_inversion_downcast_flags(self):
         depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
-        density = [1024, 1024, 1023.98, 1024, 1025, 1026, 1025.9, 1026, 1026, None, 1026, 1027]
+        density = [
+            1024,
+            1024,
+            1023.98,
+            1024,
+            1025,
+            1026,
+            1025.9,
+            1026,
+            1026,
+            None,
+            1026,
+            1027,
+        ]
         result = [1, 3, 3, 1, 1, 4, 4, 1, 1, 9, 9, 1]
         self._run_density_inversion_tests(density, depth, result)

     def test_density_inversion_upcast_flags(self):
         depth = [11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
-        density = [1026, None, 1026, 1026, 1025.9, 1026, 1025, 1024, 1023.98, 1024, 1024]
+        density = [
+            1026,
+            None,
+            1026,
+            1026,
+            1025.9,
+            1026,
+            1025,
+            1024,
+            1023.98,
+            1024,
+            1024,
+        ]
         result = [1, 9, 9, 1, 4, 4, 1, 1, 3, 3, 1]
         self._run_density_inversion_tests(density, depth, result)

     def test_density_inversion_down_up_cast_flags(self):
         depth = [1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 8, 7, 6, 5, 4, 3, 2, 1]
-        density = [1024, 1024, 1023.98, 1024, 1025, 1026, 1025.9, 1026, 1026,
-                   1026, 1026, 1025.9, 1026, 1025, 1024, 1023.98, 1024, 1024]
+        density = [
+            1024,
+            1024,
+            1023.98,
+            1024,
+            1025,
+            1026,
+            1025.9,
+            1026,
+            1026,
+            1026,
+            1026,
+            1025.9,
+            1026,
+            1025,
+            1024,
+            1023.98,
+            1024,
+            1024,
+        ]
         result = [1, 3, 3, 1, 1, 4, 4, 1, 1, 1, 1, 4, 4, 1, 1, 3, 3, 1]
         self._run_density_inversion_tests(density, depth, result)

     def test_density_inversion_stable_depth_flags(self):
         depth = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-        density = [1026, None, 1026, 1026, 1025.9, 1026, 1025, 1024, 1023.98, 1024, 1024]
+        density = [
+            1026,
+            None,
+            1026,
+            1026,
+            1025.9,
+            1026,
+            1025,
+            1024,
+            1023.98,
+            1024,
+            1024,
+        ]
         result = [1, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1]
         self._run_density_inversion_tests(density, depth, result)
@@ -1596,42 +2050,66 @@ def test_density_inversion_input(self):
         density = [1024, 1024, 1025]
         depth = [1, 2, 3]

+        match = (
+            "'less'|<"
+        )
         # Wrong type suspect_threshold
-        with self.assertRaises(TypeError):
-            qartod.density_inversion_test(inp=density, zinp=depth, suspect_threshold="bad")
+        with pytest.raises(TypeError, match=match):
+            qartod.density_inversion_test(
+                inp=density,
+                zinp=depth,
+                suspect_threshold="bad",
+            )

         # Wrong type fail_threshold
-        with self.assertRaises(TypeError):
-            qartod.density_inversion_test(inp=density, zinp=depth, fail_threshold="bad")
+        with pytest.raises(TypeError, match=match):
+            qartod.density_inversion_test(
+                inp=density,
+                zinp=depth,
+                fail_threshold="bad",
+            )

         # Wrong type for both fail_threshold and suspect_threshold
-        with self.assertRaises(TypeError):
-            qartod.density_inversion_test(inp=density, zinp=depth,
-                                          suspect_threshold="bad", fail_threshold="bad")
+        with pytest.raises(TypeError, match=match):
+            qartod.density_inversion_test(
+                inp=density,
+                zinp=depth,
+                suspect_threshold="bad",
+                fail_threshold="bad",
+            )

+        match = "could not convert string to float"
         # Wrong type density
-        with self.assertRaises(ValueError):
-            qartod.density_inversion_test(inp="density", zinp=depth, suspect_threshold=-0.3)
+        with pytest.raises(ValueError, match=match):
+            qartod.density_inversion_test(
+                inp="density",
+                zinp=depth,
+                suspect_threshold=-0.3,
+            )

         # Wrong type depth
-        with self.assertRaises(ValueError):
-            qartod.density_inversion_test(inp=density, zinp="depth", suspect_threshold=-0.3)
+        with pytest.raises(ValueError, match=match):
+            qartod.density_inversion_test(
+                inp=density,
+                zinp="depth",
+                suspect_threshold=-0.3,
+            )


 class QartodUtilsTests(unittest.TestCase):
-
     def test_qartod_compare(self):
-        """Tests that the compare function works as intended.
-        """
+        """Tests that the compare function works as intended."""
         range_flags = np.array([1, 1, 1, 9, 1, 1, 9, 9])
         spike_flags = np.array([2, 1, 1, 1, 1, 1, 9, 9])
         grdtn_flags = np.array([1, 3, 3, 4, 3, 1, 2, 9])
-        primary_flags = qartod.qartod_compare([
-            range_flags,
-            spike_flags,
-            grdtn_flags,
-        ])
+        primary_flags = qartod.qartod_compare(
+            [
+                range_flags,
+                spike_flags,
+                grdtn_flags,
+            ],
+        )
         np.testing.assert_array_equal(
             primary_flags,
             np.array([1, 3, 3, 4, 3, 1, 2, 9]),
diff --git a/tests/test_streams.py b/tests/test_streams.py
index 603cb708..48118325 100644
--- a/tests/test_streams.py
+++ b/tests/test_streams.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import logging
 import unittest
@@ -23,7 +22,6 @@
 class PandasStreamTest(unittest.TestCase):
     def setUp(self):
-
         config = """
             region: something
             window:
@@ -85,7 +83,6 @@ def test_run(self):

 class NumpyStreamTestLightConfig(unittest.TestCase):
     def setUp(self):
-
         config = """
             streams:
                 variable1:
@@ -97,7 +94,11 @@ def setUp(self):
         self.config = Config(config)

         rows = 50
-        self.tinp = pd.date_range(start="01/01/2020", periods=rows, freq="D").values
+        self.tinp = pd.date_range(
+            start="01/01/2020",
+            periods=rows,
+            freq="D",
+        ).to_numpy()
         self.zinp = np.full_like(self.tinp, 2.0)
         self.lat = np.full_like(self.tinp, 36.1)
         self.lon = np.full_like(self.tinp, -76.5)
@@ -141,7 +142,6 @@ def test_run(self):

 class NumpyStreamTest(unittest.TestCase):
     def setUp(self):
-
         config = """
             region: something
             window:
@@ -157,7 +157,11 @@ def setUp(self):
         self.config = Config(config)

         rows = 50
-        self.tinp = pd.date_range(start="01/01/2020", periods=rows, freq="D").values
+        self.tinp = pd.date_range(
+            start="01/01/2020",
+            periods=rows,
+            freq="D",
+        ).to_numpy()
         self.zinp = np.full_like(self.tinp, 2.0)
         self.lat = np.full_like(self.tinp, 36.1)
         self.lon = np.full_like(self.tinp, -76.5)
@@ -201,7 +205,6 @@ def test_run(self):

 class NetcdfStreamTest(unittest.TestCase):
     def setUp(self):
-
         config = """
             region: something
             window:
@@ -266,7 +269,6 @@ def test_run(self):

 class XarrayStreamTest(unittest.TestCase):
     def setUp(self):
-
         config = """
             region: something
             window:
@@ -503,8 +505,16 @@ def test_run_list_results(self):
         results = ps.run(self.config)
         results = collect_results(results, how="list")
-        var1_gr = next(res for res in results if res.stream_id == "variable1" and res.test == "gross_range_test")
-        var2_gr = next(res for res in results if res.stream_id == "variable2" and res.test == "gross_range_test")
+        var1_gr = next(
+            res
+            for res in results
+            if res.stream_id == "variable1" and res.test == "gross_range_test"
+        )
+        var2_gr = next(
+            res
+            for res in results
+            if res.stream_id == "variable2" and res.test == "gross_range_test"
+        )

         # Variable 1
         # Actual data returned in full
         npt.assert_array_equal(
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 94fcec01..8109e3d2 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,9 +1,9 @@
-#!/usr/bin/env python
 import json
 import os
 import tempfile
 import time
 import unittest
+from pathlib import Path

 import h5netcdf.legacyapi as nc4
 import numpy as np
@@ -13,33 +13,41 @@
 class AuxillaryCheckTest(unittest.TestCase):
-
     # Range of times every 15 minutes from 2013-01-01 to 2013-01-02.
-    times = np.arange("2013-01-01 00:00:00", "2013-01-02 00:00:00",
-                      dtype="datetime64[15m]")
+    times = np.arange(
+        "2013-01-01 00:00:00",
+        "2013-01-02 00:00:00",
+        dtype="datetime64[15m]",
+    )

     def test_bad_time_sorting(self):
         # Simply reversing the order ought to fail the sort check.
         reversed_times = self.times[::-1]
-        self.assertFalse(utils.check_timestamps(reversed_times))
+        assert not utils.check_timestamps(reversed_times)

     def test_bad_time_repeat(self):
         """Check that repeated timestamps are picked up."""
-        repeated = np.concatenate([np.repeat(self.times[0], 3),
-                                   self.times[3:]])
-        self.assertFalse(utils.check_timestamps(repeated))
+        repeated = np.concatenate(
+            [
+                np.repeat(self.times[0], 3),
+                self.times[3:],
+            ],
+        )
+        assert not utils.check_timestamps(repeated)

     def test_bad_interval(self):
         """Check that bad time intervals return false."""
         # Intentionally set a small interval (3 min) to fail.
         interval = np.timedelta64(3, "m")
-        self.assertFalse(utils.check_timestamps(self.times, interval))
+        assert not utils.check_timestamps(self.times, interval)


 class TestReadXarrayConfig(unittest.TestCase):
-
     def setUp(self):
-        self.fh, self.fp = tempfile.mkstemp(suffix=".nc", prefix="ioos_qc_tests_")
+        self.fh, self.fp = tempfile.mkstemp(
+            suffix=".nc",
+            prefix="ioos_qc_tests_",
+        )
         self.config = {
             "suspect_span": [1, 11],
             "fail_span": [0, 12],
@@ -54,7 +62,7 @@ def setUp(self):

     def tearDown(self):
         os.close(self.fh)
-        os.remove(self.fp)
+        Path(self.fp).unlink()

     def test_load_from_xarray_file(self):
         c = utils.load_config_as_dict(self.fp)
@@ -69,18 +77,15 @@ def test_load_from_xarray_dataset(self):


 class TestGreatCircle(unittest.TestCase):
-
     def setUp(self):
-        """Test 1 million great circle calculations
-        """
+        """Test 1 million great circle calculations."""
         points = 10000
         self.lon = np.linspace(-179, 179, points)
         self.lat = np.linspace(-89, 89, points)

     def test_great_circle(self):
-        s = time.perf_counter()
+        time.perf_counter()
         dist = utils.great_circle_distance(self.lat, self.lon)
-        e = time.perf_counter()
-        print(f"Great Circle: {e - s:0.4f} seconds")
+        time.perf_counter()
         close = np.isclose(dist[1:-1], dist[2:], atol=1)
         assert close.all()
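Note on the test_qartod_compare expectations above: the roll-up encoded in the expected primary_flags can be reproduced with a small sketch. This is an illustrative reimplementation written for this note, not the body of qartod.qartod_compare; the precedence order (FAIL over SUSPECT over GOOD over UNKNOWN over MISSING) is an assumption inferred from the expected output in the test, and the helper name rollup is hypothetical.

# Illustrative sketch only -- not the ioos_qc implementation.
# Assumed precedence, highest wins: FAIL (4) > SUSPECT (3) > GOOD (1) > UNKNOWN (2) > MISSING (9).
import numpy as np

PRECEDENCE = (4, 3, 1, 2, 9)


def rollup(flag_arrays):
    stacked = np.stack(flag_arrays)
    # Start everything at MISSING, then let higher-precedence flags overwrite.
    result = np.full(stacked.shape[1], 9)
    for flag in reversed(PRECEDENCE):
        result[np.any(stacked == flag, axis=0)] = flag
    return result


range_flags = np.array([1, 1, 1, 9, 1, 1, 9, 9])
spike_flags = np.array([2, 1, 1, 1, 1, 1, 9, 9])
grdtn_flags = np.array([1, 3, 3, 4, 3, 1, 2, 9])
print(rollup([range_flags, spike_flags, grdtn_flags]))
# [1 3 3 4 3 1 2 9] -- the primary_flags array asserted in the test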
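The converted density-inversion input tests rely on pytest.raises(..., match=...). That keyword is treated as a regular expression and searched (not fully matched) against the string form of the raised exception, which is why an alternation such as "'less'|<" can accept more than one wording of the same TypeError. A minimal, self-contained illustration follows; failing_compare is a hypothetical stand-in, not a function from this repository.

# Standalone illustration of pytest.raises(match=...); run with pytest.
import pytest


def failing_compare(threshold):
    # Comparing a float against a string raises a TypeError whose exact
    # wording can vary between Python versions.
    return 1.0 < threshold


def test_match_is_a_regex_search():
    # match is re.search()'d against str(exc); alternation covers either wording.
    with pytest.raises(TypeError, match="'<'|'less'"):
        failing_compare("bad")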