From 10cd83b231eababa753463028b07bf31b08c7e99 Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:13:24 +0100
Subject: [PATCH 1/7] Updated jenkins file

---
 Jenkinsfile | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index de8cc56e..ba6e29fa 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -9,15 +9,13 @@
 @Library('lib')_
 pythonPipeline {
-    package_name = 'worldcereal'
+    package_name = 'worldcereal-classification'
     wipeout_workspace = true
-    python_version = ["3.8"]
-    upload_dev_wheels = true
-    hadoop = true
+    python_version = ["3.10"]
+    upload_dev_wheels = false
     pipeline_triggers = [cron('H H(0-6) * * *')]
     wheel_repo = 'python-packages-public'
     wheel_repo_dev = 'python-packages-public-snapshot'
-    system_site_packages = 'nope'
     pep440 = true
     venv_rpm_deps = ['gcc', 'gcc-c++']
     extra_env_variables = [

From d6dbaeb16cfebdd70f7c502493bb9dac6e5941b3 Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:22:57 +0100
Subject: [PATCH 2/7] Add precommit config

---
 .pre-commit-config.yaml | 71 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..3138f7ef
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,71 @@
+# https://pre-commit.com
+default_language_version:
+  python: python3.10
+default_stages: [commit, manual]
+fail_fast: true
+exclude: "(received/|.*_depr)"
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      # - id: check-added-large-files
+      #   args: ['--maxkb=65536']
+      - id: check-ast
+      - id: check-builtin-literals
+      - id: check-byte-order-marker
+      - id: check-case-conflict
+      - id: check-docstring-first
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-toml
+      - id: check-vcs-permalinks
+      - id: check-xml
+      - id: check-yaml
+        args: [--allow-multiple-documents]
+      - id: debug-statements
+      - id: detect-private-key
+      - id: mixed-line-ending
+      - id: trailing-whitespace
+        types: [python]
+      - id: end-of-file-fixer
+        types: [python]
+  - repo: local
+    hooks:
+      - id: shellcheck
+        name: shellcheck
+        entry: shellcheck --check-sourced --shell=bash --exclude=SC1087
+        language: system
+        types: [shell]
+      # - id: pydocstyle
+      #   name: pydocstyle
+      #   entry: pydocstyle
+      #   language: system
+      #   types: [python]
+      #   exclude: "(^experiments/|.*_depr)"
+      # - id: flake8
+      #   name: flake8
+      #   entry: flake8
+      #   language: system
+      #   types: [python]
+      #   exclude: "(^tasks/|.*_depr)"
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: no-commit-to-branch
+        args: [ '--branch', 'main' ]
+  - repo: https://github.com/radix-ai/auto-smart-commit
+    rev: v1.0.3
+    hooks:
+      - id: auto-smart-commit
+  - repo: https://github.com/psf/black
+    rev: 23.10.0
+    hooks:
+      - id: black
+        language_version: python3.9
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: isort (python)
+        args: ["--profile", "black"]
\ No newline at end of file
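(Note on using this configuration, not part of the patch itself: the hooks are typically activated locally with `pre-commit install` and can be run against the whole tree with `pre-commit run --all-files`, assuming the `pre-commit` package itself is installed in the environment.)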
From def42acf36746b59c0dc8667e9dd112da6c7e90c Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:30:54 +0100
Subject: [PATCH 3/7] #17 new project setup

---
 .github/workflows/lint.yaml           | 30 ++++++++++
 .github/workflows/pytest-collect.yaml | 31 +++++++++++
 pyproject.toml                        | 71 ++++++++++++++++++++++++
 setup.cfg                             | 12 ----
 setup.py                              | 79 ---------------------------
 5 files changed, 132 insertions(+), 91 deletions(-)
 create mode 100644 .github/workflows/lint.yaml
 create mode 100644 .github/workflows/pytest-collect.yaml
 create mode 100644 pyproject.toml
 delete mode 100644 setup.cfg
 delete mode 100644 setup.py

diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 00000000..8462b37c
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,30 @@
+name: Lint
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint:
+    name: "Lint: code quality and formatting checks"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone repo
+        uses: actions/checkout@v2
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+          cache: 'pip'
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install isort black ruff
+      - name: isort
+        run: python -m isort . --check --diff
+      # - name: black
+      #   run: python -m black --check --diff .
+      - name: ruff
+        run: ruff check .

diff --git a/.github/workflows/pytest-collect.yaml b/.github/workflows/pytest-collect.yaml
new file mode 100644
index 00000000..4a5f33b5
--- /dev/null
+++ b/.github/workflows/pytest-collect.yaml
@@ -0,0 +1,31 @@
+name: Pytest-collect
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - 'tests/**'
+  pull_request:
+    branches: [ main ]
+    paths:
+      - 'tests/**'
+
+jobs:
+  pytest-collect:
+    name: "Pytest: check test collection"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone repo
+        uses: actions/checkout@v2
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+          cache: 'pip'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install pytest
+          python -m pip install .[dev]
+      - name: "pytest: check test collection"
+        run: pytest --collect-only

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..aea2ae5b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,71 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.sdist]
+exclude = [
+  "/dist",
+  "/notebooks",
+  "/scripts",
+  "/bin",
+  "/tests",
+]
+
+[project]
+name = "worldcereal"
+version = "1.1.2"
+authors = [
+  { name="Kristof Van Tricht" },
+  { name="Jeroen Degerickx" },
+  { name="Darius Couchard" },
+  { name="Christina Butsko" },
+]
+description = "WorldCereal classification module"
+readme = "README.md"
+requires-python = ">=3.9"
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "Operating System :: OS Independent",
+]
+dependencies = [
+  "openeo>=0.22.0",
+  "xarray>=2022.3.0",
+  "rioxarray>=0.13.0",
+  "loguru>=0.7.2",
+  "h5netcdf>=1.2.0",
+  "openeo[localprocessing]",
+  "cftime",
+  "pytest-depends",
+  "pyarrow",
+  "pandas"]
+
+[project.urls]
+"Homepage" = "https://github.com/WorldCereal/worldcereal-classification"
+"Bug Tracker" = "https://github.com/WorldCereal/worldcereal-classification/issues"
+
+[project.optional-dependencies]
+dev = [
+  "pytest>=7.4.0",
+  "matplotlib>=3.3.0"
+]
+
+[tool.pytest.ini_options]
+testpaths = [
+  "tests",
+]
+addopts = [
+  "--import-mode=prepend",
+]
+
+[tool.isort]
+profile = "black"
+
+
+[tool.ruff]
+# line-length = 100
+
+[tool.ruff.lint]
+select = ["E", "F"]
+ignore = [
+  "E501",  # Ignore "line-too-long" issues, let black handle that.
+]
\ No newline at end of file

diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index f3ffb32a..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,12 +0,0 @@
-[flake8]
-max-line-length = 120
-
-[aliases]
-test=pytest
-
-[pylint.format]
-max-line-length = 120
-
-[pylint.MASTER]
-# Specify a score threshold to be exceeded before program exits with error.
-fail-under=6.5

diff --git a/setup.py b/setup.py
deleted file mode 100644
index 97ef7607..00000000
--- a/setup.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-
-from setuptools import setup, find_packages
-# import os
-# import datetime
-
-# Load the WorldCereal version info.
-#
-# Note that we cannot simply import the module, since dependencies listed
-# in setup() will very likely not be installed yet when setup.py run.
-#
-# See:
-#   https://packaging.python.org/guides/single-sourcing-package-version
-
-__version__ = None
-
-with open('src/worldcereal/_version.py') as fp:
-    exec(fp.read())
-
-    version = __version__
-
-# Configure setuptools
-
-setup(
-    name='worldcereal',
-    version=version,
-    author='Kristof Van Tricht',
-    author_email='kristof.vantricht@vito.be',
-    description='WorldCereal Classification',
-    url='https://github.com/WorldCereal/worldcereal-classification',
-    setup_requires=['pytest-runner'],
-    tests_require=['pytest'],
-    include_package_data=True,
-    package_data={
-        '': ['resources/*', 'resources/**/*', 'resources/**/*',
-             'resources/*/*', 'resources/*/*/*', 'resources/*/*/*/*'],
-    },
-    zip_safe=True,
-    install_requires=[
-        'catboost==1.0.6',
-        'cloudpickle==2.2.0',
-        'fire==0.4.0',
-        'geopandas==0.9.0',
-        'h5py==2.10.0',
-        'hdmedians==0.14.2',
-        'joblib==1.2.0',
-        'loguru<0.7',
-        'matplotlib==3.5.3',
-        'numba<0.57',
-        'numexpr==2.8.3',
-        'numpy==1.18.5',
-        'openeo',
-        'pandas==1.4.4',
-        'pyarrow==9.0.0',
-        'pyod==1.0.0',
-        'python-dateutil<2.9',
-        'protobuf==3.20.3',
-        'rasterio==1.2.10',
-        'requests<3',
-        'satio==1.1.15',
-        'scikit-image==0.19.3',
-        'scikit-learn==1.1.2',
-        'scipy==1.4.1',
-        'Shapely==1.8.4',
-        'tensorflow==2.3.0',
-        'tqdm==4.64.1',
-        'utm<0.8',
-        'xarray==2022.3.0',
-        'zarr==2.12.0',
-    ],
-    test_suite='tests',
-    package_dir={'': 'src'},
-    packages=find_packages('src'),
-    entry_points={
-        'console_scripts': [
-            'worldcereal=worldcereal.__main__:main'
-        ]
-    }
-)

From 1897949907f19d6dd73dc1c775d5922007c893b9 Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:45:29 +0100
Subject: [PATCH 4/7] Bumped version

---
 pyproject.toml              | 2 +-
 src/worldcereal/_version.py | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)
 delete mode 100644 src/worldcereal/_version.py

diff --git a/pyproject.toml b/pyproject.toml
index aea2ae5b..6b6133dd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ exclude = [

 [project]
 name = "worldcereal"
-version = "1.1.2"
+version = "2.0.1"
 authors = [
   { name="Kristof Van Tricht" },
   { name="Jeroen Degerickx" },

diff --git a/src/worldcereal/_version.py b/src/worldcereal/_version.py
deleted file mode 100644
index 2e7c42ee..00000000
--- a/src/worldcereal/_version.py
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env python3
-
-__version__ = '2.0.0a0'
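(With `src/worldcereal/_version.py` deleted, `pyproject.toml` becomes the single source of the version. A minimal sketch — an illustration, not part of the patches — of how calling code can still query the version at runtime, using only the standard library:)

```python
from importlib.metadata import version

# Returns the version of the installed "worldcereal" distribution,
# e.g. "2.0.1" after this patch series.
print(version("worldcereal"))
```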
From cacebc0ca669eb7724b09d87ddbd4e1b0491de1f Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:46:02 +0100
Subject: [PATCH 5/7] Fixed import

---
 src/worldcereal/__init__.py | 57 ++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 35 deletions(-)

diff --git a/src/worldcereal/__init__.py b/src/worldcereal/__init__.py
index cafb3dc5..c990ac26 100644
--- a/src/worldcereal/__init__.py
+++ b/src/worldcereal/__init__.py
@@ -1,62 +1,49 @@
 #!/usr/bin/env python3

-from ._version import __version__
-
-__all__ = ['__version__']
-
-
 SUPPORTED_SEASONS = [
-    'tc-wintercereals',
-    'tc-maize-main',
-    'tc-maize-second',
-    'tc-annual',
-    'custom'
+    "tc-wintercereals",
+    "tc-maize-main",
+    "tc-maize-second",
+    "tc-annual",
+    "custom",
 ]

 SEASONAL_MAPPING = {
-    'tc-wintercereals': 'WW',
-    'tc-maize-main': 'M1',
-    'tc-maize-second': 'M2',
-    'tc-annual': 'ANNUAL',
-    'custom': 'custom'
+    "tc-wintercereals": "WW",
+    "tc-maize-main": "M1",
+    "tc-maize-second": "M2",
+    "tc-annual": "ANNUAL",
+    "custom": "custom",
 }


 # Default buffer (days) prior to
 # season start
 SEASON_PRIOR_BUFFER = {
-    'tc-wintercereals': 15,
-    'tc-maize-main': 15,
-    'tc-maize-second': 15,
-    'tc-annual': 0,
-    'custom': 0
+    "tc-wintercereals": 15,
+    "tc-maize-main": 15,
+    "tc-maize-second": 15,
+    "tc-annual": 0,
+    "custom": 0,
 }


 # Default buffer (days) after
 # season end
 SEASON_POST_BUFFER = {
-    'tc-wintercereals': 0,
-    'tc-maize-main': 0,
-    'tc-maize-second': 0,
-    'tc-annual': 0,
-    'custom': 0
+    "tc-wintercereals": 0,
+    "tc-maize-main": 0,
+    "tc-maize-second": 0,
+    "tc-annual": 0,
+    "custom": 0,
 }


 # Base temperatures used for
 # crop-specific GDD accumulation
-TBASE = {
-    'tc-wintercereals': 0,
-    'tc-maize-main': 10,
-    'tc-maize-second': 10
-}
+TBASE = {"tc-wintercereals": 0, "tc-maize-main": 10, "tc-maize-second": 10}

 # Upper limit temperatures for
 # GDD accumulation
-GDDTLIMIT = {
-    'tc-wintercereals': 25,
-    'tc-maize-main': 30,
-    'tc-maize-second': 30
-}
+GDDTLIMIT = {"tc-wintercereals": 25, "tc-maize-main": 30, "tc-maize-second": 30}

From 741a7d255a070eda50c647eb6b3db797ac7e98eb Mon Sep 17 00:00:00 2001
From: Kristof Van Tricht
Date: Tue, 23 Jan 2024 13:59:15 +0100
Subject: [PATCH 6/7] Fix formatting

---
 src/worldcereal/__main__.py             |   3 +-
 src/worldcereal/openeo/compositing.py   |  15 +-
 src/worldcereal/openeo/feature_udf.py   | 129 ++++++++------
 src/worldcereal/openeo/masking.py       |  33 ++--
 src/worldcereal/openeo/preprocessing.py | 217 ++++++++++++++----------
 src/worldcereal/settings.py             |  97 +++++------
 tests/worldcerealtests/test_openeo.py   | 122 +++++++------
 7 files changed, 338 insertions(+), 278 deletions(-)

diff --git a/src/worldcereal/__main__.py b/src/worldcereal/__main__.py
index 76feaf7c..56d08bfb 100644
--- a/src/worldcereal/__main__.py
+++ b/src/worldcereal/__main__.py
@@ -3,8 +3,9 @@

 def main():
     import fire
+
     fire.Fire(run_tile)


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

diff --git a/src/worldcereal/openeo/compositing.py b/src/worldcereal/openeo/compositing.py
index 333e771c..a7fec336 100644
--- a/src/worldcereal/openeo/compositing.py
+++ b/src/worldcereal/openeo/compositing.py
@@ -3,18 +3,21 @@ def max_ndvi_selection(ndvi):
     return ndvi.array_apply(lambda x: x != max_ndvi)


-def max_ndvi_composite(s2_cube, composite_window='month'):
+def max_ndvi_composite(s2_cube, composite_window="month"):
     ndvi = s2_cube.ndvi(nir="B08", red="B04")

     rank_mask = ndvi.apply_neighborhood(
         max_ndvi_selection,
-        size=[{'dimension': 'x', 'unit': 'px', 'value': 1},
-              {'dimension': 'y', 'unit': 'px', 'value': 1},
-              {'dimension': 't', 'value': "month"}],
-        overlap=[]
+        size=[
+            {"dimension": "x", "unit": "px", "value": 1},
+            {"dimension": "y", "unit": "px", "value": 1},
+            {"dimension": "t", "value": "month"},
+        ],
+        overlap=[],
     )

     s2_cube = s2_cube.mask(rank_mask).aggregate_temporal_period(
-        composite_window, "first")
+        composite_window, "first"
+    )

     return s2_cube
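(For context on `max_ndvi_composite` above: per pixel and per compositing window it masks out every observation except the one with maximum NDVI, then aggregates. A hedged usage sketch — the bbox, date range and collection are placeholders, and an authenticated openEO connection is assumed:)

```python
import openeo

from worldcereal.openeo.compositing import max_ndvi_composite

connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# Placeholder extent, for illustration only
bbox = {"west": 5.00, "south": 51.20, "east": 5.05, "north": 51.25}

# B04/B08 must be present so NDVI can be computed
s2_cube = connection.load_collection(
    "TERRASCOPE_S2_TOC_V2",
    bands=["B02", "B03", "B04", "B08"],
    spatial_extent=bbox,
    temporal_extent=["2021-01-01", "2021-12-31"],
)

# One composite per month, keeping the max-NDVI observation
composite = max_ndvi_composite(s2_cube, composite_window="month")
```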
diff --git a/src/worldcereal/openeo/feature_udf.py b/src/worldcereal/openeo/feature_udf.py
index 42c6e9e1..4ec815d9 100644
--- a/src/worldcereal/openeo/feature_udf.py
+++ b/src/worldcereal/openeo/feature_udf.py
@@ -3,40 +3,57 @@ from typing import Dict

 import numpy as np
-from openeo.udf import XarrayDataCube
 import pandas as pd
-from satio.collections import XArrayTrainingCollection
 import xarray as xr
+from openeo.udf import XarrayDataCube
+from satio.collections import XArrayTrainingCollection

 from worldcereal.features.settings import (
     get_cropland_features_meta,
-    get_default_rsi_meta)
+    get_default_rsi_meta,
+)
 from worldcereal.fp import L2AFeaturesProcessor

-sys.path.append('/data/users/Public/driesj/openeo/deps/satio')
-sys.path.append('/data/users/Public/driesj/openeo/deps/wc-classification/src')
+sys.path.append("/data/users/Public/driesj/openeo/deps/satio")
+sys.path.append("/data/users/Public/driesj/openeo/deps/wc-classification/src")
 # sys.path.insert(0,'/data/users/Public/driesj/openeo/deps/tf230')

-wheels = ['loguru-0.5.3-py3-none-any.whl',
-          'aiocontextvars-0.2.2-py2.py3-none-any.whl', 'contextvars-2.4',
-          'immutables-0.14-cp36-cp36m-manylinux1_x86_64.whl',
-          'importlib_resources-3.3.0-py2.py3-none-any.whl']
+wheels = [
+    "loguru-0.5.3-py3-none-any.whl",
+    "aiocontextvars-0.2.2-py2.py3-none-any.whl",
+    "contextvars-2.4",
+    "immutables-0.14-cp36-cp36m-manylinux1_x86_64.whl",
+    "importlib_resources-3.3.0-py2.py3-none-any.whl",
+]
 for wheel in wheels:
-    sys.path.append('/data/users/Public/driesj/openeo/deps/' + wheel)
+    sys.path.append("/data/users/Public/driesj/openeo/deps/" + wheel)

-classifier_file = ('/tmp/worldcereal_croplandextent_lpis_unet.h5')
+classifier_file = "/tmp/worldcereal_croplandextent_lpis_unet.h5"

 features_meta = get_cropland_features_meta()


 class L2AFeaturesProcessor10m(L2AFeaturesProcessor):
-    L2A_BANDS_10M = ['B02', 'B03', 'B04', 'B08', 'B05', 'B06',
-                     'B07', 'B8A', 'B11', 'B12', 'SCL',
-                     'sunAzimuthAngles', 'sunZenithAngles',
-                     'viewAzimuthMean', 'viewZenithMean']
-    L2A_BANDS_DICT_ALL_10M = {10: L2A_BANDS_10M, 20: {'DUMMY'}}
+    L2A_BANDS_10M = [
+        "B02",
+        "B03",
+        "B04",
+        "B08",
+        "B05",
+        "B06",
+        "B07",
+        "B8A",
+        "B11",
+        "B12",
+        "SCL",
+        "sunAzimuthAngles",
+        "sunZenithAngles",
+        "viewAzimuthMean",
+        "viewZenithMean",
+    ]
+    L2A_BANDS_DICT_ALL_10M = {10: L2A_BANDS_10M, 20: {"DUMMY"}}

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -60,51 +77,55 @@ def apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:
     inarr = cube.get_array()

     # translate openEO dim name into satio convention
-    inarr = inarr.rename({'t': 'timestamp'})
+    inarr = inarr.rename({"t": "timestamp"})

     # satio expects uint16!
     inarr = inarr.astype(np.uint16)

     settings = context["satio_settings"]
-    settings['OPTICAL']['composite']['start'] = np.datetime_as_string(
-        inarr.coords['timestamp'].values.min(), unit='D')
-    settings['OPTICAL']['composite']['end'] = np.datetime_as_string(
-        inarr.coords['timestamp'].values.max(), unit='D')
+    settings["OPTICAL"]["composite"]["start"] = np.datetime_as_string(
+        inarr.coords["timestamp"].values.min(), unit="D"
+    )
+    settings["OPTICAL"]["composite"]["end"] = np.datetime_as_string(
+        inarr.coords["timestamp"].values.max(), unit="D"
+    )

     classify = context["classify"]

-    collection = XArrayTrainingCollection(sensor="S2", processing_level="L2A",
-                                          df=pd.DataFrame(), array=inarr)
+    collection = XArrayTrainingCollection(
+        sensor="S2", processing_level="L2A", df=pd.DataFrame(), array=inarr
+    )

     from satio.rsindices import RSI_META_S2
+
     default_rsi_meta = RSI_META_S2.copy()
-    rsi_meta = get_default_rsi_meta()['OPTICAL']
+    rsi_meta = get_default_rsi_meta()["OPTICAL"]

     # in openEO, all bands are provided in 10m for now
     # so we need to modify satio defaults
-    rsi_meta['brightness'] = default_rsi_meta['brightness']
-    rsi_meta['brightness']['native_res'] = 10
-
-    if 'sen2agri_temp_feat' in features_meta.get('OPTICAL', {}):
-        features_meta['OPTICAL'][
-            'sen2agri_temp_feat'][
-            'parameters']['time_start'] = settings['OPTICAL'][
-            'composite']['start']
-
-    processor = L2AFeaturesProcessor10m(collection,
-                                        settings['OPTICAL'],
-                                        rsi_meta=rsi_meta,
-                                        features_meta=features_meta['OPTICAL'])
+    rsi_meta["brightness"] = default_rsi_meta["brightness"]
+    rsi_meta["brightness"]["native_res"] = 10
+
+    if "sen2agri_temp_feat" in features_meta.get("OPTICAL", {}):
+        features_meta["OPTICAL"]["sen2agri_temp_feat"]["parameters"][
+            "time_start"
+        ] = settings["OPTICAL"]["composite"]["start"]
+
+    processor = L2AFeaturesProcessor10m(
+        collection,
+        settings["OPTICAL"],
+        rsi_meta=rsi_meta,
+        features_meta=features_meta["OPTICAL"],
+    )
     features = processor.compute_features()

     # Extracted core from worldcereal ClassificationProcessor,
     # to be seen what we need to keep
-    if(classify):
-
+    if classify:
         windowsize = 64

         import tensorflow as tf
+
         # from worldcereal.classification.models import WorldCerealUNET
         # unetmodel = WorldCerealUNET(windowsize=64, features= 60)
         # unetmodel.model.load_weights(classifier_file)
         # classifier = unetmodel.model
@@ -133,21 +154,20 @@ def apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:
             else:
                 yEnd = yStart + windowsize

-            features_patch = features.data[:,
-                                           xStart:xEnd,
-                                           yStart:yEnd]
-            patchprediction = classifier.predict(
-                features_patch.transpose((1, 2, 0)).reshape(
-                    (1,
-                     windowsize * windowsize,
-                     -1))).squeeze().reshape((windowsize, windowsize))
+            features_patch = features.data[:, xStart:xEnd, yStart:yEnd]
+            patchprediction = (
+                classifier.predict(
+                    features_patch.transpose((1, 2, 0)).reshape(
+                        (1, windowsize * windowsize, -1)
+                    )
+                )
+                .squeeze()
+                .reshape((windowsize, windowsize))
+            )

             prediction[xStart:xEnd, yStart:yEnd] = patchprediction

-        prediction_xarray = xr.DataArray(
-            prediction.astype(np.float32),
-            dims=['x', 'y']
-        )
+        prediction_xarray = xr.DataArray(prediction.astype(np.float32), dims=["x", "y"])

         # wrap back to datacube and return
         return XarrayDataCube(prediction_xarray)
@@ -155,9 +175,10 @@ def apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:
     else:
         features_xarray = xr.DataArray(
             features.data.astype(np.float32),
-            dims=['bands', 'x', 'y'],
-            coords={'bands': features.names}
+            dims=["bands", "x", "y"],
+            coords={"bands": features.names},
         )

         # wrap back to datacube and return
         return XarrayDataCube(features_xarray)
+    return XarrayDataCube(features_xarray)

diff --git a/src/worldcereal/openeo/masking.py b/src/worldcereal/openeo/masking.py
index c04370fc..f9dcba42 100644
--- a/src/worldcereal/openeo/masking.py
+++ b/src/worldcereal/openeo/masking.py
@@ -2,19 +2,24 @@


 def convolve(img, radius):
-    '''OpenEO method to apply convolution
+    """OpenEO method to apply convolution
     with a circular kernel of `radius` pixels.
     NOTE: make sure the resolution of the image
     matches the expected radius in pixels!
-    '''
+    """
     kernel = footprints.disk(radius)
     img = img.apply_kernel(kernel)
     return img


-def scl_mask_erode_dilate(session, bbox,
-                          scl_layer_band="TERRASCOPE_S2_TOC_V2:SCL",
-                          erode_r=3, dilate_r=21, target_crs=None):
+def scl_mask_erode_dilate(
+    session,
+    bbox,
+    scl_layer_band="TERRASCOPE_S2_TOC_V2:SCL",
+    erode_r=3,
+    dilate_r=21,
+    target_crs=None,
+):
     """OpenEO method to construct a Sentinel-2 mask based on SCL.
     It involves an erosion step followed by a dilation step.
@@ -29,19 +34,21 @@ def scl_mask_erode_dilate(session, bbox,
         DataCube: DataCube containing the resulting mask
     """

-    layer_band = scl_layer_band.split(':')
+    layer_band = scl_layer_band.split(":")
     s2_sceneclassification = session.load_collection(
-        layer_band[0], bands=[layer_band[1]], spatial_extent=bbox)
+        layer_band[0], bands=[layer_band[1]], spatial_extent=bbox
+    )

     classification = s2_sceneclassification.band(layer_band[1])

     # Force to go to 10m resolution for controlled erosion/dilation
-    classification = classification.resample_spatial(projection=target_crs,
-                                                     resolution=10.0)
+    classification = classification.resample_spatial(
+        projection=target_crs, resolution=10.0
+    )

-    first_mask = (classification == 0)
+    first_mask = classification == 0
     for mask_value in [1, 3, 8, 9, 10, 11]:
-        first_mask = ((first_mask == 1) | (classification == mask_value))
+        first_mask = (first_mask == 1) | (classification == mask_value)

     # Invert mask for erosion
     first_mask = first_mask.apply(lambda x: (x == 1).not_())
@@ -50,7 +57,7 @@ def scl_mask_erode_dilate(session, bbox,
     erode_cube = convolve(first_mask, erode_r)

     # Invert again
-    erode_cube = (erode_cube > 0.1)
+    erode_cube = erode_cube > 0.1
     erode_cube = erode_cube.apply(lambda x: (x == 1).not_())

     # Now dilate the mask
     dilate_cube = convolve(first_mask, dilate_r)

     # Get binary mask. NOTE: >0.1 is a fix to avoid being triggered
     # by small non-zero oscillations after applying convolution
-    dilate_cube = (dilate_cube > 0.1)
+    dilate_cube = dilate_cube > 0.1

     return dilate_cube
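(To illustrate the `scl_mask_erode_dilate` helper reformatted above — a sketch only; `connection`, `bbox` and the EPSG:3035 target CRS mirror values used elsewhere in this series, and `s2_cube` stands for any optical cube on the same grid:)

```python
from worldcereal.openeo.masking import scl_mask_erode_dilate

# Build a cloud mask from the SCL band: bad-pixel classes are eroded
# (dropping isolated false positives) and then dilated (buffering clouds).
mask = scl_mask_erode_dilate(
    connection,
    bbox,
    scl_layer_band="TERRASCOPE_S2_TOC_V2:SCL",
    erode_r=3,
    dilate_r=21,
    target_crs=3035,
)

# Apply it to an optical cube, resampling the mask onto the cube's grid
s2_cube = s2_cube.mask(mask.resample_cube_spatial(s2_cube))
```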
diff --git a/src/worldcereal/openeo/preprocessing.py b/src/worldcereal/openeo/preprocessing.py
index 8600279d..1e15b660 100644
--- a/src/worldcereal/openeo/preprocessing.py
+++ b/src/worldcereal/openeo/preprocessing.py
@@ -1,16 +1,22 @@
 from openeo.processes import array_create, if_, is_nodata, power
 from openeo.rest.datacube import DataCube

-from worldcereal.openeo.masking import scl_mask_erode_dilate
 from worldcereal.openeo.compositing import max_ndvi_composite
+from worldcereal.openeo.masking import scl_mask_erode_dilate

-
-COMPOSITE_WINDOW = 'month'
+COMPOSITE_WINDOW = "month"


-def add_S1_bands(connection, S1_collection,
-                 other_bands, bbox, start, end,
-                 preprocess=True, **processing_options):
+def add_S1_bands(
+    connection,
+    S1_collection,
+    other_bands,
+    bbox,
+    start,
+    end,
+    preprocess=True,
+    **processing_options,
+):
     """Method to add S1 bands to datacube

     Args:
         connection: OpenEO connection instance
         S1_collection (str): Name of the S1 collection
         other_bands (DataCube): OpenEO datacube to add bands to
         bbox (_type_): _description_
         target_crs
     """
     isCreo = "creo" in processing_options.get("provider", "").lower()
-    orbit_direction = processing_options.get('s1_orbitdirection', None)
-    composite_window = processing_options.get(
-        'composite_window', COMPOSITE_WINDOW)
+    orbit_direction = processing_options.get("s1_orbitdirection", None)
+    composite_window = processing_options.get("composite_window", COMPOSITE_WINDOW)

     # TODO: implement as needed
     # if isCreo:
     #     orbit_direction = catalogue_check_S1(orbit_direction, start, end, bbox)

     if orbit_direction is not None:
-        properties = {"sat:orbit_state": lambda orbdir: orbdir == orbit_direction}  # NOQA
+        properties = {
+            "sat:orbit_state": lambda orbdir: orbdir == orbit_direction
+        }  # NOQA
     else:
         properties = {}

     # Load collection
     S1bands = connection.load_collection(
         S1_collection,
-        bands=['VH', 'VV'],
+        bands=["VH", "VV"],
         spatial_extent=bbox,
         temporal_extent=[start, end],
-        properties=properties
+        properties=properties,
     )

     if S1_collection == "SENTINEL1_GRD":
         # compute backscatter if starting from raw GRD,
         # otherwise assume preprocessed backscatter
         S1bands = S1bands.sar_backscatter(
-            coefficient='sigma0-ellipsoid',
+            coefficient="sigma0-ellipsoid",
             local_incidence_angle=False,
             # DO NOT USE MAPZEN
-            elevation_model='COPERNICUS_30' if isCreo else None,
-            options={"implementation_version": "2",
-                     "tile_size": 256, "otb_memory": 1024, "debug": False,
-                     "elev_geoid": "/opt/openeo-vito-aux-data/egm96.tif"}
+            elevation_model="COPERNICUS_30" if isCreo else None,
+            options={
+                "implementation_version": "2",
+                "tile_size": 256,
+                "otb_memory": 1024,
+                "debug": False,
+                "elev_geoid": "/opt/openeo-vito-aux-data/egm96.tif",
+            },
         )
     else:
         pass

     # Resample to the S2 spatial resolution
     target_crs = processing_options.get("target_crs", None)
     if target_crs is not None:
-        S1bands = S1bands.resample_spatial(
-            projection=target_crs, resolution=10.0)
+        S1bands = S1bands.resample_spatial(projection=target_crs, resolution=10.0)

     if preprocess:
         # Composite to compositing window
-        S1bands = S1bands.aggregate_temporal_period(period=composite_window,
-                                                    reducer="mean")
+        S1bands = S1bands.aggregate_temporal_period(
+            period=composite_window, reducer="mean"
+        )

         # Linearly interpolate missing values
-        S1bands = S1bands.apply_dimension(dimension="t",
-                                          process="array_interpolate_linear")
+        S1bands = S1bands.apply_dimension(
+            dimension="t", process="array_interpolate_linear"
+        )

     # Scale to int16
     if isCreo:
         S1bands = S1bands.apply_dimension(
             dimension="bands",
             process=lambda x: array_create(
-                [if_(is_nodata(x[0]), 1, power(
-                    base=10, p=(10.0 * x[0].log(base=10) + 83.) / 20.)),
-                 if_(is_nodata(x[1]), 1, power(
-                     base=10, p=(10.0 * x[1].log(base=10) + 83.) / 20.))]))
+                [
+                    if_(
+                        is_nodata(x[0]),
+                        1,
+                        power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
+                    ),
+                    if_(
+                        is_nodata(x[1]),
+                        1,
+                        power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
+                    ),
+                ]
+            ),
+        )
     else:
         S1bands = S1bands.apply_dimension(
             dimension="bands",
             process=lambda x: array_create(
-                [power(base=10, p=(10.0 * x[0].log(base=10) + 83.) / 20.),
-                 power(base=10, p=(10.0 * x[1].log(base=10) + 83.) / 20.)]))
+                [
+                    power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
+                    power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
+                ]
+            ),
+        )

     S1bands = S1bands.linear_scale_range(1, 65534, 1, 65534)

     # --------------------------------------------------------------------
     # Merge cubes
     # --------------------------------------------------------------------

-    merged_inputs = other_bands.resample_cube_spatial(
-        S1bands).merge_cubes(S1bands)
+    merged_inputs = other_bands.resample_cube_spatial(S1bands).merge_cubes(S1bands)

     return merged_inputs
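(A note on the backscatter scaling above, as far as can be read from the code itself: `10.0 * x.log(base=10)` converts linear sigma0 to decibels, and the stored digital number is then `DN = 10 ** ((dB + 83) / 20)`, which packs the usual backscatter range into the positive uint16 range clamped by the subsequent `linear_scale_range(1, 65534, 1, 65534)`; on the CREO path, `if_(is_nodata(...), 1, ...)` additionally maps nodata pixels to the reserved value 1.)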

def add_DEM(connection, DEM_collection, other_bands, bbox, **processing_options):
     """Method to add DEM to datacube

     Args:
         connection (_type_): _description_
         DEM_collection (str): Name of DEM collection
         other_bands (DataCube): DataCube to add bands to

     Returns:
         DataCube: merged datacube
     """
     dem = connection.load_collection(
         DEM_collection,
         spatial_extent=bbox,
     )

     # Resample to the S2 spatial resolution
     target_crs = processing_options.get("target_crs", None)
-    if (target_crs is not None):
-        dem = dem.resample_spatial(projection=target_crs, resolution=10.0,
-                                   method='cubic')
+    if target_crs is not None:
+        dem = dem.resample_spatial(
+            projection=target_crs, resolution=10.0, method="cubic"
+        )

     # collection has timestamps which we need to get rid of
     dem = dem.max_time()

     # --------------------------------------------------------------------
     # Merge cubes
     # --------------------------------------------------------------------

     merged_inputs = other_bands.merge_cubes(dem)

     return merged_inputs


-def add_meteo(connection, METEO_collection, other_bands, bbox,
-              start, end, target_crs=None, **processing_options):
+def add_meteo(
+    connection,
+    METEO_collection,
+    other_bands,
+    bbox,
+    start,
+    end,
+    target_crs=None,
+    **processing_options,
+):
     # AGERA5
     # TODO: add precipitation with sum compositor
-    composite_window = processing_options.get(
-        'composite_window', COMPOSITE_WINDOW)
+    composite_window = processing_options.get("composite_window", COMPOSITE_WINDOW)

     meteo = connection.load_collection(
         METEO_collection,
         spatial_extent=bbox,
-        bands=['temperature-mean'],
-        temporal_extent=[start, end]
+        bands=["temperature-mean"],
+        temporal_extent=[start, end],
     )

-    if (target_crs is not None):
+    if target_crs is not None:
         meteo = meteo.resample_spatial(projection=target_crs, resolution=10.0)

     # Composite to compositing window
-    meteo = meteo.aggregate_temporal_period(period=composite_window,
-                                            reducer="mean")
+    meteo = meteo.aggregate_temporal_period(period=composite_window, reducer="mean")

     # Linearly interpolate missing values.
     # Shouldn't exist in this dataset but is good practice to do so
-    meteo = meteo.apply_dimension(dimension="t",
-                                  process="array_interpolate_linear")
+    meteo = meteo.apply_dimension(dimension="t", process="array_interpolate_linear")

     # Rename band to match Radix model requirements
-    meteo = meteo.rename_labels('bands', ['temperature_mean'])
+    meteo = meteo.rename_labels("bands", ["temperature_mean"])

     # --------------------------------------------------------------------
     # Merge cubes
     # --------------------------------------------------------------------

     merged_inputs = other_bands.merge_cubes(meteo)

     return merged_inputs


 def worldcereal_preprocessed_inputs(
-        connection, bbox, start: str, end: str,
-        S2_collection='TERRASCOPE_S2_TOC_V2',
-        S1_collection='SENTINEL1_GRD_SIGMA0',
-        DEM_collection='COPERNICUS_30',
-        METEO_collection='AGERA5',
-        preprocess=True,
-        masking='mask_scl_dilation',
-        **processing_options) -> DataCube:
+    connection,
+    bbox,
+    start: str,
+    end: str,
+    S2_collection="TERRASCOPE_S2_TOC_V2",
+    S1_collection="SENTINEL1_GRD_SIGMA0",
+    DEM_collection="COPERNICUS_30",
+    METEO_collection="AGERA5",
+    preprocess=True,
+    masking="mask_scl_dilation",
+    **processing_options,
+) -> DataCube:
     """Main method to get preprocessed inputs from OpenEO for
     downstream crop type mapping.

     Returns:
         DataCube: OpenEO DataCube with the requested inputs
     """

-    composite_window = processing_options.get('composite_window',
-                                              COMPOSITE_WINDOW)
+    composite_window = processing_options.get("composite_window", COMPOSITE_WINDOW)

     # --------------------------------------------------------------------
     # Optical data
     # --------------------------------------------------------------------

-    S2_bands = ["B02", "B03", "B04", "B05",
-                "B06", "B07", "B08", "B11",
-                "B12"]
-    if masking not in ['satio', 'mask_scl_dilation', None]:
-        raise ValueError(f'Unknown masking option `{masking}`')
-    if masking in ['mask_scl_dilation']:
+    S2_bands = ["B02", "B03", "B04", "B05", "B06", "B07", "B08", "B11", "B12"]
+    if masking not in ["satio", "mask_scl_dilation", None]:
+        raise ValueError(f"Unknown masking option `{masking}`")
+    if masking in ["mask_scl_dilation"]:
         # Need SCL band to mask
         S2_bands.append("SCL")
     bands = connection.load_collection(
         S2_collection,
         bands=S2_bands,
         spatial_extent=bbox,
         temporal_extent=[start, end],
-        max_cloud_cover=95
+        max_cloud_cover=95,
     )

     # TODO: implement as needed
     # if isCreo:
     #     # S2url_list = catalogue_check_S2(start, end, bbox)
     #     catalogue_check_S2(start, end, bbox)

     target_crs = processing_options.get("target_crs", None)
-    if (target_crs is not None):
+    if target_crs is not None:
         bands = bands.resample_spatial(projection=target_crs, resolution=10.0)

     # NOTE: For now we mask again snow/ice because clouds
     # are sometimes marked as SCL value 11!
-    if masking == 'mask_scl_dilation':
+    if masking == "mask_scl_dilation":
         # TODO: double check cloud masking parameters
         # https://github.com/Open-EO/openeo-geotrellis-extensions/blob/develop/geotrellis-common/src/main/scala/org/openeo/geotrelliscommon/CloudFilterStrategy.scala#L54  # NOQA
         bands = bands.process(
             "mask_scl_dilation",
             data=bands,
             scl_band_name="SCL",
-            kernel1_size=17, kernel2_size=77,
+            kernel1_size=17,
+            kernel2_size=77,
             mask1_values=[2, 4, 5, 6, 7],
             mask2_values=[3, 8, 9, 10, 11],
-            erosion_kernel_size=3).filter_bands(
-                bands.metadata.band_names[:-1])
-    elif masking == 'satio':
+            erosion_kernel_size=3,
+        ).filter_bands(bands.metadata.band_names[:-1])
+    elif masking == "satio":
         # Apply satio-based mask
         mask = scl_mask_erode_dilate(
             connection,
             bbox,
-            scl_layer_band=S2_collection + ':SCL',
-            target_crs=target_crs).resample_cube_spatial(bands)
+            scl_layer_band=S2_collection + ":SCL",
+            target_crs=target_crs,
+        ).resample_cube_spatial(bands)
         bands = bands.mask(mask)

     if preprocess:
         # Composite to compositing window
         bands = bands.aggregate_temporal_period(period=composite_window, reducer="median")

         # TODO: if we would disable it here, nodata values
         # will be 65535 and we need to cope with that later
         # Linearly interpolate missing values
-        bands = bands.apply_dimension(dimension="t",
-                                      process="array_interpolate_linear")
+        bands = bands.apply_dimension(dimension="t", process="array_interpolate_linear")

     # Force UINT16 to avoid overflow issue with S2 data
     bands = bands.linear_scale_range(0, 65534, 0, 65534)

     # --------------------------------------------------------------------
     # AGERA5 Meteo data
     # --------------------------------------------------------------------
     if METEO_collection is not None:
-        bands = add_meteo(connection, METEO_collection,
-                          bands, bbox, start, end,
-                          target_crs=target_crs,
-                          composite_window=composite_window)
+        bands = add_meteo(
+            connection,
+            METEO_collection,
+            bands,
+            bbox,
+            start,
+            end,
+            target_crs=target_crs,
+            composite_window=composite_window,
+        )

     # --------------------------------------------------------------------
     # SAR data
     # --------------------------------------------------------------------
     if S1_collection is not None:
-        bands = add_S1_bands(connection, S1_collection,
-                             bands, bbox, start, end,
-                             composite_window=composite_window,
-                             **processing_options)
+        bands = add_S1_bands(
+            connection,
+            S1_collection,
+            bands,
+            bbox,
+            start,
+            end,
+            composite_window=composite_window,
+            **processing_options,
+        )

     bands = bands.filter_temporal(start, end)

     # --------------------------------------------------------------------
     # DEM data
     # --------------------------------------------------------------------
     if DEM_collection is not None:
-        bands = add_DEM(connection, DEM_collection,
-                        bands, bbox, **processing_options)
+        bands = add_DEM(connection, DEM_collection, bands, bbox, **processing_options)

     # forcing 16bit
     bands = bands.linear_scale_range(0, 65534, 0, 65534)

     return bands


 def worldcereal_raw_inputs(*args, **kwargs):
-    return worldcereal_preprocessed_inputs(
-        *args, **kwargs, preprocess=False)
+    return worldcereal_preprocessed_inputs(*args, **kwargs, preprocess=False)
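(Pulling the pieces of `worldcereal_preprocessed_inputs` together, a minimal caller sketch — it mirrors the pattern used in the tests further down; the extent values are placeholders and the download is synchronous:)

```python
import openeo

from worldcereal.openeo.preprocessing import worldcereal_preprocessed_inputs

connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# Small test extent in EPSG:3035 (placeholder coordinates)
x, y = 3740000.0, 3020000.0
bbox = {"west": x, "south": y, "east": x + 150, "north": y + 150, "crs": 3035}

cube = worldcereal_preprocessed_inputs(
    connection,
    bbox,
    "2021-01-01",
    "2021-12-31",
    target_crs=3035,
    masking="mask_scl_dilation",
)
cube.download("worldcereal_inputs.nc", format="NetCDF")
```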
diff --git a/src/worldcereal/settings.py b/src/worldcereal/settings.py
index f9e29d71..82daddfb 100644
--- a/src/worldcereal/settings.py
+++ b/src/worldcereal/settings.py
@@ -7,11 +7,11 @@
 STARTMONTH = 3
 ENDMONTH = 10
 YEAR = 2021
-S1_ORBITDIRECTION = 'DESCENDING'
-MODEL_PATH = 'https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/croptype_models/20230615T144208-24ts-hrlvlcc-v200.zip'  # NOQA
-CROPCLASS_LIBRARY = 'https://artifactory.vgt.vito.be:443/auxdata-public/hrlvlcc/openeo-dependencies/cropclass-1.0.4-20230630T184435.zip'  # NOQA
-VITOCROPCLASSIFICATION_LIBRARY = 'https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/openeo-dependencies/vitocropclassification-1.4.0-20230619T091529.zip'  # NOQA
-STATIC_LIBRARIES = 'https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/openeo-dependencies/hrl.zip'  # NOQA
+S1_ORBITDIRECTION = "DESCENDING"
+MODEL_PATH = "https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/croptype_models/20230615T144208-24ts-hrlvlcc-v200.zip"  # NOQA
+CROPCLASS_LIBRARY = "https://artifactory.vgt.vito.be:443/auxdata-public/hrlvlcc/openeo-dependencies/cropclass-1.0.4-20230630T184435.zip"  # NOQA
+VITOCROPCLASSIFICATION_LIBRARY = "https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/openeo-dependencies/vitocropclassification-1.4.0-20230619T091529.zip"  # NOQA
+STATIC_LIBRARIES = "https://artifactory.vgt.vito.be/auxdata-public/hrlvlcc/openeo-dependencies/hrl.zip"  # NOQA

 # ---------------------------------------------------
 # Processing options for crop type map generation
@@ -24,7 +24,7 @@
     "modeltag": Path(MODEL_PATH).stem,
     "target_crs": 3035,
     "s1_orbitdirection": S1_ORBITDIRECTION,
-    "all_probabilities": True  # By default save all probabilities
+    "all_probabilities": True,  # By default save all probabilities
 }

 # ---------------------------------------------------
@@ -45,12 +45,12 @@
         f"{MODEL_PATH}#tmp/model",
         f"{CROPCLASS_LIBRARY}#tmp/cropclasslib",
         f"{VITOCROPCLASSIFICATION_LIBRARY}#tmp/vitocropclassification",
-        f"{STATIC_LIBRARIES}#tmp/venv_static"
+        f"{STATIC_LIBRARIES}#tmp/venv_static",
     ],
     "logging-threshold": "info",
     "mount_tmp": False,
     "goofys": "false",
-    "node_caching": True
+    "node_caching": True,
 }

 # Terrascope backend specific options
@@ -62,16 +62,12 @@
     # terrascope reads from geotiff instead of jp2, so no threading issue there
     "executor-threads-jvm": 12,
     "executor-memory": "3g",
-    "executor-memoryOverhead": "3g"
+    "executor-memoryOverhead": "3g",
 }


 # Sentinelhub layers specific options
-SENTINELHUB_JOB_OPTIONS = {
-    "sentinel-hub": {
-        "client-alias": "vito"
-    }
-}
+SENTINELHUB_JOB_OPTIONS = {"sentinel-hub": {"client-alias": "vito"}}

 # ---------------------------------------------------
 # Job options for OpenEO training data extractions
@@ -84,7 +80,7 @@
     "executor-memoryOverhead": "2G",
     "executor-cores": "2",
     "max-executors": "50",
-    "soft-errors": "true"
+    "soft-errors": "true",
 }

 OPENEO_EXTRACT_CREO_JOB_OPTIONS = {
@@ -95,7 +91,7 @@
     "executor-memoryOverhead": "3500m",
     "executor-cores": "4",
     "executor-request-cores": "400m",
-    "max-executors": "200"
+    "max-executors": "200",
 }

 # ---------------------------------------------------

 # Collection definitions on Terrascope
 _TERRASCOPE_COLLECTIONS = {
-    'S2_collection': "TERRASCOPE_S2_TOC_V2",
-    'WORLDCOVER_collection': "ESA_WORLDCOVER_10M_2021_V2",
-    'METEO_collection': 'AGERA5',
-    'S1_collection': "SENTINEL1_GRD_SIGMA0",
-    'DEM_collection': "COPERNICUS_30"
+    "S2_collection": "TERRASCOPE_S2_TOC_V2",
+    "WORLDCOVER_collection": "ESA_WORLDCOVER_10M_2021_V2",
+    "METEO_collection": "AGERA5",
+    "S1_collection": "SENTINEL1_GRD_SIGMA0",
+    "DEM_collection": "COPERNICUS_30",
 }

 # Collection definitions on CREO
 _CREO_COLLECTIONS = {
-    'S2_collection': "SENTINEL2_L2A",
-    'WORLDCOVER_collection': None,
-    'METEO_collection': None,
-    'S1_collection': "SENTINEL1_GRD",
-    'DEM_collection': "COPERNICUS_30"
+    "S2_collection": "SENTINEL2_L2A",
+    "WORLDCOVER_collection": None,
+    "METEO_collection": None,
+    "S1_collection": "SENTINEL1_GRD",
+    "DEM_collection": "COPERNICUS_30",
 }

 # Collection definitions on Sentinelhub
 _SENTINELHUB_COLLECTIONS = {
-    'S2_collection': "SENTINEL2_L2A_SENTINELHUB",
-    'WORLDCOVER_collection': "ESA_WORLDCOVER_10M_2021_V2",
-    'METEO_collection': None,
-    'S1_collection': "SENTINEL1_GRD",
-    'DEM_collection': "COPERNICUS_30"
+    "S2_collection": "SENTINEL2_L2A_SENTINELHUB",
+    "WORLDCOVER_collection": "ESA_WORLDCOVER_10M_2021_V2",
+    "METEO_collection": None,
+    "S1_collection": "SENTINEL1_GRD",
+    "DEM_collection": "COPERNICUS_30",
 }


-def _get_default_job_options(task: str = 'inference'):
-    if task == 'inference':
+def _get_default_job_options(task: str = "inference"):
+    if task == "inference":
         return DEFAULT_JOB_OPTIONS
-    elif task == 'extractions':
+    elif task == "extractions":
         return OPENEO_EXTRACT_JOB_OPTIONS
     else:
-        raise ValueError(f'Task `{task}` not known.')
-
+        raise ValueError(f"Task `{task}` not known.")

-def get_job_options(provider: str = None,
-                    task: str = 'inference'):

+def get_job_options(provider: str = None, task: str = "inference"):
     job_options = deepcopy(_get_default_job_options(task))

-    if task == 'inference':
-        if provider.lower() == 'terrascope':
+    if task == "inference":
+        if provider.lower() == "terrascope":
             job_options.update(TERRASCOPE_JOB_OPTIONS)
-        elif provider.lower() == 'sentinelhub' or provider.lower() == 'shub':
+        elif provider.lower() == "sentinelhub" or provider.lower() == "shub":
             job_options.update(SENTINELHUB_JOB_OPTIONS)
-        elif provider.lower() == 'creodias':
+        elif provider.lower() == "creodias":
             pass
         elif provider is None:
             pass
         else:
-            raise ValueError(f'Provider `{provider}` not known.')
+            raise ValueError(f"Provider `{provider}` not known.")

-    elif task == 'extractions':
-        if provider.lower() == 'creodias':
+    elif task == "extractions":
+        if provider.lower() == "creodias":
             job_options.update(OPENEO_EXTRACT_CREO_JOB_OPTIONS)

     return deepcopy(job_options)
@@ -166,19 +160,18 @@
     return deepcopy(PROCESSING_OPTIONS)


-def get_processing_options(provider: str = 'terrascope'):
+def get_processing_options(provider: str = "terrascope"):
     processing_options = _get_default_processing_options()
-    processing_options.update({'provider': provider})
+    processing_options.update({"provider": provider})
     return deepcopy(processing_options)


 def get_collection_options(provider: str):
-
-    if provider.lower() == 'terrascope':
+    if provider.lower() == "terrascope":
         return _TERRASCOPE_COLLECTIONS
-    elif provider.lower() == 'sentinelhub' or provider.lower() == 'shub':
+    elif provider.lower() == "sentinelhub" or provider.lower() == "shub":
         return _SENTINELHUB_COLLECTIONS
-    elif 'creo' in provider.lower():
+    elif "creo" in provider.lower():
         return _CREO_COLLECTIONS
     else:
-        raise ValueError(f'Provider `{provider}` not known.')
+        raise ValueError(f"Provider `{provider}` not known.")
diff --git a/tests/worldcerealtests/test_openeo.py b/tests/worldcerealtests/test_openeo.py
index 4c98323a..c798e46c 100644
--- a/tests/worldcerealtests/test_openeo.py
+++ b/tests/worldcerealtests/test_openeo.py
@@ -1,19 +1,17 @@
 import json
+import logging
 import os
 import tempfile
 from pathlib import Path

-import logging
-
 import numpy as np
 import openeo
 import openeo.processes
 import pytest
 import xarray

-from worldcereal.settings import (get_collection_options, get_job_options,
-                                  get_processing_options)
 from worldcereal.openeo.preprocessing import worldcereal_preprocessed_inputs
+from worldcereal.settings import get_collection_options, get_processing_options

 basedir = Path(os.path.dirname(os.path.realpath(__file__)))
 logger = logging.getLogger(__name__)


 @pytest.fixture
 def vito_connection(capfd):
     # Note: this generic `authenticate_oidc()` call allows both:
     # - device code/refresh token based authentication for manual test
     #   suite runs by a developer
     # - client credentials auth through env vars for automated/Jenkins CI runs
     #
     # See https://open-eo.github.io/openeo-python-client/auth.html#oidc-authentication-dynamic-method-selection  # NOQA
     # and Jenkinsfile, where Jenkins fetches the env vars from VITO TAP Vault.
     connection = openeo.connect("openeo.vito.be")
     with capfd.disabled():
         # Temporarily disable output capturing, to make sure that OIDC device
         # code instructions (if any) are shown.
         connection.authenticate_oidc()
     return connection


 @pytest.fixture
 def creo_connection(capfd):
     # Like `vito_connection`, but with a different OIDC provider and
     # client id/secret for Creodias/CDSE. However, because env vars
     # are a global thing, and `vito_connection` is already consuming
     # the built-in env var handling of `authenticate_oidc()` for client
     # credentials auth, we have to roll our own env-var handling logic here
     # Also see https://github.com/Open-EO/openeo-python-client/issues/435
     connection = openeo.connect("openeo.creo.vito.be")

     if os.environ.get("OPENEO_AUTH_METHOD") == "client_credentials":
         provider_id = os.environ.get("OPENEO_AUTH_CDSE_PROVIDER_ID")
         client_id = os.environ.get("OPENEO_AUTH_CDSE_CLIENT_ID")
         client_secret = os.environ.get("OPENEO_AUTH_CDSE_CLIENT_SECRET")
         connection.authenticate_oidc_client_credentials(
-            provider_id=provider_id, client_id=client_id,
-            client_secret=client_secret)
+            provider_id=provider_id, client_id=client_id, client_secret=client_secret
+        )
     else:
         with capfd.disabled():
             # Temporarily disable output capturing, to make sure that
             # OIDC device code instructions (if any) are shown.
             connection.authenticate_oidc()

     return connection


-START_DATE, END_DATE = '2021-01-01', '2021-12-31'
+START_DATE, END_DATE = "2021-01-01", "2021-12-31"
 X = 3740000.0
 Y = 3020000.0
-EXTENT = dict(zip(["west", "south", "east", "north"], [
-    X, Y, X + 10 * 15, Y + 10 * 15]))
+EXTENT = dict(zip(["west", "south", "east", "north"], [X, Y, X + 10 * 15, Y + 10 * 15]))
 EXTENT["crs"] = 3035
 EXTENT["srs"] = 3035


 def test_preprocessing(vito_connection):
-
-    provider = 'terrascope'
+    provider = "terrascope"

     x = 3740000.0
     y = 3020000.0
-    EXTENT_20KM = dict(zip(["west", "south", "east", "north"], [
-        x, y, x + 10 * 256, y + 10 * 256]))
+    EXTENT_20KM = dict(
+        zip(["west", "south", "east", "north"], [x, y, x + 10 * 256, y + 10 * 256])
+    )
     EXTENT_20KM["crs"] = 3035
     EXTENT_20KM["srs"] = 3035

     collections_options = get_collection_options(provider)

     input_cube = worldcereal_preprocessed_inputs(
         vito_connection,
         EXTENT_20KM,
         START_DATE,
         END_DATE,
         target_crs=3035,
         masking="mask_scl_dilation",
         provider=provider,
         **collections_options,
     )

     # Ref file with processing graph
-    ref_graph = basedir / 'testresources' / 'terrascope_graph.json'
+    ref_graph = basedir / "testresources" / "terrascope_graph.json"

     # uncomment to save current graph to the ref file
-    with open(ref_graph, 'w') as f:
+    with open(ref_graph, "w") as f:
         f.write(json.dumps(input_cube.flat_graph(), indent=4))

-    with open(ref_graph, 'r') as f:
+    with open(ref_graph, "r") as f:
         expected = json.load(f)
         assert expected == input_cube.flat_graph()


 @pytest.mark.skip
 def test_fetch_inputs_optical(vito_connection):
-
     temp_output_file = tempfile.NamedTemporaryFile()

     input_cube = worldcereal_preprocessed_inputs(
         vito_connection,
         EXTENT,
         START_DATE,
         END_DATE,
-        masking='mask_scl_dilation',
+        masking="mask_scl_dilation",
         S1_collection=None,
         METEO_collection=None,
         DEM_collection=None,
-        target_crs=3035
+        target_crs=3035,
     )

-    input_cube.download(temp_output_file.name, format='NetCDF')
+    input_cube.download(temp_output_file.name, format="NetCDF")

     _ = xarray.load_dataset(temp_output_file.name)

     temp_output_file.close()


 def test_fetch_inputs_all(vito_connection, tmp_path):
-    '''Test to fetch all preprocessed inputs from OpenEO
+    """Test to fetch all preprocessed inputs from OpenEO
     This test does a full one on one check of the inputs.
     If it fails, something in the data fetching and preprocessing
     has changed. New ref data can be written and pushed, but only
     do this if you are 100% sure that the new data is fully correct.

     Note that changing to a different backend will likely have
     slightly different results and hence a failing test.

     Test currently runs on Terrascope backend and layers.
-    '''
+    """

-    temp_output_file = tmp_path / 'cropclass_generated_inputs.nc'
+    temp_output_file = tmp_path / "cropclass_generated_inputs.nc"

     input_cube = worldcereal_preprocessed_inputs(
         vito_connection,
         EXTENT,
         START_DATE,
         END_DATE,
         target_crs=3035,
-        s1_orbitdirection='DESCENDING',
+        s1_orbitdirection="DESCENDING",
     )

     input_cube.download(temp_output_file)

     input_data = xarray.load_dataset(temp_output_file)

     # Path to reference output file
-    ref_file = basedir / 'testresources' / 'worldcereal_inputs.nc'
+    ref_file = basedir / "testresources" / "worldcereal_inputs.nc"

     # Uncomment to overwrite reference file
     # input_data.to_netcdf(ref_file)

     # Need 17 variables
     assert len(input_data.variables) == 17

     # Do 1-1 check with ref data
     ref_data = xarray.load_dataset(ref_file)
     xarray.testing.assert_allclose(input_data, ref_data)


 def test_fetch_inputs_all_keep_creo(creo_connection):
-    '''Test fetching preprocessed inputs from CREO
+    """Test fetching preprocessed inputs from CREO
     backend.
-    '''
+    """

-    logger.info('Starting test test_fetch_inputs_all')
+    logger.info("Starting test test_fetch_inputs_all")

     temp_output_file = tempfile.NamedTemporaryFile()

-    collections_options = get_collection_options('creo')
+    collections_options = get_collection_options("creo")

     input_cube = worldcereal_preprocessed_inputs(
         creo_connection,
         EXTENT,
         START_DATE,
         END_DATE,
         target_crs=3035,
-        s1_orbitdirection='DESCENDING',
-        provider='creo',
-        **collections_options
+        s1_orbitdirection="DESCENDING",
+        provider="creo",
+        **collections_options,
     )

-    logger.info('Starting the downloading of the inputs as a synchronous job.')
-    input_cube.download(temp_output_file.name, format='NetCDF')
+    logger.info("Starting the downloading of the inputs as a synchronous job.")
+    input_cube.download(temp_output_file.name, format="NetCDF")

-    logger.info('Downloading finished, loading the result file.')
+    logger.info("Downloading finished, loading the result file.")

     input_data = xarray.load_dataset(temp_output_file.name)

     # Path to reference output file
-    ref_file = basedir / 'testresources' / 'worldcereal_inputs_creo.nc'
+    ref_file = basedir / "testresources" / "worldcereal_inputs_creo.nc"

     # Uncomment to overwrite reference file
     input_data.to_netcdf(ref_file)

     # Need 16 variables
     assert len(input_data.variables) == 16

     # Do 1-1 check with ref data
-    ref_data = xarray.load_dataset(ref_file).drop(
-        'crs').to_array(dim='bands')
-    input_data = input_data.drop('crs').to_array(dim='bands')
+    ref_data = xarray.load_dataset(ref_file).drop("crs").to_array(dim="bands")
+    input_data = input_data.drop("crs").to_array(dim="bands")

     # Do test in float32 so negative values don't overflow
     # Also test using numpy to be more verbose in case of
     # differences
     np.testing.assert_array_equal(
-        ref_data.values.astype(np.float32),
-        input_data.values.astype(np.float32))
+        ref_data.values.astype(np.float32), input_data.values.astype(np.float32)
+    )

     temp_output_file.close()


 def test_optical_mask(vito_connection):
-    '''Test whether mask works as expected.
+    """Test whether mask works as expected.
     Here we test the mask_scl_dilation mask
-    '''
-    logger.info('Starting test test_optical_mask')
+    """
+    logger.info("Starting test test_optical_mask")

     temp_output_file = tempfile.NamedTemporaryFile()

-    processing_options = get_processing_options('terrascope')
+    processing_options = get_processing_options("terrascope")

     input_cube = worldcereal_preprocessed_inputs(
         vito_connection,
         EXTENT,
         START_DATE,
         END_DATE,
         S1_collection=None,
         METEO_collection=None,
         DEM_collection=None,
         WORLDCOVER_collection=None,
         preprocess=False,  # So linear interpolation is disabled
-        masking='mask_scl_dilation',
-        **processing_options
+        masking="mask_scl_dilation",
+        **processing_options,
     )

-    logger.info('Starting the downloading of the inputs as a synchronous job.')
-    input_cube.download(temp_output_file.name, format='NetCDF')
+    logger.info("Starting the downloading of the inputs as a synchronous job.")
+    input_cube.download(temp_output_file.name, format="NetCDF")

-    logger.info('Downloading finished, loading the result file.')
+    logger.info("Downloading finished, loading the result file.")

     input_data = xarray.load_dataset(temp_output_file.name)

     # We should have exactly 4709 masked pixels in case of mask_scl_dilation
     # masking
-    assert (input_data['B08'].values == 65535).sum() == 4709
+    assert (input_data["B08"].values == 65535).sum() == 4709


 # @pytest.mark.skip
 def test_optical_mask_creo(creo_connection):
-    '''Test whether mask works as expected.
+    """Test whether mask works as expected.
     Here we test CREO backend
     NOTE: currently skipped until bug is fixed for new baseline!
-    '''
-    logger.info('Starting CREO test test_optical_mask')
+    """
+    logger.info("Starting CREO test test_optical_mask")

     temp_output_file = tempfile.NamedTemporaryFile()
     print(temp_output_file)

-    processing_options = get_processing_options('creodias')
-    processing_options['end_month'] = 12
-    S2_collection = get_collection_options(
-        provider='creodias')['S2_collection']
+    processing_options = get_processing_options("creodias")
+    processing_options["end_month"] = 12
+    S2_collection = get_collection_options(provider="creodias")["S2_collection"]

     input_cube = worldcereal_preprocessed_inputs(
         creo_connection,
         EXTENT,
         START_DATE,
         END_DATE,
         S2_collection=S2_collection,
         S1_collection=None,
         METEO_collection=None,
         DEM_collection=None,
         WORLDCOVER_collection=None,
         preprocess=False,  # So linear interpolation is disabled
-        masking='mask_scl_dilation',
-        **processing_options
+        masking="mask_scl_dilation",
+        **processing_options,
     )

-    logger.info('Starting the downloading of the inputs as a synchronous job.')
-    input_cube.download(temp_output_file.name, format='NetCDF')
+    logger.info("Starting the downloading of the inputs as a synchronous job.")
+    input_cube.download(temp_output_file.name, format="NetCDF")

-    logger.info('Downloading finished, loading the result file.')
+    logger.info("Downloading finished, loading the result file.")

     input_data = xarray.load_dataset(temp_output_file.name)

     # We should have exactly XXX masked pixels in case of mask_scl_dilation
     # masking
-    assert (input_data['B08'].values == 65535).sum() == 5510
+    assert (input_data["B08"].values == 65535).sum() == 5510
a/tests/worldcerealtests/test_openeo.py +++ b/tests/worldcerealtests/test_openeo.py @@ -1,421 +1,3 @@ -import json -import logging -import os -import tempfile -from pathlib import Path -import numpy as np -import openeo -import openeo.processes -import pytest -import xarray - -from worldcereal.openeo.preprocessing import worldcereal_preprocessed_inputs -from worldcereal.settings import get_collection_options, get_processing_options - -basedir = Path(os.path.dirname(os.path.realpath(__file__))) -logger = logging.getLogger(__name__) - - -@pytest.fixture -def vito_connection(capfd): - # Note: this generic `authenticate_oidc()` call allows both: - # - device code/refresh token based authentication for manual test - # suiteruns by a developer - # - client credentials auth through env vars for automated/Jenkins CI runs - # - # See https://open-eo.github.io/openeo-python-client/auth.html#oidc-authentication-dynamic-method-selection # NOQA - # and Jenkinsfile, where Jenkins fetches the env vars from VITO TAP Vault. - connection = openeo.connect("openeo.vito.be") - with capfd.disabled(): - # Temporarily disable output capturing, to make sure that OIDC device - # code instructions (if any) are shown. - connection.authenticate_oidc() - return connection - - -@pytest.fixture -def creo_connection(capfd): - # Like `vito_connection`, but with a different OIDC provider and - # client id/secret for Creodias/CDSE. However, because env vars - # are a global thing, and `vito_connection` is already consuming - # the built-in env var handling of `authenticate_oidc()` for client - # credentials auth, we have to roll our own env-var handling logic here - # Also see https://github.com/Open-EO/openeo-python-client/issues/435 - connection = openeo.connect("openeo.creo.vito.be") - - if os.environ.get("OPENEO_AUTH_METHOD") == "client_credentials": - provider_id = os.environ.get("OPENEO_AUTH_CDSE_PROVIDER_ID") - client_id = os.environ.get("OPENEO_AUTH_CDSE_CLIENT_ID") - client_secret = os.environ.get("OPENEO_AUTH_CDSE_CLIENT_SECRET") - connection.authenticate_oidc_client_credentials( - provider_id=provider_id, client_id=client_id, client_secret=client_secret - ) - else: - with capfd.disabled(): - # Temporarily disable output capturing, to make sure that - # OIDC device code instructions (if any) are shown. 
- connection.authenticate_oidc() - - return connection - - -START_DATE, END_DATE = "2021-01-01", "2021-12-31" -X = 3740000.0 -Y = 3020000.0 -EXTENT = dict(zip(["west", "south", "east", "north"], [X, Y, X + 10 * 15, Y + 10 * 15])) -EXTENT["crs"] = 3035 -EXTENT["srs"] = 3035 - - -def test_preprocessing(vito_connection): - provider = "terrascope" - - x = 3740000.0 - y = 3020000.0 - EXTENT_20KM = dict( - zip(["west", "south", "east", "north"], [x, y, x + 10 * 256, y + 10 * 256]) - ) - EXTENT_20KM["crs"] = 3035 - EXTENT_20KM["srs"] = 3035 - - collections_options = get_collection_options(provider) - - input_cube = worldcereal_preprocessed_inputs( - vito_connection, - EXTENT_20KM, - START_DATE, - END_DATE, - target_crs=3035, - masking="mask_scl_dilation", - provider=provider, - **collections_options, - ) - - # Ref file with processing graph - ref_graph = basedir / "testresources" / "terrascope_graph.json" - - # uncomment to save current graph to the ref file - with open(ref_graph, "w") as f: - f.write(json.dumps(input_cube.flat_graph(), indent=4)) - - with open(ref_graph, "r") as f: - expected = json.load(f) - assert expected == input_cube.flat_graph() - - -@pytest.mark.skip -def test_fetch_inputs_optical(vito_connection): - temp_output_file = tempfile.NamedTemporaryFile() - - input_cube = worldcereal_preprocessed_inputs( - vito_connection, - EXTENT, - START_DATE, - END_DATE, - masking="mask_scl_dilation", - S1_collection=None, - METEO_collection=None, - DEM_collection=None, - target_crs=3035, - ) - - input_cube.download(temp_output_file.name, format="NetCDF") - - _ = xarray.load_dataset(temp_output_file.name) - - temp_output_file.close() - - -def test_fetch_inputs_all(vito_connection, tmp_path): - """Test to fetch all preprocessed inputs from OpenEO - This test does a full one on one check of the inputs. - If it fails, something in the data fetching and preprocessing - has changed. New ref data can be written and pushed, but only - do this if you are 100% sure that the new data is fully correct. - - Note that changing to a different backend will likely have - slightly different results and hence a failing test. - - Test currently runs on Terrascope backend and layers. - """ - - temp_output_file = tmp_path / "cropclass_generated_inputs.nc" - - input_cube = worldcereal_preprocessed_inputs( - vito_connection, - EXTENT, - START_DATE, - END_DATE, - target_crs=3035, - s1_orbitdirection="DESCENDING", - ) - - input_cube.download(temp_output_file) - - input_data = xarray.load_dataset(temp_output_file) - - # Path to reference output file - ref_file = basedir / "testresources" / "worldcereal_inputs.nc" - - # Uncomment to overwrite reference file - # input_data.to_netcdf(ref_file) - - # Need 17 variables - assert len(input_data.variables) == 17 - - # Do 1-1 check with ref data - ref_data = xarray.load_dataset(ref_file) - xarray.testing.assert_allclose(input_data, ref_data) - - -def test_fetch_inputs_all_keep_creo(creo_connection): - """Test fetching preprocessed inputs from CREO - backend. 
- """ - - logger.info("Starting test test_fetch_inputs_all") - - temp_output_file = tempfile.NamedTemporaryFile() - - collections_options = get_collection_options("creo") - - input_cube = worldcereal_preprocessed_inputs( - creo_connection, - EXTENT, - START_DATE, - END_DATE, - target_crs=3035, - s1_orbitdirection="DESCENDING", - provider="creo", - **collections_options, - ) - - logger.info("Starting the downloading of the inputs as a synchronous job.") - input_cube.download(temp_output_file.name, format="NetCDF") - - logger.info("Downloading finished, loading the result file.") - - input_data = xarray.load_dataset(temp_output_file.name) - - # Path to reference output file - ref_file = basedir / "testresources" / "worldcereal_inputs_creo.nc" - - # Uncomment to overwrite reference file - input_data.to_netcdf(ref_file) - - # Need 16 variables - assert len(input_data.variables) == 16 - - # Do 1-1 check with ref data - ref_data = xarray.load_dataset(ref_file).drop("crs").to_array(dim="bands") - input_data = input_data.drop("crs").to_array(dim="bands") - - # Do test in float32 so negative values don't overflow - # Also test using numpy to be more verbose in case of - # differences - np.testing.assert_array_equal( - ref_data.values.astype(np.float32), input_data.values.astype(np.float32) - ) - - temp_output_file.close() - - -# def test_run_udf_openeo(vito_connection): -# '''Test full inference run on Terrascope -# ''' - -# provider = 'terrascope' - -# # Get the appropriate job options -# job_options = get_job_options(provider) - -# # Main croptype generation function -# clf_results = croptype_map(EXTENT, vito_connection, provider) - -# # Finally, submit the job so the entire workflow will start -# job = clf_results.execute_batch( -# title="Cropclass-Classification-Workflow-Terrascope", -# out_format="GTiff", -# job_options=job_options) - -# # Get the results -# results = job.get_results() - -# # Path to reference output file -# ref_file = basedir / 'resources' / 'cropclass-terrascope-OpenEO_REF.tif' - -# # Loop over the resulting assets and download -# for asset in results.get_assets(): -# if asset.metadata["type"].startswith("image/tiff"): -# newfile = str(basedir / "cropclass-terrascope-") + asset.name -# asset.download(newfile) - -# # Uncomment to overwrite reference file -# # asset.download(ref_file) - -# # Compare new data to ref data -# ds_new = xarray.open_dataset(newfile, engine='rasterio') -# ds_ref = xarray.open_dataset(ref_file, engine='rasterio') - -# xarray.testing.assert_allclose(ds_new, ds_ref) - -# # Path to reference output file from local run -# local_ref_file = basedir / 'resources' / 'cropclass_local_result.nc' - -# # Get raw crop type values from local ref file -# local_ref_data = xarray.load_dataset( -# local_ref_file).to_array().squeeze(drop=True).sel( -# bands='croptype').values - -# # Get raw crop type values from new openeo run -# openeo_data = ds_new['band_data'].sel(band=1).values.astype(np.int64) - -# # Do 1-1 check with local ref data -# np.testing.assert_array_equal(local_ref_data, openeo_data) - - -# def test_run_udf_openeo_creo(creo_connection): -# '''Test full inference run on Creodias -# ''' - -# provider = 'creodias' - -# # Get the appropriate job options -# job_options = get_job_options(provider) - -# # For CREO we need to add METEO data manually -# with open(os.path.join(basedir, "resources/METEO-E268N194-2021")) as meteo: -# meteo_json = json.load(meteo) - -# # Main croptype generation function -# clf_results = croptype_map(EXTENT, creo_connection, provider, 
-#                                processing_options={'METEO_data': meteo_json})
-
-#     # Finally, submit the job so the entire workflow will start
-#     job = clf_results.execute_batch(
-#         title="Cropclass-Classification-Workflow-CreoDIAS",
-#         out_format="GTiff",
-#         job_options=job_options)
-
-#     # Get the results
-#     results = job.get_results()
-
-#     # Path to reference output file
-#     ref_file = basedir / 'resources' / 'cropclass-creo-OpenEO_REF.tif'
-
-#     # Loop over the resulting assets and download
-#     for asset in results.get_assets():
-#         if asset.metadata["type"].startswith("image/tiff"):
-#             newfile = str(basedir / "cropclass-creo-") + asset.name
-#             asset.download(newfile)
-
-#     # Uncomment to overwrite reference file
-#     # asset.download(ref_file)
-
-#     # Compare new data to ref data
-#     ds_new = xarray.open_dataset(newfile, engine='rasterio')
-#     ds_ref = xarray.open_dataset(ref_file, engine='rasterio')
-
-#     # Only test the labels
-#     np.testing.assert_array_equal(
-#         ds_new['band_data'][0, :, :],
-#         ds_ref['band_data'][0, :, :])
-
-#     # Path to reference output file from local run
-#     local_ref_file = basedir / 'resources' / 'cropclass_local_result.nc'
-
-#     # Get raw crop type values from local ref file
-#     local_ref_data = xarray.load_dataset(
-#         local_ref_file).to_array().squeeze(drop=True).sel(
-#             bands='croptype').values
-
-#     # Get raw crop type values from new openeo run
-#     openeo_data = ds_new['band_data'].sel(band=1).values.astype(np.int64)
-
-#     # Do 1-1 check with local ref data
-#     # NOTE: we cannot do a direct assert_array_equal because a few
-#     # pixels are not equal. Upon investigation it seems that two
-#     # fields differ by two pixels on their borders, likely because
-#     # of all projection steps involved or catalogue differences.
-#     # We do not consider this a problematic difference.
-#     # np.testing.assert_array_equal(local_ref_data, openeo_data)
-#     assert np.count_nonzero(local_ref_data != openeo_data) <= 2
-
-
-def test_optical_mask(vito_connection):
-    """Test whether the mask works as expected.
-    Here we test the mask_scl_dilation mask.
-    """
-    logger.info("Starting test test_optical_mask")
-
-    temp_output_file = tempfile.NamedTemporaryFile()
-
-    processing_options = get_processing_options("terrascope")
-
-    input_cube = worldcereal_preprocessed_inputs(
-        vito_connection,
-        EXTENT,
-        START_DATE,
-        END_DATE,
-        S1_collection=None,
-        METEO_collection=None,
-        DEM_collection=None,
-        WORLDCOVER_collection=None,
-        preprocess=False,  # So linear interpolation is disabled
-        masking="mask_scl_dilation",
-        **processing_options,
-    )
-
-    logger.info("Starting the download of the inputs as a synchronous job.")
-    input_cube.download(temp_output_file.name, format="NetCDF")
-
-    logger.info("Download finished, loading the result file.")
-
-    input_data = xarray.load_dataset(temp_output_file.name)
-
-    # We should have exactly 4709 masked pixels in case of mask_scl_dilation
-    # masking
-    assert (input_data["B08"].values == 65535).sum() == 4709
-
-
-# @pytest.mark.skip
-def test_optical_mask_creo(creo_connection):
-    """Test whether the mask works as expected.
-    Here we test the CREO backend.
-    NOTE: the skip marker above is currently commented out; restore it
-    if the bug with the new baseline resurfaces!
- """ - logger.info("Starting CREO test test_optical_mask") - - temp_output_file = tempfile.NamedTemporaryFile() - print(temp_output_file) - - processing_options = get_processing_options("creodias") - processing_options["end_month"] = 12 - S2_collection = get_collection_options(provider="creodias")["S2_collection"] - - input_cube = worldcereal_preprocessed_inputs( - creo_connection, - EXTENT, - START_DATE, - END_DATE, - S2_collection=S2_collection, - S1_collection=None, - METEO_collection=None, - DEM_collection=None, - WORLDCOVER_collection=None, - preprocess=False, # So linear interpolation is disabled - masking="mask_scl_dilation", - **processing_options, - ) - - logger.info("Starting the downloading of the inputs as a synchronous job.") - input_cube.download(temp_output_file.name, format="NetCDF") - - logger.info("Downloading finished, loading the result file.") - - input_data = xarray.load_dataset(temp_output_file.name) - - # We should have exactly XXX masked pixels in case of mask_scl_dilation - # masking - assert (input_data["B08"].values == 65535).sum() == 5510 +def test_dummy(): + pass \ No newline at end of file