Skip to content

Commit c69b056

Browse files
authored
Test of enforcing a minimum observation count in tidal composites, fix pystac version bug (#130)
* Add test of minimum observation count * Add missing dependencies, fix DEA Tools incompatibility * Clean setup.py to match requirements * Automatically update integration test validation results * Add missing tide threshold params * Automatically update integration test validation results * Add pystac pin * Add pystac pin * Use more recent version of `eo-datasets` too * Automatically update integration test validation results --------- Co-authored-by: robbibt <[email protected]>
1 parent 2921bde commit c69b056

File tree

6 files changed

+93
-42
lines changed

6 files changed

+93
-42
lines changed

intertidal/composites.py

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,39 @@ def rename_bands(ds, old_string, new_string):
3333
return ds_renamed
3434

3535

36+
def tidal_thresholds(
    tides_highres,
    threshold_lowtide=0.15,
    threshold_hightide=0.85,
    min_obs=0,
):
    """
    Derive low and high tide height thresholds from ranked tide heights.

    Tide heights are ranked along the "time" dimension, and the
    requested fractions of the highest rank are converted into rank
    cut-offs. The returned thresholds are the highest tide height at or
    below the low cut-off, and the lowest tide height at or above the
    high cut-off.

    Parameters
    ----------
    tides_highres : xarray.DataArray
        Tide heights with a "time" dimension. (Assumed to be an xarray
        object because `.rank`, `.where`, `.max` and `.min` are called
        with `dim="time"`; confirm against the caller.)
    threshold_lowtide : float, optional
        Fraction of the highest rank used as the low tide rank cut-off
        (rounded up), by default 0.15.
    threshold_hightide : float, optional
        Fraction of the highest rank used as the high tide rank cut-off
        (rounded down), by default 0.85.
    min_obs : int, optional
        When greater than 0, the low cut-off is raised to at least
        `min_obs` and the high cut-off is lowered to at most the
        highest rank minus `min_obs`. Defaults to 0, which leaves both
        cut-offs unchanged.

    Returns
    -------
    tide_thresh_low, tide_thresh_high : tuple
        Low and high tide height thresholds, each reduced over the
        "time" dimension.
    """
    # Rank every tide height through time, then find the top rank
    tide_ranks = tides_highres.rank(dim="time")
    highest_rank = tide_ranks.max(dim="time")

    # Convert the requested fractions into rank cut-offs. Rounding the
    # low cut-off up and the high cut-off down keeps every matching
    # observation inside the selection.
    low_cutoff = np.ceil(highest_rank * threshold_lowtide)
    high_cutoff = np.floor(highest_rank * threshold_hightide)

    # Optionally widen both selections to enforce a minimum number of
    # observations
    if min_obs > 0:
        low_cutoff = np.maximum(low_cutoff, min_obs)
        high_cutoff = np.minimum(high_cutoff, highest_rank - min_obs)

    # Mask tides to the selected ranks and take the extreme tide height
    # within each selection as the threshold
    is_low_rank = tide_ranks <= low_cutoff
    is_high_rank = tide_ranks >= high_cutoff
    tide_thresh_low = tides_highres.where(is_low_rank).max(dim="time")
    tide_thresh_high = tides_highres.where(is_high_rank).min(dim="time")

    return tide_thresh_low, tide_thresh_high
62+
63+
3664
def tidal_composites(
3765
satellite_ds,
38-
threshold_lowtide=0.2,
39-
threshold_hightide=0.8,
66+
threshold_lowtide=0.15,
67+
threshold_hightide=0.85,
68+
min_obs=0,
4069
eps=1e-4,
4170
cpus=None,
4271
max_iters=10000,
@@ -55,7 +84,7 @@ def tidal_composites(
5584
to filter satellite data to low and high tide images prior to
5685
loading it into memory, allowing more efficient processing.
5786
58-
Based on the method described in:
87+
Pixel-based implementation of the method originally published in:
5988
6089
Sagar, S., Phillips, C., Bala, B., Roberts, D., & Lymburner, L.
6190
(2018). Generating Continental Scale Pixel-Based Surface Reflectance
@@ -67,9 +96,12 @@ def tidal_composites(
6796
satellite_ds : xarray.Dataset
6897
A satellite data time series containing spectral bands.
6998
threshold_lowtide : float, optional
70-
Quantile used to identify low tide observations, by default 0.2.
99+
Quantile used to identify low tide observations, by default 0.15.
71100
threshold_hightide : float, optional
72-
Quantile used to identify high tide observations, by default 0.8.
101+
Quantile used to identify high tide observations, by default 0.85.
102+
min_obs : int, optional
103+
Minimum number of clear observations to enforce when calculating tide
104+
height thresholds. Defaults to 0, which will not apply any minimum.
73105
eps: float, optional
74106
Termination criteria passed on to the geomedian algorithm.
75107
cpus: int, optional
@@ -134,14 +166,22 @@ def tidal_composites(
134166
tides_highres = tides_highres.where(nodata_array)
135167

136168
# Calculate low and high tide thresholds from masked tide data
137-
log.info(f"{run_id}: Calculating low and high tide thresholds")
138-
threshold_ds = xr_quantile(
139-
src=tides_highres.to_dataset(),
140-
quantiles=[threshold_lowtide, threshold_hightide],
141-
nodata=np.nan,
169+
log.info(
170+
f"{run_id}: Calculating low and high tide thresholds with minimum {min_obs} observations"
171+
)
172+
# threshold_ds = xr_quantile(
173+
# src=tides_highres.to_dataset(),
174+
# quantiles=[threshold_lowtide, threshold_hightide],
175+
# nodata=np.nan,
176+
# )
177+
# low_threshold = threshold_ds.isel(quantile=0).tide_height.drop("quantile")
178+
# high_threshold = threshold_ds.isel(quantile=-1).tide_height.drop("quantile")
179+
low_threshold, high_threshold = tidal_thresholds(
180+
tides_highres=tides_highres,
181+
threshold_lowtide=threshold_lowtide,
182+
threshold_hightide=threshold_hightide,
183+
min_obs=min_obs,
142184
)
143-
low_threshold = threshold_ds.isel(quantile=0).tide_height.drop("quantile")
144-
high_threshold = threshold_ds.isel(quantile=-1).tide_height.drop("quantile")
145185

146186
# Create masks for selecting satellite observations below and above the
147187
# low and high tide thresholds
@@ -270,14 +310,21 @@ def tidal_composites(
270310
@click.option(
271311
"--threshold_lowtide",
272312
type=float,
273-
default=0.2,
274-
help="The quantile used to identify low tide observations. " "Defaults to 0.2.",
313+
default=0.15,
314+
help="The quantile used to identify low tide observations. Defaults to 0.15.",
275315
)
276316
@click.option(
277317
"--threshold_hightide",
278318
type=float,
279-
default=0.8,
280-
help="The quantile used to identify high tide observations. " "Defaults to 0.8.",
319+
default=0.85,
320+
help="The quantile used to identify high tide observations. Defaults to 0.85.",
321+
)
322+
@click.option(
323+
"--min_obs",
324+
type=int,
325+
default=0,
326+
help="Minimum number of clear observations to enforce when calculating tide "
327+
"height thresholds. Defaults to 0, which will not apply any minimum.",
281328
)
282329
@click.option(
283330
"--mask_sunglint",
@@ -355,6 +402,7 @@ def tidal_composites_cli(
355402
resolution,
356403
threshold_lowtide,
357404
threshold_hightide,
405+
min_obs,
358406
mask_sunglint,
359407
include_coastal_aerosol,
360408
eps,
@@ -429,7 +477,7 @@ def tidal_composites_cli(
429477
)
430478

431479
# Fail early if not enough observations
432-
if len(satellite_ds.time) < 20:
480+
if len(satellite_ds.time) < 50:
433481
raise Exception(
434482
"Insufficient satellite data available to process composites; skipping."
435483
)
@@ -440,6 +488,7 @@ def tidal_composites_cli(
440488
satellite_ds=satellite_ds,
441489
threshold_lowtide=threshold_lowtide,
442490
threshold_hightide=threshold_hightide,
491+
min_obs=min_obs,
443492
eps=eps,
444493
cpus=cpus,
445494
max_iters=max_iters,

requirements.in

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
--find-links=https://packages.dea.ga.gov.au/hdstats/
2-
aiohttp
3-
awscli
4-
botocore
2+
aiohttp==3.10.10
3+
awscli==1.38.1
4+
botocore==1.37.1
55
Bottleneck==1.4.2
66
click==8.1.7
77
dask==2024.3.1
88
datacube[s3,performance]==1.8.19
9-
dea-tools>=0.3.4
10-
eodatasets3==0.30.6
9+
dea-tools==0.3.6
10+
eodatasets3==0.30.9
1111
eo-tides==0.6.3
1212
hdstats==0.1.8.post1
1313
geopandas==0.14.4
1414
matplotlib==3.9.2
15-
mdutils
15+
mdutils==1.6.0
1616
numpy==1.26.4
1717
odc-algo==0.2.3
1818
odc-geo==0.4.8
19-
odc-ui
19+
odc-ui==0.2.1
2020
pandas==2.2.3
2121
pyogrio==0.10.0
2222
pyproj==3.7.0
23+
pystac==1.11
2324
pytest==8.3.3
2425
pytest-dependency
2526
pytest-cov
26-
pyTMD>=2.2.1
2727
pytz==2024.1
2828
rasterio==1.4.1
2929
rioxarray==0.17.0

setup.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,45 @@
33
import os
44
from setuptools import find_packages, setup
55

6-
# Where are we?
7-
IS_SANDBOX = "sandbox" in os.getenv("JUPYTER_IMAGE", default="")
8-
96
# What packages are required for this module to be executed?
107
REQUIRED = [
118
"aiohttp",
12-
"affine",
9+
"awscli",
1310
"botocore",
11+
"Bottleneck",
1412
"click",
15-
"datacube",
16-
"dea_tools",
17-
"eo-tides>=0.6.3",
13+
"dask",
14+
"datacube[s3,performance]",
15+
"dea_tools>=0.3.6",
1816
"eodatasets3<1.9",
19-
"fiona",
17+
"eo-tides>=0.6.3",
18+
"hdstats",
2019
"geopandas",
2120
"matplotlib",
2221
"mdutils",
2322
"numpy",
24-
"odc-geo",
25-
"odc-ui",
2623
"odc-algo",
24+
"odc-geo",
25+
"odc-ui",
2726
"pandas",
27+
"pyogrio",
2828
"pyproj",
29+
"pystac<1.12",
2930
"pytest",
3031
"pytest-dependency",
3132
"pytest-cov",
32-
"pyTMD>=2.2.1",
3333
"pytz",
3434
"rasterio",
35+
"rioxarray",
3536
"s3fs",
36-
"setuptools-scm",
3737
"seaborn",
38-
"sunriset",
3938
"scikit-image",
4039
"scikit-learn",
4140
"scipy",
41+
"sunriset",
4242
"shapely",
4343
"tqdm",
4444
"xarray",
45-
"xskillscore",
4645
]
4746

4847
# Package metadata
@@ -51,7 +50,7 @@
5150
URL = "https://github.com/GeoscienceAustralia/dea-intertidal"
5251
5352
AUTHOR = "Geoscience Australia"
54-
REQUIRES_PYTHON = ">=3.8.0"
53+
REQUIRES_PYTHON = ">=3.10.0"
5554

5655
# Setup kwargs
5756
setup_kwargs = {
@@ -63,7 +62,7 @@
6362
"author_email": EMAIL,
6463
"python_requires": REQUIRES_PYTHON,
6564
"url": URL,
66-
"install_requires": REQUIRED if not IS_SANDBOX else [],
65+
"install_requires": REQUIRED,
6766
"packages": find_packages(),
6867
"include_package_data": True,
6968
"license": "Apache License 2.0",

tests/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Integration tests
1010
1111
This directory contains tests that are run to verify that DEA Intertidal code runs correctly. The ``test_intertidal.py`` file runs a small-scale full workflow analysis over an intertidal flat in the Gulf of Carpentaria using the DEA Intertidal [Command Line Interface (CLI) tools](../notebooks/Intertidal_CLI.ipynb), and compares these results against a LiDAR validation DEM to produce some simple accuracy metrics.
1212

13-
The latest integration test completed at **2025-04-17 16:32**. Compared to the previous run, it had an:
13+
The latest integration test completed at **2025-04-28 16:23**. Compared to the previous run, it had an:
1414
- RMSE accuracy of **0.14 m ( :heavy_minus_sign: no change)**
1515
- MAE accuracy of **0.12 m ( :heavy_minus_sign: no change)**
1616
- Bias of **0.12 m ( :heavy_minus_sign: no change)**

tests/validation.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,6 @@ time,Correlation,RMSE,MAE,R-squared,Bias,Regression slope
104104
2025-04-17 01:12:44.346527+00:00,0.975,0.145,0.123,0.95,0.117,1.119
105105
2025-04-17 03:13:55.567169+00:00,0.975,0.145,0.123,0.95,0.117,1.119
106106
2025-04-17 06:32:08.248874+00:00,0.975,0.145,0.123,0.95,0.117,1.119
107+
2025-04-28 01:32:00.287768+00:00,0.975,0.145,0.123,0.95,0.117,1.119
108+
2025-04-28 02:09:27.190062+00:00,0.975,0.145,0.123,0.95,0.117,1.119
109+
2025-04-28 06:23:57.332550+00:00,0.975,0.145,0.123,0.95,0.117,1.119

tests/validation.jpg

138 Bytes
Loading

0 commit comments

Comments
 (0)