Skip to content

Commit

Permalink
Issue #197 add basic pg based test of UDF on VectorCube
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Aug 3, 2023
1 parent a25f3ab commit 21bc21d
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 16 deletions.
10 changes: 5 additions & 5 deletions openeo_driver/ProcessGraphDeserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,10 @@ def apply_neighborhood(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:

@process
def apply_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
data_cube = args.get_required("data", expected_type=DriverDataCube)
data_cube = args.get_required("data", expected_type=(DriverDataCube, DriverVectorCube))
process = args.get_deep("process", "process_graph", expected_type=dict)
dimension = args.get_required(
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
)
target_dimension = args.get_optional("target_dimension", default=None, expected_type=str)
context = args.get_optional("context", default=None)
Expand Down Expand Up @@ -748,7 +748,7 @@ def reduce_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
data_cube: DriverDataCube = args.get_required("data", expected_type=DriverDataCube)
reduce_pg = args.get_deep("reducer", "process_graph", expected_type=dict)
dimension = args.get_required(
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
)
context = args.get_optional("context", default=None)
return data_cube.reduce_dimension(reducer=reduce_pg, dimension=dimension, context=context, env=env)
Expand Down Expand Up @@ -924,7 +924,7 @@ def aggregate_temporal(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
dimension = args.get_optional(
"dimension",
default=lambda: data_cube.metadata.temporal_dimension.name,
validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
)
context = args.get_optional("context", default=None)

Expand All @@ -941,7 +941,7 @@ def aggregate_temporal_period(args: ProcessArgs, env: EvalEnv) -> DriverDataCube
dimension = args.get_optional(
"dimension",
default=lambda: data_cube.metadata.temporal_dimension.name,
validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
)
context = args.get_optional("context", default=None)

Expand Down
9 changes: 9 additions & 0 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def __eq__(self, o: object) -> bool:
return True
return False

def get_dimension_names(self) -> List[str]:
return self.metadata.dimension_names()

def _not_implemented(self):
"""Helper to raise a NotImplemented exception containing method name"""
raise NotImplementedError("DataCube method not implemented: {m!r}".format(m=inspect.stack()[1].function))
Expand Down Expand Up @@ -511,6 +514,12 @@ def to_legacy_save_result(self) -> Union["AggregatePolygonResult", "JSONResult"]
f"Unsupported cube configuration {cube.dims} for _write_legacy_aggregate_polygon_result_json"
)

def get_dimension_names(self) -> List[str]:
if self._cube is None:
return [self.DIM_GEOMETRIES]
else:
return list(self._cube.dims)

def get_bounding_box(self) -> Tuple[float, float, float, float]:
# TODO: cache bounding box?
# TODO #114 #141 Open-EO/openeo-geopyspark-driver#239: option to buffer point geometries (if any)
Expand Down
8 changes: 5 additions & 3 deletions openeo_driver/dummy/dummy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,12 @@ def __init__(self, metadata: CollectionMetadata = None):
self.apply_tiles = Mock(name="apply_tiles", return_value=self)
self.apply_tiles_spatiotemporal = Mock(name="apply_tiles_spatiotemporal", return_value=self)

# Create mock methods for remaining data cube methods that are not yet defined
already_defined = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
# Create mock methods for remaining DriverDataCube methods that are not yet defined directly by DummyDataCube
to_keep = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
to_keep.update(m for m in DriverDataCube.__dict__.keys() if m.startswith("_"))
to_keep.update(["get_dimension_names"])
for name, method in DriverDataCube.__dict__.items():
if not name.startswith('_') and name not in already_defined and callable(method):
if not name in to_keep and callable(method):
setattr(self, name, Mock(name=name, return_value=self))

for name in [n for n, m in DummyDataCube.__dict__.items() if getattr(m, '_mock_side_effect', False)]:
Expand Down
47 changes: 43 additions & 4 deletions openeo_driver/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re
import urllib.request
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union
from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union, Collection
from unittest import mock

import pytest
Expand Down Expand Up @@ -532,9 +532,48 @@ def to_geojson_feature_collection(self) -> dict:
return approxify(result, rel=self.rel, abs=self.abs)


def caplog_with_custom_formatter(
caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]
):
class ApproxGeoJSONByBounds:
"""pytest assert helper to build a matcher to check if a certain GeoJSON construct is within expected bounds"""

def __init__(
self,
*args,
types: Collection[str] = ("Polygon", "MultiPolygon"),
rel: Optional[float] = None,
abs: Optional[float] = None,
):
bounds = args[0] if len(args) == 1 else args
assert isinstance(bounds, (list, tuple)) and len(bounds) == 4
self.expected_bounds = [float(b) for b in bounds]
self.rel = rel
self.abs = abs
self.expected_types = set(types)
self.actual_info = []

def __eq__(self, other):
try:
assert isinstance(other, dict), "Not a dict"
assert "type" in other, "No 'type' field"
assert other["type"] in self.expected_types, f"Wrong type {other['type']!r}"
assert "coordinates" in other, "No 'coordinates' field"

actual_bounds = shapely.geometry.shape(other).bounds
matching = actual_bounds == pytest.approx(self.expected_bounds, rel=self.rel, abs=self.abs)
if not matching:
self.actual_info.append(f"expected bounds {self.expected_bounds} != actual bounds: {actual_bounds}")
return matching
except Exception as e:
self.actual_info.append(str(e))
return False

def __repr__(self):
msg = f"<{type(self).__name__} types={self.expected_types} bounds={self.expected_bounds} rel={self.rel}, abs={self.abs}>"
if self.actual_info:
msg += "\n" + "\n".join(f" # {i}" for i in self.actual_info)
return msg


def caplog_with_custom_formatter(caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]):
"""
Context manager to set a custom formatter on the caplog fixture.
Expand Down
36 changes: 36 additions & 0 deletions tests/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
caplog_with_custom_formatter,
ephemeral_fileserver,
preprocess_check_and_replace,
ApproxGeoJSONByBounds,
)


Expand Down Expand Up @@ -284,3 +285,38 @@ def test_caplog_with_custom_formatter(caplog, format):
"[WARNING] still not good (root)",
"WARNING root:test_testing.py:XXX hmm bad times",
]


class TestApproxGeoJSONByBounds:
def test_basic(self):
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, abs=0.1)

@pytest.mark.parametrize(
["data", "expected_message"],
[
("nope", "# Not a dict"),
({"foo": "bar"}, " # No 'type' field"),
({"type": "Polygommm", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}, " # Wrong type 'Polygommm'"),
({"type": "Polygon"}, " # No 'coordinates' field"),
],
)
def test_invalid_construct(self, data, expected_message):
expected = ApproxGeoJSONByBounds(1, 2, 3, 4)
assert data != expected
assert expected_message in repr(expected)

def test_out_of_bounds(self):
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
expected = ApproxGeoJSONByBounds(11, 22, 33, 44, abs=0.1)
assert geometry != expected
assert "# expected bounds [11.0, 22.0, 33.0, 44.0] != actual bounds: (1.0, 1.0, 3.0, 4.0)" in repr(expected)

def test_types(self):
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon"], abs=0.1)
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon", "Point"], abs=0.1)

expected = ApproxGeoJSONByBounds(1, 1, 3, 4, types=["MultiPolygon"], abs=0.1)
assert geometry != expected
assert "Wrong type 'Polygon'" in repr(expected)
77 changes: 73 additions & 4 deletions tests/test_views_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
generate_unique_test_process_id,
preprocess_check_and_replace,
preprocess_regex_check_and_replace,
ApproxGeoJSONByBounds,
)
from openeo_driver.util.geometry import as_geojson_feature, as_geojson_feature_collection
from openeo_driver.util.ioformats import IOFORMATS
Expand Down Expand Up @@ -1310,7 +1311,7 @@ def test_run_udf_on_vector_read_vector(api100, udf_code):
"udf": udf_code,
"runtime": "Python",
},
"result": "true",
"result": True,
},
}
resp = api100.check_result(process_graph)
Expand Down Expand Up @@ -1355,8 +1356,8 @@ def test_run_udf_on_vector_get_geometries(api100, udf_code):
"udf": udf_code,
"runtime": "Python",
},
"result": "true"
}
"result": True,
},
}
resp = api100.check_result(process_graph)
assert resp.json == [
Expand Down Expand Up @@ -1401,7 +1402,7 @@ def test_run_udf_on_vector_load_uploaded_files(api100, udf_code):
"udf": udf_code,
"runtime": "Python",
},
"result": "true",
"result": True,
},
}
resp = api100.check_result(process_graph)
Expand Down Expand Up @@ -3522,3 +3523,71 @@ def test_request_costs_for_failed_request(api, backend_implementation):
assert env["correlation_id"] == "r-abc123"

get_request_costs.assert_called_with(TEST_USER, "r-abc123", False)


class TestVectorCubeRunUDF:
"""
Related to:
- https://github.com/Open-EO/openeo-python-driver/issues/197
- https://github.com/Open-EO/openeo-python-driver/pull/200
- https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
"""

def test_apply_dimension_run_udf_change_geometry(self, api100):
udf_code = """
from openeo.udf import UdfData, FeatureCollection
def process_geometries(udf_data: UdfData) -> UdfData:
[feature_collection] = udf_data.get_feature_collection_list()
gdf = feature_collection.data
gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
udf_data.set_feature_collection_list([
FeatureCollection(id="_", data=gdf),
])
"""
udf_code = textwrap.dedent(udf_code)
process_graph = {
"get_vector_data": {
"process_id": "load_uploaded_files",
"arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
},
"apply_dimension": {
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "get_vector_data"},
"dimension": "properties",
"process": {
"process_graph": {
"runudf1": {
"process_id": "run_udf",
"arguments": {
"data": {"from_node": "get_vector_data"},
"udf": udf_code,
"runtime": "Python",
},
"result": True,
}
},
},
},
"result": True,
},
}
resp = api100.check_result(process_graph)
# DictSubSet=dict
assert resp.json == DictSubSet(
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": ApproxGeoJSONByBounds(0, 0, 4, 4, types=["Polygon"], abs=0.1),
"properties": {"id": "first", "pop": 1234},
},
{
"type": "Feature",
"geometry": ApproxGeoJSONByBounds(2, 1, 6, 5, types=["Polygon"], abs=0.1),
"properties": {"id": "second", "pop": 5678},
},
],
}
)

0 comments on commit 21bc21d

Please sign in to comment.