Skip to content

Commit

Permalink
Issue #114/#211 initial support for load_url (vector cubes)
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Aug 7, 2023
1 parent 65920ef commit ab2ad96
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 36 deletions.
74 changes: 43 additions & 31 deletions openeo_driver/ProcessGraphDeserializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,29 @@
import datetime
import logging
import math
import re
import tempfile
import time
import warnings
from pathlib import Path
from typing import Dict, Callable, List, Union, Tuple, Any, Iterable
from typing import Any, Callable, Dict, Iterable, List, Tuple, Union

import pandas as pd
import geopandas as gpd
import numpy as np
import openeo.udf
import openeo_processes
import pandas as pd
import pyproj
import requests
from dateutil.relativedelta import relativedelta
from requests.structures import CaseInsensitiveDict
import shapely.geometry
from shapely.geometry import shape, GeometryCollection, shape, mapping, MultiPolygon
import shapely.ops

import openeo.udf
from dateutil.relativedelta import relativedelta
from openeo.capabilities import ComparableVersion
from openeo.internal.process_graph_visitor import ProcessGraphVisitor, ProcessGraphVisitException
from openeo.internal.process_graph_visitor import ProcessGraphVisitException, ProcessGraphVisitor
from openeo.metadata import CollectionMetadata, MetadataException
from openeo.util import load_json, rfc3339, deep_get, str_truncate
from openeo.util import deep_get, load_json, rfc3339, str_truncate
from shapely.geometry import GeometryCollection, MultiPolygon, mapping, shape

from openeo_driver import dry_run
from openeo_driver.backend import (
UserDefinedProcessMetadata,
Expand All @@ -53,13 +53,9 @@
to_save_result, AggregatePolygonSpatialResult, MlModelResult
from openeo_driver.specs import SPECS_ROOT, read_spec
from openeo_driver.util.date_math import month_shift
from openeo_driver.util.geometry import (
geojson_to_geometry,
geojson_to_multipolygon,
spatial_extent_union,
)
from openeo_driver.util.geometry import geojson_to_geometry, geojson_to_multipolygon, spatial_extent_union
from openeo_driver.util.utm import auto_utm_epsg_for_geometry
from openeo_driver.utils import smart_bool, EvalEnv
from openeo_driver.utils import EvalEnv, smart_bool

_log = logging.getLogger(__name__)

Expand Down Expand Up @@ -1540,26 +1536,24 @@ def read_vector(args: Dict, env: EvalEnv) -> DelayedVector:


@process_registry_100.add_function(spec=read_spec("openeo-processes/1.x/proposals/load_uploaded_files.json"))
def load_uploaded_files(args: dict, env: EvalEnv) -> Union[DriverVectorCube,DriverDataCube]:
def load_uploaded_files(args: ProcessArgs, env: EvalEnv) -> Union[DriverVectorCube, DriverDataCube]:
# TODO #114 EP-3981 process name is still under discussion https://github.com/Open-EO/openeo-processes/issues/322
paths = extract_arg(args, 'paths', process_id="load_uploaded_files")
format = extract_arg(args, 'format', process_id="load_uploaded_files")
options = args.get("options", {})

input_formats = CaseInsensitiveDict(env.backend_implementation.file_formats()["input"])
if format not in input_formats:
raise FileTypeInvalidException(type=format, types=", ".join(input_formats.keys()))
paths = args.get_required("paths", expected_type=list)
format = args.get_required(
"format",
expected_type=str,
validator=ProcessArgs.validator_file_format(formats=env.backend_implementation.file_formats()["input"]),
)
options = args.get_optional("options", default={})

if format.lower() in {"geojson", "esri shapefile", "gpkg", "parquet"}:
if DriverVectorCube.from_fiona_supports(format):
return DriverVectorCube.from_fiona(paths, driver=format, options=options)
elif format.lower() in {"GTiff"}:
if(len(paths)!=1):
raise FeatureUnsupportedException(f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}")
kwargs = dict(
glob_pattern=paths[0],
format=format,
options=options
)
if len(paths) != 1:
raise FeatureUnsupportedException(
f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}"
)
kwargs = dict(glob_pattern=paths[0], format=format, options=options)
dry_run_tracer: DryRunDataTracer = env.get(ENV_DRY_RUN_TRACER)
if dry_run_tracer:
return dry_run_tracer.load_disk_data(**kwargs)
Expand Down Expand Up @@ -1604,6 +1598,24 @@ def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
return vector_cube


@process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/load_url.json"))
def load_url(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
    """
    Implementation of the (proposed) `load_url` process: load a vector cube from a HTTP(S) URL.

    :param args: process arguments: "url" and "format" are required, "options" is optional
    :param env: evaluation environment, used to look up the input file formats of the back-end
    :return: vector cube built by `DriverVectorCube.from_fiona`
    :raises FeatureUnsupportedException: if `DriverVectorCube.from_fiona` does not support the format
    """
    # TODO: Follow up possible `load_url` changes https://github.com/Open-EO/openeo-processes/issues/450 ?
    is_http_url = re.compile("^https?://").match
    url = args.get_required("url", expected_type=str, validator=is_http_url)

    input_formats = env.backend_implementation.file_formats()["input"]
    file_format = args.get_required(
        "format",
        expected_type=str,
        validator=ProcessArgs.validator_file_format(formats=input_formats),
    )
    load_options = args.get_optional("options", default={})

    # Bail out early on formats that are valid input formats, but that `from_fiona` can not read.
    if not DriverVectorCube.from_fiona_supports(file_format):
        raise FeatureUnsupportedException(f"Loading format {file_format!r} is not supported")

    # TODO: for GeoJSON (and related) support `properties` option like load_geojson? https://github.com/Open-EO/openeo-processes/issues/450
    return DriverVectorCube.from_fiona(paths=[url], driver=file_format, options=load_options)


@non_standard_process(
ProcessSpec("get_geometries", description="Reads vector data from a file or a URL or get geometries from a FeatureCollection")
.param('filename', description="filename or http url of a vector file", schema={"type": "string"}, required=False)
Expand Down
2 changes: 1 addition & 1 deletion openeo_driver/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.61.0a1"
__version__ = "0.61.1a1"
6 changes: 6 additions & 0 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,12 @@ def from_geodataframe(
)
return cls(geometries=data, cube=cube)

@classmethod
def from_fiona_supports(cls, format: str) -> bool:
    """
    Check whether `from_fiona` supports the given format.

    :param format: format name (compared case-insensitively)
    :return: True if the lower-cased format name is one of the formats listed below
    """
    # TODO: also cover input format options?
    fiona_formats = ("geojson", "esri shapefile", "gpkg", "parquet")
    return format.lower() in fiona_formats

@classmethod
def from_fiona(
cls,
Expand Down
22 changes: 22 additions & 0 deletions openeo_driver/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union

from openeo_driver.errors import (
FileTypeInvalidException,
OpenEOApiException,
ProcessParameterInvalidException,
ProcessParameterRequiredException,
ProcessUnsupportedException,
Expand Down Expand Up @@ -325,6 +327,9 @@ def _check_value(
try:
valid = validator(value)
reason = "Failed validation."
except OpenEOApiException:
# Preserve original OpenEOApiException
raise
except Exception as e:
valid = False
reason = str(e)
Expand Down Expand Up @@ -441,6 +446,23 @@ def validator(value):

return validator

@staticmethod
def validator_file_format(formats: Union[List[str], Dict[str, dict]]):
    """
    Build a validator for an input/output file format name (case-insensitive check).

    :param formats: list of valid formats, or dictionary with formats as keys
    :return: callable that returns True for an allowed format name
        and raises FileTypeInvalidException for any other name
    """
    # Keep the original spelling for the error message, compare on lower-cased names.
    allowed = list(formats)
    allowed_lowercase = {name.lower() for name in allowed}

    def validator(value: str):
        if value.lower() in allowed_lowercase:
            return True
        raise FileTypeInvalidException(type=value, types=", ".join(allowed))

    return validator

@staticmethod
def validator_geojson_dict(
allowed_types: Optional[Collection[str]] = None,
Expand Down
11 changes: 10 additions & 1 deletion openeo_driver/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,10 +614,19 @@ def test_my_function(caplog, monkeypatch)
@contextlib.contextmanager
def ephemeral_fileserver(path: Union[Path, str], host: str = "localhost", port: int = 0) -> str:
"""
Context manager to run a short-lived (static) file HTTP server, serving some local test data.
Context manager to run a short-lived (static) file HTTP server, serving files from a given local test data folder.
This is an alternative to traditional mocking of HTTP requests (e.g. with requests_mock)
for situations where that doesn't work (requests are done in a subprocess or at the level of a C-extension/library).
Usage example:
>>> # create temp file with `tmp_path` fixture
>>> (tmp_path / "hello.txt").write_text("Hello world")
>>> with ephemeral_fileserver(tmp_path) as fileserver_root:
... res = subprocess.check_output(["curl", f"{fileserver_root}/hello.txt"])
>>> assert res.strip() == b"Hello world"
:param path: root path of the local files to serve
:return: root URL of the ephemeral file server (e.g. "http://localhost:21342")
"""
Expand Down
31 changes: 28 additions & 3 deletions tests/test_processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

from openeo_driver.datacube import DriverDataCube
from openeo_driver.errors import (
ProcessUnsupportedException,
ProcessParameterRequiredException,
FileTypeInvalidException,
ProcessParameterInvalidException,
ProcessParameterRequiredException,
ProcessUnsupportedException,
)
from openeo_driver.processes import ProcessSpec, ProcessRegistry, ProcessRegistryException, ProcessArgs
from openeo_driver.processes import ProcessArgs, ProcessRegistry, ProcessRegistryException, ProcessSpec


def test_process_spec_basic_040():
Expand Down Expand Up @@ -635,3 +636,27 @@ def test_validator_geojson_dict(self):
),
):
_ = args.get_required("geometry", validator=validator)

@pytest.mark.parametrize(
    ["formats"],
    [
        (["GeoJSON", "CSV"],),
        ({"GeoJSON": {}, "CSV": {}},),
    ],
)
def test_validator_file_format(self, formats):
    """
    `ProcessArgs.validator_file_format` accepts format names case-insensitively
    (for both list and dict style `formats`) and rejects unknown formats
    with a `FileTypeInvalidException`.
    """
    process_args = ProcessArgs(
        {"format1": "GeoJSON", "format2": "geojson", "format3": "TooExotic"},
        process_id="wibble",
    )
    check_format = ProcessArgs.validator_file_format(formats=formats)

    # Valid formats (in any casing) are passed through unchanged.
    for arg_name, expected_value in [("format1", "GeoJSON"), ("format2", "geojson")]:
        assert process_args.get_required(arg_name, validator=check_format) == expected_value

    # The original exception type and message must survive the argument validation layer.
    expected_message = re.escape("File format TooExotic not allowed. Allowed file formats: GeoJSON, CSV")
    with pytest.raises(FileTypeInvalidException, match=expected_message):
        _ = process_args.get_required("format3", validator=check_format)
94 changes: 94 additions & 0 deletions tests/test_views_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1965,11 +1965,105 @@ def test_to_vector_cube(self, api100, geojson, expected):
],
)
def test_load_geojson(self, api100, geojson, expected):
# TODO: cover `properties` parameter
res = api100.check_result(
{"vc": {"process_id": "load_geojson", "arguments": {"data": geojson}, "result": True}}
)
assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected})

@pytest.mark.parametrize(
    ["geometry", "expected"],
    [
        (
            {"type": "Point", "coordinates": (1, 2)},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Point", "coordinates": [1, 2]},
                    "properties": {},
                },
            ],
        ),
        (
            {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]},
                    "properties": {},
                },
            ],
        ),
        (
            {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {},
                },
            ],
        ),
        (
            {
                "type": "Feature",
                "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
                "properties": {"id": "12_3"},
            },
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {"id": "12_3"},
                },
            ],
        ),
        (
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]},
                        "properties": {"id": 1},
                    },
                    {
                        "type": "Feature",
                        "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
                        "properties": {"id": 2},
                    },
                ],
            },
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]},
                    "properties": {"id": 1},
                },
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {"id": 2},
                },
            ],
        ),
    ],
)
def test_load_url_geojson(self, api100, geometry, expected, tmp_path):
    """
    Dump the GeoJSON `geometry` in a temp folder, serve that folder over HTTP
    and check that `load_url` on the resulting URL produces the `expected` features.
    """
    geojson_path = tmp_path / "geometry.json"
    geojson_path.write_text(json.dumps(geometry))

    with ephemeral_fileserver(tmp_path) as fileserver_root:
        load_node = {
            "process_id": "load_url",
            "arguments": {"url": f"{fileserver_root}/geometry.json", "format": "GeoJSON"},
            "result": True,
        }
        # The request has to be done inside the context, while the file server is available.
        res = api100.check_result({"load": load_node})

    assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected})


def test_no_nested_JSONResult(api):
api.set_auth_bearer_token()
Expand Down

0 comments on commit ab2ad96

Please sign in to comment.