diff --git a/extensions/linz/notebooks/asset-datetime-summary.ipynb b/extensions/linz/notebooks/asset-datetime-summary.ipynb new file mode 100644 index 00000000..37aaf667 --- /dev/null +++ b/extensions/linz/notebooks/asset-datetime-summary.ipynb @@ -0,0 +1,495 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4b29124d-9e52-4124-8b9f-efb60762c0dc", + "metadata": {}, + "source": [ + "# Creating asset summaries" + ] + }, + { + "cell_type": "markdown", + "id": "d38f9eb9-e10e-4903-9772-8b887e3cdef5", + "metadata": {}, + "source": [ + "We'll need a *LINZ extension schema validator:*" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7b2d67c6-3313-4023-9752-49d133d86d59", + "metadata": {}, + "outputs": [], + "source": [ + "from json import dumps, load\n", + "from sys import stderr\n", + "from urllib.request import urlopen\n", + "\n", + "from IPython.display import JSON\n", + "from jsonschema import Draft7Validator\n", + "\n", + "with urlopen(\"https://stac.linz.govt.nz/v0.0.10/linz/schema.json\") as schema_pointer:\n", + " schema = load(schema_pointer)\n", + "\n", + "validator = Draft7Validator(schema)\n", + "\n", + "def validate(instance: str) -> None:\n", + " found_error = False\n", + " \n", + " for error in validator.iter_errors(instance):\n", + " found_error = True\n", + " print(error.message, file=stderr)\n", + " \n", + " if not found_error:\n", + " print(\"Validated successfully\")\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "5e32302e-6714-494f-bbe0-23a60d9b383e", + "metadata": { + "tags": [] + }, + "source": [ + "The *collection example* is valid:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3931b743-885d-47ec-aff4-48faa8c6da5b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validated successfully\n" + ] + } + ], + "source": [ + "with urlopen(\"https://stac.linz.govt.nz/v0.0.10/linz/examples/collection.json\") as example_pointer:\n", + " example = load(example_pointer)\n", + "\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "f0e21a05-f00f-4923-a457-b3a7f25907d8", + "metadata": {}, + "source": [ + "Can we *summarise assets?*" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "76ec8b0b-6d5a-4902-a0b1-6b1415b577f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validated successfully\n" + ] + } + ], + "source": [ + "example[\"summaries\"][\"assets\"] = {\n", + " \"created\": {\"minimum\": \"1999-01-01T00:00:00Z\", \"maximum\": \"2010-01-01T00:00:00Z\"},\n", + " \"updated\": {\"minimum\": \"1999-01-02T00:00:00Z\", \"maximum\": \"2010-01-02T00:00:00Z\"},\n", + "}\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "3cb90d1c-d25f-46e5-9885-c3d4f303ef09", + "metadata": {}, + "source": [ + "Yes, but *are such properties validated as normal summaries?*" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2f043e5c-11a9-4ad8-9d6b-4ce8fe4cb1c0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validated successfully\n" + ] + } + ], + "source": [ + "example[\"summaries\"][\"assets\"] = {\n", + " \"created\": None,\n", + " \"updated\": {\"minimum\": \"1999-01-02T00:00:00Z\"},\n", + "}\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "16355abb-5da3-4486-9ae7-4f450330c508", + "metadata": {}, + "source": [ + "Oops, that should've caused *validation exceptions!* Let's instead extend the schema to validate asset summaries as normal summaries:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4cd36c8b-ac96-4ea3-a4a2-0db92ef07675", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "None is not valid under any of the given schemas\n", + "{'minimum': '1999-01-02T00:00:00Z'} is not valid under any of the given schemas\n" + ] + } + ], + "source": [ + "summaries_schema = schema[\"definitions\"][\"fields\"][\"properties\"][\"summaries\"]\n", + "summaries_schema[\"properties\"][\"assets\"] = {\n", + " \"additionalProperties\": {\n", + " \"$ref\": \"https://schemas.stacspec.org/v1.0.0/collection-spec/json-schema/collection.json#definitions/summaries/additionalProperties\"\n", + " }\n", + "}\n", + "\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "58b70eb6-84cb-4eda-813f-1508a2cbe1a6", + "metadata": {}, + "source": [ + "Looks like that worked. For the LINZ schema we want to be even stricter. Let's start by requiring the `assets` property:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "84ad9bf2-b324-47c4-a453-431bc2fcc8a9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'assets' is a required property\n" + ] + } + ], + "source": [ + "summaries_schema[\"required\"] = summaries_schema.get(\"required\", []) + [\"assets\"]\n", + "\n", + "del example[\"summaries\"][\"assets\"]\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "7741d37b-1a1b-4999-afe2-6b65a6c306a4", + "metadata": {}, + "source": [ + "Good! We also want to require `created` and `updated`:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0c75c673-7894-47f4-8712-46451488bb87", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'created' is a required property\n", + "'updated' is a required property\n", + "{'stac_version': '1.0.0', 'stac_extensions': ['https://linz.github.io/stac/v0.0.10/linz/schema.json', 'https://linz.github.io/stac/v0.0.10/quality/schema.json', 'https://stac-extensions.github.io/file/v2.0.0/schema.json', 'https://stac-extensions.github.io/projection/v1.0.0/schema.json', 'https://stac-extensions.github.io/version/v1.0.0/schema.json'], 'type': 'Collection', 'id': 'collection', 'title': 'A title', 'description': 'A description', 'license': 'Apache-2.0', 'linz:lifecycle': 'under development', 'linz:providers': [{'name': 'Example', 'description': 'Example description.', 'roles': ['custodian'], 'url': 'https://www.exampleurl.com'}, {'name': 'Example', 'description': 'Example description.', 'roles': ['manager'], 'url': 'https://www.exampleurl.com'}], 'linz:security_classification': 'unclassified', 'extent': {'spatial': {'bbox': [[172.9, 1.3, 173, 1.4]]}, 'temporal': {'interval': [['2015-06-23T00:00:00Z', None]]}}, 'summaries': {'created': {'minimum': '1999-01-01T00:00:00Z', 'maximum': '2010-01-01T00:00:00Z'}, 'updated': {'minimum': '1999-01-02T00:00:00Z', 'maximum': '2010-01-02T00:00:00Z'}, 'datetime': {'minimum': '2015-06-23T00:00:00Z', 'maximum': '2019-07-10T13:44:56Z'}, 'linz:geospatial_type': ['polygon'], 'assets': {}}, 'links': [], 'quality:description': 'Example quality description', 'quality:horizontal_accuracy': 1, 'quality:horizontal_accuracy_type': 'nominal', 'linz:history': 'This is an example dataset lineage description.', 'providers': [{'name': 'Example', 'description': 'Example description.', 'roles': ['licensor'], 'url': 'https://www.exampleurl.com'}, {'name': 'Example', 'description': 'Example description.', 'roles': ['producer'], 'url': 'https://www.exampleurl.com'}], 'version': '2.0.0', 'assets': {'example': {'href': 'https://example.com/examples/file.xyz', 'created': '2000-01-01T00:00:00Z', 'updated': '2020-01-01T00:00:00Z', 'file:checksum': '1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', 'proj:epsg': 32659}}} is not valid under any of the given schemas\n" + ] + } + ], + "source": [ + "summaries_schema[\"properties\"][\"assets\"][\"required\"] = [\"created\", \"updated\"]\n", + "\n", + "example[\"summaries\"][\"assets\"] = {}\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "ec1eac97-716a-4aac-8ebe-a19ea67766ba", + "metadata": {}, + "source": [ + "Finally we want to make sure `minimum` and `maximum` are datetimes. At this point we've basically reimplemented `summaries/additionalProperties`, so we can delete that:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b584a70d-5c29-45a5-a6a4-337e844cfddf", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "'created minimum' does not match '(\\\\+00:00|Z)$'\n", + "'created maximum' does not match '(\\\\+00:00|Z)$'\n", + "'updated minimum' does not match '(\\\\+00:00|Z)$'\n", + "'updated maximum' does not match '(\\\\+00:00|Z)$'\n" + ] + } + ], + "source": [ + "summaries_schema[\"properties\"][\"assets\"].pop(\"additionalProperties\", None)\n", + "summaries_schema[\"properties\"][\"assets\"][\"properties\"] = {\n", + " \"created\": {\n", + " \"type\": \"object\",\n", + " \"required\": [\"minimum\", \"maximum\"],\n", + " \"properties\": {\n", + " \"minimum\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date-time\",\n", + " \"pattern\": \"(\\\\+00:00|Z)$\"\n", + " },\n", + " \"maximum\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date-time\",\n", + " \"pattern\": \"(\\\\+00:00|Z)$\"\n", + " }\n", + " }\n", + " },\n", + " \"updated\": {\n", + " \"type\": \"object\",\n", + " \"required\": [\"minimum\", \"maximum\"],\n", + " \"properties\": {\n", + " \"minimum\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date-time\",\n", + " \"pattern\": \"(\\\\+00:00|Z)$\"\n", + " },\n", + " \"maximum\": {\n", + " \"type\": \"string\",\n", + " \"format\": \"date-time\",\n", + " \"pattern\": \"(\\\\+00:00|Z)$\"\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "example[\"summaries\"][\"assets\"] = {\n", + " \"created\": {\"minimum\": \"created minimum\", \"maximum\": \"created maximum\"},\n", + " \"updated\": {\"minimum\": \"updated minimum\", \"maximum\": \"updated maximum\"}\n", + "}\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "04934fbc-8482-43fe-9991-1e6bfdf04737", + "metadata": {}, + "source": [ + "Let's make sure by reverting to the valid summary:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8e7cfdb5-1232-40fd-957b-9b01f7448401", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Validated successfully\n" + ] + } + ], + "source": [ + "example[\"summaries\"][\"assets\"] = {\n", + " \"created\": {\"minimum\": \"1999-01-01T00:00:00Z\", \"maximum\": \"2010-01-01T00:00:00Z\"},\n", + " \"updated\": {\"minimum\": \"1999-01-02T00:00:00Z\", \"maximum\": \"2010-01-02T00:00:00Z\"},\n", + "}\n", + "validate(example)" + ] + }, + { + "cell_type": "markdown", + "id": "6ee3378f-5507-4831-8611-ce56c52304ac", + "metadata": {}, + "source": [ + "And we're golden! The final summaries schema looks like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f05ddf06-3d64-4c4f-a03b-80c4643f61ae", + "metadata": {}, + "outputs": [ + { + "data": { + "application/json": { + "properties": { + "assets": { + "properties": { + "created": { + "properties": { + "maximum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "type": "string" + }, + "minimum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "type": "string" + } + }, + "required": [ + "minimum", + "maximum" + ], + "type": "object" + }, + "updated": { + "properties": { + "maximum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "type": "string" + }, + "minimum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "type": "string" + } + }, + "required": [ + "minimum", + "maximum" + ], + "type": "object" + } + }, + "required": [ + "created", + "updated" + ] + }, + "created": { + "properties": { + "maximum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "title": "Latest asset creation time", + "type": "string" + }, + "minimum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "title": "Earliest asset creation time", + "type": "string" + } + }, + "required": [ + "minimum", + "maximum" + ], + "type": "object" + }, + "linz:geospatial_type": { + "items": { + "$ref": "#/definitions/geospatial_type_enum" + }, + "type": "array" + }, + "updated": { + "properties": { + "maximum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "title": "Latest asset updated time", + "type": "string" + }, + "minimum": { + "format": "date-time", + "pattern": "(\\+00:00|Z)$", + "title": "Earliest asset updated time", + "type": "string" + } + }, + "required": [ + "minimum", + "maximum" + ], + "type": "object" + } + }, + "required": [ + "created", + "updated", + "linz:geospatial_type", + "assets" + ], + "type": "object" + }, + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "JSON(summaries_schema)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python3 - stac", + "language": "python", + "name": "ipython_stac" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}