From 48b0ccd7c3134759691ca110ba75c4ad25c58df5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 25 Jul 2024 17:48:57 -0400 Subject: [PATCH 1/8] remove errorneous '$comment' under JSON object 'properties' --- json-schema/schema.json | 44 ++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/json-schema/schema.json b/json-schema/schema.json index 53a9486..54dcaee 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -126,9 +126,8 @@ }, "stac_extensions_eo_bands": { "required": ["eo:bands"], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].", "properties": { - "$comment": "https://github.com/stac-extensions/eo#item-properties-or-asset-fields", "eo:bands": { "type": "array", "minItems": 1, @@ -155,9 +154,8 @@ }, "stac_extensions_raster_bands": { "required": ["raster:bands"], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'raster:bands' as described in [https://github.com/stac-extensions/raster#item-asset-fields].", "properties": { - "$comment": "https://github.com/stac-extensions/raster#item-asset-fields", "raster:bands": { "type": "array", "minItems": 1, @@ -327,13 +325,11 @@ "type": "boolean", "$comment": "If trained from scratch, the source should be explicitly 'null'. 
However, omitting the source if pretrained is allowed.", "if": { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the STAC MLM pretraining reference.", "properties": { - "$comment": "Required MLM pretraining reference.", "mlm:pretrained": { "const": false } @@ -342,14 +338,12 @@ } }, "then": { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the STAC MLM pretraining reference.", "required": ["mlm:pretrained_source"], "properties": { - "$comment": "Required MLM pretraining reference.", "mlm:pretrained_source": { "const": null } @@ -762,16 +756,15 @@ "$comment": "This definition ensures that, if at least 1 named MLM input 'bands' is provided, at least 1 of the supported references from EO, Raster or STAC Core 1.1 are provided as well. 
Otherwise, 'bands' must be explicitly empty.", "if": { "type": "object", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { "type": "object", "required": [ "mlm:input" ], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the MLM input with bands listing referring to at least one band name.", "properties": { - "$comment": "Required MLM bands listing referring to at least one band name.", "mlm:input": { "type": "array", "items": { @@ -801,14 +794,12 @@ "$ref": "#/$defs/stac_extensions_raster" }, { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { "required": ["raster:bands"], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field, containing the 'raster:bands' properties defined in [https://github.com/stac-extensions/raster#item-asset-fields].", "properties": { - "$comment": "https://github.com/stac-extensions/raster#item-asset-fields", "raster:bands": { "type": "array", "minItems": 1, @@ -832,9 +823,8 @@ "$comment": "EO extension expects at 'eo:bands' in (at least) 1 asset, and possibly in Item properties. Items are for summarizing. 
Since MLM also uses bands by 'name' reference, allow any combination, and let 'eo' validate remaining combinations.", "anyOf": [ { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { "$ref": "#/$defs/stac_extensions_eo_bands" } @@ -866,16 +856,14 @@ "$ref": "#/$defs/stac_version_1.1" }, { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { "required": [ "bands" ], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the STAC Core 'bands' field defined by [https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#bands].", "properties": { - "$comment": "https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#bands", "bands": { "type": "array", "minItems": 1, @@ -895,16 +883,14 @@ "$comment": "Case where no 'bands' are referenced in the MLM input. 
Counter-validate there are no 'eo:bands' or 'raster:bands' in the Model Asset.", "allOf": [ { - "$comment": "This is the JSON-object 'properties' definition.", + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", "properties": { - "$comment": "This is the STAC-Item 'properties' field.", "properties": { "required": [ "mlm:input" ], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field.", + "$comment": "This is the JSON-object 'properties' definition for the STAC MLM input definition with required bands listing referring to at least one band name.", "properties": { - "$comment": "Required MLM bands listing referring to at least one band name.", "mlm:input": { "type": "array", "items": { From 85adc2be2fe793016a0d5a487caf21e34ca4f487 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 26 Jul 2024 10:44:59 -0400 Subject: [PATCH 2/8] split ModelBands and AnyBandsRef definitions --- json-schema/schema.json | 77 ++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/json-schema/schema.json b/json-schema/schema.json index 54dcaee..f5f4487 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -50,6 +50,10 @@ { "$ref": "#/$defs/stac_extensions_mlm" }, + { + "$comment": "Schema to validate cross-references of bands between MLM inputs and any 'bands'-compliant section describing them using another STAC definition.", + "$ref": "#/$defs/AnyBandsRef" + }, { "$comment": "Schema to validate model role requirement.", "$ref": "#/$defs/AssetModelRoleMinimumOneDefinition" @@ -706,51 +710,44 @@ }, "ModelBands": { "description": "List of bands (if any) that compose the input. 
Band order represents the index position of the bands.", - "allOf": [ - { - "$comment": "No 'minItems' here to support model inputs not using any band (other data source).", - "type": "array", - "items": { - "oneOf": [ - { - "description": "Implied named-band with the name directly provided.", + "$comment": "No 'minItems' here to support model inputs not using any band (other data source).", + "type": "array", + "items": { + "oneOf": [ + { + "description": "Implied named-band with the name directly provided.", + "type": "string", + "minLength": 1 + }, + { + "description": "Explicit named-band with optional derived expression to obtain it.", + "type": "object", + "required": [ + "name" + ], + "properties": { + "name": { "type": "string", "minLength": 1 }, - { - "description": "Explicit named-band with optional derived expression to obtain it.", - "type": "object", - "required": [ - "name" - ], - "properties": { - "name": { - "type": "string", - "minLength": 1 - }, - "format": { - "description": "Format to interpret the specified expression used to obtain the band.", - "type": "string", - "minLength": 1 - }, - "expression": { - "description": "Any representation relevant for the specified 'format'." 
- } - }, - "dependencies": { - "format": ["expression"], - "expression": ["format"] - }, - "additionalProperties": false + "format": { + "description": "Format to interpret the specified expression used to obtain the band.", + "type": "string", + "minLength": 1 + }, + "expression": { + "description": "Any representation relevant for the specified 'format'.", + "type": {} } - ] + }, + "dependencies": { + "format": ["expression"], + "expression": ["format"] + }, + "additionalProperties": false } - }, - { - "$comment": "However, if any band is indicated, a 'bands'-compliant section should describe them.", - "$ref": "#/$defs/AnyBandsRef" - } - ] + ] + } }, "AnyBandsRef": { "$comment": "This definition ensures that, if at least 1 named MLM input 'bands' is provided, at least 1 of the supported references from EO, Raster or STAC Core 1.1 are provided as well. Otherwise, 'bands' must be explicitly empty.", From 5212acfbde02f22a651b6922321b9f7d7eaa3806 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 17:21:56 -0400 Subject: [PATCH 3/8] update changes --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a480432..a92a196 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,16 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://github.com/crim-ca/mlm-extension/tree/main) ### Added -- n/a +- Add `AnyBandsRef` definition explicitly to STAC *Item* JSON schema, rather than implicitly inferred via ``mlm:input``. ### Changed -- n/a +- Split `ModelBands` and `AnyBandsRef` definitions in the JSON schema to allow them to be referenced individually. ### Deprecated - n/a ### Removed -- n/a +- Removed `$comment` entries from the JSON schema that are considered as invalid by some parsers. 
### Fixed - n/a From 33c57ae6efbaecc62dd5ed6d943ba0dd0d381405 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 18:09:01 -0400 Subject: [PATCH 4/8] revert 'type: {}' for 'expression' --- json-schema/schema.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/json-schema/schema.json b/json-schema/schema.json index f5f4487..df36982 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -736,8 +736,7 @@ "minLength": 1 }, "expression": { - "description": "Any representation relevant for the specified 'format'.", - "type": {} + "description": "Any representation relevant for the specified 'format'." } }, "dependencies": { From 9d3b7cab4a0b24677646c88d0ede0b93f61dd69d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 22:22:04 -0400 Subject: [PATCH 5/8] revision of mlm:input to validate bands against STAC extensions band references --- CHANGELOG.md | 22 ++- CONTRIBUTING.md | 34 +++- README.md | 63 ++++-- README_DLM_LEGACY.md | 6 +- best-practices.md | 18 +- examples/item_eo_bands.json | 2 +- examples/item_multi_io.json | 86 ++++---- examples/item_raster_bands.json | 266 ++++++++++++------------- json-schema/schema.json | 339 +++++++++++++++----------------- stac_model/examples.py | 19 +- tests/test_schema.py | 23 ++- 11 files changed, 484 insertions(+), 394 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a92a196..7986b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,19 +8,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased](https://github.com/crim-ca/mlm-extension/tree/main) ### Added -- Add `AnyBandsRef` definition explicitly to STAC *Item* JSON schema, rather than implicitly inferred via ``mlm:input``. +- Add `raster:bands` required property `name` for describing `mlm:input` bands + (see [README - Bands and Statistics](README.md#bands-and-statistics) for details). 
+- Add README warnings about new extension `eo` and `raster` versions. ### Changed - Split `ModelBands` and `AnyBandsRef` definitions in the JSON schema to allow them to be referenced individually. +- Move `AnyBandsRef` definition explicitly to STAC Item JSON schema, rather than implicitly inferred via `mlm:input`. +- Modified the JSON schema to use an `if` check of the `type` (STAC Item or Collection) prior to validating further + properties. This allows some validators (e.g. `pystac`) to better report the *real* error that causes the schema + to fail, rather than reporting the first mismatching `type` case with a poor error description to debug the issue. ### Deprecated - n/a ### Removed - Removed `$comment` entries from the JSON schema that are considered as invalid by some parsers. +- When `mlm:input` objects do **NOT** define band references (i.e.: `bands: []` is used), the JSON schema will not + fail if an Asset with the `mlm:model` role contains a band definition. This is to allow MLM model definitions to + simultaneously use some inputs with `bands` reference names while others do not. ### Fixed -- n/a +- Band checks against [`eo`](https://github.com/stac-extensions/eo), [`raster`](https://github.com/stac-extensions/raster) + or STAC Core 1.1 [`bands`](https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#bands) + when a `mlm:input` references names in `bands` are now properly validated. +- Fix the examples using `raster:bands` incorrectly defined in STAC Item properties. + The correct use is for them to be defined under the STAC Asset using the `mlm:model` role. +- Fix the [EuroSAT ResNet pydantic example](./stac_model/examples.py) that incorrectly referenced some `bands` + in its `mlm:input` definition without providing any definition of those bands. 
The `eo:bands` properties have + been added to the corresponding `model` Asset using + the [`pystac.extensions.eo`](https://github.com/stac-utils/pystac/blob/main/pystac/extensions/eo.py) utilities. +- Fix various STAC Asset definitions erroneously employing `mlm:model` role instead of the intended `mlm:source_code`. ## [v1.2.0](https://github.com/crim-ca/mlm-extension/tree/v1.2.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1fb68c6..a9adfdb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,7 +19,7 @@ make install-dev make pre-commit-install ``` -## PR submittion +## PR submission Before submitting your code please do the following steps: @@ -41,7 +41,7 @@ make lint-all make test ``` -5. Upload your changes to your fork, then make a PR from there to the main repo: +6. Upload your changes to your fork, then make a PR from there to the main repo: ```bash git checkout -b your-branch @@ -53,11 +53,15 @@ git push -u origin your-branch ## Building and releasing -> :warning:
+ + +> [!WARNING] > There are multiple types of releases for this repository:
> 1. Release for MLM specification (usually, this should include one for `stac-model` as well to support it) > 2. Release for `stac-model` only + + ### Building a new version of MLM specification - Checkout to the `main` branch by making sure the CI passed all previous tests. @@ -69,9 +73,14 @@ git push -u origin your-branch - Make a commit to `GitHub` and push the corresponding auto-generated `v{MAJOR}.{MINOR}.{PATCH}` tag. - Validate that the CI validated everything once again. - Create a `GitHub release` with the created tag. - > :warning:
- > - Ensure the "Set as the latest release" option is selected :heavy_check_mark:. - > - Ensure the diff ranges from the previous MLM version, and not an intermediate `stac-model` release. + + + +> [!WARNING] +> - Ensure the "Set as the latest release" option is selected :heavy_check_mark:. +> - Ensure the diff ranges from the previous MLM version, and not an intermediate `stac-model` release. + + ### Building a new version of `stac-model` @@ -83,9 +92,14 @@ git push -u origin your-branch - Checkout to `main` branch that contais the freshly created merge commit. - Push the tag `stac-model-v{MAJOR}.{MINOR}.{PATCH}`. The CI should auto-publish it to PyPI. - Create a `GitHub release` - > :warning:
- > - Ensure the "Set as the latest release" option is deselected :x:. - > - Ensure the diff ranges from the previous release of `stac-model`, not an intermediate MLM release. + + + +> [!WARNING] +> - Ensure the "Set as the latest release" option is deselected :x:. +> - Ensure the diff ranges from the previous release of `stac-model`, not an intermediate MLM release. + + ## Other help @@ -93,7 +107,7 @@ You can contribute by spreading a word about this library. It would also be a huge contribution to write a short article on how you are using this project. You can also share how the ML Model extension does or does -not serve your needs with us in the Github Discussions or raise +not serve your needs with us in the GitHub Discussions or raise Issues for bugs. [poetry-install]: https://github.com/python-poetry/install.python-poetry.org diff --git a/README.md b/README.md index 99f2159..f20587c 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,9 @@ It is recommended to define `accelerator` with one of the following values: - `intel-ipex-gpu` for models optimized with IPEX for Intel GPUs - `macos-arm` for models trained on Apple Silicon -> :warning:
+ + +> [!WARNING] > If `mlm:accelerator = amd64`, this explicitly indicates that the model does not (and will not try to) use any > accelerator, even if some are available from the runtime environment. This is to be distinguished from > the value `mlm:accelerator = null`, which means that the model *could* make use of some accelerators if provided, @@ -232,6 +234,8 @@ It is recommended to define `accelerator` with one of the following values: > using `mlm:accelerator = amd64` also set explicitly `mlm:accelerator_constrained = true` to illustrate that the > model **WILL NOT** use accelerators, although the hardware resolution should be identical nonetheless. + + When `mlm:accelerator = null` is employed, the value of `mlm:accelerator_constrained` can be ignored, since even if set to `true`, there would be no `accelerator` to contain against. To avoid confusion, it is suggested to set the `mlm:accelerator_constrained = false` or omit the field entirely in this case. @@ -265,7 +269,15 @@ representing bands information, including notably the `nodata` value, the `data_type` (see also [Data Type Enum](#data-type-enum)), and [Common Band Names][stac-band-names]. -> :information_source:
+ + +> [!WARNING] +> Only versions `v1.x` of `eo` and `raster` are supported to provide `mlm:input` band references. +> Versions `v2.x` of those extensions rely on the [STAC 1.1 - Band Object][stac-1.1-band] instead. +> If those versions are desired, consider migrating your MLM definition to use [STAC 1.1 - Band Object][stac-1.1-band] +> as well for referencing `mlm:input` with band names. + +> [!NOTE] > Due to how the schema for [`eo:bands`][stac-eo-band] is defined, it is not sufficient to *only* provide > the `eo:bands` property at the STAC Item level. The schema validation of the EO extension explicitly looks > for a corresponding set of bands under an Asset, and if none is found, it disallows `eo:bands` in the Item properties. @@ -273,12 +285,21 @@ and [Common Band Names][stac-band-names]. > (see [Model Asset](#model-asset)), or define them *both* under the Asset and Item properties. If the second > approach is selected, it is recommended that the `eo:bands` under the Asset contains only the `name` or the > `common_name` property, such that all other details about the bands are defined at the Item level. +> An example of such representation is provided in +> [examples/item_eo_bands_summarized.json](examples/item_eo_bands_summarized.json). +>

+> For an example where `eo:bands` are entirely defined in the Asset on their own, please refer to +> [examples/item_eo_bands.json](examples/item_eo_bands.json) instead. >

> For more details, refer to [stac-extensions/eo#12](https://github.com/stac-extensions/eo/issues/12). >
-> For an example, please refer to [examples/item_eo_bands.json](examples/item_eo_bands.json). -> Notably in this example, the `assets.weights.eo:bands` property provides the `name` to fulfill the Asset requirement, -> while all additional band details are provided in `properties.eo:bands`. + +> [!NOTE] +> When using `raster:bands`, an additional `name` parameter **MUST** be provided for each band. This parameter +> is not defined in the `raster` extension itself, but is permitted. This addition is required to ensure +> that `mlm:input` bands referenced by name can be associated to their respective `raster:bands` definitions. + + Only bands used as input to the model should be included in the MLM `bands` field. To avoid duplicating the information, MLM only uses the `name` of whichever "Band Object" is defined in the STAC Item. @@ -294,12 +315,12 @@ to normalize all bands, rather than normalizing the values over a single product applied differently for distinct [Model Input](#model-input-object) definitions, in order to adjust for intrinsic properties of the model. 
-[stac-1.1-band]: https://github.com/radiantearth/stac-spec/pull/1254 -[stac-1.1-stats]: https://github.com/radiantearth/stac-spec/blob/bands/item-spec/common-metadata.md#statistics-object -[stac-eo-band]: https://github.com/stac-extensions/eo?tab=readme-ov-file#band-object -[stac-raster-band]: https://github.com/stac-extensions/raster?tab=readme-ov-file#raster-band-object -[stac-raster-stats]: https://github.com/stac-extensions/raster?tab=readme-ov-file#statistics-object -[stac-band-names]: https://github.com/stac-extensions/eo?tab=readme-ov-file#common-band-names +[stac-1.1-band]: https://github.com/radiantearth/stac-spec/blob/v1.1.0/commons/common-metadata.md#bands +[stac-1.1-stats]: https://github.com/radiantearth/stac-spec/blob/v1.1.0/commons/common-metadata.md#statistics-object +[stac-eo-band]: https://github.com/stac-extensions/eo/tree/v1.1.0#band-object +[stac-raster-band]: https://github.com/stac-extensions/raster/tree/v1.1.0#raster-band-object +[stac-raster-stats]: https://github.com/stac-extensions/raster/tree/v1.1.0#statistics-object +[stac-band-names]: https://github.com/stac-extensions/eo#common-band-names #### Model Band Object @@ -309,10 +330,14 @@ properties of the model. | format | string | The type of expression that is specified in the `expression` property. | | expression | \* | An expression compliant with the `format` specified. The expression can be applied to any data type and depends on the `format` given. | -> :information_source:
+ + +> [!NOTE] > Although `format` and `expression` are not required in this context, they are mutually dependent on each other.
> See also [Processing Expression](#processing-expression) for more details and examples. + + The `format` and `expression` properties can serve multiple purpose. 1. Applying a band-specific pre-processing step, @@ -441,14 +466,18 @@ the following formats are recommended as alternative scripts and function refere | `docker` | string | An URI with image and tag to a Docker. | `ghcr.io/NAMESPACE/IMAGE_NAME:latest` | | `uri` | string | An URI to some binary or script. | `{"href": "https://raw.githubusercontent.com/ORG/REPO/TAG/package/cli.py", "type": "text/x-python"}` | -> :information_source:
+ + +> [!NOTE] > Above definitions are only indicative, and more can be added as desired with even more custom definitions. > It is left as an implementation detail for users to resolve how these expressions should be handled at runtime. -> :warning:
+> [!WARNING] > See also discussion regarding additional processing expressions: > [stac-extensions/processing#31](https://github.com/stac-extensions/processing/issues/31) + + [stac-proc-expr]: https://github.com/stac-extensions/processing#expression-object ### Model Output Object @@ -543,10 +572,14 @@ In order to provide more context, the following roles are also recommended were | mlm:model | `model` | Required role for [Model Asset](#model-asset). | | mlm:source_code | `code` | Required role for [Model Asset](#source-code-asset). | -> :information_source:
+ + +> [!NOTE] > (*) These roles are offered as direct conversions from the previous extension > that provided [ML-Model Asset Roles][ml-model-asset-roles] to provide easier upgrade to the MLM extension. + + [ml-model-asset-roles]: https://github.com/stac-extensions/ml-model?tab=readme-ov-file#asset-objects ### Model Asset diff --git a/README_DLM_LEGACY.md b/README_DLM_LEGACY.md index fbc815a..058ed45 100644 --- a/README_DLM_LEGACY.md +++ b/README_DLM_LEGACY.md @@ -1,9 +1,13 @@ # Deep Learning Model (DLM) Extension -> :information_source:
+ + +> [!NOTE] > This is legacy documentation references of Deep Learning Model extension > preceding the current Machine Learning Model (MLM) extension. + + Check the original [Technical Report](https://github.com/crim-ca/CCCOT03/raw/main/CCCOT03_Rapport%20Final_FINAL_EN.pdf). ![Image Description](https://i.imgur.com/cVAg5sA.png) diff --git a/best-practices.md b/best-practices.md index fc595bf..9d94b35 100644 --- a/best-practices.md +++ b/best-practices.md @@ -41,7 +41,9 @@ could include the bbox of "the world" `[-90, -180, 90, 180]` and the `start_date would ideally be generic values like `["1900-01-01T00:00:00Z", null]` (see warning below). However, due to limitations with the STAC 1.0 specification, this time extent is not applicable. -> :warning:
+ + +> [!WARNING] > The `null` value is not allowed for datetime specification. > As a workaround, the `end_datetime` can be set with a "very large value" > (similarly to `start_datetime` set with a small value), such as `"9999-12-31T23:59:59Z"`. @@ -49,6 +51,8 @@ However, due to limitations with the STAC 1.0 specification, this time extent is >

> For more details, see the following [discussion](https://github.com/radiantearth/stac-spec/issues/1268). + + It is to be noted that generic and very broad spatiotemporal extents like above rarely reflect the reality regarding the capabilities and precision of the model to predict reliable results. If a more restrained area and time of interest can be identified, such as the ranges for which the training @@ -187,9 +191,13 @@ MLM definition to indicate which class values can be contained in the resulting For more details, see the [Model Output Object](README.md#model-output-object) definition. -> :information_source:
+ + +> [!NOTE] > Update according to [stac-extensions/classification#48](https://github.com/stac-extensions/classification/issues/48). + + ### Scientific Extension Provided that most models derive from previous scientific work, it is strongly recommended to employ the @@ -252,9 +260,13 @@ inference strategies to apply a model should define the [Source Code Asset](READ This code is in itself ideal to guide users how to run it, and should therefore be replicated as an `example` link reference to offer more code samples to execute the model. -> :information_source:
+ + +> [!NOTE] > Update according to [stac-extensions/example-links#4](https://github.com/stac-extensions/example-links/issues/4). + + ### Version Extension In the even that a model is retrained with gradually added annotations or improved training strategies leading to diff --git a/examples/item_eo_bands.json b/examples/item_eo_bands.json index c937e92..0efd02c 100644 --- a/examples/item_eo_bands.json +++ b/examples/item_eo_bands.json @@ -374,7 +374,7 @@ "description": "Source code to run the model.", "type": "text/x-python", "roles": [ - "mlm:model", + "mlm:source_code", "code", "metadata" ] diff --git a/examples/item_multi_io.json b/examples/item_multi_io.json index fa6c46b..4922415 100644 --- a/examples/item_multi_io.json +++ b/examples/item_multi_io.json @@ -43,7 +43,7 @@ 58.21798141355221 ], "properties": { - "description": "Generic model that employs multiple input sources with different combination of bands.", + "description": "Generic model that employs multiple input sources with different combination of bands, and some inputs without any band at all.", "datetime": null, "start_datetime": "1900-01-01T00:00:00Z", "end_datetime": "9999-12-31T23:59:59Z", @@ -216,48 +216,6 @@ "expression": "logical_not(A)" } } - ], - "raster:bands": [ - { - "name": "B02 - blue", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B03 - green", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B04 - red", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B08 - nir", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - } ] }, "assets": { @@ -269,6 +227,48 @@ 
"roles": [ "mlm:model", "mlm:weights" + ], + "raster:bands": [ + { + "name": "B02 - blue", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B03 - green", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B04 - red", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B08 - nir", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + } ] } }, diff --git a/examples/item_raster_bands.json b/examples/item_raster_bands.json index 0677909..46334e9 100644 --- a/examples/item_raster_bands.json +++ b/examples/item_raster_bands.json @@ -204,138 +204,6 @@ ], "post_processing_function": null } - ], - "raster:bands": [ - { - "name": "B01", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 60, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B02", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B03", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B04", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B05", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B06", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 
0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B07", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B08", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 10, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B8A", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B09", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 60, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B10", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 60, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B11", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 0.0001, - "offset": 0, - "unit": "m" - }, - { - "name": "B12", - "nodata": 0, - "data_type": "uint16", - "bits_per_sample": 15, - "spatial_resolution": 20, - "scale": 0.0001, - "offset": 0, - "unit": "m" - } ] }, "assets": { @@ -347,6 +215,138 @@ "roles": [ "mlm:model", "mlm:weights" + ], + "raster:bands": [ + { + "name": "B01", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 60, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B02", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B03", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B04", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": 
"B05", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B06", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B07", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B08", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 10, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B8A", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B09", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 60, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B10", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 60, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B11", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + }, + { + "name": "B12", + "nodata": 0, + "data_type": "uint16", + "bits_per_sample": 15, + "spatial_resolution": 20, + "scale": 0.0001, + "offset": 0, + "unit": "m" + } ] }, "source_code": { @@ -355,7 +355,7 @@ "description": "Source code to run the model.", "type": "text/x-python", "roles": [ - "mlm:model", + "mlm:source_code", "code", "metadata" ] diff --git a/json-schema/schema.json b/json-schema/schema.json index df36982..90b0ad0 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -3,99 +3,110 @@ "$id": "https://crim-ca.github.io/mlm-extension/v1.2.0/schema.json", "title": "Machine Learning Model STAC Extension Schema", "description": "This object 
represents the metadata for a Machine Learning Model (MLM) used in STAC documents.", - "oneOf": [ + "$comment": "Use 'allOf+if/then' for each 'type' to allow implementations to report more specific messages about the exact case in error (if any). Using only a 'oneOf/allOf' with the 'type' caused any incompatible 'type' to be reported first with a minimal and poorly described error by 'pystac'.", + "allOf": [ { "$comment": "This is the schema for STAC extension MLM in Items.", - "allOf": [ - { - "$comment": "Schema to validate the MLM fields under Item properties or Assets properties.", - "type": "object", - "required": [ - "type", - "properties", - "assets" - ], - "properties": { - "type": { - "const": "Feature" - }, + "if": { + "required": ["type"], + "properties": { + "type": { + "const": "Feature" + } + } + }, + "then": { + "allOf": [ + { + "$comment": "Schema to validate the MLM fields under Item properties or Assets properties.", + "type": "object", + "required": [ + "properties", + "assets" + ], "properties": { - "allOf": [ - { - "required": [ - "mlm:name", - "mlm:architecture", - "mlm:tasks", - "mlm:input", - "mlm:output" - ] - }, - { - "$ref": "#/$defs/fields" - } - ] - }, - "assets": { - "type": "object", - "additionalProperties": { + "properties": { "allOf": [ + { + "required": [ + "mlm:name", + "mlm:architecture", + "mlm:tasks", + "mlm:input", + "mlm:output" + ] + }, { "$ref": "#/$defs/fields" } ] + }, + "assets": { + "type": "object", + "additionalProperties": { + "allOf": [ + { + "$ref": "#/$defs/fields" + } + ] + } } } + }, + { + "$ref": "#/$defs/stac_extensions_mlm" + }, + { + "$comment": "Schema to validate cross-references of bands between MLM inputs and any 'bands'-compliant section describing them using another STAC definition.", + "$ref": "#/$defs/AnyBandsRef" + }, + { + "$comment": "Schema to validate model role requirement.", + "$ref": "#/$defs/AssetModelRoleMinimumOneDefinition" } - }, - { - "$ref": "#/$defs/stac_extensions_mlm" - }, - { - 
"$comment": "Schema to validate cross-references of bands between MLM inputs and any 'bands'-compliant section describing them using another STAC definition.", - "$ref": "#/$defs/AnyBandsRef" - }, - { - "$comment": "Schema to validate model role requirement.", - "$ref": "#/$defs/AssetModelRoleMinimumOneDefinition" - } - ] + ] + } }, { "$comment": "This is the schema for STAC extension MLM in Collections.", - "allOf": [ - { - "type": "object", - "required": [ - "type" - ], - "properties": { - "type": { - "const": "Collection" - }, - "summaries": { - "type": "object", - "additionalProperties": { - "$ref": "#/$defs/fields" - } - }, - "assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/$defs/fields" - } - }, - "item_assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/$defs/fields" + "if": { + "required": ["type"], + "properties": { + "type": { + "const": "Collection" + } + } + }, + "then": { + "allOf": [ + { + "type": "object", + "properties": { + "summaries": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/fields" + } + }, + "assets": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/fields" + } + }, + "item_assets": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/fields" + } } } + }, + { + "$ref": "#/$defs/stac_extensions_mlm" } - }, - { - "$ref": "#/$defs/stac_extensions_mlm" - } - ] + ] + } } ], "$defs": { @@ -128,15 +139,44 @@ } } }, - "stac_extensions_eo_bands": { - "required": ["eo:bands"], + "stac_extensions_eo_bands_item": { "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].", "properties": { - "eo:bands": { - "type": "array", - "minItems": 1, - "items": { - "type": "object" + "properties": { + "required": ["eo:bands"], + "properties": { + "eo:bands": { + "type": "array", + "minItems": 
1, + "items": { + "type": "object" + } + } + } + } + } + }, + "stac_extensions_eo_bands_asset": { + "required": ["assets"], + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Asset containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].", + "properties": { + "assets": { + "additionalProperties": { + "if": { + "$ref": "#/$defs/AssetModelRole" + }, + "then": { + "required": ["eo:bands"], + "properties": { + "eo:bands": { + "type": "array", + "minItems": 1, + "items": { + "type": "object" + } + } + } + } } } } @@ -156,15 +196,37 @@ } } }, - "stac_extensions_raster_bands": { - "required": ["raster:bands"], - "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'raster:bands' as described in [https://github.com/stac-extensions/raster#item-asset-fields].", + "stac_extensions_raster_bands_asset": { + "required": ["assets"], + "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item at least one Asset field containing 'raster:bands' as described in [https://github.com/stac-extensions/raster/tree/v1.1.0#item-asset-fields].", "properties": { - "raster:bands": { - "type": "array", - "minItems": 1, - "items": { - "type": "object" + "assets": { + "additionalProperties": { + "if": { + "$ref": "#/$defs/AssetModelRole" + }, + "then": { + "required": [ + "raster:bands" + ], + "properties": { + "raster:bands": { + "type": "array", + "minItems": 1, + "items": { + "$comment": "Raster extension does not explicitly indicate a 'name', but one is needed for MLM.", + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string", + "minLength": 1 + } + } + } + } + } + } } } } @@ -790,22 +852,7 @@ "$ref": "#/$defs/stac_extensions_raster" }, { - "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", - 
"properties": { - "properties": { - "required": ["raster:bands"], - "$comment": "This is the JSON-object 'properties' definition for the STAC Item 'properties' field, containing the 'raster:bands' properties defined in [https://github.com/stac-extensions/raster#item-asset-fields].", - "properties": { - "raster:bands": { - "type": "array", - "minItems": 1, - "items": { - "type": "object" - } - } - } - } - } + "$ref": "#/$defs/stac_extensions_raster_bands_asset" } ] }, @@ -816,30 +863,12 @@ "$ref": "#/$defs/stac_extensions_eo" }, { - "$comment": "EO extension expects at 'eo:bands' in (at least) 1 asset, and possibly in Item properties. Items are for summarizing. Since MLM also uses bands by 'name' reference, allow any combination, and let 'eo' validate remaining combinations.", "anyOf": [ { - "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", - "properties": { - "properties": { - "$ref": "#/$defs/stac_extensions_eo_bands" - } - } + "$ref": "#/$defs/stac_extensions_eo_bands_item" }, { - "$comment": "For the case where 'eo:bands' is in the Asset of the model, it must also contain the 'mlm:model' role.", - "properties": { - "assets": { - "additionalProperties": { - "if": { - "$ref": "#/$defs/AssetModelRole" - }, - "then": { - "$ref": "#/$defs/stac_extensions_eo_bands" - } - } - } - } + "$ref": "#/$defs/stac_extensions_eo_bands_asset" } ] } @@ -876,61 +905,7 @@ ] }, "else": { - "$comment": "Case where no 'bands' are referenced in the MLM input. 
Counter-validate there are no 'eo:bands' or 'raster:bands' in the Model Asset.", - "allOf": [ - { - "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties'.", - "properties": { - "properties": { - "required": [ - "mlm:input" - ], - "$comment": "This is the JSON-object 'properties' definition for the STAC MLM input definition with required bands listing referring to at least one band name.", - "properties": { - "mlm:input": { - "type": "array", - "items": { - "required": [ - "bands" - ], - "$comment": "This is the 'Model Input Object' properties.", - "properties": { - "bands": { - "$comment": "No bands reference provided, therefore none permitted in model inputs.", - "type": "array", - "maxItems": 0 - } - } - } - } - } - } - } - }, - { - "properties": { - "assets": { - "additionalProperties": { - "if": { - "$ref": "#/$defs/AssetModelRole" - }, - "then": { - "not": { - "anyOf": [ - { - "$ref": "#/$defs/stac_extensions_eo_bands" - }, - { - "$ref": "#/$defs/stac_extensions_raster_bands" - } - ] - } - } - } - } - } - } - ] + "$comment": "Case where no 'bands' (empty list) are referenced in the MLM input. Because models can use a mixture of inputs with/without bands, we cannot enforce eo/raster/stac bands references to be omitted. If bands are provided in the 'mlm:model', it will simply be an odd case if none are used in any 'mlm:input' bands'." 
} } } diff --git a/stac_model/examples.py b/stac_model/examples.py index 47be1db..d882623 100644 --- a/stac_model/examples.py +++ b/stac_model/examples.py @@ -3,6 +3,7 @@ import pystac import shapely from dateutil.parser import parse as parse_dt +from pystac.extensions.eo import Band, EOExtension from pystac.extensions.file import FileExtension from stac_model.base import ProcessingExpression @@ -134,7 +135,7 @@ def eurosat_resnet() -> ItemMLModelExtension: href="https://github.com/microsoft/torchgeo/blob/61efd2e2c4df7ebe3bd03002ebbaeaa3cfe9885a/torchgeo/models/resnet.py#L207", media_type="text/x-python", roles=[ - "mlm:model", + "mlm:source_code", "code", ], ), @@ -214,10 +215,24 @@ def eurosat_resnet() -> ItemMLModelExtension: model_asset = cast( FileExtension[pystac.Asset], - pystac.extensions.file.FileExtension.ext(assets["model"], add_if_missing=True), + FileExtension.ext(assets["model"], add_if_missing=True), ) model_asset.apply(size=ml_model_size) + eo_model_asset = cast( + EOExtension[pystac.Asset], + EOExtension.ext(assets["model"], add_if_missing=True), + ) + # NOTE: + # typically, it is recommended to add as much details as possible for the band description + # minimally, the names (which are well-known for sentinel-2) are sufficient + eo_bands = [] + for name in band_names: + band = Band({}) + band.apply(name=name) + eo_bands.append(band) + eo_model_asset.apply(bands=eo_bands) + item_mlm = MLModelExtension.ext(item, add_if_missing=True) item_mlm.apply(ml_model_meta.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True)) return item_mlm diff --git a/tests/test_schema.py b/tests/test_schema.py index 5cf04d5..74a0523 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -27,6 +27,25 @@ def test_mlm_schema( assert SCHEMA_URI in validated +@pytest.mark.parametrize( + "mlm_example", + ["item_raster_bands.json"], + indirect=True, +) +def test_mlm_missing_bands_invalid_if_mlm_input_lists_bands( + mlm_validator: STACValidator, + 
mlm_example: Dict[str, JSON], +) -> None: + mlm_item = pystac.Item.from_dict(mlm_example) + pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid + + mlm_bands_bad_data = copy.deepcopy(mlm_example) + mlm_bands_bad_data["assets"]["weights"].pop("raster:bands") # type: ignore # no 'None' to raise in case modified + with pytest.raises(pystac.errors.STACValidationError) as err: + mlm_bands_bad_item = pystac.Item.from_dict(mlm_bands_bad_data) + pystac.validation.validate(mlm_bands_bad_item, validator=mlm_validator) + + @pytest.mark.parametrize( "mlm_example", ["item_eo_bands_summarized.json"], @@ -40,7 +59,7 @@ def test_mlm_eo_bands_invalid_only_in_item_properties( pystac.validation.validate(mlm_item, validator=mlm_validator) # ensure original is valid mlm_eo_bands_bad_data = copy.deepcopy(mlm_example) - mlm_eo_bands_bad_data["assets"]["weights"].pop("eo:bands") # type: ignore + mlm_eo_bands_bad_data["assets"]["weights"].pop("eo:bands") # type: ignore # no 'None' to raise in case modified with pytest.raises(pystac.errors.STACValidationError): mlm_eo_bands_bad_item = pystac.Item.from_dict(mlm_eo_bands_bad_data) pystac.validation.validate(mlm_eo_bands_bad_item, validator=mlm_validator) @@ -61,7 +80,7 @@ def test_mlm_no_input_allowed_but_explicit_empty_array_required( pystac.validation.validate(mlm_item, validator=mlm_validator) with pytest.raises(pystac.errors.STACValidationError): - mlm_data["properties"].pop("mlm:input") # type: ignore + mlm_data["properties"].pop("mlm:input") # type: ignore # no 'None' to raise in case modified mlm_item = pystac.Item.from_dict(mlm_data) pystac.validation.validate(mlm_item, validator=mlm_validator) From 5b1ecf41b8f4f6053004e3f782074740149f7e15 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 22:36:29 -0400 Subject: [PATCH 6/8] fix lint --- Makefile | 39 ++++++++++++++++++------------------ json-schema/schema.json | 44 ++++++++++++++++++++++++++++++----------- 
tests/test_schema.py | 2 +- 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index 6c0fa19..599754d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ #* Variables -SHELL := /usr/bin/env bash -PYTHON := python +SHELL ?= /usr/bin/env bash +PYTHON ?= python PYTHONPATH := `pwd` +POETRY ?= poetry #* Poetry .PHONY: poetry-install @@ -14,36 +15,36 @@ poetry-remove: .PHONY: poetry-plugins poetry-plugins: - poetry self add poetry-plugin-up + $(POETRY) self add poetry-plugin-up .PHONY: poetry-env poetry-env: - poetry config virtualenvs.in-project true + $(POETRY) config virtualenvs.in-project true .PHONY: publish publish: - poetry publish --build + $(POETRY) publish --build #* Installation .PHONY: install install: poetry-env - poetry lock -n && poetry export --without-hashes > requirements-lock.txt - poetry install -n + $(POETRY) lock -n && poetry export --without-hashes > requirements-lock.txt + $(POETRY) install -n -poetry run mypy --install-types --non-interactive ./ .PHONY: install-dev install-dev: poetry-env install - poetry install -n --with dev + $(POETRY) install -n --with dev .PHONY: pre-commit-install pre-commit-install: - poetry run pre-commit install + $(POETRY) run pre-commit install #* Formatters .PHONY: codestyle codestyle: - poetry run ruff format --config=pyproject.toml stac_model tests + $(POETRY) run ruff format --config=pyproject.toml stac_model tests .PHONY: format format: codestyle @@ -61,29 +62,29 @@ check-all: check .PHONY: mypy mypy: - poetry run mypy --config-file pyproject.toml ./ + $(POETRY) run mypy --config-file pyproject.toml ./ .PHONY: check-mypy check-mypy: mypy .PHONY: check-safety check-safety: - poetry check - poetry run safety check --full-report - poetry run bandit -ll --recursive stac_model tests + $(POETRY) check + $(POETRY) run safety check --full-report + $(POETRY) run bandit -ll --recursive stac_model tests .PHONY: lint lint: - poetry run ruff --config=pyproject.toml ./ - poetry run pydocstyle 
--count --config=pyproject.toml ./ - poetry run pydoclint --config=pyproject.toml ./ + $(POETRY) run ruff --config=pyproject.toml ./ + $(POETRY) run pydocstyle --count --config=pyproject.toml ./ + $(POETRY) run pydoclint --config=pyproject.toml ./ .PHONY: check-lint check-lint: lint .PHONY: format-lint format-lint: - poetry run ruff --config=pyproject.toml --fix ./ + $(POETRY) run ruff --config=pyproject.toml --fix ./ .PHONY: install-npm install-npm: @@ -113,7 +114,7 @@ lint-all: lint mypy check-safety check-markdown .PHONY: update-dev-deps update-dev-deps: - poetry up --only=dev-dependencies --latest + $(POETRY) up --only=dev-dependencies --latest #* Cleaning .PHONY: pycache-remove diff --git a/json-schema/schema.json b/json-schema/schema.json index 90b0ad0..df9e8e7 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -8,7 +8,9 @@ { "$comment": "This is the schema for STAC extension MLM in Items.", "if": { - "required": ["type"], + "required": [ + "type" + ], "properties": { "type": { "const": "Feature" @@ -70,7 +72,9 @@ { "$comment": "This is the schema for STAC extension MLM in Collections.", "if": { - "required": ["type"], + "required": [ + "type" + ], "properties": { "type": { "const": "Collection" @@ -143,7 +147,9 @@ "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item field named 'properties' containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].", "properties": { "properties": { - "required": ["eo:bands"], + "required": [ + "eo:bands" + ], "properties": { "eo:bands": { "type": "array", @@ -157,7 +163,9 @@ } }, "stac_extensions_eo_bands_asset": { - "required": ["assets"], + "required": [ + "assets" + ], "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Asset containing 'eo:bands' as described in [https://github.com/stac-extensions/eo#item-properties-or-asset-fields].", "properties": { "assets": { @@ -166,7 
+174,9 @@ "$ref": "#/$defs/AssetModelRole" }, "then": { - "required": ["eo:bands"], + "required": [ + "eo:bands" + ], "properties": { "eo:bands": { "type": "array", @@ -197,7 +207,9 @@ } }, "stac_extensions_raster_bands_asset": { - "required": ["assets"], + "required": [ + "assets" + ], "$comment": "This is the JSON-object 'properties' definition, which describes the STAC-Item at least one Asset field containing 'raster:bands' as described in [https://github.com/stac-extensions/raster/tree/v1.1.0#item-asset-fields].", "properties": { "assets": { @@ -216,7 +228,9 @@ "items": { "$comment": "Raster extension does not explicitly indicate a 'name', but one is needed for MLM.", "type": "object", - "required": ["name"], + "required": [ + "name" + ], "properties": { "name": { "type": "string", @@ -408,7 +422,9 @@ "properties": { "properties": { "$comment": "This is the JSON-object 'properties' definition for the STAC MLM pretraining reference.", - "required": ["mlm:pretrained_source"], + "required": [ + "mlm:pretrained_source" + ], "properties": { "mlm:pretrained_source": { "const": null @@ -760,7 +776,9 @@ ] }, "AssetModelRole": { - "required": ["roles"], + "required": [ + "roles" + ], "properties": { "roles": { "contains": { @@ -802,8 +820,12 @@ } }, "dependencies": { - "format": ["expression"], - "expression": ["format"] + "format": [ + "expression" + ], + "expression": [ + "format" + ] }, "additionalProperties": false } diff --git a/tests/test_schema.py b/tests/test_schema.py index 74a0523..4755978 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -41,7 +41,7 @@ def test_mlm_missing_bands_invalid_if_mlm_input_lists_bands( mlm_bands_bad_data = copy.deepcopy(mlm_example) mlm_bands_bad_data["assets"]["weights"].pop("raster:bands") # type: ignore # no 'None' to raise in case modified - with pytest.raises(pystac.errors.STACValidationError) as err: + with pytest.raises(pystac.errors.STACValidationError): mlm_bands_bad_item = 
pystac.Item.from_dict(mlm_bands_bad_data) pystac.validation.validate(mlm_bands_bad_item, validator=mlm_validator) From f64c93777c3670e162221ef452345c5001cc608b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 22:59:52 -0400 Subject: [PATCH 7/8] fix case for mlm:input empty [] against bands check --- json-schema/schema.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/json-schema/schema.json b/json-schema/schema.json index df9e8e7..c9ec4da 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -847,7 +847,10 @@ "properties": { "mlm:input": { "type": "array", + "$comment": "Below 'minItems' ensures that band check does not fail for explicitly empty 'mlm:inputs'.", + "minItems": 1, "items": { + "type": "object", "required": [ "bands" ], From 1c518c48ea993c7682eca1bce7a505bbe98c1a2a Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 20 Sep 2024 23:01:38 -0400 Subject: [PATCH 8/8] linting changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7986b56..c52af0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 in its `mlm:input` definition without providing any definition of those bands. The `eo:bands` properties have been added to the corresponding `model` Asset using the [`pystac.extensions.eo`](https://github.com/stac-utils/pystac/blob/main/pystac/extensions/eo.py) utilities. -- Fix various STAC Asset definitions erroneously employing `mlm:model` role instead of the intended `mlm:source_code`. +- Fix various STAC Asset definitions erroneously employing `mlm:model` role instead of the intended `mlm:source_code`. ## [v1.2.0](https://github.com/crim-ca/mlm-extension/tree/v1.2.0)