From 5a3f01f11c18048c1cce0b4c850b71ab611444f2 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 17 Jun 2024 11:36:49 +0100 Subject: [PATCH] add max_evaluation_depth parameter and documentation in README --- CHANGELOG.md | 3 +- README.md | 97 +++++++++++++++++++++++++++ cmem_plugin_pyshacl/plugin_pyshacl.py | 55 +++++++++------ poetry.lock | 48 ++++++------- tests/test_pyshacl.py | 1 + 5 files changed, 160 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57f1fcd..1027d91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,5 +8,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p ### Added -- initial version +- Added ability to specify a custom max-evaluation-depth introduced with pySHACL 0.26.0 + diff --git a/README.md b/README.md index 63d3456..7e1a406 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,100 @@ Validate your Knowledge Graphs based on tests generated from SHACL shapes. - Use [pre-commit](https://pre-commit.com/) to avoid errors before commit. - This repository was created with [this copier template](https://github.com/eccenca/cmem-plugin-template). +## Options + +### Data graph URI + +The URI of the data graph to be validated. The graph URI is selected from a list of graphs of types: +- `di:Dataset` +- `dsm:ThesaurusProject` +- `owl:Ontology` +- `shui:ShapeCatalog` +- `void:Dataset` + +### SHACL graph URI + +The URI of the graph containing the SHACL shapes to be validated against. The graph URI is selected from a list of graphs of type `shui:ShapeCatalog` + +### Generate validation graph + +If enabled, the validation graph is posted to the CMEM instance with the graph URI specified with the *validation graph URI* option. 
Default value: *false* + +### Validation graph URI + +If the *generate validation graph* option is enabled the validation graph is posted to the CMEM instance with this graph URI + +### Output entities + +If enabled, the plugin outputs the validation results and can be connected to, for instance, a CSV dataset to produce a results table. Default value: *false* + +### Clear validation graph + +If enabled, the validation graph is cleared before workflow execution. Default value: *true*. + +## Advanced Options + +### Resolve owl:imports + +If enabled, the graph tree defined with `owl:imports` in the data graph is resolved. Default value: *true* + +### Blank node skolemization + +If enabled, blank nodes in the validation graph are skolemized into URIs. Default value: *true* + +### Add labels + +If enabled, `rdfs:label` triples are added to the validation graph for instances of `sh:ValidationReport` and `sh:ValidationResult`. Default value: *true* + +### Add labels from data and SHACL graphs + +If enabled along with the *add labels* option, `rdfs:label` triples are added for the focus nodes, values and SHACL shapes in the validation graph. The labels are taken from the specified data and SHACL graphs. Default value: *false* + +### Add shui:conforms flag to focus node resources + +If enabled, `shui:conforms false` triples are added to the focus nodes in the validation graph. Default value: *false* + +### Meta-SHACL + +If enabled, the SHACL shapes graph is validated against the SHACL-SHACL shapes graph before validating the data graph. Default value: *false* + +### Ontology graph URI + +The URI of a graph containing extra ontological information. RDFS and OWL definitions from this are used to inoculate the data graph. The graph URI is selected from a list of graphs of type `owl:Ontology` + +### Inference + +If enabled, OWL inferencing expansion of the data graph is performed before validation. Options are *RDFS*, *OWLRL*, *Both*, *None*. 
Default value: *None* + +### Advanced + +Enable SHACL Advanced Features. Default value: *false*. + +### Maximum evaluation depth + +The maximum number of SHACL shapes "deep" that the validator can go before reaching an "endpoint" constraint. Default value: 15 + + +## Parameter Input + +In order to set options via the input, the following parameter names can be used: + +| Option | Name | +|------------------------------------------------|------------------------| +| Data graph URI | data_graph_uri | +| SHACL graph URI | shacl_graph_uri | +| Generate validation graph | generate_graph | +| Validation graph URI | validation_graph_uri | +| Output entities | output_entities | +| Clear validation graph | clear_validation_graph | +| Resolve owl:imports | owl_imports | +| Blank node skolemization | skolemize | +| Add labels | add_labels | +| Add labels from data and SHACL graphs | include_graphs_labels | +| Add shui:conforms flag to focus node resources | add_shui_conforms | +| Meta-SHACL | meta_shacl | +| Ontology graph URI | ontology_graph_uri | +| Inference | inference | +| Advanced | advanced | +| Maximum evaluation depth | max_validation_depth | + diff --git a/cmem_plugin_pyshacl/plugin_pyshacl.py b/cmem_plugin_pyshacl/plugin_pyshacl.py index 08bfec0..46fc62a 100644 --- a/cmem_plugin_pyshacl/plugin_pyshacl.py +++ b/cmem_plugin_pyshacl/plugin_pyshacl.py @@ -26,6 +26,7 @@ from cmem_plugin_base.dataintegration.plugins import WorkflowPlugin from cmem_plugin_base.dataintegration.types import ( BoolParameterType, + IntParameterType, StringParameterType, ) from cmem_plugin_base.dataintegration.utils import setup_cmempy_user_access @@ -170,7 +171,7 @@ def langfilter(lbl: Literal) -> bool: # noqa: ARG001 param_type=BoolParameterType(), name="clear_validation_graph", label="Clear validation graph", - description="If enabled, the validation graph is cleared before workflow " "execution.", + description="If enabled, the validation graph is cleared before workflow execution.", 
default_value=True, ), PluginParameter( @@ -292,6 +293,16 @@ def langfilter(lbl: Literal) -> bool: # noqa: ARG001 default_value=False, advanced=True, ), + PluginParameter( + param_type=IntParameterType(), + name="max_validation_depth", + label="Maximum evaluation depth", + description="Specify a custom max-evaluation-depth. Only raise this limit if you have a " + "legitimate use case, you are certain you need to raise it, and you are " + "certain you know what you are doing.", + default_value=15, + advanced=True, + ), ], ) class ShaclValidation(WorkflowPlugin): @@ -299,24 +310,25 @@ class ShaclValidation(WorkflowPlugin): def __init__( # noqa: PLR0913 self, - data_graph_uri: str, - shacl_graph_uri: str, - ontology_graph_uri: str, - generate_graph: bool, - validation_graph_uri: str, - output_entities: bool, - clear_validation_graph: bool, - owl_imports: bool, - skolemize: bool, - add_labels: bool, - include_graphs_labels: bool, - add_shui_conforms: bool, - meta_shacl: bool, - inference: str, - advanced: bool, - remove_dataset_graph_type: bool, - remove_thesaurus_graph_type: bool, - remove_shape_catalog_graph_type: bool, + data_graph_uri: str = "", + shacl_graph_uri: str = "", + ontology_graph_uri: str = "", + generate_graph: bool = False, + validation_graph_uri: str = "", + output_entities: bool = False, + clear_validation_graph: bool = True, + owl_imports: bool = True, + skolemize: bool = True, + add_labels: bool = True, + include_graphs_labels: bool = False, + add_shui_conforms: bool = False, + meta_shacl: bool = False, + inference: str = "none", + advanced: bool = False, + remove_dataset_graph_type: bool = False, + remove_thesaurus_graph_type: bool = False, + remove_shape_catalog_graph_type: bool = False, + max_validation_depth: int = 15, ) -> None: self.data_graph_uri = data_graph_uri self.shacl_graph_uri = shacl_graph_uri @@ -336,6 +348,7 @@ def __init__( # noqa: PLR0913 self.remove_dataset_graph_type = remove_dataset_graph_type 
self.remove_thesaurus_graph_type = remove_thesaurus_graph_type self.remove_shape_catalog_graph_type = remove_shape_catalog_graph_type + self.max_validation_depth = max_validation_depth discover_plugins("cmem_plugin_pyshacl") this_plugin = Plugin.plugins[0] @@ -604,6 +617,9 @@ def check_parameters( # noqa: C901 PLR0912 if self.inference not in ("none", "rdfs", "owlrl", "both"): raise ValueError("Invalid value for inference parameter") + if not isinstance(self.max_validation_depth, int) or self.max_validation_depth < 1: + raise ValueError("Invalid value for maximum evaluation depth") + self.log.info("Parameters OK:") for param in self.graph_parameters + self.bool_parameters: self.log.info(f"{param}: {self.__dict__[param]}") @@ -658,6 +674,7 @@ def execute( # noqa: C901 PLR0912 meta_shacl=self.meta_shacl, inference=self.inference, advanced=self.advanced, + max_validation_depth=self.max_validation_depth, inplace=True, ) self.log.info(f"Finished SHACL validation in {e_t(start)} seconds") diff --git a/poetry.lock b/poetry.lock index 03ff222..d10d420 100644 --- a/poetry.lock +++ b/poetry.lock @@ -985,13 +985,13 @@ cli = ["click (>=5.0)"] [[package]] name = "python-ulid" -version = "2.6.0" +version = "2.7.0" description = "Universally unique lexicographically sortable identifier" optional = false python-versions = ">=3.9" files = [ - {file = "python_ulid-2.6.0-py3-none-any.whl", hash = "sha256:b47cc7a427b82f7526af96385d7702685df808e9b4922523dd5988a3ba98a89d"}, - {file = "python_ulid-2.6.0.tar.gz", hash = "sha256:904e19093dd6578a5ce01a8274e3e228d556d47be3bda328da2d3601c5240c4f"}, + {file = "python_ulid-2.7.0-py3-none-any.whl", hash = "sha256:c81658e382f69bad8c6d365155c4ae21843ae4226b94f72c12d7adcbb545a251"}, + {file = "python_ulid-2.7.0.tar.gz", hash = "sha256:18eb595885140851a490a95b0da4447911ff69fa9f434732067b97f6956f9fe9"}, ] [package.extras] @@ -1150,28 +1150,28 @@ files = [ [[package]] name = "ruff" -version = "0.4.8" +version = "0.4.9" description = "An extremely 
fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.4.8-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7663a6d78f6adb0eab270fa9cf1ff2d28618ca3a652b60f2a234d92b9ec89066"}, - {file = "ruff-0.4.8-py3-none-macosx_11_0_arm64.whl", hash = "sha256:eeceb78da8afb6de0ddada93112869852d04f1cd0f6b80fe464fd4e35c330913"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aad360893e92486662ef3be0a339c5ca3c1b109e0134fcd37d534d4be9fb8de3"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:284c2e3f3396fb05f5f803c9fffb53ebbe09a3ebe7dda2929ed8d73ded736deb"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7354f921e3fbe04d2a62d46707e569f9315e1a613307f7311a935743c51a764"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:72584676164e15a68a15778fd1b17c28a519e7a0622161eb2debdcdabdc71883"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9678d5c9b43315f323af2233a04d747409d1e3aa6789620083a82d1066a35199"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704977a658131651a22b5ebeb28b717ef42ac6ee3b11e91dc87b633b5d83142b"}, - {file = "ruff-0.4.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d05f8d6f0c3cce5026cecd83b7a143dcad503045857bc49662f736437380ad45"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6ea874950daca5697309d976c9afba830d3bf0ed66887481d6bca1673fc5b66a"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fc95aac2943ddf360376be9aa3107c8cf9640083940a8c5bd824be692d2216dc"}, - {file = "ruff-0.4.8-py3-none-musllinux_1_2_i686.whl", hash = "sha256:384154a1c3f4bf537bac69f33720957ee49ac8d484bfc91720cc94172026ceed"}, - {file = 
"ruff-0.4.8-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e9d5ce97cacc99878aa0d084c626a15cd21e6b3d53fd6f9112b7fc485918e1fa"}, - {file = "ruff-0.4.8-py3-none-win32.whl", hash = "sha256:6d795d7639212c2dfd01991259460101c22aabf420d9b943f153ab9d9706e6a9"}, - {file = "ruff-0.4.8-py3-none-win_amd64.whl", hash = "sha256:e14a3a095d07560a9d6769a72f781d73259655919d9b396c650fc98a8157555d"}, - {file = "ruff-0.4.8-py3-none-win_arm64.whl", hash = "sha256:14019a06dbe29b608f6b7cbcec300e3170a8d86efaddb7b23405cb7f7dcaf780"}, - {file = "ruff-0.4.8.tar.gz", hash = "sha256:16d717b1d57b2e2fd68bd0bf80fb43931b79d05a7131aa477d66fc40fbd86268"}, + {file = "ruff-0.4.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b262ed08d036ebe162123170b35703aaf9daffecb698cd367a8d585157732991"}, + {file = "ruff-0.4.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:98ec2775fd2d856dc405635e5ee4ff177920f2141b8e2d9eb5bd6efd50e80317"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4555056049d46d8a381f746680db1c46e67ac3b00d714606304077682832998e"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e91175fbe48f8a2174c9aad70438fe9cb0a5732c4159b2a10a3565fea2d94cde"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e8e7b95673f22e0efd3571fb5b0cf71a5eaaa3cc8a776584f3b2cc878e46bff"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:2d45ddc6d82e1190ea737341326ecbc9a61447ba331b0a8962869fcada758505"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78de3fdb95c4af084087628132336772b1c5044f6e710739d440fc0bccf4d321"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:06b60f91bfa5514bb689b500a25ba48e897d18fea14dce14b48a0c40d1635893"}, + {file = "ruff-0.4.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:88bffe9c6a454bf8529f9ab9091c99490578a593cc9f9822b7fc065ee0712a06"}, + {file = "ruff-0.4.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:673bddb893f21ab47a8334c8e0ea7fd6598ecc8e698da75bcd12a7b9d0a3206e"}, + {file = "ruff-0.4.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8c1aff58c31948cc66d0b22951aa19edb5af0a3af40c936340cd32a8b1ab7438"}, + {file = "ruff-0.4.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:784d3ec9bd6493c3b720a0b76f741e6c2d7d44f6b2be87f5eef1ae8cc1d54c84"}, + {file = "ruff-0.4.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:732dd550bfa5d85af8c3c6cbc47ba5b67c6aed8a89e2f011b908fc88f87649db"}, + {file = "ruff-0.4.9-py3-none-win32.whl", hash = "sha256:8064590fd1a50dcf4909c268b0e7c2498253273309ad3d97e4a752bb9df4f521"}, + {file = "ruff-0.4.9-py3-none-win_amd64.whl", hash = "sha256:e0a22c4157e53d006530c902107c7f550b9233e9706313ab57b892d7197d8e52"}, + {file = "ruff-0.4.9-py3-none-win_arm64.whl", hash = "sha256:5d5460f789ccf4efd43f265a58538a2c24dbce15dbf560676e430375f20a8198"}, + {file = "ruff-0.4.9.tar.gz", hash = "sha256:f1cb0828ac9533ba0135d148d214e284711ede33640465e706772645483427e3"}, ] [[package]] @@ -1220,13 +1220,13 @@ files = [ [[package]] name = "textual" -version = "0.67.1" +version = "0.69.0" description = "Modern Text User Interface framework" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "textual-0.67.1-py3-none-any.whl", hash = "sha256:6c65e37f2114b5c8d74499586769aee763c14238a7671e0d0cf823b5f47ee6ac"}, - {file = "textual-0.67.1.tar.gz", hash = "sha256:9d8708b2d1bf82de800b7da2202de26e6059d6106c67bf91e47b8a4763b3e8f5"}, + {file = "textual-0.69.0-py3-none-any.whl", hash = "sha256:51d19a5eaeb00e6dabc7b1ef3d8de558dc78e55a3b273c8cf8273b3db574a651"}, + {file = "textual-0.69.0.tar.gz", hash = "sha256:73736996f468f20efdc8854482db02bf8799586430a39bc0a7c97f91043081b0"}, ] [package.dependencies] diff --git a/tests/test_pyshacl.py b/tests/test_pyshacl.py index 2edf613..2ab53f5 100644 --- 
a/tests/test_pyshacl.py +++ b/tests/test_pyshacl.py @@ -62,6 +62,7 @@ def test_workflow_execution(_setup: None) -> None: # noqa: PT019 remove_dataset_graph_type=True, remove_thesaurus_graph_type=True, remove_shape_catalog_graph_type=True, + max_validation_depth=15, ) plugin.execute(inputs=(), context=TestExecutionContext()) res = get(VALIDATION_GRAPH_URI)