From 699b0576349458c41b9e1faede256ab9c6886ff3 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 07:53:33 +0000 Subject: [PATCH 01/14] Do not fetch RO-Crate metadata when already available --- lifemonitor/api/models/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index a7f603931..976f36c90 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -65,7 +65,7 @@ def __init__(self, uri, uuid=None, name=None, @hybrid_property def crate_metadata(self): - if not self._metadata_loaded: + if not self._metadata_loaded and not self._metadata: self.load_metadata() return self._metadata From 33c35046a015f48d2e2880085c328b156fd09fb2 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 07:58:28 +0000 Subject: [PATCH 02/14] Raise a more appropriate exception when download errors occur --- lifemonitor/exceptions.py | 8 ++++++++ lifemonitor/utils.py | 7 ++++--- specs/api.yaml | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lifemonitor/exceptions.py b/lifemonitor/exceptions.py index 1cdd523cf..9b240bcc3 100644 --- a/lifemonitor/exceptions.py +++ b/lifemonitor/exceptions.py @@ -133,6 +133,14 @@ def __init__(self, workflow_uuid, workflow_version, detail=None, **kwargs) -> No detail=detail, status=409, **kwargs) +class DownloadROCrateException(LifeMonitorException): + + def __init__(self, detail="Unable to get RO-Crate for workflow", + type="about:blank", status=500, instance=None, **kwargs): + super().__init__(title="Internal Server Error", + detail=detail, status=status, **kwargs) + + class NotValidROCrateException(LifeMonitorException): def __init__(self, detail="Not valid RO Crate", diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index 152deecda..4011ea197 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -33,6 +33,7 @@ import urllib import uuid import zipfile +import requests from importlib import import_module from os.path import basename, dirname, isfile, join @@ -166,12 +167,12 @@ def download_url(url: str, target_path: str = None, authorization: str = None) - logger.info("Fetched %s of data from %s", sizeof_fmt(os.path.getsize(target_path)), url) - except (urllib.error.URLError, requests.exceptions.HTTPError) as e: + except (urllib.error.URLError, IOError) as e: # requests raised on an exception as we were trying to download. raise \ - lm_exceptions.NotValidROCrateException( + lm_exceptions.DownloadROCrateException( details=f"Error downloading RO-crate from {url}", - status=400, + status=500, original_error=str(e)) return target_path diff --git a/specs/api.yaml b/specs/api.yaml index ce68e8fee..a044cc002 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -666,6 +666,8 @@ paths: $ref: "#/components/responses/Forbidden" "404": $ref: "#/components/responses/NotFound" + "500": + $ref: "#/components/responses/InternalServerError" /workflows/{wf_uuid}/suites: get: From 458733b71f6acb302e2aabdf541422384fee208e Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 07:59:24 +0000 Subject: [PATCH 03/14] Update specs: fix missing InternalServerError type --- specs/api.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/specs/api.yaml b/specs/api.yaml index a044cc002..01e46bbe3 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -1160,6 +1160,13 @@ components: schema: $ref: "#/components/schemas/Error" + InternalServerError: + description: Internal Server Error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + WorkflowRegistered: description: A new workflow has been registered. content: From dd6755f9dd6bbb603dbf09e52afac68ef9187789 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 08:01:58 +0000 Subject: [PATCH 04/14] Update specs: fix white spaces --- specs/api.yaml | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/specs/api.yaml b/specs/api.yaml index 01e46bbe3..90b80498e 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -610,7 +610,6 @@ paths: "404": $ref: "#/components/responses/NotFound" - /workflows/{wf_uuid}/rocrate/{wf_version}/metadata: get: x-openapi-router-controller: lifemonitor.api.controllers @@ -630,7 +629,7 @@ paths: "200": description: | RO-Crate JSON metadata for the specified workflow and workflow version. - RO-Crate specifcation is available at https://www.researchobject.org/ro-crate/. + RO-Crate specifcation is available at https://www.researchobject.org/ro-crate/. "400": $ref: "#/components/responses/BadRequest" "401": @@ -1512,16 +1511,19 @@ components: { uuid: "21ac72ec-b9a5-49e0-b5a6-1322b8b54552", version: "0.1", - links: { - origin: "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip" - }, - ro_crate: { - links: { - "download": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/download", - "metadata": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/metadata", - "origin": "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip" - } - }, + links: + { + origin: "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip", + }, + ro_crate: + { + links: + { + "download": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/download", + "metadata": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/metadata", + "origin": "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip", + }, + }, submitter: { id: 501, username: "lm" }, }, ] From 70263bd8ca1d742f0b02ef0a03bcf3922090e49b Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 08:05:07 +0000 Subject: [PATCH 05/14] Fix import --- lifemonitor/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index 4011ea197..5fe5c453c 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -33,7 +33,6 @@ import urllib import uuid import zipfile -import requests from importlib import import_module from os.path import basename, dirname, isfile, join From 927e82c727eaf36f2b219f71bccb285659175941 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 08:54:06 +0000 Subject: [PATCH 06/14] Map download error to bad request when posting workflow --- lifemonitor/api/controllers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lifemonitor/api/controllers.py b/lifemonitor/api/controllers.py index 9b8bb3aad..da64a3914 100644 --- a/lifemonitor/api/controllers.py +++ b/lifemonitor/api/controllers.py @@ -308,6 +308,9 @@ def workflows_post(body, _registry=None, _submitter_id=None): except KeyError as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.input_data_missing) + except lm_exceptions.DownloadROCrateException as e: + return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, + detail=messages.invalid_ro_crate) except lm_exceptions.NotValidROCrateException as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.invalid_ro_crate) From 4c2b4cec916edd714b579aab3f28c3d24acf61dd Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 08:54:32 +0000 Subject: [PATCH 07/14] Fix test --- tests/unit/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 28138e38a..620c9db4c 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -29,6 +29,6 @@ def test_download_url_404(): with tempfile.TemporaryDirectory() as d: - with pytest.raises(lm_exceptions.NotValidROCrateException) as excinfo: + with pytest.raises(lm_exceptions.DownloadROCrateException) as excinfo: _ = utils.download_url('http://httpbin.org/status/404', os.path.join(d, 'get_404')) assert excinfo.value.status == 400 From 40276c73dabad9ff0d0df3067463fe0a29aed6bb Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 10:05:54 +0000 Subject: [PATCH 08/14] Rename 'download' exception --- lifemonitor/api/controllers.py | 2 +- lifemonitor/exceptions.py | 2 +- tests/unit/test_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lifemonitor/api/controllers.py b/lifemonitor/api/controllers.py index da64a3914..fea5456ee 100644 --- a/lifemonitor/api/controllers.py +++ b/lifemonitor/api/controllers.py @@ -308,7 +308,7 @@ def workflows_post(body, _registry=None, _submitter_id=None): except KeyError as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.input_data_missing) - except lm_exceptions.DownloadROCrateException as e: + except lm_exceptions.DownloadException as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.invalid_ro_crate) except lm_exceptions.NotValidROCrateException as e: diff --git a/lifemonitor/exceptions.py b/lifemonitor/exceptions.py index 9b240bcc3..9729a47bc 100644 --- a/lifemonitor/exceptions.py +++ b/lifemonitor/exceptions.py @@ -133,7 +133,7 @@ def __init__(self, workflow_uuid, workflow_version, detail=None, **kwargs) -> No detail=detail, status=409, **kwargs) -class DownloadROCrateException(LifeMonitorException): +class DownloadException(LifeMonitorException): def __init__(self, detail="Unable to get RO-Crate for workflow", type="about:blank", status=500, instance=None, **kwargs): diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 620c9db4c..6a36aaf6f 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -29,6 +29,6 @@ def test_download_url_404(): with tempfile.TemporaryDirectory() as d: - with pytest.raises(lm_exceptions.DownloadROCrateException) as excinfo: + with pytest.raises(lm_exceptions.DownloadException) as excinfo: _ = utils.download_url('http://httpbin.org/status/404', os.path.join(d, 'get_404')) assert excinfo.value.status == 400 From 8516f368a01213068b7e1aa53578d1db839dc171 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Mon, 27 Sep 2021 10:20:59 +0000 Subject: [PATCH 09/14] Make error reporting more precise --- lifemonitor/api/models/rocrate.py | 9 ++++++--- lifemonitor/utils.py | 28 +++++++++++++++++++++++++--- tests/unit/test_utils.py | 2 +- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index 976f36c90..caa909f9a 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -27,15 +27,14 @@ import lifemonitor.exceptions as lm_exceptions from lifemonitor.api.models import db -from lifemonitor.auth.models import Resource, HostingService +from lifemonitor.auth.models import HostingService, Resource from lifemonitor.models import JSON from lifemonitor.test_metadata import get_roc_suites -from lifemonitor.utils import download_url, extract_zip +from lifemonitor.utils import check_resource_exists, download_url, extract_zip from sqlalchemy.ext.hybrid import hybrid_property from rocrate.rocrate import ROCrate as ROCrateHelper - # set module level logger logger = logging.getLogger(__name__) @@ -115,6 +114,10 @@ def load_metadata(self) -> dict: raise lm_exceptions.NotAuthorizedException(detail=f"Not authorized to download {self.uri}", original_errors=errors) def download(self, target_path: str) -> str: + # report if the workflow is not longer available on the origin server + if self._metadata and not check_resource_exists(self.uri): + raise lm_exceptions.DownloadException(details=f"Workflow not found on the origin: {self.uri}", status=410) + errors = [] # try either with authorization header and without authorization for authorization in self._get_authorizations(): diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index 5fe5c453c..a2c3f4555 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -151,6 +151,13 @@ def _download_from_remote(url, output_stream, authorization=None): output_stream.write(chunk) +def check_resource_exists(url): + r = requests.head(url, verify=False) + result = r.status_code != 404 + logger.debug("Checking if resource %s exists: %r", url, result) + return result + + def download_url(url: str, target_path: str = None, authorization: str = None) -> str: if not target_path: target_path = tempfile.mktemp() @@ -166,11 +173,26 @@ def download_url(url: str, target_path: str = None, authorization: str = None) - logger.info("Fetched %s of data from %s", sizeof_fmt(os.path.getsize(target_path)), url) - except (urllib.error.URLError, IOError) as e: + except urllib.error.URLError as e: + logger.exception(e) + raise \ + lm_exceptions.DownloadException( + details=f"Error downloading from {url}", + status=400, + original_error=str(e)) + except requests.exceptions.HTTPError as e: + logger.exception(e) + raise \ + lm_exceptions.DownloadException( + details=f"Error downloading from {url}", + status=e.response.status_code, + original_error=str(e)) + except IOError as e: # requests raised on an exception as we were trying to download. + logger.exception(e) raise \ - lm_exceptions.DownloadROCrateException( - details=f"Error downloading RO-crate from {url}", + lm_exceptions.DownloadException( + details=f"Error downloading from {url}", status=500, original_error=str(e)) return target_path diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6a36aaf6f..695f050d2 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -31,4 +31,4 @@ def test_download_url_404(): with tempfile.TemporaryDirectory() as d: with pytest.raises(lm_exceptions.DownloadException) as excinfo: _ = utils.download_url('http://httpbin.org/status/404', os.path.join(d, 'get_404')) - assert excinfo.value.status == 400 + assert excinfo.value.status == 404 From f86bc291184770b2af206ae856484ef8987cf799 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Sep 2021 07:37:40 +0000 Subject: [PATCH 10/14] Fix title and detail on DownloadException __init__ --- lifemonitor/exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lifemonitor/exceptions.py b/lifemonitor/exceptions.py index 9729a47bc..b8ab12d8e 100644 --- a/lifemonitor/exceptions.py +++ b/lifemonitor/exceptions.py @@ -135,9 +135,9 @@ def __init__(self, workflow_uuid, workflow_version, detail=None, **kwargs) -> No class DownloadException(LifeMonitorException): - def __init__(self, detail="Unable to get RO-Crate for workflow", + def __init__(self, detail=None, type="about:blank", status=500, instance=None, **kwargs): - super().__init__(title="Internal Server Error", + super().__init__(title="Download error", detail=detail, status=status, **kwargs) From cf4368aa0a2f7857cda31a02ea0e8c6409369e96 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Sep 2021 07:39:03 +0000 Subject: [PATCH 11/14] Fix trailing 's' on DownloadException instances --- lifemonitor/api/models/rocrate.py | 4 ++-- lifemonitor/utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index caa909f9a..37434343a 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -115,8 +115,8 @@ def load_metadata(self) -> dict: def download(self, target_path: str) -> str: # report if the workflow is not longer available on the origin server - if self._metadata and not check_resource_exists(self.uri): - raise lm_exceptions.DownloadException(details=f"Workflow not found on the origin: {self.uri}", status=410) + if self._metadata and not self.available: + raise lm_exceptions.DownloadException(detail=f"Workflow not found on the origin: {self.uri}", status=410) errors = [] # try either with authorization header and without authorization diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index a2c3f4555..9f7a5e02b 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -177,14 +177,14 @@ def download_url(url: str, target_path: str = None, authorization: str = None) - logger.exception(e) raise \ lm_exceptions.DownloadException( - details=f"Error downloading from {url}", + detail=f"Error downloading from {url}", status=400, original_error=str(e)) except requests.exceptions.HTTPError as e: logger.exception(e) raise \ lm_exceptions.DownloadException( - details=f"Error downloading from {url}", + detail=f"Error downloading from {url}", status=e.response.status_code, original_error=str(e)) except IOError as e: @@ -192,7 +192,7 @@ def download_url(url: str, target_path: str = None, authorization: str = None) - logger.exception(e) raise \ lm_exceptions.DownloadException( - details=f"Error downloading from {url}", + detail=f"Error downloading from {url}", status=500, original_error=str(e)) return target_path From 49ed591e27f06ef81cf6943168de80b3c22da997 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Sep 2021 07:41:25 +0000 Subject: [PATCH 12/14] Fix condition to check if resource exists --- lifemonitor/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index 9f7a5e02b..12f74a68f 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -153,7 +153,7 @@ def _download_from_remote(url, output_stream, authorization=None): def check_resource_exists(url): r = requests.head(url, verify=False) - result = r.status_code != 404 + result = r.status_code == 200 logger.debug("Checking if resource %s exists: %r", url, result) return result From e4219b70d0d1c181dbea26fb750fc5219a7c7207 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Tue, 28 Sep 2021 07:52:22 +0000 Subject: [PATCH 13/14] Restore function call --- lifemonitor/api/models/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index 37434343a..82559b150 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -115,7 +115,7 @@ def load_metadata(self) -> dict: def download(self, target_path: str) -> str: # report if the workflow is not longer available on the origin server - if self._metadata and not self.available: + if self._metadata and not check_resource_exists(self.uri): raise lm_exceptions.DownloadException(detail=f"Workflow not found on the origin: {self.uri}", status=410) errors = [] From f721ad65084e236591c33b2dadf24b8c1c1b6466 Mon Sep 17 00:00:00 2001 From: Marco Enrico Piras Date: Wed, 29 Sep 2021 16:11:04 +0000 Subject: [PATCH 14/14] Fix exception message --- lifemonitor/api/models/rocrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index 82559b150..b7f80fcfb 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -116,7 +116,7 @@ def load_metadata(self) -> dict: def download(self, target_path: str) -> str: # report if the workflow is not longer available on the origin server if self._metadata and not check_resource_exists(self.uri): - raise lm_exceptions.DownloadException(detail=f"Workflow not found on the origin: {self.uri}", status=410) + raise lm_exceptions.DownloadException(detail=f"Not found: {self.uri}", status=410) errors = [] # try either with authorization header and without authorization