diff --git a/lifemonitor/api/controllers.py b/lifemonitor/api/controllers.py index 9b8bb3aad..fea5456ee 100644 --- a/lifemonitor/api/controllers.py +++ b/lifemonitor/api/controllers.py @@ -308,6 +308,9 @@ def workflows_post(body, _registry=None, _submitter_id=None): except KeyError as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.input_data_missing) + except lm_exceptions.DownloadException as e: + return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, + detail=messages.invalid_ro_crate) except lm_exceptions.NotValidROCrateException as e: return lm_exceptions.report_problem(400, "Bad Request", extra_info={"exception": str(e)}, detail=messages.invalid_ro_crate) diff --git a/lifemonitor/api/models/rocrate.py b/lifemonitor/api/models/rocrate.py index a7f603931..b7f80fcfb 100644 --- a/lifemonitor/api/models/rocrate.py +++ b/lifemonitor/api/models/rocrate.py @@ -27,15 +27,14 @@ import lifemonitor.exceptions as lm_exceptions from lifemonitor.api.models import db -from lifemonitor.auth.models import Resource, HostingService +from lifemonitor.auth.models import HostingService, Resource from lifemonitor.models import JSON from lifemonitor.test_metadata import get_roc_suites -from lifemonitor.utils import download_url, extract_zip +from lifemonitor.utils import check_resource_exists, download_url, extract_zip from sqlalchemy.ext.hybrid import hybrid_property from rocrate.rocrate import ROCrate as ROCrateHelper - # set module level logger logger = logging.getLogger(__name__) @@ -65,7 +64,7 @@ def __init__(self, uri, uuid=None, name=None, @hybrid_property def crate_metadata(self): - if not self._metadata_loaded: + if not self._metadata_loaded and not self._metadata: self.load_metadata() return self._metadata @@ -115,6 +114,10 @@ def load_metadata(self) -> dict: raise lm_exceptions.NotAuthorizedException(detail=f"Not authorized to download {self.uri}", original_errors=errors) def download(self, target_path: str) -> str: + # report if the workflow is not longer available on the origin server + if self._metadata and not check_resource_exists(self.uri): + raise lm_exceptions.DownloadException(detail=f"Not found: {self.uri}", status=410) + errors = [] # try either with authorization header and without authorization for authorization in self._get_authorizations(): diff --git a/lifemonitor/exceptions.py b/lifemonitor/exceptions.py index 1cdd523cf..b8ab12d8e 100644 --- a/lifemonitor/exceptions.py +++ b/lifemonitor/exceptions.py @@ -133,6 +133,14 @@ def __init__(self, workflow_uuid, workflow_version, detail=None, **kwargs) -> No detail=detail, status=409, **kwargs) +class DownloadException(LifeMonitorException): + + def __init__(self, detail=None, + type="about:blank", status=500, instance=None, **kwargs): + super().__init__(title="Download error", + detail=detail, status=status, **kwargs) + + class NotValidROCrateException(LifeMonitorException): def __init__(self, detail="Not valid RO Crate", diff --git a/lifemonitor/utils.py b/lifemonitor/utils.py index 152deecda..12f74a68f 100644 --- a/lifemonitor/utils.py +++ b/lifemonitor/utils.py @@ -151,6 +151,13 @@ def _download_from_remote(url, output_stream, authorization=None): output_stream.write(chunk) +def check_resource_exists(url): + r = requests.head(url, verify=False) + result = r.status_code == 200 + logger.debug("Checking if resource %s exists: %r", url, result) + return result + + def download_url(url: str, target_path: str = None, authorization: str = None) -> str: if not target_path: target_path = tempfile.mktemp() @@ -166,13 +173,28 @@ def download_url(url: str, target_path: str = None, authorization: str = None) - logger.info("Fetched %s of data from %s", sizeof_fmt(os.path.getsize(target_path)), url) - except (urllib.error.URLError, requests.exceptions.HTTPError) as e: - # requests raised on an exception as we were trying to download. + except urllib.error.URLError as e: + logger.exception(e) raise \ - lm_exceptions.NotValidROCrateException( - details=f"Error downloading RO-crate from {url}", + lm_exceptions.DownloadException( + detail=f"Error downloading from {url}", status=400, original_error=str(e)) + except requests.exceptions.HTTPError as e: + logger.exception(e) + raise \ + lm_exceptions.DownloadException( + detail=f"Error downloading from {url}", + status=e.response.status_code, + original_error=str(e)) + except IOError as e: + # requests raised on an exception as we were trying to download. + logger.exception(e) + raise \ + lm_exceptions.DownloadException( + detail=f"Error downloading from {url}", + status=500, + original_error=str(e)) return target_path diff --git a/specs/api.yaml b/specs/api.yaml index ce68e8fee..90b80498e 100644 --- a/specs/api.yaml +++ b/specs/api.yaml @@ -610,7 +610,6 @@ paths: "404": $ref: "#/components/responses/NotFound" - /workflows/{wf_uuid}/rocrate/{wf_version}/metadata: get: x-openapi-router-controller: lifemonitor.api.controllers @@ -630,7 +629,7 @@ paths: "200": description: | RO-Crate JSON metadata for the specified workflow and workflow version. - RO-Crate specifcation is available at https://www.researchobject.org/ro-crate/. + RO-Crate specifcation is available at https://www.researchobject.org/ro-crate/. "400": $ref: "#/components/responses/BadRequest" "401": @@ -666,6 +665,8 @@ paths: $ref: "#/components/responses/Forbidden" "404": $ref: "#/components/responses/NotFound" + "500": + $ref: "#/components/responses/InternalServerError" /workflows/{wf_uuid}/suites: get: @@ -1158,6 +1159,13 @@ components: schema: $ref: "#/components/schemas/Error" + InternalServerError: + description: Internal Server Error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + WorkflowRegistered: description: A new workflow has been registered. content: @@ -1503,16 +1511,19 @@ components: { uuid: "21ac72ec-b9a5-49e0-b5a6-1322b8b54552", version: "0.1", - links: { - origin: "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip" - }, - ro_crate: { - links: { - "download": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/download", - "metadata": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/metadata", - "origin": "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip" - } - }, + links: + { + origin: "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip", + }, + ro_crate: + { + links: + { + "download": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/download", + "metadata": "https://lm:8000/workflows/123e4567-e89b-12d3-a456-426614174000/rocrate/1.0/metadata", + "origin": "http://webserver:5000/download?file=ro-crate-galaxy-sortchangecase.crate.zip", + }, + }, submitter: { id: 501, username: "lm" }, }, ] diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 28138e38a..695f050d2 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -29,6 +29,6 @@ def test_download_url_404(): with tempfile.TemporaryDirectory() as d: - with pytest.raises(lm_exceptions.NotValidROCrateException) as excinfo: + with pytest.raises(lm_exceptions.DownloadException) as excinfo: _ = utils.download_url('http://httpbin.org/status/404', os.path.join(d, 'get_404')) - assert excinfo.value.status == 400 + assert excinfo.value.status == 404