Skip to content

Commit

Permalink
Merge pull request #156 from kikkomep/fix/ro-crate-download-on-seek
Browse files Browse the repository at this point in the history
Fix/RO-Crate download from Seek registries
  • Loading branch information
kikkomep authored Oct 8, 2021
2 parents 548e9e7 + 65bcd08 commit fd7b96a
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 23 deletions.
12 changes: 6 additions & 6 deletions lifemonitor/api/models/registries/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ def get_external_uuid(self, identifier, version, user: auth_models.User) -> str:
return ",".join([str(identifier), str(version)])

@abstractmethod
def get_external_link(self, wf: models.WorkflowVersion) -> str:
def get_external_link(self, external_id: str, version: str) -> str:
pass

@abstractmethod
def get_rocrate_external_link(self, user, w: Union[models.WorkflowVersion, str]) -> str:
def get_rocrate_external_link(self, external_id: str, version: str) -> str:
pass

@abstractmethod
Expand Down Expand Up @@ -184,11 +184,11 @@ def get_external_uuid(self, external_id, version, user: auth_models.User) -> str
def get_external_id(self, uuid, version, user: auth_models.User) -> str:
    """Ask the registry client for the external id matching ``uuid`` and ``version``.

    All three arguments are forwarded unchanged to ``self.client`` and its
    answer is returned as is.
    """
    delegate = self.client
    external_id = delegate.get_external_id(uuid, version, user)
    return external_id

def get_external_link(self, w: models.WorkflowVersion) -> str:
    """Return the link that the registry client builds for the workflow version ``w``."""
    delegate = self.client
    link = delegate.get_external_link(w)
    return link
def get_external_link(self, external_id: str, version: str) -> str:
    """Return the link that the registry client builds for ``external_id`` at ``version``."""
    delegate = self.client
    link = delegate.get_external_link(external_id, version)
    return link

def get_rocrate_external_link(self, user, w: Union[models.WorkflowVersion, str]) -> str:
    """Return the RO-Crate link that the registry client builds for ``w`` on behalf of ``user``."""
    delegate = self.client
    link = delegate.get_rocrate_external_link(user, w)
    return link
def get_rocrate_external_link(self, external_id: str, version: str) -> str:
    """Return the RO-Crate link that the registry client builds for ``external_id`` at ``version``."""
    delegate = self.client
    link = delegate.get_rocrate_external_link(external_id, version)
    return link

def download_url(self, url, user, target_path=None):
    """Delegate the download of ``url`` to the registry client.

    ``target_path`` is handed over as a keyword argument; whatever the
    client returns is returned unchanged.
    """
    delegate = self.client
    outcome = delegate.download_url(url, user, target_path=target_path)
    return outcome
Expand Down
10 changes: 5 additions & 5 deletions lifemonitor/api/models/registries/seek.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from lifemonitor.api import models
from lifemonitor.auth.models import User
from lifemonitor.exceptions import EntityNotFoundException

from .registry import WorkflowRegistry, WorkflowRegistryClient

# set module level logger
Expand Down Expand Up @@ -62,12 +63,11 @@ def get_workflow_metadata(self, user, w: Union[models.WorkflowVersion, str]):
raise RuntimeError(f"ERROR: unable to get workflow (status code: {r.status_code})")
return r.json()['data']

def get_external_link(self, wf: models.WorkflowVersion) -> str:
    """Build the workflow page link for ``wf`` under this registry's URI.

    The link combines ``self.registry.uri``, the external id of the
    workflow owning ``wf`` and the version of ``wf``.
    """
    link = f"{self.registry.uri}/workflows/{wf.workflow.external_id}?version={wf.version}"
    return link
def get_external_link(self, external_id: str, version: str) -> str:
    """Build the workflow page link for ``external_id`` at ``version`` under this registry's URI."""
    link = f"{self.registry.uri}/workflows/{external_id}?version={version}"
    return link

def get_rocrate_external_link(self, user, w: Union[models.WorkflowVersion, str]) -> str:
    """Build the RO-Crate download link from the workflow metadata.

    The metadata is fetched through ``get_workflow_metadata``; the link of
    its first content blob, suffixed with ``/download``, is returned.
    """
    workflow = self.get_workflow_metadata(user, w)
    link = f'{workflow["attributes"]["content_blobs"][0]["link"]}/download'
    return link
def get_rocrate_external_link(self, external_id: str, version: str) -> str:
    """Build the ``ro_crate`` link for ``external_id`` at ``version`` under this registry's URI."""
    link = f'{self.registry.uri}/workflows/{external_id}/ro_crate?version={version}'
    return link

def filter_by_user(self, workflows: list, user: User):
result = []
Expand Down
2 changes: 1 addition & 1 deletion lifemonitor/api/models/rocrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def load_metadata(self) -> dict:

def download(self, target_path: str) -> str:
# report if the workflow is no longer available on the origin server
if self._metadata and not check_resource_exists(self.uri):
if self._metadata and not check_resource_exists(self.uri, self._get_authorizations()):
raise lm_exceptions.DownloadException(detail=f"Not found: {self.uri}", status=410)

errors = []
Expand Down
2 changes: 1 addition & 1 deletion lifemonitor/api/models/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def check_health(self) -> dict:
def external_link(self) -> str:
if self.hosting_service is None:
return self.uri
return self.hosting_service.get_external_link(self)
return self.hosting_service.get_external_link(self.workflow.external_id, self.version)

@hybrid_property
def authorizations(self):
Expand Down
2 changes: 1 addition & 1 deletion lifemonitor/api/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def register_workflow(cls, roc_link, workflow_submitter: User, workflow_version,
if not workflow_registry:
raise ValueError("Missing ROC link")
else:
roc_link = workflow_registry.get_rocrate_external_link(workflow_submitter, w.external_id)
roc_link = workflow_registry.get_rocrate_external_link(w.external_id, workflow_version)

wv = w.add_version(workflow_version, roc_link, workflow_submitter,
name=name, hosting_service=workflow_registry)
Expand Down
6 changes: 3 additions & 3 deletions lifemonitor/auth/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
# SOFTWARE.

from __future__ import annotations
import abc

import abc
import datetime
import logging
import uuid as _uuid
from typing import List, Union
from typing import List

from authlib.integrations.sqla_oauth2 import OAuth2TokenMixin
from flask_bcrypt import check_password_hash, generate_password_hash
Expand Down Expand Up @@ -258,7 +258,7 @@ def get_external_link(self, external_id: str, version: str) -> str:
pass

@abc.abstractmethod
def get_rocrate_external_link(self, user, w: Union[object, str]) -> str:
def get_rocrate_external_link(self, external_id: str, version: str) -> str:
pass


Expand Down
31 changes: 26 additions & 5 deletions lifemonitor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import zipfile
from importlib import import_module
from os.path import basename, dirname, isfile, join
from typing import List

import flask
import requests
Expand Down Expand Up @@ -163,11 +164,31 @@ def _download_from_remote(url, output_stream, authorization=None):
output_stream.write(chunk)


def check_resource_exists(url):
    """Report whether a HEAD request to ``url`` is answered with HTTP 200.

    TLS certificate verification is switched off for the request.
    """
    response = requests.head(url, verify=False)
    exists = (response.status_code == 200)
    logger.debug("Checking if resource %s exists: %r", url, exists)
    return exists
def check_resource_exists(url, authorizations: List = None, timeout=None):
    """Check whether ``url`` answers a HEAD request with status 200 or 302.

    One HEAD request is issued per entry of ``authorizations`` (a single
    unauthenticated request when the list is empty or ``None``); the first
    200/302 response ends the search.

    :param url: address of the resource to probe
    :param authorizations: objects exposing ``as_http_header()``; a falsy
        entry means "send no ``Authorization`` header"
    :param timeout: forwarded to every HEAD call; the default ``None`` keeps
        the previous behaviour of waiting without a limit
    :return: ``True`` as soon as one attempt succeeds, ``False`` otherwise
    :raises lm_exceptions.NotAuthorizedException: when no attempt succeeded
        and at least one attempt raised an authorization error
    """
    errors = []
    with requests.Session() as session:
        for authorization in authorizations or [None]:
            try:
                logger.debug("Checking head URL: %s", url)
                auth_header = authorization.as_http_header() if authorization else None
                if auth_header:
                    session.headers['Authorization'] = auth_header
                else:
                    # make sure the header of a previous attempt is not reused
                    session.headers.pop('Authorization', None)
                response = session.head(url, timeout=timeout)
                logger.debug("Check URL (with auth=%r): %r", bool(auth_header), response)
                if response.status_code in (200, 302):
                    return True
            except lm_exceptions.NotAuthorizedException as e:
                logger.info("Caught authorization error while checking whether %s exists: %s", url, e)
                errors.append(str(e))
            except Exception as e:
                # best effort: any other failure only means that this attempt
                # did not confirm the resource, so move on to the next one
                logger.debug("Unable to check URL %s: %s", url, e)
    if errors:
        raise lm_exceptions.NotAuthorizedException(detail=f"Not authorized to download {url}", original_errors=errors)
    return False


def download_url(url: str, target_path: str = None, authorization: str = None) -> str:
Expand Down
32 changes: 32 additions & 0 deletions migrations/versions/7aa503323413_fix_links_of_seek_ro_crates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Fix links of Seek RO-Crates
Revision ID: 7aa503323413
Revises: bbe1397dc8a9
Create Date: 2021-10-06 14:16:09.851731
"""

import logging
from lifemonitor.api import models

# revision identifiers, used by Alembic.
revision = '7aa503323413'
down_revision = 'bbe1397dc8a9'
branch_labels = None
depends_on = None

# set logger
logger = logging.getLogger('alembic.env')


def upgrade():
    """Rewrite the URI of every workflow version hosted on a Seek registry.

    Each version whose hosting service has type ``'seek_registry'`` gets the
    link returned by ``get_rocrate_external_link`` as its new ``uri``; the
    version is then saved and the change is logged. Other versions are left
    untouched.
    """
    for w in models.WorkflowVersion.all():
        if not w.hosting_service or w.hosting_service.type != 'seek_registry':
            continue
        registry = w.hosting_service
        w.uri = registry.get_rocrate_external_link(w.workflow.external_id, w.version)
        w.save()
        logger.info(f"URI of seek workflow {w.workflow.uuid} upgraded to: {w.uri}")


def downgrade():
    """Do nothing: this revision does not revert the rewritten URIs."""
    return None
2 changes: 1 addition & 1 deletion tests/conftest_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def seek_workflow(application, provider, public, to_skip=None, index_user=0):
'uuid': workflow['meta']['uuid'],
'version': str(workflow["attributes"]["versions"][0]['version']), # pick the first version
'name': workflow["attributes"]["title"],
'roc_link': f'{workflow["attributes"]["content_blobs"][0]["link"]}/download',
'roc_link': f"{wfhub_workflows_url}/{workflow['id']}/ro_crate?version={str(workflow['attributes']['versions'][0]['version'])}",
'registry_name': 'seek',
'registry_uri': application.config["SEEK_API_BASE_URL"],
'valid': re.search("invalid", workflow["attributes"]["title"]),
Expand Down

0 comments on commit fd7b96a

Please sign in to comment.