diff --git a/apps/ingest/services.py b/apps/ingest/services.py index 3af4ab6e..365283df 100644 --- a/apps/ingest/services.py +++ b/apps/ingest/services.py @@ -19,7 +19,10 @@ def clean_metadata(metadata): :rtype: dict """ metadata = {key.casefold().replace(' ', '_'): value for key, value in metadata.items()} - fields = [f.name for f in Manifest._meta.get_fields()] + fields = [ + *(f.name for f in Manifest._meta.get_fields()), + "related", # used for related external links + ] invalid_keys = [] for key in metadata.keys(): @@ -33,13 +36,31 @@ def clean_metadata(metadata): if key not in fields: invalid_keys.append(key) + # TODO: Update this method to allow all "invalid" keys to populate Manifest.metadata JSONField for invalid_key in invalid_keys: metadata.pop(invalid_key) - - return metadata +def create_related_links(manifest, related_str): + """ + Create RelatedLink objects from supplied related links string and associate each with supplied + Manifest. String should consist of semicolon-separated URLs. + :param manifest: + :type manifest: iiif.manifest.models.Manifest + :param related_str: + :type related_str: str + :rtype: None + """ + for link in related_str.split(";"): + (format, _) = guess_type(link) + RelatedLink.objects.create( + manifest=manifest, + link=link, + format=format or "text/html", # assume web page if MIME type cannot be determined + data_type="Document", # assume this is not meant for seeAlso + ) + def create_manifest(ingest): """ Create or update a Manifest from supplied metadata and images. 
@@ -61,7 +82,13 @@ def create_manifest(ingest): else: manifest = Manifest.objects.create() for (key, value) in metadata.items(): - setattr(manifest, key, value) + if key == "related": + # add RelatedLinks from metadata spreadsheet key "related" + create_related_links(manifest, value) + else: + # all other keys should exist as fields on Manifest (for now) + setattr(manifest, key, value) + # TODO: if the key doesn't exist on Manifest model, add it to Manifest.metadata else: manifest = Manifest() @@ -77,9 +104,7 @@ def create_manifest(ingest): manifest.collections.set(ingest.collections.all()) # Save again once relationship is set manifest.save() - - # if type(ingest, .models.Remote): - if isinstance(ingest, Remote): + else: RelatedLink( manifest=manifest, link=ingest.remote_url, diff --git a/apps/ingest/tasks.py b/apps/ingest/tasks.py index 6b5458b2..91202aab 100644 --- a/apps/ingest/tasks.py +++ b/apps/ingest/tasks.py @@ -17,7 +17,7 @@ from apps.ingest.models import IngestTaskWatcher from .mail import send_email_on_failure, send_email_on_success -from .services import create_manifest +from .services import create_manifest, create_related_links # Use `apps.get_model` to avoid circular import error. Because the parameters used to # create a background task have to be serializable, we can't just pass in the model object. 
@@ -129,7 +129,12 @@ def create_canvases_from_s3_ingest(metadata, ingest_id): except Manifest.DoesNotExist: manifest = Manifest.objects.create(pid=pid) for (key, value) in metadata.items(): - setattr(manifest, key, value) + if key == "related": + # add RelatedLinks from metadata spreadsheet key "related" + create_related_links(manifest, value) + else: + # all other keys should exist as fields on Manifest (for now) + setattr(manifest, key, value) # Image server: set from ingest ingest = S3Ingest.objects.get(pk=ingest_id) manifest.image_server = ingest.image_server diff --git a/apps/ingest/tests/test_local.py b/apps/ingest/tests/test_local.py index 2d7a0a4b..5cfdb6ca 100644 --- a/apps/ingest/tests/test_local.py +++ b/apps/ingest/tests/test_local.py @@ -331,6 +331,23 @@ def test_it_creates_mainfest_with_metadata_property(self): assert local.manifest.pid == '808' assert local.manifest.title == 'Goodie Mob' + def test_create_related_links(self): + metadata = { + 'pid': '808', + 'related': 'https://github.com/ecds/readux/tree/develop;https://archive.org/download/cherokeehymnbook00boud/cherokeehymnbook00boud.pdf' + } + local = self.mock_local('no_meta_file.zip', metadata=metadata) + local.manifest = create_manifest(local) + related_links = local.manifest.related_links + # should get 2 from metadata, 1 from volume url + assert len(related_links) == 3 + # should get github link format as text/html + assert any([link["@id"] == "https://github.com/ecds/readux/tree/develop" for link in related_links]) + assert any([link["format"] == "text/html" for link in related_links]) + # should get pdf format too + assert any([link["@id"] == "https://archive.org/download/cherokeehymnbook00boud/cherokeehymnbook00boud.pdf" for link in related_links]) + assert any([link["format"] == "application/pdf" for link in related_links]) + def test_moving_bulk_bundle_to_s3(self): """ It should upload Local.bundle_from_bulk to mock S3 by saving it to