-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #825 from aaxelb/quest/supplementary-metadata
[ENG-6265] supplementary metadata
- Loading branch information
Showing
13 changed files
with
649 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
share/migrations/0074_sourceuniqueidentifier_is_supplementary.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Generated by Django 3.2.25 on 2024-09-19 20:33 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add a nullable boolean ``is_supplementary`` field to ``sourceuniqueidentifier``."""

    # must run after the previous migration in the `share` app
    dependencies = [
        ('share', '0073_remove_indexbackfill_backfill_phase_index'),
    ]

    operations = [
        migrations.AddField(
            model_name='sourceuniqueidentifier',
            name='is_supplementary',
            # null=True: rows that exist before this migration get NULL
            field=models.BooleanField(null=True),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import json | ||
|
||
from django.test import TestCase | ||
from primitive_metadata import primitive_rdf as rdf | ||
|
||
from tests import factories | ||
from trove import digestive_tract | ||
from trove import models as trove_db | ||
from trove.vocab.namespaces import TROVE | ||
from trove.util.iris import get_sufficiently_unique_iri | ||
|
||
|
||
# example namespace used to build the IRIs in these tests
_BLARG = rdf.IriNamespace('https://blarg.example/')
|
||
|
||
class TestDigestiveTractDerive(TestCase):
    """Tests for ``digestive_tract.derive`` using an indexcard created in ``setUpTestData``."""

    @classmethod
    def setUpTestData(cls):
        """Create an indexcard focused on ``blarg:this`` along with its latest rdf."""
        cls.focus_iri = _BLARG.this
        _focus_ident = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(cls.focus_iri)
        _raw = factories.RawDatumFactory()
        cls.indexcard = trove_db.Indexcard.objects.create(source_record_suid=_raw.suid)
        cls.indexcard.focus_identifier_set.add(_focus_ident)
        cls.latest_rdf = trove_db.LatestIndexcardRdf.objects.create(
            indexcard=cls.indexcard,
            from_raw_datum=_raw,
            focus_iri=cls.focus_iri,
            rdf_as_turtle='''@prefix blarg: <https://blarg.example/> .
blarg:this
    a blarg:Thing ;
    blarg:like blarg:that .
''',
        )

    def _assert_derived_json(self, expected_json):
        """Derive from ``self.indexcard`` and check the single resulting record.

        Asserts that exactly one record is derived, that it points back to
        ``self.indexcard``, that its deriver is ``TROVE['derive/osfmap_json']``,
        and that its text parses as JSON equal to ``expected_json``.
        """
        (_derived,) = digestive_tract.derive(self.indexcard)
        self.assertEqual(_derived.upriver_indexcard_id, self.indexcard.id)
        self.assertEqual(
            _derived.deriver_identifier.sufficiently_unique_iri,
            get_sufficiently_unique_iri(TROVE['derive/osfmap_json']),
        )
        self.assertEqual(json.loads(_derived.derived_text), expected_json)

    def test_derive(self):
        """With only the latest rdf, the derived JSON has just the original statements."""
        self._assert_derived_json({
            '@id': _BLARG.this,
            'resourceType': [{'@id': _BLARG.Thing}],
            _BLARG.like: [{'@id': _BLARG.that}],
        })

    def test_derive_with_supplementary(self):
        """Statements from a supplementary rdf record also appear in the derived JSON."""
        _supp_raw = factories.RawDatumFactory(
            suid=factories.SourceUniqueIdentifierFactory(is_supplementary=True),
        )
        trove_db.SupplementaryIndexcardRdf.objects.create(
            indexcard=self.indexcard,
            from_raw_datum=_supp_raw,
            supplementary_suid=_supp_raw.suid,
            focus_iri=self.focus_iri,
            rdf_as_turtle='''@prefix blarg: <https://blarg.example/> .
blarg:this blarg:unlike blarg:nonthing .
''',
        )
        self._assert_derived_json({
            '@id': _BLARG.this,
            'resourceType': [{'@id': _BLARG.Thing}],
            _BLARG.like: [{'@id': _BLARG.that}],
            _BLARG.unlike: [{'@id': _BLARG.nonthing}],
        })
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from django.test import TestCase | ||
from primitive_metadata import primitive_rdf as rdf | ||
|
||
from tests import factories | ||
from trove import digestive_tract | ||
from trove import models as trove_db | ||
|
||
|
||
# example namespace used to build the IRIs in these tests
_BLARG = rdf.IriNamespace('https://blarg.example/')
|
||
|
||
class TestDigestiveTractExtract(TestCase):
    """Tests for ``digestive_tract.extract`` with a primary and a supplementary raw datum."""

    @classmethod
    def setUpTestData(cls):
        """Create two turtle raw data sharing a source config and focus identifier.

        ``cls.raw`` is the primary record; ``cls.supplementary_raw`` belongs to
        a suid created with ``is_supplementary=True``.
        """
        _focus_ident = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(_BLARG.this)
        cls.raw = factories.RawDatumFactory(
            mediatype='text/turtle',
            datum='''@prefix blarg: <https://blarg.example/> .
blarg:this
    a blarg:Thing ;
    blarg:like blarg:that .
''',
            suid__focus_identifier=_focus_ident,
        )
        cls.supplementary_raw = factories.RawDatumFactory(
            mediatype='text/turtle',
            datum='''@prefix blarg: <https://blarg.example/> .
blarg:this blarg:like blarg:another ;
    blarg:unlike blarg:nonthing .
''',
            suid=factories.SourceUniqueIdentifierFactory(
                source_config=cls.raw.suid.source_config,
                focus_identifier=cls.raw.suid.focus_identifier,
                is_supplementary=True,
            ),
        )

    def _assert_no_indexcards_or_rdf(self):
        """Assert that no indexcard or indexcard-rdf rows of any kind exist."""
        self.assertEqual(trove_db.Indexcard.objects.all().count(), 0)
        self.assertEqual(trove_db.LatestIndexcardRdf.objects.all().count(), 0)
        self.assertEqual(trove_db.ArchivedIndexcardRdf.objects.all().count(), 0)
        self.assertEqual(trove_db.SupplementaryIndexcardRdf.objects.all().count(), 0)

    def test_setup(self):
        """The fixtures alone create no indexcards or rdf records."""
        self._assert_no_indexcards_or_rdf()

    def test_extract(self):
        """Extracting the primary record yields one indexcard with matching latest rdf."""
        (_indexcard,) = digestive_tract.extract(self.raw)
        self.assertEqual(_indexcard.source_record_suid_id, self.raw.suid_id)
        _focus_idents = list(
            _indexcard.focus_identifier_set.values_list('sufficiently_unique_iri', flat=True),
        )
        self.assertEqual(_focus_idents, ['://blarg.example/this'])
        _focustype_idents = list(
            _indexcard.focustype_identifier_set.values_list('sufficiently_unique_iri', flat=True),
        )
        self.assertEqual(_focustype_idents, ['://blarg.example/Thing'])
        self.assertEqual(list(_indexcard.supplementary_rdf_set.all()), [])
        _latest_rdf = _indexcard.latest_rdf
        self.assertEqual(_latest_rdf.from_raw_datum_id, self.raw.id)
        self.assertEqual(_latest_rdf.indexcard_id, _indexcard.id)
        self.assertEqual(_latest_rdf.focus_iri, _BLARG.this)
        self.assertEqual(_latest_rdf.as_rdf_tripledict(), {
            _BLARG.this: {
                rdf.RDF.type: {_BLARG.Thing},
                _BLARG.like: {_BLARG.that},
            },
        })

    def test_extract_supplementary_without_prior(self):
        """Extracting a supplementary record before any primary record creates nothing."""
        _cards = digestive_tract.extract(self.supplementary_raw)
        self.assertEqual(_cards, [])
        self._assert_no_indexcards_or_rdf()

    def test_extract_supplementary(self):
        """A supplementary record attaches to the existing indexcard, leaving latest rdf untouched."""
        (_orig_indexcard,) = digestive_tract.extract(self.raw)
        _orig_timestamp = _orig_indexcard.latest_rdf.modified
        (_indexcard,) = digestive_tract.extract(self.supplementary_raw)
        self.assertEqual(_orig_indexcard.id, _indexcard.id)
        self.assertEqual(_indexcard.source_record_suid_id, self.raw.suid_id)
        (_supp_rdf,) = _indexcard.supplementary_rdf_set.all()
        self.assertEqual(_supp_rdf.from_raw_datum_id, self.supplementary_raw.id)
        self.assertEqual(_supp_rdf.indexcard_id, _indexcard.id)
        self.assertEqual(_supp_rdf.focus_iri, _BLARG.this)
        self.assertEqual(_supp_rdf.as_rdf_tripledict(), {
            _BLARG.this: {
                _BLARG.like: {_BLARG.another},
                _BLARG.unlike: {_BLARG.nonthing},
            },
        })
        # the latest rdf's `modified` timestamp must be unchanged by the supplement
        self.assertEqual(_indexcard.latest_rdf.modified, _orig_timestamp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
from unittest import mock | ||
from django.test import TestCase | ||
|
||
from tests import factories | ||
from trove import digestive_tract | ||
from share import models as share_db | ||
|
||
|
||
class TestDigestiveTractSwallow(TestCase):
    """Tests for ``digestive_tract.swallow`` with the extract-and-derive task mocked out."""

    @classmethod
    def setUpTestData(cls):
        """Create the submitting user and the turtle record shared by all tests."""
        cls.user = factories.ShareUserFactory()
        cls.turtle = '''
@prefix blarg: <https://blarg.example/> .
blarg:this
    a blarg:Thing ;
    blarg:like blarg:that .
'''

    def _swallow_and_check(self, *, expect_urgent, expect_supplementary, **swallow_kwargs):
        """Swallow ``self.turtle`` and verify the stored raw datum and the queued task.

        Args:
            expect_urgent: value of ``urgent`` the mocked task's ``delay``
                should have been called with.
            expect_supplementary: whether the stored suid's
                ``is_supplementary`` should be truthy.
            **swallow_kwargs: extra keyword arguments passed through to
                ``digestive_tract.swallow`` (e.g. ``urgent``, ``is_supplementary``).
        """
        with mock.patch('trove.digestive_tract.task__extract_and_derive') as _mock_task:
            digestive_tract.swallow(
                from_user=self.user,
                record=self.turtle,
                record_identifier='blarg',
                record_mediatype='text/turtle',
                focus_iri='https://blarg.example/this',
                **swallow_kwargs,
            )
        # exactly one raw datum should have been stored
        (_raw,) = share_db.RawDatum.objects.all()
        self.assertEqual(_raw.datum, self.turtle)
        self.assertEqual(_raw.mediatype, 'text/turtle')
        self.assertEqual(_raw.suid.identifier, 'blarg')
        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this')
        self.assertEqual(_raw.suid.source_config.source.user_id, self.user.id)
        if expect_supplementary:
            self.assertTrue(_raw.suid.is_supplementary)
        else:
            self.assertFalse(_raw.suid.is_supplementary)
        _mock_task.delay.assert_called_once_with(_raw.id, urgent=expect_urgent)

    def test_setup(self):
        """The fixtures alone create no raw data."""
        self.assertEqual(share_db.RawDatum.objects.all().count(), 0)

    def test_swallow(self):
        """With default arguments: non-supplementary suid, task queued with ``urgent=False``."""
        self._swallow_and_check(expect_urgent=False, expect_supplementary=False)

    def test_swallow_urgent(self):
        """``urgent=True`` is forwarded to the queued task."""
        self._swallow_and_check(
            expect_urgent=True,
            expect_supplementary=False,
            urgent=True,
        )

    def test_swallow_supplementary(self):
        """``is_supplementary=True`` is recorded on the stored suid."""
        self._swallow_and_check(
            expect_urgent=False,
            expect_supplementary=True,
            is_supplementary=True,
        )
Oops, something went wrong.