diff --git a/requirements.txt b/requirements.txt index 33835f78d..b5a428f23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ psycogreen==1.0.2 # BSD psycopg2==2.9.5 # LGPL with exceptions or ZPL python-dateutil==2.8.1 # Apache 2.0 PyJWE==1.0.0 # Apache 2.0 -pyshacl==0.22.0 # Apache 2.0 +rdflib==7.0.0 pyyaml==6.0 # MIT requests==2.25.1 # Apache 2.0 sentry-sdk[django]==1.22.2 # MIT @@ -43,4 +43,4 @@ xmltodict==0.12.0 # MIT # Allows custom-rendered IDs, hiding null values, and including data in error responses git+https://github.com/cos-forks/django-rest-framework-json-api.git@v4.2.1+cos0 -git+https://github.com/aaxelb/primitive_metadata.git@0.2023.57 +git+https://github.com/aaxelb/primitive_metadata.git@0.2024.09 diff --git a/share/search/exceptions.py b/share/search/exceptions.py index 77fc10904..f586e5cc3 100644 --- a/share/search/exceptions.py +++ b/share/search/exceptions.py @@ -15,15 +15,3 @@ class DaemonIndexingError(ShareException): class IndexStrategyError(ShareException): pass - - -class SearchApiError(ShareException): - pass - - -class InvalidSearchParam(SearchApiError): - pass - - -class UnsupportedSearchParam(IndexStrategyError): - pass diff --git a/share/search/index_strategy/_base.py b/share/search/index_strategy/_base.py index da0f40e1d..a2b14f7b5 100644 --- a/share/search/index_strategy/_base.py +++ b/share/search/index_strategy/_base.py @@ -10,15 +10,15 @@ from share.models.index_backfill import IndexBackfill from share.search.exceptions import IndexStrategyError from share.search.index_status import IndexStatus -from share.search.search_params import ( +from share.util.checksum_iri import ChecksumIri +from trove.trovesearch.search_params import ( CardsearchParams, ValuesearchParams, ) -from share.search.search_response import ( +from trove.trovesearch.search_response import ( CardsearchResponse, ValuesearchResponse, ) -from share.util.checksum_iri import ChecksumIri logger = logging.getLogger(__name__) diff --git 
a/share/search/index_strategy/trove_indexcard_flats.py b/share/search/index_strategy/trove_indexcard_flats.py index ba1bf546a..610591e1a 100644 --- a/share/search/index_strategy/trove_indexcard_flats.py +++ b/share/search/index_strategy/trove_indexcard_flats.py @@ -7,7 +7,7 @@ import logging import re import uuid -from typing import Iterable, ClassVar, Optional +from typing import Iterable, ClassVar, Optional, Iterator from django.conf import settings from django.db.models import Exists, OuterRef @@ -18,7 +18,9 @@ from share.search import messages from share.search.index_strategy.elastic8 import Elastic8IndexStrategy from share.search.index_strategy._util import encode_cursor_dataclass, decode_cursor_dataclass -from share.search.search_params import ( +from share.util.checksum_iri import ChecksumIri +from trove import models as trove_db +from trove.trovesearch.search_params import ( CardsearchParams, ValuesearchParams, SearchFilter, @@ -27,7 +29,7 @@ PageParam, GLOB_PATHSTEP, ) -from share.search.search_response import ( +from trove.trovesearch.search_response import ( CardsearchResponse, ValuesearchResponse, TextMatchEvidence, @@ -35,8 +37,6 @@ ValuesearchResult, PropertypathUsage, ) -from share.util.checksum_iri import ChecksumIri -from trove import models as trove_db from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri, iri_path_as_keyword from trove.vocab.osfmap import is_date_property from trove.vocab.namespaces import TROVE, FOAF, RDF, RDFS, DCTERMS, OWL, SKOS, OSFMAP @@ -689,7 +689,7 @@ def _cardsearch_date_filter(self, search_filter): 'query': {'bool': {'filter': list(self._iter_nested_date_filters(search_filter))}}, }} - def _iter_nested_date_filters(self, search_filter) -> dict: + def _iter_nested_date_filters(self, search_filter) -> Iterator[dict]: # filter by requested paths yield _pathset_as_nestedvalue_filter(search_filter.propertypath_set, 'nested_date') # filter by requested value/operator @@ -1065,6 +1065,7 @@ def 
cardsearch_start_index(self) -> int: class _PredicatePathWalker: WalkYield = tuple[tuple[str, ...], primitive_rdf.RdfObject] + _visiting: set[str | frozenset] def __init__(self, tripledict: primitive_rdf.RdfTripleDictionary): self.tripledict = tripledict diff --git a/tests/share/search/index_strategy/_with_real_services.py b/tests/share/search/index_strategy/_with_real_services.py index 07cdd1e83..3a88879e5 100644 --- a/tests/share/search/index_strategy/_with_real_services.py +++ b/tests/share/search/index_strategy/_with_real_services.py @@ -19,8 +19,8 @@ class RealElasticTestCase(TransactionTestCase): serialized_rollback = True # for TransactionTestCase; restore db after # required for subclasses - strategy_name_for_real = None - strategy_name_for_test = None + strategy_name_for_real: str + strategy_name_for_test: str @classmethod def setUpClass(cls): @@ -38,7 +38,7 @@ def setUp(self): ) self.current_index = self.index_strategy.for_current_index() self.current_index.pls_delete() # in case it already exists - self._assert_happypath_until_ingest() + self._assert_setup_happypath() def tearDown(self): super().tearDown() @@ -120,7 +120,7 @@ def _assert_happypath_with_daemon(self, messages_chunk, expected_doc_count): else: assert False, 'checked and waited but the daemon did not do the thing' - def _assert_happypath_until_ingest(self): + def _assert_setup_happypath(self): # initial assert not self.current_index.pls_check_exists() index_status = self.current_index.pls_get_status() diff --git a/tests/share/search/index_strategy/test_trove_indexcard_flats.py b/tests/share/search/index_strategy/test_trove_indexcard_flats.py index 3a1bee2b1..be321a710 100644 --- a/tests/share/search/index_strategy/test_trove_indexcard_flats.py +++ b/tests/share/search/index_strategy/test_trove_indexcard_flats.py @@ -1,52 +1,320 @@ +from typing import Iterable, Iterator +from datetime import date +from urllib.parse import urlencode + +from primitive_metadata import primitive_rdf as rdf + from 
tests import factories from share.search import messages from trove import models as trove_db -from trove.vocab.namespaces import RDFS, TROVE +from trove.trovesearch.search_params import CardsearchParams +from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF from ._with_real_services import RealElasticTestCase +BLARG = rdf.IriNamespace('https://blarg.example/blarg/') + + class TestTroveIndexcardFlats(RealElasticTestCase): # for RealElasticTestCase strategy_name_for_real = 'trove_indexcard_flats' strategy_name_for_test = 'test_trove_indexcard_flats' + _indexcard_focus_by_uuid: dict[str, str] + def setUp(self): super().setUp() - self.__suid = factories.SourceUniqueIdentifierFactory() - self.__raw = factories.RawDatumFactory( - suid=self.__suid, - ) - self.__indexcard = trove_db.Indexcard.objects.create( - source_record_suid=self.__suid, - ) - trove_db.DerivedIndexcard.objects.create( - upriver_indexcard=self.__indexcard, - deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), - ) - self.__indexcardf = trove_db.LatestIndexcardRdf.objects.create( - from_raw_datum=self.__raw, - indexcard=self.__indexcard, - focus_iri='http://foo.example/hello', - rdf_as_turtle=f' <{RDFS.label}> "hello".', - turtle_checksum_iri='foo', # not enforced - ) + self._indexcard_focus_by_uuid = {} - def test_without_daemon(self): + def test_for_smoke_without_daemon(self): + _indexcard = self._create_indexcard( + focus_iri=BLARG.hello, + rdf_tripledict={BLARG.hello: {RDFS.label: {rdf.literal('hello')}}}, + ) _messages_chunk = messages.MessagesChunk( messages.MessageType.UPDATE_INDEXCARD, - [self.__indexcard.id], + [_indexcard.id], ) self._assert_happypath_without_daemon( _messages_chunk, expected_doc_count=1, ) - def test_with_daemon(self): + def test_for_smoke_with_daemon(self): + _indexcard = self._create_indexcard( + focus_iri=BLARG.hello, + rdf_tripledict={BLARG.hello: {RDFS.label: {rdf.literal('hello')}}}, + ) 
_messages_chunk = messages.MessagesChunk( messages.MessageType.UPDATE_INDEXCARD, - [self.__indexcard.id], + [_indexcard.id], ) self._assert_happypath_with_daemon( _messages_chunk, expected_doc_count=1, ) + + def test_cardsearch(self): + self._fill_test_data_for_querying() + for _queryparams, _expected_result_iris in self._cardsearch_cases(): + _cardsearch_params = CardsearchParams.from_querystring(urlencode(_queryparams)) + _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) + # assumes all results fit on one page + _actual_result_iris = { + self._indexcard_focus_by_uuid[_result.card_uuid()] + for _result in _cardsearch_response.search_result_page + } + self.assertEqual(_expected_result_iris, _actual_result_iris) + + def _fill_test_data_for_querying(self): + self._index_indexcards([ + self._create_indexcard(BLARG.a, { + BLARG.a: { + RDF.type: {BLARG.Thing}, + OWL.sameAs: {BLARG.a_same, BLARG.a_same2}, + DCTERMS.created: {rdf.literal(date(1999, 12, 31))}, + DCTERMS.creator: {BLARG.someone}, + DCTERMS.title: {rdf.literal('aaaa')}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_a}, + DCTERMS.references: {BLARG.b, BLARG.c}, + DCTERMS.description: {rdf.literal('This place is not a place of honor... no highly esteemed deed is commemorated here... 
nothing valued is here.', language='en')}, + }, + BLARG.someone: { + FOAF.name: {rdf.literal('some one')}, + }, + BLARG.b: { + RDF.type: {BLARG.Thing}, + DCTERMS.subject: {BLARG.subj_b, BLARG.subj_bc}, + DCTERMS.title: {rdf.literal('bbbb')}, + DCTERMS.references: {BLARG.c}, + }, + BLARG.c: { + RDF.type: {BLARG.Thing}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.title: {rdf.literal('cccc')}, + }, + }), + self._create_indexcard(BLARG.b, { + BLARG.b: { + RDF.type: {BLARG.Thing}, + OWL.sameAs: {BLARG.b_same}, + DCTERMS.created: {rdf.literal(date(2012, 12, 31))}, + DCTERMS.creator: {BLARG.someone}, + DCTERMS.title: {rdf.literal('bbbb')}, + DCTERMS.subject: {BLARG.subj_b, BLARG.subj_bc}, + DCTERMS.references: {BLARG.c}, + DCTERMS.description: {rdf.literal('What is here was dangerous and repulsive to us. This message is a warning about danger. ', language='en')}, + }, + BLARG.someone: { + FOAF.name: {rdf.literal('some one')}, + }, + BLARG.c: { + RDF.type: {BLARG.Thing}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.title: {rdf.literal('cccc')}, + }, + }), + self._create_indexcard(BLARG.c, { + BLARG.c: { + RDF.type: {BLARG.Thing}, + DCTERMS.created: {rdf.literal(date(2024, 12, 31))}, + DCTERMS.creator: {BLARG.someone_else}, + DCTERMS.title: {rdf.literal('cccc')}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.description: {rdf.literal('The danger is unleashed only if you substantially disturb this place physically. 
This place is best shunned and left uninhabited.', language='en')}, + }, + BLARG.someone_else: { + FOAF.name: {rdf.literal('some one else')}, + }, + }), + ]) + + def _cardsearch_cases(self) -> Iterator[tuple[dict[str, str], set[str]]]: + # using data from _fill_test_data_for_querying + yield ( + {'cardSearchFilter[creator]': BLARG.someone}, + {BLARG.a, BLARG.b}, + ) + yield ( + {'cardSearchFilter[creator]': ','.join((BLARG.someone_else, BLARG.someone))}, + {BLARG.a, BLARG.b, BLARG.c}, + ) + yield ( + {'cardSearchFilter[resourceType]': BLARG.Thing}, + {BLARG.a, BLARG.b, BLARG.c}, + ) + yield ( + {'cardSearchFilter[resourceType]': BLARG.Nothing}, + set(), + ) + yield ( + {'cardSearchFilter[references]': BLARG.b}, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[references]': BLARG.c}, + {BLARG.a, BLARG.b}, + ) + yield ( + {'cardSearchFilter[references.references]': BLARG.c}, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[references.references][is-present]': ''}, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[references.references.subject][is-present]': ''}, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[references.references][is-absent]': ''}, + {BLARG.c, BLARG.b}, + ) + yield ( + {'cardSearchFilter[references.references.subject][is-absent]': ''}, + {BLARG.c, BLARG.b}, + ) + yield ( + {'cardSearchFilter[subject]': BLARG.subj_ac}, + {BLARG.c, BLARG.a}, + ) + yield ( + {'cardSearchFilter[subject][none-of]': BLARG.subj_ac}, + {BLARG.b}, + ) + yield ( + { + 'cardSearchFilter[subject]': BLARG.subj_bc, + 'cardSearchFilter[creator]': BLARG.someone, + }, + {BLARG.b}, + ) + yield ( + { + 'cardSearchFilter[subject]': BLARG.subj_bc, + 'cardSearchText[*]': 'cccc', + }, + {BLARG.c}, + ) + yield ( + { + 'cardSearchFilter[resourceType]': ','.join((BLARG.Thing, BLARG.Another, BLARG.Nothing)), + 'cardSearchFilter[subject]': BLARG.subj_bc, + 'cardSearchText[*,creator.name]': 'else', + }, + {BLARG.c}, + ) + yield ( + { + 'cardSearchFilter[resourceType]': BLARG.Nothing, + 
'cardSearchFilter[subject]': BLARG.subj_bc, + 'cardSearchText[*,creator.name]': 'else', + }, + set(), + ) + yield ( + {'cardSearchText[*,creator.name]': 'some'}, + {BLARG.a, BLARG.b, BLARG.c}, + ) + yield ( + { + 'cardSearchFilter[dateCreated]': '1999', + 'cardSearchText[*]': '', + }, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[dateCreated]': '1999-12'}, + {BLARG.a}, + ) + yield ( + {'cardSearchFilter[dateCreated]': '1999-11'}, + set(), + ) + yield ( + {'cardSearchFilter[dateCreated]': '2012-12-31'}, + {BLARG.b}, + ) + yield ( + {'cardSearchFilter[dateCreated][after]': '2030'}, + set(), + ) + yield ( + {'cardSearchFilter[dateCreated][after]': '2011'}, + {BLARG.b, BLARG.c}, + ) + yield ( + {'cardSearchFilter[dateCreated][before]': '2012-12'}, + {BLARG.a}, + ) + yield ( + {'cardSearchText': 'bbbb'}, + {BLARG.b}, + ) + yield ( + {'cardSearchText': '-bbbb'}, + {BLARG.a, BLARG.c}, + ) + yield ( + {'cardSearchText': 'danger'}, + {BLARG.b, BLARG.c}, + ) + yield ( + {'cardSearchText': 'dangre'}, + {BLARG.b, BLARG.c}, + ) + yield ( + {'cardSearchText': '"dangre"'}, + set(), + ) + yield ( + {'cardSearchText': 'danger -repulsive'}, + {BLARG.c}, + ) + yield ( + {'cardSearchText': '"nothing valued is here"'}, + {BLARG.a}, + ) + yield ( + {'cardSearchText': '"nothing valued here"'}, + set(), + ) + yield ( + {'cardSearchText': '"what is here"'}, + {BLARG.b}, + ) + + def _index_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): + _messages_chunk = messages.MessagesChunk( + messages.MessageType.UPDATE_INDEXCARD, + [_indexcard.id for _indexcard in indexcards], + ) + self.assertTrue(all( + _response.is_done + for _response in self.index_strategy.pls_handle_messages_chunk(_messages_chunk) + )) + self.current_index.pls_refresh() + + def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: + _suid = factories.SourceUniqueIdentifierFactory() + _raw = factories.RawDatumFactory( + suid=_suid, + ) + _indexcard = 
trove_db.Indexcard.objects.create( + source_record_suid=_suid, + ) + # an osfmap_json card is required for indexing, but not used in these tests + trove_db.DerivedIndexcard.objects.create( + upriver_indexcard=_indexcard, + deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), + ) + trove_db.LatestIndexcardRdf.objects.create( + from_raw_datum=_raw, + indexcard=_indexcard, + focus_iri=focus_iri, + rdf_as_turtle=rdf.turtle_from_tripledict(rdf_tripledict), + turtle_checksum_iri='foo', # not enforced + ) + self._indexcard_focus_by_uuid[str(_indexcard.uuid)] = focus_iri + return _indexcard diff --git a/tests/share/search/test_search_params.py b/tests/trove/test_search_params.py similarity index 99% rename from tests/share/search/test_search_params.py rename to tests/trove/test_search_params.py index 4b0f1225d..3b9f0e6f4 100644 --- a/tests/share/search/test_search_params.py +++ b/tests/trove/test_search_params.py @@ -1,6 +1,6 @@ from django.test import SimpleTestCase -from share.search.search_params import ( +from trove.trovesearch.search_params import ( Textsegment, SearchFilter, ) diff --git a/trove/derive/_base.py b/trove/derive/_base.py index b279c155d..823f494e9 100644 --- a/trove/derive/_base.py +++ b/trove/derive/_base.py @@ -27,6 +27,11 @@ def q(self, pathset): def deriver_iri() -> str: raise NotImplementedError + @staticmethod + @abc.abstractmethod + def derived_datatype_iris() -> tuple[str]: + raise NotImplementedError + @abc.abstractmethod def should_skip(self) -> bool: raise NotImplementedError diff --git a/trove/derive/oaidc_xml.py b/trove/derive/oaidc_xml.py index 037e28cf1..f22caa4dc 100644 --- a/trove/derive/oaidc_xml.py +++ b/trove/derive/oaidc_xml.py @@ -4,7 +4,7 @@ from share.oaipmh.util import format_datetime, ns, nsmap, SubEl from trove.vocab.namespaces import ( - DCMITYPE, + DCTYPE, DCTERMS, FOAF, OAI_DC, @@ -44,6 +44,11 @@ class OaiDcXmlDeriver(IndexcardDeriver): def deriver_iri() -> str: return 
str(OAI_DC) + # abstract method from IndexcardDeriver + @staticmethod + def derived_datatype_iris() -> tuple[str]: + return (RDF.XMLLiteral,) + # abstract method from IndexcardDeriver def should_skip(self) -> bool: _allowed_focustype_iris = { @@ -105,7 +110,7 @@ def _derive_card_as_xml(self) -> etree.Element: SubEl(dc_element, ns('dc', 'date'), format_datetime(_date)) for _type_iri in sorted(self.q(RDF.type)): - for _type_namespace in (OSFMAP, DCMITYPE, SHAREv2): + for _type_namespace in (OSFMAP, DCTYPE, SHAREv2): if _type_iri in _type_namespace: SubEl( dc_element, diff --git a/trove/derive/osfmap_json.py b/trove/derive/osfmap_json.py index 04d4d2b41..35856cdfb 100644 --- a/trove/derive/osfmap_json.py +++ b/trove/derive/osfmap_json.py @@ -1,7 +1,14 @@ +import datetime import json -from trove.render.osfmap_jsonld import RdfOsfmapJsonldRenderer -from trove.vocab.trove import TROVE +from primitive_metadata import primitive_rdf as rdf + +from trove import exceptions as trove_exceptions +from trove.vocab.namespaces import TROVE, RDF, OWL +from trove.vocab.osfmap import ( + OSFMAP_THESAURUS, + osfmap_shorthand, +) from ._base import IndexcardDeriver @@ -11,6 +18,11 @@ class OsfmapJsonDeriver(IndexcardDeriver): def deriver_iri() -> str: return TROVE['derive/osfmap_json'] + # abstract method from IndexcardDeriver + @staticmethod + def derived_datatype_iris() -> tuple[str]: + return (RDF.JSON,) + # abstract method from IndexcardDeriver def should_skip(self) -> bool: return False @@ -18,8 +30,118 @@ def should_skip(self) -> bool: # abstract method from IndexcardDeriver def derive_card_as_text(self): return json.dumps( - RdfOsfmapJsonldRenderer().tripledict_as_nested_jsonld( + _RdfOsfmapJsonldRenderer().tripledict_as_nested_jsonld( self.data.tripledict, self.focus_iri, ) ) + + +class _RdfOsfmapJsonldRenderer: + __nestvisiting_iris: set + + def tripledict_as_nested_jsonld(self, tripledict: rdf.RdfTripleDictionary, focus_iri: str): + self.__nestvisiting_iris = set() + return 
self.__nested_rdfobject_as_jsonld(tripledict, focus_iri) + + def rdfobject_as_jsonld(self, rdfobject: rdf.RdfObject) -> dict: + if isinstance(rdfobject, frozenset): + return self.twopledict_as_jsonld( + rdf.twopledict_from_twopleset(rdfobject), + ) + elif isinstance(rdfobject, rdf.Literal): + if not rdfobject.datatype_iris: + return {'@value': rdfobject.unicode_value} + if RDF.JSON in rdfobject.datatype_iris: + # NOTE: does not reset jsonld context (is that a problem?) + return json.loads(rdfobject.unicode_value) + _language_tag = rdfobject.language + if _language_tag: # standard language tag + return { + '@value': rdfobject.unicode_value, + '@language': _language_tag, + } + # datatype iri (or non-standard language iri) + return { + '@value': rdfobject.unicode_value, + '@type': ( + list(rdfobject.datatype_iris) + if len(rdfobject.datatype_iris) > 1 + else next(iter(rdfobject.datatype_iris)) + ), + } + elif isinstance(rdfobject, str): + return {'@id': osfmap_shorthand().compact_iri(rdfobject)} + elif isinstance(rdfobject, (float, int)): + return {'@value': rdfobject} + elif isinstance(rdfobject, datetime.date): + # just "YYYY-MM-DD" + return {'@value': datetime.date.isoformat(rdfobject)} + elif isinstance(rdfobject, tuple): + return {'@list': [ + self.rdfobject_as_jsonld(_obj) + for _obj in rdfobject + ]} + raise trove_exceptions.UnsupportedRdfObject(rdfobject) + + def twopledict_as_jsonld(self, twopledict: rdf.RdfTwopleDictionary) -> dict: + _jsonld = {} + for _pred, _objectset in twopledict.items(): + if _objectset: + _key = osfmap_shorthand().compact_iri(_pred) + _jsonld[_key] = self._list_or_single_value(_pred, [ + self.rdfobject_as_jsonld(_obj) + for _obj in _objectset + ]) + return _jsonld + + def __nested_rdfobject_as_jsonld( + self, + tripledict: rdf.RdfTripleDictionary, + rdfobject: rdf.RdfObject, + ): + _yes_nest = ( + isinstance(rdfobject, str) + and (rdfobject not in self.__nestvisiting_iris) + and (rdfobject in tripledict) + ) + if not _yes_nest: + 
return self.rdfobject_as_jsonld(rdfobject) + self.__nestvisiting_iris.add(rdfobject) + _nested_obj = ( + {} + if rdfobject.startswith('_:') # HACK: non-blank blank nodes (stop that) + else {'@id': rdfobject} + ) + for _pred, _objectset in tripledict[rdfobject].items(): + _label = osfmap_shorthand().compact_iri(_pred) + if _objectset: + _nested_obj[_label] = self._list_or_single_value( + _pred, + [ # recursion: + self.__nested_rdfobject_as_jsonld(tripledict, _obj) + for _obj in _objectset + ], + ) + self.__nestvisiting_iris.discard(rdfobject) + return _nested_obj + + def _list_or_single_value(self, predicate_iri, objectset): + _only_one_object = OWL.FunctionalProperty in ( + OSFMAP_THESAURUS + .get(predicate_iri, {}) + .get(RDF.type, ()) + ) + if _only_one_object: + if len(objectset) > 1: + raise trove_exceptions.OwlObjection(( + f'expected at most one object for <{predicate_iri}>' + f' (got {objectset})' + )) + try: + (_only_obj,) = objectset + except ValueError: + return None + else: + return _only_obj + return list(objectset) diff --git a/trove/derive/sharev2_elastic.py b/trove/derive/sharev2_elastic.py index 47552c697..064cf736b 100644 --- a/trove/derive/sharev2_elastic.py +++ b/trove/derive/sharev2_elastic.py @@ -53,6 +53,11 @@ class ShareV2ElasticDeriver(IndexcardDeriver): def deriver_iri() -> str: return SHAREv2.sharev2_elastic + # abstract method from IndexcardDeriver + @staticmethod + def derived_datatype_iris() -> tuple[str]: + return (RDF.JSON,) + # abstract method from IndexcardDeriver def should_skip(self) -> bool: _allowed_focustype_iris = { diff --git a/trove/exceptions.py b/trove/exceptions.py index 4de9d0c9d..6f68b0f20 100644 --- a/trove/exceptions.py +++ b/trove/exceptions.py @@ -1,6 +1,130 @@ +import http +import inspect + + class TroveError(Exception): - pass + # set more helpful codes in subclasses + http_status: int = http.HTTPStatus.INTERNAL_SERVER_ERROR + error_location: str = '' + + def __init__(self, *args): + super().__init__(*args) + 
self.error_location = _get_nearest_code_location() + +### +# digesting metadata class DigestiveError(TroveError): pass + + +class CannotDigestMediatype(DigestiveError): + pass + + +class CannotDigestDateValue(DigestiveError): + pass + + +### +# parsing a request + +class RequestParsingError(TroveError): + http_status = http.HTTPStatus.BAD_REQUEST + + +class InvalidQuotedIri(RequestParsingError): + pass + + +class InvalidQueryParamName(RequestParsingError): + pass + + +class InvalidFilterOperator(InvalidQueryParamName): + pass + + +class InvalidQueryParamValue(RequestParsingError): + pass + + +class InvalidSearchText(InvalidQueryParamValue): + pass + + +class MissingRequiredQueryParam(RequestParsingError): + pass + + +class InvalidRepeatedQueryParam(RequestParsingError): + pass + + +class InvalidPropertyPath(RequestParsingError): + pass + + +### +# rendering a response + +class ResponseRenderingError(TroveError): + pass + + +class CannotRenderMediatype(ResponseRenderingError): + http_status = http.HTTPStatus.NOT_ACCEPTABLE + + +### +# primitive rdf + +class PrimitiveRdfWhoopsy(TroveError): + pass + + +class IriInvalid(PrimitiveRdfWhoopsy): + pass + + +class IriMismatch(PrimitiveRdfWhoopsy): + pass + + +class UnsupportedRdfType(PrimitiveRdfWhoopsy): + pass + + +class MissingRdfType(PrimitiveRdfWhoopsy): + pass + + +class UnsupportedRdfObject(PrimitiveRdfWhoopsy): + pass + + +class ExpectedIriOrBlanknode(UnsupportedRdfObject): + pass + + +class ExpectedLiteralObject(UnsupportedRdfObject): + pass + + +class OwlObjection(PrimitiveRdfWhoopsy): + pass + + +### +# local helpers + +def _get_nearest_code_location() -> str: + try: + _raise_frame = next( + _frameinfo for _frameinfo in inspect.stack() + if _frameinfo.filename != __file__ # nearest frame not in this file + ) + return f'{_raise_frame.filename}::{_raise_frame.lineno}' + except Exception: + return 'unknown' # eh, whatever diff --git a/trove/extract/__init__.py b/trove/extract/__init__.py index a33440cc2..b31cda5d6 
100644 --- a/trove/extract/__init__.py +++ b/trove/extract/__init__.py @@ -1,3 +1,5 @@ +from trove import exceptions as trove_exceptions + from ._base import BaseRdfExtractor from .legacy_sharev2 import LegacySharev2Extractor from .turtle import TurtleRdfExtractor @@ -11,4 +13,4 @@ def get_rdf_extractor_class(mediatype) -> type[BaseRdfExtractor]: return LegacySharev2Extractor if mediatype == 'text/turtle': return TurtleRdfExtractor - raise NotImplementedError(f'no rdf extractor for media-type "{mediatype}"') + raise trove_exceptions.CannotDigestMediatype(mediatype) diff --git a/trove/extract/legacy_sharev2.py b/trove/extract/legacy_sharev2.py index e888849ab..6add0221a 100644 --- a/trove/extract/legacy_sharev2.py +++ b/trove/extract/legacy_sharev2.py @@ -6,6 +6,7 @@ from share.util.graph import MutableNode from share.regulate import Regulator +from trove import exceptions as trove_exceptions from trove.vocab.namespaces import OSFMAP, DCTERMS, FOAF, DCAT, SHAREv2, RDF from trove.vocab.osfmap import OSFMAP_NORMS from ._base import BaseRdfExtractor @@ -164,7 +165,7 @@ def _choose_iri(iris): def _focus_for_mnode(mnode: MutableNode): - return gather.focus( + return gather.Focus.new( frozenset(_iris_for_mnode(mnode)), frozenset(_focustype_iris(mnode)), {'mnode': mnode}, @@ -188,7 +189,7 @@ def _date_or_none(maybe_date) -> typing.Optional[datetime.date]: return maybe_date if maybe_date is None: return None - raise ValueError(f'expected datetime.date, str, or None (got {maybe_date})') + raise trove_exceptions.CannotDigestDateValue(maybe_date) def _focustype_iris(mnode: MutableNode) -> typing.Iterable[str]: diff --git a/trove/management/commands/ingest_rdf_vocabs.py b/trove/management/commands/ingest_rdf_vocabs.py deleted file mode 100644 index e0d4c87b3..000000000 --- a/trove/management/commands/ingest_rdf_vocabs.py +++ /dev/null @@ -1,26 +0,0 @@ -from django.conf import settings - -from share.models import ShareUser -from share.management.commands import BaseShareCommand 
-from trove import digestive_tract -from trove.vocab import VOCAB_SET - - -def ingest_vocabs(system_user: ShareUser): - for _vocab in VOCAB_SET: - digestive_tract.swallow( - from_user=system_user, - record=_vocab.turtle(), - record_identifier=_vocab.turtle_filename, - record_mediatype='text/turtle', - focus_iri=_vocab.turtle_focus_iri, - ) - - -class Command(BaseShareCommand): - def add_arguments(self, parser): - pass - - def handle(self, *args, **options): - _system_user = ShareUser.objects.get(username=settings.APPLICATION_USERNAME) - ingest_vocabs(_system_user) diff --git a/trove/models/indexcard.py b/trove/models/indexcard.py index 89baf93f1..b7a411ffa 100644 --- a/trove/models/indexcard.py +++ b/trove/models/indexcard.py @@ -4,7 +4,7 @@ from django.db import models from django.db import transaction from django.utils import timezone -from primitive_metadata import primitive_rdf +from primitive_metadata import primitive_rdf as rdf from share import models as share_db # TODO: break this dependency from share.search.index_messenger import IndexMessenger @@ -17,20 +17,20 @@ class IndexcardManager(models.Manager): def get_for_iri(self, iri: str): - _uuid = primitive_rdf.iri_minus_namespace(iri, namespace=trove_indexcard_namespace()) + _uuid = rdf.iri_minus_namespace(iri, namespace=trove_indexcard_namespace()) return self.get(uuid=_uuid) @transaction.atomic def save_indexcards_from_tripledicts( self, *, from_raw_datum: share_db.RawDatum, - rdf_tripledicts_by_focus_iri: dict[str, primitive_rdf.RdfTripleDictionary], + rdf_tripledicts_by_focus_iri: dict[str, rdf.RdfTripleDictionary], undelete: bool = False, ) -> list['Indexcard']: from_raw_datum.no_output = (not rdf_tripledicts_by_focus_iri) from_raw_datum.save(update_fields=['no_output']) _indexcards = [] - _seen_focus_identifier_ids = set() + _seen_focus_identifier_ids: set[str] = set() for _focus_iri, _tripledict in rdf_tripledicts_by_focus_iri.items(): _indexcard = self.save_indexcard_from_tripledict( 
from_raw_datum=from_raw_datum, @@ -59,7 +59,7 @@ def save_indexcards_from_tripledicts( def save_indexcard_from_tripledict( self, *, from_raw_datum: share_db.RawDatum, - rdf_tripledict: primitive_rdf.RdfTripleDictionary, + rdf_tripledict: rdf.RdfTripleDictionary, focus_iri: str, undelete: bool = False, ): @@ -187,8 +187,14 @@ class IndexcardRdf(models.Model): focus_iri = models.TextField() # exact iri used in rdf_as_turtle rdf_as_turtle = models.TextField() # TODO: store elsewhere by checksum - def as_rdf_tripledict(self) -> primitive_rdf.RdfTripleDictionary: - return primitive_rdf.tripledict_from_turtle(self.rdf_as_turtle) + def as_rdf_tripledict(self) -> rdf.RdfTripleDictionary: + return rdf.tripledict_from_turtle(self.rdf_as_turtle) + + def as_quoted_graph(self) -> rdf.QuotedGraph: + return rdf.QuotedGraph( + self.as_rdf_tripledict(), + focus_iri=self.focus_iri, + ) class Meta: abstract = True @@ -204,12 +210,12 @@ def __str__(self): def save_indexcard_rdf( indexcard: Indexcard, from_raw_datum: share_db.RawDatum, - rdf_tripledict: primitive_rdf.RdfTripleDictionary, + rdf_tripledict: rdf.RdfTripleDictionary, focus_iri: str, ) -> 'IndexcardRdf': if focus_iri not in rdf_tripledict: raise DigestiveError(f'expected {focus_iri} in {set(rdf_tripledict.keys())}') - _rdf_as_turtle = primitive_rdf.turtle_from_tripledict(rdf_tripledict) + _rdf_as_turtle = rdf.turtle_from_tripledict(rdf_tripledict) _turtle_checksum_iri = str( ChecksumIri.digest('sha-256', salt='', raw_data=_rdf_as_turtle), ) @@ -291,3 +297,15 @@ def __repr__(self): def __str__(self): return repr(self) + + @property + def deriver_cls(self): + from trove.derive import get_deriver_classes + (_deriver_cls,) = get_deriver_classes(self.deriver_identifier.raw_iri_list) + return _deriver_cls + + def as_rdf_literal(self) -> rdf.Literal: + return rdf.literal( + self.derived_text, + datatype_iris=self.deriver_cls.derived_datatype_iris(), + ) diff --git a/trove/models/resource_identifier.py 
b/trove/models/resource_identifier.py index f5d55503d..6d2fe548b 100644 --- a/trove/models/resource_identifier.py +++ b/trove/models/resource_identifier.py @@ -6,6 +6,7 @@ from django.db.models.functions import Substr, StrIndex from primitive_metadata import primitive_rdf +from trove import exceptions as trove_exceptions from trove.util.iris import ( get_sufficiently_unique_iri, get_sufficiently_unique_iri_and_scheme, @@ -194,4 +195,4 @@ def find_equivalent_iri(self, tripledict: primitive_rdf.RdfTripleDictionary) -> ) if _is_equivalent: return _iri - raise ValueError(f'could not find "{_identifier_iri}" or equivalent in {set(tripledict.keys())}') + raise trove_exceptions.IriMismatch(f'could not find "{_identifier_iri}" or equivalent in {set(tripledict.keys())}') diff --git a/trove/openapi.py b/trove/openapi.py index 954a0bff9..12ecc80b7 100644 --- a/trove/openapi.py +++ b/trove/openapi.py @@ -7,9 +7,10 @@ from share.version import __version__ from trove.util.randomness import shuffled -from trove.vocab.jsonapi import JSONAPI_MEMBERNAME, JSONAPI_MEDIATYPE +from trove.vocab import mediatypes +from trove.vocab.jsonapi import JSONAPI_MEMBERNAME from trove.vocab.namespaces import TROVE, RDFS, RDF, DCTERMS -from trove.vocab.trove import TROVE_API_VOCAB +from trove.vocab.trove import TROVE_API_THESAURUS _OPENAPI_PARAM_LOCATION_BY_RDF_TYPE = { @@ -30,7 +31,7 @@ def get_trove_openapi() -> dict: following https://spec.openapis.org/oas/v3.1.0 ''' # TODO: language parameter, get translations - _api_graph = primitive_rdf.RdfGraph(TROVE_API_VOCAB) + _api_graph = primitive_rdf.RdfGraph(TROVE_API_THESAURUS) _path_iris = shuffled(set(_api_graph.q(TROVE.search_api, TROVE.hasPath))) _label = next(_api_graph.q(TROVE.search_api, RDFS.label)) _comment = next(_api_graph.q(TROVE.search_api, RDFS.comment)) @@ -152,7 +153,7 @@ def _openapi_path(path_iri: str, api_graph: primitive_rdf.RdfGraph): '200': { 'description': 'ok', 'content': { - JSONAPI_MEDIATYPE: { + mediatypes.JSONAPI: { 
'examples': [ {'$ref': f'#/components/examples/{_example_label}'} for _example_label in _example_labels diff --git a/trove/render/__init__.py b/trove/render/__init__.py index 27f4f941b..637d948b1 100644 --- a/trove/render/__init__.py +++ b/trove/render/__init__.py @@ -1,5 +1,9 @@ from django import http +from trove import exceptions as trove_exceptions +from trove.vocab.trove import TROVE_API_THESAURUS +from trove.vocab.namespaces import NAMESPACES_SHORTHAND +from ._base import BaseRenderer from .jsonapi import RdfJsonapiRenderer from .html_browse import RdfHtmlBrowseRenderer from .turtle import RdfTurtleRenderer @@ -7,16 +11,21 @@ from .simple_json import TrovesearchSimpleJsonRenderer +__all__ = ('get_renderer',) + +RENDERERS: tuple[type[BaseRenderer], ...] = ( + RdfHtmlBrowseRenderer, + RdfJsonapiRenderer, + RdfTurtleRenderer, + RdfJsonldRenderer, + TrovesearchSimpleJsonRenderer, +) + RENDERER_BY_MEDIATYPE = { _renderer_cls.MEDIATYPE: _renderer_cls - for _renderer_cls in ( - RdfHtmlBrowseRenderer, - RdfJsonapiRenderer, - RdfTurtleRenderer, - RdfJsonldRenderer, - TrovesearchSimpleJsonRenderer, - ) + for _renderer_cls in RENDERERS } +DEFAULT_RENDERER = RdfJsonapiRenderer # the most stable one def get_renderer(request: http.HttpRequest): @@ -27,12 +36,16 @@ def get_renderer(request: http.HttpRequest): try: _chosen_renderer_cls = RENDERER_BY_MEDIATYPE[_requested_mediatype] except KeyError: - raise ValueError(f'could not find renderer for acceptMediatype={_requested_mediatype}') + raise trove_exceptions.CannotRenderMediatype(_requested_mediatype) else: for _mediatype, _renderer_cls in RENDERER_BY_MEDIATYPE.items(): if request.accepts(_mediatype): _chosen_renderer_cls = _renderer_cls break if _chosen_renderer_cls is None: - raise ValueError(f'could not find renderer for {request}') - return _chosen_renderer_cls(request=request) + _chosen_renderer_cls = DEFAULT_RENDERER + return _chosen_renderer_cls( + iri_shorthand=NAMESPACES_SHORTHAND, + thesaurus=TROVE_API_THESAURUS, 
+ request=request, + ) diff --git a/trove/render/_base.py b/trove/render/_base.py index 3962303fd..2110c511b 100644 --- a/trove/render/_base.py +++ b/trove/render/_base.py @@ -1,35 +1,63 @@ import abc -from typing import Optional +import json +from typing import Optional, ClassVar from django import http -from primitive_metadata import primitive_rdf +from primitive_metadata import primitive_rdf as rdf -from trove.vocab.namespaces import STATIC_SHORTHAND +from trove import exceptions as trove_exceptions +from trove.vocab import mediatypes class BaseRenderer(abc.ABC): - MEDIATYPE = None # override in subclasses + # required in subclasses + MEDIATYPE: ClassVar[str] + # should be set when render_error_document is overridden: + ERROR_MEDIATYPE: ClassVar[str] = mediatypes.JSONAPI + # should be set when the renderer expects a specific derived metadata format + INDEXCARD_DERIVER_IRI: ClassVar[str | None] = None def __init__( self, *, + iri_shorthand: rdf.IriShorthand, + thesaurus: rdf.RdfTripleDictionary, request: Optional[http.HttpRequest] = None, - iri_shorthand: Optional[primitive_rdf.IriShorthand] = None, ): + self.iri_shorthand = iri_shorthand + self.thesaurus = rdf.RdfGraph(thesaurus) self.request = request - self.iri_shorthand = iri_shorthand or STATIC_SHORTHAND def render_response( self, - response_data: primitive_rdf.RdfTripleDictionary, + response_data: rdf.RdfTripleDictionary, response_focus_iri: str, **response_kwargs, ): return http.HttpResponse( - content=self.render_document(response_data, response_focus_iri), + content=self.render_document(rdf.RdfGraph(response_data), response_focus_iri), content_type=self.MEDIATYPE, **response_kwargs, ) + def render_error_response(self, error: trove_exceptions.TroveError): + return http.HttpResponse( + content=self.render_error_document(error), + content_type=self.ERROR_MEDIATYPE, + status=error.http_status, + ) + @abc.abstractmethod - def render_document(self, data: primitive_rdf.RdfTripleDictionary, focus_iri: str) -> 
str: + def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: raise NotImplementedError + + def render_error_document(self, error: trove_exceptions.TroveError) -> str: + # may override, but default to jsonapi + return json.dumps( + {'errors': [{ # https://jsonapi.org/format/#error-objects + 'status': error.http_status, + 'code': error.error_location, + 'title': error.__class__.__name__, + 'detail': str(error), + }]}, + indent=2, + ) diff --git a/trove/render/html_browse.py b/trove/render/html_browse.py index e4c799490..4e6df7640 100644 --- a/trove/render/html_browse.py +++ b/trove/render/html_browse.py @@ -16,13 +16,13 @@ from trove.util.iris import get_sufficiently_unique_iri from trove.util.randomness import shuffled -from trove.vocab.jsonapi import JSONAPI_MEDIATYPE -from trove.vocab.namespaces import TROVE, RDF, FOAF +from trove.vocab import mediatypes +from trove.vocab.namespaces import RDF from trove.vocab.trove import trove_browse_link from ._base import BaseRenderer -STABLE_MEDIATYPES = (JSONAPI_MEDIATYPE,) -UNSTABLE_MEDIATYPES = ('text/turtle', 'application/ld+json', 'application/json') +STABLE_MEDIATYPES = (mediatypes.JSONAPI,) +UNSTABLE_MEDIATYPES = (mediatypes.TURTLE, mediatypes.JSONLD, mediatypes.JSON,) class RdfHtmlBrowseRenderer(BaseRenderer): @@ -35,7 +35,7 @@ def __init__(self, **kwargs): self.__visiting_iris = None self.__heading_depth = None - def render_document(self, data: primitive_rdf.RdfTripleDictionary, focus_iri: str) -> str: + def render_document(self, data: primitive_rdf.RdfGraph, focus_iri: str) -> str: self.data = data with self.__rendering(): with self.__nest('head'): @@ -87,12 +87,8 @@ def __mediatype_link(self, mediatype: str): _link.text = 'documented use' _link.tail = ')' - def __render_subj(self, subj_iri: str, twopledict=None, start_collapsed=False): - _twopledict = ( - self.data.get(subj_iri, {}) - if twopledict is None - else twopledict - ) + def __render_subj(self, subj_iri: str, start_collapsed=False): + 
_twopledict = self.data.tripledict.get(subj_iri, {}) with self.__visiting(subj_iri): with self.__h_tag() as _h_tag: with self.__nest( @@ -120,31 +116,15 @@ def __twoples(self, twopledict: primitive_rdf.RdfTwopleDictionary): for _pred, _obj_set in shuffled(twopledict.items()): with self.__nest('li', {'class': 'Browse__twople'}, visible=True): self.__leaf_link(_pred) - # TODO: use a vocab, not static property iris - if _pred == TROVE.resourceMetadata and all( - isinstance(_obj, primitive_rdf.QuotedTriple) - for _obj in _obj_set - ): - _focus_iris = twopledict[FOAF.primaryTopic] # assumed - _focus_iri = None - _quoted_triples = set() + with self.__nest('ul', {'class': 'Browse__objectset'}): for _obj in shuffled(_obj_set): - _quoted_triples.add(_obj) - (_subj, _, _) = _obj - if _subj in _focus_iris: - _focus_iri = _subj - assert _focus_iri is not None - self.__quoted_graph(_focus_iri, _quoted_triples) - else: - with self.__nest('ul', {'class': 'Browse__objectset'}): - for _obj in shuffled(_obj_set): - with self.__nest('li', {'class': 'Browse__object'}, visible=True): - self.__obj(_obj) + with self.__nest('li', {'class': 'Browse__object'}, visible=True): + self.__obj(_obj) def __obj(self, obj: primitive_rdf.RdfObject): if isinstance(obj, str): # iri # TODO: detect whether indexcard? 
- if obj in self.data: + if obj in self.data.tripledict: if obj in self.__visiting_iris: self.__leaf_link(obj) # TODO: consider else: @@ -160,6 +140,8 @@ def __obj(self, obj: primitive_rdf.RdfObject): self.__literal(obj) elif isinstance(obj, (float, int, datetime.date)): self.__literal(primitive_rdf.literal(obj)) + elif isinstance(obj, primitive_rdf.QuotedGraph): + self.__quoted_graph(obj) def __literal(self, literal: primitive_rdf.Literal): # TODO language tag, datatypes @@ -169,13 +151,15 @@ def __literal(self, literal: primitive_rdf.Literal): for _datatype in literal.datatype_iris ) # TODO: checksum_iri, literal_iri - with self.__nest('article'): + with self.__nest('article', attrs={'class': 'Browse__literal'}): if _is_markdown: # TODO: tests for safe_mode _html = markdown2.markdown(literal.unicode_value, safe_mode='escape') self.__current_element.append(etree_fromstring(f'{_html}')) else: self.__leaf('q', text=literal.unicode_value) + for _datatype_iri in literal.datatype_iris: + self.__leaf_link(_datatype_iri) def __sequence(self, sequence_twoples: frozenset): _obj_in_order = list(primitive_rdf.sequence_objects_in_order(sequence_twoples)) @@ -186,12 +170,9 @@ def __sequence(self, sequence_twoples: frozenset): with self.__nest('li', visible=True): self.__obj(_seq_obj) - def __quoted_graph(self, focus_iri, quoted_triples): - _quoted_graph = primitive_rdf.RdfGraph({}) - for _triple in quoted_triples: - _quoted_graph.add(_triple) - with self.__quoted_data(_quoted_graph.tripledict): - self.__render_subj(focus_iri, start_collapsed=True) + def __quoted_graph(self, quoted_graph: primitive_rdf.QuotedGraph): + with self.__quoted_data(quoted_graph.tripledict): + self.__render_subj(quoted_graph.focus_iri, start_collapsed=True) ### # private html-building helpers @@ -233,7 +214,7 @@ def __h_tag(self): def __quoted_data(self, quoted_data: dict): _outer_data = self.data _outer_visiting_iris = self.__visiting_iris - self.data = quoted_data + self.data = 
primitive_rdf.RdfGraph(quoted_data) self.__visiting_iris = set() try: yield @@ -262,11 +243,14 @@ def __leaf(self, tag_name, *, text=None, attrs=None): if text is not None: _leaf_element.text = text - def __nest_link(self, iri: str): - return self.__nest('a', attrs={'href': self.__href_for_iri(iri)}) + def __nest_link(self, iri: str, *, attrs=None): + return self.__nest('a', attrs={ + **(attrs or {}), + 'href': self.__href_for_iri(iri), + }) - def __leaf_link(self, iri: str): - with self.__nest_link(iri) as _link: + def __leaf_link(self, iri: str, *, attrs=None): + with self.__nest_link(iri, attrs=attrs) as _link: _link.text = self.iri_shorthand.compact_iri(iri) def __href_for_iri(self, iri: str): diff --git a/trove/render/jsonapi.py b/trove/render/jsonapi.py index 37a2260f9..3f8e3f40a 100644 --- a/trove/render/jsonapi.py +++ b/trove/render/jsonapi.py @@ -6,13 +6,14 @@ from primitive_metadata import primitive_rdf +from trove import exceptions as trove_exceptions from trove.vocab.jsonapi import ( - JSONAPI_MEDIATYPE, JSONAPI_MEMBERNAME, JSONAPI_RELATIONSHIP, JSONAPI_ATTRIBUTE, JSONAPI_LINK_OBJECT, ) +from trove.vocab import mediatypes from trove.vocab.namespaces import ( OSFMAP, OWL, @@ -20,7 +21,7 @@ TROVE, ) from trove.vocab.trove import ( - TROVE_API_VOCAB, + TROVE_API_THESAURUS, trove_indexcard_namespace, ) from ._base import BaseRenderer @@ -49,26 +50,27 @@ class RdfJsonapiRenderer(BaseRenderer): note: does not support relationship links (or many other jsonapi features) ''' - MEDIATYPE = JSONAPI_MEDIATYPE + MEDIATYPE = mediatypes.JSONAPI + INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - __to_include = None + __to_include: set[primitive_rdf.RdfObject] | None = None def __init__(self, **kwargs): super().__init__(**kwargs) - self._vocab = primitive_rdf.RdfGraph(TROVE_API_VOCAB) + self._vocab = primitive_rdf.RdfGraph(TROVE_API_THESAURUS) self._identifier_object_cache = {} # TODO: move "id namespace" to vocab (property on each type) self._id_namespace_set = 
[trove_indexcard_namespace()] - def render_document(self, data: primitive_rdf.RdfTripleDictionary, focus_iri: str) -> str: - self._data = primitive_rdf.RdfGraph(data) + def render_document(self, data: primitive_rdf.RdfGraph, focus_iri: str) -> str: + self._data = data return json.dumps( self.render_dict(focus_iri), indent=2, # TODO: pretty-print query param? ) def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> dict: - _primary_data = None + _primary_data: dict | list | None = None _included_data = [] with self._contained__to_include() as _to_include: if isinstance(primary_iris, str): @@ -125,13 +127,13 @@ def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode): 'type': self._single_typename(_type_iris), } else: - raise ValueError(f'expected str or frozenset (got {iri_or_blanknode})') + raise trove_exceptions.ExpectedIriOrBlanknode(f'expected str or frozenset (got {iri_or_blanknode})') self._identifier_object_cache[iri_or_blanknode] = _id_obj return _id_obj def _single_typename(self, type_iris: list[str]): if not type_iris: - raise ValueError('need at least one type iri') + raise trove_exceptions.MissingRdfType if len(type_iris) == 1: return self._membername_for_iri(type_iris[0]) # choose one predictably, preferring osfmap and trove @@ -141,7 +143,7 @@ def _single_typename(self, type_iris: list[str]): return self._membername_for_iri(_type_iris[0]) return self._membername_for_iri(sorted(type_iris)[0]) - def _membername_for_iri(self, iri: str, *, iri_fallback=False): + def _membername_for_iri(self, iri: str): try: _membername = next(self._vocab.q(iri, JSONAPI_MEMBERNAME)) except StopIteration: @@ -149,13 +151,8 @@ def _membername_for_iri(self, iri: str, *, iri_fallback=False): else: if isinstance(_membername, primitive_rdf.Literal): return _membername.unicode_value - raise ValueError(f'found non-text membername {_membername}') - if iri_fallback: - return iri - _compact = self.iri_shorthand.compact_iri(iri) - if _compact != iri: - return 
_compact - raise ValueError(f'could not find membername for <{iri}>') + raise trove_exceptions.ExpectedLiteralObject((iri, JSONAPI_MEMBERNAME, _membername)) + return self.iri_shorthand.compact_iri(iri) def _resource_id_for_blanknode(self, blanknode: frozenset): # content-addressed blanknode id (maybe-TODO: care about hash stability, @@ -172,12 +169,9 @@ def _resource_id_for_iri(self, iri: str): def _render_field(self, predicate_iri, object_set, *, into: dict): _is_relationship = (predicate_iri, RDF.type, JSONAPI_RELATIONSHIP) in self._vocab _is_attribute = (predicate_iri, RDF.type, JSONAPI_ATTRIBUTE) in self._vocab + _field_key = self._membername_for_iri(predicate_iri) _doc_key = 'meta' # unless configured for jsonapi, default to unstructured 'meta' - try: - _field_key = self._membername_for_iri(predicate_iri) - except ValueError: - _field_key = predicate_iri # use the full iri as key - else: # got a valid membername; may go in attributes or relationships + if ':' not in _field_key: if _is_relationship: _doc_key = 'relationships' elif _is_attribute: @@ -193,7 +187,7 @@ def _one_or_many(self, predicate_iri: str, datalist: list): _only_one = (predicate_iri, RDF.type, OWL.FunctionalProperty) in self._vocab if _only_one: if len(datalist) > 1: - raise ValueError(f'multiple objects for to-one relation <{predicate_iri}>: {datalist}') + raise trove_exceptions.OwlObjection(f'multiple objects for to-one relation <{predicate_iri}>: {datalist}') return (datalist[0] if datalist else None) return datalist @@ -271,7 +265,7 @@ def _pls_include(self, item): if self.__to_include is not None: self.__to_include.add(item) - def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict: + def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict | list | str | float | int: if isinstance(rdfobject, frozenset): if (RDF.type, RDF.Seq) in rdfobject: return [ @@ -280,7 +274,7 @@ def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict: 
] _json_blanknode = {} for _pred, _obj_set in primitive_rdf.twopledict_from_twopleset(rdfobject).items(): - _key = self._membername_for_iri(_pred, iri_fallback=True) + _key = self._membername_for_iri(_pred) _json_blanknode[_key] = self._one_or_many(_pred, self._attribute_datalist(_obj_set)) return _json_blanknode if isinstance(rdfobject, primitive_rdf.Literal): @@ -297,4 +291,4 @@ def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict: elif isinstance(rdfobject, datetime.date): # just "YYYY-MM-DD" return datetime.date.isoformat(rdfobject) - raise ValueError(f'unrecognized RdfObject (got {rdfobject})') + raise trove_exceptions.UnsupportedRdfObject(rdfobject) diff --git a/trove/render/jsonld.py b/trove/render/jsonld.py index 82deddc87..6f07a4073 100644 --- a/trove/render/jsonld.py +++ b/trove/render/jsonld.py @@ -1,18 +1,167 @@ +import contextlib +import datetime import json -from primitive_metadata import primitive_rdf +from primitive_metadata import primitive_rdf as rdf +from trove import exceptions as trove_exceptions +from trove.vocab.namespaces import RDF, OWL, TROVE +from trove.vocab import mediatypes from ._base import BaseRenderer +_PREDICATES_OF_FLEXIBLE_CARDINALITY = { + # RDF.type, + RDF.value, +} + + class RdfJsonldRenderer(BaseRenderer): - MEDIATYPE = 'application/ld+json' + MEDIATYPE = mediatypes.JSONLD + INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] + + __visiting_iris: set | None = None - def render_document(self, data: primitive_rdf.RdfTripleDictionary, focus_iri: str) -> str: - _jsonld_serializer = primitive_rdf.JsonldSerializer(self.iri_shorthand) - # TODO: use focus_iri + def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: return json.dumps( - _jsonld_serializer.tripledict_as_jsonld(data, with_context=True), + self.render_jsonld(data, focus_iri), indent=2, sort_keys=True, ) + + def render_jsonld( + self, + rdfgraph: rdf.RdfGraph, + focus_iri: str, + with_context: bool = False, + ) -> dict: + with 
self.iri_shorthand.track_used_shorts() as _used_shorts: + _rendered = self.rdfobject_as_jsonld(focus_iri, rdfgraph.tripledict) + if with_context: + _rendered['@context'] = { + _shorthand_name: self.iri_shorthand.expand_iri(_shorthand_name) + for _shorthand_name in _used_shorts + } + return _rendered + + def literal_as_jsonld(self, rdfliteral: rdf.Literal): + if not rdfliteral.datatype_iris or rdfliteral.datatype_iris == {RDF.string}: + return {'@value': rdfliteral.unicode_value} + if RDF.JSON in rdfliteral.datatype_iris: + # NOTE: does not reset jsonld context (is that a problem?) + return json.loads(rdfliteral.unicode_value) + _language_tag = rdfliteral.language + if _language_tag: # standard language tag + return { + '@value': rdfliteral.unicode_value, + '@language': _language_tag, + } + # datatype iri (or non-standard language iri) + _datatype_iris = [ + self.iri_shorthand.compact_iri(_datatype_iri) + for _datatype_iri in rdfliteral.datatype_iris + ] + return { + '@value': rdfliteral.unicode_value, + '@type': ( + _datatype_iris + if len(_datatype_iris) != 1 + else _datatype_iris[0] + ), + } + + def rdfobject_as_jsonld( + self, + rdfobject: rdf.RdfObject, + tripledict: rdf.RdfTripleDictionary | None = None, + ): + if isinstance(rdfobject, str): + return self.iri_as_jsonld(rdfobject, tripledict) + elif isinstance(rdfobject, frozenset): + if (RDF.type, RDF.Seq) in rdfobject: + # TODO: jsonld has lists but not sequences -- switch to lists? 
+ return {'@list': [ + self.rdfobject_as_jsonld(_sequence_obj, tripledict) + for _sequence_obj in rdf.sequence_objects_in_order(rdfobject) + ]} + return self.blanknode_as_jsonld(rdfobject, tripledict) + elif isinstance(rdfobject, rdf.Literal): + return self.literal_as_jsonld(rdfobject) + elif isinstance(rdfobject, (float, int, datetime.date)): + return self.literal_as_jsonld(rdf.literal(rdfobject)) + raise trove_exceptions.UnsupportedRdfObject(rdfobject) + + def blanknode_as_jsonld( + self, + blanknode: rdf.RdfBlanknode, + tripledict: rdf.RdfTripleDictionary | None = None, + ) -> dict: + _twopledict = rdf.twopledict_from_twopleset(blanknode) + _jsonld = {} + for _pred, _objectset in _twopledict.items(): + if _objectset: + _key = self.iri_shorthand.compact_iri(_pred) + _jsonld[_key] = self._list_or_single_value(_pred, [ + self.rdfobject_as_jsonld(_obj, tripledict) + for _obj in _objectset + ]) + return _jsonld + + def iri_as_jsonld( + self, + iri: str, + tripledict: rdf.RdfTripleDictionary | None = None, + ): + if (not tripledict) or (iri not in tripledict) or self.__already_visiting(iri): + return self.iri_shorthand.compact_iri(iri) + with self.__visiting(iri): + _nested_obj = ( + {} + if iri.startswith('_:') # HACK: non-blank blank nodes (stop that) + else {'@id': self.iri_shorthand.compact_iri(iri)} + ) + for _pred, _objectset in tripledict[iri].items(): + if _objectset: + _nested_obj[self.iri_shorthand.compact_iri(_pred)] = self._list_or_single_value( + _pred, + [ # indirect recursion: + self.rdfobject_as_jsonld(_obj, tripledict) + for _obj in _objectset + ], + ) + return _nested_obj + + def _list_or_single_value(self, predicate_iri: str, objectlist: list): + _only_one_object = ( + (predicate_iri, RDF.type, OWL.FunctionalProperty) in self.thesaurus + ) + if _only_one_object: + if len(objectlist) > 1: + raise trove_exceptions.OwlObjection(( + f'expected at most one object for <{predicate_iri}>' + f' (got {objectlist})' + )) + try: + (_only_obj,) = objectlist + 
except ValueError: + return None + else: + return _only_obj + if predicate_iri in _PREDICATES_OF_FLEXIBLE_CARDINALITY: + return ( + objectlist + if len(objectlist) != 1 + else objectlist[0] + ) + return objectlist + + @contextlib.contextmanager + def __visiting(self, iri: str): + if self.__visiting_iris is None: + self.__visiting_iris = set() + self.__visiting_iris.add(iri) + yield + self.__visiting_iris.discard(iri) + + def __already_visiting(self, iri: str) -> bool: + return bool(self.__visiting_iris and (iri in self.__visiting_iris)) diff --git a/trove/render/osfmap_jsonld.py b/trove/render/osfmap_jsonld.py deleted file mode 100644 index 73b547bad..000000000 --- a/trove/render/osfmap_jsonld.py +++ /dev/null @@ -1,131 +0,0 @@ -import datetime -import json - -from primitive_metadata import primitive_rdf - -from trove.vocab.osfmap import ( - OSFMAP_VOCAB, - osfmap_labeler, -) -from trove.vocab.namespaces import RDF, OWL -from ._base import BaseRenderer - - -# TODO: use RdfJsonldRenderer instead -class RdfOsfmapJsonldRenderer(BaseRenderer): - vocabulary = OSFMAP_VOCAB - labeler = osfmap_labeler - - def render_document(self, data: primitive_rdf.RdfTripleDictionary, focus_iri: str) -> str: - _rendered = self.tripledict_as_nested_jsonld(data, focus_iri) - _rendered['@context'] = self.simple_jsonld_context() - return json.dumps(_rendered, indent=2, sort_keys=True) - - def simple_jsonld_context(self): - return self.labeler.all_iris_by_label() - - def tripledict_as_nested_jsonld(self, tripledict: primitive_rdf.RdfTripleDictionary, focus_iri: str): - self.__nestvisiting_iris = set() - return self.__nested_rdfobject_as_jsonld(tripledict, focus_iri) - - def rdfobject_as_jsonld(self, rdfobject: primitive_rdf.RdfObject) -> dict: - if isinstance(rdfobject, frozenset): - return self.twopledict_as_jsonld( - primitive_rdf.twopledict_from_twopleset(rdfobject), - ) - elif isinstance(rdfobject, primitive_rdf.Literal): - if not rdfobject.datatype_iris: - return {'@value': 
rdfobject.unicode_value} - if RDF.JSON in rdfobject.datatype_iris: - # NOTE: does not reset jsonld context (is that a problem?) - return json.loads(rdfobject.unicode_value) - _language_tag = rdfobject.language - if _language_tag: # standard language tag - return { - '@value': rdfobject.unicode_value, - '@language': _language_tag, - } - # datatype iri (or non-standard language iri) - return { - '@value': rdfobject.unicode_value, - '@type': ( - list(rdfobject.datatype_iris) - if len(rdfobject.datatype_iris) > 1 - else next(iter(rdfobject.datatype_iris)) - ), - } - elif isinstance(rdfobject, str): - return {'@id': self.labeler.get_label_or_iri(rdfobject)} - elif isinstance(rdfobject, (float, int)): - return {'@value': rdfobject} - elif isinstance(rdfobject, datetime.date): - # just "YYYY-MM-DD" - return {'@value': datetime.date.isoformat(rdfobject)} - elif isinstance(rdfobject, tuple): - return {'@list': [ - self.rdfobject_as_jsonld(_obj) - for _obj in rdfobject - ]} - raise ValueError(f'unrecognized RdfObject (got {rdfobject})') - - def twopledict_as_jsonld(self, twopledict: primitive_rdf.RdfTwopleDictionary) -> dict: - _jsonld = {} - for _pred, _objectset in twopledict.items(): - if _objectset: - _key = self.labeler.get_label_or_iri(_pred) - _jsonld[_key] = self._list_or_single_value(_pred, [ - self.rdfobject_as_jsonld(_obj) - for _obj in _objectset - ]) - return _jsonld - - def __nested_rdfobject_as_jsonld( - self, - tripledict: primitive_rdf.RdfTripleDictionary, - rdfobject: primitive_rdf.RdfObject, - ): - _yes_nest = ( - isinstance(rdfobject, str) - and (rdfobject not in self.__nestvisiting_iris) - and (rdfobject in tripledict) - ) - if not _yes_nest: - return self.rdfobject_as_jsonld(rdfobject) - self.__nestvisiting_iris.add(rdfobject) - _nested_obj = ( - {} - if rdfobject.startswith('_:') # HACK: non-blank blank nodes (stop that) - else {'@id': rdfobject} - ) - for _pred, _objectset in tripledict[rdfobject].items(): - _label = 
self.labeler.get_label_or_iri(_pred) - if _objectset: - _nested_obj[_label] = self._list_or_single_value( - _pred, - [ # recursion: - self.__nested_rdfobject_as_jsonld(tripledict, _obj) - for _obj in _objectset - ], - ) - self.__nestvisiting_iris.discard(rdfobject) - return _nested_obj - - def _list_or_single_value(self, predicate_iri, objectset): - _only_one_object = OWL.FunctionalProperty in ( - self.vocabulary - .get(predicate_iri, {}) - .get(RDF.type, ()) - ) - if _only_one_object: - if len(objectset) > 1: - raise ValueError(( - f'expected at most one object for <{predicate_iri}>' - f' (got {objectset})' - )) - try: - (_only_obj,) = objectset - except ValueError: - return None - else: - return _only_obj - return list(objectset) diff --git a/trove/render/simple_json.py b/trove/render/simple_json.py index 064eba99b..68f16362c 100644 --- a/trove/render/simple_json.py +++ b/trove/render/simple_json.py @@ -2,35 +2,36 @@ from primitive_metadata import primitive_rdf as rdf +from trove import exceptions as trove_exceptions from trove.vocab.jsonapi import ( JSONAPI_LINK_OBJECT, JSONAPI_MEMBERNAME, ) +from trove.vocab import mediatypes from trove.vocab.namespaces import TROVE, RDF from ._base import BaseRenderer class TrovesearchSimpleJsonRenderer(BaseRenderer): - '''for "simple json" search api -- very entangled with trove/trovesearch_gathering.py + '''for "simple json" search api -- very entangled with trove/trovesearch/trovesearch_gathering.py ''' - MEDIATYPE = 'application/json' + MEDIATYPE = mediatypes.JSON + INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def render_document(self, data: rdf.RdfTripleDictionary, focus_iri: str) -> str: - _focustypes = data[focus_iri][RDF.type] - _graph = rdf.RdfGraph(data) + def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: + _focustypes = set(data.q(focus_iri, RDF.type)) if TROVE.Cardsearch in _focustypes: - _jsonable = self._render_cardsearch(_graph, focus_iri) + _jsonable = self._render_cardsearch(data, 
focus_iri) elif TROVE.Valuesearch in _focustypes: - _jsonable = self._render_valuesearch(_graph, focus_iri) + _jsonable = self._render_valuesearch(data, focus_iri) elif TROVE.Indexcard in _focustypes: - _jsonable = self._render_card(_graph, focus_iri) + _jsonable = self._render_card(data, focus_iri) else: - raise NotImplementedError(f'simplejson not implemented for any of {_focustypes}') - # TODO: links, total in 'meta' + raise trove_exceptions.UnsupportedRdfType(_focustypes) return json.dumps({ 'data': _jsonable, - 'links': self._render_links(_graph, focus_iri), - 'meta': self._render_meta(_graph, focus_iri), + 'links': self._render_links(data, focus_iri), + 'meta': self._render_meta(data, focus_iri), }, indent=2) def _render_cardsearch(self, graph: rdf.RdfGraph, cardsearch_iri: str): @@ -59,7 +60,7 @@ def _render_result(self, graph: rdf.RdfGraph, search_result_blanknode: rdf.RdfBl ) return self._render_card(graph, _card) - def _render_card(self, graph: rdf.RdfGraph, card: str | rdf.RdfBlanknode): + def _render_card(self, graph: rdf.RdfGraph, card: rdf.RdfObject): # just the card contents if isinstance(card, str): _card_contents = next(graph.q(card, TROVE.resourceMetadata)) @@ -70,7 +71,7 @@ def _render_card(self, graph: rdf.RdfGraph, card: str | rdf.RdfBlanknode): if _pred == TROVE.resourceMetadata ) else: - raise NotImplementedError + raise trove_exceptions.ExpectedIriOrBlanknode(card) assert isinstance(_card_contents, rdf.Literal) assert RDF.JSON in _card_contents.datatype_iris _json_contents = json.loads(_card_contents.unicode_value) @@ -79,7 +80,7 @@ def _render_card(self, graph: rdf.RdfGraph, card: str | rdf.RdfBlanknode): return _json_contents def _render_meta(self, graph: rdf.RdfGraph, focus_iri: str): - _meta = {} + _meta: dict[str, int | str] = {} try: _total = next(graph.q(focus_iri, TROVE.totalResultCount)) if isinstance(_total, int): diff --git a/trove/render/turtle.py b/trove/render/turtle.py index c6ae49914..c035e773a 100644 --- 
a/trove/render/turtle.py +++ b/trove/render/turtle.py @@ -1,10 +1,12 @@ -from primitive_metadata.primitive_rdf import turtle_from_tripledict +from primitive_metadata import primitive_rdf as rdf +from trove.vocab.namespaces import TROVE from ._base import BaseRenderer class RdfTurtleRenderer(BaseRenderer): MEDIATYPE = 'text/turtle' + INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def render_document(self, rdf_graph, focus_iri): - return turtle_from_tripledict(rdf_graph, focus=focus_iri) + def render_document(self, rdf_graph: rdf.RdfGraph, focus_iri: str): + return rdf.turtle_from_tripledict(rdf_graph.tripledict, focus=focus_iri) diff --git a/trove/static/css/browse.css b/trove/static/css/browse.css index fb2714c21..163364611 100644 --- a/trove/static/css/browse.css +++ b/trove/static/css/browse.css @@ -29,6 +29,17 @@ border: solid 0.382rem rgba(0,0,0,0.191); } +details.Browse__card > summary::before { + content: '‽'; + display: inline-block; + transition-property: rotate; + transition-duration: 1s; +} + +details.Browse__card[open] > summary::before { + rotate: var(--random-turn); +} + .BrowseWrapper > .Browse__card { margin: 1em; } @@ -90,6 +101,13 @@ gap: 0.382rem; } +.Browse__literal { + display: flex; + flex-direction: row; + flex-wrap: wrap; + gap: 0.382rem; +} + /* .Browse :focus-within { backdrop-filter: hue-rotate(var(--hue-rotate-step)); diff --git a/trove/trovesearch/__init__.py b/trove/trovesearch/__init__.py new file mode 100644 index 000000000..ea9b78354 --- /dev/null +++ b/trove/trovesearch/__init__.py @@ -0,0 +1 @@ +__all__ = () diff --git a/share/search/search_params.py b/trove/trovesearch/search_params.py similarity index 87% rename from share/search/search_params.py rename to trove/trovesearch/search_params.py index 1794d3c28..14d3a6673 100644 --- a/share/search/search_params.py +++ b/trove/trovesearch/search_params.py @@ -9,7 +9,7 @@ from django.http import QueryDict from primitive_metadata import primitive_rdf -from share.search import 
exceptions +from trove import exceptions as trove_exceptions from trove.util.queryparams import ( QueryparamDict, QueryparamName, @@ -18,13 +18,13 @@ queryparams_from_querystring, ) from trove.vocab.osfmap import ( - osfmap_labeler, + osfmap_shorthand, is_date_property, suggested_property_paths, - OSFMAP_VOCAB, + OSFMAP_THESAURUS, ) -from trove.vocab.trove import trove_labeler -from trove.vocab.namespaces import RDF, TROVE, OWL +from trove.vocab.trove import trove_shorthand +from trove.vocab.namespaces import RDF, TROVE, OWL, NAMESPACES_SHORTHAND logger = logging.getLogger(__name__) @@ -49,6 +49,8 @@ # special path-step that matches any property GLOB_PATHSTEP = '*' +ONE_GLOB_PROPERTYPATH = (GLOB_PATHSTEP,) +DEFAULT_PROPERTYPATH_SET = frozenset([ONE_GLOB_PROPERTYPATH]) ### @@ -57,7 +59,7 @@ @dataclasses.dataclass(frozen=True) class BaseTroveParams: - iri_shorthand: primitive_rdf.IriShorthand + iri_shorthand: primitive_rdf.IriShorthand = dataclasses.field(repr=False) include: frozenset[tuple[str, ...]] accept_mediatype: str | None @@ -74,7 +76,7 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: # subclasses should override and add their fields to super().parse_queryparams(queryparams) return { 'iri_shorthand': cls._gather_shorthand(queryparams), - 'include': cls._gather_include(queryparams.get('include', [])), + 'include': cls._gather_include(queryparams), 'accept_mediatype': _get_single_value(queryparams, QueryparamName('acceptMediatype')), } @@ -96,10 +98,10 @@ def _gather_shorthand(cls, queryparams: QueryparamDict): try: (_shortname,) = _qp_name.bracketed_names except ValueError: - raise # TODO: 400 response + raise trove_exceptions.InvalidQueryParamName(_qp_name) else: _prefixmap[_shortname] = _iri - return primitive_rdf.IriShorthand(_prefixmap) + return NAMESPACES_SHORTHAND.with_update(_prefixmap) @classmethod def _gather_include(cls, queryparams: QueryparamDict): @@ -113,11 +115,11 @@ class Textsegment: is_fuzzy: bool = True is_negated: bool = 
False is_openended: bool = False - propertypath_set: frozenset[tuple[str, ...]] = frozenset((GLOB_PATHSTEP,)) + propertypath_set: frozenset[tuple[str, ...]] = DEFAULT_PROPERTYPATH_SET def __post_init__(self): if self.is_negated and self.is_fuzzy: - raise ValueError(f'{self}: cannot have both is_negated and is_fuzzy') + raise trove_exceptions.InvalidSearchText(self.text, "search cannot be both negated and fuzzy") def words(self): return self.text.split() @@ -232,7 +234,7 @@ def queryparams_from_textsegments(self, queryparam_family: str, textsegments): for _propertypath_set, _combinable_segments in _by_propertypath_set.items(): _qp_name = QueryparamName( queryparam_family, - propertypath_set_key(_propertypath_set), + (propertypath_set_key(_propertypath_set),), ) _qp_value = ' '.join( _textsegment.as_searchtext() @@ -263,11 +265,11 @@ class FilterOperator(enum.Enum): @classmethod def from_shortname(cls, shortname): - _iri = trove_labeler.iri_for_label(shortname) + _iri = trove_shorthand().expand_iri(shortname) return cls(_iri) def to_shortname(self) -> str: - return trove_labeler.label_for_iri(self.value) + return trove_shorthand().compact_iri(self.value) def is_date_operator(self): return self in (self.BEFORE, self.AFTER, self.AT_DATE) @@ -280,7 +282,7 @@ def is_valueless_operator(self): operator: FilterOperator value_set: frozenset[str] - propertypath_set: frozenset[tuple[str, ...]] = frozenset((GLOB_PATHSTEP,)) + propertypath_set: frozenset[tuple[str, ...]] = DEFAULT_PROPERTYPATH_SET @classmethod def from_queryparam_family(cls, queryparams: QueryparamDict, queryparam_family: str): @@ -299,7 +301,7 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str): try: # "filter[]" (with default operator) (_serialized_path_set,) = param_name.bracketed_names except ValueError: - raise exceptions.InvalidSearchParam( + raise trove_exceptions.InvalidQueryParamName( f'expected one or two bracketed queryparam-name segments' f' ({len(param_name.bracketed_names)} 
in "{param_name}")' ) @@ -308,7 +310,10 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str): try: _operator = SearchFilter.FilterOperator.from_shortname(_operator_value) except ValueError: - raise ValueError(f'unrecognized search-filter operator "{_operator_value}"') + raise trove_exceptions.InvalidQueryParamName( + str(param_name), + f'unknown filter operator "{_operator_value}"', + ) _propertypath_set = _parse_propertypath_set(_serialized_path_set) _is_date_filter = all( is_date_property(_path[-1]) @@ -321,19 +326,17 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str): else SearchFilter.FilterOperator.ANY_OF ) if _operator.is_date_operator() and not _is_date_filter: - raise ValueError(f'cannot use date operator {_operator.value} on non-date property') + raise trove_exceptions.InvalidQueryParamName( + str(param_name), + f'cannot use date operator "{_operator.to_shortname()}" on non-date property' + ) _value_list = [] if not _operator.is_valueless_operator(): for _value in split_queryparam_value(param_value): if _is_date_filter: _value_list.append(_value) # TODO: vali-date else: - try: - _iri = osfmap_labeler.iri_for_label(_value) - except KeyError: # not a known shorthand - _value_list.append(_value) # assume iri already - else: - _value_list.append(_iri) + _value_list.append(osfmap_shorthand().expand_iri(_value)) return cls( value_set=frozenset(_value_list), operator=_operator, @@ -362,7 +365,7 @@ def as_queryparam(self, queryparam_family: str): self.operator.to_shortname(), )) _qp_value = join_queryparam_value( - osfmap_labeler.get_label_or_iri(_value) + osfmap_shorthand().compact_iri(_value) for _value in self.value_set ) return str(_qp_name), _qp_value @@ -381,7 +384,7 @@ def sortlist_as_queryparam_value(cls, sort_params): ) @classmethod - def from_queryparams(cls, queryparams: QueryparamDict) -> tuple['SortParam']: + def from_queryparams(cls, queryparams: QueryparamDict) -> tuple['SortParam', ...]: _paramvalue = 
_get_single_value(queryparams, QueryparamName('sort')) if not _paramvalue or _paramvalue == '-relevance': return () @@ -391,12 +394,9 @@ def from_queryparams(cls, queryparams: QueryparamDict) -> tuple['SortParam']: def _from_sort_param_str(cls, param_value: str) -> typing.Iterable['SortParam']: for _sort in split_queryparam_value(param_value): _sort_property = _sort.lstrip(DESCENDING_SORT_PREFIX) - try: - _property_iri = osfmap_labeler.iri_for_label(_sort_property) - except KeyError: - _property_iri = _sort_property + _property_iri = osfmap_shorthand().expand_iri(_sort_property) if not is_date_property(_property_iri): - raise ValueError(f'bad sort: {_sort_property}') # TODO: nice response + raise trove_exceptions.InvalidQueryParamValue('sort', _sort_property, "may not sort on non-date properties") yield cls( property_iri=_property_iri, descending=param_value.startswith(DESCENDING_SORT_PREFIX), @@ -416,11 +416,11 @@ class PageParam: @classmethod def from_queryparams(cls, queryparams: QueryparamDict) -> 'PageParam': - _cursor = _get_single_value(queryparams, QueryparamName('page', ['cursor'])) + _cursor = _get_single_value(queryparams, QueryparamName('page', ('cursor',))) if _cursor: return cls(cursor=_cursor) _size = int( # TODO: 400 response on non-int value - _get_single_value(queryparams, QueryparamName('page', ['size'])) + _get_single_value(queryparams, QueryparamName('page', ('size',))) or DEFAULT_PAGE_SIZE ) return cls(size=min(_size, MAX_PAGE_SIZE), cursor=None) @@ -474,7 +474,7 @@ class ValuesearchParams(CardsearchParams): # includes fields from CardsearchParams, because a # valuesearch is always in context of a cardsearch valuesearch_propertypath_set: frozenset[tuple[str, ...]] - valuesearch_textsegment_set: frozenset[str] + valuesearch_textsegment_set: frozenset[Textsegment] valuesearch_filter_set: frozenset[SearchFilter] # override CardsearchParams @@ -482,7 +482,7 @@ class ValuesearchParams(CardsearchParams): def parse_queryparams(cls, queryparams: 
QueryparamDict) -> dict: _raw_propertypath = _get_single_value(queryparams, QueryparamName('valueSearchPropertyPath')) if not _raw_propertypath: - raise ValueError('TODO: 400 valueSearchPropertyPath required') + raise trove_exceptions.MissingRequiredQueryParam('valueSearchPropertyPath') return { **super().parse_queryparams(queryparams), 'valuesearch_propertypath_set': _parse_propertypath_set(_raw_propertypath, allow_globs=False), @@ -514,14 +514,21 @@ def valuesearch_type_iris(self): ### # local helpers -def propertypath_key(property_path: tuple[str, ...]): +def propertypathstep_key(pathstep: str) -> str: + if pathstep == GLOB_PATHSTEP: + return pathstep + # assume iri + return urllib.parse.quote(osfmap_shorthand().compact_iri(pathstep)) + + +def propertypath_key(property_path: tuple[str, ...]) -> str: return PROPERTYPATH_DELIMITER.join( - urllib.parse.quote(osfmap_labeler.get_label_or_iri(_property_iri)) - for _property_iri in property_path + propertypathstep_key(_pathstep) + for _pathstep in property_path ) -def propertypath_set_key(propertypath_set: frozenset[tuple[str, ...]]): +def propertypath_set_key(propertypath_set: frozenset[tuple[str, ...]]) -> str: return join_queryparam_value( propertypath_key(_propertypath) for _propertypath in propertypath_set @@ -553,7 +560,7 @@ def _get_single_value( try: (_singlevalue,) = _paramvalues except ValueError: - raise ValueError(f'expected at most one {queryparam_name} value, got {len(_paramvalues)}') + raise trove_exceptions.InvalidRepeatedQueryParam(str(queryparam_name)) else: return _singlevalue @@ -568,18 +575,21 @@ def _parse_propertypath_set(serialized_path_set: str, *, allow_globs=True) -> fr def _parse_propertypath(serialized_path: str, *, allow_globs=True) -> tuple[str, ...]: _path = tuple( - osfmap_labeler.iri_for_label(_pathstep, default=_pathstep) + osfmap_shorthand().expand_iri(_pathstep) for _pathstep in serialized_path.split(PROPERTYPATH_DELIMITER) ) if GLOB_PATHSTEP in _path: if not allow_globs: - raise 
ValueError(f'no * allowed (got {serialized_path})') + raise trove_exceptions.InvalidPropertyPath(serialized_path, 'no * allowed') if any(_pathstep != GLOB_PATHSTEP for _pathstep in _path): - raise ValueError(f'path must be all * or no * (got {serialized_path})') + raise trove_exceptions.InvalidPropertyPath( + serialized_path, + f'path must be all * or no * (got {serialized_path})', + ) return _path -def _get_related_property_paths(filter_set) -> tuple[tuple[str]]: +def _get_related_property_paths(filter_set) -> tuple[tuple[str, ...], ...]: # hard-coded for osf.io search pages, static list per type # TODO: replace with some dynamism, maybe a 'significant_terms' aggregation _type_iris = set() @@ -593,5 +603,5 @@ def _get_unnamed_iri_values(filter_set) -> typing.Iterable[str]: for _filter in filter_set: if _filter.operator.is_iri_operator(): for _iri in _filter.value_set: - if _iri not in OSFMAP_VOCAB: + if _iri not in OSFMAP_THESAURUS: yield _iri diff --git a/share/search/search_response.py b/trove/trovesearch/search_response.py similarity index 100% rename from share/search/search_response.py rename to trove/trovesearch/search_response.py diff --git a/trove/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py similarity index 84% rename from trove/trovesearch_gathering.py rename to trove/trovesearch/trovesearch_gathering.py index 9ab856251..31bf85d48 100644 --- a/trove/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -4,44 +4,46 @@ from primitive_metadata.primitive_rdf import ( Literal, - QuotedTriple, blanknode, iri_minus_namespace, - iter_tripleset, literal, sequence, ) from primitive_metadata import gather from primitive_metadata.primitive_rdf import literal_json -from share.search.search_params import ( +from trove import models as trove_db +from trove import exceptions as trove_exceptions +from trove.derive.osfmap_json import _RdfOsfmapJsonldRenderer +from trove.trovesearch.search_params import ( CardsearchParams, 
ValuesearchParams, PageParam, propertypath_key, propertypath_set_key, ) -from share.search.search_response import ValuesearchResult -from trove import models as trove_db -from trove.render.osfmap_jsonld import RdfOsfmapJsonldRenderer -from trove.vocab.namespaces import RDF, FOAF, DCTERMS, RDFS, DCAT +from trove.trovesearch.search_response import ValuesearchResult +from trove.vocab.namespaces import RDF, FOAF, DCTERMS, RDFS, DCAT, TROVE from trove.vocab.jsonapi import ( JSONAPI_LINK_OBJECT, JSONAPI_MEMBERNAME, ) -from trove.vocab.osfmap import osfmap_labeler, OSFMAP_VOCAB, suggested_filter_operator +from trove.vocab.osfmap import ( + osfmap_shorthand, + OSFMAP_THESAURUS, + suggested_filter_operator, +) from trove.vocab.trove import ( - TROVE, - TROVE_API_VOCAB, + TROVE_API_THESAURUS, trove_indexcard_namespace, - trove_labeler, + trove_shorthand, ) logger = logging.getLogger(__name__) -TROVE_GATHERING_NORMS = gather.GatheringNorms( +TROVE_GATHERING_NORMS = gather.GatheringNorms.new( namestory=( literal('cardsearch', language='en'), literal('search for "index cards" that describe resources', language='en'), @@ -51,7 +53,7 @@ TROVE.Cardsearch, TROVE.Valuesearch, }, - vocabulary=TROVE_API_VOCAB, + thesaurus=TROVE_API_THESAURUS, ) @@ -60,28 +62,28 @@ literal('trove search', language='en'), ), norms=TROVE_GATHERING_NORMS, - gatherer_kwargnames={'search_params', 'specific_index', 'use_osfmap_json'}, + gatherer_kwargnames={'search_params', 'specific_index', 'deriver_iri'}, ) # TODO: per-field text search in rdf # @trovesearch_by_indexstrategy.gatherer(TROVE.cardSearchText) -# def gather_cardsearch_text(focus, *, specific_index, search_params, use_osfmap_json): +# def gather_cardsearch_text(focus, *, specific_index, search_params, deriver_iri): # yield (TROVE.cardSearchText, literal(search_params.cardsearch_text)) # # # @trovesearch_by_indexstrategy.gatherer(TROVE.valueSearchText) -# def gather_valuesearch_text(focus, *, specific_index, search_params, use_osfmap_json): +# 
def gather_valuesearch_text(focus, *, specific_index, search_params, deriver_iri): # yield (TROVE.valueSearchText, literal(search_params.valuesearch_text)) @trovesearch_by_indexstrategy.gatherer(TROVE.propertyPath, focustype_iris={TROVE.Valuesearch}) -def gather_valuesearch_propertypath(focus, *, specific_index, search_params, use_osfmap_json): +def gather_valuesearch_propertypath(focus, *, search_params, **kwargs): yield from _multi_propertypath_twoples(search_params.valuesearch_propertypath_set) @trovesearch_by_indexstrategy.gatherer(TROVE.valueSearchFilter) -def gather_valuesearch_filter(focus, *, specific_index, search_params, use_osfmap_json): +def gather_valuesearch_filter(focus, *, search_params, **kwargs): for _filter in search_params.valuesearch_filter_set: yield (TROVE.valueSearchFilter, _filter_as_blanknode(_filter, {})) @@ -92,7 +94,7 @@ def gather_valuesearch_filter(focus, *, specific_index, search_params, use_osfma TROVE.cardSearchFilter, focustype_iris={TROVE.Cardsearch}, ) -def gather_cardsearch(focus, *, specific_index, search_params, use_osfmap_json): +def gather_cardsearch(focus, *, specific_index, search_params, **kwargs): assert isinstance(search_params, CardsearchParams) # defer to the IndexStrategy implementation to do the search _cardsearch_resp = specific_index.pls_handle_cardsearch(search_params) @@ -105,7 +107,7 @@ def gather_cardsearch(focus, *, specific_index, search_params, use_osfmap_json): (TROVE.matchEvidence, frozenset(( (RDF.type, TROVE.TextMatchEvidence), (TROVE.matchingHighlight, _evidence.matching_highlight), - (TROVE.indexCard, _evidence.card_iri), + (TROVE.evidenceCardIdentifier, literal(_evidence.card_iri)), *_single_propertypath_twoples(_evidence.property_path), ))) for _evidence in _result.text_match_evidence @@ -127,7 +129,8 @@ def gather_cardsearch(focus, *, specific_index, search_params, use_osfmap_json): _related_property_result(_propertypath, _prop_usage_counts.get(_propertypath, 0)) for _propertypath in 
search_params.related_property_paths ] - yield (TROVE.relatedPropertyList, sequence(_relatedproperty_list)) + if _relatedproperty_list: + yield (TROVE.relatedPropertyList, sequence(_relatedproperty_list)) # filter-values from search params, with any additional info _valueinfo_by_iri = {} for _filtervalue in _cardsearch_resp.filtervalue_info: @@ -140,7 +143,7 @@ def gather_cardsearch(focus, *, specific_index, search_params, use_osfmap_json): @trovesearch_by_indexstrategy.gatherer( focustype_iris={TROVE.Valuesearch}, ) -def gather_valuesearch(focus, *, specific_index, search_params, use_osfmap_json): +def gather_valuesearch(focus, *, specific_index, search_params, **kwargs): assert isinstance(search_params, ValuesearchParams) _valuesearch_resp = specific_index.pls_handle_valuesearch(search_params) _result_page = [] @@ -194,7 +197,7 @@ def gather_valuesearch(focus, *, specific_index, search_params, use_osfmap_json) @trovesearch_by_indexstrategy.gatherer( focustype_iris={TROVE.Indexcard}, ) -def gather_card(focus, *, use_osfmap_json, **kwargs): +def gather_card(focus, *, deriver_iri, **kwargs): # TODO: batch gatherer -- load all cards in one query yield (RDF.type, DCAT.CatalogRecord) _indexcard_namespace = trove_indexcard_namespace() @@ -205,52 +208,54 @@ def gather_card(focus, *, use_osfmap_json, **kwargs): if _iri in _indexcard_namespace ) except StopIteration: - raise ValueError(f'could not find indexcard iri in {focus.iris} (looking for {_indexcard_namespace})') + raise trove_exceptions.IriMismatch(f'could not find indexcard iri in {focus.iris} (looking for {_indexcard_namespace})') _indexcard_uuid = iri_minus_namespace( _indexcard_iri, namespace=_indexcard_namespace, ) - if use_osfmap_json: # include graph as serialized json - _osfmap_indexcard = ( + if deriver_iri is None: # include data as a quoted graph + _indexcard_rdf = ( + trove_db.LatestIndexcardRdf.objects + .filter(indexcard__uuid=_indexcard_uuid) + .select_related('indexcard') + 
.prefetch_related('indexcard__focus_identifier_set') + .get() + ) + yield (DCTERMS.issued, _indexcard_rdf.indexcard.created.date()) + yield (DCTERMS.modified, _indexcard_rdf.modified.date()) + for _identifier in _indexcard_rdf.indexcard.focus_identifier_set.all(): + _iri = _identifier.as_iri() + yield (FOAF.primaryTopic, _iri) + yield (TROVE.focusIdentifier, literal(_iri)) + _quoted_graph = _indexcard_rdf.as_quoted_graph() + _quoted_graph.add( + (_quoted_graph.focus_iri, FOAF.primaryTopicOf, _indexcard_iri), + ) + yield (TROVE.resourceMetadata, _quoted_graph) + else: # include pre-formatted data from a DerivedIndexcard + _derived_indexcard = ( trove_db.DerivedIndexcard.objects .filter( upriver_indexcard__uuid=_indexcard_uuid, deriver_identifier__in=( trove_db.ResourceIdentifier.objects - .queryset_for_iri(TROVE['derive/osfmap_json']) - # TODO: choose deriver by queryparam/gatherer-kwarg + .queryset_for_iri(deriver_iri) ), ) .select_related('upriver_indexcard') .prefetch_related('upriver_indexcard__focus_identifier_set') .get() ) - yield (DCTERMS.issued, _osfmap_indexcard.upriver_indexcard.created.date()) - yield (DCTERMS.modified, _osfmap_indexcard.modified.date()) - for _identifier in _osfmap_indexcard.upriver_indexcard.focus_identifier_set.all(): + yield (DCTERMS.issued, _derived_indexcard.upriver_indexcard.created.date()) + yield (DCTERMS.modified, _derived_indexcard.modified.date()) + for _identifier in _derived_indexcard.upriver_indexcard.focus_identifier_set.all(): _iri = _identifier.as_iri() yield (FOAF.primaryTopic, _iri) yield (TROVE.focusIdentifier, literal(_iri)) yield ( TROVE.resourceMetadata, - literal(_osfmap_indexcard.derived_text, datatype_iris={RDF.JSON}) - ) - else: # include graph as a bag of quoted triples - _indexcard_rdf = ( - trove_db.LatestIndexcardRdf.objects - .filter(indexcard__uuid=_indexcard_uuid) - .select_related('indexcard') - .prefetch_related('indexcard__focus_identifier_set') - .get() + _derived_indexcard.as_rdf_literal(), ) - 
yield (DCTERMS.issued, _indexcard_rdf.indexcard.created.date()) - yield (DCTERMS.modified, _indexcard_rdf.modified.date()) - for _identifier in _indexcard_rdf.indexcard.focus_identifier_set.all(): - _iri = _identifier.as_iri() - yield (FOAF.primaryTopic, _iri) - yield (TROVE.focusIdentifier, literal(_iri)) - for _triple in iter_tripleset(_indexcard_rdf.as_rdf_tripledict()): - yield (TROVE.resourceMetadata, QuotedTriple(*_triple)) ### @@ -276,7 +281,7 @@ def _filter_as_blanknode(search_filter, valueinfo_by_iri) -> frozenset: def _osfmap_or_unknown_iri_as_json(iri: str): try: - _twopledict = OSFMAP_VOCAB[iri] + _twopledict = OSFMAP_THESAURUS[iri] except KeyError: return literal_json({'@id': iri}) else: @@ -307,22 +312,19 @@ def _valuesearch_result_as_indexcard_blanknode(result: ValuesearchResult) -> fro def _osfmap_json(tripledict, focus_iri): return literal_json( - RdfOsfmapJsonldRenderer().tripledict_as_nested_jsonld( - tripledict, - focus_iri, - ) + _RdfOsfmapJsonldRenderer().tripledict_as_nested_jsonld(tripledict, focus_iri) ) def _osfmap_twople_json(twopledict): return literal_json( - RdfOsfmapJsonldRenderer().twopledict_as_jsonld(twopledict), + _RdfOsfmapJsonldRenderer().twopledict_as_jsonld(twopledict) ) def _osfmap_path(property_path): return literal_json([ - osfmap_labeler.get_label_or_iri(_iri) + osfmap_shorthand().compact_iri(_iri) for _iri in property_path ]) @@ -343,7 +345,7 @@ def _propertypath_sequence(property_path: tuple[str, ...]): _propertypath_metadata = [] for _property_iri in property_path: try: - _property_twopledict = OSFMAP_VOCAB[_property_iri] + _property_twopledict = OSFMAP_THESAURUS[_property_iri] except KeyError: _property_twopledict = {RDF.type: {RDF.Property}} # giving benefit of the doubt _propertypath_metadata.append(_osfmap_json( @@ -357,9 +359,9 @@ def _related_property_result(property_path: tuple[str, ...], count: int): return frozenset(( (RDF.type, TROVE.RelatedPropertypath), (TROVE.cardsearchResultCount, count), - 
(TROVE.suggestedFilterOperator, trove_labeler.label_for_iri( + (TROVE.suggestedFilterOperator, literal(trove_shorthand().compact_iri( suggested_filter_operator(property_path[-1]), - )), + ))), *_single_propertypath_twoples(property_path), )) diff --git a/trove/util/__init__.py b/trove/util/__init__.py index 6d79bec15..d7dc37515 100644 --- a/trove/util/__init__.py +++ b/trove/util/__init__.py @@ -1,4 +1,3 @@ from . import iris, queryparams -from .iri_labeler import IriLabeler -__all__ = ('iris', 'queryparams', 'IriLabeler') +__all__ = ('iris', 'queryparams',) diff --git a/trove/util/iri_labeler.py b/trove/util/iri_labeler.py deleted file mode 100644 index cfaf5679e..000000000 --- a/trove/util/iri_labeler.py +++ /dev/null @@ -1,94 +0,0 @@ -from typing import Optional - -from primitive_metadata import primitive_rdf - -from trove.vocab.namespaces import RDFS - - -class IriLabeler: - def __init__( - self, - vocabulary: primitive_rdf.RdfTripleDictionary, - label_iri: str = RDFS.label, - acceptable_prefixes: tuple[str] = (), - output_prefix: Optional[str] = None, - ): - self.vocabulary = vocabulary - self.label_iri = label_iri - self.acceptable_prefixes = acceptable_prefixes - self.output_prefix = output_prefix - - def build_shorthand(self) -> primitive_rdf.IriShorthand: - return primitive_rdf.IriShorthand({ - _label: _iri - for _label, _iri in self.all_iris_by_label() - }) - - def all_iris_by_label(self) -> dict[str, str]: - try: - return self.__iris_by_label - except AttributeError: - _iris_by_label = {} - for _iri in self.vocabulary: - try: - _iris_by_label[self._find_label(_iri)] = _iri - except ValueError: - pass # no label, is ok - self.__iris_by_label = _iris_by_label - return _iris_by_label - - def all_labels_by_iri(self) -> dict[str, str]: - try: - return self.__labels_by_iri - except AttributeError: - _iris_by_label = self.all_iris_by_label() - _labels_by_iri = { - _iri: _label - for _label, _iri in _iris_by_label.items() - } - _missing_iris = ( - 
set(_iris_by_label.values()) - .difference(_labels_by_iri.keys()) - ) - if _missing_iris: - raise ValueError(f'vocab label collision! missing labels for {_missing_iris}') - self.__labels_by_iri = _labels_by_iri - return _labels_by_iri - - def iri_for_label(self, label: str, *, default=None) -> str: - _labelkey = label - for _prefix in self.acceptable_prefixes: - if label.startswith(_prefix): - _labelkey = label[len(_prefix):] # remove prefix - if default: - return self.all_iris_by_label().get(_labelkey, default) - return self.all_iris_by_label()[_labelkey] # may raise KeyError - - def label_for_iri(self, iri: str) -> str: - _label = self.all_labels_by_iri()[iri] # may raise KeyError - return ( - ''.join((self.output_prefix, _label)) - if self.output_prefix - else _label - ) - - def get_label_or_iri(self, iri: str) -> str: - try: - return self.label_for_iri(iri) - except KeyError: - return iri - - def _find_label(self, iri: str) -> str: - _labelset = ( - self.vocabulary - .get(iri, {}) - .get(self.label_iri, ()) - ) - try: - return next( - _label.unicode_value - for _label in _labelset - if isinstance(_label, primitive_rdf.Literal) - ) - except StopIteration: - raise ValueError(f'could not find label for iri "{iri}"') diff --git a/trove/util/iris.py b/trove/util/iris.py index b929baee0..5295e8ea3 100644 --- a/trove/util/iris.py +++ b/trove/util/iris.py @@ -2,6 +2,8 @@ import re from urllib.parse import urlsplit, urlunsplit, quote, unquote +from trove import exceptions as trove_exceptions + # quoth : # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) @@ -36,6 +38,11 @@ def get_iri_scheme(iri: str) -> str: return _iri_scheme +def iris_sufficiently_equal(*iris) -> bool: + _suffuniq_iris = set(map(get_sufficiently_unique_iri, iris)) + return len(_suffuniq_iris) == 1 + + def get_sufficiently_unique_iri_and_scheme(iri: str) -> tuple[str, str]: _scheme_match = IRI_SCHEME_REGEX_IGNORECASE.match(iri) if _scheme_match: @@ -46,7 +53,7 @@ def get_sufficiently_unique_iri_and_scheme(iri: str) -> tuple[str, str]: return (iri, _scheme) else: # may omit scheme only if `://` if not iri.startswith(COLON_SLASH_SLASH): - raise ValueError(f'does not look like an iri (got "{iri}")') + raise trove_exceptions.InvalidIri(f'does not look like an iri (got "{iri}")') _scheme = '' _remainder = iri # for an iri with '://', is "safe enough" to normalize a little: @@ -68,7 +75,7 @@ def is_worthwhile_iri(iri: str): ) -def iri_path_as_keyword(iris: list[str] | tuple[str], *, suffuniq=False) -> str: +def iri_path_as_keyword(iris: list[str] | tuple[str, ...], *, suffuniq=False) -> str: assert isinstance(iris, (list, tuple)) and all( isinstance(_pathstep, str) for _pathstep in iris @@ -87,5 +94,5 @@ def unquote_iri(iri: str) -> str: while QUOTED_IRI_REGEX.match(_unquoted_iri): _unquoted_iri = unquote(_unquoted_iri) if not UNQUOTED_IRI_REGEX.match(_unquoted_iri): - raise ValueError(f'does not look like a quoted iri: {iri}') + raise trove_exceptions.InvalidQuotedIri(f'does not look like a quoted iri: {iri}') return _unquoted_iri diff --git a/trove/util/queryparams.py b/trove/util/queryparams.py index 04309050b..3cff5b681 100644 --- a/trove/util/queryparams.py +++ b/trove/util/queryparams.py @@ -5,6 +5,8 @@ # TODO: remove django dependency (tho it is convenient) from django.http import QueryDict +from trove import exceptions as trove_exceptions + ### # jsonapi query parameter parsing: @@ -27,7 +29,7 @@ @dataclasses.dataclass(frozen=True) class QueryparamName: family: str - bracketed_names: tuple[str] = () + bracketed_names: tuple[str, ...] 
= () def __post_init__(self): if not isinstance(self.bracketed_names, tuple): @@ -37,18 +39,18 @@ def __post_init__(self): def from_str(cls, queryparam_name: str) -> 'QueryparamName': family_match = QUERYPARAM_FAMILY_REGEX.match(queryparam_name) if not family_match: - raise ValueError(f'invalid queryparam name "{queryparam_name}"') + raise trove_exceptions.InvalidQueryParamName(queryparam_name) family = family_match.group() next_position = family_match.end() bracketed_names = [] while next_position < len(queryparam_name): bracketed_match = QUERYPARAM_FAMILYMEMBER_REGEX.match(queryparam_name, next_position) if not bracketed_match: - raise ValueError(f'invalid queryparam name "{queryparam_name}"') + raise trove_exceptions.InvalidQueryParamName(queryparam_name) bracketed_names.append(bracketed_match.group('name') or '') next_position = bracketed_match.end() if next_position != len(queryparam_name): - raise ValueError(f'invalid queryparam name "{queryparam_name}"') + raise trove_exceptions.InvalidQueryParamName(queryparam_name) return cls(family, tuple(bracketed_names)) def __str__(self): diff --git a/trove/util/shorthand.py b/trove/util/shorthand.py new file mode 100644 index 000000000..c94598d0f --- /dev/null +++ b/trove/util/shorthand.py @@ -0,0 +1,17 @@ +from primitive_metadata import primitive_rdf as rdf + + +def build_shorthand_from_thesaurus( + thesaurus: rdf.RdfTripleDictionary, + label_predicate: str, + base_shorthand: rdf.IriShorthand | None = None +) -> rdf.IriShorthand: + _prefixmap = {} + for _iri, _twoples in thesaurus.items(): + for _label in _twoples.get(label_predicate, ()): + _prefixmap[_label.unicode_value] = _iri + return ( + rdf.IriShorthand(_prefixmap) + if base_shorthand is None + else base_shorthand.with_update(_prefixmap) + ) diff --git a/trove/views/browse.py b/trove/views/browse.py index 554273701..41049e303 100644 --- a/trove/views/browse.py +++ b/trove/views/browse.py @@ -1,5 +1,4 @@ -from typing import Iterable - +from django import http 
from django.shortcuts import redirect from django.views import View from primitive_metadata import primitive_rdf @@ -7,57 +6,61 @@ from trove import models as trove_db from trove.render import get_renderer from trove.util.iris import unquote_iri, get_sufficiently_unique_iri -from trove.vocab.namespaces import TROVE +from trove.vocab import namespaces as ns +from trove.vocab import static_vocab class BrowseIriView(View): def get(self, request, **kwargs): _iri_param = kwargs.get('iri') or request.GET.get('iri') if not _iri_param: - raise ValueError('TODO: random browse?') - _iri = unquote_iri(_iri_param) - _trove_term = _recognize_trove_term(_iri) + raise http.Http404 # TODO: docs? random browse? + _iri = ns.NAMESPACES_SHORTHAND.expand_iri(unquote_iri(_iri_param)) + _suffuniq_iri = get_sufficiently_unique_iri(_iri) + _trove_term = _recognize_trove_term(_suffuniq_iri) if _trove_term is not None: return redirect('trove-vocab', vocab_term=_trove_term) - _focus_iri, _tripledict = self._get_rdf(_iri) + _card_focus_iri, _combined_rdf = _get_latest_cardf(_iri) + _thesaurus_entry = static_vocab.combined_thesaurus__suffuniq().get(_suffuniq_iri, {}) + if _thesaurus_entry: + _combined_rdf.add_twopledict(_card_focus_iri, _thesaurus_entry) return get_renderer(request).render_response( - _tripledict, - _focus_iri, + _combined_rdf.tripledict, + _card_focus_iri, headers={ 'Content-Disposition': 'inline', }, ) - def _get_rdf(self, iri: str): - try: - _identifier = trove_db.ResourceIdentifier.objects.get_for_iri(iri) - except trove_db.ResourceIdentifier.DoesNotExist: - return iri, {} - else: - _rdf_qs = ( - trove_db.LatestIndexcardRdf.objects - .filter(indexcard__focus_identifier_set=_identifier) - ) - # TODO: handle different focus_iri on multiple cards - _focus_iri = _rdf_qs.first().focus_iri - # TODO: query param for split/merged - return _focus_iri, _merge_tripledicts( - _indexcard_rdf.as_rdf_tripledict() - for _indexcard_rdf in _rdf_qs - ) - -def _merge_tripledicts(tripledicts: 
Iterable[dict]): - _merged = primitive_rdf.RdfGraph({}) - for _tripledict in tripledicts: - for _triple in primitive_rdf.iter_tripleset(_tripledict): - _merged.add(_triple) - return _merged.tripledict +def _get_latest_cardf(iri: str): + _combined_rdf = primitive_rdf.RdfGraph({}) + try: + _identifier = trove_db.ResourceIdentifier.objects.get_for_iri(iri) + except trove_db.ResourceIdentifier.DoesNotExist: + return iri, _combined_rdf + else: + _rdf_qs = ( + trove_db.LatestIndexcardRdf.objects + .filter(indexcard__focus_identifier_set=_identifier) + .select_related('indexcard') + ) + _focus_iri = None + for _indexcard_rdf in _rdf_qs: + if _focus_iri is None: + _focus_iri = _indexcard_rdf.focus_iri + _combined_rdf.add((_focus_iri, ns.FOAF.primaryTopicOf, _indexcard_rdf.indexcard.get_iri())) + for (_subj, _pred, _obj) in primitive_rdf.iter_tripleset(_indexcard_rdf.as_rdf_tripledict()): + _combined_rdf.add( + (_focus_iri, _pred, _obj) + if _subj == _indexcard_rdf.focus_iri + else (_subj, _pred, _obj) + ) + return (_focus_iri or iri), _combined_rdf -def _recognize_trove_term(iri: str): - _suffuniq_iri = get_sufficiently_unique_iri(iri) - _suffuniq_trove = get_sufficiently_unique_iri(str(TROVE)) - if _suffuniq_iri.startswith(_suffuniq_trove): - return primitive_rdf.iri_minus_namespace(_suffuniq_iri, _suffuniq_trove).strip('/') +def _recognize_trove_term(suffuniq_iri: str): + _suffuniq_trove = get_sufficiently_unique_iri(str(ns.TROVE)) + if suffuniq_iri.startswith(_suffuniq_trove): + return primitive_rdf.iri_minus_namespace(suffuniq_iri, _suffuniq_trove).strip('/') return None diff --git a/trove/views/docs.py b/trove/views/docs.py index 51ca44236..9ae818602 100644 --- a/trove/views/docs.py +++ b/trove/views/docs.py @@ -3,13 +3,14 @@ from django.views import View from trove.openapi import get_trove_openapi_json +from trove.vocab import mediatypes class OpenapiJsonView(View): def get(self, request): return HttpResponse( content=get_trove_openapi_json(), - 
content_type='application/json', + content_type=mediatypes.JSON, ) diff --git a/trove/views/indexcard.py b/trove/views/indexcard.py index 8371939cc..88536122b 100644 --- a/trove/views/indexcard.py +++ b/trove/views/indexcard.py @@ -1,25 +1,29 @@ from django.views import View from primitive_metadata import gather +from trove import exceptions as trove_exceptions from trove.render import get_renderer -from trove.trovesearch_gathering import trovesearch_by_indexstrategy -from trove.vocab.jsonapi import JSONAPI_MEDIATYPE -from trove.vocab.trove import TROVE, trove_indexcard_iri +from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy +from trove.vocab.namespaces import TROVE +from trove.vocab.trove import trove_indexcard_iri class IndexcardView(View): def get(self, request, indexcard_uuid): _renderer = get_renderer(request) - _search_gathering = trovesearch_by_indexstrategy.new_gathering({ - # TODO (gather): allow omitting kwargs that go unused - 'search_params': None, - 'specific_index': None, - 'use_osfmap_json': (_renderer.MEDIATYPE in {'application/json', JSONAPI_MEDIATYPE}) - }) - _indexcard_iri = trove_indexcard_iri(indexcard_uuid) - _search_gathering.ask( - {}, # TODO: build from `include`/`fields` - focus=gather.focus(_indexcard_iri, TROVE.Indexcard), - ) - _response_tripledict = _search_gathering.leaf_a_record() - return _renderer.render_response(_response_tripledict, _indexcard_iri) + try: + _search_gathering = trovesearch_by_indexstrategy.new_gathering({ + # TODO (gather): allow omitting kwargs that go unused + 'search_params': None, + 'specific_index': None, + 'deriver_iri': _renderer.INDEXCARD_DERIVER_IRI, + }) + _indexcard_iri = trove_indexcard_iri(indexcard_uuid) + _search_gathering.ask( + {}, # TODO: build from `include`/`fields` + focus=gather.Focus.new(_indexcard_iri, TROVE.Indexcard), + ) + _response_tripledict = _search_gathering.leaf_a_record() + return _renderer.render_response(_response_tripledict, _indexcard_iri) + 
except trove_exceptions.TroveError as _error: + return _renderer.render_error_response(_error) diff --git a/trove/views/ingest.py b/trove/views/ingest.py index d509f75d1..c213c9cf2 100644 --- a/trove/views/ingest.py +++ b/trove/views/ingest.py @@ -13,7 +13,7 @@ class RdfIngestView(View): def get(self, request): # TODO: something? maybe show this user's most recently pushed rdf for this pid - raise NotImplementedError + raise http.Http404 def post(self, request): # TODO: better error responses (jsonapi? shacl:ValidationReport?) diff --git a/trove/views/search.py b/trove/views/search.py index b9c7af23e..c303eb5b0 100644 --- a/trove/views/search.py +++ b/trove/views/search.py @@ -5,13 +5,13 @@ from primitive_metadata import gather from share.search.index_strategy import IndexStrategy -from share.search.search_params import ( +from trove import exceptions as trove_exceptions +from trove.trovesearch.search_params import ( CardsearchParams, ValuesearchParams, ) -from trove.vocab.jsonapi import JSONAPI_MEDIATYPE +from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy from trove.vocab.namespaces import TROVE -from trove.trovesearch_gathering import trovesearch_by_indexstrategy from trove.render import get_renderer @@ -45,29 +45,36 @@ class CardsearchView(View): def get(self, request): - _search_iri, _search_gathering, _renderer = _parse_request(request, CardsearchParams) - _search_gathering.ask( - DEFAULT_CARDSEARCH_ASK, # TODO: build from `include`/`fields` - focus=gather.focus(_search_iri, TROVE.Cardsearch), - ) - return _renderer.render_response(_search_gathering.leaf_a_record(), _search_iri) + _renderer = get_renderer(request) + try: + _search_iri, _search_gathering = _parse_request(request, _renderer, CardsearchParams) + _search_gathering.ask( + DEFAULT_CARDSEARCH_ASK, # TODO: build from `include`/`fields` + focus=gather.Focus.new(_search_iri, TROVE.Cardsearch), + ) + return _renderer.render_response(_search_gathering.leaf_a_record(), 
_search_iri) + except trove_exceptions.TroveError as _error: + return _renderer.render_error_response(_error) class ValuesearchView(View): def get(self, request): - _search_iri, _search_gathering, _renderer = _parse_request(request, ValuesearchParams) - _search_gathering.ask( - DEFAULT_VALUESEARCH_ASK, # TODO: build from `include`/`fields` - focus=gather.focus(_search_iri, TROVE.Valuesearch), - ) - return _renderer.render_response(_search_gathering.leaf_a_record(), _search_iri) + _renderer = get_renderer(request) + try: + _search_iri, _search_gathering = _parse_request(request, _renderer, ValuesearchParams) + _search_gathering.ask( + DEFAULT_VALUESEARCH_ASK, # TODO: build from `include`/`fields` + focus=gather.Focus.new(_search_iri, TROVE.Valuesearch), + ) + return _renderer.render_response(_search_gathering.leaf_a_record(), _search_iri) + except trove_exceptions.TroveError as _error: + return _renderer.render_error_response(_error) ### # local helpers -def _parse_request(request: http.HttpRequest, search_params_dataclass): - _renderer = get_renderer(request) +def _parse_request(request: http.HttpRequest, renderer, search_params_dataclass): _search_iri = request.build_absolute_uri() _search_params = search_params_dataclass.from_querystring( request.META['QUERY_STRING'], @@ -77,6 +84,6 @@ def _parse_request(request: http.HttpRequest, search_params_dataclass): _search_gathering = trovesearch_by_indexstrategy.new_gathering({ 'search_params': _search_params, 'specific_index': _specific_index, - 'use_osfmap_json': (_renderer.MEDIATYPE in {'application/json', JSONAPI_MEDIATYPE}) + 'deriver_iri': renderer.INDEXCARD_DERIVER_IRI, }) - return (_search_iri, _search_gathering, _renderer) + return (_search_iri, _search_gathering) diff --git a/trove/views/vocab.py b/trove/views/vocab.py index 9414667d0..a5e01071f 100644 --- a/trove/views/vocab.py +++ b/trove/views/vocab.py @@ -3,14 +3,14 @@ from trove.render import get_renderer from trove.vocab.namespaces import TROVE -from 
trove.vocab.trove import TROVE_API_VOCAB +from trove.vocab.trove import TROVE_API_THESAURUS class TroveVocabView(View): def get(self, request, vocab_term): _iri = TROVE[vocab_term] try: - _data = {_iri: TROVE_API_VOCAB[_iri]} + _data = {_iri: TROVE_API_THESAURUS[_iri]} except KeyError: raise http.Http404 return get_renderer(request).render_response(_data, _iri) diff --git a/trove/vocab/__init__.py b/trove/vocab/__init__.py index 7663b81d2..ea9b78354 100644 --- a/trove/vocab/__init__.py +++ b/trove/vocab/__init__.py @@ -1,62 +1 @@ -import dataclasses -import pathlib - -from primitive_metadata import primitive_rdf - -from . import namespaces - - -@dataclasses.dataclass(frozen=True) -class StaticVocab: - iri_namespace: primitive_rdf.IriNamespace - shorthand_prefix: str # for convenience within this system - turtle_filename: str # assumed same directory as this file - turtle_focus_iri: str # may be different from IriNamespace - - def turtle_filepath(self): - return pathlib.Path(__file__).parent / self.turtle_filename - - def turtle(self): - with open(self.turtle_filepath()) as _vocab_file: - return _vocab_file.read() - - -VOCAB_SET = frozenset(( - StaticVocab( - iri_namespace=namespaces.DCTERMS, - shorthand_prefix='dcterms', - turtle_filename='dublin_core_terms.turtle', - turtle_focus_iri='http://purl.org/dc/terms/', - ), - StaticVocab( - iri_namespace=namespaces.DCAT, - shorthand_prefix='dcat', - turtle_filename='dcat.turtle', - turtle_focus_iri='http://www.w3.org/ns/dcat', - ), - StaticVocab( - iri_namespace=namespaces.OWL, - shorthand_prefix='owl', - turtle_filename='owl.turtle', - turtle_focus_iri='http://www.w3.org/2002/07/owl', - ), - StaticVocab( - iri_namespace=namespaces.RDF, - shorthand_prefix='rdf', - turtle_filename='rdf.turtle', - turtle_focus_iri='http://www.w3.org/1999/02/22-rdf-syntax-ns#', - ), - StaticVocab( - iri_namespace=namespaces.RDFS, - shorthand_prefix='rdfs', - turtle_filename='rdfs.turtle', - 
turtle_focus_iri='http://www.w3.org/2000/01/rdf-schema#', - ), - StaticVocab( - iri_namespace=namespaces.PROV, - shorthand_prefix='prov', - turtle_filename='prov.turtle', - turtle_focus_iri='http://www.w3.org/ns/prov#', - ), - # TODO: osfmap, trove (load from tripledict) -)) +__all__ = () diff --git a/trove/vocab/jsonapi.py b/trove/vocab/jsonapi.py index 7d1d36ffa..591db4219 100644 --- a/trove/vocab/jsonapi.py +++ b/trove/vocab/jsonapi.py @@ -7,5 +7,3 @@ JSONAPI_ATTRIBUTE = JSONAPI['document-resource-object-attributes'] JSONAPI_LINK = JSONAPI['document-links'] JSONAPI_LINK_OBJECT = JSONAPI['document-links-link-object'] - -JSONAPI_MEDIATYPE = 'application/vnd.api+json' diff --git a/trove/vocab/mediatypes.py b/trove/vocab/mediatypes.py new file mode 100644 index 000000000..d806f5944 --- /dev/null +++ b/trove/vocab/mediatypes.py @@ -0,0 +1,5 @@ +JSON = 'application/json' +JSONAPI = 'application/vnd.api+json' +JSONLD = 'application/ld+json' +TURTLE = 'text/turtle' +HTML = 'text/html' diff --git a/trove/vocab/namespaces.py b/trove/vocab/namespaces.py index 28927f3db..9402fd26c 100644 --- a/trove/vocab/namespaces.py +++ b/trove/vocab/namespaces.py @@ -1,69 +1,58 @@ -from primitive_metadata.primitive_rdf import ( - IriNamespace, - IriShorthand, +from primitive_metadata import primitive_rdf as rdf +from primitive_metadata.namespaces import ( RDF, RDFS, OWL, + DCTERMS, + DC, + DCTYPE, + FOAF, + DCAT, + PROV, + SKOS, + DEFAULT_SHORTHAND, ) - __all__ = ( - 'STATIC_SHORTHAND', - 'RDF', - 'RDFS', - 'OWL', 'DC', + 'DCAT', + 'DCTYPE', 'DCTERMS', - 'DCMITYPE', 'FOAF', - 'DCAT', + 'JSONAPI', + 'OAI', + 'OAI_DC', + 'OSFMAP', + 'OWL', 'PROV', + 'RDF', + 'RDFS', + 'SHAREv2', 'SKOS', 'TROVE', - 'SHAREv2', - 'OSFMAP', - 'JSONAPI', + 'NAMESPACES_SHORTHAND', ) - -# established standards -# RDF: http://www.w3.org/1999/02/22-rdf-syntax-ns# -# RDFS: http://www.w3.org/2000/01/rdf-schema# -# OWL: http://www.w3.org/2002/07/owl# -DCTERMS = IriNamespace('http://purl.org/dc/terms/') -DC = 
IriNamespace('http://purl.org/dc/elements/1.1/') -DCMITYPE = IriNamespace('http://purl.org/dc/dcmitype/') -FOAF = IriNamespace('http://xmlns.com/foaf/0.1/') -DCAT = IriNamespace('http://www.w3.org/ns/dcat#') -PROV = IriNamespace('http://www.w3.org/ns/prov#') -OAI = IriNamespace('http://www.openarchives.org/OAI/2.0/') -OAI_DC = IriNamespace('http://www.openarchives.org/OAI/2.0/oai_dc/') -SKOS = IriNamespace('http://www.w3.org/2004/02/skos/core#') +# namespaces used in OAI-PMH +OAI = rdf.IriNamespace('http://www.openarchives.org/OAI/2.0/') +OAI_DC = rdf.IriNamespace('http://www.openarchives.org/OAI/2.0/oai_dc/') # a new namespace for SHARE/trove concepts -TROVE = IriNamespace('https://share.osf.io/vocab/2023/trove/') +TROVE = rdf.IriNamespace('https://share.osf.io/vocab/2023/trove/') # a wild namespace for whatever lingers from SHAREv2 -SHAREv2 = IriNamespace('https://share.osf.io/vocab/2017/sharev2/') +SHAREv2 = rdf.IriNamespace('https://share.osf.io/vocab/2017/sharev2/') # for the OSF metadata application profile (TODO: update to resolvable URL, when there is one) -OSFMAP = IriNamespace('https://osf.io/vocab/2022/') +OSFMAP = rdf.IriNamespace('https://osf.io/vocab/2022/') # for identifying jsonapi concepts with linked anchors on the jsonapi spec (probably fine) -JSONAPI = IriNamespace('https://jsonapi.org/format/1.1/#') +JSONAPI = rdf.IriNamespace('https://jsonapi.org/format/1.1/#') -STATIC_SHORTHAND = IriShorthand({ - 'rdf': RDF, - 'rdfs': RDFS, - 'owl': OWL, - 'dc': DC, - 'dcterms': DCTERMS, - 'dcmitype': DCMITYPE, - 'foaf': FOAF, - 'dcat': DCAT, - 'prov': PROV, - 'skos': SKOS, +NAMESPACES_SHORTHAND = DEFAULT_SHORTHAND.with_update({ 'trove': TROVE, 'sharev2': SHAREv2, 'osf': OSFMAP, 'jsonapi': JSONAPI, + 'oai': OAI, + 'oai_dc': OAI_DC, }) diff --git a/trove/vocab/osfmap.py b/trove/vocab/osfmap.py index 077b5cb0d..d2a208d85 100644 --- a/trove/vocab/osfmap.py +++ b/trove/vocab/osfmap.py @@ -1,15 +1,18 @@ +import functools + from primitive_metadata.primitive_rdf 
import ( literal, RdfTripleDictionary, + IriShorthand, ) from primitive_metadata import gather from share.models.feature_flag import FeatureFlag -from trove.util.iri_labeler import IriLabeler +from trove.util.shorthand import build_shorthand_from_thesaurus from trove.vocab.jsonapi import JSONAPI_MEMBERNAME from trove.vocab.namespaces import ( DCAT, - DCMITYPE, + DCTYPE, DCTERMS, FOAF, OSFMAP, @@ -18,12 +21,13 @@ RDFS, SKOS, TROVE, + NAMESPACES_SHORTHAND, ) OSFMAP_LINK = 'https://osf.io/8yczr' # TODO: define as turtle, load in trove.vocab.__init__? -OSFMAP_VOCAB: RdfTripleDictionary = { +OSFMAP_THESAURUS: RdfTripleDictionary = { ### # properties: DCTERMS.identifier: { @@ -677,7 +681,7 @@ literal('Property', language='en'), }, }, - DCMITYPE.Collection: { + DCTYPE.Collection: { RDF.type: {RDFS.Class}, RDFS.label: { literal('Collection', language='en'), @@ -718,13 +722,13 @@ } -OSFMAP_NORMS = gather.GatheringNorms( +OSFMAP_NORMS = gather.GatheringNorms.new( namestory=( literal('OSFMAP', language='en'), literal('OSF Metadata Application Profile', language='en'), literal('Open Science Framework Metadata Application Profile', language='en'), ), - vocabulary=OSFMAP_VOCAB, + thesaurus=OSFMAP_THESAURUS, focustype_iris={ OSFMAP.Project, OSFMAP.ProjectComponent, @@ -736,11 +740,16 @@ }, ) -osfmap_labeler = IriLabeler( - OSFMAP_VOCAB, - label_iri=JSONAPI_MEMBERNAME, - acceptable_prefixes=('osf:', 'osfmap:'), -) + +@functools.cache +def osfmap_shorthand() -> IriShorthand: + '''build iri shorthand that includes unprefixed osfmap terms + ''' + return build_shorthand_from_thesaurus( + thesaurus=OSFMAP_THESAURUS, + label_predicate=JSONAPI_MEMBERNAME, + base_shorthand=NAMESPACES_SHORTHAND, + ) ALL_SUGGESTED_PROPERTY_PATHS = ( @@ -838,7 +847,8 @@ )) -def suggested_property_paths(type_iris: set[str]) -> tuple[tuple[str, ...]]: +def suggested_property_paths(type_iris: set[str]) -> tuple[tuple[str, ...], ...]: + _suggested: tuple[tuple[str, ...], ...] 
if not type_iris or not type_iris.issubset(OSFMAP_NORMS.focustype_iris): _suggested = () elif type_iris == {DCTERMS.Agent}: @@ -853,7 +863,7 @@ def suggested_property_paths(type_iris: set[str]) -> tuple[tuple[str, ...]]: _suggested = REGISTRATION_SUGGESTED_PROPERTY_PATHS else: _suggested = ALL_SUGGESTED_PROPERTY_PATHS - if FeatureFlag.objects.flag_is_up(FeatureFlag.SUGGEST_CREATOR_FACET): + if _suggested and FeatureFlag.objects.flag_is_up(FeatureFlag.SUGGEST_CREATOR_FACET): return ((DCTERMS.creator,), *_suggested) return _suggested diff --git a/trove/vocab/static_vocab/__init__.py b/trove/vocab/static_vocab/__init__.py new file mode 100644 index 000000000..97e634e21 --- /dev/null +++ b/trove/vocab/static_vocab/__init__.py @@ -0,0 +1,79 @@ +import functools +import pathlib +import types + +from primitive_metadata import primitive_rdf as rdf +import rdflib + +from trove.util.iris import get_sufficiently_unique_iri +from trove.vocab.osfmap import OSFMAP_THESAURUS +from trove.vocab.trove import TROVE_API_THESAURUS + + +__all__ = ( + 'combined_thesaurus', + 'combined_thesaurus__suffuniq', +) + + +_STATIC_THESAURUSES = ( + OSFMAP_THESAURUS, + TROVE_API_THESAURUS, +) + +_STATIC_TURTLES = ( + 'dublin_core_abstract_model.turtle', + 'dublin_core_elements.turtle', + 'dublin_core_terms.turtle', + 'dublin_core_type.turtle', + 'dcat.turtle', + 'owl.turtle', + 'rdf.turtle', + 'rdfs.turtle', + 'prov.turtle', +) + +_STATIC_XMLS = ( + 'skos.rdf.xml', + 'foaf.rdf.xml', +) + + +@functools.cache +def combined_thesaurus(): + _combined_rdf = rdf.RdfGraph() + for _thesaurus in _STATIC_THESAURUSES: + _combined_rdf.add_tripledict(_thesaurus) + for _turtle_filename in _STATIC_TURTLES: + _combined_rdf.add_tripledict(_load_static_turtle(_turtle_filename)) + for _xml_filename in _STATIC_XMLS: + _combined_rdf.add_tripledict(_load_static_xml(_xml_filename)) + return types.MappingProxyType(_combined_rdf.tripledict) + + +@functools.cache +def combined_thesaurus__suffuniq(): + return 
types.MappingProxyType({ + get_sufficiently_unique_iri(_subj): _twoples + for _subj, _twoples in combined_thesaurus().items() + }) + + +def _load_static_turtle(turtle_filename: str) -> rdf.RdfTripleDictionary: + # assumed same directory as this file + with open(_local_filepath(turtle_filename)) as _vocab_file: + _turtle = _vocab_file.read() + return rdf.tripledict_from_turtle(_turtle) + + +def _load_static_xml(xml_filename: str) -> rdf.RdfTripleDictionary: + # assumed same directory as this file + _graph = rdflib.Graph() + with open(_local_filepath(xml_filename)) as _vocab_file: + _graph.parse(_vocab_file, format='xml') + return rdf.tripledict_from_rdflib(_graph) + + +def _local_filepath(filename: str) -> pathlib.Path: + # assumed same directory as this file + return pathlib.Path(__file__).parent / filename diff --git a/trove/vocab/dcat.turtle b/trove/vocab/static_vocab/dcat.turtle similarity index 100% rename from trove/vocab/dcat.turtle rename to trove/vocab/static_vocab/dcat.turtle diff --git a/trove/vocab/static_vocab/dublin_core_abstract_model.turtle b/trove/vocab/static_vocab/dublin_core_abstract_model.turtle new file mode 100644 index 000000000..863cbc1b9 --- /dev/null +++ b/trove/vocab/static_vocab/dublin_core_abstract_model.turtle @@ -0,0 +1,43 @@ +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix owl: <http://www.w3.org/2002/07/owl#> . +@prefix skos: <http://www.w3.org/2004/02/skos/core#> . +@prefix dcam: <http://purl.org/dc/dcam/> . +@prefix dcterms: <http://purl.org/dc/terms/> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . + +<http://purl.org/dc/dcam/> + dcterms:modified "2012-06-14"^^<http://www.w3.org/2001/XMLSchema#date> ; + dcterms:publisher <http://purl.org/dc/aboutdcmi#DCMI> ; + dcterms:title "Metadata terms for vocabulary description"@en . + +dcam:VocabularyEncodingScheme + dcterms:issued "2008-01-14"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdfs:Class ; + rdfs:comment "An enumerated set of resources."@en ; + rdfs:isDefinedBy <http://purl.org/dc/dcam/> ; + rdfs:label "Vocabulary Encoding Scheme"@en ; + rdfs:seeAlso <https://www.dublincore.org/specifications/dublin-core/2007/06/04/abstract-model/> . + +dcam:domainIncludes + dcterms:issued "2020-01-20"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A suggested class for subjects of this property."@en ; + rdfs:isDefinedBy <http://purl.org/dc/dcam/> ; + rdfs:label "Domain Includes"@en .
+ +dcam:memberOf + dcterms:issued "2008-01-14"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A relationship between a resource and a vocabulary encoding scheme which indicates that the resource is a member of a set."@en ; + rdfs:isDefinedBy <http://purl.org/dc/dcam/> ; + rdfs:label "Member Of"@en ; + rdfs:range dcam:VocabularyEncodingScheme ; + rdfs:seeAlso <https://www.dublincore.org/specifications/dublin-core/2007/06/04/abstract-model/> . + +dcam:rangeIncludes + dcterms:issued "2020-01-20"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A suggested class for values of this property."@en ; + rdfs:isDefinedBy <http://purl.org/dc/dcam/> ; + rdfs:label "Range Includes"@en . + diff --git a/trove/vocab/static_vocab/dublin_core_elements.turtle b/trove/vocab/static_vocab/dublin_core_elements.turtle new file mode 100644 index 000000000..54b9e7c10 --- /dev/null +++ b/trove/vocab/static_vocab/dublin_core_elements.turtle @@ -0,0 +1,146 @@ +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix owl: <http://www.w3.org/2002/07/owl#> . +@prefix skos: <http://www.w3.org/2004/02/skos/core#> . +@prefix dcam: <http://purl.org/dc/dcam/> . +@prefix dcterms: <http://purl.org/dc/terms/> . +@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . + +<http://purl.org/dc/elements/1.1/> + dcterms:modified "2012-06-14"^^<http://www.w3.org/2001/XMLSchema#date> ; + dcterms:publisher <http://purl.org/dc/aboutdcmi#DCMI> ; + dcterms:title "Dublin Core Metadata Element Set, Version 1.1"@en . + +<http://purl.org/dc/elements/1.1/contributor> + dcterms:description "The guidelines for using names of persons or organizations as creators also apply to contributors. Typically, the name of a Contributor should be used to indicate the entity."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "An entity responsible for making contributions to the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Contributor"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/contributor) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/coverage> + dcterms:description "Spatial topic and spatial applicability may be a named place or a location specified by its geographic coordinates. Temporal topic may be a named period, date, or date range.
A jurisdiction may be a named administrative entity or a geographic place to which the resource applies. Recommended practice is to use a controlled vocabulary such as the Getty Thesaurus of Geographic Names [[TGN](https://www.getty.edu/research/tools/vocabulary/tgn/index.html)]. Where appropriate, named places or time periods may be used in preference to numeric identifiers such as sets of coordinates or date ranges."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "The spatial or temporal topic of the resource, spatial applicability of the resource, or jurisdiction under which the resource is relevant."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Coverage"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/coverage) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/creator> + dcterms:description "Examples of a Creator include a person, an organization, or a service. Typically, the name of a Creator should be used to indicate the entity."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "An entity primarily responsible for making the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Creator"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/creator) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/date> + dcterms:description "Date may be used to express temporal information at any level of granularity.
Recommended practice is to express the date, date/time, or period of time according to ISO 8601-1 [[ISO 8601-1](https://www.iso.org/iso-8601-date-and-time-format.html)] or a published profile of the ISO standard, such as the W3C Note on Date and Time Formats [[W3CDTF](https://www.w3.org/TR/NOTE-datetime)] or the Extended Date/Time Format Specification [[EDTF](http://www.loc.gov/standards/datetime/)]. If the full date is unknown, month and year (YYYY-MM) or just year (YYYY) may be used. Date ranges may be specified using ISO 8601 period of time specification in which start and end dates are separated by a '/' (slash) character. Either the start or end date may be missing."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A point or period of time associated with an event in the lifecycle of the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Date"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/date) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/description> + dcterms:description "Description may include but is not limited to: an abstract, a table of contents, a graphical representation, or a free-text account of the resource."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "An account of the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Description"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/description) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en .
+ +<http://purl.org/dc/elements/1.1/format> + dcterms:description "Recommended practice is to use a controlled vocabulary where available. For example, for file formats one could use the list of Internet Media Types [[MIME](https://www.iana.org/assignments/media-types/media-types.xhtml)]."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "The file format, physical medium, or dimensions of the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Format"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/format) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/identifier> + dcterms:description "Recommended practice is to identify the resource by means of a string conforming to an identification system."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "An unambiguous reference to the resource within a given context."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Identifier"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/identifier) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en .
+ +<http://purl.org/dc/elements/1.1/language> + dcterms:description "Recommended practice is to use either a non-literal value representing a language from a controlled vocabulary such as ISO 639-2 or ISO 639-3, or a literal value consisting of an IETF Best Current Practice 47 [[IETF-BCP47](https://tools.ietf.org/html/bcp47)] language tag."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A language of the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Language"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/language) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/publisher> + dcterms:description "Examples of a Publisher include a person, an organization, or a service. Typically, the name of a Publisher should be used to indicate the entity."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "An entity responsible for making the resource available."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Publisher"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/publisher) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/relation> + dcterms:description "Recommended practice is to identify the related resource by means of a URI.
If this is not possible or feasible, a string conforming to a formal identification system may be provided."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A related resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Relation"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/relation) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/rights> + dcterms:description "Typically, rights information includes a statement about various property rights associated with the resource, including intellectual property rights."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "Information about rights held in and over the resource."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Rights"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/rights) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + +<http://purl.org/dc/elements/1.1/source> + dcterms:description "The described resource may be derived from the related resource in whole or in part. Recommended best practice is to identify the related resource by means of a string conforming to a formal identification system."@en ; + dcterms:issued "1999-07-02"^^<http://www.w3.org/2001/XMLSchema#date> ; + a rdf:Property ; + rdfs:comment "A related resource from which the described resource is derived."@en ; + rdfs:isDefinedBy <http://purl.org/dc/elements/1.1/> ; + rdfs:label "Source"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/source) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/).
See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + + + dcterms:description "Typically, the subject will be represented using keywords, key phrases, or classification codes. Recommended best practice is to use a controlled vocabulary."@en ; + dcterms:issued "1999-07-02"^^ ; + a rdf:Property ; + rdfs:comment "The topic of the resource."@en ; + rdfs:isDefinedBy ; + rdfs:label "Subject"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/subject) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + + + dcterms:issued "1999-07-02"^^ ; + a rdf:Property ; + rdfs:comment "A name given to the resource."@en ; + rdfs:isDefinedBy ; + rdfs:label "Title"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/title) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + + + dcterms:description "Recommended practice is to use a controlled vocabulary such as the DCMI Type Vocabulary [[DCMI-TYPE](http://dublincore.org/documents/dcmi-type-vocabulary/)]. To describe the file format, physical medium, or dimensions of the resource, use the Format element."@en ; + dcterms:issued "1999-07-02"^^ ; + a rdf:Property ; + rdfs:comment "The nature or genre of the resource."@en ; + rdfs:isDefinedBy ; + rdfs:label "Type"@en ; + skos:note "A [second property](/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/type) with the same name as this property has been declared in the [dcterms: namespace](http://purl.org/dc/terms/). 
See the Introduction to the document [DCMI Metadata Terms](/specifications/dublin-core/dcmi-terms/) for an explanation."@en . + diff --git a/trove/vocab/dublin_core_terms.turtle b/trove/vocab/static_vocab/dublin_core_terms.turtle similarity index 100% rename from trove/vocab/dublin_core_terms.turtle rename to trove/vocab/static_vocab/dublin_core_terms.turtle diff --git a/trove/vocab/static_vocab/dublin_core_type.turtle b/trove/vocab/static_vocab/dublin_core_type.turtle new file mode 100644 index 000000000..1e8d35802 --- /dev/null +++ b/trove/vocab/static_vocab/dublin_core_type.turtle @@ -0,0 +1,122 @@ +@prefix rdf: . +@prefix owl: . +@prefix skos: . +@prefix dcam: . +@prefix dcterms: . +@prefix rdfs: . + + + dcterms:modified "2012-06-14"^^ ; + dcterms:publisher ; + dcterms:title "DCMI Type Vocabulary"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "A collection is described as a group; its parts may also be separately described."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "An aggregation of resources."@en ; + rdfs:isDefinedBy ; + rdfs:label "Collection"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include lists, tables, and databases. A dataset may be useful for direct machine processing."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "Data encoded in a defined structure."@en ; + rdfs:isDefinedBy ; + rdfs:label "Dataset"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Metadata for an event provides descriptive information that is the basis for discovery of the purpose, location, duration, and responsible agents associated with an event. Examples include an exhibition, webcast, conference, workshop, open day, performance, battle, trial, wedding, tea party, conflagration."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A non-persistent, time-based occurrence."@en ; + rdfs:isDefinedBy ; + rdfs:label "Event"@en . 
+ + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include images and photographs of physical objects, paintings, prints, drawings, other images and graphics, animations and moving pictures, film, diagrams, maps, musical notation. Note that Image may include both electronic and physical representations."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A visual representation other than text."@en ; + rdfs:isDefinedBy ; + rdfs:label "Image"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include forms on Web pages, applets, multimedia learning objects, chat services, or virtual reality environments."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A resource requiring interaction from the user to be understood, executed, or experienced."@en ; + rdfs:isDefinedBy ; + rdfs:label "Interactive Resource"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include animations, movies, television programs, videos, zoetropes, or visual output from a simulation. Instances of the type Moving Image must also be describable as instances of the broader type Image."@en ; + dcterms:issued "2003-11-18"^^ ; + a rdfs:Class ; + rdfs:comment "A series of visual representations imparting an impression of motion when shown in succession."@en ; + rdfs:isDefinedBy ; + rdfs:label "Moving Image"@en ; + rdfs:subClassOf . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Note that digital representations of, or surrogates for, these objects should use Image, Text or one of the other types."@en ; + dcterms:issued "2002-07-13"^^ ; + a rdfs:Class ; + rdfs:comment "An inanimate, three-dimensional object or substance."@en ; + rdfs:isDefinedBy ; + rdfs:label "Physical Object"@en . 
+ + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include a photocopying service, a banking service, an authentication service, interlibrary loans, a Z39.50 or Web server."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A system that provides one or more functions."@en ; + rdfs:isDefinedBy ; + rdfs:label "Service"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include a C source file, MS-Windows .exe executable, or Perl script."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A computer program in source or compiled form."@en ; + rdfs:isDefinedBy ; + rdfs:label "Software"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include a music playback file format, an audio compact disc, and recorded speech or sounds."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A resource primarily intended to be heard."@en ; + rdfs:isDefinedBy ; + rdfs:label "Sound"@en . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include paintings, drawings, graphic designs, plans and maps. Recommended best practice is to assign the type Text to images of textual materials. Instances of the type Still Image must also be describable as instances of the broader type Image."@en ; + dcterms:issued "2003-11-18"^^ ; + a rdfs:Class ; + rdfs:comment "A static visual representation."@en ; + rdfs:isDefinedBy ; + rdfs:label "Still Image"@en ; + rdfs:subClassOf . + + + dcam:memberOf dcterms:DCMIType ; + dcterms:description "Examples include books, letters, dissertations, poems, newspapers, articles, archives of mailing lists. Note that facsimiles or images of texts are still of the genre Text."@en ; + dcterms:issued "2000-07-11"^^ ; + a rdfs:Class ; + rdfs:comment "A resource consisting primarily of words for reading."@en ; + rdfs:isDefinedBy ; + rdfs:label "Text"@en . 
+ diff --git a/trove/vocab/static_vocab/foaf.rdf.xml b/trove/vocab/static_vocab/foaf.rdf.xml new file mode 100644 index 000000000..de8249ab7 --- /dev/null +++ b/trove/vocab/static_vocab/foaf.rdf.xml @@ -0,0 +1,584 @@ + + + + + + + + + + + + + + Label Property + A foaf:LabelProperty is any RDF property with texual values that serve as labels. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/trove/vocab/owl.turtle b/trove/vocab/static_vocab/owl.turtle similarity index 100% rename from trove/vocab/owl.turtle rename to trove/vocab/static_vocab/owl.turtle diff --git a/trove/vocab/prov.turtle b/trove/vocab/static_vocab/prov.turtle similarity index 100% rename from trove/vocab/prov.turtle rename to trove/vocab/static_vocab/prov.turtle diff --git a/trove/vocab/rdf.turtle b/trove/vocab/static_vocab/rdf.turtle similarity index 100% rename from trove/vocab/rdf.turtle rename to trove/vocab/static_vocab/rdf.turtle diff --git a/trove/vocab/rdfs.turtle 
b/trove/vocab/static_vocab/rdfs.turtle similarity index 100% rename from trove/vocab/rdfs.turtle rename to trove/vocab/static_vocab/rdfs.turtle diff --git a/trove/vocab/static_vocab/skos.rdf.xml b/trove/vocab/static_vocab/skos.rdf.xml new file mode 100644 index 000000000..1ddeb9a5c --- /dev/null +++ b/trove/vocab/static_vocab/skos.rdf.xml @@ -0,0 +1,468 @@ + + + + + SKOS Vocabulary + Dave Beckett + Nikki Rogers + Participants in W3C's Semantic Web Deployment Working Group. + An RDF vocabulary for describing the basic structure and content of concept schemes such as thesauri, classification schemes, subject heading lists, taxonomies, 'folksonomies', other types of controlled vocabulary, and also concept schemes embedded in glossaries and terminologies. + Alistair Miles + Sean Bechhofer + + + + Concept + + An idea or notion; a unit of thought. + + + + + Concept Scheme + + A set of concepts, optionally including statements about semantic relationships between those concepts. + A concept scheme may be defined to include concepts from different sources. + Thesauri, classification schemes, subject heading lists, taxonomies, 'folksonomies', and other types of controlled vocabulary are all examples of concept schemes. Concept schemes are also embedded in glossaries and terminologies. + + + + + + + Collection + + A meaningful collection of concepts. + Labelled collections can be used where you would like a set of concepts to be displayed under a 'node label' in the hierarchy. + + + + + + + + + Ordered Collection + + An ordered collection of concepts, where both the grouping and the ordering are meaningful. + Ordered collections can be used where you would like a set of concepts to be displayed in a specific order, and optionally under a 'node label'. + + + + + + + is in scheme + + Relates a resource (for example a concept) to a concept scheme in which it is included. + A concept may be a member of more than one concept scheme. 
+ + + + + + + + + has top concept + + Relates, by convention, a concept scheme to a concept which is topmost in the broader/narrower concept hierarchies for that scheme, providing an entry point to these hierarchies. + + + + + + + + + + + + + is top concept in scheme + + Relates a concept to the concept scheme that it is a top level concept of. + + + + + + + + + + + + + preferred label + + The preferred lexical label for a resource, in a given language. + + + + + + A resource has no more than one value of skos:prefLabel per language tag, and no more than one value of skos:prefLabel without language tag. + + The range of skos:prefLabel is the class of RDF plain literals. + + skos:prefLabel, skos:altLabel and skos:hiddenLabel are pairwise + disjoint properties. + + + + + alternative label + + An alternative lexical label for a resource. + Acronyms, abbreviations, spelling variants, and irregular plural/singular forms may be included among the alternative labels for a concept. Mis-spelled terms are normally included as hidden labels (see skos:hiddenLabel). + + + + + + The range of skos:altLabel is the class of RDF plain literals. + + skos:prefLabel, skos:altLabel and skos:hiddenLabel are pairwise disjoint properties. + + + + + hidden label + + A lexical label for a resource that should be hidden when generating visual displays of the resource, but should still be accessible to free text search operations. + + + + + + The range of skos:hiddenLabel is the class of RDF plain literals. + + skos:prefLabel, skos:altLabel and skos:hiddenLabel are pairwise disjoint properties. + + + + + notation + + A notation, also known as classification code, is a string of characters such as "T58.5" or "303.4833" used to uniquely identify a concept within the scope of a given concept scheme. + By convention, skos:notation is used with a typed literal in the object position of the triple. + + + + + + + note + + A general note, for any purpose. 
+ This property may be used directly, or as a super-property for more specific note types. + + + + + + + change note + + A note about a modification to a concept. + + + + + + + + + definition + + A statement or formal explanation of the meaning of a concept. + + + + + + + + + editorial note + + A note for an editor, translator or maintainer of the vocabulary. + + + + + + + + + example + + An example of the use of a concept. + + + + + + + + + history note + + A note about the past state/use/meaning of a concept. + + + + + + + + + scope note + + A note that helps to clarify the meaning and/or the use of a concept. + + + + + + + + + is in semantic relation with + + Links a concept to a concept related by meaning. + This property should not be used directly, but as a super-property for all properties denoting a relationship of meaning between concepts. + + + + + + + + + + + has broader + + Relates a concept to a concept that is more general in meaning. + Broader concepts are typically rendered as parents in a concept hierarchy (tree). + By convention, skos:broader is only used to assert an immediate (i.e. direct) hierarchical link between two conceptual resources. + + + + + + + + + + + has narrower + + Relates a concept to a concept that is more specific in meaning. + By convention, skos:broader is only used to assert an immediate (i.e. direct) hierarchical link between two conceptual resources. + Narrower concepts are typically rendered as children in a concept hierarchy (tree). + + + + + + + + + + + has related + + Relates a concept to a concept with which there is an associative semantic relationship. + + + + + + + + skos:related is disjoint with skos:broaderTransitive + + + + + has broader transitive + + skos:broaderTransitive is a transitive superproperty of skos:broader. + By convention, skos:broaderTransitive is not used to make assertions. 
Rather, the properties can be used to draw inferences about the transitive closure of the hierarchical relation, which is useful e.g. when implementing a simple query expansion algorithm in a search application. + + + + + + + + + + + + + has narrower transitive + + skos:narrowerTransitive is a transitive superproperty of skos:narrower. + By convention, skos:narrowerTransitive is not used to make assertions. Rather, the properties can be used to draw inferences about the transitive closure of the hierarchical relation, which is useful e.g. when implementing a simple query expansion algorithm in a search application. + + + + + + + + + + + + + has member + + Relates a collection to one of its members. + + + + + + + + + + + + + + + + + + has member list + + Relates an ordered collection to the RDF list containing its members. + + + + + + + + + + For any resource, every item in the list given as the value of the + skos:memberList property is also a value of the skos:member property. + + + + + is in mapping relation with + + Relates two concepts coming, by convention, from different schemes, and that have comparable meanings + These concept mapping relations mirror semantic relations, and the data model defined below is similar (with the exception of skos:exactMatch) to the data model defined for semantic relations. A distinct vocabulary is provided for concept mapping relations, to provide a convenient way to differentiate links within a concept scheme from links between concept schemes. However, this pattern of usage is not a formal requirement of the SKOS data model, and relies on informal definitions of best practice. + + + + + + + + + has broader match + + skos:broadMatch is used to state a hierarchical mapping link between two conceptual resources in different concept schemes. + + + + + + + + + + + + + has narrower match + + skos:narrowMatch is used to state a hierarchical mapping link between two conceptual resources in different concept schemes. 
+ + + + + + + + + + + + + has related match + + skos:relatedMatch is used to state an associative mapping link between two conceptual resources in different concept schemes. + + + + + + + + + + + + + has exact match + + skos:exactMatch is used to link two concepts, indicating a high degree of confidence that the concepts can be used interchangeably across a wide range of information retrieval applications. skos:exactMatch is a transitive property, and is a sub-property of skos:closeMatch. + + + + + + + + + + skos:exactMatch is disjoint with each of the properties skos:broadMatch and skos:relatedMatch. + + + + + has close match + + skos:closeMatch is used to link two concepts that are sufficiently similar that they can be used interchangeably in some information retrieval applications. In order to avoid the possibility of "compound errors" when combining mappings across more than two concept schemes, skos:closeMatch is not declared to be a transitive property. + + + + + + + + + + diff --git a/trove/vocab/trove.py b/trove/vocab/trove.py index b3a16f3a6..9efa03a32 100644 --- a/trove/vocab/trove.py +++ b/trove/vocab/trove.py @@ -1,16 +1,18 @@ +import functools import urllib.parse from django.conf import settings from django.urls import reverse from primitive_metadata.primitive_rdf import ( IriNamespace, + IriShorthand, RdfTripleDictionary, literal, literal_json, blanknode, ) -from trove.util.iri_labeler import IriLabeler +from trove.util.shorthand import build_shorthand_from_thesaurus from trove.vocab.jsonapi import ( JSONAPI_MEMBERNAME, JSONAPI_ATTRIBUTE, @@ -19,7 +21,7 @@ from trove.vocab.osfmap import ( DATE_PROPERTIES, OSFMAP_LINK, - osfmap_labeler, + osfmap_shorthand, ) from trove.vocab.namespaces import ( DCTERMS, @@ -28,6 +30,7 @@ RDFS, SKOS, TROVE, + NAMESPACES_SHORTHAND, ) @@ -50,7 +53,7 @@ def trove_browse_link(iri: str): ) -TROVE_API_VOCAB: RdfTripleDictionary = { +TROVE_API_THESAURUS: RdfTripleDictionary = { TROVE.search_api: { RDFS.label: {literal('trove 
search api', language='en')}, RDFS.comment: {literal('trove (noun): a store of valuable or delightful things.', language='en')}, @@ -656,7 +659,7 @@ def trove_browse_link(iri: str): a query param to control ordering of search results accepts a short-hand iri for a date property: -{", ".join(f"`{osfmap_labeler.label_for_iri(_date_iri)}`" for _date_iri in DATE_PROPERTIES)} +{", ".join(f"`{osfmap_shorthand().compact_iri(_date_iri)}`" for _date_iri in DATE_PROPERTIES)} prefix with `-` to sort descending (latest first), otherwise sorts ascending (earliest first) @@ -755,9 +758,9 @@ def trove_browse_link(iri: str): RDF.type: {RDF.Property, JSONAPI_RELATIONSHIP}, JSONAPI_MEMBERNAME: {literal('searchResultPage', language='en')}, }, - TROVE.evidenceCard: { + TROVE.evidenceCardIdentifier: { RDF.type: {RDF.Property, OWL.FunctionalProperty, JSONAPI_RELATIONSHIP}, - JSONAPI_MEMBERNAME: {literal('evidenceCard', language='en')}, + JSONAPI_MEMBERNAME: {literal('evidenceCardIdentifier', language='en')}, }, TROVE.relatedPropertyList: { RDF.type: {RDF.Property, JSONAPI_RELATIONSHIP}, @@ -807,13 +810,19 @@ def trove_browse_link(iri: str): }, } -trove_labeler = IriLabeler( - TROVE_API_VOCAB, - label_iri=JSONAPI_MEMBERNAME, - acceptable_prefixes=('trove:',), -) + +@functools.cache +def trove_shorthand() -> IriShorthand: + '''build iri shorthand that includes unprefixed terms (as defined in TROVE_API_THESAURUS) + ''' + return build_shorthand_from_thesaurus( + thesaurus=TROVE_API_THESAURUS, + label_predicate=JSONAPI_MEMBERNAME, + base_shorthand=NAMESPACES_SHORTHAND, + ) +@functools.cache def trove_indexcard_namespace(): return IriNamespace(f'{settings.SHARE_WEB_URL}trove/index-card/')