Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test if OME fields are in JSON-LD OME context #29

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 64 additions & 8 deletions src/omero_rdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
from rdflib import BNode, Graph, Literal, URIRef
from rdflib.namespace import DCTERMS, RDF


import requests
import json
from typing import Dict, Any, Optional

HELP = """A plugin for exporting rdf from OMERO

omero-rdf creates a stream of RDF triples from the starting object that
Expand Down Expand Up @@ -78,6 +83,55 @@ def _wrapper(self, *args: Any, **kwargs: Any): # type: ignore
return _wrapper



def fetch_jsonld_context(url: str) -> Optional[Dict[str, Any]]:
"""
Fetch JSON-LD context from a URL.

Args:
url: The URL of the JSON-LD document

Returns:
The @context object or None if not found/error
"""
try:
# Make HTTP request
response = requests.get(url, headers={'Accept': 'application/ld+json'})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We really want to have this document built into the library so it need not be downloaded remotely but that can be phase 2.

cf. https://github.com/hyperledger-archives/aries-framework-go/blob/main/docs/jsonld_context.md

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we'd keep a local version of the context, or how? 🤔

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In XML-land, you create a catalog.xml (https://github.com/ome/ome-model/blob/908c6ef501f18b329d46a65fb7bd563f0d5580a6/specification/src/main/resources/released-schema/catalog.xml#L5) then you put it into your Java jar (for example) and you never have to hit the network.

response.raise_for_status()

# Parse JSON
data = response.json()

# Extract @context
if '@context' in data:
return data['@context']
else:
print(f"No @context found in {url}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to use logging.warning to prevent losing stdout

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

implementing it right now

return None

except requests.RequestException as e:
print(f"Network error: {e}")
return None
except json.JSONDecodeError as e:
print(f"JSON parsing error: {e}")
return None

def key_in_context(key: str, context: Optional[Dict[str, Any]]) -> bool:
    """
    Check if a key is in the context.

    A leading "omero:" prefix is stripped from the key before the lookup,
    so "omero:foo" is looked up as "foo". Keys without that prefix are
    looked up unchanged.

    Args:
        key: The key to check
        context: The JSON-LD context, or None if no context is available

    Returns:
        True if the key is in the context, False otherwise (including
        when the context is None)
    """
    if context is None:
        # Without a context nothing can be found; report "missing" rather
        # than raising TypeError from a membership test on None.
        return False

    prefix = "omero:"
    name = key[len(prefix):] if key.startswith(prefix) else key
    return name in context

class Handler:
"""
Instances are used to generate triples.
Expand All @@ -86,9 +140,12 @@ class Handler:
TBD

"""

url = "https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json"
context = fetch_jsonld_context(url)

OME = "http://www.openmicroscopy.org/rdf/2016-06/ome_core/"
OMERO = "http://www.openmicroscopy.org/TBD/omero/"
OME = "ome:"
OMERO = "ome:"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would differentiate here, i.e., either we don't need `OMERO =` at all, or we leave it as `omero:`. I'm thinking we keep OMERO, and we will have to split https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json into two files (before we're done).


def __init__(
self,
Expand All @@ -110,12 +167,8 @@ def __init__(
self.graph = Graph()
self.graph.bind("wd", "http://www.wikidata.org/prop/direct/")
self.graph.bind(
"ome", "http://www.openmicroscopy.org/rdf/2016-06/ome_core/"
"ome", "https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json"
)
self.graph.bind(
"ome-xml", "http://www.openmicroscopy.org/Schemas/OME/2016-06#"
) # FIXME
self.graph.bind("omero", "http://www.openmicroscopy.org/TBD/omero/")
# self.graph.bind("xs", XMLSCHEMA)
# TODO: Allow handlers to register namespaces

Expand Down Expand Up @@ -150,6 +203,8 @@ def get_key(self, key: str) -> Optional[URIRef]:
# Types that we want to omit for now
return None
else:
if not key_in_context(key, self.context):
logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", key)
if key.startswith("omero:"):
return URIRef(f"{self.OMERO}{key[6:]}")
else:
Expand Down Expand Up @@ -261,7 +316,8 @@ def rdf(
# Types that we want to omit for now
pass
else:

if not key_in_context(key, self.context):
logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", key)
if k.startswith("omero:"):
key = URIRef(f"{self.OMERO}{k[6:]}")
else:
Expand Down