Skip to content

Commit

Permalink
Merge pull request #25 from joshmoore/gide
Browse files Browse the repository at this point in the history
RO-Crate Investigation
  • Loading branch information
joshmoore authored Dec 4, 2024
2 parents 9473b99 + ccab3a3 commit 90ca3cd
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[settings]
known_third_party = entrypoints,omero,omero_marshal,omero_rdf,rdflib,wikidataintegrator
known_third_party = entrypoints,omero,omero_marshal,omero_rdf,pyld,rdflib,rdflib_pyld_compat,wikidataintegrator
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ dependencies = [
"types-entrypoints",
"future",
"rdflib",
"pyld",
"rdflib-pyld-compat",
"omero-marshal",
"wikidataintegrator",
"packaging"
Expand Down
246 changes: 219 additions & 27 deletions src/omero_rdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import json
import logging
from argparse import Namespace
from functools import wraps
Expand All @@ -30,8 +31,10 @@
from omero.model import Dataset, Image, IObject, Plate, Project, Screen
from omero.sys import ParametersI
from omero_marshal import get_encoder
from pyld import jsonld
from rdflib import BNode, Graph, Literal, URIRef
from rdflib.namespace import DCTERMS, RDF
from rdflib_pyld_compat import pyld_jsonld_from_rdflib_graph

HELP = """A plugin for exporting rdf from OMERO
Expand All @@ -40,7 +43,11 @@
Examples:
omero rdf Image:123
omero rdf Image:123 # Streams each triple found in N-Triples format
omero rdf -F=jsonld Image:123 # Collects all triples and prints formatted output
omero rdf -S=flat Project:123 # Do not recurse into containers ("flat-strategy")
omero rdf --trim-whitespace ... # Strip leading and trailing whitespace from text
omero rdf --first-handler-wins ... # First mapping wins; others will be ignored
"""

Expand Down Expand Up @@ -78,6 +85,157 @@ def _wrapper(self, *args: Any, **kwargs: Any): # type: ignore
return _wrapper


class Format:
    """
    Common base for every output format.

    A format is either streaming or non-streaming, as flagged by
    ``self.streaming`` (left as None here; subclasses set it):
      - streaming formats implement:
          - serialize_triple: return a representation of a single triple
      - non-streaming formats implement:
          - add: keep a triple for later serialization
          - serialize_graph: return a representation of everything added
    Every one of these methods raises NotImplementedError on this base class.
    """

    def __init__(self):
        self.streaming = None

    def __str__(self):
        # Short name: the class name minus its last six characters
        # (the "Format" suffix of the subclasses), lower-cased.
        class_name = self.__class__.__name__
        return class_name[:-6].lower()

    def __lt__(self, other):
        # Order formats alphabetically by their short names.
        mine, theirs = str(self), str(other)
        return mine < theirs

    def add(self, triple):
        raise NotImplementedError()

    def serialize_triple(self, triple):
        raise NotImplementedError()

    def serialize_graph(self):
        raise NotImplementedError()


class StreamingFormat(Format):
    """
    Base for formats that handle triples one at a time.

    Sets ``streaming`` to True. The graph-oriented methods (``add`` and
    ``serialize_graph``) always raise RuntimeError; ``serialize_triple`` is
    left for concrete subclasses to implement.
    """

    def __init__(self):
        super().__init__()
        self.streaming = True

    def add(self, triple):
        message = "adding not supported during streaming"
        raise RuntimeError(message)

    def serialize_graph(self):
        message = "graph serialization not supported during streaming"
        raise RuntimeError(message)


class NTriplesFormat(StreamingFormat):
    """Streaming format rendering each triple as one tab-separated line."""

    def __init__(self):
        super().__init__()

    def serialize_triple(self, triple):
        """
        Return one line of text for the given (subject, predicate, object).

        Each term is rendered with its ``n3()`` method; the object is also
        passed through "unicode_escape" so that control characters such as
        newlines cannot break the one-triple-per-line layout.

        The line is returned rather than printed: Handler.emit() prints the
        return value of this method, so printing here as well would write
        the triple followed by a spurious "None" line.
        """
        s, p, o = triple
        escaped = o.n3().encode("unicode_escape").decode("utf-8")
        return f"""{s.n3()}\t{p.n3()}\t{escaped} ."""


class NonStreamingFormat(Format):
    """
    Base for formats that collect every triple before producing output.

    Sets ``streaming`` to False and creates an rdflib Graph with a fixed set
    of namespace prefixes bound. Triples are stored through ``add``;
    ``serialize_graph`` is left for concrete subclasses to implement.
    """

    def __init__(self):
        super().__init__()
        self.streaming = False
        self.graph = Graph()
        self.graph.bind("wd", "http://www.wikidata.org/prop/direct/")
        self.graph.bind("ome", "http://www.openmicroscopy.org/rdf/2016-06/ome_core/")
        self.graph.bind(
            "ome-xml", "http://www.openmicroscopy.org/Schemas/OME/2016-06#"
        )  # FIXME
        self.graph.bind("omero", "http://www.openmicroscopy.org/TBD/omero/")
        # self.graph.bind("xs", XMLSCHEMA)
        # TODO: Allow handlers to register namespaces

    def add(self, triple):
        """Store the triple in the graph for later serialization."""
        self.graph.add(triple)

    def serialize_triple(self, triple):
        """Always raise: triples are only serialized as part of the graph."""
        # The previous message said "during streaming", which is the
        # opposite of the situation in which this error is raised.
        raise RuntimeError(
            "triple serialization not supported for non-streaming formats"
        )


class TurtleFormat(NonStreamingFormat):
    """Non-streaming format which renders the collected graph as Turtle."""

    def __init__(self):
        super().__init__()

    def serialize_graph(self) -> str:
        """
        Return the collected graph serialized as Turtle.

        The format is requested explicitly instead of relying on the default
        of ``Graph.serialize()``, so the output does not depend on what the
        installed rdflib version uses as its default.
        """
        return self.graph.serialize(format="turtle")


class JSONLDFormat(NonStreamingFormat):
    """Non-streaming format which renders the collected graph as JSON-LD."""

    def __init__(self):
        super().__init__()

    def context(self):
        """
        Return a fresh dict used as the JSON-LD context during serialization.

        A new dict is built on every call, so callers may modify the result.
        """
        # TODO: allow handlers to add to this
        # NOTE(review): every key starts with "@". JSON-LD reserves
        # "@"-prefixed terms for keywords, so these may not act as prefix
        # definitions -- confirm against the serializer before relying on it.
        return {
            "@wd": "http://www.wikidata.org/prop/direct/",
            "@ome": "http://www.openmicroscopy.org/rdf/2016-06/ome_core/",
            "@ome-xml": "http://www.openmicroscopy.org/Schemas/OME/2016-06#",
            "@omero": "http://www.openmicroscopy.org/TBD/omero/",
            "@idr": "https://idr.openmicroscopy.org/",
        }

    def serialize_graph(self) -> str:
        """Return the collected graph as JSON-LD text, indented by 4 spaces."""
        return self.graph.serialize(
            format="json-ld",
            context=self.context(),
            indent=4,
        )


class ROCrateFormat(JSONLDFormat):
    """
    JSON-LD variant that prepends RO-Crate 1.1 boilerplate entities
    (the root dataset and the metadata file descriptor) to the graph.
    """

    def __init__(self):
        super().__init__()

    def context(self):
        """Return the parent context plus an entry for the RO-Crate context."""
        ctx = super().context()
        ctx["@rocrate"] = "https://w3id.org/ro/crate/1.1/context"
        return ctx

    def serialize_graph(self) -> str:
        """
        Return the collected graph as RO-Crate flavoured JSON-LD text.

        The rdflib graph is converted for pyld, flattened and compacted
        against ``context()``; two boilerplate entities are then inserted at
        the front of "@graph" and the document is dumped with 4-space indent.

        Raises:
            RuntimeError: if the compacted document has no "@graph" entry.
        """
        ctx = self.context()
        doc = pyld_jsonld_from_rdflib_graph(self.graph)
        doc = jsonld.flatten(doc, ctx)
        # Without the "graph" option, compaction collapses a document holding
        # a single node into a bare object with no "@graph", which would make
        # the insertion below impossible for small exports.
        doc = jsonld.compact(doc, ctx, options={"graph": True})
        if "@graph" not in doc:
            raise RuntimeError(f"compacted JSON-LD has no @graph entry: {doc}")
        # NOTE(review): the keys below use a "rocrate:" prefix while the
        # context registers "@rocrate" -- confirm the two are meant to match.
        boilerplate = [
            {
                "@id": "./",
                "@type": "Dataset",
                "rocrate:license": "https://creativecommons.org/licenses/by/4.0/",
            },
            {
                "@id": "ro-crate-metadata.json",
                "@type": "CreativeWork",
                "rocrate:conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
                "rocrate:about": {"@id": "./"},
            },
        ]
        # Slice assignment at [0:0] inserts in front of the existing nodes.
        doc["@graph"][0:0] = boilerplate
        return json.dumps(doc, indent=4)


def format_mapping():
    """
    Build the lookup from format name (as given on the command line) to a
    newly created formatter instance. Every call creates fresh instances.
    """
    mapping = {}
    mapping["ntriples"] = NTriplesFormat()
    mapping["jsonld"] = JSONLDFormat()
    mapping["turtle"] = TurtleFormat()
    mapping["ro-crate"] = ROCrateFormat()
    return mapping


def format_list():
    """Return the known format names as the keys view of format_mapping()."""
    available = format_mapping()
    return available.keys()


class Handler:
"""
Instances are used to generate triples.
Expand All @@ -93,31 +251,29 @@ class Handler:
def __init__(
self,
gateway: BlitzGateway,
pretty_print=False,
formatter: Format,
trim_whitespace=False,
use_ellide=False,
first_handler_wins=False,
descent="recursive",
) -> None:
self.gateway = gateway
self.cache: Set[URIRef] = set()
self.bnode = 0
self.pretty_print = pretty_print
self.formatter = formatter
self.trim_whitespace = trim_whitespace
self.use_ellide = use_ellide
self.first_handler_wins = first_handler_wins
self.descent = descent
self._descent_level = 0
self.annotation_handlers = self.load_handlers()
self.info = self.load_server()

if self.pretty_print:
self.graph = Graph()
self.graph.bind("wd", "http://www.wikidata.org/prop/direct/")
self.graph.bind(
"ome", "http://www.openmicroscopy.org/rdf/2016-06/ome_core/"
)
self.graph.bind(
"ome-xml", "http://www.openmicroscopy.org/Schemas/OME/2016-06#"
) # FIXME
self.graph.bind("omero", "http://www.openmicroscopy.org/TBD/omero/")
# self.graph.bind("xs", XMLSCHEMA)
# TODO: Allow handlers to register namespaces
def skip_descent(self):
return self.descent != "recursive" and self._descent_level > 0

def descending(self):
self._descent_level += 1

def load_handlers(self) -> Handlers:
annotation_handlers: Handlers = []
Expand Down Expand Up @@ -218,17 +374,14 @@ def handle(self, data: Data) -> URIRef:
return _id

def emit(self, triple: Triple):
if self.pretty_print:
self.graph.add(triple)
if self.formatter.streaming:
print(self.formatter.serialize_triple(triple))
else:
# Streaming
s, p, o = triple
escaped = o.n3().encode("unicode_escape").decode("utf-8")
print(f"""{s.n3()}\t{p.n3()}\t{escaped} .""")
self.formatter.add(triple)

def close(self):
if self.pretty_print:
print(self.graph.serialize())
if not self.formatter.streaming:
print(self.formatter.serialize_graph())

def rdf(
self,
Expand All @@ -246,6 +399,8 @@ def rdf(
None,
data,
)
if self.first_handler_wins and handled:
return
# End workaround

if _id in self.cache:
Expand Down Expand Up @@ -337,15 +492,35 @@ def _configure(self, parser: Parser) -> None:
rdf_type = ProxyStringType("Image")
rdf_help = "Object to be exported to RDF"
parser.add_argument("target", type=rdf_type, nargs="+", help=rdf_help)
parser.add_argument(
format_group = parser.add_mutually_exclusive_group()
format_group.add_argument(
"--pretty",
action="store_true",
default=False,
help="Print in NT, prevents streaming",
help="Shortcut for --format=turtle",
)
format_group.add_argument(
"--format",
"-F",
default="ntriples",
choices=format_list(),
)
parser.add_argument(
"--descent",
"-S",
default="recursive",
help="Descent strategy to use: recursive, flat",
)
parser.add_argument(
"--ellide", action="store_true", default=False, help="Shorten strings"
)
parser.add_argument(
"--first-handler-wins",
"-1",
action="store_true",
default=False,
help="Don't duplicate annotations",
)
parser.add_argument(
"--trim-whitespace",
action="store_true",
Expand All @@ -356,11 +531,20 @@ def _configure(self, parser: Parser) -> None:

@gateway_required
def action(self, args: Namespace) -> None:

# Support hidden --pretty flag
if args.pretty:
args.format = TurtleFormat()
else:
args.format = format_mapping()[args.format]

handler = Handler(
self.gateway,
pretty_print=args.pretty,
formatter=args.format,
use_ellide=args.ellide,
trim_whitespace=args.trim_whitespace,
first_handler_wins=args.first_handler_wins,
descent=args.descent,
)
self.descend(self.gateway, args.target, handler)
handler.close()
Expand All @@ -379,7 +563,15 @@ def descend(
if isinstance(target, list):
return [self.descend(gateway, t, handler) for t in target]

elif isinstance(target, Screen):
# "descent" doesn't apply to a list
if handler.skip_descent():
objid = handler(target)
logging.debug("skip descent: %s", objid)
return objid
else:
handler.descending()

if isinstance(target, Screen):
scr = self._lookup(gateway, "Screen", target.id)
scrid = handler(scr)
for plate in scr.listChildren():
Expand Down

0 comments on commit 90ca3cd

Please sign in to comment.