Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option --file for writing RDF triples to a file instead of stdout #31

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 62 additions & 13 deletions src/omero_rdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import contextlib
import gzip
import sys
import json
import logging
from argparse import Namespace
Expand All @@ -36,19 +39,24 @@
from rdflib.namespace import DCTERMS, RDF
from rdflib_pyld_compat import pyld_jsonld_from_rdflib_graph

HELP = """A plugin for exporting rdf from OMERO
HELP = """A plugin for exporting RDF from OMERO

omero-rdf creates a stream of RDF triples from the starting object that
it is given. This may be one of: Image, Dataset, Project, Plate, and Screen.

Examples:

omero rdf Image:123 # Streams each triple found in N-Triples format

omero rdf -F=jsonld Image:123 # Collects all triples and prints formatted output
omero rdf -S=flat Project:123 # Do not recurse into containers ("flat-strategy")
omero rdf --trim-whitespace ... # Strip leading and trailing whitespace from text
omero rdf --first-handler-wins ... # First mapping wins; others will be ignored

omero rdf --file - ... # Write RDF triples to stdout
omero rdf --file output.nt ... # Write RDF triples to the specified file
omero rdf --file output.nt.gz # Write RDF triples to the specified file, gzipping

"""

# TYPE DEFINITIONS
Expand All @@ -60,6 +68,37 @@
Handlers = List[Callable[[URIRef, URIRef, Data], Generator[Triple, None, bool]]]


@contextlib.contextmanager
def open_with_default(filename=None, filehandle=None):
    """
    Yield a writable text stream, closing it only if it was opened here.

    The stream is chosen as follows:

    * ``filename`` is ``"-"``: ``sys.stdout`` is yielded and never closed.
    * any other non-empty ``filename``: the file is opened for writing as
      UTF-8 text, gzip-compressed when the name ends in ``.gz``, and is
      closed when the ``with`` block exits (also on error).
    * no ``filename`` (``None`` or empty): ``filehandle`` is yielded as is,
      or ``sys.stdout`` when ``filehandle`` is ``None``. The caller keeps
      ownership, so the stream is not closed.
    """
    if not filename:
        yield sys.stdout if filehandle is None else filehandle
        return

    if filename == "-":
        yield sys.stdout
        return

    opener = gzip.open if filename.endswith(".gz") else open
    # Pin the encoding: without it the bytes written depend on the process
    # locale, and non-ASCII literals can fail or be mis-encoded.
    with opener(filename, "wt", encoding="utf-8") as fh:
        yield fh


def gateway_required(func: Callable) -> Callable: # type: ignore
"""
Decorator which initializes a client (self.client),
Expand Down Expand Up @@ -256,6 +295,7 @@ def __init__(
use_ellide=False,
first_handler_wins=False,
descent="recursive",
filehandle=sys.stdout,
) -> None:
self.gateway = gateway
self.cache: Set[URIRef] = set()
Expand All @@ -268,6 +308,7 @@ def __init__(
self._descent_level = 0
self.annotation_handlers = self.load_handlers()
self.info = self.load_server()
self.filehandle = filehandle

def skip_descent(self):
    """
    Report whether descent into further objects should be skipped.

    Returns False whenever ``self.descent`` is "recursive". For any other
    value, returns True once ``self._descent_level`` is above zero.
    """
    if self.descent == "recursive":
        return False
    return self._descent_level > 0
Expand Down Expand Up @@ -375,13 +416,13 @@ def handle(self, data: Data) -> URIRef:

def emit(self, triple: Triple):
if self.formatter.streaming:
print(self.formatter.serialize_triple(triple))
print(self.formatter.serialize_triple(triple), file=self.filehandle)
else:
self.formatter.add(triple)

def close(self):
if not self.formatter.streaming:
print(self.formatter.serialize_graph())
print(self.formatter.serialize_graph(), file=self.filehandle)

def rdf(
self,
Expand Down Expand Up @@ -527,6 +568,12 @@ def _configure(self, parser: Parser) -> None:
default=False,
help="Remove leading and trailing whitespace from literals",
)
parser.add_argument(
"--file",
type=str,
default=None,
help="Write RDF triples to the specified file",
)
parser.set_defaults(func=self.action)

@gateway_required
Expand All @@ -538,16 +585,18 @@ def action(self, args: Namespace) -> None:
else:
args.format = format_mapping()[args.format]

handler = Handler(
self.gateway,
formatter=args.format,
use_ellide=args.ellide,
trim_whitespace=args.trim_whitespace,
first_handler_wins=args.first_handler_wins,
descent=args.descent,
)
self.descend(self.gateway, args.target, handler)
handler.close()
with open_with_default(args.file) as fh:
handler = Handler(
self.gateway,
formatter=args.format,
use_ellide=args.ellide,
trim_whitespace=args.trim_whitespace,
first_handler_wins=args.first_handler_wins,
descent=args.descent,
filehandle=fh,
)
self.descend(self.gateway, args.target, handler)
handler.close()

# TODO: move to handler?
def descend(
Expand Down
Loading