German-BioImaging · stefanches7 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -56,6 +56,9 @@ repos:
     hooks:
       - id: mypy
         language_version: python3
+        additional_dependencies: [
+          types-requests,
+        ]
 
   - repo: https://github.com/adrienverge/yamllint.git
     rev: v1.26.3

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "omero-py>=5.8",
     "entrypoints",
     "types-entrypoints",
+    "types-requests",
     "future",
     "rdflib",
     "pyld",

diff --git a/src/omero_rdf/__init__.py b/src/omero_rdf/__init__.py
@@ -36,6 +36,9 @@
 from rdflib.namespace import DCTERMS, RDF
 from rdflib_pyld_compat import pyld_jsonld_from_rdflib_graph
 
+import requests
+
+
 HELP = """A plugin for exporting rdf from OMERO
 
 omero-rdf creates a stream of RDF triples from the starting object that
@@ -85,6 +88,58 @@ def _wrapper(self, *args: Any, **kwargs: Any):  # type: ignore
     return _wrapper
 
 
+def fetch_jsonld_context(url: str) -> Optional[Dict[str, Any]]:
+    """
+    Fetch the @context mapping of a JSON-LD document over HTTP.
+
+    Args:
+        url: The URL of the JSON-LD document
+
+    Returns:
+        The @context mapping, or None (after logging a warning) on a request
+        or HTTP error, invalid JSON, or a missing/non-mapping @context
+    """
+    try:
+        # requests applies no timeout by default; bound the wait explicitly.
+        response = requests.get(
+            url, headers={"Accept": "application/ld+json"}, timeout=10
+        )
+        response.raise_for_status()
+    except requests.RequestException:
+        logging.warning("Network error", exc_info=True)
+        return None
+    try:
+        data = response.json()
+    except ValueError:  # JSON decode errors subclass ValueError
+        logging.warning("JSON parsing error", exc_info=True)
+        return None
+
+    context = data.get("@context") if isinstance(data, dict) else None
+    if not isinstance(context, dict):
+        logging.warning("No @context found in %s", url)
+        return None
+    return context
+
+
+def key_in_context(key: str, context: Optional[Dict[str, Any]]) -> bool:
+    """
+    Check if a key is in the context.
+
+    Args:
+        key: The key to check; a leading "omero:" prefix is stripped first
+        context: The JSON-LD context
+
+    Returns:
+        True if the key is in the context, False otherwise
+
+    Raises:
+        Exception: If context is None
+    """
+    if context is None:
+        raise Exception("context is None")
+    return (key[6:] if key.startswith("omero:") else key) in context
+
+
 class Format:
     """
     Output mechanisms split into two types: streaming and non-streaming.
@@ -245,8 +300,15 @@ class Handler:
 
     """
 
-    OME = "http://www.openmicroscopy.org/rdf/2016-06/ome_core/"
-    OMERO = "http://www.openmicroscopy.org/TBD/omero/"
+    url = (
+        "https://gist.githubusercontent.com/stefanches7/"
+        "5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa"
+        "630a56ef55670d2b5da2be50ff76/context.ld.json"
+    )
+    context = fetch_jsonld_context(url)
+
+    OME = "ome:"
+    OMERO = "ome:"
 
     def __init__(
         self,
@@ -306,6 +368,12 @@ def get_key(self, key: str) -> Optional[URIRef]:
             # Types that we want to omit fo
             return None
         else:
+            if not key_in_context(key, self.context):
+                logging.warning(
+                    "Did not find in OMERO context: %s. "
+                    "Add it to the spreadsheet of new fields",
+                    key,
+                )
             if key.startswith("omero:"):
                 return URIRef(f"{self.OMERO}{key[6:]}")
             else:
@@ -417,7 +485,12 @@ def rdf(
                 # Types that we want to omit for now
                 pass
             else:
-
+                if not key_in_context(k, self.context):
+                    logging.warning(
+                        "Did not find in OMERO context: %s. "
+                        "Add it to the spreadsheet of new fields",
+                        k,
+                    )
                 if k.startswith("omero:"):
                     key = URIRef(f"{self.OMERO}{k[6:]}")
                 else: