Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/check-profile-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Workflow: validate profile links whenever the URL list is changed in a PR.
name: Check added profile link

on:
  # Run on PRs that touch the list of profile URLs (opened / new pushes / reopened).
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - 'scripts/profile_urls.txt'
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
jobs:
  check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # NOTE(review): no setup-python step — relies on the runner's default python3.
      - name: Install dependencies
        run: python3 -m pip install -r requirements.workflow.txt
      - name: Run script
        # check_profiles.py opens profile_urls.txt relative to the CWD,
        # hence the cd into scripts/ first.
        run: |
          cd scripts
          python check_profiles.py
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Environments
.env
venv/
1 change: 1 addition & 0 deletions requirements.workflow.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rdflib==7.4.0
10 changes: 10 additions & 0 deletions requirements.workflow.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=requirements.workflow.txt requirements.workflow.in
#
pyparsing==3.2.5
# via rdflib
rdflib==7.4.0
# via -r requirements.workflow.in
42 changes: 42 additions & 0 deletions scripts/check_profiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import re

from hashlib import md5
from urllib.parse import urljoin

from rdflib import Graph, URIRef, Literal, XSD, RDF, OWL

# rdf:type value identifying a W3C DX profile resource (prof:Profile).
profile_class = URIRef("http://www.w3.org/ns/dx/prof/Profile")

g = Graph()

# One profile URL per line; skip blank lines.
with open("profile_urls.txt", "r") as file:
    profile_urls = [line.strip() for line in file if line.strip()]

for url in profile_urls:
    # publicID anchors relative IRIs in the JSON-LD to the document's
    # directory (urljoin(url, '.')) rather than the metadata file itself.
    g.parse(url, format="json-ld", publicID=urljoin(url, '.'))

# Lexical forms of xsd:dateTime, xsd:date and xsd:time values.
datetime_pattern = re.compile(r"^-?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(Z|[+-]\d{2}:\d{2})?$")
date_pattern = re.compile(r"^-?\d{4}-\d{2}-\d{2}$")
time_pattern = re.compile(r"^\d{2}:\d{2}:\d{2}(Z|[+-]\d{2}:\d{2})?$")

# Add type to datetime, date and time data.
# Snapshot the triples with list() first: adding/removing triples while
# iterating over the live store is undefined behaviour in rdflib.
for s, p, o in list(g):
    # Only retype plain (untyped) literals. Every rdflib node is a str
    # subclass, so without this guard a URIRef/BNode (or an already-typed
    # literal) whose text matched a pattern would be replaced by a Literal,
    # corrupting the graph.
    if isinstance(o, Literal) and o.datatype is None:
        typed_o = None

        if datetime_pattern.match(o):
            typed_o = Literal(o, datatype=XSD.dateTime)
        elif date_pattern.match(o):
            typed_o = Literal(o, datatype=XSD.date)
        elif time_pattern.match(o):
            typed_o = Literal(o, datatype=XSD.time)

        if typed_o is not None:
            g.add((s, p, typed_o))
            g.remove((s, p, o))

    # Mint a stable local IRI for every prof:Profile resource (md5 of the
    # source IRI) and link it back to the original via owl:sameAs.
    if p == RDF.type and o == profile_class:
        digest = md5(s.encode('utf-8')).hexdigest()  # renamed: `id` shadows the builtin
        new_s = URIRef(f"http://example.org/data/profile/{digest}")  # TODO: Change URI
        g.add((new_s, RDF.type, o))
        g.add((new_s, OWL.sameAs, s))

# NOTE(review): the serialization is currently unused — presumably a later
# step will write or print it; confirm before relying on this script's output.
ttl_data = g.serialize(format="turtle")
5 changes: 5 additions & 0 deletions scripts/profile_urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
https://trefx.uk/5s-crate/0.4/ro-crate-metadata.json
https://www.researchobject.org/workflow-run-crate/profiles/0.5/process_run_crate/ro-crate-metadata.json
https://www.researchobject.org/workflow-run-crate/profiles/0.5/workflow_run_crate/ro-crate-metadata.json
https://about.workflowhub.eu/Workflow-RO-Crate/ro-crate-metadata.json
https://www.researchobject.org/workflow-run-crate/profiles/0.5/provenance_run_crate/ro-crate-metadata.json