From c216c2c2dfd92c83bea682523d2d2cbcd4a762d7 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 08:25:43 +0000
Subject: [PATCH 01/23] Begin refactoring to allow multiple converter types by
 moving get_workflow to new CWL converter class

---
 src/runcrate/cli.py                           |  27 +++++++-
 src/runcrate/convert.py                       |  63 +++---------------
 src/runcrate/converters/__init__.py           |   7 ++
 .../__pycache__/__init__.cpython-312.pyc      | Bin 0 -> 322 bytes
 .../__pycache__/base.cpython-312.pyc          | Bin 0 -> 724 bytes
 .../__pycache__/cwl.cpython-312.pyc           | Bin 0 -> 3177 bytes
 src/runcrate/converters/base.py               |  10 +++
 src/runcrate/converters/cwl.py                |  60 +++++++++++++++++
 tests/test_cli.py                             |   2 +-
 tests/test_step_mapping.py                    |  19 ++++--
 10 files changed, 128 insertions(+), 60 deletions(-)
 create mode 100644 src/runcrate/converters/__init__.py
 create mode 100644 src/runcrate/converters/__pycache__/__init__.cpython-312.pyc
 create mode 100644 src/runcrate/converters/__pycache__/base.cpython-312.pyc
 create mode 100644 src/runcrate/converters/__pycache__/cwl.cpython-312.pyc
 create mode 100644 src/runcrate/converters/base.py
 create mode 100644 src/runcrate/converters/cwl.py

diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py
index b35ab86..1bfb1bc 100644
--- a/src/runcrate/cli.py
+++ b/src/runcrate/cli.py
@@ -21,6 +21,7 @@
 from .convert import ProvCrateBuilder
 from .report import dump_crate_actions
 from .run import run_crate
+from .converters import CONVERTERS
 
 
 @click.group()
@@ -34,6 +35,13 @@ def cli():
     metavar="RO_DIR",
     type=click.Path(exists=True, file_okay=False, readable=True, path_type=Path),
 )
+@click.option(
+    "-c",
+    "--converter",
+    type=click.Choice(CONVERTERS.keys()),
+    default="cwl",
+    help="converter to use",
+)
 @click.option(
     "-o",
     "--output",
@@ -56,15 +64,30 @@ def cli():
     type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
     help="path to a README file (should be README.md in Markdown format)",
 )
-def convert(root, output, license, workflow_name, readme):
+def convert(root, converter, output, license, workflow_name, readme):
     """\
     Convert a CWLProv RO bundle into a Workflow Run RO-Crate.
 
     RO_DIR: top-level directory of the CWLProv RO
     """
+
     if not output:
         output = Path(f"{root.name}.crate.zip")
-    builder = ProvCrateBuilder(root, workflow_name, license, readme)
+
+    if converter not in CONVERTERS:
+        sys.stderr.write(f"Unknown converter: {converter}\n")
+        sys.exit(1)
+
+    converter_instance = CONVERTERS[converter]
+    sys.stdout.write(f"Using converter: {converter_instance}\n")
+
+    builder = ProvCrateBuilder(
+        root,
+        converter_instance,
+        workflow_name,
+        license,
+        readme
+    )
     crate = builder.build()
     if output.suffix == ".zip":
         crate.write_zip(output)
diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index cc20fc6..bbe355e 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -27,7 +27,6 @@
 import networkx as nx
 import prov.model
 from bdbag.bdbagit import BDBag
-from cwl_utils.parser import load_document_by_yaml
 from cwlprov.prov import Entity, Provenance
 from cwlprov.ro import ResearchObject
 from cwlprov.utils import first
@@ -37,6 +36,7 @@
 
 from .constants import PROFILES_BASE, PROFILES_VERSION, TERMS_NAMESPACE
 from .utils import as_list, parse_img
+from .converters import CONVERTERS
 
 
 WORKFLOW_BASENAME = "packed.cwl"
@@ -114,10 +114,6 @@ def is_structured(cwl_type):
     return properties
 
 
-def get_fragment(uri):
-    return uri.rsplit("#", 1)[-1]
-
-
 def get_relative_uri(uri):
     doc, fragment = uri.rsplit("#", 1)
     return f"{doc.rsplit('/', 1)[-1]}#{fragment}"
@@ -148,62 +144,25 @@ def build_step_graph(cwl_wf):
     return graph
 
 
-def normalize_cwl_defs(cwl_defs):
-    inline_tools = {}
-    for d in cwl_defs.values():
-        if not hasattr(d, "steps") or not d.steps:
-            continue
-        for s in d.steps:
-            if hasattr(s, "run") and s.run:
-                if hasattr(s.run, "id"):
-                    tool = s.run
-                    if tool.id.startswith("_:"):  # CWL > 1.0
-                        tool.id = f"{s.id}/run"
-                    inline_tools[get_fragment(tool.id)] = tool
-                    s.run = tool.id
-    cwl_defs.update(inline_tools)
-
-
-def get_workflow(wf_path):
-    """\
-    Read the packed CWL workflow.
-
-    Returns a dictionary where tools / workflows are mapped by their ids.
-
-    Does not use load_document_by_uri, so we can hack the json to work around
-    issues.
-    """
-    wf_path = Path(wf_path)
-    with open(wf_path, "rt") as f:
-        json_wf = json.load(f)
-    graph = json_wf.get("$graph", [json_wf])
-    # https://github.com/common-workflow-language/cwltool/pull/1506
-    for n in graph:
-        ns = n.pop("$namespaces", {})
-        if ns:
-            json_wf.setdefault("$namespaces", {}).update(ns)
-    defs = load_document_by_yaml(json_wf, wf_path.absolute().as_uri(), load_all=True)
-    if not isinstance(defs, list):
-        defs = [defs]
-    def_map = {}
-    for d in defs:
-        k = get_fragment(d.id)
-        if k == "main":
-            k = wf_path.name
-        def_map[k] = d
-    normalize_cwl_defs(def_map)
-    return def_map
+def get_fragment(uri):
+    return uri.rsplit("#", 1)[-1]
 
 
 class ProvCrateBuilder:
 
-    def __init__(self, root, workflow_name=None, license=None, readme=None):
+    def __init__(self,
+                 root,
+                 converter=CONVERTERS["cwl"],
+                 workflow_name=None,
+                 license=None,
+                 readme=None):
         self.root = Path(root)
+        self.converter = converter
         self.workflow_name = workflow_name
         self.license = license
         self.readme = Path(readme) if readme else readme
         self.wf_path = self.root / "workflow" / WORKFLOW_BASENAME
-        self.cwl_defs = get_workflow(self.wf_path)
+        self.cwl_defs = self.converter.get_workflow(self.wf_path)
         self.step_maps = self._get_step_maps(self.cwl_defs)
         self.ro = ResearchObject(BDBag(str(root)))
         self.with_prov = set(str(_) for _ in self.ro.resources_with_provenance())
diff --git a/src/runcrate/converters/__init__.py b/src/runcrate/converters/__init__.py
new file mode 100644
index 0000000..7460165
--- /dev/null
+++ b/src/runcrate/converters/__init__.py
@@ -0,0 +1,7 @@
+from .base import converter
+from .cwl import cwlConverter
+
+CONVERTERS = {
+    "base": converter(),
+    "cwl": cwlConverter(),
+}
diff --git a/src/runcrate/converters/__pycache__/__init__.cpython-312.pyc b/src/runcrate/converters/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4bd230982ec500f71cd7ebb8e40ff0b25844105c
GIT binary patch
literal 322
zcmX@j%ge<81obN%(i?#EV-N=hn4yf%RzSvdh7^Vr#vF!R#wbQc5SuB7DVI5lnUR5s
zA(e48R4I_n3}dG-&tZyUsbtY)eaQ%v)?~cJnVg?jmReMjS_I_qB$wwn!}(0NSdtQp
zQ*SW?Mg25ci&%j=ia-u1VgnH@K;jmcv%g=MYfy-5Q1D8I&p;7|UvByt`MIh3`Q`cf
zIr;%b`B|ySCB^zi21X|OMWuPkMTsS;`o%@b2oBr@#rpB_nR%Hd@$q^EmA5!-a`RJ4
zb5iY!xPiukoLejhBt9@RGBVy}kiN?x`;m=-SFGFnCbxLE-A7htKBh*lB2J(Z097_m
AxBvhE

literal 0
HcmV?d00001

diff --git a/src/runcrate/converters/__pycache__/base.cpython-312.pyc b/src/runcrate/converters/__pycache__/base.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf58f88f68c43cebf9bf26c83435d198f7add58e
GIT binary patch
literal 724
zcmY*Wy>8nu5GE-ru?-n5G87%TYO#u-V^I`cvKd83FN7k}wo*xSk8}iiY60g7Is|=$
zAdiw4D75wk3VSHfsXYBS$qmQvj^BNEM?Op@W8&<`&kxJJJAY%)+n@u7*ACngMlOgX
zJ0j^lYj5VHVxgDxd^~9vbTx|1sU&kI>5j}j$($clUO1>ni7u`cSOrO6(#v=b9I*E-
zsYpm?ch1!Vqq2oTD*yK}TIpOxy3{(0K7qbci8ayroAb93Y>EV8t0Du*V{q(cqRZH*
z>AI?iLZn4%h1i}QZ>N0@*E7Cz-^j0FaBzSm-O7j?dpacz+4hEacA+e{E5%C<Uly4z
z`2uv_WR~f*D)>6KtLb>$(?2V_fx_^Z%QUg6E@G&7xl*9Gb^93}9hDr(W%GEwR+3*<
zsF?y!rI{YpW`JXd3{KoppOy=O?F<gjIh<lLSSl;};kK_Hw||;Pm8R-Dfsw<r(8nh$
z;#>()kA=v!++>IYA--<nti6Gu<3fNA6`InfE8ufA4!_VKV@LLk`jf+n?*}arIkS+q
h+~@W6@Lk*SJ-l?RM&yM9e>_U*UpAt_o;cU&{0D6As<r?C

literal 0
HcmV?d00001

diff --git a/src/runcrate/converters/__pycache__/cwl.cpython-312.pyc b/src/runcrate/converters/__pycache__/cwl.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4059217289c3814aab0c3c56eeede7c10413dc52
GIT binary patch
literal 3177
zcmaJ@U1%KF6~1@pZ}sQh)xWGb8q1QKP2*K;%XJ!DW2GWWN)RD2xE7RQv@=@GdUj@g
z@2t1l&eA^kVG{&dk-~0hjW<w;QWcz+Li*H~LMeT*kX$VjwG>RCep4-%KwjE&XLnXV
zUU~=4oO|!N-#Pd1JNK_WJ)H>3-HWrSvV_oMRtbvGRQBHlrGhl1VH0KW0>(%SX`C6#
za0{HT^CrK*Lz`d<8F4|xD1_#aCf-Gwq~i^)fwLg3@bT~y47E7+I+C_^M<<R>p0I9c
z;onuA<#|vBOk33y%}(Yry5%T~E6R$RF_R6HhGZbD>`#EGARR4W4K0K;d>1WnS_mYs
zaUcba2Pv)yanb8e=FJ}lPLlzG4+DY)xzGD(D-g~6=YB|3H<u_gmA{>tTDCL#l%2P2
zbLw4Ur}d;mr!KsDVS0+>tRzt#eTtIgL8Uc8It88+*%dFWD28P?ic%Oj97`f-W?Mno
zs{aDj!`=I!pk#>2W)b1<$N3txR*=)0gNrwsdJ#L#<wnTG8s2D*zl4e*C)iMwW=O}1
z8){*|3bHR4i_y=ybjLAssA`<crCJ_}yvxs`pS*@pQE-KHR~viYKmle^bVZ6+hg~k+
z^ZzZ=k>~qeF^CnT)z^9C!~z?ab8)&as2%(_nreFBpe@jXK27+Y7#!B3<O~J<uH*~{
zdJsS5Tq%W%l7Wh0SAuz8gn4(kC_Nfj9{k;o^a}^IKnrG2>~uRd>DEgKk#30n#i0M-
z4;CG6*zLRmO{<r91YS-b^ah$64iGVN!|$Ur=sco0AHwJ+!tkv={wa48<#9Y*_?O%)
zbGjot%etJm$*m>R&dW>0&iE}U<Bo30S-6pj&d$L6J>AI>i^{638A->mEtRau`DLBx
zvIDn*%2O>QFoM>Mn$7B(ytu+J4I&#FJp?yv>r}REN6t}Qe(uiai1CU{Z8@*YN!5~<
z)#NQdvNW|VaOZn!MWzxvXKAuQX-)?hLBL*!IP(Q@B1P2fa-nO&QZqWus!5$*2c8N0
zK2_87_>5{;@fcwj#N+L(Zh1WO<MGVI<5IfgaalX-byD5Y^d&WCI-Z!zYH;ZZyGveJ
zU8J^|b97HssRFTkodz{5>Zn#y2N;7oUXKDe$`VmijMu!7p?N%u$m>%qn`BheDCi0h
zg909Cyo<0a<VpD@g>8d}mpqBZq~w=8!Jm+aEiYtIk7xFt1X6(o_ps*STa*nBmI3~x
zn}Kz+0D{dfj2t1M!(Hr3QlNGqqG$c6Z>XIA_*_Za?TMAYQ=Q)Kk!v%%gC{G-no+|a
zo29vLICSFlKchpHuC=b}TeW=6*owYd5+26}OEdLYZ)IR@pj@miZpGrIH@rx!9v#|=
zj&DZCYd`p6d@FisCwgTwdgaUa9!0P1NvL<|Sr|npYd6+YJBh2CiK|<QYdeX#&BWYR
z;_Zjmf4m*JS>ksiBh`uD@Y|8edjD|wd!^ZWZ+}VpDmDNPqy0_v^Pf$BI=vNr3ECpj
z@|z$2tlmFbd1vjNGOmxDEX|c?KK#*M2kIT%>qH$rJ7IY<EZ2s%!|{4#uyo>^Ntn~a
z9{F2;C98w8QSMJsVRl41!pJOP_>BAtC>3Dbbc>Urp;<J1z@-{D0<G_(I6v><Gla48
zlm9}2<0ulEH4P^oYHU0QhlsNN=H?N`426?N_Uy=kVj)mQRFFL)y&n}mC{)i>Z*N6T
z*Ty#^lMlm_<OFmc2_G_kBm93r*cAk17HU6*fFgnD{tQIN9ZE9XL6QLi<I-$NjujyK
z7uxp#-aWwM58=T(qD8WJHR|&1k`*AHIy|PGjm90qnQo8cr>gL=aG}~heFX$d`T?!Y
zs9zyXxEFE-rL_jG;D&&%+oU*1k!=tH4~**DTe!T^O7N@qd9?Z$NC>!Xl58b(jNnNl
z&U*rN^epu_c(0H#@K6XTuNQdsVu^=6@s4T&d3w^aN>#@p1Tq>T(@Zd1pgu<>4$T|R
zvVRl_JIHadaah9vZF${>Wg3>Q_z8i$%tm6MD8I^>vcX9{9TAqr3i8nt{X7#+fffBd
z2sqb6Bc<+oY@{-_HuksJcxh(0?_~9SHMQM$c3pjt`faKn9j*@yR&M<AMj1Ey^hbRY
z>#+v|8w0yTFII2g&wY}sX^)1!TOX5|UghhfWA`tAa(QR;+~(-Hb!~g}Vm<!ygZ|I^
z*Tr?MmZ{GEdageER<*l!eP{CW=H%rs`9Dg3khUhTZ4S@W<%!RvPo*lpC;U5t22U~i
zI<q&3I$|G1K8RGiA2l!Cd|YCWKSuV7;&m!Y#@2Es)4LVrc1|@LJ%pVT!tQZnh1vQL
z_9*mkZez{Zp&?AL2N@j%aTh%eVWH=#cuE+0dKC+2_r)IJY-0#BguUe#RjLzqZaj%4
z6Vq7qa~<htO;NUaIma+*BC8TG^i!Je<d}Cae5DcjRq_UmWL!s2f!O0P#(zhnU!&o#
WP}kS!!ZYzAK3o3&e-QlqNd5;?t;rPt

literal 0
HcmV?d00001

diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
new file mode 100644
index 0000000..b5320dc
--- /dev/null
+++ b/src/runcrate/converters/base.py
@@ -0,0 +1,10 @@
+class converter:
+    def __init__(self):
+        pass
+
+    def get_workflow(self, wf_path):
+        """\
+        Get the workflow from the given path.
+
+        Returns a dictionary where tools / workflows are mapped by their ids.
+        """
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
new file mode 100644
index 0000000..2d9d975
--- /dev/null
+++ b/src/runcrate/converters/cwl.py
@@ -0,0 +1,60 @@
+from .base import converter
+
+from pathlib import Path
+import json
+
+from cwl_utils.parser import load_document_by_yaml
+
+
+class cwlConverter(converter):
+    def __init__(self):
+        pass
+
+    def get_workflow(self, wf_path):
+        """\
+        Get the workflow from the given path.
+
+        Returns a dictionary where tools / workflows are mapped by their ids.
+
+        Does not use load_document_by_uri, so we can hack the json to work
+        around issues.
+        """
+
+        wf_path = Path(wf_path)
+        with open(wf_path, "rt") as f:
+            json_wf = json.load(f)
+        graph = json_wf.get("$graph", [json_wf])
+        # https://github.com/common-workflow-language/cwltool/pull/1506
+        for n in graph:
+            ns = n.pop("$namespaces", {})
+            if ns:
+                json_wf.setdefault("$namespaces", {}).update(ns)
+        defs = load_document_by_yaml(json_wf, wf_path.absolute().as_uri(), load_all=True)
+        if not isinstance(defs, list):
+            defs = [defs]
+        def_map = {}
+        for d in defs:
+            k = self._get_fragment(d.id)
+            if k == "main":
+                k = wf_path.name
+            def_map[k] = d
+        self._normalize_cwl_defs(def_map)
+        return def_map
+
+    def _get_fragment(self, uri):
+        return uri.rsplit("#", 1)[-1]
+
+    def _normalize_cwl_defs(self, cwl_defs):
+        inline_tools = {}
+        for d in cwl_defs.values():
+            if not hasattr(d, "steps") or not d.steps:
+                continue
+            for s in d.steps:
+                if hasattr(s, "run") and s.run:
+                    if hasattr(s.run, "id"):
+                        tool = s.run
+                        if tool.id.startswith("_:"):  # CWL > 1.0
+                            tool.id = f"{s.id}/run"
+                        inline_tools[self._get_fragment(tool.id)] = tool
+                        s.run = tool.id
+        cwl_defs.update(inline_tools)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 8d8d357..2428ec6 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -26,7 +26,7 @@ def test_cli_convert(data_dir, tmpdir, monkeypatch):
     monkeypatch.chdir(str(tmpdir))
     root = data_dir / "revsort-run-1"
     runner = CliRunner()
-    args = ["convert", str(root)]
+    args = ["convert", "-c", "cwl", str(root)]
     result = runner.invoke(cli, args)
     assert result.exit_code == 0, result.exception
     crate_zip = tmpdir / f"{root.name}.crate.zip"
diff --git a/tests/test_step_mapping.py b/tests/test_step_mapping.py
index 0a106c7..b5ecac9 100644
--- a/tests/test_step_mapping.py
+++ b/tests/test_step_mapping.py
@@ -12,13 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from runcrate.convert import ProvCrateBuilder, get_workflow
+import pytest
 
+from runcrate.convert import ProvCrateBuilder
+from runcrate.converters.cwl import cwlConverter
 
-def test_step_maps(data_dir):
+
+@pytest.fixture
+def converter():
+    converter = cwlConverter()
+    return converter
+
+
+def test_step_maps_cwl(data_dir, converter):
     wf_basename = "exome-alignment-packed.cwl"
     wf_path = data_dir / wf_basename
-    cwl_defs = get_workflow(wf_path)
+    cwl_defs = converter.get_workflow(wf_path)
     step_maps = ProvCrateBuilder._get_step_maps(cwl_defs)
     assert set(step_maps) == {wf_basename}
     sm = step_maps[wf_basename]
@@ -39,9 +48,9 @@ def test_step_maps(data_dir):
     assert sm["main/samtools_sort"]["pos"] < sm["main/picard_markduplicates"]["pos"]
 
 
-def test_step_maps_disconnected(data_dir):
+def test_step_maps_disconnected_cwl(data_dir, converter):
     wf_path = data_dir / "no-output-run-1/workflow/packed.cwl"
-    cwl_defs = get_workflow(wf_path)
+    cwl_defs = converter.get_workflow(wf_path)
     step_maps = ProvCrateBuilder._get_step_maps(cwl_defs)
     assert set(step_maps) == {"packed.cwl"}
     sm = step_maps["packed.cwl"]

From a26fc10d40c6b61ba7215dd44e40a8ac3b1af08d Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 08:51:49 +0000
Subject: [PATCH 02/23] Add standard python gitignore from github. Remove files
 which fall foul of it

---
 .gitignore                                    | 166 +++++++++++++++++-
 .../__pycache__/cwl.cpython-312.pyc           | Bin 3177 -> 0 bytes
 2 files changed, 162 insertions(+), 4 deletions(-)
 delete mode 100644 src/runcrate/converters/__pycache__/cwl.cpython-312.pyc

diff --git a/.gitignore b/.gitignore
index 2ca8682..efa407c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,162 @@
-_site/
-.sass-cache/
-.jekyll-cache/
-.jekyll-metadata
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/src/runcrate/converters/__pycache__/cwl.cpython-312.pyc b/src/runcrate/converters/__pycache__/cwl.cpython-312.pyc
deleted file mode 100644
index 4059217289c3814aab0c3c56eeede7c10413dc52..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3177
zcmaJ@U1%KF6~1@pZ}sQh)xWGb8q1QKP2*K;%XJ!DW2GWWN)RD2xE7RQv@=@GdUj@g
z@2t1l&eA^kVG{&dk-~0hjW<w;QWcz+Li*H~LMeT*kX$VjwG>RCep4-%KwjE&XLnXV
zUU~=4oO|!N-#Pd1JNK_WJ)H>3-HWrSvV_oMRtbvGRQBHlrGhl1VH0KW0>(%SX`C6#
za0{HT^CrK*Lz`d<8F4|xD1_#aCf-Gwq~i^)fwLg3@bT~y47E7+I+C_^M<<R>p0I9c
z;onuA<#|vBOk33y%}(Yry5%T~E6R$RF_R6HhGZbD>`#EGARR4W4K0K;d>1WnS_mYs
zaUcba2Pv)yanb8e=FJ}lPLlzG4+DY)xzGD(D-g~6=YB|3H<u_gmA{>tTDCL#l%2P2
zbLw4Ur}d;mr!KsDVS0+>tRzt#eTtIgL8Uc8It88+*%dFWD28P?ic%Oj97`f-W?Mno
zs{aDj!`=I!pk#>2W)b1<$N3txR*=)0gNrwsdJ#L#<wnTG8s2D*zl4e*C)iMwW=O}1
z8){*|3bHR4i_y=ybjLAssA`<crCJ_}yvxs`pS*@pQE-KHR~viYKmle^bVZ6+hg~k+
z^ZzZ=k>~qeF^CnT)z^9C!~z?ab8)&as2%(_nreFBpe@jXK27+Y7#!B3<O~J<uH*~{
zdJsS5Tq%W%l7Wh0SAuz8gn4(kC_Nfj9{k;o^a}^IKnrG2>~uRd>DEgKk#30n#i0M-
z4;CG6*zLRmO{<r91YS-b^ah$64iGVN!|$Ur=sco0AHwJ+!tkv={wa48<#9Y*_?O%)
zbGjot%etJm$*m>R&dW>0&iE}U<Bo30S-6pj&d$L6J>AI>i^{638A->mEtRau`DLBx
zvIDn*%2O>QFoM>Mn$7B(ytu+J4I&#FJp?yv>r}REN6t}Qe(uiai1CU{Z8@*YN!5~<
z)#NQdvNW|VaOZn!MWzxvXKAuQX-)?hLBL*!IP(Q@B1P2fa-nO&QZqWus!5$*2c8N0
zK2_87_>5{;@fcwj#N+L(Zh1WO<MGVI<5IfgaalX-byD5Y^d&WCI-Z!zYH;ZZyGveJ
zU8J^|b97HssRFTkodz{5>Zn#y2N;7oUXKDe$`VmijMu!7p?N%u$m>%qn`BheDCi0h
zg909Cyo<0a<VpD@g>8d}mpqBZq~w=8!Jm+aEiYtIk7xFt1X6(o_ps*STa*nBmI3~x
zn}Kz+0D{dfj2t1M!(Hr3QlNGqqG$c6Z>XIA_*_Za?TMAYQ=Q)Kk!v%%gC{G-no+|a
zo29vLICSFlKchpHuC=b}TeW=6*owYd5+26}OEdLYZ)IR@pj@miZpGrIH@rx!9v#|=
zj&DZCYd`p6d@FisCwgTwdgaUa9!0P1NvL<|Sr|npYd6+YJBh2CiK|<QYdeX#&BWYR
z;_Zjmf4m*JS>ksiBh`uD@Y|8edjD|wd!^ZWZ+}VpDmDNPqy0_v^Pf$BI=vNr3ECpj
z@|z$2tlmFbd1vjNGOmxDEX|c?KK#*M2kIT%>qH$rJ7IY<EZ2s%!|{4#uyo>^Ntn~a
z9{F2;C98w8QSMJsVRl41!pJOP_>BAtC>3Dbbc>Urp;<J1z@-{D0<G_(I6v><Gla48
zlm9}2<0ulEH4P^oYHU0QhlsNN=H?N`426?N_Uy=kVj)mQRFFL)y&n}mC{)i>Z*N6T
z*Ty#^lMlm_<OFmc2_G_kBm93r*cAk17HU6*fFgnD{tQIN9ZE9XL6QLi<I-$NjujyK
z7uxp#-aWwM58=T(qD8WJHR|&1k`*AHIy|PGjm90qnQo8cr>gL=aG}~heFX$d`T?!Y
zs9zyXxEFE-rL_jG;D&&%+oU*1k!=tH4~**DTe!T^O7N@qd9?Z$NC>!Xl58b(jNnNl
z&U*rN^epu_c(0H#@K6XTuNQdsVu^=6@s4T&d3w^aN>#@p1Tq>T(@Zd1pgu<>4$T|R
zvVRl_JIHadaah9vZF${>Wg3>Q_z8i$%tm6MD8I^>vcX9{9TAqr3i8nt{X7#+fffBd
z2sqb6Bc<+oY@{-_HuksJcxh(0?_~9SHMQM$c3pjt`faKn9j*@yR&M<AMj1Ey^hbRY
z>#+v|8w0yTFII2g&wY}sX^)1!TOX5|UghhfWA`tAa(QR;+~(-Hb!~g}Vm<!ygZ|I^
z*Tr?MmZ{GEdageER<*l!eP{CW=H%rs`9Dg3khUhTZ4S@W<%!RvPo*lpC;U5t22U~i
zI<q&3I$|G1K8RGiA2l!Cd|YCWKSuV7;&m!Y#@2Es)4LVrc1|@LJ%pVT!tQZnh1vQL
z_9*mkZez{Zp&?AL2N@j%aTh%eVWH=#cuE+0dKC+2_r)IJY-0#BguUe#RjLzqZaj%4
z6Vq7qa~<htO;NUaIma+*BC8TG^i!Je<d}Cae5DcjRq_UmWL!s2f!O0P#(zhnU!&o#
WP}kS!!ZYzAK3o3&e-QlqNd5;?t;rPt


From c867726620a5a24b0b0cfe26b30eeb67ad823241 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 08:53:47 +0000
Subject: [PATCH 03/23] Move helper functions out of cwl class

---
 src/runcrate/converters/cwl.py | 39 +++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 2d9d975..15f4f56 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -5,6 +5,24 @@
 
 from cwl_utils.parser import load_document_by_yaml
 
+def _get_fragment(uri):
+    return uri.rsplit("#", 1)[-1]
+
+def _normalize_cwl_defs(cwl_defs):
+    inline_tools = {}
+    for d in cwl_defs.values():
+        if not hasattr(d, "steps") or not d.steps:
+            continue
+        for s in d.steps:
+            if hasattr(s, "run") and s.run:
+                if hasattr(s.run, "id"):
+                    tool = s.run
+                    if tool.id.startswith("_:"):  # CWL > 1.0
+                        tool.id = f"{s.id}/run"
+                    inline_tools[_get_fragment(tool.id)] = tool
+                    s.run = tool.id
+    cwl_defs.update(inline_tools)
+
 
 class cwlConverter(converter):
     def __init__(self):
@@ -34,27 +52,10 @@ def get_workflow(self, wf_path):
             defs = [defs]
         def_map = {}
         for d in defs:
-            k = self._get_fragment(d.id)
+            k = _get_fragment(d.id)
             if k == "main":
                 k = wf_path.name
             def_map[k] = d
-        self._normalize_cwl_defs(def_map)
+        _normalize_cwl_defs(def_map)
         return def_map
 
-    def _get_fragment(self, uri):
-        return uri.rsplit("#", 1)[-1]
-
-    def _normalize_cwl_defs(self, cwl_defs):
-        inline_tools = {}
-        for d in cwl_defs.values():
-            if not hasattr(d, "steps") or not d.steps:
-                continue
-            for s in d.steps:
-                if hasattr(s, "run") and s.run:
-                    if hasattr(s.run, "id"):
-                        tool = s.run
-                        if tool.id.startswith("_:"):  # CWL > 1.0
-                            tool.id = f"{s.id}/run"
-                        inline_tools[self._get_fragment(tool.id)] = tool
-                        s.run = tool.id
-        cwl_defs.update(inline_tools)

From 47788cc8731d94d1131ebd6ee2af47614b0fe8cd Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 09:43:54 +0000
Subject: [PATCH 04/23] Ignore pycs

---
 .gitignore                                        |   1 +
 .../__pycache__/__init__.cpython-312.pyc          | Bin 322 -> 0 bytes
 .../converters/__pycache__/base.cpython-312.pyc   | Bin 724 -> 0 bytes
 3 files changed, 1 insertion(+)
 delete mode 100644 src/runcrate/converters/__pycache__/__init__.cpython-312.pyc
 delete mode 100644 src/runcrate/converters/__pycache__/base.cpython-312.pyc

diff --git a/.gitignore b/.gitignore
index efa407c..522376a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+*.pyc
 
 # C extensions
 *.so
diff --git a/src/runcrate/converters/__pycache__/__init__.cpython-312.pyc b/src/runcrate/converters/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index 4bd230982ec500f71cd7ebb8e40ff0b25844105c..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 322
zcmX@j%ge<81obN%(i?#EV-N=hn4yf%RzSvdh7^Vr#vF!R#wbQc5SuB7DVI5lnUR5s
zA(e48R4I_n3}dG-&tZyUsbtY)eaQ%v)?~cJnVg?jmReMjS_I_qB$wwn!}(0NSdtQp
zQ*SW?Mg25ci&%j=ia-u1VgnH@K;jmcv%g=MYfy-5Q1D8I&p;7|UvByt`MIh3`Q`cf
zIr;%b`B|ySCB^zi21X|OMWuPkMTsS;`o%@b2oBr@#rpB_nR%Hd@$q^EmA5!-a`RJ4
zb5iY!xPiukoLejhBt9@RGBVy}kiN?x`;m=-SFGFnCbxLE-A7htKBh*lB2J(Z097_m
AxBvhE

diff --git a/src/runcrate/converters/__pycache__/base.cpython-312.pyc b/src/runcrate/converters/__pycache__/base.cpython-312.pyc
deleted file mode 100644
index bf58f88f68c43cebf9bf26c83435d198f7add58e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 724
zcmY*Wy>8nu5GE-ru?-n5G87%TYO#u-V^I`cvKd83FN7k}wo*xSk8}iiY60g7Is|=$
zAdiw4D75wk3VSHfsXYBS$qmQvj^BNEM?Op@W8&<`&kxJJJAY%)+n@u7*ACngMlOgX
zJ0j^lYj5VHVxgDxd^~9vbTx|1sU&kI>5j}j$($clUO1>ni7u`cSOrO6(#v=b9I*E-
zsYpm?ch1!Vqq2oTD*yK}TIpOxy3{(0K7qbci8ayroAb93Y>EV8t0Du*V{q(cqRZH*
z>AI?iLZn4%h1i}QZ>N0@*E7Cz-^j0FaBzSm-O7j?dpacz+4hEacA+e{E5%C<Uly4z
z`2uv_WR~f*D)>6KtLb>$(?2V_fx_^Z%QUg6E@G&7xl*9Gb^93}9hDr(W%GEwR+3*<
zsF?y!rI{YpW`JXd3{KoppOy=O?F<gjIh<lLSSl;};kK_Hw||;Pm8R-Dfsw<r(8nh$
z;#>()kA=v!++>IYA--<nti6Gu<3fNA6`InfE8ufA4!_VKV@LLk`jf+n?*}arIkS+q
h+~@W6@Lk*SJ-l?RM&yM9e>_U*UpAt_o;cU&{0D6As<r?C


From e32a8405ddef1ea008098b67a9d9ea6c8ad7307f Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 09:44:26 +0000
Subject: [PATCH 05/23] Move get_step_maps to cwl class

---
 src/runcrate/convert.py         | 31 +------------------------------
 src/runcrate/converters/base.py | 13 +++++++++++++
 src/runcrate/converters/cwl.py  | 30 ++++++++++++++++++++++++++++++
 tests/test_step_mapping.py      |  4 ++--
 4 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index bbe355e..ed7c61c 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -24,7 +24,6 @@
 from io import StringIO
 from pathlib import Path
 
-import networkx as nx
 import prov.model
 from bdbag.bdbagit import BDBag
 from cwlprov.prov import Entity, Provenance
@@ -126,22 +125,6 @@ def cut_step_part(relative_uri):
     return relative_uri
 
 
-def build_step_graph(cwl_wf):
-    out_map = {}
-    for s in cwl_wf.steps:
-        for o in s.out:
-            out_map[o] = get_fragment(s.id)
-    graph = nx.DiGraph()
-    for s in cwl_wf.steps:
-        fragment = get_fragment(s.id)
-        graph.add_node(fragment)
-        for i in s.in_:
-            sources = [i.source] if not isinstance(i.source, list) else i.source
-            for s in sources:
-                source_fragment = out_map.get(s)
-                if source_fragment:
-                    graph.add_edge(source_fragment, fragment)
-    return graph
 
 
 def get_fragment(uri):
@@ -163,7 +146,7 @@ def __init__(self,
         self.readme = Path(readme) if readme else readme
         self.wf_path = self.root / "workflow" / WORKFLOW_BASENAME
         self.cwl_defs = self.converter.get_workflow(self.wf_path)
-        self.step_maps = self._get_step_maps(self.cwl_defs)
+        self.step_maps = self.converter.get_step_maps(self.cwl_defs)
         self.ro = ResearchObject(BDBag(str(root)))
         self.with_prov = set(str(_) for _ in self.ro.resources_with_provenance())
         self.workflow_run = Provenance(self.ro).activity()
@@ -178,18 +161,6 @@ def __init__(self,
         self.file_map = {}
         self.manifest = self._get_manifest()
 
-    @staticmethod
-    def _get_step_maps(cwl_defs):
-        rval = {}
-        for k, v in cwl_defs.items():
-            if hasattr(v, "steps"):
-                graph = build_step_graph(v)
-                pos_map = {f: i for i, f in enumerate(nx.topological_sort(graph))}
-                rval[k] = {}
-                for s in v.steps:
-                    f = get_fragment(s.id)
-                    rval[k][f] = {"tool": get_fragment(s.run), "pos": pos_map[f]}
-        return rval
 
     def _get_manifest(self):
         manifest = {}
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index b5320dc..5321aa6 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -8,3 +8,16 @@ def get_workflow(self, wf_path):
 
         Returns a dictionary where tools / workflows are mapped by their ids.
         """
+        raise NotImplementedError("get_workflow")
+
+    def get_step_maps(self, wf_defs):
+        """\
+        Get a mapping of step names to their tool names and positions.
+        """
+        raise NotImplementedError("get_step_maps")
+
+    def build_step_graph(self, wf):
+        """\
+        Build a graph of steps in the workflow.
+        """
+        raise NotImplementedError("build_step_graph")
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 15f4f56..fbfe967 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -2,6 +2,7 @@
 
 from pathlib import Path
 import json
+import networkx as nx
 
 from cwl_utils.parser import load_document_by_yaml
 
@@ -59,3 +60,32 @@ def get_workflow(self, wf_path):
         _normalize_cwl_defs(def_map)
         return def_map
 
+    def get_step_maps(self, cwl_defs):
+        rval = {}
+        for k, v in cwl_defs.items():
+            if hasattr(v, "steps"):
+                graph = self.build_step_graph(v)
+                pos_map = {f: i for i, f in enumerate(nx.topological_sort(graph))}
+                rval[k] = {}
+                for s in v.steps:
+                    f = _get_fragment(s.id)
+                    rval[k][f] = {"tool": _get_fragment(s.run), "pos": pos_map[f]}
+        return rval
+
+    def build_step_graph(self, cwl_wf):
+        out_map = {}
+        for s in cwl_wf.steps:
+            for o in s.out:
+                out_map[o] = _get_fragment(s.id)
+        graph = nx.DiGraph()
+        for s in cwl_wf.steps:
+            fragment = _get_fragment(s.id)
+            graph.add_node(fragment)
+            for i in s.in_:
+                sources = [i.source] if not isinstance(i.source, list) else i.source
+                for s in sources:
+                    source_fragment = out_map.get(s)
+                    if source_fragment:
+                        graph.add_edge(source_fragment, fragment)
+        return graph
+
diff --git a/tests/test_step_mapping.py b/tests/test_step_mapping.py
index b5ecac9..79f879e 100644
--- a/tests/test_step_mapping.py
+++ b/tests/test_step_mapping.py
@@ -28,7 +28,7 @@ def test_step_maps_cwl(data_dir, converter):
     wf_basename = "exome-alignment-packed.cwl"
     wf_path = data_dir / wf_basename
     cwl_defs = converter.get_workflow(wf_path)
-    step_maps = ProvCrateBuilder._get_step_maps(cwl_defs)
+    step_maps = converter.get_step_maps(cwl_defs)
     assert set(step_maps) == {wf_basename}
     sm = step_maps[wf_basename]
     assert len(sm) == 8
@@ -51,7 +51,7 @@ def test_step_maps_cwl(data_dir, converter):
 def test_step_maps_disconnected_cwl(data_dir, converter):
     wf_path = data_dir / "no-output-run-1/workflow/packed.cwl"
     cwl_defs = converter.get_workflow(wf_path)
-    step_maps = ProvCrateBuilder._get_step_maps(cwl_defs)
+    step_maps = converter.get_step_maps(cwl_defs)
     assert set(step_maps) == {"packed.cwl"}
     sm = step_maps["packed.cwl"]
     assert set(sm) == {"main/date_step", "main/echo_step", "main/date2_step"}

From 3ced681c139ed06ca9c1d0975d75b537f50dbe27 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 09:48:53 +0000
Subject: [PATCH 06/23] Apply linting

---
 src/runcrate/cli.py                 |  2 +-
 src/runcrate/convert.py             |  5 +----
 src/runcrate/converters/__init__.py |  1 +
 src/runcrate/converters/cwl.py      | 11 ++++++-----
 tests/test_step_mapping.py          |  1 -
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py
index 1bfb1bc..7e5cbd2 100644
--- a/src/runcrate/cli.py
+++ b/src/runcrate/cli.py
@@ -19,9 +19,9 @@
 
 from . import __version__
 from .convert import ProvCrateBuilder
+from .converters import CONVERTERS
 from .report import dump_crate_actions
 from .run import run_crate
-from .converters import CONVERTERS
 
 
 @click.group()
diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index ed7c61c..9037809 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -34,8 +34,8 @@
 from rocrate.rocrate import ROCrate
 
 from .constants import PROFILES_BASE, PROFILES_VERSION, TERMS_NAMESPACE
-from .utils import as_list, parse_img
 from .converters import CONVERTERS
+from .utils import as_list, parse_img
 
 
 WORKFLOW_BASENAME = "packed.cwl"
@@ -125,8 +125,6 @@ def cut_step_part(relative_uri):
     return relative_uri
 
 
-
-
 def get_fragment(uri):
     return uri.rsplit("#", 1)[-1]
 
@@ -161,7 +159,6 @@ def __init__(self,
         self.file_map = {}
         self.manifest = self._get_manifest()
 
-
     def _get_manifest(self):
         manifest = {}
         with open(self.root / Path(MANIFEST_FILE)) as f:
diff --git a/src/runcrate/converters/__init__.py b/src/runcrate/converters/__init__.py
index 7460165..d4d1d78 100644
--- a/src/runcrate/converters/__init__.py
+++ b/src/runcrate/converters/__init__.py
@@ -1,6 +1,7 @@
 from .base import converter
 from .cwl import cwlConverter
 
+
 CONVERTERS = {
     "base": converter(),
     "cwl": cwlConverter(),
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index fbfe967..30c328c 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -1,14 +1,16 @@
-from .base import converter
-
-from pathlib import Path
 import json
-import networkx as nx
+from pathlib import Path
 
+import networkx as nx
 from cwl_utils.parser import load_document_by_yaml
 
+from .base import converter
+
+
 def _get_fragment(uri):
     return uri.rsplit("#", 1)[-1]
 
+
 def _normalize_cwl_defs(cwl_defs):
     inline_tools = {}
     for d in cwl_defs.values():
@@ -88,4 +90,3 @@ def build_step_graph(self, cwl_wf):
                     if source_fragment:
                         graph.add_edge(source_fragment, fragment)
         return graph
-
diff --git a/tests/test_step_mapping.py b/tests/test_step_mapping.py
index 79f879e..ac4fed1 100644
--- a/tests/test_step_mapping.py
+++ b/tests/test_step_mapping.py
@@ -14,7 +14,6 @@
 
 import pytest
 
-from runcrate.convert import ProvCrateBuilder
 from runcrate.converters.cwl import cwlConverter
 
 
From 82473285c0ef5421ddd351479d05c385fa3cd8be Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 25 Nov 2024 13:31:11 +0000
Subject: [PATCH 07/23] Apply linting

---
 src/runcrate/convert.py | 461 ++++++++++++++++++----------------------
 1 file changed, 201 insertions(+), 260 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index 9037809..c60ae36 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -131,6 +131,9 @@ def get_fragment(uri):
 
 class ProvCrateBuilder:
 
+    # --------------------------------------------------------------------------
+    # Public methods, called by the CLI
+
     def __init__(self,
                  root,
                  converter=CONVERTERS["cwl"],
@@ -159,61 +162,6 @@ def __init__(self,
         self.file_map = {}
         self.manifest = self._get_manifest()
 
-    def _get_manifest(self):
-        manifest = {}
-        with open(self.root / Path(MANIFEST_FILE)) as f:
-            for line in f:
-                hash_, relpath = line.strip().split(None, 1)
-                manifest[hash_] = self.root / relpath
-        return manifest
-
-    def _resolve_plan(self, activity):
-        job_qname = activity.plan()
-        plan = activity.provenance.entity(job_qname)
-        if not plan:
-            m = SCATTER_JOB_PATTERN.match(str(job_qname))
-            if m:
-                plan = activity.provenance.entity(m.groups()[0])
-        return plan
-
-    def _get_hash(self, prov_param):
-        k = prov_param.id.localpart
-        try:
-            return self.hashes[k]
-        except KeyError:
-            type_names = frozenset(str(_) for _ in prov_param.types())
-            if "wf4ever:File" in type_names:
-                hash_ = next(prov_param.specializationOf()).id.localpart
-                self.hashes[k] = hash_
-                return hash_
-            elif "ro:Folder" in type_names:
-                m = hashlib.sha1()
-                m.update("".join(sorted(
-                    self._get_hash(_) for _ in self.get_dict(prov_param).values()
-                )).encode())
-                self.hashes[k] = hash_ = m.hexdigest()
-                return hash_
-
-    def _get_hashes(self, provenance):
-        for r in provenance.prov_doc.get_records(prov.model.ProvEntity):
-            self._get_hash(Entity(provenance, r))
-
-    def get_members(self, entity):
-        membership = entity.provenance.record_with_attr(
-            prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
-        )
-        member_ids = (_.get_attribute(prov.model.PROV_ATTR_ENTITY) for _ in membership)
-        return (entity.provenance.entity(first(_)) for _ in member_ids)
-
-    def get_dict(self, entity):
-        d = {}
-        for qname in entity.record.get_attribute("prov:hadDictionaryMember"):
-            kvp = entity.provenance.entity(qname)
-            key = first(kvp.record.get_attribute("prov:pairKey"))
-            entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
-            d[key] = entity.provenance.entity(entity_id)
-        return d
-
     def build(self):
         crate = ROCrate(gen_preview=False)
         crate.metadata.extra_contexts.append(TERMS_NAMESPACE)
@@ -227,6 +175,9 @@ def build(self):
         self.add_output_formats(crate)
         return crate
 
+    # --------------------------------------------------------------------------
+    # Top level methods, called by build()
+
     def add_root_metadata(self, crate):
         if self.license:
             crate.root_dataset["license"] = self.license
@@ -280,6 +231,144 @@ def add_workflow(self, crate):
             self.add_param_connections(crate, workflow)
         return workflow
 
+    def add_engine_run(self, crate):
+        engine = self.workflow_run.start().starter_activity()
+        roc_engine = crate.add(SoftwareApplication(crate, properties={
+            "name": engine.label or "workflow engine"
+        }))
+        roc_engine_run = crate.add(ContextEntity(crate, properties={
+            "@type": "OrganizeAction",
+            "name": f"Run of {roc_engine['name']}",
+            "startTime": engine.start().time.isoformat(),
+        }))
+        roc_engine_run["instrument"] = roc_engine
+        self.add_agent(crate, roc_engine_run, engine)
+        self.roc_engine_run = roc_engine_run
+
+    def add_action(self, crate, activity, parent_instrument=None):
+        workflow = crate.mainEntity
+        action = crate.add(ContextEntity(crate, properties={
+            "@type": "CreateAction",
+            "name": activity.label,
+        }))
+        plan = self._resolve_plan(activity)
+        plan_tag = plan.id.localpart
+        if plan_tag == "main":
+            assert str(activity.type) == "wfprov:WorkflowRun"
+            instrument = workflow
+            self.roc_engine_run["result"] = action
+            crate.root_dataset["mentions"] = [action]
+
+            def to_wf_p(k):
+                return k
+        else:
+            parent_instrument_fragment = get_fragment(parent_instrument.id)
+            if parent_instrument_fragment != WORKFLOW_BASENAME:
+                parts = plan_tag.split("/", 1)
+                if parts[0] == "main":
+                    parts[0] = parent_instrument_fragment
+                    plan_tag = "/".join(parts)
+            tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
+            instrument = crate.dereference(f"{workflow.id}#{tool_name}")
+            control_action = self.control_actions.get(plan_tag)
+            if not control_action:
+                control_action = crate.add(ContextEntity(crate, properties={
+                    "@type": "ControlAction",
+                    "name": f"orchestrate {tool_name}",
+                }))
+                step = crate.dereference(f"{workflow.id}#{plan_tag}")
+                control_action["instrument"] = step
+                self.roc_engine_run.append_to("object", control_action, compact=True)
+                self.control_actions[plan_tag] = control_action
+            control_action.append_to("object", action, compact=True)
+            if activity.uri in self.with_prov:
+                nested_prov = Provenance(self.ro, activity.uri)
+                activity = nested_prov.activity()
+
+            def to_wf_p(k):
+                return k.replace(activity.plan().localpart, tool_name)
+        self._get_hashes(activity.provenance)
+        action["instrument"] = instrument
+        action["startTime"] = activity.start().time.isoformat()
+        action["endTime"] = activity.end().time.isoformat()
+        action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
+        action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
+        self.add_container_images(crate, action, activity)
+        for job in activity.steps():
+            self.add_action(crate, job, parent_instrument=instrument)
+
+    def patch_workflow_input_collection(self, crate, wf=None):
+        """\
+        CWLProv records secondary files only in step runs, not in the workflow
+        run. Thus, when the conversion of parameter values is completed,
+        workflow-level parameters with secondary files get mapped to the main
+        entity of the collection alone (a File). This method fixes the mapping
+        by retrieving the correct Collection entity from the relevant tool
+        execution.
+        """
+        if wf is None:
+            wf = crate.mainEntity
+        sel = [_ for _ in crate.contextual_entities
+               if "CreateAction" in as_list(_.type) and _.get("instrument") is wf]
+        if not sel:
+            return  # skipped subworkflow
+        wf_action = sel[0]
+        connections = [_ for _ in crate.contextual_entities
+                       if "ParameterConnection" in as_list(_.type)]
+        for param in wf.get("input", []):
+            if param.get("additionalType") == "Collection":
+                src_sel = [_ for _ in wf_action.get("object", [])
+                           if param in as_list(_.get("exampleOfWork"))]
+                if not src_sel:
+                    raise RuntimeError(f"object for param {param.id} not found")
+                obj = src_sel[0]
+                if obj.type != "Collection":
+                    param_connections = [_ for _ in connections if _["sourceParameter"] is param]
+                    if not param_connections:
+                        continue
+                    pc = param_connections[0]
+                    tgt_param = pc["targetParameter"]
+                    tgt_sel = [_ for _ in crate.get_entities()
+                               if tgt_param in as_list(_.get("exampleOfWork"))]
+                    if not tgt_sel:
+                        raise RuntimeError(f"object for param {tgt_param.id} not found")
+                    tgt_obj = tgt_sel[0]
+                    wf_action["object"] = [
+                        _ for _ in as_list(wf_action["object"]) if _ is not obj
+                    ] + [tgt_obj]
+                    tgt_obj.append_to("exampleOfWork", param)
+                    obj["exampleOfWork"] = [_ for _ in as_list(obj["exampleOfWork"])
+                                            if _ is not param]
+                    if len(obj["exampleOfWork"]) == 1:
+                        obj["exampleOfWork"] = obj["exampleOfWork"][0]
+                    if len(obj["exampleOfWork"]) == 0:
+                        del obj["exampleOfWork"]
+        for tool in wf.get("hasPart", []):
+            if "ComputationalWorkflow" in as_list(tool.type):
+                self.patch_workflow_input_collection(crate, wf=tool)
+
+    def add_inputs_file(self, crate):
+        path = self.root / "workflow" / INPUTS_FILE_BASENAME
+        if path.is_file():
+            with open(path) as f:
+                data = json.load(f)
+            data = self._map_input_data(crate, data)
+            source = StringIO(json.dumps(data, indent=4))
+            crate.add_file(source, path.name, properties={
+                "name": "input object document",
+                "encodingFormat": "application/json",
+            })
+
+    def add_output_formats(self, crate):
+        path = self.root / "workflow" / OUTPUTS_FILE_BASENAME
+        if path.is_file():
+            with open(path) as f:
+                data = json.load(f)
+            self._map_input_data(crate, data)
+
+    # --------------------------------------------------------------------------
+    # Internal methods, called by the top level methods
+
     def add_step(self, crate, workflow, cwl_step):
         step_fragment = get_fragment(cwl_step.id)
         step_id = f"{self.wf_path.name}#{step_fragment}"
@@ -370,20 +459,6 @@ def add_params(self, crate, cwl_params):
             params.append(p)
         return params
 
-    def add_engine_run(self, crate):
-        engine = self.workflow_run.start().starter_activity()
-        roc_engine = crate.add(SoftwareApplication(crate, properties={
-            "name": engine.label or "workflow engine"
-        }))
-        roc_engine_run = crate.add(ContextEntity(crate, properties={
-            "@type": "OrganizeAction",
-            "name": f"Run of {roc_engine['name']}",
-            "startTime": engine.start().time.isoformat(),
-        }))
-        roc_engine_run["instrument"] = roc_engine
-        self.add_agent(crate, roc_engine_run, engine)
-        self.roc_engine_run = roc_engine_run
-
     def add_agent(self, crate, roc_engine_run, engine):
         delegate = engine.start().starter_activity()
         try:
@@ -408,58 +483,6 @@ def add_agent(self, crate, roc_engine_run, engine):
             ro_a = crate.add(ContextEntity(crate, agent_id, properties=properties))
             roc_engine_run.append_to("agent", ro_a, compact=True)
 
-    def add_action(self, crate, activity, parent_instrument=None):
-        workflow = crate.mainEntity
-        action = crate.add(ContextEntity(crate, properties={
-            "@type": "CreateAction",
-            "name": activity.label,
-        }))
-        plan = self._resolve_plan(activity)
-        plan_tag = plan.id.localpart
-        if plan_tag == "main":
-            assert str(activity.type) == "wfprov:WorkflowRun"
-            instrument = workflow
-            self.roc_engine_run["result"] = action
-            crate.root_dataset["mentions"] = [action]
-
-            def to_wf_p(k):
-                return k
-        else:
-            parent_instrument_fragment = get_fragment(parent_instrument.id)
-            if parent_instrument_fragment != WORKFLOW_BASENAME:
-                parts = plan_tag.split("/", 1)
-                if parts[0] == "main":
-                    parts[0] = parent_instrument_fragment
-                    plan_tag = "/".join(parts)
-            tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
-            instrument = crate.dereference(f"{workflow.id}#{tool_name}")
-            control_action = self.control_actions.get(plan_tag)
-            if not control_action:
-                control_action = crate.add(ContextEntity(crate, properties={
-                    "@type": "ControlAction",
-                    "name": f"orchestrate {tool_name}",
-                }))
-                step = crate.dereference(f"{workflow.id}#{plan_tag}")
-                control_action["instrument"] = step
-                self.roc_engine_run.append_to("object", control_action, compact=True)
-                self.control_actions[plan_tag] = control_action
-            control_action.append_to("object", action, compact=True)
-            if activity.uri in self.with_prov:
-                nested_prov = Provenance(self.ro, activity.uri)
-                activity = nested_prov.activity()
-
-            def to_wf_p(k):
-                return k.replace(activity.plan().localpart, tool_name)
-        self._get_hashes(activity.provenance)
-        action["instrument"] = instrument
-        action["startTime"] = activity.start().time.isoformat()
-        action["endTime"] = activity.end().time.isoformat()
-        action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
-        action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
-        self.add_container_images(crate, action, activity)
-        for job in activity.steps():
-            self.add_action(crate, job, parent_instrument=instrument)
-
     def add_container_images(self, crate, action, activity):
         images = set()
         for assoc in activity.association():
@@ -522,77 +545,6 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
             action_params.append(action_p)
         return action_params
 
-    @staticmethod
-    def _set_alternate_name(prov_param, action_p, parent=None):
-        basename = getattr(prov_param, "basename", None)
-        if not basename:
-            return
-        if not parent:
-            action_p["alternateName"] = basename
-            return
-        if "alternateName" in parent:
-            action_p["alternateName"] = (Path(parent["alternateName"]) / basename).as_posix()
-
-    def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
-        type_names = frozenset(str(_) for _ in prov_param.types())
-        secondary_files = [_.generated_entity() for _ in prov_param.derivations()
-                           if str(_.type) == "cwlprov:SecondaryFile"]
-        if convert_secondary and secondary_files:
-            main_entity = self.convert_param(prov_param, crate, convert_secondary=False)
-            action_p = self.collections.get(main_entity.id)
-            if not action_p:
-                action_p = crate.add(ContextEntity(crate, properties={
-                    "@type": "Collection"
-                }))
-                action_p["mainEntity"] = main_entity
-                action_p["hasPart"] = [main_entity] + [
-                    self.convert_param(_, crate) for _ in secondary_files
-                ]
-                crate.root_dataset.append_to("mentions", action_p)
-                self.collections[main_entity.id] = action_p
-            return action_p
-        if "wf4ever:File" in type_names:
-            hash_ = self.hashes[prov_param.id.localpart]
-            dest = Path(parent.id if parent else "") / hash_
-            action_p = crate.dereference(dest.as_posix())
-            if not action_p:
-                source = self.manifest[hash_]
-                action_p = crate.add_file(source, dest, properties={
-                    "sha1": hash_,
-                    "contentSize": str(Path(source).stat().st_size)
-                })
-                self._set_alternate_name(prov_param, action_p, parent=parent)
-                try:
-                    source_k = str(source.resolve(strict=False))
-                except RuntimeError:
-                    source_k = str(source)
-                self.file_map[source_k] = dest
-            return action_p
-        if "ro:Folder" in type_names:
-            hash_ = self.hashes[prov_param.id.localpart]
-            dest = Path(parent.id if parent else "") / hash_
-            action_p = crate.dereference(dest.as_posix())
-            if not action_p:
-                action_p = crate.add_directory(dest_path=dest)
-                self._set_alternate_name(prov_param, action_p, parent=parent)
-                for child in self.get_dict(prov_param).values():
-                    part = self.convert_param(child, crate, parent=action_p)
-                    action_p.append_to("hasPart", part)
-            return action_p
-        if prov_param.value is not None:
-            return str(prov_param.value)
-        if "prov:Dictionary" in type_names:
-            return dict(
-                (k, self.convert_param(v, crate))
-                for k, v in self.get_dict(prov_param).items()
-                if k != "@id"
-            )
-        if "prov:Collection" in type_names:
-            return [self.convert_param(_, crate) for _ in self.get_members(prov_param)]
-        if prov_param.id.uri == CWLPROV_NONE:
-            return None
-        raise RuntimeError(f"No value to convert for {prov_param}")
-
     def add_param_connections(self, crate, workflow):
         def connect(source, target, entity):
             connection = crate.add(ContextEntity(crate, properties={
@@ -643,55 +595,63 @@ def connect(source, target, entity):
                 to_param = get_fragment(out.id)
                 connect(from_param, to_param, workflow)
 
-    def patch_workflow_input_collection(self, crate, wf=None):
-        """\
-        CWLProv records secondary files only in step runs, not in the workflow
-        run. Thus, when the conversion of parameter values is completed,
-        workflow-level parameters with secondary files get mapped to the main
-        entity of the collection alone (a File). This method fixes the mapping
-        by retrieving the correct Collection entity from the relevant tool
-        execution.
-        """
-        if wf is None:
-            wf = crate.mainEntity
-        sel = [_ for _ in crate.contextual_entities
-               if "CreateAction" in as_list(_.type) and _.get("instrument") is wf]
-        if not sel:
-            return  # skipped subworkflow
-        wf_action = sel[0]
-        connections = [_ for _ in crate.contextual_entities
-                       if "ParameterConnection" in as_list(_.type)]
-        for param in wf.get("input", []):
-            if param.get("additionalType") == "Collection":
-                src_sel = [_ for _ in wf_action.get("object", [])
-                           if param in as_list(_.get("exampleOfWork"))]
-                if not src_sel:
-                    raise RuntimeError(f"object for param {param.id} not found")
-                obj = src_sel[0]
-                if obj.type != "Collection":
-                    param_connections = [_ for _ in connections if _["sourceParameter"] is param]
-                    if not param_connections:
-                        continue
-                    pc = param_connections[0]
-                    tgt_param = pc["targetParameter"]
-                    tgt_sel = [_ for _ in crate.get_entities()
-                               if tgt_param in as_list(_.get("exampleOfWork"))]
-                    if not tgt_sel:
-                        raise RuntimeError(f"object for param {tgt_param.id} not found")
-                    tgt_obj = tgt_sel[0]
-                    wf_action["object"] = [
-                        _ for _ in as_list(wf_action["object"]) if _ is not obj
-                    ] + [tgt_obj]
-                    tgt_obj.append_to("exampleOfWork", param)
-                    obj["exampleOfWork"] = [_ for _ in as_list(obj["exampleOfWork"])
-                                            if _ is not param]
-                    if len(obj["exampleOfWork"]) == 1:
-                        obj["exampleOfWork"] = obj["exampleOfWork"][0]
-                    if len(obj["exampleOfWork"]) == 0:
-                        del obj["exampleOfWork"]
-        for tool in wf.get("hasPart", []):
-            if "ComputationalWorkflow" in as_list(tool.type):
-                self.patch_workflow_input_collection(crate, wf=tool)
+    # --------------------------------------------------------------------------
+    # Utility methods, called by the other methods
+
+    def _get_manifest(self):
+        manifest = {}
+        with open(self.root / Path(MANIFEST_FILE)) as f:
+            for line in f:
+                hash_, relpath = line.strip().split(None, 1)
+                manifest[hash_] = self.root / relpath
+        return manifest
+
+    def _resolve_plan(self, activity):
+        job_qname = activity.plan()
+        plan = activity.provenance.entity(job_qname)
+        if not plan:
+            m = SCATTER_JOB_PATTERN.match(str(job_qname))
+            if m:
+                plan = activity.provenance.entity(m.groups()[0])
+        return plan
+
+    def _get_hash(self, prov_param):
+        k = prov_param.id.localpart
+        try:
+            return self.hashes[k]
+        except KeyError:
+            type_names = frozenset(str(_) for _ in prov_param.types())
+            if "wf4ever:File" in type_names:
+                hash_ = next(prov_param.specializationOf()).id.localpart
+                self.hashes[k] = hash_
+                return hash_
+            elif "ro:Folder" in type_names:
+                m = hashlib.sha1()
+                m.update("".join(sorted(
+                    self._get_hash(_) for _ in self.get_dict(prov_param).values()
+                )).encode())
+                self.hashes[k] = hash_ = m.hexdigest()
+                return hash_
+
+    def _get_hashes(self, provenance):
+        for r in provenance.prov_doc.get_records(prov.model.ProvEntity):
+            self._get_hash(Entity(provenance, r))
+
+    def get_members(self, entity):
+        membership = entity.provenance.record_with_attr(
+            prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
+        )
+        member_ids = (_.get_attribute(prov.model.PROV_ATTR_ENTITY) for _ in membership)
+        return (entity.provenance.entity(first(_)) for _ in member_ids)
+
+    def get_dict(self, entity):
+        d = {}
+        for qname in entity.record.get_attribute("prov:hadDictionaryMember"):
+            kvp = entity.provenance.entity(qname)
+            key = first(kvp.record.get_attribute("prov:pairKey"))
+            entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
+            d[key] = entity.provenance.entity(entity_id)
+        return d
 
     def _map_input_data(self, crate, data):
         if isinstance(data, list):
@@ -716,22 +676,3 @@ def _map_input_data(self, crate, data):
                     rval[k] = self._map_input_data(crate, v)
             return rval
         return data
-
-    def add_inputs_file(self, crate):
-        path = self.root / "workflow" / INPUTS_FILE_BASENAME
-        if path.is_file():
-            with open(path) as f:
-                data = json.load(f)
-            data = self._map_input_data(crate, data)
-            source = StringIO(json.dumps(data, indent=4))
-            crate.add_file(source, path.name, properties={
-                "name": "input object document",
-                "encodingFormat": "application/json",
-            })
-
-    def add_output_formats(self, crate):
-        path = self.root / "workflow" / OUTPUTS_FILE_BASENAME
-        if path.is_file():
-            with open(path) as f:
-                data = json.load(f)
-            self._map_input_data(crate, data)

From 40f52dfadd0c26633b444c7a947a76c03a20b56e Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Mon, 2 Dec 2024 13:46:26 +0000
Subject: [PATCH 08/23] Add convert_param to the cwl object

---
 src/runcrate/convert.py         |  53 ++-------
 src/runcrate/converters/base.py |   6 +
 src/runcrate/converters/cwl.py  | 187 +++++++++++++++++++++++++++++---
 3 files changed, 183 insertions(+), 63 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index c60ae36..3df23cf 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -18,7 +18,6 @@
 Generate a Workflow Run RO-Crate from a CWLProv RO bundle.
 """
 
-import hashlib
 import json
 import re
 from io import StringIO
@@ -26,9 +25,8 @@
 
 import prov.model
 from bdbag.bdbagit import BDBag
-from cwlprov.prov import Entity, Provenance
+from cwlprov.prov import Provenance
 from cwlprov.ro import ResearchObject
-from cwlprov.utils import first
 from rocrate.model.contextentity import ContextEntity
 from rocrate.model.softwareapplication import SoftwareApplication
 from rocrate.rocrate import ROCrate
@@ -58,8 +56,6 @@
 
 SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
 
-CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
-
 WROC_PROFILE_VERSION = "1.0"
 
 DOCKER_IMG_TYPE = "https://w3id.org/ro/terms/workflow-run#DockerImage"
@@ -287,7 +283,7 @@ def to_wf_p(k):
 
             def to_wf_p(k):
                 return k.replace(activity.plan().localpart, tool_name)
-        self._get_hashes(activity.provenance)
+        self.converter.get_hashes(activity.provenance)
         action["instrument"] = instrument
         action["startTime"] = activity.start().time.isoformat()
         action["endTime"] = activity.end().time.isoformat()
@@ -517,7 +513,12 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
             wf_p = crate.dereference(to_wf_p(k))
             k = get_fragment(k)
             v = rel.entity()
-            value = self.convert_param(v, crate)
+            value = self.converter.convert_param(v,
+                                                 crate,
+                                                 hashes=self.hashes,
+                                                 manifest=self.manifest,
+                                                 file_map=self.file_map
+                                                 )
             if value is None:
                 continue  # param is optional with no default and was not set
             if {"ro:Folder", "wf4ever:File"} & set(str(_) for _ in v.types()):
@@ -615,44 +616,6 @@ def _resolve_plan(self, activity):
                 plan = activity.provenance.entity(m.groups()[0])
         return plan
 
-    def _get_hash(self, prov_param):
-        k = prov_param.id.localpart
-        try:
-            return self.hashes[k]
-        except KeyError:
-            type_names = frozenset(str(_) for _ in prov_param.types())
-            if "wf4ever:File" in type_names:
-                hash_ = next(prov_param.specializationOf()).id.localpart
-                self.hashes[k] = hash_
-                return hash_
-            elif "ro:Folder" in type_names:
-                m = hashlib.sha1()
-                m.update("".join(sorted(
-                    self._get_hash(_) for _ in self.get_dict(prov_param).values()
-                )).encode())
-                self.hashes[k] = hash_ = m.hexdigest()
-                return hash_
-
-    def _get_hashes(self, provenance):
-        for r in provenance.prov_doc.get_records(prov.model.ProvEntity):
-            self._get_hash(Entity(provenance, r))
-
-    def get_members(self, entity):
-        membership = entity.provenance.record_with_attr(
-            prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
-        )
-        member_ids = (_.get_attribute(prov.model.PROV_ATTR_ENTITY) for _ in membership)
-        return (entity.provenance.entity(first(_)) for _ in member_ids)
-
-    def get_dict(self, entity):
-        d = {}
-        for qname in entity.record.get_attribute("prov:hadDictionaryMember"):
-            kvp = entity.provenance.entity(qname)
-            key = first(kvp.record.get_attribute("prov:pairKey"))
-            entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
-            d[key] = entity.provenance.entity(entity_id)
-        return d
-
     def _map_input_data(self, crate, data):
         if isinstance(data, list):
             return [self._map_input_data(crate, _) for _ in data]
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 5321aa6..14a2b07 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -21,3 +21,9 @@ def build_step_graph(self, wf):
         Build a graph of steps in the workflow.
         """
         raise NotImplementedError("build_step_graph")
+
+    def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
+        """\
+        Convert a CWLProv parameter to a RO-Crate entity.
+        """
+        raise NotImplementedError("convert_param")
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 30c328c..5be588b 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -1,33 +1,24 @@
+import hashlib
 import json
 from pathlib import Path
 
 import networkx as nx
+import prov.model
 from cwl_utils.parser import load_document_by_yaml
+from cwlprov.prov import Entity
+from cwlprov.utils import first
+from rocrate.model.contextentity import ContextEntity
 
 from .base import converter
 
 
-def _get_fragment(uri):
-    return uri.rsplit("#", 1)[-1]
-
-
-def _normalize_cwl_defs(cwl_defs):
-    inline_tools = {}
-    for d in cwl_defs.values():
-        if not hasattr(d, "steps") or not d.steps:
-            continue
-        for s in d.steps:
-            if hasattr(s, "run") and s.run:
-                if hasattr(s.run, "id"):
-                    tool = s.run
-                    if tool.id.startswith("_:"):  # CWL > 1.0
-                        tool.id = f"{s.id}/run"
-                    inline_tools[_get_fragment(tool.id)] = tool
-                    s.run = tool.id
-    cwl_defs.update(inline_tools)
+CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
 
 
 class cwlConverter(converter):
+    hashes = {}
+    collections = {}
+
     def __init__(self):
         pass
 
@@ -90,3 +81,163 @@ def build_step_graph(self, cwl_wf):
                     if source_fragment:
                         graph.add_edge(source_fragment, fragment)
         return graph
+
+    def convert_param(self,
+                      prov_param,
+                      crate,
+                      convert_secondary=True,
+                      parent=None,
+                      hashes=None,
+                      manifest=None,
+                      file_map=None
+                      ):
+        type_names = frozenset(str(_) for _ in prov_param.types())
+        secondary_files = [_.generated_entity() for _ in prov_param.derivations()
+                           if str(_.type) == "cwlprov:SecondaryFile"]
+        if convert_secondary and secondary_files:
+            main_entity = self.convert_param(prov_param,
+                                             crate,
+                                             convert_secondary=False,
+                                             manifest=manifest,
+                                             file_map=file_map)
+            action_p = self.collections.get(main_entity.id)
+            if not action_p:
+                action_p = crate.add(ContextEntity(crate, properties={
+                    "@type": "Collection"
+                }))
+                action_p["mainEntity"] = main_entity
+                action_p["hasPart"] = [main_entity] + [
+                    self.convert_param(_,
+                                       crate,
+                                       manifest=manifest,
+                                       file_map=file_map
+                                       ) for _ in secondary_files
+                ]
+                crate.root_dataset.append_to("mentions", action_p)
+                self.collections[main_entity.id] = action_p
+            return action_p
+        if "wf4ever:File" in type_names:
+            hash_ = self.hashes[prov_param.id.localpart]
+            dest = Path(parent.id if parent else "") / hash_
+            action_p = crate.dereference(dest.as_posix())
+            if not action_p:
+                source = manifest[hash_]
+                action_p = crate.add_file(source, dest, properties={
+                    "sha1": hash_,
+                    "contentSize": str(Path(source).stat().st_size)
+                })
+                _set_alternate_name(prov_param, action_p, parent=parent)
+                try:
+                    source_k = str(source.resolve(strict=False))
+                except RuntimeError:
+                    source_k = str(source)
+                file_map[source_k] = dest
+            return action_p
+        if "ro:Folder" in type_names:
+            hash_ = self.hashes[prov_param.id.localpart]
+            dest = Path(parent.id if parent else "") / hash_
+            action_p = crate.dereference(dest.as_posix())
+            if not action_p:
+                action_p = crate.add_directory(dest_path=dest)
+                _set_alternate_name(prov_param, action_p, parent=parent)
+                for child in _get_dict(prov_param).values():
+                    part = self.convert_param(child,
+                                              crate,
+                                              parent=action_p,
+                                              manifest=manifest,
+                                              file_map=file_map
+                                              )
+                    action_p.append_to("hasPart", part)
+            return action_p
+        if prov_param.value is not None:
+            return str(prov_param.value)
+        if "prov:Dictionary" in type_names:
+            return dict(
+                (k, self.convert_param(v,
+                                       crate,
+                                       manifest=manifest,
+                                       file_map=file_map
+                                       ))
+                for k, v in _get_dict(prov_param).items()
+                if k != "@id"
+            )
+        if "prov:Collection" in type_names:
+            return [self.convert_param(_,
+                                       crate,
+                                       manifest=manifest,
+                                       file_map=file_map
+                                       ) for _ in _get_members(prov_param)]
+        if prov_param.id.uri == CWLPROV_NONE:
+            return None
+        raise RuntimeError(f"No value to convert for {prov_param}")
+
+    def get_hashes(self, provenance):
+        for r in provenance.prov_doc.get_records(prov.model.ProvEntity):
+            self._get_hash(self.hashes, Entity(provenance, r))
+
+    def _get_hash(self, hashes, prov_param):
+        k = prov_param.id.localpart
+        try:
+            return hashes[k]
+        except KeyError:
+            type_names = frozenset(str(_) for _ in prov_param.types())
+            if "wf4ever:File" in type_names:
+                hash_ = next(prov_param.specializationOf()).id.localpart
+                self.hashes[k] = hash_
+                return hash_
+            elif "ro:Folder" in type_names:
+                m = hashlib.sha1()
+                m.update("".join(sorted(
+                    self._get_hash(hashes, _) for _ in _get_dict(prov_param).values()
+                )).encode())
+                self.hashes[k] = hash_ = m.hexdigest()
+                return hash_
+
+
+def _get_members(entity):
+    membership = entity.provenance.record_with_attr(
+        prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
+    )
+    member_ids = (_.get_attribute(prov.model.PROV_ATTR_ENTITY) for _ in membership)
+    return (entity.provenance.entity(first(_)) for _ in member_ids)
+
+
+def _get_fragment(uri):
+    return uri.rsplit("#", 1)[-1]
+
+
+def _normalize_cwl_defs(cwl_defs):
+    inline_tools = {}
+    for d in cwl_defs.values():
+        if not hasattr(d, "steps") or not d.steps:
+            continue
+        for s in d.steps:
+            if hasattr(s, "run") and s.run:
+                if hasattr(s.run, "id"):
+                    tool = s.run
+                    if tool.id.startswith("_:"):  # CWL > 1.0
+                        tool.id = f"{s.id}/run"
+                    inline_tools[_get_fragment(tool.id)] = tool
+                    s.run = tool.id
+    cwl_defs.update(inline_tools)
+
+
+def _set_alternate_name(prov_param, action_p, parent=None):
+    basename = getattr(prov_param, "basename", None)
+    if not basename:
+        return
+    if not parent:
+        action_p["alternateName"] = basename
+        return
+    if "alternateName" in parent:
+        action_p["alternateName"] = (Path(parent["alternateName"]) / basename).as_posix()
+
+
+def _get_dict(entity):
+    d = {}
+    for qname in entity.record.get_attribute("prov:hadDictionaryMember"):
+        kvp = entity.provenance.entity(qname)
+        key = first(kvp.record.get_attribute("prov:pairKey"))
+        entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
+        d[key] = entity.provenance.entity(entity_id)
+    return d

From 0458fd59f509ddc9e9ddc263abc62e10ffb3cb70 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Tue, 10 Dec 2024 13:16:47 +0000
Subject: [PATCH 09/23] Complete first go at refactoring to language-agnostic
 tool

---
 src/runcrate/constants.py       |   5 +
 src/runcrate/convert.py         | 617 ++------------------------------
 src/runcrate/converters/base.py |  79 +++-
 src/runcrate/converters/cwl.py  | 605 +++++++++++++++++++++++++++++--
 tests/test_step_mapping.py      |  13 +-
 5 files changed, 687 insertions(+), 632 deletions(-)

diff --git a/src/runcrate/constants.py b/src/runcrate/constants.py
index b0498e1..e011ec3 100644
--- a/src/runcrate/constants.py
+++ b/src/runcrate/constants.py
@@ -24,3 +24,8 @@
 PROVENANCE_PROFILE = f"{PROVENANCE_PROFILE_BASE}/{PROFILES_VERSION}"
 
 TERMS_NAMESPACE = "https://w3id.org/ro/terms/workflow-run"
+
+WROC_PROFILE_VERSION = "1.0"
+DOCKER_IMG_TYPE = "https://w3id.org/ro/terms/workflow-run#DockerImage"
+
+
diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index 3df23cf..fc5f3eb 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -19,8 +19,6 @@
 """
 
 import json
-import re
-from io import StringIO
 from pathlib import Path
 
 import prov.model
@@ -31,611 +29,46 @@
 from rocrate.model.softwareapplication import SoftwareApplication
 from rocrate.rocrate import ROCrate
 
-from .constants import PROFILES_BASE, PROFILES_VERSION, TERMS_NAMESPACE
+from .constants import TERMS_NAMESPACE
 from .converters import CONVERTERS
-from .utils import as_list, parse_img
 
-
-WORKFLOW_BASENAME = "packed.cwl"
-INPUTS_FILE_BASENAME = "primary-job.json"
-OUTPUTS_FILE_BASENAME = "primary-output.json"
 MANIFEST_FILE = "manifest-sha1.txt"
 
-CWL_TYPE_MAP = {
-    "string": "Text",
-    "int": "Integer",
-    "long": "Integer",
-    "float": "Float",
-    "double": "Float",
-    "Any": "DataType",
-    "boolean": "Boolean",
-    "File": "File",
-    "Directory": "Dataset",
-    "null": None,
-}
-
-SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
-
-WROC_PROFILE_VERSION = "1.0"
-
-DOCKER_IMG_TYPE = "https://w3id.org/ro/terms/workflow-run#DockerImage"
-
-
-def convert_cwl_type(cwl_type):
-    if isinstance(cwl_type, list):
-        s = set(convert_cwl_type(_) for _ in cwl_type)
-        s.discard(None)
-        return s.pop() if len(s) == 1 else sorted(s)
-    if isinstance(cwl_type, str):
-        return CWL_TYPE_MAP[cwl_type]
-    if cwl_type.type_ == "enum":
-        return "Text"  # use actionOption to represent choices?
-    if cwl_type.type_ == "array":
-        return convert_cwl_type(cwl_type.items)
-    if cwl_type.type_ == "record":
-        return "PropertyValue"
-
-
-def properties_from_cwl_param(cwl_p):
-    def is_structured(cwl_type):
-        return getattr(cwl_type, "type_", None) in ("array", "record")
-    additional_type = "Collection" if cwl_p.secondaryFiles else convert_cwl_type(cwl_p.type_)
-    properties = {
-        "@type": "FormalParameter",
-        "additionalType": additional_type
-    }
-    if hasattr(cwl_p, "doc") and cwl_p.doc:
-        properties["description"] = cwl_p.doc
-    elif hasattr(cwl_p, "label") and cwl_p.label:
-        # name is used for the parameter's id to support reproducibility
-        properties["description"] = cwl_p.label
-    if cwl_p.format:
-        properties["encodingFormat"] = cwl_p.format
-    if isinstance(cwl_p.type_, list) and "null" in cwl_p.type_:
-        properties["valueRequired"] = "False"
-    if is_structured(cwl_p.type_):
-        properties["multipleValues"] = "True"
-    if hasattr(cwl_p, "default"):
-        if isinstance(cwl_p.default, dict):
-            if cwl_p.default.get("class") in ("File", "Directory"):
-                default = cwl_p.default.get("location", cwl_p.default.get("path"))
-            if default:
-                properties["defaultValue"] = default
-        elif not is_structured(cwl_p.type_) and cwl_p.default is not None:
-            properties["defaultValue"] = str(cwl_p.default)
-        # TODO: support more cases
-    if getattr(cwl_p.type_, "type_", None) == "enum":
-        properties["valuePattern"] = "|".join(_.rsplit("/", 1)[-1] for _ in cwl_p.type_.symbols)
-    return properties
-
-
-def get_relative_uri(uri):
-    doc, fragment = uri.rsplit("#", 1)
-    return f"{doc.rsplit('/', 1)[-1]}#{fragment}"
-
-
-def cut_step_part(relative_uri):
-    parts = relative_uri.split("/", 2)
-    if len(parts) > 2:
-        relative_uri = parts[0] + "/" + parts[2]
-    return relative_uri
-
-
-def get_fragment(uri):
-    return uri.rsplit("#", 1)[-1]
-
-
 class ProvCrateBuilder:
-
-    # --------------------------------------------------------------------------
-    # Public methods, called by the CLI
-
     def __init__(self,
                  root,
                  converter=CONVERTERS["cwl"],
                  workflow_name=None,
                  license=None,
                  readme=None):
-        self.root = Path(root)
         self.converter = converter
-        self.workflow_name = workflow_name
-        self.license = license
-        self.readme = Path(readme) if readme else readme
-        self.wf_path = self.root / "workflow" / WORKFLOW_BASENAME
-        self.cwl_defs = self.converter.get_workflow(self.wf_path)
-        self.step_maps = self.converter.get_step_maps(self.cwl_defs)
-        self.ro = ResearchObject(BDBag(str(root)))
-        self.with_prov = set(str(_) for _ in self.ro.resources_with_provenance())
-        self.workflow_run = Provenance(self.ro).activity()
-        self.roc_engine_run = None
-        # avoid duplicates - not handled by ro-crate-py, see
-        # https://github.com/ResearchObject/ro-crate-py/issues/132
-        self.control_actions = {}
-        # index collections by their main entity's id
-        self.collections = {}
-        self.hashes = {}
-        # map source files to destination files
-        self.file_map = {}
-        self.manifest = self._get_manifest()
+        self.converter.root = Path(root)
+        self.converter.workflow_name = workflow_name
+        self.converter.license = license
+        self.converter.readme = Path(readme) if readme else readme
+        self.converter.wf_path = self.converter.root / "workflow" / self.converter.WORKFLOW_BASENAME
+        self.converter.workflow_definition = self.converter.get_workflow()
+        self.converter.step_maps = self.converter.get_step_maps()
+        self.converter.ro = ResearchObject(BDBag(str(root)))
+        self.converter.with_prov = set(str(_) for _ in self.converter.ro.resources_with_provenance())
+        self.converter.workflow_run = Provenance(self.converter.ro).activity()
+        self.converter.roc_engine_run = None
+        self.converter.control_actions = {}
+        self.converter.collection = {}
+        self.converter.hashes = {}
+        self.converter.file_map = {}
+        self.converter.manifest = self.converter.get_manifest(self.converter.root, MANIFEST_FILE)
 
     def build(self):
         crate = ROCrate(gen_preview=False)
         crate.metadata.extra_contexts.append(TERMS_NAMESPACE)
-        self.add_root_metadata(crate)
-        self.add_profiles(crate)
-        self.add_workflow(crate)
-        self.add_engine_run(crate)
-        self.add_action(crate, self.workflow_run)
-        self.patch_workflow_input_collection(crate)
-        self.add_inputs_file(crate)
-        self.add_output_formats(crate)
+        self.converter.add_root_metadata(crate)
+        self.converter.add_profiles(crate)
+        self.converter.add_workflow(crate)
+        self.converter.add_engine_run(crate)
+        self.converter.add_action(crate, self.converter.workflow_run)
+        self.converter.patch_workflow_input_collection(crate)
+        self.converter.add_inputs_file(crate)
+        self.converter.add_output_formats(crate)
         return crate
 
-    # --------------------------------------------------------------------------
-    # Top level methods, called by build()
-
-    def add_root_metadata(self, crate):
-        if self.license:
-            crate.root_dataset["license"] = self.license
-        if self.readme:
-            readme = crate.add_file(self.readme)
-            readme["about"] = crate.root_dataset
-            if self.readme.suffix.lower() == ".md":
-                readme["encodingFormat"] = "text/markdown"
-
-    def add_profiles(self, crate):
-        profiles = []
-        for p in "process", "workflow", "provenance":
-            id_ = f"{PROFILES_BASE}/{p}/{PROFILES_VERSION}"
-            profiles.append(crate.add(ContextEntity(crate, id_, properties={
-                "@type": "CreativeWork",
-                "name": f"{p.title()} Run Crate",
-                "version": PROFILES_VERSION,
-            })))
-        # FIXME: in the future, this could go out of sync with the wroc
-        # profile added by ro-crate-py to the metadata descriptor
-        wroc_profile_id = f"https://w3id.org/workflowhub/workflow-ro-crate/{WROC_PROFILE_VERSION}"
-        profiles.append(crate.add(ContextEntity(crate, wroc_profile_id, properties={
-            "@type": "CreativeWork",
-            "name": "Workflow RO-Crate",
-            "version": WROC_PROFILE_VERSION,
-        })))
-        crate.root_dataset["conformsTo"] = profiles
-
-    def add_workflow(self, crate):
-        lang_version = self.cwl_defs[WORKFLOW_BASENAME].cwlVersion
-        properties = {
-            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow", "HowTo"],
-        }
-        workflow = crate.add_workflow(
-            self.wf_path, self.wf_path.name, main=True, lang="cwl",
-            lang_version=lang_version, gen_cwl=False, properties=properties
-        )
-        cwl_workflow = self.cwl_defs[workflow.id]
-        wf_name = self.wf_path.name
-        if hasattr(cwl_workflow, "label") and cwl_workflow.label:
-            wf_name = cwl_workflow.label
-        workflow["name"] = self.workflow_name or wf_name
-        if hasattr(cwl_workflow, "doc") and cwl_workflow.doc:
-            workflow["description"] = cwl_workflow.doc
-        # cannot convert "intent" to featureList: workflow is not a SoftwareApplication
-        workflow["input"] = self.add_params(crate, cwl_workflow.inputs)
-        workflow["output"] = self.add_params(crate, cwl_workflow.outputs)
-        if hasattr(cwl_workflow, "steps"):
-            for s in cwl_workflow.steps:
-                self.add_step(crate, workflow, s)
-            self.add_param_connections(crate, workflow)
-        return workflow
-
-    def add_engine_run(self, crate):
-        engine = self.workflow_run.start().starter_activity()
-        roc_engine = crate.add(SoftwareApplication(crate, properties={
-            "name": engine.label or "workflow engine"
-        }))
-        roc_engine_run = crate.add(ContextEntity(crate, properties={
-            "@type": "OrganizeAction",
-            "name": f"Run of {roc_engine['name']}",
-            "startTime": engine.start().time.isoformat(),
-        }))
-        roc_engine_run["instrument"] = roc_engine
-        self.add_agent(crate, roc_engine_run, engine)
-        self.roc_engine_run = roc_engine_run
-
-    def add_action(self, crate, activity, parent_instrument=None):
-        workflow = crate.mainEntity
-        action = crate.add(ContextEntity(crate, properties={
-            "@type": "CreateAction",
-            "name": activity.label,
-        }))
-        plan = self._resolve_plan(activity)
-        plan_tag = plan.id.localpart
-        if plan_tag == "main":
-            assert str(activity.type) == "wfprov:WorkflowRun"
-            instrument = workflow
-            self.roc_engine_run["result"] = action
-            crate.root_dataset["mentions"] = [action]
-
-            def to_wf_p(k):
-                return k
-        else:
-            parent_instrument_fragment = get_fragment(parent_instrument.id)
-            if parent_instrument_fragment != WORKFLOW_BASENAME:
-                parts = plan_tag.split("/", 1)
-                if parts[0] == "main":
-                    parts[0] = parent_instrument_fragment
-                    plan_tag = "/".join(parts)
-            tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
-            instrument = crate.dereference(f"{workflow.id}#{tool_name}")
-            control_action = self.control_actions.get(plan_tag)
-            if not control_action:
-                control_action = crate.add(ContextEntity(crate, properties={
-                    "@type": "ControlAction",
-                    "name": f"orchestrate {tool_name}",
-                }))
-                step = crate.dereference(f"{workflow.id}#{plan_tag}")
-                control_action["instrument"] = step
-                self.roc_engine_run.append_to("object", control_action, compact=True)
-                self.control_actions[plan_tag] = control_action
-            control_action.append_to("object", action, compact=True)
-            if activity.uri in self.with_prov:
-                nested_prov = Provenance(self.ro, activity.uri)
-                activity = nested_prov.activity()
-
-            def to_wf_p(k):
-                return k.replace(activity.plan().localpart, tool_name)
-        self.converter.get_hashes(activity.provenance)
-        action["instrument"] = instrument
-        action["startTime"] = activity.start().time.isoformat()
-        action["endTime"] = activity.end().time.isoformat()
-        action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
-        action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
-        self.add_container_images(crate, action, activity)
-        for job in activity.steps():
-            self.add_action(crate, job, parent_instrument=instrument)
-
-    def patch_workflow_input_collection(self, crate, wf=None):
-        """\
-        CWLProv records secondary files only in step runs, not in the workflow
-        run. Thus, when the conversion of parameter values is completed,
-        workflow-level parameters with secondary files get mapped to the main
-        entity of the collection alone (a File). This method fixes the mapping
-        by retrieving the correct Collection entity from the relevant tool
-        execution.
-        """
-        if wf is None:
-            wf = crate.mainEntity
-        sel = [_ for _ in crate.contextual_entities
-               if "CreateAction" in as_list(_.type) and _.get("instrument") is wf]
-        if not sel:
-            return  # skipped subworkflow
-        wf_action = sel[0]
-        connections = [_ for _ in crate.contextual_entities
-                       if "ParameterConnection" in as_list(_.type)]
-        for param in wf.get("input", []):
-            if param.get("additionalType") == "Collection":
-                src_sel = [_ for _ in wf_action.get("object", [])
-                           if param in as_list(_.get("exampleOfWork"))]
-                if not src_sel:
-                    raise RuntimeError(f"object for param {param.id} not found")
-                obj = src_sel[0]
-                if obj.type != "Collection":
-                    param_connections = [_ for _ in connections if _["sourceParameter"] is param]
-                    if not param_connections:
-                        continue
-                    pc = param_connections[0]
-                    tgt_param = pc["targetParameter"]
-                    tgt_sel = [_ for _ in crate.get_entities()
-                               if tgt_param in as_list(_.get("exampleOfWork"))]
-                    if not tgt_sel:
-                        raise RuntimeError(f"object for param {tgt_param.id} not found")
-                    tgt_obj = tgt_sel[0]
-                    wf_action["object"] = [
-                        _ for _ in as_list(wf_action["object"]) if _ is not obj
-                    ] + [tgt_obj]
-                    tgt_obj.append_to("exampleOfWork", param)
-                    obj["exampleOfWork"] = [_ for _ in as_list(obj["exampleOfWork"])
-                                            if _ is not param]
-                    if len(obj["exampleOfWork"]) == 1:
-                        obj["exampleOfWork"] = obj["exampleOfWork"][0]
-                    if len(obj["exampleOfWork"]) == 0:
-                        del obj["exampleOfWork"]
-        for tool in wf.get("hasPart", []):
-            if "ComputationalWorkflow" in as_list(tool.type):
-                self.patch_workflow_input_collection(crate, wf=tool)
-
-    def add_inputs_file(self, crate):
-        path = self.root / "workflow" / INPUTS_FILE_BASENAME
-        if path.is_file():
-            with open(path) as f:
-                data = json.load(f)
-            data = self._map_input_data(crate, data)
-            source = StringIO(json.dumps(data, indent=4))
-            crate.add_file(source, path.name, properties={
-                "name": "input object document",
-                "encodingFormat": "application/json",
-            })
-
-    def add_output_formats(self, crate):
-        path = self.root / "workflow" / OUTPUTS_FILE_BASENAME
-        if path.is_file():
-            with open(path) as f:
-                data = json.load(f)
-            self._map_input_data(crate, data)
-
-    # --------------------------------------------------------------------------
-    # Internal methods, called by the top level methods
-
-    def add_step(self, crate, workflow, cwl_step):
-        step_fragment = get_fragment(cwl_step.id)
-        step_id = f"{self.wf_path.name}#{step_fragment}"
-        pos = self.step_maps[get_fragment(workflow.id)][step_fragment]["pos"]
-        step = crate.add(ContextEntity(crate, step_id, properties={
-            "@type": "HowToStep",
-            "position": str(pos),
-        }))
-        tool = self.add_tool(crate, workflow, cwl_step.run)
-        step["workExample"] = tool
-        if hasattr(cwl_step, "label") and cwl_step.label:
-            step["name"] = cwl_step.label
-        if hasattr(cwl_step, "doc") and cwl_step.doc:
-            step["description"] = cwl_step.doc
-        workflow.append_to("step", step)
-
-    def add_tool(self, crate, workflow, cwl_tool):
-        if isinstance(cwl_tool, str):
-            tool_fragment = get_fragment(cwl_tool)
-            cwl_tool = self.cwl_defs[tool_fragment]
-        else:
-            tool_fragment = get_fragment(cwl_tool.id)
-        if hasattr(cwl_tool, "expression"):
-            raise RuntimeError("ExpressionTool not supported yet")
-        tool_id = f"{self.wf_path.name}#{tool_fragment}"
-        tool = crate.dereference(tool_id)
-        if tool:
-            return tool
-        properties = {"name": tool_fragment}
-        if cwl_tool.doc:
-            properties["description"] = cwl_tool.doc
-        if cwl_tool.label:
-            properties["name"] = cwl_tool.label
-        if hasattr(cwl_tool, "steps"):
-            properties["@type"] = ["SoftwareSourceCode", "ComputationalWorkflow", "HowTo"]
-        else:
-            properties["@type"] = "SoftwareApplication"
-        if hasattr(cwl_tool, "intent") and cwl_tool.intent:
-            properties["featureList"] = cwl_tool.intent
-        if hasattr(cwl_tool, "requirements") and cwl_tool.requirements:
-            for req in cwl_tool.requirements:
-                if req.class_ == "ResourceRequirement":
-                    ramMin = req.ramMin
-                    if ramMin:
-                        properties["memoryRequirements"] = f"{int(ramMin)} MiB"
-        deps = []
-        if hasattr(cwl_tool, "hints") and cwl_tool.hints:
-            for req in cwl_tool.hints:
-                if hasattr(req, "class_") and req.class_ == "ResourceRequirement":
-                    ramMin = req.ramMin
-                    if ramMin:
-                        properties["memoryRequirements"] = f"{int(ramMin)} MiB"
-                if hasattr(req, "class_") and req.class_ == "SoftwareRequirement":
-                    for p in req.packages:
-                        if hasattr(p, "specs") and p.specs:
-                            dep_id = p.specs[0]
-                            dep_properties = {
-                                "@type": "SoftwareApplication",
-                                "name": p.package
-                            }
-                            if p.version:
-                                dep_properties["softwareVersion"] = p.version
-                            deps.append(
-                                crate.add(ContextEntity(crate, dep_id, properties=dep_properties))
-                            )
-        tool = crate.add(ContextEntity(crate, tool_id, properties=properties))
-        if deps:
-            tool["softwareRequirements"] = deps
-        if len(deps) == 1:
-            tool["mainEntity"] = deps[0]
-        tool["input"] = self.add_params(crate, cwl_tool.inputs)
-        tool["output"] = self.add_params(crate, cwl_tool.outputs)
-        workflow.append_to("hasPart", tool)
-        if hasattr(cwl_tool, "steps"):
-            tool["programmingLanguage"] = workflow["programmingLanguage"]
-            for s in cwl_tool.steps:
-                self.add_step(crate, tool, s)
-            self.add_param_connections(crate, tool)
-        return tool
-
-    def add_params(self, crate, cwl_params):
-        params = []
-        for cwl_p in cwl_params:
-            p_id = get_relative_uri(cwl_p.id)
-            properties = properties_from_cwl_param(cwl_p)
-            properties["name"] = p_id.rsplit("/", 1)[-1]
-            p = crate.add(ContextEntity(crate, p_id, properties=properties))
-            params.append(p)
-        return params
-
-    def add_agent(self, crate, roc_engine_run, engine):
-        delegate = engine.start().starter_activity()
-        try:
-            delegation = next(engine.provenance.record_with_attr(
-                prov.model.ProvDelegation, delegate.id, prov.model.PROV_ATTR_DELEGATE
-            ))
-        except StopIteration:
-            return
-        responsible = delegation.get_attribute(prov.model.PROV_ATTR_RESPONSIBLE)
-        agent = sum((engine.provenance.prov_doc.get_record(_) for _ in responsible), [])
-        for a in agent:
-            if "prov:Person" not in set(str(_) for _ in a.get_asserted_types()):
-                continue
-            agent_id = a.identifier.uri
-            if not agent_id.startswith("http"):
-                agent_id = "#" + agent_id.rsplit(":", 1)[-1]
-            properties = {
-                "@type": "Person"
-            }
-            if isinstance(a.label, str):
-                properties["name"] = a.label
-            ro_a = crate.add(ContextEntity(crate, agent_id, properties=properties))
-            roc_engine_run.append_to("agent", ro_a, compact=True)
-
-    def add_container_images(self, crate, action, activity):
-        images = set()
-        for assoc in activity.association():
-            for agent in activity.provenance.prov_doc.get_record(assoc.agent_id):
-                images |= agent.get_attribute("cwlprov:image")
-        for im in images:
-            properties = parse_img(im)
-            properties.update({
-                "@type": "ContainerImage",
-                "additionalType": {"@id": DOCKER_IMG_TYPE}
-            })
-            roc_img = crate.add(ContextEntity(crate, properties=properties))
-            action.append_to("containerImage", roc_img, compact=True)
-
-    def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
-        action_params = []
-        all_roles = set()
-        for rel in getattr(activity, ptype)():
-            k = get_relative_uri(rel.role.uri)
-            if str(activity.type) == "wfprov:WorkflowRun":
-                # workflow output roles have a phantom step part
-                if ptype == "generation":
-                    k = cut_step_part(k)
-                # In the case of a single tool run, cwltool reports one WorkflowRun
-                # and no ProcessRun; some parameters are duplicated, appearing both
-                # with role main/PARAM_NAME and main/ORIGINAL_WF_NAME/PARAM_NAME
-                if not list(activity.steps()):
-                    k = cut_step_part(k)
-                    if k in all_roles:
-                        continue
-                    all_roles.add(k)
-            wf_p = crate.dereference(to_wf_p(k))
-            k = get_fragment(k)
-            v = rel.entity()
-            value = self.converter.convert_param(v,
-                                                 crate,
-                                                 hashes=self.hashes,
-                                                 manifest=self.manifest,
-                                                 file_map=self.file_map
-                                                 )
-            if value is None:
-                continue  # param is optional with no default and was not set
-            if {"ro:Folder", "wf4ever:File"} & set(str(_) for _ in v.types()):
-                action_p = value
-            else:
-                # FIXME: assuming arrays and records don't have nested structured types
-                if isinstance(value, dict):
-                    value = [crate.add(ContextEntity(crate, f"#pv-{k}/{nk}", properties={
-                        "@type": "PropertyValue",
-                        "name": nk,
-                        "value": nv,
-                    })) for nk, nv in value.items()]
-                action_p = crate.add(ContextEntity(crate, f"#pv-{k}", properties={
-                    "@type": "PropertyValue",
-                    "name": k.rsplit("/", 1)[-1],
-                }))
-                action_p["value"] = value
-            action_p["exampleOfWork"] = list(set(
-                as_list(action_p.get("exampleOfWork", [])) + [wf_p]
-            ))
-            if len(action_p["exampleOfWork"]) == 1:
-                action_p["exampleOfWork"] = action_p["exampleOfWork"][0]
-            if ptype == "generation":
-                action_p["dateCreated"] = rel.time.isoformat()
-            action_params.append(action_p)
-        return action_params
-
-    def add_param_connections(self, crate, workflow):
-        def connect(source, target, entity):
-            connection = crate.add(ContextEntity(crate, properties={
-                "@type": "ParameterConnection"
-            }))
-            connection["sourceParameter"] = crate.get(f"{WORKFLOW_BASENAME}#{source}")
-            connection["targetParameter"] = crate.get(f"{WORKFLOW_BASENAME}#{target}")
-            entity.append_to("connection", connection)
-        wf_name = get_fragment(workflow.id)
-        wf_def = self.cwl_defs[wf_name]
-        step_map = self.step_maps[wf_name]
-        out_map = {}
-        for step in wf_def.steps:
-            step_name = get_fragment(step.id)
-            tool_name = step_map[step_name]["tool"]
-            for o in step.out:
-                o_name = get_fragment(o)
-                out_map[o_name] = o_name.replace(step_name, tool_name)
-        for step in wf_def.steps:
-            step_name = get_fragment(step.id)
-            ro_step = crate.get(f"{self.wf_path.name}#{step_name}")
-            tool_name = step_map[step_name]["tool"]
-            for mapping in getattr(step, "in_", []):
-                if not mapping.source:
-                    continue
-                sources = [mapping.source] if not isinstance(
-                    mapping.source, list
-                ) else mapping.source
-                for s in sources:
-                    from_param = get_fragment(s)
-                    try:
-                        from_param = out_map[from_param]
-                    except KeyError:
-                        pass  # only needed if source is from another step
-                    to_param = get_fragment(mapping.id).replace(step_name, tool_name)
-                    connect(from_param, to_param, ro_step)
-        for out in getattr(wf_def, "outputs", []):
-            out_sources = [out.outputSource] if not isinstance(
-                out.outputSource, list
-            ) else out.outputSource
-            for out_s in out_sources:
-                from_param = get_fragment(out_s)
-                try:
-                    from_param = out_map[from_param]
-                except KeyError:
-                    # assuming this is a passthrough for a workflow input parameter
-                    pass
-                to_param = get_fragment(out.id)
-                connect(from_param, to_param, workflow)
-
-    # --------------------------------------------------------------------------
-    # Utility methods, called by the other methods
-
-    def _get_manifest(self):
-        manifest = {}
-        with open(self.root / Path(MANIFEST_FILE)) as f:
-            for line in f:
-                hash_, relpath = line.strip().split(None, 1)
-                manifest[hash_] = self.root / relpath
-        return manifest
-
-    def _resolve_plan(self, activity):
-        job_qname = activity.plan()
-        plan = activity.provenance.entity(job_qname)
-        if not plan:
-            m = SCATTER_JOB_PATTERN.match(str(job_qname))
-            if m:
-                plan = activity.provenance.entity(m.groups()[0])
-        return plan
-
-    def _map_input_data(self, crate, data):
-        if isinstance(data, list):
-            return [self._map_input_data(crate, _) for _ in data]
-        if isinstance(data, dict):
-            rval = {}
-            for k, v in data.items():
-                if k == "location":
-                    source = self.root / "workflow" / v
-                    try:
-                        source_k = str(source.resolve(strict=False))
-                    except RuntimeError:
-                        source_k = str(source)
-                    dest = self.file_map.get(source_k)
-                    rval[k] = str(dest) if dest else v
-                    fmt = data.get("format")
-                    if fmt:
-                        entity = crate.get(str(dest))
-                        if entity:
-                            entity["encodingFormat"] = fmt
-                else:
-                    rval[k] = self._map_input_data(crate, v)
-            return rval
-        return data
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 14a2b07..98d8bdf 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -1,9 +1,78 @@
 class converter:
     def __init__(self):
-        pass
+        self.root = None
+        self.workflow_name = None
+        self.license = None
+        self.readme = None
+        self.wf_path = None
+        self.workflow_definition = {}
+        self.step_maps = {}
+        self.ro = None
+        self.with_prov = set()
+        self.workflow_run = None
+        self.roc_engine_run = None
+        self.control_actions = {}        
+        self.collections = {}
+        self.hashes = {}
+        self.file_map = {}
+        self.manifest = None
+
+    # --------------------------------------------------------------------------
+    # Top level functions - called by the build() function
+
+    def add_root_metadata(self, crate):
+        """
+        Add metadata to the root of the crate.
+        """
+        raise NotImplementedError("add_root_metadata")
+
+    def add_profiles(self, crate):
+        """
+        Add profiles to the crate.
+        """
+        raise NotImplementedError("add_profiles")
+
+    def add_workflow(self, crate):
+        """
+        Add the workflow to the crate.
+        """
+        raise NotImplementedError("add_workflow")
+
+    def add_engine_run(self, crate):
+        """
+        Add the engine run to the crate.
+        """
+        raise NotImplementedError("add_engine_run")
+
+    def add_action(self, crate, workflow_run):
+        """
+        Add the action to the crate.
+        """
+        raise NotImplementedError("add_action")
+
+    def patch_workflow_input_collection(self, crate):
+        """
+        Patch the workflow input collection.
+        """
+        raise NotImplementedError("patch_workflow_input_collection")
+
+    def add_inputs_file(self, crate):
+        """
+        Add the workflow inputs file to the crate (name matches the CWL converter's hook).
+        """
+        raise NotImplementedError("add_inputs_file")
+
+    def add_output_formats(self, crate):
+        """
+        Add output formats to the crate.
+        """
+        raise NotImplementedError("add_output_formats")
+        
+    # --------------------------------------------------------------------------
+    # Helper functions - called by the top level functions
 
     def get_workflow(self, wf_path):
-        """\
+        """
         Get the workflow from the given path.
 
         Returns a dictionary where tools / workflows are mapped by their ids.
@@ -11,19 +80,19 @@ def get_workflow(self, wf_path):
         raise NotImplementedError("get_workflow")
 
     def get_step_maps(self, wf_defs):
-        """\
+        """
         Get a mapping of step names to their tool names and positions.
         """
         raise NotImplementedError("get_step_maps")
 
     def build_step_graph(self, wf):
-        """\
+        """
         Build a graph of steps in the workflow.
         """
         raise NotImplementedError("build_step_graph")
 
     def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
-        """\
+        """
         Convert a CWLProv parameter to a RO-Crate entity.
         """
         raise NotImplementedError("convert_param")
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 5be588b..f56134a 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -1,28 +1,480 @@
 import hashlib
 import json
+import re
 from pathlib import Path
+from io import StringIO
 
 import networkx as nx
 import prov.model
 from cwl_utils.parser import load_document_by_yaml
 from cwlprov.prov import Entity
 from cwlprov.utils import first
+from cwlprov.prov import Provenance
+
 from rocrate.model.contextentity import ContextEntity
+from rocrate.model.softwareapplication import SoftwareApplication
+from rocrate.rocrate import ROCrate
 
 from .base import converter
-
+from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION, DOCKER_IMG_TYPE
+from ..utils import as_list, parse_img
 
 CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
 
+CWL_TYPE_MAP = {
+    "string": "Text",
+    "int": "Integer",
+    "long": "Integer",
+    "float": "Float",
+    "double": "Float",
+    "Any": "DataType",
+    "boolean": "Boolean",
+    "File": "File",
+    "Directory": "Dataset",
+    "null": None,
+}
+
+INPUTS_FILE_BASENAME = "primary-job.json"
+OUTPUTS_FILE_BASENAME = "primary-output.json"
+
+SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
+
+class cwlConverter(converter):    
+
+    WORKFLOW_BASENAME = "packed.cwl"
+
+    # --------------------------------------------------------------------------
+    # Top level methods, called by build()
+
+    def add_root_metadata(self, crate):
+        if self.license:
+            crate.root_dataset["license"] = self.license
+        if self.readme:
+            readme = crate.add_file(self.readme)
+            readme["about"] = crate.root_dataset
+            if self.readme.suffix.lower() == ".md":
+                readme["encodingFormat"] = "text/markdown"
+
+    def add_profiles(self, crate):
+        profiles = []
+        for p in "process", "workflow", "provenance":
+            id_ = f"{PROFILES_BASE}/{p}/{PROFILES_VERSION}"
+            profiles.append(crate.add(ContextEntity(crate, id_, properties={
+                "@type": "CreativeWork",
+                "name": f"{p.title()} Run Crate",
+                "version": PROFILES_VERSION,
+            })))
+        # FIXME: in the future, this could go out of sync with the wroc
+        # profile added by ro-crate-py to the metadata descriptor
+        wroc_profile_id = f"https://w3id.org/workflowhub/workflow-ro-crate/{WROC_PROFILE_VERSION}"
+        profiles.append(crate.add(ContextEntity(crate, wroc_profile_id, properties={
+            "@type": "CreativeWork",
+            "name": "Workflow RO-Crate",
+            "version": WROC_PROFILE_VERSION,
+        })))
+        crate.root_dataset["conformsTo"] = profiles
+
+    def add_workflow(self, crate):
+        lang_version = self.workflow_definition[self.WORKFLOW_BASENAME].cwlVersion
+        properties = {
+            "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow", "HowTo"],
+        }
+        workflow = crate.add_workflow(
+            self.wf_path, self.wf_path.name, main=True, lang="cwl",
+            lang_version=lang_version, gen_cwl=False, properties=properties
+        )
+        cwl_workflow = self.workflow_definition[workflow.id]
+        wf_name = self.wf_path.name
+        if hasattr(cwl_workflow, "label") and cwl_workflow.label:
+            wf_name = cwl_workflow.label
+        workflow["name"] = self.workflow_name or wf_name
+        if hasattr(cwl_workflow, "doc") and cwl_workflow.doc:
+            workflow["description"] = cwl_workflow.doc
+        # cannot convert "intent" to featureList: workflow is not a SoftwareApplication
+        workflow["input"] = self.add_params(crate, cwl_workflow.inputs)
+        workflow["output"] = self.add_params(crate, cwl_workflow.outputs)
+        if hasattr(cwl_workflow, "steps"):
+            for s in cwl_workflow.steps:
+                self.add_step(crate, workflow, s)
+            self.add_param_connections(crate, workflow)
+        return workflow
+
+    def add_engine_run(self, crate):
+        engine = self.workflow_run.start().starter_activity()
+        roc_engine = crate.add(SoftwareApplication(crate, properties={
+            "name": engine.label or "workflow engine"
+        }))
+        roc_engine_run = crate.add(ContextEntity(crate, properties={
+            "@type": "OrganizeAction",
+            "name": f"Run of {roc_engine['name']}",
+            "startTime": engine.start().time.isoformat(),
+        }))
+        roc_engine_run["instrument"] = roc_engine
+        self.add_agent(crate, roc_engine_run, engine)
+        self.roc_engine_run = roc_engine_run
+
+    def add_action(self, crate, activity, parent_instrument=None):
+        workflow = crate.mainEntity
+        action = crate.add(ContextEntity(crate, properties={
+            "@type": "CreateAction",
+            "name": activity.label,
+        }))
+        plan = _resolve_plan(activity)
+        plan_tag = plan.id.localpart
+        if plan_tag == "main":
+            assert str(activity.type) == "wfprov:WorkflowRun"
+            instrument = workflow
+            self.roc_engine_run["result"] = action
+            crate.root_dataset["mentions"] = [action]
 
-class cwlConverter(converter):
-    hashes = {}
-    collections = {}
+            def to_wf_p(k):
+                return k
+        else:
+            parent_instrument_fragment = _get_fragment(parent_instrument.id)
+            if parent_instrument_fragment != self.WORKFLOW_BASENAME:
+                parts = plan_tag.split("/", 1)
+                if parts[0] == "main":
+                    parts[0] = parent_instrument_fragment
+                    plan_tag = "/".join(parts)
+            tool_name = self.step_maps[parent_instrument_fragment][plan_tag]["tool"]
+            instrument = crate.dereference(f"{workflow.id}#{tool_name}")
+            control_action = self.control_actions.get(plan_tag)
+            if not control_action:
+                control_action = crate.add(ContextEntity(crate, properties={
+                    "@type": "ControlAction",
+                    "name": f"orchestrate {tool_name}",
+                }))
+                step = crate.dereference(f"{workflow.id}#{plan_tag}")
+                control_action["instrument"] = step
+                self.roc_engine_run.append_to("object", control_action, compact=True)
+                self.control_actions[plan_tag] = control_action
+            control_action.append_to("object", action, compact=True)
+            if activity.uri in self.with_prov:
+                nested_prov = Provenance(self.ro, activity.uri)
+                activity = nested_prov.activity()
 
-    def __init__(self):
-        pass
+            def to_wf_p(k):
+                return k.replace(activity.plan().localpart, tool_name)
+        self.get_hashes(activity.provenance)
+        action["instrument"] = instrument
+        action["startTime"] = activity.start().time.isoformat()
+        action["endTime"] = activity.end().time.isoformat()
+        action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
+        action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
+        self.add_container_images(crate, action, activity)
+        for job in activity.steps():
+            self.add_action(crate, job, parent_instrument=instrument)
 
-    def get_workflow(self, wf_path):
+    def patch_workflow_input_collection(self, crate, wf=None):
+        """\
+        CWLProv records secondary files only in step runs, not in the workflow
+        run. Thus, when the conversion of parameter values is completed,
+        workflow-level parameters with secondary files get mapped to the main
+        entity of the collection alone (a File). This method fixes the mapping
+        by retrieving the correct Collection entity from the relevant tool
+        execution.
+        """
+        if wf is None:
+            wf = crate.mainEntity
+        sel = [_ for _ in crate.contextual_entities
+               if "CreateAction" in as_list(_.type) and _.get("instrument") is wf]
+        if not sel:
+            return  # skipped subworkflow
+        wf_action = sel[0]
+        connections = [_ for _ in crate.contextual_entities
+                       if "ParameterConnection" in as_list(_.type)]
+        for param in wf.get("input", []):
+            if param.get("additionalType") == "Collection":
+                src_sel = [_ for _ in wf_action.get("object", [])
+                           if param in as_list(_.get("exampleOfWork"))]
+                if not src_sel:
+                    raise RuntimeError(f"object for param {param.id} not found")
+                obj = src_sel[0]
+                if obj.type != "Collection":
+                    param_connections = [_ for _ in connections if _["sourceParameter"] is param]
+                    if not param_connections:
+                        continue
+                    pc = param_connections[0]
+                    tgt_param = pc["targetParameter"]
+                    tgt_sel = [_ for _ in crate.get_entities()
+                               if tgt_param in as_list(_.get("exampleOfWork"))]
+                    if not tgt_sel:
+                        raise RuntimeError(f"object for param {tgt_param.id} not found")
+                    tgt_obj = tgt_sel[0]
+                    wf_action["object"] = [
+                        _ for _ in as_list(wf_action["object"]) if _ is not obj
+                    ] + [tgt_obj]
+                    tgt_obj.append_to("exampleOfWork", param)
+                    obj["exampleOfWork"] = [_ for _ in as_list(obj["exampleOfWork"])
+                                            if _ is not param]
+                    if len(obj["exampleOfWork"]) == 1:
+                        obj["exampleOfWork"] = obj["exampleOfWork"][0]
+                    elif len(obj["exampleOfWork"]) == 0:  # check the list, not the collapsed entity
+                        del obj["exampleOfWork"]
+        for tool in wf.get("hasPart", []):
+            if "ComputationalWorkflow" in as_list(tool.type):
+                self.patch_workflow_input_collection(crate, wf=tool)
+
+    def add_inputs_file(self, crate):
+        path = self.root / "workflow" / INPUTS_FILE_BASENAME
+        if path.is_file():
+            with open(path) as f:
+                data = json.load(f)
+            data = self.map_input_data(crate, data)
+            source = StringIO(json.dumps(data, indent=4))
+            crate.add_file(source, path.name, properties={
+                "name": "input object document",
+                "encodingFormat": "application/json",
+            })
+
+    def add_output_formats(self, crate):
+        path = self.root / "workflow" / OUTPUTS_FILE_BASENAME
+        if path.is_file():
+            with open(path) as f:
+                data = json.load(f)
+            self.map_input_data(crate, data)
+
+    # --------------------------------------------------------------------------
+    # Internal methods, called by the top level methods
+
+    def add_step(self, crate, workflow, cwl_step):
+        step_fragment = _get_fragment(cwl_step.id)
+        step_id = f"{self.wf_path.name}#{step_fragment}"
+        pos = self.step_maps[_get_fragment(workflow.id)][step_fragment]["pos"]
+        step = crate.add(ContextEntity(crate, step_id, properties={
+            "@type": "HowToStep",
+            "position": str(pos),
+        }))
+        tool = self.add_tool(crate, workflow, cwl_step.run)
+        step["workExample"] = tool
+        if hasattr(cwl_step, "label") and cwl_step.label:
+            step["name"] = cwl_step.label
+        if hasattr(cwl_step, "doc") and cwl_step.doc:
+            step["description"] = cwl_step.doc
+        workflow.append_to("step", step)
+
+    def add_tool(self, crate, workflow, cwl_tool):
+        if isinstance(cwl_tool, str):
+            tool_fragment = _get_fragment(cwl_tool)
+            cwl_tool = self.workflow_definition[tool_fragment]
+        else:
+            tool_fragment = _get_fragment(cwl_tool.id)
+        if hasattr(cwl_tool, "expression"):
+            raise RuntimeError("ExpressionTool not supported yet")
+        tool_id = f"{self.wf_path.name}#{tool_fragment}"
+        tool = crate.dereference(tool_id)
+        if tool:
+            return tool
+        properties = {"name": tool_fragment}
+        if cwl_tool.doc:
+            properties["description"] = cwl_tool.doc
+        if cwl_tool.label:
+            properties["name"] = cwl_tool.label
+        if hasattr(cwl_tool, "steps"):
+            properties["@type"] = ["SoftwareSourceCode", "ComputationalWorkflow", "HowTo"]
+        else:
+            properties["@type"] = "SoftwareApplication"
+        if hasattr(cwl_tool, "intent") and cwl_tool.intent:
+            properties["featureList"] = cwl_tool.intent
+        if hasattr(cwl_tool, "requirements") and cwl_tool.requirements:
+            for req in cwl_tool.requirements:
+                if req.class_ == "ResourceRequirement":
+                    ramMin = req.ramMin
+                    if ramMin:
+                        properties["memoryRequirements"] = f"{int(ramMin)} MiB"
+        deps = []
+        if hasattr(cwl_tool, "hints") and cwl_tool.hints:
+            for req in cwl_tool.hints:
+                if hasattr(req, "class_") and req.class_ == "ResourceRequirement":
+                    ramMin = req.ramMin
+                    if ramMin:
+                        properties["memoryRequirements"] = f"{int(ramMin)} MiB"
+                if hasattr(req, "class_") and req.class_ == "SoftwareRequirement":
+                    for p in req.packages:
+                        if hasattr(p, "specs") and p.specs:
+                            dep_id = p.specs[0]
+                            dep_properties = {
+                                "@type": "SoftwareApplication",
+                                "name": p.package
+                            }
+                            if p.version:
+                                dep_properties["softwareVersion"] = p.version
+                            deps.append(
+                                crate.add(ContextEntity(crate, dep_id, properties=dep_properties))
+                            )
+        tool = crate.add(ContextEntity(crate, tool_id, properties=properties))
+        if deps:
+            tool["softwareRequirements"] = deps
+        if len(deps) == 1:
+            tool["mainEntity"] = deps[0]
+        tool["input"] = self.add_params(crate, cwl_tool.inputs)
+        tool["output"] = self.add_params(crate, cwl_tool.outputs)
+        workflow.append_to("hasPart", tool)
+        if hasattr(cwl_tool, "steps"):
+            tool["programmingLanguage"] = workflow["programmingLanguage"]
+            for s in cwl_tool.steps:
+                self.add_step(crate, tool, s)
+            self.add_param_connections(crate, tool)
+        return tool
+
+    def add_params(self, crate, cwl_params):
+        params = []
+        for cwl_p in cwl_params:
+            p_id = _get_relative_uri(cwl_p.id)
+            properties = _properties_from_cwl_param(cwl_p)
+            properties["name"] = p_id.rsplit("/", 1)[-1]
+            p = crate.add(ContextEntity(crate, p_id, properties=properties))
+            params.append(p)
+        return params
+
+    def add_agent(self, crate, roc_engine_run, engine):
+        delegate = engine.start().starter_activity()
+        try:
+            delegation = next(engine.provenance.record_with_attr(
+                prov.model.ProvDelegation, delegate.id, prov.model.PROV_ATTR_DELEGATE
+            ))
+        except StopIteration:
+            return
+        responsible = delegation.get_attribute(prov.model.PROV_ATTR_RESPONSIBLE)
+        agent = sum((engine.provenance.prov_doc.get_record(_) for _ in responsible), [])
+        for a in agent:
+            if "prov:Person" not in set(str(_) for _ in a.get_asserted_types()):
+                continue
+            agent_id = a.identifier.uri
+            if not agent_id.startswith("http"):
+                agent_id = "#" + agent_id.rsplit(":", 1)[-1]
+            properties = {
+                "@type": "Person"
+            }
+            if isinstance(a.label, str):
+                properties["name"] = a.label
+            ro_a = crate.add(ContextEntity(crate, agent_id, properties=properties))
+            roc_engine_run.append_to("agent", ro_a, compact=True)
+
+    def add_container_images(self, crate, action, activity):
+        images = set()
+        for assoc in activity.association():
+            for agent in activity.provenance.prov_doc.get_record(assoc.agent_id):
+                images |= agent.get_attribute("cwlprov:image")
+        for im in images:
+            properties = parse_img(im)
+            properties.update({
+                "@type": "ContainerImage",
+                "additionalType": {"@id": DOCKER_IMG_TYPE}
+            })
+            roc_img = crate.add(ContextEntity(crate, properties=properties))
+            action.append_to("containerImage", roc_img, compact=True)
+
+    def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
+        action_params = []
+        all_roles = set()
+        for rel in getattr(activity, ptype)():
+            k = _get_relative_uri(rel.role.uri)
+            if str(activity.type) == "wfprov:WorkflowRun":
+                # workflow output roles have a phantom step part
+                if ptype == "generation":
+                    k = _cut_step_part(k)
+                # In the case of a single tool run, cwltool reports one WorkflowRun
+                # and no ProcessRun; some parameters are duplicated, appearing both
+                # with role main/PARAM_NAME and main/ORIGINAL_WF_NAME/PARAM_NAME
+                if not list(activity.steps()):
+                    k = _cut_step_part(k)
+                    if k in all_roles:
+                        continue
+                    all_roles.add(k)
+            wf_p = crate.dereference(to_wf_p(k))
+            k = _get_fragment(k)
+            v = rel.entity()
+            value = self.convert_param(v,
+                                       crate,
+                                       manifest=self.manifest
+                                       )
+            if value is None:
+                continue  # param is optional with no default and was not set
+            if {"ro:Folder", "wf4ever:File"} & set(str(_) for _ in v.types()):
+                action_p = value
+            else:
+                # FIXME: assuming arrays and records don't have nested structured types
+                if isinstance(value, dict):
+                    value = [crate.add(ContextEntity(crate, f"#pv-{k}/{nk}", properties={
+                        "@type": "PropertyValue",
+                        "name": nk,
+                        "value": nv,
+                    })) for nk, nv in value.items()]
+                action_p = crate.add(ContextEntity(crate, f"#pv-{k}", properties={
+                    "@type": "PropertyValue",
+                    "name": k.rsplit("/", 1)[-1],
+                }))
+                action_p["value"] = value
+            action_p["exampleOfWork"] = list(set(
+                as_list(action_p.get("exampleOfWork", [])) + [wf_p]
+            ))
+            if len(action_p["exampleOfWork"]) == 1:
+                action_p["exampleOfWork"] = action_p["exampleOfWork"][0]
+            if ptype == "generation":
+                action_p["dateCreated"] = rel.time.isoformat()
+            action_params.append(action_p)
+        return action_params
+
+    def add_param_connections(self, crate, workflow):
+        def connect(source, target, entity):
+            connection = crate.add(ContextEntity(crate, properties={
+                "@type": "ParameterConnection"
+            }))
+            connection["sourceParameter"] = crate.get(f"{self.WORKFLOW_BASENAME}#{source}")
+            connection["targetParameter"] = crate.get(f"{self.WORKFLOW_BASENAME}#{target}")
+            entity.append_to("connection", connection)
+        wf_name = _get_fragment(workflow.id)
+        wf_def = self.workflow_definition[wf_name]
+        step_map = self.step_maps[wf_name]
+        out_map = {}
+        for step in wf_def.steps:
+            step_name = _get_fragment(step.id)
+            tool_name = step_map[step_name]["tool"]
+            for o in step.out:
+                o_name = _get_fragment(o)
+                out_map[o_name] = o_name.replace(step_name, tool_name)
+        for step in wf_def.steps:
+            step_name = _get_fragment(step.id)
+            ro_step = crate.get(f"{self.wf_path.name}#{step_name}")
+            tool_name = step_map[step_name]["tool"]
+            for mapping in getattr(step, "in_", []):
+                if not mapping.source:
+                    continue
+                sources = [mapping.source] if not isinstance(
+                    mapping.source, list
+                ) else mapping.source
+                for s in sources:
+                    from_param = _get_fragment(s)
+                    try:
+                        from_param = out_map[from_param]
+                    except KeyError:
+                        pass  # only needed if source is from another step
+                    to_param = _get_fragment(mapping.id).replace(step_name, tool_name)
+                    connect(from_param, to_param, ro_step)
+        for out in getattr(wf_def, "outputs", []):
+            out_sources = [out.outputSource] if not isinstance(
+                out.outputSource, list
+            ) else out.outputSource
+            for out_s in out_sources:
+                from_param = _get_fragment(out_s)
+                try:
+                    from_param = out_map[from_param]
+                except KeyError:
+                    # assuming this is a passthrough for a workflow input parameter
+                    pass
+                to_param = _get_fragment(out.id)
+                connect(from_param, to_param, workflow)
+
+    def get_manifest(self, root=None, MANIFEST_FILE=None):
+        """Map each hash listed in ``root / MANIFEST_FILE`` to its path under ``root``."""
+        with open(root / Path(MANIFEST_FILE)) as f:
+            # Blank lines carry no entry; unpacking them would raise ValueError.
+            entries = (line.split(None, 1) for line in f if line.strip())
+            manifest = {hash_: root / relpath.strip() for hash_, relpath in entries}
+        return manifest
+
+    def get_workflow(self):
         """\
         Get the workflow from the given path.
 
@@ -32,8 +484,7 @@ def get_workflow(self, wf_path):
         around issues.
         """
 
-        wf_path = Path(wf_path)
-        with open(wf_path, "rt") as f:
+        with open(self.wf_path, "rt") as f:
             json_wf = json.load(f)
         graph = json_wf.get("$graph", [json_wf])
         # https://github.com/common-workflow-language/cwltool/pull/1506
@@ -41,21 +492,25 @@ def get_workflow(self, wf_path):
             ns = n.pop("$namespaces", {})
             if ns:
                 json_wf.setdefault("$namespaces", {}).update(ns)
-        defs = load_document_by_yaml(json_wf, wf_path.absolute().as_uri(), load_all=True)
+        defs = load_document_by_yaml(json_wf, self.wf_path.absolute().as_uri(), load_all=True)
         if not isinstance(defs, list):
             defs = [defs]
         def_map = {}
         for d in defs:
             k = _get_fragment(d.id)
             if k == "main":
-                k = wf_path.name
+                k = self.wf_path.name
             def_map[k] = d
-        _normalize_cwl_defs(def_map)
+        self.workflow_definition = _normalize_workflow_definition(def_map)
         return def_map
 
-    def get_step_maps(self, cwl_defs):
+    def get_step_maps(self):
+        """
+        Get a mapping of step names to their tool names and positions.
+        """
+        
         rval = {}
-        for k, v in cwl_defs.items():
+        for k, v in self.workflow_definition.items():
             if hasattr(v, "steps"):
                 graph = self.build_step_graph(v)
                 pos_map = {f: i for i, f in enumerate(nx.topological_sort(graph))}
@@ -66,6 +521,10 @@ def get_step_maps(self, cwl_defs):
         return rval
 
     def build_step_graph(self, cwl_wf):
+        """
+        Build a graph of steps in the workflow.
+        """
+        
         out_map = {}
         for s in cwl_wf.steps:
             for o in s.out:
@@ -87,9 +546,7 @@ def convert_param(self,
                       crate,
                       convert_secondary=True,
                       parent=None,
-                      hashes=None,
                       manifest=None,
-                      file_map=None
                       ):
         type_names = frozenset(str(_) for _ in prov_param.types())
         secondary_files = [_.generated_entity() for _ in prov_param.derivations()
@@ -98,8 +555,7 @@ def convert_param(self,
             main_entity = self.convert_param(prov_param,
                                              crate,
                                              convert_secondary=False,
-                                             manifest=manifest,
-                                             file_map=file_map)
+                                             manifest=manifest)
             action_p = self.collections.get(main_entity.id)
             if not action_p:
                 action_p = crate.add(ContextEntity(crate, properties={
@@ -110,7 +566,6 @@ def convert_param(self,
                     self.convert_param(_,
                                        crate,
                                        manifest=manifest,
-                                       file_map=file_map
                                        ) for _ in secondary_files
                 ]
                 crate.root_dataset.append_to("mentions", action_p)
@@ -131,7 +586,7 @@ def convert_param(self,
                     source_k = str(source.resolve(strict=False))
                 except RuntimeError:
                     source_k = str(source)
-                file_map[source_k] = dest
+                self.file_map[source_k] = dest
             return action_p
         if "ro:Folder" in type_names:
             hash_ = self.hashes[prov_param.id.localpart]
@@ -145,7 +600,6 @@ def convert_param(self,
                                               crate,
                                               parent=action_p,
                                               manifest=manifest,
-                                              file_map=file_map
                                               )
                     action_p.append_to("hasPart", part)
             return action_p
@@ -156,7 +610,6 @@ def convert_param(self,
                 (k, self.convert_param(v,
                                        crate,
                                        manifest=manifest,
-                                       file_map=file_map
                                        ))
                 for k, v in _get_dict(prov_param).items()
                 if k != "@id"
@@ -165,7 +618,6 @@ def convert_param(self,
             return [self.convert_param(_,
                                        crate,
                                        manifest=manifest,
-                                       file_map=file_map
                                        ) for _ in _get_members(prov_param)]
         if prov_param.id.uri == CWLPROV_NONE:
             return None
@@ -173,12 +625,12 @@ def convert_param(self,
 
     def get_hashes(self, provenance):
         for r in provenance.prov_doc.get_records(prov.model.ProvEntity):
-            self._get_hash(self.hashes, Entity(provenance, r))
+            self.get_hash(Entity(provenance, r))
 
-    def _get_hash(self, hashes, prov_param):
+    def get_hash(self, prov_param):
         k = prov_param.id.localpart
         try:
-            return hashes[k]
+            return self.hashes[k]
         except KeyError:
             type_names = frozenset(str(_) for _ in prov_param.types())
             if "wf4ever:File" in type_names:
@@ -188,11 +640,36 @@ def _get_hash(self, hashes, prov_param):
             elif "ro:Folder" in type_names:
                 m = hashlib.sha1()
                 m.update("".join(sorted(
-                    self._get_hash(hashes, _) for _ in _get_dict(prov_param).values()
+                    self.get_hash(_) for _ in _get_dict(prov_param).values()
                 )).encode())
                 self.hashes[k] = hash_ = m.hexdigest()
                 return hash_
 
+    def map_input_data(self, crate, data):
+        if isinstance(data, list):
+            return [self.map_input_data(crate, _) for _ in data]
+        if isinstance(data, dict):
+            rval = {}
+            for k, v in data.items():
+                if k == "location":
+                    source = self.root / "workflow" / v
+                    try:
+                        source_k = str(source.resolve(strict=False))
+                    except RuntimeError:
+                        source_k = str(source)
+                    dest = self.file_map.get(source_k)
+                    rval[k] = str(dest) if dest else v
+                    fmt = data.get("format")
+                    if fmt:
+                        entity = crate.get(str(dest))
+                        if entity:
+                            entity["encodingFormat"] = fmt
+                else:
+                    rval[k] = self.map_input_data(crate, v)
+            return rval
+        return data
+
+
 
 def _get_members(entity):
     membership = entity.provenance.record_with_attr(
@@ -206,9 +683,9 @@ def _get_fragment(uri):
     return uri.rsplit("#", 1)[-1]
 
 
-def _normalize_cwl_defs(cwl_defs):
+def _normalize_workflow_definition(workflow_definition):
     inline_tools = {}
-    for d in cwl_defs.values():
+    for d in workflow_definition.values():
         if not hasattr(d, "steps") or not d.steps:
             continue
         for s in d.steps:
@@ -219,7 +696,7 @@ def _normalize_cwl_defs(cwl_defs):
                         tool.id = f"{s.id}/run"
                     inline_tools[_get_fragment(tool.id)] = tool
                     s.run = tool.id
-    cwl_defs.update(inline_tools)
+    return workflow_definition.update(inline_tools)
 
 
 def _set_alternate_name(prov_param, action_p, parent=None):
@@ -241,3 +718,71 @@ def _get_dict(entity):
         entity_id = first(kvp.record.get_attribute("prov:pairEntity"))
         d[key] = entity.provenance.entity(entity_id)
     return d
+
+def _resolve_plan(activity):
+    job_qname = activity.plan()
+    plan = activity.provenance.entity(job_qname)
+    if not plan:
+        m = SCATTER_JOB_PATTERN.match(str(job_qname))
+        if m:
+            plan = activity.provenance.entity(m.groups()[0])
+    return plan
+
+def _get_relative_uri(uri):
+    doc, fragment = uri.rsplit("#", 1)
+    return f"{doc.rsplit('/', 1)[-1]}#{fragment}"
+
+def _properties_from_cwl_param(cwl_p):
+    def is_structured(cwl_type):
+        return getattr(cwl_type, "type_", None) in ("array", "record")
+    additional_type = "Collection" if cwl_p.secondaryFiles else _convert_cwl_type(cwl_p.type_)
+    properties = {
+        "@type": "FormalParameter",
+        "additionalType": additional_type
+    }
+    if hasattr(cwl_p, "doc") and cwl_p.doc:
+        properties["description"] = cwl_p.doc
+    elif hasattr(cwl_p, "label") and cwl_p.label:
+        # name is used for the parameter's id to support reproducibility
+        properties["description"] = cwl_p.label
+    if cwl_p.format:
+        properties["encodingFormat"] = cwl_p.format
+    if isinstance(cwl_p.type_, list) and "null" in cwl_p.type_:
+        properties["valueRequired"] = "False"
+    if is_structured(cwl_p.type_):
+        properties["multipleValues"] = "True"
+    if hasattr(cwl_p, "default"):
+        if isinstance(cwl_p.default, dict):
+            if cwl_p.default.get("class") in ("File", "Directory"):
+                default = cwl_p.default.get("location", cwl_p.default.get("path"))
+            if default:
+                properties["defaultValue"] = default
+        elif not is_structured(cwl_p.type_) and cwl_p.default is not None:
+            properties["defaultValue"] = str(cwl_p.default)
+        # TODO: support more cases
+    if getattr(cwl_p.type_, "type_", None) == "enum":
+        properties["valuePattern"] = "|".join(_.rsplit("/", 1)[-1] for _ in cwl_p.type_.symbols)
+    return properties
+
+def _convert_cwl_type(cwl_type):
+    if isinstance(cwl_type, list):
+        s = set(_convert_cwl_type(_) for _ in cwl_type)
+        s.discard(None)
+        return s.pop() if len(s) == 1 else sorted(s)
+    if isinstance(cwl_type, str):
+        return CWL_TYPE_MAP[cwl_type]
+    if cwl_type.type_ == "enum":
+        return "Text"  # use actionOption to represent choices?
+    if cwl_type.type_ == "array":
+        return _convert_cwl_type(cwl_type.items)
+    if cwl_type.type_ == "record":
+        return "PropertyValue"
+
+def _get_fragment(uri):
+    return uri.rsplit("#", 1)[-1]
+
+def _cut_step_part(relative_uri):
+    parts = relative_uri.split("/", 2)
+    if len(parts) > 2:
+        relative_uri = parts[0] + "/" + parts[2]
+    return relative_uri
diff --git a/tests/test_step_mapping.py b/tests/test_step_mapping.py
index ac4fed1..1c3191b 100644
--- a/tests/test_step_mapping.py
+++ b/tests/test_step_mapping.py
@@ -26,8 +26,10 @@ def converter():
 def test_step_maps_cwl(data_dir, converter):
     wf_basename = "exome-alignment-packed.cwl"
     wf_path = data_dir / wf_basename
-    cwl_defs = converter.get_workflow(wf_path)
-    step_maps = converter.get_step_maps(cwl_defs)
+    converter.wf_path = wf_path
+    cwl_defs = converter.get_workflow()
+    converter.workflow_definition = cwl_defs
+    step_maps = converter.get_step_maps()
     assert set(step_maps) == {wf_basename}
     sm = step_maps[wf_basename]
     assert len(sm) == 8
@@ -48,9 +50,10 @@ def test_step_maps_cwl(data_dir, converter):
 
 
 def test_step_maps_disconnected_cwl(data_dir, converter):
-    wf_path = data_dir / "no-output-run-1/workflow/packed.cwl"
-    cwl_defs = converter.get_workflow(wf_path)
-    step_maps = converter.get_step_maps(cwl_defs)
+    converter.wf_path = data_dir / "no-output-run-1/workflow/packed.cwl"
+    cwl_defs = converter.get_workflow()
+    converter.workflow_definition = cwl_defs
+    step_maps = converter.get_step_maps()
     assert set(step_maps) == {"packed.cwl"}
     sm = step_maps["packed.cwl"]
     assert set(sm) == {"main/date_step", "main/echo_step", "main/date2_step"}

From 3608552097dae0fdc4790245e2d0ac94a20cc66e Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Tue, 10 Dec 2024 13:56:08 +0000
Subject: [PATCH 10/23] Move generic root metadata creation to base class

---
 src/runcrate/convert.py         |  1 -
 src/runcrate/converters/base.py | 14 ++++++++++++--
 src/runcrate/converters/cwl.py  |  9 ---------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index fc5f3eb..a2996a2 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -62,7 +62,6 @@ def __init__(self,
     def build(self):
         crate = ROCrate(gen_preview=False)
         crate.metadata.extra_contexts.append(TERMS_NAMESPACE)
-        self.converter.add_root_metadata(crate)
         self.converter.add_profiles(crate)
         self.converter.add_workflow(crate)
         self.converter.add_engine_run(crate)
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 98d8bdf..587039d 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -17,14 +17,24 @@ def __init__(self):
         self.file_map = {}
         self.manifest = None
 
+        add_root_metadata(self, crate)
+
     # --------------------------------------------------------------------------
     # Top level functions - called by the build() function
 
     def add_root_metadata(self, crate):
         """
-        Add metadata to the root of the crate.
+        Add license and readme to the root of the crate, if provided.
         """
-        raise NotImplementedError("add_root_metadata")
+        if self.license:
+            crate.root_dataset["license"] = self.license
+        if self.readme:
+            readme = crate.add_file(self.readme)
+            readme["about"] = crate.root_dataset
+            if self.readme.suffix.lower() == ".md":
+                readme["encodingFormat"] = "text/markdown"
+
+        return 
 
     def add_profiles(self, crate):
         """
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index f56134a..6c31d16 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -46,15 +46,6 @@ class cwlConverter(converter):
     # --------------------------------------------------------------------------
     # Top level methods, called by build()
 
-    def add_root_metadata(self, crate):
-        if self.license:
-            crate.root_dataset["license"] = self.license
-        if self.readme:
-            readme = crate.add_file(self.readme)
-            readme["about"] = crate.root_dataset
-            if self.readme.suffix.lower() == ".md":
-                readme["encodingFormat"] = "text/markdown"
-
     def add_profiles(self, crate):
         profiles = []
         for p in "process", "workflow", "provenance":

From ffefec6d6304cfce750744b65064a3c1856381b0 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 08:09:28 +0000
Subject: [PATCH 11/23] Add test of CLI option

---
 tests/test_cli.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 2428ec6..d17bb8a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -26,7 +26,22 @@ def test_cli_convert(data_dir, tmpdir, monkeypatch):
     monkeypatch.chdir(str(tmpdir))
     root = data_dir / "revsort-run-1"
     runner = CliRunner()
-    args = ["convert", "-c", "cwl", str(root)]
+    args = ["convert", str(root)]
+    result = runner.invoke(cli, args)
+    assert result.exit_code == 0, result.exception
+    crate_zip = tmpdir / f"{root.name}.crate.zip"
+    assert crate_zip.is_file()
+    crate = ROCrate(crate_zip)
+    assert not crate.root_dataset.get("license")
+    workflow = crate.mainEntity
+    assert workflow["name"] == "packed.cwl"
+
+    
+def test_cli_convert_with_cwl_converter_set_explictly(data_dir, tmpdir, monkeypatch):
+    monkeypatch.chdir(str(tmpdir))
+    root = data_dir / "revsort-run-1"
+    runner = CliRunner()
+    args = ["convert", "--converter", "cwl", str(root)]
     result = runner.invoke(cli, args)
     assert result.exit_code == 0, result.exception
     crate_zip = tmpdir / f"{root.name}.crate.zip"

From 4687ca9209e76e49aacfae481e585b3154820bb0 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 08:09:36 +0000
Subject: [PATCH 12/23] Apply linting

---
 src/runcrate/constants.py       |  2 --
 src/runcrate/convert.py         |  7 ++-----
 src/runcrate/converters/base.py |  8 +++-----
 src/runcrate/converters/cwl.py  | 19 ++++++++++---------
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/runcrate/constants.py b/src/runcrate/constants.py
index e011ec3..68f0992 100644
--- a/src/runcrate/constants.py
+++ b/src/runcrate/constants.py
@@ -27,5 +27,3 @@
 
 WROC_PROFILE_VERSION = "1.0"
 DOCKER_IMG_TYPE = "https://w3id.org/ro/terms/workflow-run#DockerImage"
-
-
diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index a2996a2..03b9c13 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -18,15 +18,11 @@
 Generate a Workflow Run RO-Crate from a CWLProv RO bundle.
 """
 
-import json
 from pathlib import Path
 
-import prov.model
 from bdbag.bdbagit import BDBag
 from cwlprov.prov import Provenance
 from cwlprov.ro import ResearchObject
-from rocrate.model.contextentity import ContextEntity
-from rocrate.model.softwareapplication import SoftwareApplication
 from rocrate.rocrate import ROCrate
 
 from .constants import TERMS_NAMESPACE
@@ -34,6 +30,7 @@
 
 MANIFEST_FILE = "manifest-sha1.txt"
 
+
 class ProvCrateBuilder:
     def __init__(self,
                  root,
@@ -62,6 +59,7 @@ def __init__(self,
     def build(self):
         crate = ROCrate(gen_preview=False)
         crate.metadata.extra_contexts.append(TERMS_NAMESPACE)
+        self.converter.add_root_metadata(crate)
         self.converter.add_profiles(crate)
         self.converter.add_workflow(crate)
         self.converter.add_engine_run(crate)
@@ -70,4 +68,3 @@ def build(self):
         self.converter.add_inputs_file(crate)
         self.converter.add_output_formats(crate)
         return crate
-
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 587039d..8491deb 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -11,14 +11,12 @@ def __init__(self):
         self.with_prov = set()
         self.workflow_run = None
         self.roc_engine_run = None
-        self.control_actions = {}        
+        self.control_actions = {}
         self.collections = {}
         self.hashes = {}
         self.file_map = {}
         self.manifest = None
 
-        add_root_metadata(self, crate)
-
     # --------------------------------------------------------------------------
     # Top level functions - called by the build() function
 
@@ -34,7 +32,7 @@ def add_root_metadata(self, crate):
             if self.readme.suffix.lower() == ".md":
                 readme["encodingFormat"] = "text/markdown"
 
-        return 
+        return
 
     def add_profiles(self, crate):
         """
@@ -77,7 +75,7 @@ def add_output_formats(self, crate):
         Add output formats to the crate.
         """
         raise NotImplementedError("add_output_formats")
-        
+
     # --------------------------------------------------------------------------
     # Helper functions - called by the top level functions
 
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 6c31d16..af43e2a 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -13,7 +13,6 @@
 
 from rocrate.model.contextentity import ContextEntity
 from rocrate.model.softwareapplication import SoftwareApplication
-from rocrate.rocrate import ROCrate
 
 from .base import converter
 from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION, DOCKER_IMG_TYPE
@@ -39,7 +38,8 @@
 
 SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
 
-class cwlConverter(converter):    
+
+class cwlConverter(converter):
 
     WORKFLOW_BASENAME = "packed.cwl"
 
@@ -499,7 +499,7 @@ def get_step_maps(self):
         """
         Get a mapping of step names to their tool names and positions.
         """
-        
+
         rval = {}
         for k, v in self.workflow_definition.items():
             if hasattr(v, "steps"):
@@ -515,7 +515,7 @@ def build_step_graph(self, cwl_wf):
         """
         Build a graph of steps in the workflow.
         """
-        
+
         out_map = {}
         for s in cwl_wf.steps:
             for o in s.out:
@@ -661,7 +661,6 @@ def map_input_data(self, crate, data):
         return data
 
 
-
 def _get_members(entity):
     membership = entity.provenance.record_with_attr(
         prov.model.ProvMembership, entity.id, prov.model.PROV_ATTR_COLLECTION
@@ -670,10 +669,6 @@ def _get_members(entity):
     return (entity.provenance.entity(first(_)) for _ in member_ids)
 
 
-def _get_fragment(uri):
-    return uri.rsplit("#", 1)[-1]
-
-
 def _normalize_workflow_definition(workflow_definition):
     inline_tools = {}
     for d in workflow_definition.values():
@@ -710,6 +705,7 @@ def _get_dict(entity):
         d[key] = entity.provenance.entity(entity_id)
     return d
 
+
 def _resolve_plan(activity):
     job_qname = activity.plan()
     plan = activity.provenance.entity(job_qname)
@@ -719,10 +715,12 @@ def _resolve_plan(activity):
             plan = activity.provenance.entity(m.groups()[0])
     return plan
 
+
 def _get_relative_uri(uri):
     doc, fragment = uri.rsplit("#", 1)
     return f"{doc.rsplit('/', 1)[-1]}#{fragment}"
 
+
 def _properties_from_cwl_param(cwl_p):
     def is_structured(cwl_type):
         return getattr(cwl_type, "type_", None) in ("array", "record")
@@ -755,6 +753,7 @@ def is_structured(cwl_type):
         properties["valuePattern"] = "|".join(_.rsplit("/", 1)[-1] for _ in cwl_p.type_.symbols)
     return properties
 
+
 def _convert_cwl_type(cwl_type):
     if isinstance(cwl_type, list):
         s = set(_convert_cwl_type(_) for _ in cwl_type)
@@ -769,9 +768,11 @@ def _convert_cwl_type(cwl_type):
     if cwl_type.type_ == "record":
         return "PropertyValue"
 
+
 def _get_fragment(uri):
     return uri.rsplit("#", 1)[-1]
 
+
 def _cut_step_part(relative_uri):
     parts = relative_uri.split("/", 2)
     if len(parts) > 2:

From 6f9cc68e21b2447f7bd31810e75ead080940c1a8 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 08:33:09 +0000
Subject: [PATCH 13/23] Remove trailing whitespace on blank line

---
 tests/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index d17bb8a..c8d09bb 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -36,7 +36,7 @@ def test_cli_convert(data_dir, tmpdir, monkeypatch):
     workflow = crate.mainEntity
     assert workflow["name"] == "packed.cwl"
 
-    
+
 def test_cli_convert_with_cwl_converter_set_explictly(data_dir, tmpdir, monkeypatch):
     monkeypatch.chdir(str(tmpdir))
     root = data_dir / "revsort-run-1"

From c71f73bde33baad679029da48f0f6b47c1b2a5d5 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 08:35:27 +0000
Subject: [PATCH 14/23] Fix import order

---
 src/runcrate/convert.py        |  1 +
 src/runcrate/converters/cwl.py | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index 03b9c13..b4ad996 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -28,6 +28,7 @@
 from .constants import TERMS_NAMESPACE
 from .converters import CONVERTERS
 
+
 MANIFEST_FILE = "manifest-sha1.txt"
 
 
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index af43e2a..5b46819 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -1,22 +1,21 @@
 import hashlib
 import json
 import re
-from pathlib import Path
 from io import StringIO
+from pathlib import Path
 
 import networkx as nx
 import prov.model
 from cwl_utils.parser import load_document_by_yaml
-from cwlprov.prov import Entity
+from cwlprov.prov import Entity, Provenance
 from cwlprov.utils import first
-from cwlprov.prov import Provenance
-
 from rocrate.model.contextentity import ContextEntity
 from rocrate.model.softwareapplication import SoftwareApplication
 
-from .base import converter
-from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION, DOCKER_IMG_TYPE
+from ..constants import DOCKER_IMG_TYPE, PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION
 from ..utils import as_list, parse_img
+from .base import converter
+
 
 CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
 

From 9ef32e530ef0ed98d2e99dc62a6e23fa30f080ca Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 09:18:16 +0000
Subject: [PATCH 15/23] Add tests covering non-implemented methods in base
 converter class

---
 tests/test_converter_base.py | 82 ++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tests/test_converter_base.py

diff --git a/tests/test_converter_base.py b/tests/test_converter_base.py
new file mode 100644
index 0000000..689d28c
--- /dev/null
+++ b/tests/test_converter_base.py
@@ -0,0 +1,82 @@
+# Copyright 2022-2024 CRS4.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from runcrate.converters.base import converter
+
+
+@pytest.fixture
+def converter_instance():
+    converter_instance = converter()
+    return converter_instance
+
+
+def test_initialization(converter_instance):
+    assert isinstance(converter_instance, converter)
+
+
+def test_add_profiles(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_profiles(None)
+
+
+def test_add_workflow(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_workflow(None)
+
+
+def test_add_engine_run(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_engine_run(None)
+
+
+def test_add_action(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_action(None, None)
+
+
+def test_patch_workflow_input_collection(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.patch_workflow_input_collection(None)
+
+
+def test_add_inputs_files(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_inputs_files(None)
+
+
+def test_add_output_formats(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.add_output_formats(None)
+
+
+def test_get_workflow(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.get_workflow(None)
+
+
+def test_get_step_maps(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.get_step_maps(None)
+
+
+def test_build_step_graph(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.build_step_graph(None)
+
+
+def test_convert_param(converter_instance):
+    with pytest.raises(NotImplementedError):
+        converter_instance.convert_param(None, None)

From 925f67da21263d082bd59f9fb03aeb12988e59f5 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Wed, 11 Dec 2024 09:46:09 +0000
Subject: [PATCH 16/23] Move profile addition to base converter class

---
 src/runcrate/converters/base.py | 25 ++++++++++++++++++++++++-
 src/runcrate/converters/cwl.py  | 21 +--------------------
 tests/test_converter_base.py    |  5 -----
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 8491deb..15b9fd8 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -1,3 +1,8 @@
+from rocrate.model.contextentity import ContextEntity
+
+from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION
+
+
 class converter:
     def __init__(self):
         self.root = None
@@ -38,7 +43,25 @@ def add_profiles(self, crate):
         """
         Add profiles to the crate.
         """
-        raise NotImplementedError("add_profiles")
+        profiles = []
+        for p in "process", "workflow", "provenance":
+            id_ = f"{PROFILES_BASE}/{p}/{PROFILES_VERSION}"
+            profiles.append(crate.add(ContextEntity(crate, id_, properties={
+                "@type": "CreativeWork",
+                "name": f"{p.title()} Run Crate",
+                "version": PROFILES_VERSION,
+            })))
+        # FIXME: in the future, this could go out of sync with the wroc
+        # profile added by ro-crate-py to the metadata descriptor
+        wroc_profile_id = f"https://w3id.org/workflowhub/workflow-ro-crate/{WROC_PROFILE_VERSION}"
+        profiles.append(crate.add(ContextEntity(crate, wroc_profile_id, properties={
+            "@type": "CreativeWork",
+            "name": "Workflow RO-Crate",
+            "version": WROC_PROFILE_VERSION,
+        })))
+        crate.root_dataset["conformsTo"] = profiles
+
+        return
 
     def add_workflow(self, crate):
         """
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 5b46819..b214b89 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -12,7 +12,7 @@
 from rocrate.model.contextentity import ContextEntity
 from rocrate.model.softwareapplication import SoftwareApplication
 
-from ..constants import DOCKER_IMG_TYPE, PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION
+from ..constants import DOCKER_IMG_TYPE
 from ..utils import as_list, parse_img
 from .base import converter
 
@@ -45,25 +45,6 @@ class cwlConverter(converter):
     # --------------------------------------------------------------------------
     # Top level methods, called by build()
 
-    def add_profiles(self, crate):
-        profiles = []
-        for p in "process", "workflow", "provenance":
-            id_ = f"{PROFILES_BASE}/{p}/{PROFILES_VERSION}"
-            profiles.append(crate.add(ContextEntity(crate, id_, properties={
-                "@type": "CreativeWork",
-                "name": f"{p.title()} Run Crate",
-                "version": PROFILES_VERSION,
-            })))
-        # FIXME: in the future, this could go out of sync with the wroc
-        # profile added by ro-crate-py to the metadata descriptor
-        wroc_profile_id = f"https://w3id.org/workflowhub/workflow-ro-crate/{WROC_PROFILE_VERSION}"
-        profiles.append(crate.add(ContextEntity(crate, wroc_profile_id, properties={
-            "@type": "CreativeWork",
-            "name": "Workflow RO-Crate",
-            "version": WROC_PROFILE_VERSION,
-        })))
-        crate.root_dataset["conformsTo"] = profiles
-
     def add_workflow(self, crate):
         lang_version = self.workflow_definition[self.WORKFLOW_BASENAME].cwlVersion
         properties = {
diff --git a/tests/test_converter_base.py b/tests/test_converter_base.py
index 689d28c..c75d325 100644
--- a/tests/test_converter_base.py
+++ b/tests/test_converter_base.py
@@ -27,11 +27,6 @@ def test_initialization(converter_instance):
     assert isinstance(converter_instance, converter)
 
 
-def test_add_profiles(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_profiles(None)
-
-
 def test_add_workflow(converter_instance):
     with pytest.raises(NotImplementedError):
         converter_instance.add_workflow(None)

From 5f5e2309bbde121103e9b6780a7fadfd5e584569 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 07:26:01 +0000
Subject: [PATCH 17/23] Remove unneeded converters protection

---
 src/runcrate/cli.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py
index 7e5cbd2..1bea861 100644
--- a/src/runcrate/cli.py
+++ b/src/runcrate/cli.py
@@ -74,10 +74,6 @@ def convert(root, converter, output, license, workflow_name, readme):
     if not output:
         output = Path(f"{root.name}.crate.zip")
 
-    if converter not in CONVERTERS:
-        sys.stderr.write(f"Unknown converter: {converter}\n")
-        sys.exit(1)
-
     converter_instance = CONVERTERS[converter]
     sys.stdout.write(f"Using converter: {converter_instance}\n")
 

From ef44044988c054bd2f65dc6da19de0e6768a803c Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 07:32:21 +0000
Subject: [PATCH 18/23] Capitalize converter class names

---
 src/runcrate/converters/__init__.py | 8 ++++----
 src/runcrate/converters/base.py     | 2 +-
 src/runcrate/converters/cwl.py      | 4 ++--
 tests/test_converter_base.py        | 6 +++---
 tests/test_step_mapping.py          | 4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/runcrate/converters/__init__.py b/src/runcrate/converters/__init__.py
index d4d1d78..f13f34b 100644
--- a/src/runcrate/converters/__init__.py
+++ b/src/runcrate/converters/__init__.py
@@ -1,8 +1,8 @@
-from .base import converter
-from .cwl import cwlConverter
+from .base import Converter
+from .cwl import CwlConverter
 
 
 CONVERTERS = {
-    "base": converter(),
-    "cwl": cwlConverter(),
+    "base": Converter(),
+    "cwl": CwlConverter(),
 }
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 15b9fd8..d513229 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -3,7 +3,7 @@
 from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION
 
 
-class converter:
+class Converter:
     def __init__(self):
         self.root = None
         self.workflow_name = None
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index b214b89..fea6dd8 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -14,7 +14,7 @@
 
 from ..constants import DOCKER_IMG_TYPE
 from ..utils import as_list, parse_img
-from .base import converter
+from .base import Converter
 
 
 CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
@@ -38,7 +38,7 @@
 SCATTER_JOB_PATTERN = re.compile(r"^(.+)_\d+$")
 
 
-class cwlConverter(converter):
+class CwlConverter(Converter):
 
     WORKFLOW_BASENAME = "packed.cwl"
 
diff --git a/tests/test_converter_base.py b/tests/test_converter_base.py
index c75d325..511acc1 100644
--- a/tests/test_converter_base.py
+++ b/tests/test_converter_base.py
@@ -14,17 +14,17 @@
 
 import pytest
 
-from runcrate.converters.base import converter
+from runcrate.converters.base import Converter
 
 
 @pytest.fixture
 def converter_instance():
-    converter_instance = converter()
+    converter_instance = Converter()
     return converter_instance
 
 
 def test_initialization(converter_instance):
-    assert isinstance(converter_instance, converter)
+    assert isinstance(converter_instance, Converter)
 
 
 def test_add_workflow(converter_instance):
diff --git a/tests/test_step_mapping.py b/tests/test_step_mapping.py
index 1c3191b..6785421 100644
--- a/tests/test_step_mapping.py
+++ b/tests/test_step_mapping.py
@@ -14,12 +14,12 @@
 
 import pytest
 
-from runcrate.converters.cwl import cwlConverter
+from runcrate.converters.cwl import CwlConverter
 
 
 @pytest.fixture
 def converter():
-    converter = cwlConverter()
+    converter = CwlConverter()
     return converter
 
 
From a0383affd88fc28dbdaa855bbf3e460606cdd54e Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 08:53:56 +0000
Subject: [PATCH 19/23] Make base an abstract base class

---
 revsort-run-1.crate.zip             | Bin 0 -> 6056 bytes
 src/runcrate/convert.py             |   2 +-
 src/runcrate/converters/__init__.py |   1 -
 src/runcrate/converters/base.py     |  34 +++++++++++++++++++---------
 src/runcrate/converters/cwl.py      |   2 +-
 tests/test_converter_base.py        |  33 ++++++++++++++++++++++++++-
 6 files changed, 57 insertions(+), 15 deletions(-)
 create mode 100644 revsort-run-1.crate.zip

diff --git a/revsort-run-1.crate.zip b/revsort-run-1.crate.zip
new file mode 100644
index 0000000000000000000000000000000000000000..f69f19a678695f121883753cdd0d5ea116e7f8c6
GIT binary patch
literal 6056
zcma)AWl$W<mK|mw5Zv7vT!Ib`!DVoF7zRjicL^@ZKycUKfe;8bKydfq1cFP@1Puh;
zyxkw)zHDvP>;BQzU474wTj$<7_h>;usKfvO00Y4G9AT^nab+b20RVCY008EnS2q~k
z(bk3s?(5WV2-W<?i{Ek1S>08XM$ewhk>IRfse?yS_)AN_KSvEhnvl1Ii&Av|^O9-B
z`;$AwI6ZkrcK39AO!%z?4a2=WD>ST%u~lCBcx~6;<na?Bd%uoD*d98Wujr1t*AmH)
zpM!;Ew<N|>Ws*=f7(D?DgXzABC#+KUJe*-TYPu8Y6t&sW7|zQq?icOuymi|*BaUx)
zQqBTIjON?Z)so&B=sL{u8{v=ftSOoHR871JxeO&Vm^)8=KZuDKoQ&+jfG~L!r@W*o
zG{a)*p8{$z`(f(H&TB52h<|_AH$u*q`hMFsSx}wm@WgGbpMNTj5o)KyltRcKK8X#^
zV)N5dQQ(8g7$t6HQ;zo585emoD*_wBQ_GDACJ@zenp<j^7P5?K1BuE>EcS9F-}U5D
zD4kMR(X>Y3mxWD-Hu?Ghkzqv2*7RcIQyRAOrh^0^k05^+QF5s~&Q{bzvN!{H>6d?s
z3?>(o)9-l*bSRF_sOolkWU_?s;~ey4F{^#IQ5goB1w?YWKVxkOC6dR{juJLpaubL&
zj?NvnQo6QxSeZ~u)YBSQ#e+pmZ<$c}wx^0tN9PQ;93?aKUA&rfI2r%w#;yc9yNl+R
zu(O!;!KHQ#o;j`iaTk~SN`sTGN!j8^fMd?5x5he(dqSy&g6Q16>EJQTU?r)JtR4|3
zI_tsnVhu!vb)T~vxj|?Xb@_|-TVc}IFZInnWb03Il`_AuUnOOf(&k%UinLF~eRZEg
z(ZCW9(B{6h_>^*OP`^Q1I-z0tXsf~GcnPX&n<{Z19J$(6aXar^wY%8F@a;-Ii{ksP
z_e(0|PVJ}}L}nyAN&%l5Tm8VD)e`-y8`n2hdJj)?$?-gF;|J>74`roz$_o&}SGCoH
z@Iq1|*FmY_-t7t`_?RGA%EQqR=y7HxQIHp}adNyb3D3dZh99|oty*I(-L!UaigCFw
zB1WvWd=%o!U$_Mg_U7*XJla&hqVvyZx<6>!wZ9GXdqvyn7_Y#}&t2HJ(xdhe8Tm=)
zZJo};US@d^tdVB1c+OJm-)g6LWYN_P2mlzO0sw4(YDa)y*bXiXv$e4kw1ZefL}7yb
zFk!fewFs|>4O|E&470O?@CgV*^mWA(mU;1yy<hK7FLrdLo3^N=04nY6!GtWL*f9--
zlEJH5xZ!|k?yPfv>5x`(cD|K4*1=OnX;fGAw)fkR77Kw+qz<M(!!vP(VXCwdPpyQ(
z%YzlpZRgiTjYHtHPg==i=uv9>h0|4MQPfNIf^^l(bELbp=A|rbj%LO=VTlrz8Lw}|
zg`NeU<7l>BAQM~hhb2pdJuWO>=%!ILs<rjz!m(M6%TF_Ak>B=K*+Q{7aMeIoTqgFF
z-HBW7H1{Tqp0rGyuFn?`8F^zx6I<qI@ge?FBrP)z*(D&c>IVI<&PxVz0mTjTk?4<n
z$FFjFs75w*TF4_me)Rj|>+c@0c$%ozeHHQRkxc!k0z)VG*EchqsYRd0Mm!4!s&V=V
zinc23%hXHa2W@S*Z9BFs$&YMQ$U|Y{iy9tlO=}mr;)upBnEcjt3~1<1Ti$*PVqYN1
z!B@9FWQzBMMP%W#<3Rn{Pg*OkMTe^NY;aQ<V2qj`GFE_JLiT;*-h_FohwnLtQQY_v
zk~kthc4%L_D%3Ao6%ok=gCx!fjNVayweH??#Z*=wUp^#>{|emMHKxynox{#&W%;oc
z2SZX<wuv{fWD6?_XpSECH^`*yps!wHeGs-%n1Fs^Ih`dVPegp7AJz+seHTvmyjV!}
zql{#5Yc~oXN>gHVM!Qu8w-(s@cbA6p08UVLAg)tH173gVdOCS>XR~r(RBFIML{Fje
zP3)NrUf<6w|AO#1l9cs;yeo-<erk?})xupgggQDAsG2wTFv>27$^mOAm4IxKuU5qK
z^@h$WF(9cyVK8nCA#zkEv$=eM{_m_!0BI+?{x8;Ai}LdcLWD%%a6usf0RcX12tS;k
zUxeRUSePFs1m}ea*jU@}iwNrLw$&{2Vz1njd}{^&4nz2kOqZgGapF2ZO;XS=A-oO=
z#!LH#MT|i$t(;HRC>)|SJ1a8b)ioEHjqf__Mo<d!H2tl}y+M^GA0<2@npo*{EpC+k
z%PIH9vh~aF`R&RQcAcap`{8OT+6#7^W9togq4zM>4sW@U0`nH0+}&@MDS#WN6vdaa
zzG8wQN0mnY)uSwK!HzO)>zoEOy<tpg<lO%7j>i1QhQKY4BBDj?-92<KXY+efRjgP-
zxeHgZIpWU3*q_}jTWNX+wDxNlJnkFU;1^kTH6syQ6x70d6NS1rvTbD3Ks=Bzop3j5
z=*B|LGyRbzW)Cb)HxtsV?&qW(xB-%S&r3+FFSzg$ebBF*Jq%jPC_LX64@U>M2LjoJ
zk$4hN8x%s^byVEYH&zxhC|&M9#7HO%iXv9D)tniwM2$fw8SD`(j)!xl#ZKRW5<#cq
zLw4+%6~U2y5{#HpbmMKzGrEjRGt0GG+6G@D$l9LGJQK6UHJ!z3_u1<mB*&<x|5*{m
za7F^kb(eiR>rxXH_N2fUNh%>ESJRBJrD7?zVhg<|9pJQ{=R4~sQ=i|VWGfH7YAHUI
zB{h_XA;sw<Du^nc`Q2#TB4mSeLW7sD8KjdYzn9mG&2oQRb(;2_BnDtx1_w(BN-Mkf
z6P&oIX87YK#OR3AnwK#0Pt)P^Nbn;kb$$>5>o3p~c1}!kxymXTflh7S(6R+7Bnll>
z0tpXn;AK=iaGtWvGjh*)@l>a?v`GwAW9w#>>&8+5{|!QIH+>iO<0&i*YhLJt3!c4N
z+o4sSnU-~b7iH)asJLW%^A~AR3>u@{{}*XRh3#zltc7^31%*WT;kMRrVH*)40bx;y
zt+fq=-&Vku4<Z1A3E1iDj{a%2*q?7X{SKLj@s8XzTP~)7lB1A6td&noCG_j;VDk43
z1IpYniuPtr<@e6~KD>-|g2A{OUc_AxYC7V^r55CVH2g}FB_<V{a`(>ECgbzUsXSln
z6+5)^!@!6Tda~+)wW+NB0_zh?W_V#SR!)<w5@RY&E;B&~+Y<Z(gGSDmJyG7F_Sj%%
zJE{*a&_v&>+bQvhe15BL;Z=Tc#Zu7@dfvZ`I)?c~{BUo4tZtFK83kxrAY<6W4G(Uh
z0rb8OJIHf!)T3CO$w4=w3}$t}Y`yxGvEC{p=mac+Ajoy<9_5<3KtWTr2=+&^&wDu4
z$GV^$(No=!X#!~}v2v)2%l$TCM_}2s7*~i`fh;AJfgPGVVhvS`gKDyU%p`;NiMLQQ
zo9241S^0=~R%);BwW}<;TI6lN-&jZ$w(w8lJP#LB;jLkFF7Wd~MPBmoXf!X?XAz1+
z@dk%|#&WlBWWRjG9Wm#{9?v{NF|K{edpGTLfP@-tP?vjh(qFNgyX~v=@R#zg(if6=
z?c)2Ff?MOeZ1HFKMdeXbZ#?phG1Km7@2nDYxZWHNcfG$1IDVk=OS<pE>|rW+t8fMf
zR#C3g8Rao5Rgz+EK5bZXsDL!XsK@HoU-ddPTQxAifY5PR>4*{Cvo(W~p#T~w>-`_k
zdm<Rb;gVMxtKpD1DsP$~f|S_Ni_N>|>vp+``9|`Dx*T}{g{5pSgJ?M2y6X;YR8K7b
zq{}R?)g2UZFH3f3Nb%wWR%C=zZM1hxA?0;$!6Z?9F&Dd!bdOgr=U#ZGsD;f~Gf5pN
zHcZauhXs=8ckGaI{sYRt_u6#%SM&3Kl))Pm0094A>e|EJ8Rp^7?ci$7<KXG)5}&B9
z(aVb;btS1_TNjzxz^oAns0m}4K-QgTZf7OR&b7hOpT4WfT>0#}8aQnxye!cZ?QZS9
zWRc;W)Bt+N6=!&;CGmhlCR7_=RC}*M@c|I%$*aCVy-^D)U9Jgs3A_hlBJ2;^9p*!b
zQet#ip6n)R=uDiO8DnVmd!@I`<wp#5fhB*D=3jftCnLzxi$PTLFq=+Zr8$2C2lPmk
zAO>sNE?7UA#WJQE-`US@$uZlgmD`KZn>-dvavUw`yIU0Kz%i>T_^wO-T9Ge}QiI6F
zW$oW{uuy`^`TQvWP%Q%hkp9VmhbuSS1LkGR?QH7>vw?ZR{yP)Lrlzi2NRp5>O9t>W
zZm|Y=ZQ@D^@0=9{G9#}iDTU`+*sWlNui0tLa>-jNWan#vV9SAd^QI#jVjRM*x|e5W
zo##n05BK7rkRK24lSLK^&fCzpEO*3r+TKW5+TxY9Fce8cr(3dE%2X9<)Fpa%ADCQQ
zr4tXmva)i(_uR3`II0V$=D)+<y81vTb?g3gwY%GmIUo7G-mSJQ|AyILG7qiO{l#?h
zJp6fv1rd=lz$$50)*0bvy5==18R~$|oGL+Wu~9Y0=${U{LH+_?#R7B8gBi5;6b2a$
zZcL!Z`o`5e+HByLbm|7Z?^u5ZRU_wPCfiL%*~Um)&8C~2&|y>|1>6r-8|Y&X*2Z}0
z`Z%wnhF0SQrI{EFWbe!FvzFwEGGiCHwA5Zoy8#Vb^WB`fi8H=!cDxIZqj%$j{Xm7P
zOb#SKTXb-lBdzwAMw(i<DeBSm)1g88%iprB*x#a2LLF3MJWTB?Gbh<$`BkMqm$F|T
z5i%(AcI#9Ln!Uat30B=c(oOG!GreT*xgHe3J2%Yy(4ki_)2UZJ!)r+R;zi$_WV7=w
zfhqknEn?Ac(?di}QRJR!qcodZRnjuJk2EUt*d@20<)C?3%HX<2ODd#PnaTFXn+Jfu
zDGk`>(}egjdc}%;^Dy6yR$+%aHPw$Y7}_57$9`+$1pnr|LBGy~_(9o-gD$c>JN()H
z#grbmup?H5W5NyTBbL_6i!@dLqElbp71@Y~uDk&<SM35uR+Z_hA%eRm45L9rTR~3+
zhs^H>UBYduq=RysJdXR!>-b{YlpTk>H^6a)hJ|YT^|ch7LW37cC|++GN1ETit-&%f
zLf~i6SMn+0YN7QNxiG1iABgvPinz$OxNMdPq(xWdDf|BP-k@~e#&VX2RSPg7aA4PQ
zCS)YX>ZkOOPvG$l{p8~(<25qxf9T#Pv#JOZbc<yp@jKzW8i7%#4#7nhXfwRc=W6c_
zL|t3G*E_Lj1`nDv*M5N`4y595+Ts*3G2F1)dUyB{-#cUdI!HZNR3HQ0tHIAIM?KoJ
zpZlLokw{qjQU8)M1XJJ7J91MpM;~0QH*&Zl)?wK7cW|=sw{#(u2~Y3Cb<m4627gm;
z^IH~<T;NXzpF~uQf+4uf)#}^clsGBy`m9Qi33a`Z8SVjX+aS?qL0TrY9HkIoGB+c@
zUW>}f(lhpF%<}82&yUaBV`Wb<`ou;a+247KvV`&#eri!pUm82(=g`&nD-1OiSTN2<
z@#H5Hd!20j(86L-N+~Hv!ZFADIxAj+uLR=bM5gy4#AJ};6opE4<?KC?EWw*;6vojW
zf3wv3F*65g;tF0>JJNE)T@GO7vY$FygHqczYA);f*jiVU$!uhe*mqHLVUd%*RcUlZ
z+~QSoahZ7O-zNLW+WmrfmZGa@`x>$0<N7L0!-TIUvCbJn5Lkx@qm+1FkX&C!9om9P
z-|^+g^m<RmV*TS}Ln)Hd7dnt(=OSc`B(V|oV#zd5vP5l?CRa65F4*+T6q?v@PrI$7
zY3w_jyK7@ELv7zVT3M9NT-$@K{iW*mHR7O76NMrU`&XJ6$D;M54%!0=brbaP5v?1N
z^t(B>6S{X522(uWla(kNbw*@zKlTdhCU(F@(LFk$l0k2PA~LX-wPWh?9ps;Ug3bpj
zN<oI=>0SgR<V;HI0Dm*~J{pxQ8-)EZ3;n`X-=$MOMuayIPc)OSFN%|(DQMKws)}20
z2IaSvS2Yc3URc8M?c(|RsZGcJ$!Vns_u~94lNA?f8w)=zXFaRMg190QJE#p?8to=u
zIY9l|S@5PY&Lt``AKe~j;%=T(DaEjJe3}p8*fboHcysEwQ$ahH%8Im{qrxb3W_6!a
z_ve{znNZSPlbcWsy{P@<8wmO})f$KWHKqa1C0bGYC{wf;;unY`2O;g}E2Ex$i)?2P
ztXh#DaFQINh08&akvB_<i*;SmChlOiW_-h33<Zht%D|^&%kW=6uqV0%9PLaq;aJO=
zrVyWjbz~|+48lBw8w4EQ3ab~5G0av`jZ!Fay1HKW;@LcrzPVL=@unrA?IZ3H-g&a#
zae@!`waiHj3N{UuKlZtcW{i7!xh-pzXDQ5L-^||i>Rm<-s$$IVsvymfa3_%$j>LW;
z^rG%b&s0LGPteYS;<V|x*3P52xy1UJV*~BA_jQ?*K8n&;g!7|jK79<BMwiAY^PeHD
zPI+1`W~wBqLGg;aY_*fX{(;&>QWuG$MfdfBVg`LMnBL0{J__GP3<UV9HCfZIy9SO(
z?G(kvmxB>jF_Ei>hI#Q@W}vaTuRE^8dNj8mAYMab;leA~6CeTCz03Ei0W>a2dh7n5
zwp%wYKi|$_wIb63hsx_-TeykV>K39@#_-Xk1Vormx3W4cZq8CB<wV7L=~lh}{Db<t
zrq-8{b855iVupuG`^TbF%Y~mKJugAqQ1O`;dGR(_VUweuLtZ+K)fyyfq@-z_FE}%O
z7$jO)zlm3Ga#+4D85RCU95HkM4!o4SEl^*9Tx#c53K8%*$x;aWHH(YSGM4;9oKrWC
zA>WtXu7GVTsZClNC{8~TwZhjsf}*N|mTd5xAQ9cvaf5xMF-DOrhV2d)GPgfD+5wfg
z%2%jWmyEM&-e($gr&(__V;qUSNS&w8p2?FjM?@h7CT$}F$OO_8Tz34AN-H_<lz1}_
zdE>D<sN5)K=oiC|7b*nFKh-;^gD@@y<Fe1Kf)a(+=pEniE@p5Ea-qb%^lJ3G(w<9-
zj;LMNtJq+nrN&-!qu~gRTx@s}OEh03!(EOwGDYEhlkOlW)pk4Mrp2(Q+FSg3W&uFq
z-d%eNjlYrxTg|OlN9fEg@K*~r>qz1;ys{i6*h#JDt;8Ma|Liw0NIr8)Gg+S%(*VX6
zRnr2~$0e1}Zq@wcFAM1S!YgBMNIS^m`P5p?bm(bUGf)@(X$&wIiQTr;QP=<y@RCG0
z4pMJvHy6)^*g3m)NV)s?cWwwD<1Uwfz@!<!e5_=`4E0rXmK_9tEi9fkC5$P9ZptC$
zzT-rpY{T%Ea-p<Kz%w858<gP2d?*&t@%I>pgGa}0diZ>Yjzo0OJl<p90#Dvo!Ij}O
ze0Yw2jH?AroO#F1^-9JgPz#CzBnJKezV094_1~)`>|e(}F7MxQe|uX069oX2gh~8y
ziT_iF>u=P*J%@i#&(HvX|I~r_8};v@@E_E23;^Ii9TERV{adsDgKB~Ge--@S(SNJm
ef1<6i|3B@k1x0)EuYXVe?9YEDg?W;H-Tn)QLdVnq

literal 0
HcmV?d00001

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index b4ad996..392ee49 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -66,6 +66,6 @@ def build(self):
         self.converter.add_engine_run(crate)
         self.converter.add_action(crate, self.converter.workflow_run)
         self.converter.patch_workflow_input_collection(crate)
-        self.converter.add_inputs_file(crate)
+        self.converter.add_inputs_files(crate)
         self.converter.add_output_formats(crate)
         return crate
diff --git a/src/runcrate/converters/__init__.py b/src/runcrate/converters/__init__.py
index f13f34b..8d05b1d 100644
--- a/src/runcrate/converters/__init__.py
+++ b/src/runcrate/converters/__init__.py
@@ -3,6 +3,5 @@
 
 
 CONVERTERS = {
-    "base": Converter(),
     "cwl": CwlConverter(),
 }
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index d513229..de72f97 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -1,9 +1,11 @@
+from abc import ABC, abstractmethod
+
 from rocrate.model.contextentity import ContextEntity
 
 from ..constants import PROFILES_BASE, PROFILES_VERSION, WROC_PROFILE_VERSION
 
 
-class Converter:
+class Converter(ABC):
     def __init__(self):
         self.root = None
         self.workflow_name = None
@@ -63,67 +65,77 @@ def add_profiles(self, crate):
 
         return
 
+    @abstractmethod
     def add_workflow(self, crate):
         """
         Add the workflow to the crate.
         """
-        raise NotImplementedError("add_workflow")
+        pass
 
+    @abstractmethod
     def add_engine_run(self, crate):
         """
         Add the engine run to the crate.
         """
-        raise NotImplementedError("add_engine_run")
+        pass
 
+    @abstractmethod
     def add_action(self, crate, workflow_run):
         """
         Add the action to the crate.
         """
-        raise NotImplementedError("add_action")
+        pass
 
+    @abstractmethod
     def patch_workflow_input_collection(self, crate):
         """
         Patch the workflow input collection.
         """
-        raise NotImplementedError("patch_workflow_input_collection")
+        pass
 
+    @abstractmethod
     def add_inputs_files(self, crate):
         """
         Add input files to the crate.
         """
-        raise NotImplementedError("add_inputs_files")
+        pass
 
+    @abstractmethod
     def add_output_formats(self, crate):
         """
         Add output formats to the crate.
         """
-        raise NotImplementedError("add_output_formats")
+        pass
 
     # --------------------------------------------------------------------------
     # Helper functions - called by the top level functions
 
+    @abstractmethod
     def get_workflow(self, wf_path):
         """
         Get the workflow from the given path.
 
         Returns a dictionary where tools / workflows are mapped by their ids.
         """
-        raise NotImplementedError("get_workflow")
+        pass
 
+    @abstractmethod
     def get_step_maps(self, wf_defs):
         """
         Get a mapping of step names to their tool names and positions.
         """
-        raise NotImplementedError("get_step_maps")
+        pass
 
+    @abstractmethod
     def build_step_graph(self, wf):
         """
         Build a graph of steps in the workflow.
         """
-        raise NotImplementedError("build_step_graph")
+        pass
 
+    @abstractmethod
     def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
         """
         Convert a CWLProv parameter to a RO-Crate entity.
         """
-        raise NotImplementedError("convert_param")
+        pass
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index fea6dd8..0894f21 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -186,7 +186,7 @@ def patch_workflow_input_collection(self, crate, wf=None):
             if "ComputationalWorkflow" in as_list(tool.type):
                 self.patch_workflow_input_collection(crate, wf=tool)
 
-    def add_inputs_file(self, crate):
+    def add_inputs_files(self, crate):
         path = self.root / "workflow" / INPUTS_FILE_BASENAME
         if path.is_file():
             with open(path) as f:
diff --git a/tests/test_converter_base.py b/tests/test_converter_base.py
index 511acc1..4ae37a0 100644
--- a/tests/test_converter_base.py
+++ b/tests/test_converter_base.py
@@ -16,10 +16,41 @@
 
 from runcrate.converters.base import Converter
 
+class TestConverter(Converter):
+    def add_workflow(self, workflow):
+        raise NotImplementedError
+
+    def add_engine_run(self, engine_run):
+        raise NotImplementedError
+
+    def add_action(self, action, step):
+        raise NotImplementedError
+
+    def patch_workflow_input_collection(self, input_collection):
+        raise NotImplementedError
+
+    def add_inputs_files(self, inputs_files):
+        raise NotImplementedError
+
+    def add_output_formats(self, output_formats):
+        raise NotImplementedError
+
+    def get_workflow(self, workflow):
+        raise NotImplementedError
+
+    def get_step_maps(self, step_maps):
+        raise NotImplementedError
+
+    def build_step_graph(self, step_graph):
+        raise NotImplementedError
+
+    def convert_param(self, param, step):
+        raise NotImplementedError
+
 
 @pytest.fixture
 def converter_instance():
-    converter_instance = Converter()
+    converter_instance = TestConverter()
     return converter_instance
 
 
From f49369e8c1c92985755bf9e5b9c2d535b2aa3425 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 08:58:18 +0000
Subject: [PATCH 20/23] Remove abstract base class tests and redundant pass statements

---
 src/runcrate/converters/__init__.py |   1 -
 src/runcrate/converters/base.py     |  10 ---
 tests/test_converter_base.py        | 108 ----------------------------
 3 files changed, 119 deletions(-)
 delete mode 100644 tests/test_converter_base.py

diff --git a/src/runcrate/converters/__init__.py b/src/runcrate/converters/__init__.py
index 8d05b1d..09a4250 100644
--- a/src/runcrate/converters/__init__.py
+++ b/src/runcrate/converters/__init__.py
@@ -1,4 +1,3 @@
-from .base import Converter
 from .cwl import CwlConverter
 
 
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index de72f97..16af8fa 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -70,42 +70,36 @@ def add_workflow(self, crate):
         """
         Add the workflow to the crate.
         """
-        pass
 
     @abstractmethod
     def add_engine_run(self, crate):
         """
         Add the engine run to the crate.
         """
-        pass
 
     @abstractmethod
     def add_action(self, crate, workflow_run):
         """
         Add the action to the crate.
         """
-        pass
 
     @abstractmethod
     def patch_workflow_input_collection(self, crate):
         """
         Patch the workflow input collection.
         """
-        pass
 
     @abstractmethod
     def add_inputs_files(self, crate):
         """
         Add input files to the crate.
         """
-        pass
 
     @abstractmethod
     def add_output_formats(self, crate):
         """
         Add output formats to the crate.
         """
-        pass
 
     # --------------------------------------------------------------------------
     # Helper functions - called by the top level functions
@@ -117,25 +111,21 @@ def get_workflow(self, wf_path):
 
         Returns a dictionary where tools / workflows are mapped by their ids.
         """
-        pass
 
     @abstractmethod
     def get_step_maps(self, wf_defs):
         """
         Get a mapping of step names to their tool names and positions.
         """
-        pass
 
     @abstractmethod
     def build_step_graph(self, wf):
         """
         Build a graph of steps in the workflow.
         """
-        pass
 
     @abstractmethod
     def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
         """
         Convert a CWLProv parameter to a RO-Crate entity.
         """
-        pass
diff --git a/tests/test_converter_base.py b/tests/test_converter_base.py
deleted file mode 100644
index 4ae37a0..0000000
--- a/tests/test_converter_base.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright 2022-2024 CRS4.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-from runcrate.converters.base import Converter
-
-class TestConverter(Converter):
-    def add_workflow(self, workflow):
-        raise NotImplementedError
-
-    def add_engine_run(self, engine_run):
-        raise NotImplementedError
-
-    def add_action(self, action, step):
-        raise NotImplementedError
-
-    def patch_workflow_input_collection(self, input_collection):
-        raise NotImplementedError
-
-    def add_inputs_files(self, inputs_files):
-        raise NotImplementedError
-
-    def add_output_formats(self, output_formats):
-        raise NotImplementedError
-
-    def get_workflow(self, workflow):
-        raise NotImplementedError
-
-    def get_step_maps(self, step_maps):
-        raise NotImplementedError
-
-    def build_step_graph(self, step_graph):
-        raise NotImplementedError
-
-    def convert_param(self, param, step):
-        raise NotImplementedError
-
-
-@pytest.fixture
-def converter_instance():
-    converter_instance = TestConverter()
-    return converter_instance
-
-
-def test_initialization(converter_instance):
-    assert isinstance(converter_instance, Converter)
-
-
-def test_add_workflow(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_workflow(None)
-
-
-def test_add_engine_run(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_engine_run(None)
-
-
-def test_add_action(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_action(None, None)
-
-
-def test_patch_workflow_input_collection(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.patch_workflow_input_collection(None)
-
-
-def test_add_inputs_files(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_inputs_files(None)
-
-
-def test_add_output_formats(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.add_output_formats(None)
-
-
-def test_get_workflow(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.get_workflow(None)
-
-
-def test_get_step_maps(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.get_step_maps(None)
-
-
-def test_build_step_graph(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.build_step_graph(None)
-
-
-def test_convert_param(converter_instance):
-    with pytest.raises(NotImplementedError):
-        converter_instance.convert_param(None, None)

From 64c709cdf3eccb0b7328e7e31a86dc404a30f2a7 Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 09:07:14 +0000
Subject: [PATCH 21/23] Remove accidentally added zip

---
 revsort-run-1.crate.zip | Bin 6056 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 revsort-run-1.crate.zip

diff --git a/revsort-run-1.crate.zip b/revsort-run-1.crate.zip
deleted file mode 100644
index f69f19a678695f121883753cdd0d5ea116e7f8c6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6056
zcma)AWl$W<mK|mw5Zv7vT!Ib`!DVoF7zRjicL^@ZKycUKfe;8bKydfq1cFP@1Puh;
zyxkw)zHDvP>;BQzU474wTj$<7_h>;usKfvO00Y4G9AT^nab+b20RVCY008EnS2q~k
z(bk3s?(5WV2-W<?i{Ek1S>08XM$ewhk>IRfse?yS_)AN_KSvEhnvl1Ii&Av|^O9-B
z`;$AwI6ZkrcK39AO!%z?4a2=WD>ST%u~lCBcx~6;<na?Bd%uoD*d98Wujr1t*AmH)
zpM!;Ew<N|>Ws*=f7(D?DgXzABC#+KUJe*-TYPu8Y6t&sW7|zQq?icOuymi|*BaUx)
zQqBTIjON?Z)so&B=sL{u8{v=ftSOoHR871JxeO&Vm^)8=KZuDKoQ&+jfG~L!r@W*o
zG{a)*p8{$z`(f(H&TB52h<|_AH$u*q`hMFsSx}wm@WgGbpMNTj5o)KyltRcKK8X#^
zV)N5dQQ(8g7$t6HQ;zo585emoD*_wBQ_GDACJ@zenp<j^7P5?K1BuE>EcS9F-}U5D
zD4kMR(X>Y3mxWD-Hu?Ghkzqv2*7RcIQyRAOrh^0^k05^+QF5s~&Q{bzvN!{H>6d?s
z3?>(o)9-l*bSRF_sOolkWU_?s;~ey4F{^#IQ5goB1w?YWKVxkOC6dR{juJLpaubL&
zj?NvnQo6QxSeZ~u)YBSQ#e+pmZ<$c}wx^0tN9PQ;93?aKUA&rfI2r%w#;yc9yNl+R
zu(O!;!KHQ#o;j`iaTk~SN`sTGN!j8^fMd?5x5he(dqSy&g6Q16>EJQTU?r)JtR4|3
zI_tsnVhu!vb)T~vxj|?Xb@_|-TVc}IFZInnWb03Il`_AuUnOOf(&k%UinLF~eRZEg
z(ZCW9(B{6h_>^*OP`^Q1I-z0tXsf~GcnPX&n<{Z19J$(6aXar^wY%8F@a;-Ii{ksP
z_e(0|PVJ}}L}nyAN&%l5Tm8VD)e`-y8`n2hdJj)?$?-gF;|J>74`roz$_o&}SGCoH
z@Iq1|*FmY_-t7t`_?RGA%EQqR=y7HxQIHp}adNyb3D3dZh99|oty*I(-L!UaigCFw
zB1WvWd=%o!U$_Mg_U7*XJla&hqVvyZx<6>!wZ9GXdqvyn7_Y#}&t2HJ(xdhe8Tm=)
zZJo};US@d^tdVB1c+OJm-)g6LWYN_P2mlzO0sw4(YDa)y*bXiXv$e4kw1ZefL}7yb
zFk!fewFs|>4O|E&470O?@CgV*^mWA(mU;1yy<hK7FLrdLo3^N=04nY6!GtWL*f9--
zlEJH5xZ!|k?yPfv>5x`(cD|K4*1=OnX;fGAw)fkR77Kw+qz<M(!!vP(VXCwdPpyQ(
z%YzlpZRgiTjYHtHPg==i=uv9>h0|4MQPfNIf^^l(bELbp=A|rbj%LO=VTlrz8Lw}|
zg`NeU<7l>BAQM~hhb2pdJuWO>=%!ILs<rjz!m(M6%TF_Ak>B=K*+Q{7aMeIoTqgFF
z-HBW7H1{Tqp0rGyuFn?`8F^zx6I<qI@ge?FBrP)z*(D&c>IVI<&PxVz0mTjTk?4<n
z$FFjFs75w*TF4_me)Rj|>+c@0c$%ozeHHQRkxc!k0z)VG*EchqsYRd0Mm!4!s&V=V
zinc23%hXHa2W@S*Z9BFs$&YMQ$U|Y{iy9tlO=}mr;)upBnEcjt3~1<1Ti$*PVqYN1
z!B@9FWQzBMMP%W#<3Rn{Pg*OkMTe^NY;aQ<V2qj`GFE_JLiT;*-h_FohwnLtQQY_v
zk~kthc4%L_D%3Ao6%ok=gCx!fjNVayweH??#Z*=wUp^#>{|emMHKxynox{#&W%;oc
z2SZX<wuv{fWD6?_XpSECH^`*yps!wHeGs-%n1Fs^Ih`dVPegp7AJz+seHTvmyjV!}
zql{#5Yc~oXN>gHVM!Qu8w-(s@cbA6p08UVLAg)tH173gVdOCS>XR~r(RBFIML{Fje
zP3)NrUf<6w|AO#1l9cs;yeo-<erk?})xupgggQDAsG2wTFv>27$^mOAm4IxKuU5qK
z^@h$WF(9cyVK8nCA#zkEv$=eM{_m_!0BI+?{x8;Ai}LdcLWD%%a6usf0RcX12tS;k
zUxeRUSePFs1m}ea*jU@}iwNrLw$&{2Vz1njd}{^&4nz2kOqZgGapF2ZO;XS=A-oO=
z#!LH#MT|i$t(;HRC>)|SJ1a8b)ioEHjqf__Mo<d!H2tl}y+M^GA0<2@npo*{EpC+k
z%PIH9vh~aF`R&RQcAcap`{8OT+6#7^W9togq4zM>4sW@U0`nH0+}&@MDS#WN6vdaa
zzG8wQN0mnY)uSwK!HzO)>zoEOy<tpg<lO%7j>i1QhQKY4BBDj?-92<KXY+efRjgP-
zxeHgZIpWU3*q_}jTWNX+wDxNlJnkFU;1^kTH6syQ6x70d6NS1rvTbD3Ks=Bzop3j5
z=*B|LGyRbzW)Cb)HxtsV?&qW(xB-%S&r3+FFSzg$ebBF*Jq%jPC_LX64@U>M2LjoJ
zk$4hN8x%s^byVEYH&zxhC|&M9#7HO%iXv9D)tniwM2$fw8SD`(j)!xl#ZKRW5<#cq
zLw4+%6~U2y5{#HpbmMKzGrEjRGt0GG+6G@D$l9LGJQK6UHJ!z3_u1<mB*&<x|5*{m
za7F^kb(eiR>rxXH_N2fUNh%>ESJRBJrD7?zVhg<|9pJQ{=R4~sQ=i|VWGfH7YAHUI
zB{h_XA;sw<Du^nc`Q2#TB4mSeLW7sD8KjdYzn9mG&2oQRb(;2_BnDtx1_w(BN-Mkf
z6P&oIX87YK#OR3AnwK#0Pt)P^Nbn;kb$$>5>o3p~c1}!kxymXTflh7S(6R+7Bnll>
z0tpXn;AK=iaGtWvGjh*)@l>a?v`GwAW9w#>>&8+5{|!QIH+>iO<0&i*YhLJt3!c4N
z+o4sSnU-~b7iH)asJLW%^A~AR3>u@{{}*XRh3#zltc7^31%*WT;kMRrVH*)40bx;y
zt+fq=-&Vku4<Z1A3E1iDj{a%2*q?7X{SKLj@s8XzTP~)7lB1A6td&noCG_j;VDk43
z1IpYniuPtr<@e6~KD>-|g2A{OUc_AxYC7V^r55CVH2g}FB_<V{a`(>ECgbzUsXSln
z6+5)^!@!6Tda~+)wW+NB0_zh?W_V#SR!)<w5@RY&E;B&~+Y<Z(gGSDmJyG7F_Sj%%
zJE{*a&_v&>+bQvhe15BL;Z=Tc#Zu7@dfvZ`I)?c~{BUo4tZtFK83kxrAY<6W4G(Uh
z0rb8OJIHf!)T3CO$w4=w3}$t}Y`yxGvEC{p=mac+Ajoy<9_5<3KtWTr2=+&^&wDu4
z$GV^$(No=!X#!~}v2v)2%l$TCM_}2s7*~i`fh;AJfgPGVVhvS`gKDyU%p`;NiMLQQ
zo9241S^0=~R%);BwW}<;TI6lN-&jZ$w(w8lJP#LB;jLkFF7Wd~MPBmoXf!X?XAz1+
z@dk%|#&WlBWWRjG9Wm#{9?v{NF|K{edpGTLfP@-tP?vjh(qFNgyX~v=@R#zg(if6=
z?c)2Ff?MOeZ1HFKMdeXbZ#?phG1Km7@2nDYxZWHNcfG$1IDVk=OS<pE>|rW+t8fMf
zR#C3g8Rao5Rgz+EK5bZXsDL!XsK@HoU-ddPTQxAifY5PR>4*{Cvo(W~p#T~w>-`_k
zdm<Rb;gVMxtKpD1DsP$~f|S_Ni_N>|>vp+``9|`Dx*T}{g{5pSgJ?M2y6X;YR8K7b
zq{}R?)g2UZFH3f3Nb%wWR%C=zZM1hxA?0;$!6Z?9F&Dd!bdOgr=U#ZGsD;f~Gf5pN
zHcZauhXs=8ckGaI{sYRt_u6#%SM&3Kl))Pm0094A>e|EJ8Rp^7?ci$7<KXG)5}&B9
z(aVb;btS1_TNjzxz^oAns0m}4K-QgTZf7OR&b7hOpT4WfT>0#}8aQnxye!cZ?QZS9
zWRc;W)Bt+N6=!&;CGmhlCR7_=RC}*M@c|I%$*aCVy-^D)U9Jgs3A_hlBJ2;^9p*!b
zQet#ip6n)R=uDiO8DnVmd!@I`<wp#5fhB*D=3jftCnLzxi$PTLFq=+Zr8$2C2lPmk
zAO>sNE?7UA#WJQE-`US@$uZlgmD`KZn>-dvavUw`yIU0Kz%i>T_^wO-T9Ge}QiI6F
zW$oW{uuy`^`TQvWP%Q%hkp9VmhbuSS1LkGR?QH7>vw?ZR{yP)Lrlzi2NRp5>O9t>W
zZm|Y=ZQ@D^@0=9{G9#}iDTU`+*sWlNui0tLa>-jNWan#vV9SAd^QI#jVjRM*x|e5W
zo##n05BK7rkRK24lSLK^&fCzpEO*3r+TKW5+TxY9Fce8cr(3dE%2X9<)Fpa%ADCQQ
zr4tXmva)i(_uR3`II0V$=D)+<y81vTb?g3gwY%GmIUo7G-mSJQ|AyILG7qiO{l#?h
zJp6fv1rd=lz$$50)*0bvy5==18R~$|oGL+Wu~9Y0=${U{LH+_?#R7B8gBi5;6b2a$
zZcL!Z`o`5e+HByLbm|7Z?^u5ZRU_wPCfiL%*~Um)&8C~2&|y>|1>6r-8|Y&X*2Z}0
z`Z%wnhF0SQrI{EFWbe!FvzFwEGGiCHwA5Zoy8#Vb^WB`fi8H=!cDxIZqj%$j{Xm7P
zOb#SKTXb-lBdzwAMw(i<DeBSm)1g88%iprB*x#a2LLF3MJWTB?Gbh<$`BkMqm$F|T
z5i%(AcI#9Ln!Uat30B=c(oOG!GreT*xgHe3J2%Yy(4ki_)2UZJ!)r+R;zi$_WV7=w
zfhqknEn?Ac(?di}QRJR!qcodZRnjuJk2EUt*d@20<)C?3%HX<2ODd#PnaTFXn+Jfu
zDGk`>(}egjdc}%;^Dy6yR$+%aHPw$Y7}_57$9`+$1pnr|LBGy~_(9o-gD$c>JN()H
z#grbmup?H5W5NyTBbL_6i!@dLqElbp71@Y~uDk&<SM35uR+Z_hA%eRm45L9rTR~3+
zhs^H>UBYduq=RysJdXR!>-b{YlpTk>H^6a)hJ|YT^|ch7LW37cC|++GN1ETit-&%f
zLf~i6SMn+0YN7QNxiG1iABgvPinz$OxNMdPq(xWdDf|BP-k@~e#&VX2RSPg7aA4PQ
zCS)YX>ZkOOPvG$l{p8~(<25qxf9T#Pv#JOZbc<yp@jKzW8i7%#4#7nhXfwRc=W6c_
zL|t3G*E_Lj1`nDv*M5N`4y595+Ts*3G2F1)dUyB{-#cUdI!HZNR3HQ0tHIAIM?KoJ
zpZlLokw{qjQU8)M1XJJ7J91MpM;~0QH*&Zl)?wK7cW|=sw{#(u2~Y3Cb<m4627gm;
z^IH~<T;NXzpF~uQf+4uf)#}^clsGBy`m9Qi33a`Z8SVjX+aS?qL0TrY9HkIoGB+c@
zUW>}f(lhpF%<}82&yUaBV`Wb<`ou;a+247KvV`&#eri!pUm82(=g`&nD-1OiSTN2<
z@#H5Hd!20j(86L-N+~Hv!ZFADIxAj+uLR=bM5gy4#AJ};6opE4<?KC?EWw*;6vojW
zf3wv3F*65g;tF0>JJNE)T@GO7vY$FygHqczYA);f*jiVU$!uhe*mqHLVUd%*RcUlZ
z+~QSoahZ7O-zNLW+WmrfmZGa@`x>$0<N7L0!-TIUvCbJn5Lkx@qm+1FkX&C!9om9P
z-|^+g^m<RmV*TS}Ln)Hd7dnt(=OSc`B(V|oV#zd5vP5l?CRa65F4*+T6q?v@PrI$7
zY3w_jyK7@ELv7zVT3M9NT-$@K{iW*mHR7O76NMrU`&XJ6$D;M54%!0=brbaP5v?1N
z^t(B>6S{X522(uWla(kNbw*@zKlTdhCU(F@(LFk$l0k2PA~LX-wPWh?9ps;Ug3bpj
zN<oI=>0SgR<V;HI0Dm*~J{pxQ8-)EZ3;n`X-=$MOMuayIPc)OSFN%|(DQMKws)}20
z2IaSvS2Yc3URc8M?c(|RsZGcJ$!Vns_u~94lNA?f8w)=zXFaRMg190QJE#p?8to=u
zIY9l|S@5PY&Lt``AKe~j;%=T(DaEjJe3}p8*fboHcysEwQ$ahH%8Im{qrxb3W_6!a
z_ve{znNZSPlbcWsy{P@<8wmO})f$KWHKqa1C0bGYC{wf;;unY`2O;g}E2Ex$i)?2P
ztXh#DaFQINh08&akvB_<i*;SmChlOiW_-h33<Zht%D|^&%kW=6uqV0%9PLaq;aJO=
zrVyWjbz~|+48lBw8w4EQ3ab~5G0av`jZ!Fay1HKW;@LcrzPVL=@unrA?IZ3H-g&a#
zae@!`waiHj3N{UuKlZtcW{i7!xh-pzXDQ5L-^||i>Rm<-s$$IVsvymfa3_%$j>LW;
z^rG%b&s0LGPteYS;<V|x*3P52xy1UJV*~BA_jQ?*K8n&;g!7|jK79<BMwiAY^PeHD
zPI+1`W~wBqLGg;aY_*fX{(;&>QWuG$MfdfBVg`LMnBL0{J__GP3<UV9HCfZIy9SO(
z?G(kvmxB>jF_Ei>hI#Q@W}vaTuRE^8dNj8mAYMab;leA~6CeTCz03Ei0W>a2dh7n5
zwp%wYKi|$_wIb63hsx_-TeykV>K39@#_-Xk1Vormx3W4cZq8CB<wV7L=~lh}{Db<t
zrq-8{b855iVupuG`^TbF%Y~mKJugAqQ1O`;dGR(_VUweuLtZ+K)fyyfq@-z_FE}%O
z7$jO)zlm3Ga#+4D85RCU95HkM4!o4SEl^*9Tx#c53K8%*$x;aWHH(YSGM4;9oKrWC
zA>WtXu7GVTsZClNC{8~TwZhjsf}*N|mTd5xAQ9cvaf5xMF-DOrhV2d)GPgfD+5wfg
z%2%jWmyEM&-e($gr&(__V;qUSNS&w8p2?FjM?@h7CT$}F$OO_8Tz34AN-H_<lz1}_
zdE>D<sN5)K=oiC|7b*nFKh-;^gD@@y<Fe1Kf)a(+=pEniE@p5Ea-qb%^lJ3G(w<9-
zj;LMNtJq+nrN&-!qu~gRTx@s}OEh03!(EOwGDYEhlkOlW)pk4Mrp2(Q+FSg3W&uFq
z-d%eNjlYrxTg|OlN9fEg@K*~r>qz1;ys{i6*h#JDt;8Ma|Liw0NIr8)Gg+S%(*VX6
zRnr2~$0e1}Zq@wcFAM1S!YgBMNIS^m`P5p?bm(bUGf)@(X$&wIiQTr;QP=<y@RCG0
z4pMJvHy6)^*g3m)NV)s?cWwwD<1Uwfz@!<!e5_=`4E0rXmK_9tEi9fkC5$P9ZptC$
zzT-rpY{T%Ea-p<Kz%w858<gP2d?*&t@%I>pgGa}0diZ>Yjzo0OJl<p90#Dvo!Ij}O
ze0Yw2jH?AroO#F1^-9JgPz#CzBnJKezV094_1~)`>|e(}F7MxQe|uX069oX2gh~8y
ziT_iF>u=P*J%@i#&(HvX|I~r_8};v@@E_E23;^Ii9TERV{adsDgKB~Ge--@S(SNJm
ef1<6i|3B@k1x0)EuYXVe?9YEDg?W;H-Tn)QLdVnq


From 47482b93ebdfd011a0ee824d46530f7fac1e2d6c Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Thu, 12 Dec 2024 09:50:53 +0000
Subject: [PATCH 22/23] Update docstring

---
 src/runcrate/cli.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/runcrate/cli.py b/src/runcrate/cli.py
index 1bea861..589e5f7 100644
--- a/src/runcrate/cli.py
+++ b/src/runcrate/cli.py
@@ -66,9 +66,12 @@ def cli():
 )
 def convert(root, converter, output, license, workflow_name, readme):
     """\
-    Convert a CWLProv RO bundle into a Workflow Run RO-Crate.
+    Convert a provenance bundle into a Workflow Run RO-Crate.
 
-    RO_DIR: top-level directory of the CWLProv RO
+    Supported (see: converters):
+    - CWLProv RO.
+
+    RO_DIR: top-level directory of the provenance bundle.
     """
 
     if not output:

From 5985eef0110839ce6566ad1bfd10c2ebb39be9ff Mon Sep 17 00:00:00 2001
From: Oliver Woolland <oliver.woolland@manchester.ac.uk>
Date: Fri, 13 Dec 2024 07:57:24 +0000
Subject: [PATCH 23/23] Refactor converter initialisation to be lazy and
 bespoke

---
 src/runcrate/convert.py         | 30 ++++++------------------------
 src/runcrate/converters/base.py | 17 ++++++++++++++---
 src/runcrate/converters/cwl.py  | 22 ++++++++++++++++++++++
 3 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py
index 392ee49..908ef33 100644
--- a/src/runcrate/convert.py
+++ b/src/runcrate/convert.py
@@ -18,20 +18,12 @@
 Generate a Workflow Run RO-Crate from a CWLProv RO bundle.
 """
 
-from pathlib import Path
-
-from bdbag.bdbagit import BDBag
-from cwlprov.prov import Provenance
-from cwlprov.ro import ResearchObject
 from rocrate.rocrate import ROCrate
 
 from .constants import TERMS_NAMESPACE
 from .converters import CONVERTERS
 
 
-MANIFEST_FILE = "manifest-sha1.txt"
-
-
 class ProvCrateBuilder:
     def __init__(self,
                  root,
@@ -40,22 +32,12 @@ def __init__(self,
                  license=None,
                  readme=None):
         self.converter = converter
-        self.converter.root = Path(root)
-        self.converter.workflow_name = workflow_name
-        self.converter.license = license
-        self.converter.readme = Path(readme) if readme else readme
-        self.converter.wf_path = self.converter.root / "workflow" / self.converter.WORKFLOW_BASENAME
-        self.converter.workflow_definition = self.converter.get_workflow()
-        self.converter.step_maps = self.converter.get_step_maps()
-        self.converter.ro = ResearchObject(BDBag(str(root)))
-        self.converter.with_prov = set(str(_) for _ in self.converter.ro.resources_with_provenance())
-        self.converter.workflow_run = Provenance(self.converter.ro).activity()
-        self.converter.roc_engine_run = None
-        self.converter.control_actions = {}
-        self.converter.collection = {}
-        self.converter.hashes = {}
-        self.converter.file_map = {}
-        self.converter.manifest = self.converter.get_manifest(self.converter.root, MANIFEST_FILE)
+        self.converter.populate(
+            root,
+            workflow_name=workflow_name,
+            license=license,
+            readme=readme
+        )
 
     def build(self):
         crate = ROCrate(gen_preview=False)
diff --git a/src/runcrate/converters/base.py b/src/runcrate/converters/base.py
index 16af8fa..fa6d54b 100644
--- a/src/runcrate/converters/base.py
+++ b/src/runcrate/converters/base.py
@@ -65,6 +65,12 @@ def add_profiles(self, crate):
 
         return
 
+    @abstractmethod
+    def populate(self, root, workflow_name=None, license=None, readme=None):
+        """
+        Populate the converter with the given root directory and optional metadata.
+        """
+
     @abstractmethod
     def add_workflow(self, crate):
         """
@@ -105,11 +111,16 @@ def add_output_formats(self, crate):
     # Helper functions - called by the top level functions
 
     @abstractmethod
-    def get_workflow(self, wf_path):
+    def get_workflow(self):
         """
-        Get the workflow from the given path.
+        Should return a dictionary describing the workflow,
+        fetched from e.g. a file at self.wf_path.
 
-        Returns a dictionary where tools / workflows are mapped by their ids.
+        The definition should contain:
+        - name: the workflow name
+        - inputs: a list of inputs
+        - outputs: a list of outputs
+        - steps: a list of steps
         """
 
     @abstractmethod
diff --git a/src/runcrate/converters/cwl.py b/src/runcrate/converters/cwl.py
index 0894f21..3f33494 100644
--- a/src/runcrate/converters/cwl.py
+++ b/src/runcrate/converters/cwl.py
@@ -6,8 +6,10 @@
 
 import networkx as nx
 import prov.model
+from bdbag.bdbagit import BDBag
 from cwl_utils.parser import load_document_by_yaml
 from cwlprov.prov import Entity, Provenance
+from cwlprov.ro import ResearchObject
 from cwlprov.utils import first
 from rocrate.model.contextentity import ContextEntity
 from rocrate.model.softwareapplication import SoftwareApplication
@@ -17,6 +19,8 @@
 from .base import Converter
 
 
+MANIFEST_FILE = "manifest-sha1.txt"
+
 CWLPROV_NONE = "https://w3id.org/cwl/prov#None"
 
 CWL_TYPE_MAP = {
@@ -42,6 +46,24 @@ class CwlConverter(Converter):
 
     WORKFLOW_BASENAME = "packed.cwl"
 
+    def populate(self, root, workflow_name=None, license=None, readme=None):
+        self.root = Path(root)
+        self.workflow_name = workflow_name
+        self.license = license
+        self.readme = Path(readme) if readme else readme
+        self.wf_path = self.root / "workflow" / self.WORKFLOW_BASENAME
+        self.workflow_definition = self.get_workflow()
+        self.step_maps = self.get_step_maps()
+        self.ro = ResearchObject(BDBag(str(root)))
+        self.with_prov = set(str(_) for _ in self.ro.resources_with_provenance())
+        self.workflow_run = Provenance(self.ro).activity()
+        self.roc_engine_run = None
+        self.control_actions = {}
+        self.collection = {}
+        self.hashes = {}
+        self.file_map = {}
+        self.manifest = self.get_manifest(self.root, MANIFEST_FILE)
+
     # --------------------------------------------------------------------------
     # Top level methods, called by build()