Skip to content

Commit f8a609e

Browse files
authored
Add BCO format (#3)
Signed-off-by: Ben Sherman <[email protected]>
1 parent cf29db6 commit f8a609e

File tree

13 files changed

+552
-115
lines changed

13 files changed

+552
-115
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ gradle.properties
77
# Ignore Gradle build output directory
88
build
99
work
10-
out
10+
results

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
config ?= compileClasspath
3+
version ?= $(shell grep 'Plugin-Version' plugins/nf-prov/src/resources/META-INF/MANIFEST.MF | awk '{ print $$2 }')
34

45
ifdef module
56
mm = :${module}:
@@ -42,6 +43,11 @@ else
4243
./gradlew ${mm}test --tests ${class}
4344
endif
4445

46+
# Build the plugin and install it into the local Nextflow plugins directory.
# `version` is read from the Plugin-Version field of the plugin MANIFEST.MF.
.PHONY: install
install:
	./gradlew copyPluginZip
	rm -rf ${HOME}/.nextflow/plugins/nf-prov-${version}
	# the plugins directory does not exist on a fresh Nextflow installation
	mkdir -p ${HOME}/.nextflow/plugins
	cp -r build/plugins/nf-prov-${version} ${HOME}/.nextflow/plugins/
50+
4551
#
4652
# Upload JAR artifacts to Maven Central
4753
#

README.md

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,31 +34,37 @@ Create the provenance manifest (default: `true` if plugin is loaded).
3434

3535
The path of the provenance manifest (default: `manifest.json`).
3636

37-
`prov.patterns`
37+
`prov.format`
3838

39-
List of file patterns to include in the provenance manifest, from the set of published files. By default, all published files are included.
39+
The manifest format. Can be `legacy` or `bco` (default: `legacy`).
40+
41+
*Note: The BCO format is experimental and may change in future releases. Visit the [BCO User Guide](https://docs.biocomputeobject.org/user_guide/) to learn more about this format and how to extend it with information that isn't available to Nextflow.*
4042

4143
`prov.overwrite`
4244

4345
Overwrite any existing provenance report with the same name (default: `false`).
4446

47+
`prov.patterns`
48+
49+
List of file patterns to include in the provenance manifest, from the set of published files. By default, all published files are included.
50+
4551
## Development
4652

4753
Run the following commands to build and test the nf-prov Nextflow plugin. Refer to the [nf-hello](https://github.com/nextflow-io/nf-hello) README for additional instructions (_e.g._ for publishing the plugin).
4854

49-
```console
55+
```bash
5056
# (Optional) Checkout relevant feature branch
5157
# git checkout <branch>
5258

5359
# Create an empty folder for nf-prov and nextflow repos
54-
git clone --depth 1 https://github.com/nextflow-io/nextflow ../nextflow-nf-prov
60+
git clone --depth 1 https://github.com/nextflow-io/nextflow ../nextflow
5561

5662
# Prepare the nextflow repo
57-
cd ../nextflow-nf-prov && ./gradlew compile exportClasspath && cd -
63+
cd ../nextflow && ./gradlew compile exportClasspath && cd -
5864

5965
# Prepare the nf-prov repo
6066
grep -v 'includeBuild' settings.gradle > settings.gradle.bkp
61-
echo "includeBuild('../nextflow-nf-prov')" >> settings.gradle.bkp
67+
echo "includeBuild('../nextflow')" >> settings.gradle.bkp
6268
mv -f settings.gradle.bkp settings.gradle
6369
./gradlew assemble
6470

@@ -69,7 +75,7 @@ mv -f settings.gradle.bkp settings.gradle
6975
## Package, Upload, and Publish
7076

7177
The project should be hosted in a GitHub repository whose name should match the name of the plugin,
72-
that is the name of the directory in the `plugins` folder e.g. `nf-synapse` in this project.
78+
that is the name of the directory in the `plugins` folder e.g. `nf-prov` in this project.
7379

7480
Follow these steps to package, upload and publish the plugin:
7581

@@ -83,13 +89,13 @@ Following these step to package, upload and publish the plugin:
8389

8490
2. Update the `Plugin-Version` field in the following file with the release version:
8591

86-
```
92+
```bash
8793
plugins/nf-prov/src/resources/META-INF/MANIFEST.MF
8894
```
8995

9096
3. Run the following command to package and upload the plugin in the GitHub project releases page:
9197

92-
```
98+
```bash
9399
./gradlew :plugins:nf-prov:upload
94100
```
95101

nextflow.config

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,13 @@
1-
prov {
2-
enabled = true
3-
overwrite = true
4-
file = 'out/manifest.json'
1+
plugins {
2+
3+
}
4+
5+
params {
6+
outdir = 'results'
57
}
68

7-
manifest {
8-
name = "nf-prov-test"
9-
author = "Bruno Grande"
10-
homePage = "https://github.com/sage-bionetworks-workflows/nf-prov"
11-
description = "Nextflow plugin for tracking provenance"
12-
mainScript = "main.nf"
13-
nextflowVersion = "!>=21.10.3"
14-
version = "0.2.0"
15-
doi = ""
9+
prov {
    overwrite = true
    // `prov.file` is the path of the manifest file itself, not a directory
    file = "${params.outdir}/bco.json"
    format = 'bco'
}
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2023, Seqera Labs
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package nextflow.prov
18+
19+
import java.nio.file.Files
20+
import java.nio.file.Path
21+
import java.time.format.DateTimeFormatter
22+
23+
import groovy.json.JsonOutput
24+
import groovy.transform.CompileStatic
25+
import nextflow.Session
26+
import nextflow.exception.AbortOperationException
27+
import nextflow.processor.TaskRun
28+
import nextflow.script.WorkflowMetadata
29+
import nextflow.util.CacheHelper
30+
31+
/**
 * Renderer for the BioCompute Object (BCO) format.
 *
 * The renderer collects workflow metadata, task inputs/outputs and
 * published files from a completed run and writes them as a single
 * JSON document.
 *
 * @author Ben Sherman <[email protected]>
 */
@CompileStatic
class BcoRenderer implements Renderer {

    /** Remote repository of the pipeline project, or null if there is none. */
    private URL repository

    /** Commit id of the pipeline project, as reported by the workflow metadata. */
    private String commitId

    /** URI string of the launch directory. */
    private String launchDir

    /** URI string of the project directory. */
    private String projectDir

    /** URI string of the work directory. */
    private String workDir

    /**
     * Normalize local paths to remove environment-specific directories.
     *
     * @param path the path to normalize
     * @return the normalized path as a string
     */
    private String normalizePath(Path path) {
        normalizePath(path.toUriString())
    }

    /**
     * Normalize a path string to remove environment-specific directories.
     *
     * The rules are applied in order and the first match wins:
     * work directory, then project directory (only when a repository
     * is known), then launch directory. Anything else is returned as is.
     *
     * @param path the path (or arbitrary string value) to normalize
     * @return the normalized string
     */
    private String normalizePath(String path) {
        // replace work directory with relative path
        final workRelative = relativeTo(path, workDir)
        if( workRelative != null )
            return workRelative ? 'work/' + workRelative : 'work'

        // replace project directory with source URL (if applicable)
        if( repository && relativeTo(path, projectDir) != null )
            return getProjectSourceUrl(path)

        // replace launch directory with relative path
        // (the launch directory itself is left unchanged)
        final launchRelative = relativeTo(path, launchDir)
        if( launchRelative )
            return launchRelative

        return path
    }

    /**
     * Compute the part of a path below a directory.
     *
     * The directory must match on a path-segment boundary, so that
     * e.g. `/data/work2/x` is not treated as being inside `/data/work`.
     *
     * @param path the path to inspect
     * @param dir the candidate parent directory
     * @return the relative remainder (empty when `path` equals `dir`),
     *         or null when `path` is not located in `dir`
     */
    private static String relativeTo(String path, String dir) {
        if( !dir )
            return null
        if( path == dir )
            return ''
        final prefix = dir.endsWith('/') ? dir : dir + '/'
        return path.startsWith(prefix) ? path.substring(prefix.length()) : null
    }

    /**
     * Get the source URL for a project asset.
     *
     * @param path a path located in the project directory
     * @return the repository URL of the asset at the current commit, or
     *         the unchanged path when no URL can be built
     */
    private String getProjectSourceUrl(String path) {
        // TODO: add other git providers
        // without a commit id the URL would contain a literal "null"
        if( repository.host == 'github.com' && commitId )
            return "${repository}/tree/${commitId}${path.substring(projectDir.length())}".toString()

        return path
    }

    /**
     * Render the provenance of a workflow run as a BCO manifest.
     *
     * @param session the Nextflow session of the run
     * @param tasks the tasks executed by the run
     * @param workflowOutputs published files, as a map of source path to target path
     * @param path the file to which the JSON manifest is written
     */
    @Override
    void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs, Path path) {
        // get workflow inputs
        final taskLookup = ProvHelper.getTaskLookup(tasks)
        final workflowInputs = ProvHelper.getWorkflowInputs(tasks, taskLookup)

        // get workflow metadata
        final metadata = session.workflowMetadata
        final manifest = metadata.manifest
        final nextflowMeta = metadata.nextflow
        this.repository = metadata.repository ? new URL(metadata.repository) : null
        this.commitId = metadata.commitId
        this.projectDir = metadata.projectDir.toUriString()
        this.launchDir = metadata.launchDir.toUriString()
        this.workDir = metadata.workDir.toUriString()

        final dateCreated = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(metadata.start)
        final authors = (manifest.author ?: '').tokenize(',')*.trim()
        final nextflowVersion = nextflowMeta.version.toString()
        final params = (session.config.params ?: [:]) as Map

        // create BCO manifest
        // (object_id, spec_version and etag are filled in after hashing)
        final bco = [
            "object_id": null,
            "spec_version": null,
            "etag": null,
            "provenance_domain": [
                "name": manifest.name ?: "",
                "version": manifest.version ?: "",
                "created": dateCreated,
                "modified": dateCreated,
                "contributors": authors.collect( name -> [
                    "contribution": ["authoredBy"],
                    "name": name
                ] ),
                "license": ""
            ],
            "usability_domain": [],
            "extension_domain": [],
            "description_domain": [
                "keywords": [],
                "platform": ["Nextflow"],
                "pipeline_steps": tasks.sort( (task) -> task.id ).collect { task -> [
                    "step_number": task.id,
                    "name": task.hash.toString(),
                    "description": task.name,
                    "input_list": task.getInputFilesMap().collect { name, source -> [
                        "uri": normalizePath(source)
                    ] },
                    "output_list": ProvHelper.getTaskOutputs(task).collect { source -> [
                        "uri": normalizePath(source)
                    ] }
                ] }
            ],
            "execution_domain": [
                "script": [ normalizePath(metadata.scriptFile) ],
                "script_driver": "nextflow",
                "software_prerequisites": [
                    [
                        "name": "Nextflow",
                        "version": nextflowVersion,
                        "uri": [
                            "uri": "https://github.com/nextflow-io/nextflow/releases/tag/v${nextflowVersion}"
                        ]
                    ]
                ],
                "external_data_endpoints": [],
                "environment_variables": [:]
            ],
            "parametric_domain": params.collect( (k, v) -> [
                "param": k,
                // a param may be null; calling toString() on it is assumed
                // to fail under static compilation, so guard it
                "value": normalizePath(v != null ? v.toString() : 'null'),
                "step": "0"
            ] ),
            "io_domain": [
                "input_subdomain": workflowInputs.collect { source -> [
                    "uri": [
                        "uri": normalizePath(source)
                    ]
                ] },
                "output_subdomain": workflowOutputs.collect { source, target -> [
                    "mediatype": Files.probeContentType(source) ?: "",
                    "uri": [
                        "filename": normalizePath(source),
                        "uri": normalizePath(target)
                    ]
                ] }
            ],
            "error_domain": [
                "empirical_error": [:],
                "algorithmic_error": [:]
            ]
        ]

        // append git repository info
        if( metadata.repository ) {
            final extension_domain = bco.extension_domain as List
            extension_domain << [
                "extension_schema": "https://w3id.org/biocompute/extension_domain/1.1.0/scm/scm_extension.json",
                "scm_extension": [
                    "scm_repository": metadata.repository,
                    "scm_type": "git",
                    "scm_commit": metadata.commitId,
                    "scm_path": metadata.scriptFile.toUriString().replace(projectDir + '/', ''),
                    "scm_preview": normalizePath(metadata.scriptFile)
                ]
            ]
        }

        // compute etag
        // TODO: make a more canonical hash
        final etag = CacheHelper.hasher(bco, CacheHelper.HashMode.SHA256).hash()

        // append non-cacheable fields
        bco.object_id = "urn:uuid:${UUID.randomUUID()}"
        bco.spec_version = "https://w3id.org/ieee/ieee-2791-schema/2791object.json"
        bco.etag = etag.toString()

        // render BCO manifest to JSON file
        path.text = JsonOutput.prettyPrint(JsonOutput.toJson(bco))
    }

}

0 commit comments

Comments
 (0)