Skip to content

Commit

Permalink
Update gzrt to v0.9.1 (#7241)
Browse files Browse the repository at this point in the history
* Add module gzrt (#6918)

* fixed environment.yml

* Reply to review

* gzrecover and gzip one-liner

* gzrt uses a named pipe to avoid temp files

* Updated test file URL

* Improved syntax of prefixes in gzrt

* Updated gzrt to 0.9.1 version

* corrected name clash control in stub as well

* updated biocontainers and singularity links

* updated gzrt module

* Fixed gzrt through named pipe

* gzrt update snapshot

* updated tests for gzrt

* updated environment.yml

* gzrt - updated snapshots

* gzrt alpha - pre-release

* Add args in command line - gzrt

* gzrt - update snapshots and test dataset

* should not have changed it

---------

Co-authored-by: tm4zza <[email protected]>
Co-authored-by: Usman Rashid <[email protected]>
  • Loading branch information
3 people authored Jan 7, 2025
1 parent 7053802 commit 56f433b
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 46 deletions.
2 changes: 1 addition & 1 deletion modules/nf-core/gzrt/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- "bioconda::gzrt=0.8"
- "bioconda::gzrt=0.9.1"
62 changes: 50 additions & 12 deletions modules/nf-core/gzrt/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,52 @@ process GZRT {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gzrt:0.8--he4a0461_0':
'biocontainers/gzrt:0.8--he4a0461_0' }"
'https://depot.galaxyproject.org/singularity/gzrt:0.9.1--h577a1d6_1':
'biocontainers/gzrt:0.9.1--h577a1d6_1' }"

input:
tuple val(meta), path(fastqgz)

output:
tuple val(meta), path("*_recovered.fastq.gz"), emit: fastqrecovered
path "versions.yml" , emit: versions
tuple val(meta), path("${prefix}*.fastq.gz"), emit: recovered
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
if (fastqgz.extension != "gz") {
error "GZRT works with .gz files only."
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}_recovered"
fastqgz.each { file ->
if (file.extension != "gz") {
error "GZRT works with .gz files only. Offending file: ${file}"
}

if ((meta.single_end && "${file}" == "${prefix}.fastq.gz") ||
(!meta.single_end && ("${file}" == "${prefix}_1.fastq.gz" || "${file}" == "${prefix}_2.fastq.gz"))) {
error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
}
}

def prefix = task.ext.prefix ?: "${meta.id}"
"""
gzrecover -o ${prefix}_recovered.fastq ${fastqgz}
gzip ${prefix}_recovered.fastq
if [ "$meta.single_end" == true ]; then
gzrecover ${args} -p ${fastqgz} | gzip > ${prefix}.fastq.gz
if [ -e "${prefix}.fastq.gz" ] && [ ! -s "${prefix}.fastq.gz" ]; then
echo "" | gzip > ${prefix}.fastq.gz
fi
else
gzrecover ${args} -p ${fastqgz[0]} | gzip > ${prefix}_1.fastq.gz
gzrecover ${args} -p ${fastqgz[1]} | gzip > ${prefix}_2.fastq.gz
if [ -e "${prefix}_1.fastq.gz" ] && [ ! -s "${prefix}_1.fastq.gz" ]; then
echo "" | gzip > ${prefix}_1.fastq.gz
fi
if [ -e "${prefix}_2.fastq.gz" ] && [ ! -s "${prefix}_2.fastq.gz" ]; then
echo "" | gzip > ${prefix}_2.fastq.gz
fi
fi
soft_line="${task.process}"
ver_line="gzrt: \$(gzrecover -V |& sed '1!d ; s/gzrecover //')"
Expand All @@ -36,13 +60,27 @@ process GZRT {
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: "${meta.id}_recovered"
fastqgz.each { file ->
if (file.extension != "gz") {
error "GZRT works with .gz files only. Offending file: ${file}"
}

if ((meta.single_end && "${file}" == "${prefix}.fastq.gz") ||
(!meta.single_end && ("${file}" == "${prefix}_1.fastq.gz" || "${file}" == "${prefix}_2.fastq.gz"))) {
error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
}
}
"""
echo "" | gzip > ${prefix}_recovered.fastq.gz
if [ "$meta.single_end" == true ]; then
echo "" | gzip > ${prefix}.fastq.gz
else
echo "" | gzip > ${prefix}_1.fastq.gz
echo "" | gzip > ${prefix}_2.fastq.gz
fi
soft_line="${task.process}"
ver_line="gzrt: \$(gzrecover -V |& sed '1!d ; s/gzrecover //')"
cat <<-END_VERSIONS > versions.yml
"\${soft_line}":
\${ver_line}
Expand Down
8 changes: 4 additions & 4 deletions modules/nf-core/gzrt/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ input:
- fastqgz:
type: file
description: FASTQ.gz file
description: FASTQ.gz files
pattern: "*.{gz}"
ontologies:
- edam: "http://edamontology.org/format_3989"

output:
- fastqrecovered:
- recovered:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1', single_end:false ]`
- "*_recovered.fastq.gz":
- "${prefix}*.fastq.gz":
type: file
description: Recovered FASTQ.gz file
description: Recovered FASTQ.gz files
pattern: "*.{gz}"
ontologies:
- edam: "http://edamontology.org/format_3989"
Expand Down
63 changes: 54 additions & 9 deletions modules/nf-core/gzrt/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,87 @@ nextflow_process {
tag "modules_nfcore"
tag "gzrt"

test("Run gzrt on 30 paired-end reads - fastq.gz") {

test("Run gzrt on test2_1_corrupted single-end - fastq.gz") {
when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_corrupted_30reads_R1.fastq.gz', checkIfExists: true)
input[0] =
[
[ id:'test-single', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_1_corrupted_10kb.fastq.gz', checkIfExists: true)
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

test("Run gzrt on test2_1_corrupted and test2_2 paired-end - fastq.gz") {
when {
process {
"""
input[0] =
[
[ id:'test-paired', single_end:false ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_1_corrupted_10kb.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)
]
]
"""
}
}
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

test("Run gzrt on 30 paired-end reads - fastq.gz - stub") {
test("Run gzrt on test2_1_corrupted single-end - fastq.gz - stub") {
options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_corrupted_30reads_R1.fastq.gz', checkIfExists: true)
[ id:'test-single-stub', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_1_corrupted_10kb.fastq.gz', checkIfExists: true)
]
"""
}
}
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

test("Run gzrt on test2_1_corrupted and test2_2 paired-end - fastq.gz - stub") {
options "-stub"

when {
process {
"""
input[0] =
[
[ id:'test-paired-stub', single_end:false ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_1_corrupted_10kb.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)
]
]
"""
}
}
then {
assertAll(
{ assert process.success },
Expand Down
122 changes: 102 additions & 20 deletions modules/nf-core/gzrt/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,72 +1,154 @@
{
"Run gzrt on 30 paired-end reads - fastq.gz": {
"Run gzrt on test2_1_corrupted single-end - fastq.gz": {
"content": [
{
"0": [
[
{
"id": "test",
"id": "test-single",
"single_end": true
},
"test-single_recovered.fastq.gz:md5,6bc7d0b6304039eadba7321d21e7b2c9"
]
],
"1": [
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
],
"recovered": [
[
{
"id": "test-single",
"single_end": true
},
"test-single_recovered.fastq.gz:md5,6bc7d0b6304039eadba7321d21e7b2c9"
]
],
"versions": [
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-24T11:23:22.53088"
},
"Run gzrt on test2_1_corrupted single-end - fastq.gz - stub": {
"content": [
{
"0": [
[
{
"id": "test-single-stub",
"single_end": true
},
"test-single-stub_recovered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"1": [
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
],
"recovered": [
[
{
"id": "test-single-stub",
"single_end": true
},
"test-single-stub_recovered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"versions": [
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-24T11:24:13.88887"
},
"Run gzrt on test2_1_corrupted and test2_2 paired-end - fastq.gz - stub": {
"content": [
{
"0": [
[
{
"id": "test-paired-stub",
"single_end": false
},
"test_recovered.fastq.gz:md5,5eca10f954656478b5af96868222eec4"
[
"test-paired-stub_recovered_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
"test-paired-stub_recovered_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
]
],
"1": [
"versions.yml:md5,b9467d4cb860eb2941a2078a3dd3cf41"
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
],
"fastqrecovered": [
"recovered": [
[
{
"id": "test",
"id": "test-paired-stub",
"single_end": false
},
"test_recovered.fastq.gz:md5,5eca10f954656478b5af96868222eec4"
[
"test-paired-stub_recovered_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
"test-paired-stub_recovered_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
]
],
"versions": [
"versions.yml:md5,b9467d4cb860eb2941a2078a3dd3cf41"
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.1"
"nextflow": "24.10.3"
},
"timestamp": "2024-12-15T13:49:10.174131"
"timestamp": "2024-12-24T11:24:58.632197"
},
"Run gzrt on 30 paired-end reads - fastq.gz - stub": {
"Run gzrt on test2_1_corrupted and test2_2 paired-end - fastq.gz": {
"content": [
{
"0": [
[
{
"id": "test",
"id": "test-paired",
"single_end": false
},
"test_recovered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
[
"test-paired_recovered_1.fastq.gz:md5,6bc7d0b6304039eadba7321d21e7b2c9",
"test-paired_recovered_2.fastq.gz:md5,641919946a0e572ca7483f6e54476f3b"
]
]
],
"1": [
"versions.yml:md5,b9467d4cb860eb2941a2078a3dd3cf41"
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
],
"fastqrecovered": [
"recovered": [
[
{
"id": "test",
"id": "test-paired",
"single_end": false
},
"test_recovered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
[
"test-paired_recovered_1.fastq.gz:md5,6bc7d0b6304039eadba7321d21e7b2c9",
"test-paired_recovered_2.fastq.gz:md5,641919946a0e572ca7483f6e54476f3b"
]
]
],
"versions": [
"versions.yml:md5,b9467d4cb860eb2941a2078a3dd3cf41"
"versions.yml:md5,6d1e28b8a8043e3cba67606c7acdc676"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.1"
"nextflow": "24.10.3"
},
"timestamp": "2024-12-15T13:49:20.410351"
"timestamp": "2024-12-24T11:23:40.906933"
}
}

0 comments on commit 56f433b

Please sign in to comment.