Skip to content

Commit

Permalink
feat: fix #45
Browse files Browse the repository at this point in the history
  • Loading branch information
kopardev committed Jun 20, 2024
1 parent d4329a2 commit b6caeac
Show file tree
Hide file tree
Showing 6 changed files with 430 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/CCBR/fastqc/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: fastqc
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::fastqc=0.12.1
64 changes: 64 additions & 0 deletions modules/CCBR/fastqc/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// nf-core fastqc module was used as template to modify
// - memory def's were commented out
// - suffix def was added
process FASTQC {
tag "$meta.id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
'biocontainers/fastqc:0.12.1--hdfd78af_0' }"

input:
tuple val(meta), path(reads)

output:
tuple val(meta), path("*.html"), emit: html
tuple val(meta), path("*.zip") , emit: zip
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def suffix = task.ext.suffix ?: '' // suffix should include underscore
def prefix = task.ext.prefix ?: "${meta.id}${suffix}"
// Make list of old name and new name pairs to use for renaming in the bash while loop
def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
def rename_to = old_new_pairs*.join(' ').join(' ')
def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')

// def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
// FastQC memory value allowed range (100 - 10000)
// def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)

"""
printf "%s %s\\n" $rename_to | while read old_name new_name; do
[ -f "\${new_name}" ] || ln -s \$old_name \$new_name
done
fastqc \\
$args \\
--threads $task.cpus \\
$renamed_files
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.html
touch ${prefix}.zip
cat <<-END_VERSIONS > versions.yml
"${task.process}":
fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
END_VERSIONS
"""
}
57 changes: 57 additions & 0 deletions modules/CCBR/fastqc/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: fastqc
description: Run FastQC on sequenced reads
keywords:
- quality control
- qc
- adapters
- fastq
tools:
- fastqc:
description: |
FastQC gives general quality metrics about your reads.
It provides information about the quality score distribution
across your reads, the per base sequence content (%A/C/G/T).
You get information about adapter contamination and other
overrepresented sequences.
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
licence: ["GPL-2.0-only"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- html:
type: file
description: FastQC report
pattern: "*_{fastqc.html}"
- zip:
type: file
description: FastQC report archive
pattern: "*_{fastqc.zip}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@grst"
- "@ewels"
- "@FelixKrueger"
maintainers:
- "@drpatelh"
- "@grst"
- "@ewels"
- "@FelixKrueger"
212 changes: 212 additions & 0 deletions modules/CCBR/fastqc/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
nextflow_process {

name "Test Process FASTQC"
script "../main.nf"
process "FASTQC"

tag "modules"
tag "modules_nfcore"
tag "fastqc"

test("sarscov2 single-end [fastq]") {

when {
process {
"""
input[0] = Channel.of([
[ id: 'test', single_end:true ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
])
"""
}
}

then {
assertAll (
{ assert process.success },

// NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
// looks like this: <div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
// https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039

{ assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_single") }
)
}
}

test("sarscov2 paired-end [fastq]") {

when {
process {
"""
input[0] = Channel.of([
[id: 'test', single_end: false], // meta map
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
])
"""
}
}

then {
assertAll (
{ assert process.success },

{ assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
{ assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
{ assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
{ assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
{ assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
{ assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_paired") }
)
}
}

test("sarscov2 interleaved [fastq]") {

when {
process {
"""
input[0] = Channel.of([
[id: 'test', single_end: false], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
])
"""
}
}

then {
assertAll (
{ assert process.success },

{ assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_interleaved") }
)
}
}

test("sarscov2 paired-end [bam]") {

when {
process {
"""
input[0] = Channel.of([
[id: 'test', single_end: false], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
])
"""
}
}

then {
assertAll (
{ assert process.success },

{ assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_bam") }
)
}
}

test("sarscov2 multiple [fastq]") {

when {
process {
"""
input[0] = Channel.of([
[id: 'test', single_end: false], // meta map
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
])
"""
}
}

then {
assertAll (
{ assert process.success },

{ assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
{ assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
{ assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
{ assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
{ assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
{ assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
{ assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
{ assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
{ assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
{ assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
{ assert path(process.out.html[0][1][2]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
{ assert path(process.out.html[0][1][3]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_multiple") }
)
}
}

test("sarscov2 custom_prefix") {

when {
process {
"""
input[0] = Channel.of([
[ id:'mysample', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
])
"""
}
}

then {
assertAll (
{ assert process.success },

{ assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
{ assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },

{ assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") }
)
}
}

test("sarscov2 single-end [fastq] - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.of([
[ id: 'test', single_end:true ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
])
"""
}
}

then {
assertAll (
{ assert process.success },
{ assert snapshot(process.out.html.collect { file(it[1]).getName() } +
process.out.zip.collect { file(it[1]).getName() } +
process.out.versions ).match("fastqc_stub") }
)
}
}

}
Loading

0 comments on commit b6caeac

Please sign in to comment.