diff --git a/CHANGELOG.md b/CHANGELOG.md index bd9cb42..4b635d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## 1.0.0 - 2024-06-19 -Initial release of nf-core/demo, created with the [nf-core](https://nf-co.re/) template. +### Credits + +Special thanks to the following for their reviews and assistance: + +- [Maxime Garcia](https://github.com/maxulysse) +- [Friederike Hanssen](https://github.com/FriederikeHanssen) ### `Added` -### `Fixed` +- `nf-core/seqtk/trim` module +- `skip_trim` parameter -### `Dependencies` +## v1.0dev - 2024-05-05 -### `Deprecated` +Initial release of nf-core/demo, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATIONS.md b/CITATIONS.md index b5d0459..934aa07 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,9 +14,7 @@ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. -- [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) - - > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. +- [seqtk](https://github.com/lh3/seqtk) - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) diff --git a/README.md b/README.md index 9ac9041..796989d 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ ![nf-core/demo metro map](docs/images/nf-core-demo-subway.png) 1. Read QC ([`FASTQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Adapter and quality trimming ([`FASTP`](https://github.com/OpenGene/fastp)) +2. Adapter and quality trimming ([`SEQTK_TRIM`](https://github.com/lh3/seqtk)) 3. 
Present QC for raw reads ([`MULTIQC`](http://multiqc.info/)) ## Usage diff --git a/assets/nf-core-demo_logo_light.png b/assets/nf-core-demo_logo_light.png index 0ef9fa5..2543e50 100644 Binary files a/assets/nf-core-demo_logo_light.png and b/assets/nf-core-demo_logo_light.png differ diff --git a/conf/modules.config b/conf/modules.config index a689cbd..189cef9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -28,11 +28,11 @@ process { } - withName: 'FASTP' { + withName: 'SEQTK_TRIM' { publishDir = [ - path: { "${params.outdir}/fastp/${meta.id}" }, + path: { "${params.outdir}/fq/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{html,json,log}" + pattern: "*.{fastq.gz}" ] } diff --git a/conf/test.config b/conf/test.config index 4bbce52..5bff1d1 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,6 +22,4 @@ params { // Input data input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - // Genome references - genome = 'R64-1-1' } diff --git a/conf/test_full.config b/conf/test_full.config index 1d82110..6346afd 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,9 +14,7 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - // Genome references - genome = 'R64-1-1' } diff --git a/docs/images/nf-core-demo-subway.png b/docs/images/nf-core-demo-subway.png index 1a2ebcd..e7914fe 100644 Binary files a/docs/images/nf-core-demo-subway.png and b/docs/images/nf-core-demo-subway.png differ diff --git a/docs/images/nf-core-demo-subway.svg b/docs/images/nf-core-demo-subway.svg index 90706b8..2b8c2ab 100644 --- 
a/docs/images/nf-core-demo-subway.svg +++ b/docs/images/nf-core-demo-subway.svg @@ -1,33 +1,29 @@ + + + - - - - - - + + + - - - - - - - - - - - - + + + + + - + + + + - - + + - + diff --git a/docs/images/nf-core-demo_logo_dark.png b/docs/images/nf-core-demo_logo_dark.png index 5e22400..44f9995 100644 Binary files a/docs/images/nf-core-demo_logo_dark.png and b/docs/images/nf-core-demo_logo_dark.png differ diff --git a/docs/images/nf-core-demo_logo_light.png b/docs/images/nf-core-demo_logo_light.png index c9d0c4b..b945f8c 100644 Binary files a/docs/images/nf-core-demo_logo_light.png and b/docs/images/nf-core-demo_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index f8f0f90..411fdf5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,7 +11,7 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [FastQC](#fastqc) - Raw read QC -- [fastp](#fastp) - Adapter and quality trimming +- [seqtk](#seqtk) - Processing sequences in the FASTA or FASTQ format. - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -38,20 +38,17 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. ::: -### fastp +### seqtk
Output files -- `fastp/` - - `*.fastp.html`: Trimming report in html format. - - `*.fastp.json`: Trimming report in json format. - - `*.fastp.log`: Trimming log file. - - `*.fastq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. +- `fq/` + - `*.fastq.gz`: Trimmed FASTQ files.
-[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp can be used in this pipeline for standard adapter trimming and quality filtering. +[seqtk](https://github.com/lh3/seqtk) is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. It seamlessly parses both FASTA and FASTQ files, which can optionally be compressed with gzip. In this pipeline, seqtk is run through the `nf-core/seqtk/trim` module to trim the reads. ### MultiQC diff --git a/modules.json b/modules.json index a1e9750..67933c1 100644 --- a/modules.json +++ b/modules.json @@ -5,11 +5,6 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "fastp": { - "branch": "master", - "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080", - "installed_by": ["modules"] - }, "fastqc": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", @@ -19,6 +14,11 @@ "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] + }, + "seqtk/trim": { + "branch": "master", + "git_sha": "71c669747731cbc360dc220069c9f83015558c07", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf deleted file mode 100644 index 4fc19b7..0000000 --- a/modules/nf-core/fastp/main.nf +++ /dev/null @@ -1,120 +0,0 @@ -process FASTP { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : - 'biocontainers/fastp:0.23.4--h5f740d0_0' }" - - input: - tuple val(meta), path(reads) - path adapter_fasta - val save_trimmed_fail - val save_merged - - output: - tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads - tuple val(meta), path('*.json') , emit: json - tuple val(meta), path('*.html') , emit: html - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions - tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail - tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' - // Added soft-links to original fastqs for consistent naming in MultiQC - // Use single ended for interleaved. Add --interleaved_in in config. - if ( task.ext.args?.contains('--interleaved_in') ) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - - fastp \\ - --stdout \\ - --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) \\ - | gzip -c > ${prefix}.fastp.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else if (meta.single_end) { - """ - [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - - fastp \\ - --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ - --thread $task.cpus \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } else { - def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz - fastp \\ - --in1 ${prefix}_1.fastq.gz \\ - --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --json ${prefix}.fastp.json \\ - --html ${prefix}.fastp.html \\ - $adapter_list \\ - $fail_fastq \\ - $merge_fastq \\ - --thread $task.cpus \\ - --detect_adapter_for_pe \\ - $args \\ - 2> >(tee ${prefix}.fastp.log >&2) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end - def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" - def touch_merged = (!is_single_output && save_merged) ? 
"touch ${prefix}.merged.fastq.gz" : "" - """ - touch $touch_reads - touch "${prefix}.fastp.json" - touch "${prefix}.fastp.html" - touch "${prefix}.fastp.log" - $touch_merged - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml deleted file mode 100644 index c22a16a..0000000 --- a/modules/nf-core/fastp/meta.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: fastp -description: Perform adapter/quality trimming on sequencing reads -keywords: - - trimming - - quality control - - fastq -tools: - - fastp: - description: | - A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. - documentation: https://github.com/OpenGene/fastp - doi: 10.1093/bioinformatics/bty560 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. If you wish to run interleaved paired-end data, supply as single-end data - but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. - - adapter_fasta: - type: file - description: File in FASTA format containing possible adapters to remove. 
- pattern: "*.{fasta,fna,fas,fa}" - - save_trimmed_fail: - type: boolean - description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - - save_merged: - type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: The trimmed/modified/unmerged fastq reads - pattern: "*fastp.fastq.gz" - - json: - type: file - description: Results in JSON format - pattern: "*.json" - - html: - type: file - description: Results in HTML format - pattern: "*.html" - - log: - type: file - description: fastq log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - reads_fail: - type: file - description: Reads the failed the preprocessing - pattern: "*fail.fastq.gz" - - reads_merged: - type: file - description: Reads that were successfully merged - pattern: "*.{merged.fastq.gz}" -authors: - - "@drpatelh" - - "@kevinmenden" -maintainers: - - "@drpatelh" - - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test deleted file mode 100644 index 6f1f489..0000000 --- a/modules/nf-core/fastp/tests/main.nf.test +++ /dev/null @@ -1,725 +0,0 @@ -nextflow_process { - - name "Test Process FASTP" - script "../main.nf" - process "FASTP" - tag "modules" - tag "modules_nfcore" - tag "fastp" - - test("test_fastp_single_end") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - 
input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:12.922000 K (92.984097%)", - "single end (151 cycles)" ] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end") } - ) - } - } - - test("test_fastp_single_end-stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - 
input[3] = save_merged - """ - } - } - - then { - - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end_stub") } - ) - } - } - - test("test_fastp_paired_end") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 198'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } 
- } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end") } - ) - } - } - - test("test_fastp_paired_end-stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - 
process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end-stub") } - ) - } - } - - test("fastp test_fastp_interleaved") { - - config './nextflow.interleaved.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "paired end (151 cycles + 151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 162"] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { 
file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved") } - ) - } - } - - test("fastp test_fastp_interleaved-stub") { - - options '-stub' - - config './nextflow.interleaved.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved-stub") } - ) - } - } - - test("test_fastp_single_end_trim_fail") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "Q20 bases:12.922000 K (92.984097%)", - "single end (151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { failed_read_lines.each { failed_read_line -> - { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } - ) - } - } - - test("test_fastp_paired_end_trim_fail") { - - config './nextflow.save_failed.config' - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ 
"Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 162'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { failed_read2_lines.each { failed_read2_line -> - { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } - ) - } - } - - test("test_fastp_paired_end_merged") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta 
- input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] - def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged") } - ) - } - } - - test("test_fastp_paired_end_merged-stub") { - - options '-stub' - - when { - params { - outdir = 
"$outputDir" - } - process { - """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } - ) - } - } - - test("test_fastp_paired_end_merged_adapterlist") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) - save_trimmed_fail = false - save_merged = true - - input[0] = Channel.of([ - [ id:'test', single_end:false ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] - ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged - """ - } - } - - then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] - def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } - ) - } - } -} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap deleted file mode 100644 index 3e87628..0000000 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ /dev/null @@ -1,330 +0,0 @@ -{ - "fastp test_fastp_interleaved_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:19:15.063001" - }, - "test_fastp_paired_end_merged-for_stub_match": 
{ - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:10:13.467574" - }, - "versions_interleaved": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:24.615634793" - }, - "test_fastp_single_end_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:18:43.526412" - }, - "versions_paired_end": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:42.333545689" - }, - "test_fastp_paired_end_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:03:06.431833729" - }, - "test_fastp_interleaved-_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:19:15.111894" - }, - "test_fastp_paired_end_merged_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - 
}, - "timestamp": "2024-02-01T12:08:44.496251446" - }, - "versions_single_end_stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:27.354051299" - }, - "versions_interleaved-stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:46.535528418" - }, - "versions_single_end_trim_fail": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:03.724591407" - }, - "test_fastp_paired_end-for_stub_match": { - "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:07:15.398827" - }, - "versions_paired_end-stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:56:06.50017282" - }, - "versions_single_end": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:07.67921647" - }, - "versions_paired_end_merged_stub": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:47.350653154" - }, - "test_fastp_interleaved-for_stub_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", 
- "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:08:06.127974" - }, - "versions_paired_end_trim_fail": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:18.140484878" - }, - "test_fastp_single_end-for_stub_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:06:00.244202" - }, - "test_fastp_single_end-_match": { - "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:18:43.580336" - }, - "versions_paired_end_merged_adapterlist": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:05:37.845370554" - }, - "versions_paired_end_merged": { - "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:59:32.860543858" - }, - "test_fastp_single_end_trim_fail_json": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-01-17T18:08:41.942317" - } -} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config deleted file mode 100644 index 4be8dbd..0000000 --- a/modules/nf-core/fastp/tests/nextflow.interleaved.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: 
FASTP { - ext.args = "--interleaved_in -e 30" - } -} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config deleted file mode 100644 index 53b61b0..0000000 --- a/modules/nf-core/fastp/tests/nextflow.save_failed.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: FASTP { - ext.args = "-e 30" - } -} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml deleted file mode 100644 index c1afcce..0000000 --- a/modules/nf-core/fastp/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -fastp: - - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/seqtk/trim/environment.yml similarity index 61% rename from modules/nf-core/fastp/environment.yml rename to modules/nf-core/seqtk/trim/environment.yml index 70389e6..389a3a9 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/seqtk/trim/environment.yml @@ -1,7 +1,7 @@ -name: fastp +name: seqtk_trim channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::fastp=0.23.4 + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/trim/main.nf b/modules/nf-core/seqtk/trim/main.nf new file mode 100644 index 0000000..0f7e4d7 --- /dev/null +++ b/modules/nf-core/seqtk/trim/main.nf @@ -0,0 +1,38 @@ +process SEQTK_TRIM { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + printf "%s\\n" $reads | while read f; + do + seqtk \\ + trimfq \\ + $args \\ + \$f \\ + | gzip --no-name > ${prefix}_\$(basename \$f) + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqtk/trim/meta.yml b/modules/nf-core/seqtk/trim/meta.yml new file mode 100644 index 0000000..1177057 --- /dev/null +++ b/modules/nf-core/seqtk/trim/meta.yml @@ -0,0 +1,44 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: seqtk_trim +description: Trim low quality bases from FastQ files +keywords: + - trimfq + - fastq + - seqtk +tools: + - "seqtk": + description: "Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format" + homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files + pattern: "*.{fastq.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Filtered FastQ files + pattern: "*.{fastq.gz}" + +authors: + - "@laramiellindsey" diff --git a/modules/nf-core/seqtk/trim/tests/main.nf.test b/modules/nf-core/seqtk/trim/tests/main.nf.test new file mode 100644 index 0000000..d99b6b2 --- /dev/null +++ b/modules/nf-core/seqtk/trim/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process SEQTK_TRIM" + script "modules/nf-core/seqtk/trim/main.nf" + process "SEQTK_TRIM" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/trim" + + test("Single-end") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + + } + +test("Paired-end") { + + when { + params { + outdir = $outputDir + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/trim/tests/main.nf.test.snap b/modules/nf-core/seqtk/trim/tests/main.nf.test.snap new file mode 100644 index 0000000..da181dc --- /dev/null +++ b/modules/nf-core/seqtk/trim/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "Single-end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_test_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + 
"versions.yml:md5,d061ca0231d089b087e22d2001cd7c32" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_test_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d061ca0231d089b087e22d2001cd7c32" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T06:10:55.544977" + }, + "Paired-end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_test_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec", + "test_test_2.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42" + ] + ] + ], + "1": [ + "versions.yml:md5,d061ca0231d089b087e22d2001cd7c32" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_test_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec", + "test_test_2.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42" + ] + ] + ], + "versions": [ + "versions.yml:md5,d061ca0231d089b087e22d2001cd7c32" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T06:11:38.487227" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/trim/tests/tags.yml b/modules/nf-core/seqtk/trim/tests/tags.yml new file mode 100644 index 0000000..250a138 --- /dev/null +++ b/modules/nf-core/seqtk/trim/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/trim: + - "modules/nf-core/seqtk/trim/**" diff --git a/nextflow.config b/nextflow.config index 3895320..28b04a5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,11 +24,8 @@ params { max_multiqc_email_size = '25.MB' multiqc_methods_description = null - - // FASTP options - adapters = null - save_trimmed_fail = false - save_merged = false + // Trimming + skip_trim = false // Boilerplate options outdir = null @@ -42,7 +39,6 @@ params { version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - // Config options config_profile_name = null config_profile_description = null diff --git 
a/nextflow_schema.json b/nextflow_schema.json index bbd7c16..005be41 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -74,29 +74,19 @@ } } }, - "fastp_options": { - "title": "Fastp options", + "process_skipping_options": { + "title": "Process skipping options", "type": "object", - "description": "Parameters for running fastp", + "description": "Options to skip various steps within the workflow.", "default": "", + "fa_icon": "fas fa-forward", "properties": { - "adapters": { - "type": "string", - "fa_icon": "fas fa-cut", - "description": "Fasta file with adapter sequences to be trimmed." - }, - "save_trimmed_fail": { + "skip_trim": { "type": "boolean", - "fa_icon": "far fa-save", - "description": "Option to save trimmed reads." - }, - "save_merged": { - "type": "boolean", - "fa_icon": "far fa-save", - "description": "Option to save merged reads." + "description": "Skip trimming fastq files with seqtk", + "fa_icon": "fas fa-chevron-circle-right" } - }, - "fa_icon": "fas fa-cut" + } }, "institutional_config_options": { "title": "Institutional config options", @@ -308,7 +298,7 @@ "$ref": "#/definitions/reference_genome_options" }, { - "$ref": "#/definitions/fastp_options" + "$ref": "#/definitions/process_skipping_options" }, { "$ref": "#/definitions/institutional_config_options" diff --git a/workflows/demo.nf b/workflows/demo.nf index c69af6f..80b1246 100644 --- a/workflows/demo.nf +++ b/workflows/demo.nf @@ -5,7 +5,7 @@ */ include { FASTQC } from '../modules/nf-core/fastqc/main' -include { FASTP } from '../modules/nf-core/fastp/main' +include { SEQTK_TRIM } from '../modules/nf-core/seqtk/trim/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -29,7 +29,7 @@ workflow DEMO { ch_multiqc_files = Channel.empty() // - // MODULE: Run FastQC + // MODULE: Run FASTQC // FASTQC ( ch_samplesheet @@ -38,18 
+38,15 @@ workflow DEMO { ch_versions = ch_versions.mix(FASTQC.out.versions.first()) // - // MODULE: Run Fastp + // MODULE: Run SEQTK_TRIM // - ch_adapters = params.adapters ? params.adapters : [] - - FASTP ( - ch_samplesheet, - ch_adapters, - params.save_trimmed_fail, - params.save_merged - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect{it[1]}.ifEmpty([])) - ch_versions = ch_versions.mix(FASTP.out.versions.first()) + if (!params.skip_trim) { + SEQTK_TRIM ( + ch_samplesheet + ) + ch_trimmed = SEQTK_TRIM.out.reads + ch_versions = ch_versions.mix(SEQTK_TRIM.out.versions.first()) + } // // Collate and save software versions @@ -63,7 +60,7 @@ workflow DEMO { ).set { ch_collated_versions } // - // MODULE: MultiQC + // MODULE: MULTIQC // ch_multiqc_config = Channel.fromPath( "$projectDir/assets/multiqc_config.yml", checkIfExists: true)