From bf8d5eb7c2590ce687b7d0539b7efd51afc0efe0 Mon Sep 17 00:00:00 2001 From: Karin Date: Sun, 17 Jul 2022 19:24:19 +0200 Subject: [PATCH 01/25] Changed name of qc file to something more understandable --- qc_track.nf => fastqc_multi.nf | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename qc_track.nf => fastqc_multi.nf (100%) diff --git a/qc_track.nf b/fastqc_multi.nf similarity index 100% rename from qc_track.nf rename to fastqc_multi.nf From 0bcb7ab3de7d8318e58840a390156b46f0cb9c2a Mon Sep 17 00:00:00 2001 From: Karin Date: Sun, 17 Jul 2022 20:02:38 +0200 Subject: [PATCH 02/25] This is the first commit in the conda fix. In this commit I have: - added a conda directive to the two processes in fastqc_multi.nf - added a condahome param to the two conda config files. Note, I had to change that from process to params to get it to read the condahome setting. I am not happy about not quite understanding why that had to be. --- conf/condaslurm.config | 6 +++--- conf/condastandard.config | 4 ++-- fastqc_multi.nf | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/conf/condaslurm.config b/conf/condaslurm.config index dce1f40..83ed53a 100644 --- a/conf/condaslurm.config +++ b/conf/condaslurm.config @@ -4,6 +4,6 @@ * are available on the command line as expected. */ -process { - conda = '/cluster/projects/nn9305k/src/miniconda/envs/bifrost' -} +params { + condahome = '/cluster/projects/nn9305k/src/miniconda/envs' + } diff --git a/conf/condastandard.config b/conf/condastandard.config index e2d8e5a..8bcddec 100644 --- a/conf/condastandard.config +++ b/conf/condastandard.config @@ -5,6 +5,6 @@ * are available on the command line as expected. 
*/ -process { - conda = '/home/karinlag/src/anaconda3/envs/bifrost' +params { + condahome = "/home/karinlag/src/miniconda/envs" } diff --git a/fastqc_multi.nf b/fastqc_multi.nf index 24143c6..1ea5c2c 100644 --- a/fastqc_multi.nf +++ b/fastqc_multi.nf @@ -31,6 +31,7 @@ Channel // Second is to send all of through fastqc process run_fastqc { + conda "${params.condahome}/bifrost2022-fastqc" publishDir "${params.out_dir}/${params.fastqc}", mode: "${params.savemode}" tag {pair_id} label 'one' @@ -48,6 +49,7 @@ process run_fastqc { } process run_multiqc { + conda "${params.condahome}/bifrost2022-multiqc" publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}" tag {"multiqc"} label 'one' From 11861499bcfdd2a7fb502c6b8cebb600b8deaf8d Mon Sep 17 00:00:00 2001 From: Karin Date: Thu, 21 Jul 2022 16:11:54 +0200 Subject: [PATCH 03/25] This commit introduces the conda param to each process for the asm_annot.nf script. It might not work on its own. The next commit will update all command lines. 
--- asm_annot.nf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/asm_annot.nf b/asm_annot.nf index 2118828..4c331b5 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -33,6 +33,8 @@ Channel // run_fastq and run_multiqc are exactly the same as qc_track process run_fastqc { + conda "${params.condahome}/bifrost2022-fastqc" + publishDir "${params.out_dir}/fastqc", mode: "${params.savemode}" tag { pair_id } label 'one' @@ -50,6 +52,7 @@ process run_fastqc { } process run_multiqc { + conda "${params.condahome}/bifrost2022-multiqc" publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}" tag {"multiqc"} label 'one' @@ -90,7 +93,7 @@ process collate_data { * Strip PhiX with bbmap */ process run_strip { - + conda "${params.condahome}/bifrost2022-bbtools" publishDir "${params.out_dir}/bbduk", mode: "${params.savemode}" tag { pair_id } @@ -117,6 +120,7 @@ process run_strip { * Remove adapter sequences and low quality base pairs with Trimmomatic */ process run_trim { + conda "${params.condahome}/bifrost2022-trimmomatic" publishDir "${params.out_dir}/bbduk_trimmed", mode: "${params.savemode}" tag { pair_id } @@ -144,6 +148,7 @@ process run_trim { * Build assembly with SPAdes */ process run_spadesasm { + conda "${params.condahome}/bifrost2022-spades" publishDir "${params.out_dir}/spades", mode: "${params.savemode}" tag { pair_id } label 'longtime' @@ -177,6 +182,7 @@ process run_spadesasm { * Map reads to the spades assembly */ process run_bwamem { + conda "${params.condahome}/bifrost2022-bwa" publishDir "${params.out_dir}/bwamem", mode: "${params.savemode}" tag { pair_id } label 'longtime' @@ -202,6 +208,7 @@ process run_bwamem { */ process run_pilon { + conda "${params.condahome}/bifrost2022-pilon" publishDir "${params.out_dir}/pilon", mode: "${params.savemode}" tag { pair_id } @@ -228,6 +235,7 @@ process run_pilon { * Annotation using PROKKA */ process run_prokka { + conda "${params.condahome}/bifrost2022-prokka" publishDir
"${params.out_dir}/prokka", mode: "${params.savemode}" tag { pair_id } From 9006f3c27c885b3252bf3713e52b6bf6b4c1fd2c Mon Sep 17 00:00:00 2001 From: Karin Date: Thu, 21 Jul 2022 16:14:35 +0200 Subject: [PATCH 04/25] Reordered trimmomatic options to conform with the order they have in the manual --- asm_annot.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asm_annot.nf b/asm_annot.nf index 4c331b5..254cf76 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -134,8 +134,8 @@ process run_trim { """ trimmomatic PE -threads $task.cpus -trimlog ${pair_id}_concat_stripped_trimmed.log ${pair_id}*_concat_stripped.fq.gz \ -baseout ${pair_id}_trimmed.fq.gz ILLUMINACLIP:${params.adapter_dir}/${params.adapters}:${params.illuminaClipOptions} \ - SLIDINGWINDOW:${params.slidingwindow} \ LEADING:${params.leading} TRAILING:${params.trailing} \ + SLIDINGWINDOW:${params.slidingwindow} \ MINLEN:${params.minlen} &> ${pair_id}_run.log mv ${pair_id}_trimmed_1P.fq.gz ${pair_id}_R1_concat_stripped_trimmed.fq.gz mv ${pair_id}_trimmed_2P.fq.gz ${pair_id}_R2_concat_stripped_trimmed.fq.gz From a5da6c4b33c20d33704c437ec1102020ae5a5a17 Mon Sep 17 00:00:00 2001 From: Karin Date: Thu, 21 Jul 2022 21:00:15 +0200 Subject: [PATCH 05/25] SPADES options were updated with the following: --careful is removed; --isolate and --only-assembler are introduced. Per recommendations here: http://cab.spbu.ru/files/release3.14.0/manual.html#isolate --- asm_annot.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index 254cf76..693b087 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -162,11 +162,15 @@ process run_spadesasm { file "${pair_id}_spades_scaffolds.fasta" file "${pair_id}_spades.log" + + // For 2022 version, params.careful was removed to do --isolate and --only-assembler """ - spades.py ${params.careful} --cov-cutoff=${params.cov_cutoff} \ + spades.py --cov-cutoff=${params.cov_cutoff} \ -1 ${pair_id}_R1_concat_stripped_trimmed.fq.gz \ -2
${pair_id}_R2_concat_stripped_trimmed.fq.gz \ - -s ${pair_id}_S_concat_stripped_trimmed.fq.gz -t $task.cpus -o ${pair_id}_spades + -s ${pair_id}_S_concat_stripped_trimmed.fq.gz \ + -t $task.cpus --isolate --only-assembler \ + -o ${pair_id}_spades filter_fasta_length.py -i ${pair_id}_spades/scaffolds.fasta \ -o ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ -m ${params.min_contig_len} From 18ae82576001c91012ee3c6f1b0ca4fe84cdfa64 Mon Sep 17 00:00:00 2001 From: Karin Date: Fri, 22 Jul 2022 11:34:06 +0200 Subject: [PATCH 06/25] This commit fixes some tab&format issues. As of this commit command lines are ok. Note, no command line was touched in this commit except for formatting. --- asm_annot.nf | 79 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index 693b087..b3a6c82 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -148,34 +148,34 @@ process run_trim { * Build assembly with SPAdes */ process run_spadesasm { - conda "${params.condahome}/bifrost2022-spades" - publishDir "${params.out_dir}/spades", mode: "${params.savemode}" - tag { pair_id } - label 'longtime' + conda "${params.condahome}/bifrost2022-spades" + publishDir "${params.out_dir}/spades", mode: "${params.savemode}" + tag { pair_id } + label 'longtime' - input: - set pair_id, file(reads) from reads_trimmed + input: + set pair_id, file(reads) from reads_trimmed - output: - set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta") \ - into (assembly_results, tobwa_results) - file "${pair_id}_spades_scaffolds.fasta" - file "${pair_id}_spades.log" + output: + set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta") \ + into (assembly_results, tobwa_results) + file "${pair_id}_spades_scaffolds.fasta" + file "${pair_id}_spades.log" - // For 2022 version, params.careful was removed to do --isolate and --only-assembler - """ - spades.py 
--cov-cutoff=${params.cov_cutoff} \ - -1 ${pair_id}_R1_concat_stripped_trimmed.fq.gz \ - -2 ${pair_id}_R2_concat_stripped_trimmed.fq.gz \ - -s ${pair_id}_S_concat_stripped_trimmed.fq.gz \ - -t $task.cpus --isolate --only-assembler \ - -o ${pair_id}_spades - filter_fasta_length.py -i ${pair_id}_spades/scaffolds.fasta \ - -o ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ - -m ${params.min_contig_len} - cp ${pair_id}_spades/scaffolds.fasta ${pair_id}_spades_scaffolds.fasta - cp ${pair_id}_spades/spades.log ${pair_id}_spades.log + // For 2022 version, params.careful was removed to do --isolate and --only-assembler + """ + spades.py --cov-cutoff=${params.cov_cutoff} \ + -1 ${pair_id}_R1_concat_stripped_trimmed.fq.gz \ + -2 ${pair_id}_R2_concat_stripped_trimmed.fq.gz \ + -s ${pair_id}_S_concat_stripped_trimmed.fq.gz \ + -t $task.cpus --isolate --only-assembler \ + -o ${pair_id}_spades + filter_fasta_length.py -i ${pair_id}_spades/scaffolds.fasta \ + -o ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ + -m ${params.min_contig_len} + cp ${pair_id}_spades/scaffolds.fasta ${pair_id}_spades_scaffolds.fasta + cp ${pair_id}_spades/spades.log ${pair_id}_spades.log """ } @@ -187,24 +187,24 @@ process run_spadesasm { */ process run_bwamem { conda "${params.condahome}/bifrost2022-bwa" - publishDir "${params.out_dir}/bwamem", mode: "${params.savemode}" - tag { pair_id } - label 'longtime' + publishDir "${params.out_dir}/bwamem", mode: "${params.savemode}" + tag { pair_id } + label 'longtime' - input: - set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta"), \ - file(reads) from tobwa_results.join(pilon_reads) + input: + set pair_id, file("${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta"), \ + file(reads) from tobwa_results.join(pilon_reads) - output: - set pair_id, file("${pair_id}_mapped_sorted.bam"), \ - file("${pair_id}_mapped_sorted.bam.bai") into bwamem_results + output: + set pair_id, 
file("${pair_id}_mapped_sorted.bam"), \ + file("${pair_id}_mapped_sorted.bam.bai") into bwamem_results - """ - bwa index ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta - bwa mem -t $task.cpus ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ - *.fq.gz | samtools sort -o ${pair_id}_mapped_sorted.bam - - samtools index ${pair_id}_mapped_sorted.bam - """ + """ + bwa index ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta + bwa mem -t $task.cpus ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ + *.fq.gz | samtools sort -o ${pair_id}_mapped_sorted.bam - + samtools index ${pair_id}_mapped_sorted.bam + """ } /* @@ -264,6 +264,7 @@ process run_prokka { process quast_eval { // The output here is a directory in and of itself // thus not creating a new one + conda "${params.condahome}/bifrost2022-quast" publishDir "${params.out_dir}/", mode: "${params.savemode}" tag { pair_id } From 5ebb411685d5734a94fa763e9e63081475119191 Mon Sep 17 00:00:00 2001 From: Karin Date: Fri, 22 Jul 2022 17:02:06 +0200 Subject: [PATCH 07/25] With this commit, fixing the conda stuff is done. I have parsed all the conda env things to the three conda config files, thus making the scripts themselves independent. 
--- asm_annot.nf | 12 +----------- conf/condastandard.config | 2 +- fastqc_multi.nf | 2 -- nextflow.config | 4 ++++ 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index b3a6c82..88389cc 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -33,8 +33,6 @@ Channel // run_fastq and run_multiqc are exactly the same as qc_track process run_fastqc { - conda "${params.condahome}/bifrost2022-fastqc" - publishDir "${params.out_dir}/fastqc", mode: "${params.savemode}" tag { pair_id } label 'one' @@ -52,7 +50,6 @@ process run_fastqc { } process run_multiqc { - conda "${params.condahome}/bifrost2022-multiqc" publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}" tag {"multiqc"} label 'one' @@ -93,7 +90,6 @@ process collate_data { * Strip PhiX with bbmap */ process run_strip { - conda "${params.condahome}/bifrost2022-bbtools" publishDir "${params.out_dir}/bbduk", mode: "${params.savemode}" tag { pair_id } @@ -120,7 +116,6 @@ process run_strip { * Remove adapter sequences and low quality base pairs with Trimmomatic */ process run_trim { - conda "${params.condahome}/bifrost2022-trimmomatic" publishDir "${params.out_dir}/bbduk_trimmed", mode: "${params.savemode}" tag { pair_id } @@ -148,7 +143,6 @@ process run_trim { * Build assembly with SPAdes */ process run_spadesasm { - conda "${params.condahome}/bifrost2022-spades" publishDir "${params.out_dir}/spades", mode: "${params.savemode}" tag { pair_id } label 'longtime' @@ -186,7 +180,6 @@ process run_spadesasm { * Map reads to the spades assembly */ process run_bwamem { - conda "${params.condahome}/bifrost2022-bwa" publishDir "${params.out_dir}/bwamem", mode: "${params.savemode}" tag { pair_id } label 'longtime' @@ -212,7 +205,6 @@ process run_bwamem { */ process run_pilon { - conda "${params.condahome}/bifrost2022-pilon" publishDir "${params.out_dir}/pilon", mode: "${params.savemode}" tag { pair_id } @@ -239,7 +231,6 @@ process run_pilon { * Annotation using PROKKA */ process 
run_prokka { - conda "${params.condahome}/bifrost2022-prokka" publishDir "${params.out_dir}/prokka", mode: "${params.savemode}" tag { pair_id } @@ -261,10 +252,9 @@ process run_prokka { /* * Evaluate ALL assemblies with QUAST */ -process quast_eval { +process run_quast { // The output here is a directory in and of itself // thus not creating a new one - conda "${params.condahome}/bifrost2022-quast" publishDir "${params.out_dir}/", mode: "${params.savemode}" tag { pair_id } diff --git a/conf/condastandard.config b/conf/condastandard.config index 8bcddec..26838c3 100644 --- a/conf/condastandard.config +++ b/conf/condastandard.config @@ -7,4 +7,4 @@ params { condahome = "/home/karinlag/src/miniconda/envs" -} +} \ No newline at end of file diff --git a/fastqc_multi.nf b/fastqc_multi.nf index 1ea5c2c..24143c6 100644 --- a/fastqc_multi.nf +++ b/fastqc_multi.nf @@ -31,7 +31,6 @@ Channel // Second is to send all of through fastqc process run_fastqc { - conda "${params.condahome}/bifrost2022-fastqc" publishDir "${params.out_dir}/${params.fastqc}", mode: "${params.savemode}" tag {pair_id} label 'one' @@ -49,7 +48,6 @@ process run_fastqc { } process run_multiqc { - conda "${params.condahome}/bifrost2022-multiqc" publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}" tag {"multiqc"} label 'one' diff --git a/nextflow.config b/nextflow.config index 3f2b0f4..d8a8efa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,6 +18,8 @@ profiles { condalocal { includeConfig 'conf/standard.config' includeConfig 'conf/condastandard.config' + includeConfig 'conf/conda.config' + } slurm { includeConfig 'conf/slurm.config' @@ -25,6 +27,8 @@ profiles { condaslurm { includeConfig 'conf/slurm.config' includeConfig 'conf/condaslurm.config' + includeConfig 'conf/conda.config' + } } From 5f72e5c8d50f8f2d8f2fc2851c4e042a43e5949a Mon Sep 17 00:00:00 2001 From: Karin Date: Fri, 22 Jul 2022 17:08:33 +0200 Subject: [PATCH 08/25] And adding the new file which has the conda envs in them 
--- conf/conda.config | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 conf/conda.config diff --git a/conf/conda.config b/conf/conda.config new file mode 100644 index 0000000..aecc886 --- /dev/null +++ b/conf/conda.config @@ -0,0 +1,74 @@ +/* +* Standard profile file for use with conda. +* This setup will ensure that all command line programs +* are available on the command line as expected. +* condahome should be set in condastandard or condaslurm files +*/ + +process { + + withName: run_fastqc { + conda = "${params.condahome}/bifrost2022-fastqc" + } + + withName: run_multiqc { + conda = "${params.condahome}/bifrost2022-multiqc" + } + + withName: run_strip { + conda = "${params.condahome}/bifrost2022-bbtools" + } + + withName: run_trim { + conda = "${params.condahome}/bifrost2022-trimmomatic" + } + + withName: run_spadesasm { + conda = "${params.condahome}/bifrost2022-spades" + } + + withName: run_bwamem { + conda = "${params.condahome}/bifrost2022-bwa" + } + + withName: run_pilon { + conda = "${params.condahome}/bifrost2022-pilon" + } + + withName: run_prokka { + conda = "${params.condahome}/bifrost2022-prokka" + } + + withName: run_quast { + conda = "${params.condahome}/bifrost2022-quast" + } + + withName: run_pilon { + conda = "${params.condahome}/bifrost2022-pilon" + } + + withName:run_ariba_mlst_pred { + conda = "${params.condahome}/bifrost2022-ariba" + } + + withName:run_ariba_mlst_summarize { + conda = "${params.condahome}/bifrost2022-ariba" + } + + withName:run_ariba_amr_pred { + conda = "${params.condahome}/bifrost2022-ariba" + } + + withName:run_ariba_amr_summarize { + conda = "${params.condahome}/bifrost2022-ariba" + } + + withName:run_ariba_vir_pred { + conda = "${params.condahome}/bifrost2022-ariba" + } + + withName:run_ariba_vir_summarize { + conda = "${params.condahome}/bifrost2022-ariba" + } + +} \ No newline at end of file From e6413613f57e7be9920a4cebb2803ff3d5deafa4 Mon Sep 17 00:00:00 2001 
From: Karin Date: Sat, 23 Jul 2022 17:53:29 +0200 Subject: [PATCH 09/25] This commit introduces a new script to keep track of software versions. This script prints all the packages in conda envs used if using conda, and if not, prints versions of relevant tools. This script is run from the run_track.sh script. In addition, a duplicate of the conda config for pilon in conda.config was removed. --- bin/printversions.sh | 37 +++++++++++++++++++++++++++++++++++++ conf/conda.config | 4 ---- run_track.sh | 2 ++ 3 files changed, 39 insertions(+), 4 deletions(-) create mode 100755 bin/printversions.sh diff --git a/bin/printversions.sh b/bin/printversions.sh new file mode 100755 index 0000000..aac875c --- /dev/null +++ b/bin/printversions.sh @@ -0,0 +1,37 @@ +## This script attempts to print whatever versions we have of tools +## There are two scenarios, conda, and and not conda + +profile=$1 +output_file=$2 + + +if [[ $profile == *"$conda"* ]]; + then + conda list -n bifrost2022-fastqc >> ${output_file} + conda list -n bifrost2022-multiqc >> ${output_file} + conda list -n bifrost2022-bbtools >> ${output_file} + conda list -n bifrost2022-trimmomatic >> ${output_file} + conda list -n bifrost2022-spades >> ${output_file} + conda list -n bifrost2022-bwa >> ${output_file} + conda list -n bifrost2022-pilon >> ${output_file} + conda list -n bifrost2022-prokka >> ${output_file} + conda list -n bifrost2022-quast >> ${output_file} + conda list -n bifrost2022-ariba >> ${output_file} + + else + fastqc --version >> ${output_file} + multiqc --version >> ${output_file} + echo "bbduk.sh " `bbversion.sh` >> ${output_file} + echo "trimmomatic " `trimmomatic -version` >> ${output_file} + spades.py --version >> ${output_file} + bwa &> tmpfile + cat tmpfile |grep Version | awk '{print "bwa " $0}' >> ${output_file} + rm tmpfile + samtools --version |head -1 >> ${output_file} + pilon --version >> ${output_file} + prokka -v >> ${output_file} + quast -v >> ${output_file} + ariba version |head -1 
>> ${output_file} + + +fi \ No newline at end of file diff --git a/conf/conda.config b/conf/conda.config index aecc886..2e776ac 100644 --- a/conf/conda.config +++ b/conf/conda.config @@ -42,10 +42,6 @@ process { withName: run_quast { conda = "${params.condahome}/bifrost2022-quast" } - - withName: run_pilon { - conda = "${params.condahome}/bifrost2022-pilon" - } withName:run_ariba_mlst_pred { conda = "${params.condahome}/bifrost2022-ariba" diff --git a/run_track.sh b/run_track.sh index e48349f..c80a475 100755 --- a/run_track.sh +++ b/run_track.sh @@ -16,8 +16,10 @@ workdir=${5:-$USERWORK/bifrost_work} mkdir -p ${out_directory}/config_files git --git-dir ${script_directory}/.git branch -v |grep "\*" | awk '{print $2, $3}' > ${out_directory}/config_files/pipeline_version.log +bash ${script_directory}/bin/printversions.sh profile ${out_directory}/config_files/software_versions.txt cp ${script_directory}/${track_script} ${out_directory}/config_files cp ${template} ${out_directory}/config_files + echo "TEMPORARY WORKING DIRECTORY IS ${workdir}" nextflow -c ${template} run -resume ${script_directory}/${track_script} -profile ${profile} --out_dir=${out_directory} -work-dir ${workdir} From d2b0a48240a01af0f0517daf6e4e27babd9fbdca Mon Sep 17 00:00:00 2001 From: Karin Date: Fri, 29 Jul 2022 14:41:32 +0200 Subject: [PATCH 10/25] This commit adjusts the format of the input that is allowed. Input reads have three variables: 1. number of reads - 2 or 4 2. either containing _R1 or _1 3. either containing 1_ or 1. The latter two mean that _R1_, _R1., _1_ and _1. are all permitted. This has been tested for all eight cases of types of data, and works. This change has been incorporated into the two files that use concatenation, which are specific_genes and asm_annot.
--- asm_annot.nf | 6 ++++-- specific_genes.nf | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index 88389cc..f71b620 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -79,9 +79,11 @@ process collate_data { output: set pair_id, file("${pair_id}*_concat.fq.gz") into (reads, pilon_reads) + """ - cat ${pair_id}*R1* > ${pair_id}_R1_concat.fq.gz - cat ${pair_id}*R2* > ${pair_id}_R2_concat.fq.gz + shopt -s extglob + cat ${pair_id}*_?(R)1[_.]*.gz > ${pair_id}_R1_concat.fq.gz + cat ${pair_id}*_?(R)2[_.]*.gz > ${pair_id}_R2_concat.fq.gz """ } diff --git a/specific_genes.nf b/specific_genes.nf index eb862f6..7545c77 100644 --- a/specific_genes.nf +++ b/specific_genes.nf @@ -59,9 +59,11 @@ process collate_data { set pair_id, file("${pair_id}*_concat.fq.gz") into \ (read_pairs_mlst, read_pairs_amr, read_pairs_vir) + """ - cat ${pair_id}*R1* > ${pair_id}_R1_concat.fq.gz - cat ${pair_id}*R2* > ${pair_id}_R2_concat.fq.gz + shopt -s extglob + cat ${pair_id}*_?(R)1[_.]*.gz > ${pair_id}_R1_concat.fq.gz + cat ${pair_id}*_?(R)2[_.]*.gz > ${pair_id}_R2_concat.fq.gz """ } From abd8e0e8a1eee10831289e4d130d517c4212ec13 Mon Sep 17 00:00:00 2001 From: Karin Date: Fri, 29 Jul 2022 17:21:56 +0200 Subject: [PATCH 11/25] In this commit, three new logfiles have been output to the publish dir. One for bwa, one for bbduk, and one for trimming. 
--- asm_annot.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index f71b620..f550206 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -101,6 +101,7 @@ process run_strip { output: set pair_id, file("${pair_id}*_concat_stripped.fq.gz") into reads_stripped file "${pair_id}_bbduk_output.log" + file "${pair_id}_stats.txt" """ bbduk.sh threads=$task.cpus ref=${params.stripgenome} \ @@ -109,7 +110,7 @@ process run_strip { outm=${pair_id}_matched.fq.gz \ out1=${pair_id}_R1_concat_stripped.fq.gz \ out2=${pair_id}_R2_concat_stripped.fq.gz \ - k=31 hdist=1 stats=stats.txt &> ${pair_id}_bbduk_output.log + k=31 hdist=1 stats=${pair_id}_stats.txt &> ${pair_id}_bbduk_output.log """ } @@ -127,6 +128,7 @@ process run_trim { output: set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into reads_trimmed file "${pair_id}_concat_stripped_trimmed.log" + file "${pair_id}_run.log" """ trimmomatic PE -threads $task.cpus -trimlog ${pair_id}_concat_stripped_trimmed.log ${pair_id}*_concat_stripped.fq.gz \ @@ -193,11 +195,12 @@ process run_bwamem { output: set pair_id, file("${pair_id}_mapped_sorted.bam"), \ file("${pair_id}_mapped_sorted.bam.bai") into bwamem_results + file "${pair_id}_bwa.log" """ bwa index ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta bwa mem -t $task.cpus ${pair_id}_spades_scaffolds_min${params.min_contig_len}.fasta \ - *.fq.gz | samtools sort -o ${pair_id}_mapped_sorted.bam - + *.fq.gz 2> ${pair_id}_bwa.log| samtools sort -o ${pair_id}_mapped_sorted.bam - samtools index ${pair_id}_mapped_sorted.bam """ } From 091377a47eaf3c05b8193f677cfcae2f87ef9b5d Mon Sep 17 00:00:00 2001 From: Karin Date: Mon, 8 Aug 2022 13:03:45 +0200 Subject: [PATCH 12/25] This commit contains all that is needed for multiqc to happen at the end of the analysis, which means including info on a lot of things. 
As a consequence new channels has been made, and some command lines have been changed to pick up new files for processing by multiqc. Also, the initial multiqc run has been removed. Notable changes: The output from trimmomatic has been split into stderr and stdout. Multiqc uses the stderr for putting things into the report. The output from prokka has been given a more specific name, so that the naming in the report will be more meaningful. --- asm_annot.nf | 75 +++++++++++++++++++++++++++++----------- conf/conda.config | 8 +++++ conf/multiqc_config.yaml | 7 ++++ 3 files changed, 69 insertions(+), 21 deletions(-) create mode 100644 conf/multiqc_config.yaml diff --git a/asm_annot.nf b/asm_annot.nf index f550206..e14189c 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -41,7 +41,7 @@ process run_fastqc { set pair_id, file(reads) from fastqc_reads output: - file "$pair_id" into fastqc_results + file "$pair_id" into fastqc_multiqc """ mkdir ${pair_id} @@ -49,21 +49,6 @@ process run_fastqc { """ } -process run_multiqc { - publishDir "${params.out_dir}/multiqc", mode: "${params.savemode}" - tag {"multiqc"} - label 'one' - - input: - file "fastqc_output/*" from fastqc_results.toSortedList() - - output: - file "multiqc_report.html" into multiqc_report - - """ - multiqc fastqc_output - """ -} // if there are more than two data files, we need to cat them together // because spades becomes complicated with more than two files @@ -102,6 +87,8 @@ process run_strip { set pair_id, file("${pair_id}*_concat_stripped.fq.gz") into reads_stripped file "${pair_id}_bbduk_output.log" file "${pair_id}_stats.txt" + file "${pair_id}*_stats.txt" into bbduk_stats_stripped_multiqc + """ bbduk.sh threads=$task.cpus ref=${params.stripgenome} \ @@ -126,22 +113,44 @@ process run_trim { set pair_id, file(reads) from reads_stripped output: - set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into reads_trimmed - file "${pair_id}_concat_stripped_trimmed.log" - file "${pair_id}_run.log" + 
set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into (reads_trimmed, fq2) + file "${pair_id}_stripped_trimmed_stderr.log" + file "${pair_id}_stripped_trimmed_stdout.log" + file "${pair_id}_stripped_trimmed_stderr.log" into bbduk_trimmed_multiqc + """ trimmomatic PE -threads $task.cpus -trimlog ${pair_id}_concat_stripped_trimmed.log ${pair_id}*_concat_stripped.fq.gz \ -baseout ${pair_id}_trimmed.fq.gz ILLUMINACLIP:${params.adapter_dir}/${params.adapters}:${params.illuminaClipOptions} \ LEADING:${params.leading} TRAILING:${params.trailing} \ SLIDINGWINDOW:${params.slidingwindow} \ - MINLEN:${params.minlen} &> ${pair_id}_run.log + MINLEN:${params.minlen} \ + 2> ${pair_id}_stripped_trimmed_stderr.log 1> ${pair_id}_stripped_trimmed_stdout.log mv ${pair_id}_trimmed_1P.fq.gz ${pair_id}_R1_concat_stripped_trimmed.fq.gz mv ${pair_id}_trimmed_2P.fq.gz ${pair_id}_R2_concat_stripped_trimmed.fq.gz cat ${pair_id}_trimmed_1U.fq.gz ${pair_id}_trimmed_2U.fq.gz > ${pair_id}_S_concat_stripped_trimmed.fq.gz """ } +process run_fastqc_trimmed { + publishDir "${params.out_dir}/fastqc_bbduk_trimmed", mode: "${params.savemode}" + tag { pair_id } + label 'one' + + input: + set pair_id, file(reads) from fq2 + + output: + file "$pair_id" + file "${pair_id}" into fastqc_bbduk_trimmed + + """ + mkdir ${pair_id} + fastqc -q ${reads} -o ${pair_id} -t $task.cpus + """ +} + + /* * Build assembly with SPAdes @@ -244,12 +253,13 @@ process run_prokka { output: set pair_id, file("${pair_id}.*") into annotation_results + file "${pair_id}.*" into annotation_multiqc """ prokka --compliant --force --usegenus --cpus $task.cpus \ --centre ${params.centre} --prefix ${pair_id} --locustag ${params.locustag} \ --genus ${params.genus} --species ${params.species} \ - --kingdom ${params.kingdom} ${params.prokka_additional} \ + --kingdom ${params.kingdom} --strain ${pair_id}_prokka_info ${params.prokka_additional} \ --outdir . 
${pair_id}_pilon_spades.fasta """ } @@ -269,9 +279,32 @@ process run_quast { //TODO: fix this, is why output is not going anywhere output: file quast_evaluation_all into quast_evaluation_all + file quast_evaluation_all into quast_multiqc """ quast --threads $task.cpus -o quast_evaluation_all \ -g ${params.quast_genes} -R ${params.quast_ref} ${asm_list} """ } + +process run_multiqc_final { + publishDir "${params.out_dir}/multiqc_final", mode: "${params.savemode}" + tag {"multiqc"} + label 'one' + + input: + file "fastqc_output/*" from fastqc_multiqc.collect() + file "bbduk/*" from bbduk_stats_stripped_multiqc.collect() + file "bbduk_trimmed/*" from bbduk_trimmed_multiqc.collect() + file "bbduk_trimmed_fastqc/*" from fastqc_bbduk_trimmed.collect() + file "prokka/*" from annotation_multiqc.collect() + file quast_evaluation_all from quast_multiqc + + output: + file("*") + + """ + multiqc --fullnames --config ${params.multiqc_config} . + """ + +} diff --git a/conf/conda.config b/conf/conda.config index 2e776ac..62bef30 100644 --- a/conf/conda.config +++ b/conf/conda.config @@ -11,10 +11,18 @@ process { conda = "${params.condahome}/bifrost2022-fastqc" } + withName: run_fastqc_trimmed { + conda = "${params.condahome}/bifrost2022-fastqc" + } + withName: run_multiqc { conda = "${params.condahome}/bifrost2022-multiqc" } + withName: run_multiqc_final { + conda = "${params.condahome}/bifrost2022-multiqc" + } + withName: run_strip { conda = "${params.condahome}/bifrost2022-bbtools" } diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml new file mode 100644 index 0000000..2f15b51 --- /dev/null +++ b/conf/multiqc_config.yaml @@ -0,0 +1,7 @@ +report_comment: > + This MultiQC report has been generated on results from running the + Bifrost 2022 + analysis pipeline. 
+trimmomatic: + s_name_filenames: true \ No newline at end of file From ada2fd660f8dda5a8a975a336880b1da36a410b5 Mon Sep 17 00:00:00 2001 From: Karin Date: Mon, 8 Aug 2022 14:03:17 +0200 Subject: [PATCH 13/25] Adjusted number of concurrent jobs --- conf/slurm.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/slurm.config b/conf/slurm.config index 649ac51..f3ba8a2 100644 --- a/conf/slurm.config +++ b/conf/slurm.config @@ -20,7 +20,7 @@ process { executor = 'slurm' clusterOptions = '--job-name=nxf_test --account=nn9305k --mem-per-cpu=4700M' - queueSize = 24 + queueSize = 40 maxRetries = 3 errorStrategy='retry' From a94a8744ab5b7fe5f77535eba798e53e45b851c7 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 10 Aug 2022 13:19:55 +0200 Subject: [PATCH 14/25] Changed publishdir for bbduk process so as to not publish fastq files to save space --- asm_annot.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/asm_annot.nf b/asm_annot.nf index e14189c..5f66eb4 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -77,7 +77,9 @@ process collate_data { * Strip PhiX with bbmap */ process run_strip { - publishDir "${params.out_dir}/bbduk", mode: "${params.savemode}" + publishDir "${params.out_dir}/bbduk", + saveAs: {filename -> filename.endsWith('.gz') ? null:filename}, + mode: "${params.savemode}" tag { pair_id } input: From f64f7d27db26348ce56786c72a9a1f8ba59eb3cd Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 10 Aug 2022 14:01:18 +0200 Subject: [PATCH 15/25] Updated asm_annot conf to reflect new data structure and to add multiqc yaml file. 
Also removed careful and set coverage cutoff to auto --- conf/asm_annot_template.config | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/conf/asm_annot_template.config b/conf/asm_annot_template.config index 89bcc47..7c28c54 100644 --- a/conf/asm_annot_template.config +++ b/conf/asm_annot_template.config @@ -16,17 +16,19 @@ params.reads = "../testdata/risk_short/*L00{1,2}_R{1,2}_001.fastq.gz" params.setsize = 4 -// Specify the name of the output directory, relative to where the script is being run -params.out_dir = "track_three" - // General configuration variables params.pwd = "$PWD" params.help = false params.savemode = "copy" + +// Directory to where data is stored +params.data_dir = "/cluster/projects/nn9305k/vi_pipeline_data/bifrost_data" +params.multiqc_config = "/cluster/projects/nn9305k/Bifrost22/conf/multiqc_config.yaml" + // BBDuk params, has to be absolute paths -params.stripgenome = "/cluster/projects/nn9305k/genome_references/genomes/PhiX/PhiX.fasta" -params.stripdir = "/cluster/projects/nn9305k/genome_references/bbmap_refs" +params.stripgenome = "${params.data_dir}/genome_references/genomes/PhiX/PhiX.fasta" +params.stripdir = "${params.data_dir}/genome_references/bbmap_refs" // Trimmomatic configuration variables @@ -36,14 +38,13 @@ params.slidingwindow = "4:15" params.leading = 3 params.trailing = 3 params.minlen = 36 -params.adapters = "TruSeq3-PE.fa" -params.adapter_dir = "/cluster/projects/nn9305k/db_flatfiles/trimmomatic_adapters" +params.adapters = "NexteraPE-PE.fa" +params.adapter_dir = "${params.data_dir}/trimmomatic_adapters" // SPAdes configuration variables params.assembly = "spades_asm" -params.careful = "--careful" -params.cov_cutoff = "off" +params.cov_cutoff = "auto" params.min_contig_len = "500" // PROKKA configuration variables @@ -58,6 +59,5 @@ params.centre = "NVI" // QUAST variables -params.genome_directory = "/cluster/projects/nn9305k/genome_references/genomes/" -params.quast_ref = 
"${params.genome_directory}ecoli/GCF_000005845.2_ASM584v2_genomic.fna" -params.quast_genes = "${params.genome_directory}ecoli/GCF_000005845.2_ASM584v2_genomic.gff" +params.quast_ref = "${params.data_dir}/genome_references/genomes/ecoli/GCF_000005845.2_ASM584v2_genomic.fna" +params.quast_genes = "${params.data_dir}/genome_references/genomes/ecoli/GCF_000005845.2_ASM584v2_genomic.gff" From c76c3c621d0f9fa7e18d80d9bac8bc6cad8f5dc5 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 10 Aug 2022 14:14:32 +0200 Subject: [PATCH 16/25] Removed out_dir from both config files --- conf/qc_track_template.config | 3 --- conf/specific_genes_template.config | 2 -- 2 files changed, 5 deletions(-) diff --git a/conf/qc_track_template.config b/conf/qc_track_template.config index d0ec741..686b9b9 100644 --- a/conf/qc_track_template.config +++ b/conf/qc_track_template.config @@ -15,9 +15,6 @@ params.reads = "../testdata/risk_short/*L00{1,2}_R{1,2}_001.fastq.gz" params.setsize = 2 -// Specify the name of the output directory, relative to where the script is being run -params.out_dir = "track_one" - // General configuration variables params.pwd = "$PWD" params.help = false diff --git a/conf/specific_genes_template.config b/conf/specific_genes_template.config index 2625e3a..ccc941a 100644 --- a/conf/specific_genes_template.config +++ b/conf/specific_genes_template.config @@ -9,8 +9,6 @@ */ params.reads = "../testdata/fastq_files/*L00{1,2}_R{1,2}_001.fastq.gz" params.setsize = 2 -// Specify the name of the output directory, relative to where the script is being run -params.out_dir = "track_two" // General configuration variables params.pwd = "$PWD" From 558b201d98e98124d933ae9477f60cac4f355b72 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 10 Aug 2022 14:15:43 +0200 Subject: [PATCH 17/25] Removed third party software file due to being out of date --- conf/third_party_software.md | 46 ------------------------------------ 1 file changed, 46 deletions(-) delete mode 100644 
conf/third_party_software.md diff --git a/conf/third_party_software.md b/conf/third_party_software.md deleted file mode 100644 index 46793dd..0000000 --- a/conf/third_party_software.md +++ /dev/null @@ -1,46 +0,0 @@ -# Third party software - -The Bifrost pipeline depends on several third party packages. -These have to be made available to the pipeline in some way. -The way that these are made available to the pipeline depends -on which system the pipeline is being run on. - -Please note: not all of the software is used for all tracks. -The track(s) that each software is used in is noted below. - - -## Currently used software - -* FastQC - Track One and Three -* Ariba - Track Two -* Trimmomatic - Track Three -* SPAdes - Track Three -* QUAST - Track Three - - -## Tracks - -There are currently three tracks: - -* Track One: FastQC on input reads -* Track Two: Ariba MLST, virulence and AMR analysis -* Track Three: Trimming with trimmomatic followed by assembly - with SPAdes. Trimming results are evaluated with MultiQC, and - assemblies with QUAST - -## Profiles - -We currently have two profiles set up, standard and slurm. - -### standard.config -This profile is used when running on a normal stand-alone -computer. This assumes that all software is available on -the command line, unless otherwise noted with a full path in -the standard.config file. - -### slurm.config -This profile is used when running on a system that uses the -slurm queue management system. At present, this also depends -heavily on the module system. Any software not in the module -system needs to either be available on the command line, or -should be specified using the full path. 
From aa649dfdf8ffc2d5d90ac0008fe4ce3f81a456a9 Mon Sep 17 00:00:00 2001 From: Karin Date: Mon, 15 Aug 2022 18:12:18 +0200 Subject: [PATCH 18/25] In this commit, I have done: - fixed it so that the profile name is actually used for the printversions script to determine what software dump to do - fixed the printversions script so that the if test actually works. --- bin/printversions.sh | 8 ++++---- run_track.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/printversions.sh b/bin/printversions.sh index aac875c..1706f1f 100755 --- a/bin/printversions.sh +++ b/bin/printversions.sh @@ -1,12 +1,12 @@ ## This script attempts to print whatever versions we have of tools ## There are two scenarios, conda, and and not conda -profile=$1 +STR=$1 output_file=$2 - -if [[ $profile == *"$conda"* ]]; - then +SUBSTR='conda' +if [[ "$STR" == *"$SUBSTR"* ]]; then + echo "CONDA" conda list -n bifrost2022-fastqc >> ${output_file} conda list -n bifrost2022-multiqc >> ${output_file} conda list -n bifrost2022-bbtools >> ${output_file} diff --git a/run_track.sh b/run_track.sh index c80a475..aa32fe4 100755 --- a/run_track.sh +++ b/run_track.sh @@ -16,7 +16,7 @@ workdir=${5:-$USERWORK/bifrost_work} mkdir -p ${out_directory}/config_files git --git-dir ${script_directory}/.git branch -v |grep "\*" | awk '{print $2, $3}' > ${out_directory}/config_files/pipeline_version.log -bash ${script_directory}/bin/printversions.sh profile ${out_directory}/config_files/software_versions.txt +bash ${script_directory}/bin/printversions.sh ${profile} ${out_directory}/config_files/software_versions.txt cp ${script_directory}/${track_script} ${out_directory}/config_files cp ${template} ${out_directory}/config_files From b23d6dd2263900a2a8c51c61f5b7fd3ce19c3f32 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 11:07:41 +0200 Subject: [PATCH 19/25] Removing nonsensical comment --- asm_annot.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/asm_annot.nf b/asm_annot.nf index 
5f66eb4..9d27821 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -278,7 +278,6 @@ process run_quast { input: file asm_list from asms_for_quast.toSortedList() - //TODO: fix this, is why output is not going anywhere output: file quast_evaluation_all into quast_evaluation_all file quast_evaluation_all into quast_multiqc From b0ad6ea5f3d70054d2827fa15a4e053e137b24b4 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 11:13:00 +0200 Subject: [PATCH 20/25] Updated channel name for trimmed fastqc to be more sensical --- asm_annot.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index 9d27821..302c77d 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -115,7 +115,7 @@ process run_trim { set pair_id, file(reads) from reads_stripped output: - set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into (reads_trimmed, fq2) + set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into (reads_trimmed, trimmed_fastqc) file "${pair_id}_stripped_trimmed_stderr.log" file "${pair_id}_stripped_trimmed_stdout.log" file "${pair_id}_stripped_trimmed_stderr.log" into bbduk_trimmed_multiqc @@ -140,7 +140,7 @@ process run_fastqc_trimmed { label 'one' input: - set pair_id, file(reads) from fq2 + set pair_id, file(reads) from trimmed_fastqc output: file "$pair_id" From 56875b76ba455c199e35751979174afdec849ed5 Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 11:14:06 +0200 Subject: [PATCH 21/25] Removing debugging from printversions --- bin/printversions.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/printversions.sh b/bin/printversions.sh index 1706f1f..a9fd9b1 100755 --- a/bin/printversions.sh +++ b/bin/printversions.sh @@ -6,7 +6,6 @@ output_file=$2 SUBSTR='conda' if [[ "$STR" == *"$SUBSTR"* ]]; then - echo "CONDA" conda list -n bifrost2022-fastqc >> ${output_file} conda list -n bifrost2022-multiqc >> ${output_file} conda list -n bifrost2022-bbtools >> ${output_file} From 
be4f34a0dc7b5e73b3acb7f0bad8e39d4291ecfe Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 12:42:24 +0200 Subject: [PATCH 22/25] Removed star in output for runstrip output, did not match anything --- asm_annot.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asm_annot.nf b/asm_annot.nf index 302c77d..ee571c8 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -89,7 +89,7 @@ process run_strip { set pair_id, file("${pair_id}*_concat_stripped.fq.gz") into reads_stripped file "${pair_id}_bbduk_output.log" file "${pair_id}_stats.txt" - file "${pair_id}*_stats.txt" into bbduk_stats_stripped_multiqc + file "${pair_id}_stats.txt" into bbduk_stats_stripped_multiqc """ From a42ccf7a50b1aa67ea13359f11aecc0e33f4923f Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 12:45:13 +0200 Subject: [PATCH 23/25] Changed channel name for input to multiqc --- asm_annot.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asm_annot.nf b/asm_annot.nf index ee571c8..1e1f9f5 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -144,7 +144,7 @@ process run_fastqc_trimmed { output: file "$pair_id" - file "${pair_id}" into fastqc_bbduk_trimmed + file "${pair_id}" into fastqc_bbduk_trimmed_multiqc """ mkdir ${pair_id} @@ -297,7 +297,7 @@ process run_multiqc_final { file "fastqc_output/*" from fastqc_multiqc.collect() file "bbduk/*" from bbduk_stats_stripped_multiqc.collect() file "bbduk_trimmed/*" from bbduk_trimmed_multiqc.collect() - file "bbduk_trimmed_fastqc/*" from fastqc_bbduk_trimmed.collect() + file "bbduk_trimmed_fastqc/*" from fastqc_bbduk_trimmed_multiqc.collect() file "prokka/*" from annotation_multiqc.collect() file quast_evaluation_all from quast_multiqc From ce8c9184ff7825f9e15f1dd92b0a109ced8e3d1a Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 15:37:58 +0200 Subject: [PATCH 24/25] Shortened the specs for output from trimming --- asm_annot.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/asm_annot.nf b/asm_annot.nf index 1e1f9f5..02ff2c4 100644 --- a/asm_annot.nf +++ b/asm_annot.nf @@ -116,8 +116,7 @@ process run_trim { output: set pair_id, file("${pair_id}*_concat_stripped_trimmed.fq.gz") into (reads_trimmed, trimmed_fastqc) - file "${pair_id}_stripped_trimmed_stderr.log" - file "${pair_id}_stripped_trimmed_stdout.log" + file "${pair_id}_stripped_trimmed_*.log" file "${pair_id}_stripped_trimmed_stderr.log" into bbduk_trimmed_multiqc From b3b656c268a1e4ed143505547b50452aa1cde89c Mon Sep 17 00:00:00 2001 From: Karin Date: Wed, 17 Aug 2022 15:51:52 +0200 Subject: [PATCH 25/25] Added report option to run_track --- run_track.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/run_track.sh b/run_track.sh index aa32fe4..660882a 100755 --- a/run_track.sh +++ b/run_track.sh @@ -14,6 +14,9 @@ profile=$3 out_directory=$4 workdir=${5:-$USERWORK/bifrost_work} +#to add to report name +now=$(date +"%Y%m%d_%H%M") + mkdir -p ${out_directory}/config_files git --git-dir ${script_directory}/.git branch -v |grep "\*" | awk '{print $2, $3}' > ${out_directory}/config_files/pipeline_version.log bash ${script_directory}/bin/printversions.sh ${profile} ${out_directory}/config_files/software_versions.txt @@ -22,4 +25,5 @@ cp ${template} ${out_directory}/config_files echo "TEMPORARY WORKING DIRECTORY IS ${workdir}" -nextflow -c ${template} run -resume ${script_directory}/${track_script} -profile ${profile} --out_dir=${out_directory} -work-dir ${workdir} +nextflow -c ${template} run -resume ${script_directory}/${track_script} \ +-profile ${profile} --out_dir=${out_directory} -work-dir ${workdir} -with-report ${now}_run_report.html