From 8cb65f21bbcc5e5f24a67138911dd9306607b7e4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 15:47:48 +0200 Subject: [PATCH 01/10] Update single_end_config --- conf/test_single_end.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/test_single_end.config b/conf/test_single_end.config index fb60a3d0..74bc16b5 100644 --- a/conf/test_single_end.config +++ b/conf/test_single_end.config @@ -29,7 +29,6 @@ params { binning_map_mode = 'own' min_length_unbinned_contigs = 1000000 max_unbinned_contigs = 2 - skip_gtdbtk = true skip_concoct = true skip_binqc = true skip_gtdbtk = true From 3d5e4cffee7f6594f2c9193a60784c9dd840b68a Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 30 May 2024 15:56:37 +0200 Subject: [PATCH 02/10] Start restructuring tests --- conf/test_adapterremoval.config | 36 ---------------- conf/test_alternatives.config | 0 conf/test_ancient_dna.config | 42 ------------------ conf/test_bbnorm.config | 41 ------------------ conf/test_bigdb.config | 0 conf/test_concoct.config | 43 ------------------- conf/test_extras.config | 0 conf/test_host_rm.config | 31 ------------- conf/test_hybrid.config | 30 ------------- ...inement.config => test_preassembly.config} | 0 conf/test_virus_identification.config | 43 ------------------- nextflow.config | 18 +++----- 12 files changed, 6 insertions(+), 278 deletions(-) delete mode 100644 conf/test_adapterremoval.config create mode 100644 conf/test_alternatives.config delete mode 100644 conf/test_ancient_dna.config delete mode 100644 conf/test_bbnorm.config create mode 100644 conf/test_bigdb.config delete mode 100644 conf/test_concoct.config create mode 100644 conf/test_extras.config delete mode 100644 conf/test_host_rm.config delete mode 100644 conf/test_hybrid.config rename conf/{test_binrefinement.config => test_preassembly.config} (100%) delete mode 100644 conf/test_virus_identification.config diff --git a/conf/test_adapterremoval.config 
b/conf/test_adapterremoval.config deleted file mode 100644 index 7ec304e8..00000000 --- a/conf/test_adapterremoval.config +++ /dev/null @@ -1,36 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/mag -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile for running with AdapterRemoval and domain classification' - config_profile_description = 'Minimal test dataset to check pipeline function with AdapterRemoval data and domain classification.' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.euk.csv' - centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' - kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' - metaeuk_db = params.pipelines_testdata_base_path + '/modules/data/proteomics/database/yeast_UPS.fasta' - skip_krona = true - min_length_unbinned_contigs = 1 - max_unbinned_contigs = 2 - busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - clip_tool = 'adapterremoval' - skip_concoct = true - bin_domain_classification = true -} diff --git a/conf/test_alternatives.config b/conf/test_alternatives.config new file mode 100644 index 00000000..e69de29b diff --git a/conf/test_ancient_dna.config b/conf/test_ancient_dna.config deleted file mode 100644 index e9d48205..00000000 --- a/conf/test_ancient_dna.config +++ /dev/null @@ 
-1,42 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/mag -profile test_ancient_dna, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Ancient DNA test profile ' - config_profile_description = 'Minimal test dataset to check pipeline function for ancient DNA step' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' - centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' - kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' - skip_krona = true - min_length_unbinned_contigs = 1 - max_unbinned_contigs = 2 - busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - ancient_dna = true - binning_map_mode = 'own' - skip_spades = false - skip_spadeshybrid = true - bcftools_view_high_variant_quality = 0 - bcftools_view_medium_variant_quality = 0 - bcftools_view_minimal_allelesupport = 3 - refine_bins_dastool = true - refine_bins_dastool_threshold = 0 - skip_concoct = true -} diff --git a/conf/test_bbnorm.config b/conf/test_bbnorm.config deleted file mode 100644 index 35442fea..00000000 --- a/conf/test_bbnorm.config +++ /dev/null @@ -1,41 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/mag -profile test, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' - keep_phix = true - skip_clipping = true - skip_prokka = true - skip_prodigal = true - skip_quast = true - skip_binning = true - centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' - kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' - skip_krona = true - min_length_unbinned_contigs = 1 - max_unbinned_contigs = 2 - busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" - busco_clean = true - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - bbnorm = true - coassemble_group = true -} diff --git a/conf/test_bigdb.config b/conf/test_bigdb.config new file mode 100644 index 00000000..e69de29b diff --git a/conf/test_concoct.config b/conf/test_concoct.config deleted file mode 100644 index b427fd2c..00000000 --- a/conf/test_concoct.config +++ /dev/null @@ -1,43 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Runs input data but skipping all possible steps to allow for a fast testing - profile for input checks etc. 
- - Use as follows: - nextflow run nf-core/mag -profile test_nothing, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test CONCOCT profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' - centrifuge_db = null - kraken2_db = null - skip_krona = true - skip_clipping = true - skip_adapter_trimming = false - skip_spades = true - skip_spadeshybrid = true - skip_megahit = false - skip_quast = true - skip_prodigal = true - skip_binning = false - skip_metabat2 = false - skip_maxbin2 = true - skip_concoct = false - skip_prokka = true - skip_binqc = true - skip_gtdbtk = true - gtdbtk_min_completeness = 0 -} diff --git a/conf/test_extras.config b/conf/test_extras.config new file mode 100644 index 00000000..e69de29b diff --git a/conf/test_host_rm.config b/conf/test_host_rm.config deleted file mode 100644 index 68c03fb1..00000000 --- a/conf/test_host_rm.config +++ /dev/null @@ -1,31 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/mag -profile test_host_rm, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - host_fasta = params.pipelines_testdata_base_path + 'mag/host_reference/genome.hg38.chr21_10000bp_region.fa' - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.host_rm.csv' - min_length_unbinned_contigs = 1 - max_unbinned_contigs = 2 - busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - skip_concoct = true -} diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config deleted file mode 100644 index ca6f4c74..00000000 --- a/conf/test_hybrid.config +++ /dev/null @@ -1,30 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/mag -profile test_hybrid, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.hybrid.csv' - min_length_unbinned_contigs = 1 - max_unbinned_contigs = 2 - busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - skip_concoct = true -} diff --git a/conf/test_binrefinement.config b/conf/test_preassembly.config similarity index 100% rename from conf/test_binrefinement.config rename to conf/test_preassembly.config diff --git a/conf/test_virus_identification.config b/conf/test_virus_identification.config deleted file mode 100644 index 24893899..00000000 --- a/conf/test_virus_identification.config +++ /dev/null @@ -1,43 +0,0 @@ -/* -======================================================================================== - Nextflow config file for running minimal tests -======================================================================================== - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/mag -profile test_virus_identification, --outdir - ----------------------------------------------------------------------------------------- -*/ - -params { - config_profile_name = 'Test profile for running virus_identification' - config_profile_description = 'Minimal test dataset to check pipeline function virus identification' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - // Input data - input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' - run_virus_identification = true - genomad_splits = 7 - - // For computational efficiency - reads_minlength = 150 - coassemble_group = true - skip_gtdbtk = true - gtdbtk_min_completeness = 0 - skip_binning = true - skip_prokka = true - skip_spades = true - skip_spadeshybrid = true - skip_quast = true - skip_prodigal = true - skip_krona = true - skip_adapter_trimming = true - skip_metabat2 = true - skip_maxbin2 = true - skip_busco = true -} diff --git a/nextflow.config b/nextflow.config index 0d6840bf..656a6a34 100644 --- a/nextflow.config +++ b/nextflow.config @@ -313,20 +313,14 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - test_host_rm { includeConfig 'conf/test_host_rm.config' } - test_hybrid { includeConfig 'conf/test_hybrid.config' } + test_single_end { includeConfig 'conf/test_single_end.config' } + test_alternatives { includeConfig 'conf/test_alternatives.config' } + test_pressembly { includeConfig 'conf/test_preassembly.config' } test_hybrid_host_rm { includeConfig 'conf/test_hybrid_host_rm.config' } - test_busco_auto { includeConfig 'conf/test_busco_auto.config' } - test_ancient_dna { includeConfig 'conf/test_ancient_dna.config' } - test_adapterremoval { includeConfig 'conf/test_adapterremoval.config' } - test_binning_entry { includeConfig 'conf/test_binning_entry.config' } - test_binrefinement { includeConfig 
'conf/test_binrefinement.config' } - test_no_clipping { includeConfig 'conf/test_no_clipping.config' } - test_bbnorm { includeConfig 'conf/test_bbnorm.config' } test_nothing { includeConfig 'conf/test_nothing.config' } - test_virus_identification { includeConfig 'conf/test_virus_identification.config' } - test_single_end { includeConfig 'conf/test_single_end.config' } - test_concoct { includeConfig 'conf/test_concoct.config' } + test_extras { includeConfig 'conf/test_extras.config' } + test_bigdb { includeConfig 'conf/test_bigdb.config' } + test_busco_auto { includeConfig 'conf/test_busco_auto.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 500272eaafd53646fe024e6d98a2df0e2effe506 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Sat, 22 Jun 2024 08:28:45 +0200 Subject: [PATCH 03/10] Add test_alternative config --- .github/workflows/ci.yml | 18 ++++++---------- conf/test_alternatives.config | 40 +++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3afa7887..bc88d700 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,18 +55,14 @@ jobs: strategy: matrix: # Run remaining test profiles with minimum nextflow version - profile: - [ - test_host_rm, - test_hybrid, - test_hybrid_host_rm, - test_busco_auto, - test_ancient_dna, - test_adapterremoval, - test_binrefinement, - test_virus_identification, + profile: [ + test_nothing, ## fast alternative to kill all other jobs test_single_end, - test_concoct, + test_alternatives, + test_preassembly, + test_hybrid_host_rm, + test_extras, + test_bigdb, ] steps: - name: Free some space diff --git a/conf/test_alternatives.config b/conf/test_alternatives.config index e69de29b..c30ff66c 100644 --- a/conf/test_alternatives.config +++ b/conf/test_alternatives.config @@ -0,0 +1,40 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests of alternative tools from default +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test + of all alternative tools from a default test run. + + Use as follows: + nextflow run nf-core/mag -profile test_alternatives, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test alternatives profile' + config_profile_description = 'Minimal test dataset with alternative tools to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data -> Defaults + input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.multirun.csv' + centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' + kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' + skip_krona = false + min_length_unbinned_contigs = 1 + max_unbinned_contigs = 2 + busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" + busco_clean = true + skip_gtdbtk = true + gtdbtk_min_completeness = 0 + skip_concoct = true + + // Alternate tools from default test + clip_tool = 'adapterremoval' + binqc_tool = 'checkm' + bin_domain_classification = true // i.e., run tiara +} From 21bfb3b7abfa5c4e6c64bddc082e3eeffe36f284 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Sat, 22 Jun 2024 09:41:02 +0200 Subject: [PATCH 04/10] Add preassembly_binrefine --- .github/workflows/ci.yml | 8 ++++---- ...nfig => test_preassembly_binrefine.config} | 8 ++++---- nextflow.config | 20 +++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) rename conf/{test_preassembly.config => test_preassembly_binrefine.config} (82%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc88d700..b4580f07 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,12 +57,12 @@ jobs: # Run remaining test profiles with minimum nextflow version profile: [ test_nothing, ## fast alternative to kill all other jobs - test_single_end, + #test_single_end, test_alternatives, - test_preassembly, + test_preassembly_binrefine, test_hybrid_host_rm, - test_extras, - test_bigdb, + #test_extras, + #test_bigdb, ] steps: - name: Free some space diff --git a/conf/test_preassembly.config b/conf/test_preassembly_binrefine.config similarity index 82% rename from conf/test_preassembly.config rename to conf/test_preassembly_binrefine.config index 180775e2..e0b1eeb9 100644 --- a/conf/test_preassembly.config +++ b/conf/test_preassembly_binrefine.config @@ -22,8 +22,6 @@ params { // Input data input = params.pipelines_testdata_base_path + 'mag/samplesheets/samplesheet.csv' assembly_input = params.pipelines_testdata_base_path + 'mag/samplesheets/assembly_samplesheet.csv' - centrifuge_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_cf.tar.gz' - kraken2_db = params.pipelines_testdata_base_path + 'mag/test_data/minigut_kraken.tgz' skip_krona = true min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 @@ -32,7 +30,9 @@ params { gtdbtk_min_completeness = 0 refine_bins_dastool = true refine_bins_dastool_threshold = 0 - // TODO not using 'both' until #489 merged - postbinning_input = 'refined_bins_only' + postbinning_input = 'both' busco_clean = true + + // For runtime reasons + skip_prokka = true //
CONCOCT makes hundreds of bins, and Prokka is slow (keeping on runs for 28m, without 22m), } diff --git a/nextflow.config b/nextflow.config index 656a6a34..b054191f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -311,16 +311,16 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_single_end { includeConfig 'conf/test_single_end.config' } - test_alternatives { includeConfig 'conf/test_alternatives.config' } - test_pressembly { includeConfig 'conf/test_preassembly.config' } - test_hybrid_host_rm { includeConfig 'conf/test_hybrid_host_rm.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - test_extras { includeConfig 'conf/test_extras.config' } - test_bigdb { includeConfig 'conf/test_bigdb.config' } - test_busco_auto { includeConfig 'conf/test_busco_auto.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_single_end { includeConfig 'conf/test_single_end.config' } + test_alternatives { includeConfig 'conf/test_alternatives.config' } + test_pressembly_binrefine { includeConfig 'conf/test_preassembly_binrefine.config' } + test_hybrid_host_rm { includeConfig 'conf/test_hybrid_host_rm.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + test_extras { includeConfig 'conf/test_extras.config' } + test_bigdb { includeConfig 'conf/test_bigdb.config' } + test_busco_auto { includeConfig 'conf/test_busco_auto.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From 13f26b1f28aa3387bb77d4a1964195f392e55d65 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 27 Jun 2024 14:23:22 +0200 Subject: [PATCH 05/10] TODO --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b4580f07..2f752a6a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: test_nothing, ## fast alternative to kill all other jobs #test_single_end, test_alternatives, - test_preassembly_binrefine, + test_preassembly_binrefine, ## TODO CHECK OUTPUT test_hybrid_host_rm, #test_extras, #test_bigdb, From f9b15e8d948de2786974bbf1766ceafd690ae6a4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 4 Jul 2024 15:30:44 +0200 Subject: [PATCH 06/10] Activate single_end test --- .github/workflows/ci.yml | 4 ++-- nextflow.config | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f752a6a..6ca5a491 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,8 +56,8 @@ jobs: matrix: # Run remaining test profiles with minimum nextflow version profile: [ - test_nothing, ## fast alternative to kill all other jobs - #test_single_end, + test_nothing, ## fast config to kill all other jobs if something fundamentally wrong + test_single_end, test_alternatives, test_preassembly_binrefine, ## TODO CHECK OUTPUT test_hybrid_host_rm, diff --git a/nextflow.config b/nextflow.config index 5f27331b..a6f433d6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -311,16 +311,16 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_single_end { includeConfig 'conf/test_single_end.config' } - test_alternatives { includeConfig 'conf/test_alternatives.config' } - test_pressembly_binrefine { includeConfig 'conf/test_preassembly_binrefine.config' } - test_hybrid_host_rm { includeConfig
'conf/test_hybrid_host_rm.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - test_extras { includeConfig 'conf/test_extras.config' } - test_bigdb { includeConfig 'conf/test_bigdb.config' } - test_busco_auto { includeConfig 'conf/test_busco_auto.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_single_end { includeConfig 'conf/test_single_end.config' } + test_alternatives { includeConfig 'conf/test_alternatives.config' } + test_preassembly_binrefine { includeConfig 'conf/test_preassembly_binrefine.config' } + test_hybrid_host_rm { includeConfig 'conf/test_hybrid_host_rm.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + test_extras { includeConfig 'conf/test_extras.config' } + test_bigdb { includeConfig 'conf/test_bigdb.config' } + test_busco_auto { includeConfig 'conf/test_busco_auto.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From ea6a7b8664af16011a438fcc4a36b85dec0f2093 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 4 Jul 2024 15:38:12 +0200 Subject: [PATCH 07/10] Remove dedicated CheckM ci job as now in test_alternatives --- .github/workflows/ci.yml | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6ca5a491..706a0773 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,33 +81,3 @@ jobs: - name: Run pipeline with ${{ matrix.profile }} test profile run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }},docker --outdir ./results - - checkm: - name: Run single test to checkm due to database download - # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/mag') }} - runs-on: ubuntu-latest - - steps: - - name: Free some space - run: | - sudo rm -rf "/usr/local/share/boost" - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - - name: Download and prepare CheckM database - run: | - mkdir -p databases/checkm - wget https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz -P databases/checkm - tar xzvf databases/checkm/checkm_data_2015_01_16.tar.gz -C databases/checkm/ - - - name: Run pipeline with ${{ matrix.profile }} test profile - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --binqc_tool checkm --checkm_db databases/checkm From 8aa74f0ecfe455d679f58a25a6f534d484601003 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Mon, 8 Jul 2024 10:27:12 +0200 Subject: [PATCH 08/10] Add fix for genomad failure --- conf/test_preassembly_binrefine.config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/conf/test_preassembly_binrefine.config b/conf/test_preassembly_binrefine.config index e0b1eeb9..bb73ddcb 100644 --- a/conf/test_preassembly_binrefine.config +++ b/conf/test_preassembly_binrefine.config @@ -25,13 +25,17 @@ params { skip_krona = true min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 + skip_metaeuk = false + run_virus_identification = true + genomad_splits = 4 busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz" skip_gtdbtk = true gtdbtk_min_completeness = 0 refine_bins_dastool = true refine_bins_dastool_threshold = 0 - postbinning_input = 'both' + postbinning_input = 'refined_bins_only' busco_clean = true + run_gunc = true // For runtime reasons skip_prokka = true // CONCOCT makes hundreds of bins, and Prokka is slow (keeping on runs for 28m, without 22m), From 95f44bcbb38af1d92dfc4fbc644856b518b61573 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 18 Jul 2024 15:12:38 +0200 Subject: [PATCH 09/10] Fix problematic Prokka module --- modules.json | 2 +- modules/nf-core/prokka/environment.yml | 7 + modules/nf-core/prokka/main.nf | 4 +- modules/nf-core/prokka/meta.yml | 5 +- modules/nf-core/prokka/tests/main.nf.test | 46 +++++++ .../nf-core/prokka/tests/main.nf.test.snap | 128 ++++++++++++++++++ modules/nf-core/prokka/tests/tags.yml | 2 + 7 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 modules/nf-core/prokka/environment.yml create mode 100644 modules/nf-core/prokka/tests/main.nf.test create mode 100644 modules/nf-core/prokka/tests/main.nf.test.snap create mode 100644 modules/nf-core/prokka/tests/tags.yml diff --git a/modules.json b/modules.json index 16a805b8..b37eea51 100644 --- a/modules.json +++ b/modules.json @@ -219,7 +219,7 @@ }, "prokka": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "49ebda931c36c2b282f7958d00e1236b751f1031", "installed_by": ["modules"] }, "pydamage/analyze": { diff --git a/modules/nf-core/prokka/environment.yml b/modules/nf-core/prokka/environment.yml new file mode 100644 index 00000000..d7c44d5a --- /dev/null +++ b/modules/nf-core/prokka/environment.yml @@ -0,0 +1,7 @@ +name: prokka +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::prokka=1.14.6 diff --git a/modules/nf-core/prokka/main.nf b/modules/nf-core/prokka/main.nf index 60fbe232..adfda037 100644 --- a/modules/nf-core/prokka/main.nf +++ b/modules/nf-core/prokka/main.nf @@ -2,9 +2,9 @@ process PROKKA { tag "$meta.id" label 'process_low' - conda "bioconda::prokka=1.14.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/prokka%3A1.14.6--pl5321hdfd78af_4' : + 'https://depot.galaxyproject.org/singularity/prokka:1.14.6--pl5321hdfd78af_4' : 'biocontainers/prokka:1.14.6--pl5321hdfd78af_4' }" input: diff --git a/modules/nf-core/prokka/meta.yml b/modules/nf-core/prokka/meta.yml index 7fc9e185..9d82ffac 100644 --- a/modules/nf-core/prokka/meta.yml +++ b/modules/nf-core/prokka/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://github.com/tseemann/prokka doi: "10.1093/bioinformatics/btu153" licence: ["GPL v2"] - input: - meta: type: map @@ -27,7 +26,6 @@ input: - prodigal_tf: type: file description: Training file to use for Prodigal (optional) - output: - meta: type: map @@ -86,6 +84,7 @@ output: type: file description: tab-separated file of all features (locus_tag,ftype,len_bp,gene,EC_number,COG,product) pattern: "*.{tsv}" - authors: - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/prokka/tests/main.nf.test b/modules/nf-core/prokka/tests/main.nf.test new file mode 100644 index 00000000..3b59ef3a --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test @@ -0,0 +1,46 @@ +nextflow_process { + + name "Test Process PROKKA" + script "../main.nf" + process "PROKKA" + + tag "modules" + tag "modules_nfcore" + tag "prokka" + + test("Prokka - sarscov2 - genome.fasta") { + + when { + process { + """ + input[0] = Channel.fromList([ + tuple([ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)) + ]) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.gff).match("gff") }, + { assert snapshot(process.out.fna).match("fna") }, + { assert snapshot(process.out.faa).match("faa") }, + { assert snapshot(process.out.ffn).match("ffn") }, + { assert snapshot(process.out.fsa).match("fsa") }, + { assert snapshot(process.out.tbl).match("tbl") }, + { assert 
snapshot(process.out.err).match("err") }, + { assert snapshot(process.out.txt).match("txt") }, + { assert snapshot(process.out.tsv).match("tsv") }, + { assert path(process.out.gbk.get(0).get(1)).exists() }, + { assert path(process.out.log.get(0).get(1)).exists() }, + { assert path(process.out.sqn.get(0).get(1)).exists() } + ) + } + + } + +} diff --git a/modules/nf-core/prokka/tests/main.nf.test.snap b/modules/nf-core/prokka/tests/main.nf.test.snap new file mode 100644 index 00000000..859e8df8 --- /dev/null +++ b/modules/nf-core/prokka/tests/main.nf.test.snap @@ -0,0 +1,128 @@ +{ + "txt": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.txt:md5,b40e485ffc8eaf1feacf8d79d9751a33" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.84139118" + }, + "err": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.err:md5,b3daedc646fddd422824e2b3e5e9229d" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.837204155" + }, + "fsa": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fsa:md5,71bbefcb7f12046bcd3263f58cfd5404" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.803513721" + }, + "gff": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,5dbfb8fcf2db020564c16045976a0933" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.710100529" + }, + "tsv": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tsv:md5,da7c720c3018c5081d6a70b517b7d450" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.846026731" + }, + "faa": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.faa:md5,a4ceda83262b3c222a6b1f508fb9e24b" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.722112433" + }, + "fna": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fna:md5,787307f29a263e5657cc276ebbf7e2b3" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.717325796" + }, + "ffn": { + "content": [ + [ + [ + { + 
"id": "test", + "single_end": false + }, + "test.ffn:md5,80f474b5367b7ea5ed23791935f65e34" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.727149899" + }, + "tbl": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d8f816a066ced94b62d9618b13fb8add" + ] + ] + ], + "timestamp": "2023-12-14T15:19:54.831206944" + } +} \ No newline at end of file diff --git a/modules/nf-core/prokka/tests/tags.yml b/modules/nf-core/prokka/tests/tags.yml new file mode 100644 index 00000000..a2dc7bdc --- /dev/null +++ b/modules/nf-core/prokka/tests/tags.yml @@ -0,0 +1,2 @@ +prokka: + - "modules/nf-core/prokka/**" From 8711913aa7686b30a68584cdd1ac55988edc97b8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 15 Aug 2024 15:52:02 +0200 Subject: [PATCH 10/10] Add metaEuk mini database --- conf/test_preassembly_binrefine.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_preassembly_binrefine.config b/conf/test_preassembly_binrefine.config index bb73ddcb..890f844c 100644 --- a/conf/test_preassembly_binrefine.config +++ b/conf/test_preassembly_binrefine.config @@ -26,6 +26,7 @@ params { min_length_unbinned_contigs = 1 max_unbinned_contigs = 2 skip_metaeuk = false + metaeuk_db = 'https://github.com/nf-core/test-datasets/raw/modules/data/proteomics/database/yeast_UPS.fasta' run_virus_identification = true genomad_splits = 4 busco_db = "https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz"