diff --git a/bin/taxref_reformat_sidle.sh b/bin/taxref_reformat_sidle.sh
index 48267f73..c1e7c3c5 100755
--- a/bin/taxref_reformat_sidle.sh
+++ b/bin/taxref_reformat_sidle.sh
@@ -1,19 +1,22 @@
 #!/bin/sh
 
+# Clustering identity of the reference files to extract; falls back to 99 when no argument is given
+derep="${1:-99}"
+
 # Untar any tar file in the working directory
 tar xzf database.tar.gz
 
 # Greengenes 13_8
 if [ -d "gg_13_8_otus" ]; then
-    mv gg_13_8_otus/rep_set/99_otus.fasta gg_13_8_otus_rep_set_99_otus.seq.fasta
-    mv gg_13_8_otus/rep_set_aligned/99_otus.fasta gg_13_8_otus_rep_set_aligned_99_otus.alnseq.fasta
-    mv gg_13_8_otus/taxonomy/99_otu_taxonomy.txt gg_13_8_otus_taxonomy_99_otu_taxonomy.tax.txt
+    mv "gg_13_8_otus/rep_set/${derep}_otus.fasta" "gg_13_8_otus_rep_set_${derep}_otus.seq.fasta"
+    mv "gg_13_8_otus/rep_set_aligned/${derep}_otus.fasta" "gg_13_8_otus_rep_set_aligned_${derep}_otus.alnseq.fasta"
+    mv "gg_13_8_otus/taxonomy/${derep}_otu_taxonomy.txt" "gg_13_8_otus_taxonomy_${derep}_otu_taxonomy.tax.txt"
     # remove uncompressed folder
     rm -r gg_13_8_otus
 elif [ -d "SILVA_128_QIIME_release" ]; then
-    mv SILVA_128_QIIME_release/rep_set/rep_set_all/99/99_otus.fasta SILVA_128_QIIME_release_rep_set_all_99_otus.seq.fasta
-    gunzip -c /SILVA_128_QIIME_release/rep_set_aligned/99/99_otus_aligned.fasta.gz > SILVA_128_QIIME_release_rep_set_aligned_99_otus_aligned.alnseq.fasta
-    mv SILVA_128_QIIME_release/taxonomy/taxonomy_all/99/consensus_taxonomy_7_levels SILVA_128_QIIME_release_taxonomy_all_99_consensus_taxonomy_7_levels.tax.txt
+    mv "SILVA_128_QIIME_release/rep_set/rep_set_all/${derep}/${derep}_otus.fasta" "SILVA_128_QIIME_release_rep_set_all_${derep}_otus.seq.fasta"
+    gunzip -c "SILVA_128_QIIME_release/rep_set_aligned/${derep}/${derep}_otus_aligned.fasta.gz" > "SILVA_128_QIIME_release_rep_set_aligned_${derep}_otus_aligned.alnseq.fasta"
+    mv "SILVA_128_QIIME_release/taxonomy/taxonomy_all/${derep}/consensus_taxonomy_7_levels" "SILVA_128_QIIME_release_taxonomy_all_${derep}_consensus_taxonomy_7_levels.tax.txt"
     # remove uncompressed folder
     rm -r SILVA_128_QIIME_release
 else
diff --git a/conf/ref_databases.config b/conf/ref_databases.config
index 7b702dad..2d11769e 100644
--- a/conf/ref_databases.config
+++ b/conf/ref_databases.config
@@ -524,5 +524,14 @@ params {
             fmtscript = "taxref_reformat_sidle.sh"
             taxlevels = "D,P,C,O,F,G,S"
         }
+        'greengenes88' {
+            title = "Greengenes - Version 13_8 - clustered at 88% similarity - for testing purposes only"
+            file = [ "ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_8_otus.tar.gz" ]
+            tree_qza = [ "https://data.qiime2.org/2021.4/common/sepp-refs-gg-13-8.qza" ]
+            citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139"
+            fmtscript = "taxref_reformat_sidle.sh"
+            taxlevels = "D,P,C,O,F,G,S"
+            derep = "88"
+        }
     }
 }
diff --git a/conf/test_multiregion.config b/conf/test_multiregion.config
index d320b192..1fc0fc9b 100644
--- a/conf/test_multiregion.config
+++ b/conf/test_multiregion.config
@@ -23,11 +23,12 @@ params {
     input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/samplesheet_multiregion.tsv"
     metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/metadata_multiregion.tsv"
     input_multiregion = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/regions_multiregion.tsv"
-    sidle_ref_taxonomy = "greengenes"
+    sidle_ref_taxonomy = "greengenes88"
 
     // Prevent default taxonomic classification
     skip_dada_taxonomy = true
 
-    //restrict ANCOM analysis to higher taxonomic levels
-    tax_agglom_max = 4
+    // Reduce runtimes
+    skip_alpha_rarefaction = true
+    tax_agglom_max = 3
 }
diff --git a/modules/local/format_taxonomy_sidle.nf b/modules/local/format_taxonomy_sidle.nf
index a6b909f5..72e55cd7 100644
--- a/modules/local/format_taxonomy_sidle.nf
+++ b/modules/local/format_taxonomy_sidle.nf
@@ -21,8 +21,10 @@ process FORMAT_TAXONOMY_SIDLE {
     task.ext.when == null || task.ext.when
 
     script:
+    // Value handed to the format script as its first argument; database entries without a "derep" key use "99"
+    def derep = params.sidle_ref_databases[params.sidle_ref_taxonomy]["derep"] ?: "99"
     """
-    ${params.sidle_ref_databases[params.sidle_ref_taxonomy]["fmtscript"]}
+    ${params.sidle_ref_databases[params.sidle_ref_taxonomy]["fmtscript"]} ${derep}
 
     #Giving out information
     echo -e "--sidle_ref_taxonomy: ${params.sidle_ref_taxonomy}\n" >ref_taxonomy.${suffix}.txt
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 34f3ea1f..742f5cfc 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -508,7 +508,13 @@
                     "type": "string",
                     "help_text": "",
                     "description": "Name of supported database, and optionally also version number",
-                    "enum": ["silva", "silva=128", "greengenes", "greengenes=13_8"]
+                    "enum": [
+                        "silva",
+                        "silva=128",
+                        "greengenes",
+                        "greengenes=13_8",
+                        "greengenes88"
+                    ]
                 },
                 "sidle_ref_tax_custom": {
                     "type": "string",