From 1715457d75d1b073f5ac2114cacbcf886876bb27 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 1 Jul 2022 17:19:36 +0100 Subject: [PATCH 01/24] Bump pipeline version to 1.8dev --- CHANGELOG.md | 4 ++++ nextflow.config | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3bbf9a7..8bf55a90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unpublished Version / DEV] + +### Enhancements & fixes + ## [[1.7](https://github.com/nf-core/fetchngs/releases/tag/1.7)] - 2022-07-01 ### :warning: Major enhancements diff --git a/nextflow.config b/nextflow.config index 80ed6507..eb5a19a0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -158,7 +158,7 @@ manifest { description = 'Pipeline to fetch metadata and raw FastQ files from public databases' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '1.7' + version = '1.8dev' } // Load modules.config for DSL2 module specific options From f071b7b8ce83b37ff3cde24b511cc46102eb177b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 15:55:19 +0100 Subject: [PATCH 02/24] Fetch SRR and DRR id info from ENA API instead of NCBI API --- bin/sra_ids_to_runinfo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py index 21c7225d..d85b0996 100755 --- a/bin/sra_ids_to_runinfo.py +++ b/bin/sra_ids_to_runinfo.py @@ -200,17 +200,17 @@ class DatabaseResolver: _SRA_PREFIXES = { "PRJNA", "SAMN", - "SRR", "DRA", "DRP", - "DRR", "DRS", "DRX", "PRJDB", "SAMD", } _ENA_PREFIXES = { - "ERR" + "ERR", + "SRR", + "DRR" } @classmethod From b4f05430b6d055cd5d745b9dd35d574d0b99a9e2 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 15:58:39 +0100 Subject: [PATCH 03/24] Update CHANGELOG --- CHANGELOG.md 
| 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf55a90..59d123ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- Fetch `SRR` and `DRR` metadata from ENA API instead of NCBI API to bypass frequent breaking changes + ## [[1.7](https://github.com/nf-core/fetchngs/releases/tag/1.7)] - 2022-07-01 ### :warning: Major enhancements From a7b3ff8815a5e6728f54ecd2a2544fc3d63290a3 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 30 Aug 2022 13:31:17 +0000 Subject: [PATCH 04/24] Template update for nf-core/tools version 2.5 --- .editorconfig | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/ci.yml | 23 ++------ .github/workflows/linting.yml | 38 +++++++++++-- CHANGELOG.md | 2 +- CITATION.cff | 56 +++++++++++++++++++ LICENSE | 2 +- README.md | 23 +++----- assets/email_template.txt | 1 - bin/check_samplesheet.py | 41 +++++++------- conf/base.config | 5 ++ docs/usage.md | 12 ++-- lib/WorkflowFetchngs.groovy | 5 +- lib/WorkflowMain.groovy | 9 ++- main.nf | 2 +- modules.json | 22 +++++--- .../templates/dumpsoftwareversions.py | 14 +++-- nextflow.config | 25 ++++++++- 18 files changed, 189 insertions(+), 96 deletions(-) create mode 100644 CITATION.cff diff --git a/.editorconfig b/.editorconfig index b6b31907..b78de6e6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1f17f774..c9f23b88 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/fetc - [ ] This comment 
contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/fetchngs/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/fetchngs _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/fetchngs/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/fetchngs _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3787d962..1da98e12 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none jobs: test: @@ -20,27 +19,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - 
name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..8a5ce69b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: @@ -42,15 +72,11 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index c79ecb8d..83d55de4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.6dev - [date] +## v1.8dev - [date] Initial release of nf-core/fetchngs, created with the [nf-core](https://nf-co.re/) template. diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..4533e2f2 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." 
+version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/LICENSE b/LICENSE index 515e402d..7a9b6d5a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Harshil Patel +Copyright (c) Harshil Patel, Moritz E. 
Beber and Jose Espinosa-Carrasco Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index a3d4c2f1..20636e60 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ # ![nf-core/fetchngs](docs/images/nf-core-fetchngs_logo_light.png#gh-light-mode-only) ![nf-core/fetchngs](docs/images/nf-core-fetchngs_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI Status](https://github.com/nf-core/fetchngs/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/fetchngs/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/fetchngs/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/fetchngs/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/fetchngs/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/fetchngs/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run 
with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/fetchngs) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23fetchngs-4A154B?logo=slack)](https://nfcore.slack.com/channels/fetchngs) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23fetchngs-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/fetchngs)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/fetchngs/results). +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/fetchngs/results). ## Pipeline summary @@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/fetchngs -profile test,YOURPROFILE --outdir ``` @@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - ```console + ```bash nextflow run nf-core/fetchngs --input samplesheet.csv --outdir --genome GRCh37 -profile ``` @@ -67,7 +62,7 @@ The nf-core/fetchngs pipeline comes with documentation about the pipeline [usage ## Credits -nf-core/fetchngs was originally written by Harshil Patel. +nf-core/fetchngs was originally written by Harshil Patel, Moritz E. Beber and Jose Espinosa-Carrasco. 
We thank the following people for their extensive assistance in the development of this pipeline: diff --git a/assets/email_template.txt b/assets/email_template.txt index 2de93008..f9393aa8 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/fetchngs v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3652c63c..9a8b8962 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -11,7 +11,6 @@ from collections import Counter from pathlib import Path - logger = logging.getLogger() @@ -79,13 +78,15 @@ def validate_and_transform(self, row): def _validate_sample(self, row): """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") # Sanitize samples slightly. row[self._sample_col] = row[self._sample_col].replace(" ", "_") def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -97,36 +98,34 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." 
+ if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) def validate_unique_samples(self): """ Assert that the combination of sample name and FASTQ filename is unique. - In addition to the validation, also rename the sample if more than one sample, - FASTQ file combination exists. + In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." 
- if len({pair[0] for pair in self._seen}) < len(self._seen): - counts = Counter(pair[0] for pair in self._seen) - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - if counts[sample] > 1: - row[self._sample_col] = f"{sample}_T{seen[sample]}" + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" def read_head(handle, num_lines=10): diff --git a/conf/base.config b/conf/base.config index 211eb24a..a15391ba 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,11 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/docs/usage.md b/docs/usage.md index 9ac1f8fc..b8f67abe 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. 
-```console +```bash --input '[path to samplesheet file]' ``` @@ -56,7 +56,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: -```console +```bash nextflow run nf-core/fetchngs --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` @@ -64,9 +64,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -75,7 +75,7 @@ work # Directory containing the nextflow working files When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/fetchngs ``` @@ -251,6 +251,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. 
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/WorkflowFetchngs.groovy b/lib/WorkflowFetchngs.groovy index a9c0bccf..4f66a396 100755 --- a/lib/WorkflowFetchngs.groovy +++ b/lib/WorkflowFetchngs.groovy @@ -10,6 +10,7 @@ class WorkflowFetchngs { public static void initialise(params, log) { genomeExistsError(params, log) + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." System.exit(1) @@ -41,9 +42,7 @@ class WorkflowFetchngs { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text - } - - // + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index d6a67b1f..23d4247f 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -59,6 +59,7 @@ class WorkflowMain { } // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) // Check that a -profile or Nextflow config has been provided to run the pipeline @@ -78,17 +79,15 @@ class WorkflowMain { System.exit(1) } } - // // Get attribute from genome config file e.g. 
fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/main.nf b/main.nf index 4da08510..06251112 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,7 @@ nf-core/fetchngs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/fetchngs - Website: https://nf-co.re/fetchngs +Website: https://nf-co.re/fetchngs Slack : https://nfcore.slack.com/channels/fetchngs ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index b599fa52..7ce083dc 100644 --- a/modules.json +++ b/modules.json @@ -3,14 +3,20 @@ "homePage": "https://github.com/nf-core/fetchngs", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index d1390392..787bdb7b 100644 --- 
a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,9 +1,10 @@ #!/usr/bin/env python -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): html = [ @@ -58,11 +59,12 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. 
" + ) except KeyError: versions_by_module[module] = process_versions diff --git a/nextflow.config b/nextflow.config index c8a3c7f1..5a9e9daf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,11 @@ params { // Input options input = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -37,6 +37,7 @@ params { schema_ignore_params = 'genomes' enable_conda = false + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -45,6 +46,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -72,6 +74,7 @@ try { // } + profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -82,6 +85,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } docker { docker.enabled = true docker.userEmulation = true @@ -119,10 +131,16 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -130,6 +148,7 @@ if (!params.igenomes_ignore) { params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -164,12 +183,12 @@ dag { manifest { name = 'nf-core/fetchngs' - author = 'Harshil Patel' + author = 'Harshil Patel, Moritz E. Beber and Jose Espinosa-Carrasco' homePage = 'https://github.com/nf-core/fetchngs' description = 'Pipeline to fetch metadata and raw FastQ files from public databases' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '1.6dev' + version = '1.8dev' } // Load modules.config for DSL2 module specific options From 03aed1aec0d26f979fb1e3cd8811ffb53c8e2fd2 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 1 Sep 2022 13:26:38 +0000 Subject: [PATCH 05/24] Template update for nf-core/tools version 2.5.1 --- bin/check_samplesheet.py | 9 ++++++--- pyproject.toml | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9a8b8962..11b15572 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -98,7 +98,9 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. 
Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True @@ -157,7 +159,7 @@ def sniff_format(handle): handle.seek(0) sniffer = csv.Sniffer() if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") + logger.critical("The given sample sheet does not appear to contain a header.") sys.exit(1) dialect = sniffer.sniff(peek) return dialect @@ -195,7 +197,8 @@ def check_samplesheet(file_in, file_out): reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) # Validate each row. checker = RowChecker() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 From a0672f7a7e9286247227a8f0592a33156b00a9bb Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 4 Oct 2022 21:53:11 +0000 Subject: [PATCH 06/24] Template update for nf-core/tools version 2.6 --- .github/workflows/awsfulltest.yml | 4 ++ .github/workflows/awstest.yml | 4 ++ .prettierignore | 1 + CITATION.cff | 8 +-- assets/adaptivecard.json | 67 +++++++++++++++++++ assets/methods_description_template.yml | 25 +++++++ assets/multiqc_config.yml | 6 +- docs/usage.md | 8 +++ lib/NfcoreTemplate.groovy | 55 +++++++++++++++ lib/Utils.groovy | 21 ++++-- lib/WorkflowFetchngs.groovy | 19 ++++++ main.nf | 3 +- modules.json | 27 ++++---- .../custom/dumpsoftwareversions/main.nf | 8 +-- .../custom/dumpsoftwareversions/meta.yml | 0 .../templates/dumpsoftwareversions.py | 0 modules/nf-core/{modules => }/fastqc/main.nf | 12 ++++ modules/nf-core/{modules => }/fastqc/meta.yml | 0 modules/nf-core/modules/multiqc/main.nf | 31 --------- modules/nf-core/multiqc/main.nf | 53 +++++++++++++++ .../nf-core/{modules => }/multiqc/meta.yml | 15 +++++ nextflow.config | 5 +- nextflow_schema.json | 18 +++++ workflows/fetchngs.nf | 26 ++++--- 24 files changed, 345 insertions(+), 71 deletions(-) create mode 100644 assets/adaptivecard.json create mode 100644 assets/methods_description_template.yml mode change 100755 => 100644 lib/Utils.groovy rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (79%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%) rename modules/nf-core/{modules => }/fastqc/main.nf (85%) rename modules/nf-core/{modules => }/fastqc/meta.yml (100%) delete mode 100644 modules/nf-core/modules/multiqc/main.nf create mode 100644 modules/nf-core/multiqc/main.nf 
rename modules/nf-core/{modules => }/multiqc/meta.yml (73%) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 0067883d..b471dde0 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,3 +28,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/fetchngs/results-${{ github.sha }}" } profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index d6cd3108..6e6a8c52 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,3 +23,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/fetchngs/results-test-${{ github.sha }}" } profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.prettierignore b/.prettierignore index d0e7ae58..eb74a574 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ email_template.html +adaptivecard.json .nextflow* work/ data/ diff --git a/CITATION.cff b/CITATION.cff index 4533e2f2..017666c0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,8 +13,8 @@ authors: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen @@ -39,8 +39,8 @@ prefered-citation: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..26660eca --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + 
"contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/fetchngs v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..f9309867 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-fetchngs-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." 
+section_name: "nf-core/fetchngs Methods Description" +section_href: "https://github.com/nf-core/fetchngs" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You can inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/fetchngs v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index a1144be1..a7d7cf37 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,11 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - software_versions: + "nf-core-fetchngs-methods-description": order: -1000 - "nf-core-fetchngs-summary": + software_versions: order: -1001 + "nf-core-fetchngs-summary": + order: -1002 export_plots: true diff --git a/docs/usage.md b/docs/usage.md index b8f67abe..231046a5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,6 +237,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. 
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..27feb009 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // 
POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The 
observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowFetchngs.groovy b/lib/WorkflowFetchngs.groovy index 4f66a396..7a1e5ab7 100755 --- a/lib/WorkflowFetchngs.groovy +++ b/lib/WorkflowFetchngs.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/fetchngs.nf in the nf-core/fetchngs pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowFetchngs { // @@ -42,6 +44,23 @@ class WorkflowFetchngs { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html }// // Exit pipeline if incorrect --genome key provided // diff --git a/main.nf b/main.nf index 06251112..0bded66d 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,8 @@ nf-core/fetchngs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/fetchngs -Website: https://nf-co.re/fetchngs + + Website: https://nf-co.re/fetchngs Slack : https://nfcore.slack.com/channels/fetchngs ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 7ce083dc..0ea8ef88 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,21 @@ "name": "nf-core/fetchngs", "homePage": "https://github.com/nf-core/fetchngs", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to 
modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d5100..cebb6e05 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 85% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf index ed6b8c50..05730368 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = 
task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac1..00000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..a8159a57 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 73% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891ef..ebc29b27 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -12,11 +12,25 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
+ pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +52,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 5a9e9daf..349fd4c6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,7 +21,9 @@ params { // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -31,6 +33,7 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false @@ -74,7 +77,6 @@ try { // } - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -189,6 +191,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' version = '1.8dev' + doi = '' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 6976f507..14b336cd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -213,12 +213,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", diff --git a/workflows/fetchngs.nf b/workflows/fetchngs.nf index 3f2eb0ff..057b6ad6 100644 --- a/workflows/fetchngs.nf +++ b/workflows/fetchngs.nf @@ -23,8 +23,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +48,9 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,15 +91,20 @@ workflow FETCHNGS { workflow_summary = WorkflowFetchngs.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowFetchngs.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( - ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config.collect().ifEmpty([]), + 
ch_multiqc_custom_config.collect().ifEmpty([]), + ch_multiqc_logo.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() ch_versions = ch_versions.mix(MULTIQC.out.versions) @@ -114,6 +121,9 @@ workflow.onComplete { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } } /* From 91ce8905e078b18139136b6ef82a18e351a6263f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 1 Nov 2022 16:09:59 +0000 Subject: [PATCH 07/24] add a tower.yml --- tower.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tower.yml diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..ec0609cc --- /dev/null +++ b/tower.yml @@ -0,0 +1,15 @@ +reports: + "samplesheet/samplesheet.csv": + display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "samplesheet/id_mappings.csv": + display: "File with selected fields that can be used to rename samples" + "samplesheet/multiqc_config.yml": + display: "MultiQC config file that can be passed to most nf-core pipelines" + "metadata/*.runinfo_ftp.tsv": + display: "Re-formatted metadata file downloaded from the ENA" + "metadata/*.runinfo.tsv": + display: "Original metadata file downloaded from the ENA" + "metadata/*.metadata.txt": + display: "Original metadata file generated using the synapse show command" + "metadata/*.list.txt": + display: "Original output of the synapse list command" From 2c06e2eac4e24474932128a79dc5967364940510 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Tue, 1 Nov 2022 16:18:35 +0000 Subject: [PATCH 08/24] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59d123ab..cc9af791 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ This downloads a text file called `SRR_Acc_List.txt` that 
can be directly provid ### Enhancements & fixes +- [#121](https://github.com/nf-core/fetchngs/pull/123) - Add Tower reporting - [#97](https://github.com/nf-core/fetchngs/pull/97) - Add support for generating nf-core/taxprofiler compatible samplesheets. - [#99](https://github.com/nf-core/fetchngs/issues/99) - SRA_IDS_TO_RUNINFO fails due to bad request - Add `enum` field for `--nf_core_pipeline` to parameter schema so only accept supported pipelines are accepted From a927872ba0f7b482304cdd20b1b11865442feb75 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 2 Nov 2022 10:47:09 +0000 Subject: [PATCH 09/24] Keep to main outputs Co-authored-by: Harshil Patel --- tower.yml | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/tower.yml b/tower.yml index ec0609cc..7b7fd106 100644 --- a/tower.yml +++ b/tower.yml @@ -1,15 +1,7 @@ reports: - "samplesheet/samplesheet.csv": + samplesheet.csv: display: "Auto-created samplesheet with collated metadata and FASTQ paths" - "samplesheet/id_mappings.csv": - display: "File with selected fields that can be used to rename samples" - "samplesheet/multiqc_config.yml": - display: "MultiQC config file that can be passed to most nf-core pipelines" - "metadata/*.runinfo_ftp.tsv": - display: "Re-formatted metadata file downloaded from the ENA" - "metadata/*.runinfo.tsv": - display: "Original metadata file downloaded from the ENA" - "metadata/*.metadata.txt": - display: "Original metadata file generated using the synapse show command" - "metadata/*.list.txt": - display: "Original output of the synapse list command" + id_mappings.csv: + display: "File with database identifier mappings that can be used to rename samples" + multiqc_config.yml: + display: "MultiQC config file for bulk renaming of sample names from database ids" From e3906f5440bb05bd2bb3f782c6d6ab64373940e6 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 2 Nov 2022 10:47:27 +0000 Subject: [PATCH 10/24] Improve changelog 
Co-authored-by: Harshil Patel --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc9af791..b16a252a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ This downloads a text file called `SRR_Acc_List.txt` that can be directly provid ### Enhancements & fixes -- [#121](https://github.com/nf-core/fetchngs/pull/123) - Add Tower reporting +- [#121](https://github.com/nf-core/fetchngs/pull/123) - Add `tower.yml` to render Reports in Tower - [#97](https://github.com/nf-core/fetchngs/pull/97) - Add support for generating nf-core/taxprofiler compatible samplesheets. - [#99](https://github.com/nf-core/fetchngs/issues/99) - SRA_IDS_TO_RUNINFO fails due to bad request - Add `enum` field for `--nf_core_pipeline` to parameter schema so only accept supported pipelines are accepted From c4d365833b109a3587d8621427f5eae3aafd2714 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Wed, 2 Nov 2022 10:52:42 +0000 Subject: [PATCH 11/24] Set input file type to Tower-compatible one (CSV) --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index a51dc45a..bde6337d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,8 +15,8 @@ "input": { "type": "string", "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.txt$", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", "schema": "assets/schema_input.json", "fa_icon": "fas fa-file-excel", "description": "File containing SRA/ENA/DDBJ identifiers one per line to download their associated metadata and FastQ files." 
From 95bd0569910eb103d7d88d210a48f0dee96b438e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 3 Nov 2022 14:16:26 +0000 Subject: [PATCH 12/24] Fix tests for csv --- conf/test.config | 2 +- conf/test_full.config | 2 +- conf/test_synapse.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 39a4ffdb..3c58e9cf 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,5 +20,5 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.txt' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.csv' } diff --git a/conf/test_full.config b/conf/test_full.config index 0887326a..2f0303ea 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,5 +15,5 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.txt' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.csv' } diff --git a/conf/test_synapse.config b/conf/test_synapse.config index b49f2463..1ac1388a 100644 --- a/conf/test_synapse.config +++ b/conf/test_synapse.config @@ -20,6 +20,6 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/synapse_ids_test.txt' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/synapse_ids_test.csv' input_type = 'synapse' } From 484e8185507c11b99763cb4fd45a716c8544e835 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 3 Nov 2022 14:19:23 +0000 Subject: [PATCH 13/24] Update docs for csv --- README.md | 6 +++--- docs/usage.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1151db3d..24d77103 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ On release, 
automated continuous integration tests run the pipeline on a full-si ## Pipeline summary -Via a single file of ids, provided one-per-line (see [example input file](https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.txt)) the pipeline performs the following steps: +Via a single file of ids, provided one-per-line (see [example input file](https://raw.githubusercontent.com/nf-core/test-datasets/fetchngs/sra_ids_test.csv)) the pipeline performs the following steps: ### SRA / ENA / DDBJ ids @@ -46,7 +46,7 @@ As a workaround, if you have a GEO accession you can directly download a text fi - Click `SRA Run Selector` at the bottom of the GEO accession page - Select the desired samples in the `SRA Run Selector` and then download the `Accession List` -This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline e.g. `--input SRR_Acc_List.txt`. +This downloads a text file called `SRR_Acc_List.csv` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. ### Synapse ids @@ -87,7 +87,7 @@ You can use the `--nf_core_pipeline` parameter to customise this behaviour e.g. 4. Start running your own analysis! ```bash - nextflow run nf-core/fetchngs --input ids.txt --outdir -profile + nextflow run nf-core/fetchngs --input ids.csv --outdir -profile ``` ## Documentation diff --git a/docs/usage.md b/docs/usage.md index ba4de1b2..b7fbe8f6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,7 +32,7 @@ If you have a GEO accession (found in the data availability section of published - Click `SRA Run Selector` at the bottom of the GEO accession page - Select the desired samples in the `SRA Run Selector` and then download the `Accession List` -This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline e.g. `--input SRR_Acc_List.txt`. 
+This downloads a text file called `SRR_Acc_List.csv` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. ### Synapse ids @@ -72,7 +72,7 @@ If FTP connections are blocked on your network use the [`--force_sratools_downlo The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/fetchngs --input ids.txt --outdir -profile docker +nextflow run nf-core/fetchngs --input ids.csv --outdir -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. From 70f0038a52c89859fc8cb2df1e0f55425eb8c427 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 3 Nov 2022 14:22:17 +0000 Subject: [PATCH 14/24] update WorkflowMain.groovy for CSV --- lib/WorkflowMain.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 77b7ffde..ef9131b7 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -21,7 +21,7 @@ class WorkflowMain { // Print help to screen if required // public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input ids.txt -profile docker" + def command = "nextflow run ${workflow.manifest.name} --input ids.csv -profile docker" def help_string = '' help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) help_string += NfcoreSchema.paramsHelp(workflow, params, command) @@ -74,7 +74,7 @@ class WorkflowMain { // Check input has been provided if (!params.input) { - log.error "Please provide an input file containing ids to the pipeline - one per line e.g. '--input ids.txt'" + log.error "Please provide an input file containing ids to the pipeline - one per line e.g. 
'--input ids.csv'" System.exit(1) } From 5c340b0b501e03f7fd59bc2d623f233ad3f2dc49 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 3 Nov 2022 14:32:39 +0000 Subject: [PATCH 15/24] Undo bad txt -> csv --- README.md | 2 +- docs/usage.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 24d77103..fe68e6b2 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ As a workaround, if you have a GEO accession you can directly download a text fi - Click `SRA Run Selector` at the bottom of the GEO accession page - Select the desired samples in the `SRA Run Selector` and then download the `Accession List` -This downloads a text file called `SRR_Acc_List.csv` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. +This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. ### Synapse ids diff --git a/docs/usage.md b/docs/usage.md index b7fbe8f6..9d78a37b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -32,7 +32,7 @@ If you have a GEO accession (found in the data availability section of published - Click `SRA Run Selector` at the bottom of the GEO accession page - Select the desired samples in the `SRA Run Selector` and then download the `Accession List` -This downloads a text file called `SRR_Acc_List.csv` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. +This downloads a text file called `SRR_Acc_List.txt` that can be directly provided to the pipeline once renamed with a .csv extension e.g. `--input SRR_Acc_List.csv`. 
### Synapse ids From cabff52754f5da2e0973f72af0ab06a46cc0654c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 4 Nov 2022 09:55:09 +0000 Subject: [PATCH 16/24] Fix relative paths after module restructuring --- subworkflows/nf-core/srafastq/main.nf | 6 +++--- workflows/sra.nf | 2 +- workflows/synapse.nf | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/nf-core/srafastq/main.nf b/subworkflows/nf-core/srafastq/main.nf index f57b6fac..580fef18 100644 --- a/subworkflows/nf-core/srafastq/main.nf +++ b/subworkflows/nf-core/srafastq/main.nf @@ -1,6 +1,6 @@ -include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/modules/custom/sratoolsncbisettings/main' -include { SRATOOLS_PREFETCH } from '../../../modules/nf-core/modules/sratools/prefetch/main' -include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/modules/sratools/fasterqdump/main' +include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sratoolsncbisettings/main' +include { SRATOOLS_PREFETCH } from '../../../modules/nf-core/sratools/prefetch/main' +include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/fasterqdump/main' /** * Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA). 
diff --git a/workflows/sra.nf b/workflows/sra.nf index 0dde4588..ef654b2d 100644 --- a/workflows/sra.nf +++ b/workflows/sra.nf @@ -34,7 +34,7 @@ include { SRAFASTQ } from '../subworkflows/nf-core/srafastq/main' ======================================================================================== */ -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ======================================================================================== diff --git a/workflows/synapse.nf b/workflows/synapse.nf index 2a673623..1203056a 100644 --- a/workflows/synapse.nf +++ b/workflows/synapse.nf @@ -31,7 +31,7 @@ include { SYNAPSE_MERGE_SAMPLESHEET } from '../modules/local/synapse_merge_sampl ======================================================================================== */ -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ======================================================================================== From 2b22ab6b92417ae5efd61b1a2a657fa1fe91b77e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 4 Nov 2022 09:57:46 +0000 Subject: [PATCH 17/24] Fix PythonBlack --- bin/multiqc_mappings_config.py | 5 +-- bin/sra_ids_to_runinfo.py | 80 ++++++++-------------------------- bin/sra_runinfo_to_ftp.py | 36 ++++----------- 3 files changed, 29 insertions(+), 92 deletions(-) diff --git a/bin/multiqc_mappings_config.py b/bin/multiqc_mappings_config.py index a7fc92ba..2fcdb49a 100755 --- a/bin/multiqc_mappings_config.py +++ b/bin/multiqc_mappings_config.py @@ -3,11 +3,10 @@ import sys with open(sys.argv[1], "r") as fin, open(sys.argv[2], "w") as fout: - header = fin.readline().split(',') + header = fin.readline().split(",") config = 
"sample_names_rename_buttons:\n" - config += "\n".join([' - ' + x.strip('"') for x in header]) + config += "\n".join([" - " + x.strip('"') for x in header]) config += "sample_names_rename:\n" for line in fin: config += f" - [{', '.join(line.strip().split(','))}]\n" fout.write(config) - diff --git a/bin/sra_ids_to_runinfo.py b/bin/sra_ids_to_runinfo.py index d85b0996..70627791 100755 --- a/bin/sra_ids_to_runinfo.py +++ b/bin/sra_ids_to_runinfo.py @@ -49,9 +49,7 @@ ) GEO_IDS = ("GSE18729", "GSM465244") ID_REGEX = re.compile(r"^([A-Z]+)([0-9]+)$") -PREFIX_LIST = sorted( - {ID_REGEX.match(id).group(1) for id in SRA_IDS + ENA_IDS + DDBJ_IDS + GEO_IDS} -) +PREFIX_LIST = sorted({ID_REGEX.match(id).group(1) for id in SRA_IDS + ENA_IDS + DDBJ_IDS + GEO_IDS}) # List of metadata fields fetched from the ENA API - can be overriden by options @@ -193,10 +191,7 @@ def is_valid(cls, identifier): class DatabaseResolver: """Define a service class for resolving various identifiers to experiments.""" - _GEO_PREFIXES = { - "GSE", - "GSM" - } + _GEO_PREFIXES = {"GSE", "GSM"} _SRA_PREFIXES = { "PRJNA", "SAMN", @@ -207,11 +202,7 @@ class DatabaseResolver: "PRJDB", "SAMD", } - _ENA_PREFIXES = { - "ERR", - "SRR", - "DRR" - } + _ENA_PREFIXES = {"ERR", "SRR", "DRR"} @classmethod def expand_identifier(cls, identifier): @@ -240,24 +231,14 @@ def expand_identifier(cls, identifier): def _content_check(cls, response, identifier): """Check that the response has content or terminate.""" if response.status == 204: - logger.error( - f"There is no content for id {identifier}. Maybe you lack the right " - f"permissions?" - ) + logger.error(f"There is no content for id {identifier}. 
Maybe you lack the right " f"permissions?") sys.exit(1) @classmethod def _id_to_srx(cls, identifier): """Resolve the identifier to SRA experiments.""" - params = { - "id": identifier, - "db": "sra", - "rettype": "runinfo", - "retmode": "text" - } - response = fetch_url( - f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}" - ) + params = {"id": identifier, "db": "sra", "rettype": "runinfo", "retmode": "text"} + response = fetch_url(f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}") cls._content_check(response, identifier) return [row["Experiment"] for row in open_table(response, delimiter=",")] @@ -265,20 +246,13 @@ def _id_to_srx(cls, identifier): def _gse_to_srx(cls, identifier): """Resolve the identifier to SRA experiments.""" ids = [] - params = { - "id": identifier, - "db": "gds", - "rettype": "runinfo", - "retmode": "text" - } - response = fetch_url( - f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}" - ) + params = {"id": identifier, "db": "gds", "rettype": "runinfo", "retmode": "text"} + response = fetch_url(f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?{urlencode(params)}") cls._content_check(response, identifier) gsm_ids = [ line.split("=")[1].strip() for line in response.text().splitlines() - if line.split('=')[1].strip().startswith('GSM') + if line.split("=")[1].strip().startswith("GSM") ] for gsm_id in gsm_ids: ids += cls._id_to_srx(gsm_id) @@ -293,13 +267,9 @@ def _id_to_erx(cls, identifier): "result": "read_run", "fields": ",".join(fields), } - response = fetch_url( - f"https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}" - ) + response = fetch_url(f"https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}") cls._content_check(response, identifier) - return [ - row["experiment_accession"] for row in open_table(response, delimiter="\t") - ] + return [row["experiment_accession"] for row in open_table(response, 
delimiter="\t")] class ENAMetadataFetcher: @@ -328,9 +298,7 @@ def open_experiment_table(self, accession): """ params = {**self._params, "accession": accession} - response = fetch_url( - f"https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}" - ) + response = fetch_url(f"https://www.ebi.ac.uk/ena/portal/api/filereport?{urlencode(params)}") self._content_check(response, accession) return open_table(response, delimiter="\t") @@ -338,10 +306,7 @@ def open_experiment_table(self, accession): def _content_check(cls, response, identifier): """Check that the response has content or terminate.""" if response.status == 204: - logger.error( - f"There is no content for id {identifier}. Maybe you lack the right " - f"permissions?" - ) + logger.error(f"There is no content for id {identifier}. Maybe you lack the right " f"permissions?") sys.exit(1) @@ -362,8 +327,7 @@ def open_table(response, delimiter=","): def parse_args(args=None): parser = argparse.ArgumentParser( - description="Download and create a run information metadata file from SRA / " - "ENA / DDBJ / GEO identifiers.", + description="Download and create a run information metadata file from SRA / " "ENA / DDBJ / GEO identifiers.", epilog="Example usage: python fetch_sra_runinfo.py ", ) parser.add_argument( @@ -383,8 +347,7 @@ def parse_args(args=None): "--ena_metadata_fields", type=str, default=",".join(ENA_METADATA_FIELDS), - help=f"Comma-separated list of ENA metadata fields to fetch " - f"(default: {','.join(ENA_METADATA_FIELDS)}).", + help=f"Comma-separated list of ENA metadata fields to fetch " f"(default: {','.join(ENA_METADATA_FIELDS)}).", ) parser.add_argument( "-l", @@ -431,9 +394,7 @@ def get_ena_fields(): return [ row["columnId"] for row in open_table( - fetch_url( - f"https://www.ebi.ac.uk/ena/portal/api/returnFields?{urlencode(params)}" - ), + fetch_url(f"https://www.ebi.ac.uk/ena/portal/api/returnFields?{urlencode(params)}"), delimiter="\t", ) ] @@ -453,16 +414,11 @@ def 
fetch_sra_runinfo(file_in, file_out, ena_metadata_fields): seen_ids.add(db_id) if not DatabaseIdentifierChecker.is_valid(db_id): id_str = ", ".join([x + "*" for x in PREFIX_LIST]) - logger.error( - f"Please provide a valid database id starting with {id_str}!\n" - f"Line: '{line.strip()}'" - ) + logger.error(f"Please provide a valid database id starting with {id_str}!\n" f"Line: '{line.strip()}'") sys.exit(1) ids = DatabaseResolver.expand_identifier(db_id) if not ids: - logger.error( - f"No matches found for database id {db_id}!\nLine: '{line.strip()}'" - ) + logger.error(f"No matches found for database id {db_id}!\nLine: '{line.strip()}'") sys.exit(1) for accession in ids: for row in ena_fetcher.open_experiment_table(accession): diff --git a/bin/sra_runinfo_to_ftp.py b/bin/sra_runinfo_to_ftp.py index d9400b2f..ef80ec80 100755 --- a/bin/sra_runinfo_to_ftp.py +++ b/bin/sra_runinfo_to_ftp.py @@ -62,10 +62,7 @@ def parse_sra_runinfo(file_in): reader = csv.DictReader(fin, delimiter="\t", skipinitialspace=True) header = list(reader.fieldnames) if missing := frozenset(columns).difference(frozenset(header)): - logger.critical( - f"The following expected columns are missing from {file_in}: " - f"{', '.join(missing)}." - ) + logger.critical(f"The following expected columns are missing from {file_in}: " f"{', '.join(missing)}.") sys.exit(1) for row in reader: db_id = row["experiment_accession"] @@ -73,14 +70,9 @@ def parse_sra_runinfo(file_in): fq_files = row["fastq_ftp"].split(";")[-2:] fq_md5 = row["fastq_md5"].split(";")[-2:] if len(fq_files) == 1: - assert fq_files[0].endswith( - ".fastq.gz" - ), f"Unexpected FastQ file format {file_in.name}." + assert fq_files[0].endswith(".fastq.gz"), f"Unexpected FastQ file format {file_in.name}." if row["library_layout"] != "SINGLE": - logger.warning( - f"The library layout '{row['library_layout']}' should be " - f"'SINGLE'." 
- ) + logger.warning(f"The library layout '{row['library_layout']}' should be " f"'SINGLE'.") sample = { "fastq_1": fq_files[0], "fastq_2": None, @@ -89,17 +81,10 @@ def parse_sra_runinfo(file_in): "single_end": "true", } elif len(fq_files) == 2: - assert fq_files[0].endswith( - "_1.fastq.gz" - ), f"Unexpected FastQ file format {file_in.name}." - assert fq_files[1].endswith( - "_2.fastq.gz" - ), f"Unexpected FastQ file format {file_in.name}." + assert fq_files[0].endswith("_1.fastq.gz"), f"Unexpected FastQ file format {file_in.name}." + assert fq_files[1].endswith("_2.fastq.gz"), f"Unexpected FastQ file format {file_in.name}." if row["library_layout"] != "PAIRED": - logger.warning( - f"The library layout '{row['library_layout']}' should be " - f"'PAIRED'." - ) + logger.warning(f"The library layout '{row['library_layout']}' should be " f"'PAIRED'.") sample = { "fastq_1": fq_files[0], "fastq_2": fq_files[1], @@ -124,8 +109,7 @@ def parse_sra_runinfo(file_in): else: if sample in runinfo[db_id]: logger.error( - f"Input run info file contains duplicate rows!\n" - f"{', '.join([row[col] for col in header])}" + f"Input run info file contains duplicate rows!\n" f"{', '.join([row[col] for col in header])}" ) else: runinfo[db_id].append(sample) @@ -146,9 +130,7 @@ def sra_runinfo_to_ftp(files_in, file_out): logger.warning(f"Duplicate sample identifier found!\nID: '{db_id}'") # Create a combined header from all input files. - combined_header = header[0] + list( - set().union(chain.from_iterable(header)).difference(header[0]) - ) + combined_header = header[0] + list(set().union(chain.from_iterable(header)).difference(header[0])) combined_header.insert(0, "id") # Write samplesheet with paths to FastQ files and md5 sums. 
@@ -159,7 +141,7 @@ def sra_runinfo_to_ftp(files_in, file_out): for db_id in sorted(samplesheet): for idx, row in enumerate(samplesheet[db_id], start=1): row["id"] = f"{db_id}" - if 'run_accession' in row: + if "run_accession" in row: row["id"] = f"{db_id}_{row['run_accession']}" writer.writerow(row) From d67b3ef6eaf437c7a22da800fcb0d26f19d0a9a7 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 4 Nov 2022 10:19:08 +0000 Subject: [PATCH 18/24] Update regex for --input file extension --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 4d02b3c5..435079b3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "type": "string", "format": "file-path", "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", + "pattern": "^\\S+\\.(csv|tsv|txt)$", "schema": "assets/schema_input.json", "fa_icon": "fas fa-file-excel", "description": "File containing SRA/ENA/DDBJ identifiers one per line to download their associated metadata and FastQ files." 
From 4747774e31434ac9688e668730f3517bab30d675 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 4 Nov 2022 10:59:03 +0000 Subject: [PATCH 19/24] Enable conda explicitly in profiles block --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 3f07ba06..f8f97ca3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -72,6 +72,7 @@ profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false From 12e8b857b29bfeb37ff7d3841762190166832ef9 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 7 Nov 2022 11:48:22 +0000 Subject: [PATCH 20/24] Fix #114 --- CHANGELOG.md | 5 +- modules/local/sra_to_samplesheet.nf | 26 ++++----- workflows/sra.nf | 86 ++++++++++++++++++----------- 3 files changed, 71 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 993bc934..679644f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- [#111](https://github.com/nf-core/fetchngs/issues/111) - Change input mimetype to csv +- [#114](https://github.com/nf-core/fetchngs/issues/114) - Final samplesheet is not created when `--skip_fastq_download` is provided +- [#118](https://github.com/nf-core/fetchngs/issues/118) - Allow input pattern validation for csv/tsv/txt +- [#121](https://github.com/nf-core/fetchngs/issues/121) - Add `tower.yml` to render samplesheet as Report in Tower - Fetch `SRR` and `DRR` metadata from ENA API instead of NCBI API to bypass frequent breaking changes - Updated pipeline template to [nf-core/tools 2.6](https://github.com/nf-core/tools/releases/tag/2.6) @@ -26,7 +30,6 @@ This downloads a text file called `SRR_Acc_List.txt` that can be directly provid ### Enhancements & fixes -- [#121](https://github.com/nf-core/fetchngs/pull/123) - Add
`tower.yml` to render Reports in Tower - [#97](https://github.com/nf-core/fetchngs/pull/97) - Add support for generating nf-core/taxprofiler compatible samplesheets. - [#99](https://github.com/nf-core/fetchngs/issues/99) - SRA_IDS_TO_RUNINFO fails due to bad request - Add `enum` field for `--nf_core_pipeline` to parameter schema so only accept supported pipelines are accepted diff --git a/modules/local/sra_to_samplesheet.nf b/modules/local/sra_to_samplesheet.nf index 4b448e89..6dce588b 100644 --- a/modules/local/sra_to_samplesheet.nf +++ b/modules/local/sra_to_samplesheet.nf @@ -6,9 +6,9 @@ process SRA_TO_SAMPLESHEET { memory 100.MB input: - tuple val(meta), path(fastq) - val pipeline - val mapping_fields + val meta + val pipeline + val mapping_fields output: tuple val(meta), path("*samplesheet.csv"), emit: samplesheet @@ -20,19 +20,19 @@ process SRA_TO_SAMPLESHEET { // // Remove custom keys needed to download the data - def meta_map = meta.clone() - meta_map.remove("id") - meta_map.remove("fastq_1") - meta_map.remove("fastq_2") - meta_map.remove("md5_1") - meta_map.remove("md5_2") - meta_map.remove("single_end") + def meta_clone = meta.clone() + meta_clone.remove("id") + meta_clone.remove("fastq_1") + meta_clone.remove("fastq_2") + meta_clone.remove("md5_1") + meta_clone.remove("md5_2") + meta_clone.remove("single_end") // Add relevant fields to the beginning of the map pipeline_map = [ sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : "${params.outdir}/fastq/${fastq[0]}", - fastq_2 : meta.single_end ? 
'' : "${params.outdir}/fastq/${fastq[1]}" + fastq_1 : meta.fastq_1, + fastq_2 : meta.fastq_2 ] // Add nf-core pipeline specific entries @@ -43,7 +43,7 @@ process SRA_TO_SAMPLESHEET { pipeline_map << [ fasta: '' ] } } - pipeline_map << meta_map + pipeline_map << meta_clone // Create a samplesheet samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' diff --git a/workflows/sra.nf b/workflows/sra.nf index ef654b2d..ebb15602 100644 --- a/workflows/sra.nf +++ b/workflows/sra.nf @@ -80,19 +80,27 @@ workflow SRA { .splitCsv(header:true, sep:'\t') .map { meta -> - meta.single_end = meta.single_end.toBoolean() - [ meta, [ meta.fastq_1, meta.fastq_2 ] ] + def meta_clone = meta.clone() + meta_clone.single_end = meta_clone.single_end.toBoolean() + return meta_clone } .unique() - .branch { - ftp: it[0].fastq_1 && !params.force_sratools_download - sra: !it[0].fastq_1 || params.force_sratools_download - } - .set { ch_sra_reads } + .set { ch_sra_metadata } ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first()) if (!params.skip_fastq_download) { + ch_sra_metadata + .map { + meta -> + [ meta, [ meta.fastq_1, meta.fastq_2 ] ] + } + .branch { + ftp: it[0].fastq_1 && !params.force_sratools_download + sra: !it[0].fastq_1 || params.force_sratools_download + } + .set { ch_sra_reads } + // // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // @@ -109,33 +117,47 @@ workflow SRA { ) ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first()) - // - // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet - // - SRA_TO_SAMPLESHEET ( - SRA_FASTQ_FTP.out.fastq.mix(SRAFASTQ.out.reads), - params.nf_core_pipeline ?: '', - params.sample_mapping_fields - ) + SRA_FASTQ_FTP + .out + .fastq + .mix(SRAFASTQ.out.reads) + .map { + meta, fastq -> + def reads = meta.single_end ? [ fastq ] : fastq + def meta_clone = meta.clone() + meta_clone.fastq_1 = reads[0] ? 
"${params.outdir}/fastq/${reads[0].getName()}" : '' + meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${params.outdir}/fastq/${reads[1].getName()}" : '' + return meta_clone + } + .set { ch_sra_metadata } + } - // - // MODULE: Create a merged samplesheet across all samples for the pipeline - // - SRA_MERGE_SAMPLESHEET ( - SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]}, - SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]} - ) - ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions) + // + // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet + // + SRA_TO_SAMPLESHEET ( + ch_sra_metadata, + params.nf_core_pipeline ?: '', + params.sample_mapping_fields + ) - // - // MODULE: Create a MutiQC config file with sample name mappings - // - if (params.sample_mapping_fields) { - MULTIQC_MAPPINGS_CONFIG ( - SRA_MERGE_SAMPLESHEET.out.mappings - ) - ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions) - } + // + // MODULE: Create a merged samplesheet across all samples for the pipeline + // + SRA_MERGE_SAMPLESHEET ( + SRA_TO_SAMPLESHEET.out.samplesheet.collect{it[1]}, + SRA_TO_SAMPLESHEET.out.mappings.collect{it[1]} + ) + ch_versions = ch_versions.mix(SRA_MERGE_SAMPLESHEET.out.versions) + + // + // MODULE: Create a MutiQC config file with sample name mappings + // + if (params.sample_mapping_fields) { + MULTIQC_MAPPINGS_CONFIG ( + SRA_MERGE_SAMPLESHEET.out.mappings + ) + ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions) } // From 317262243aa5e120441ec05dadac0636cf8c86bc Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 7 Nov 2022 19:30:29 +0000 Subject: [PATCH 21/24] Fix #119 --- CHANGELOG.md | 3 ++- conf/modules.config | 13 +++---------- modules/nf-core/sratools/fasterqdump/main.nf | 16 ++++++++++------ .../main.nf | 2 +- .../meta.yml | 3 ++- workflows/sra.nf | 10 +++++----- 6 files changed, 23 insertions(+), 24 deletions(-) rename subworkflows/nf-core/{srafastq => 
fastq_download_prefetch_fasterqdump_sratools}/main.nf (96%) rename subworkflows/nf-core/{srafastq => fastq_download_prefetch_fasterqdump_sratools}/meta.yml (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 679644f1..e12fa79a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[1.8](https://github.com/nf-core/fetchngs/releases/tag/1.8)] - 2022-11-07 +## [[1.8](https://github.com/nf-core/fetchngs/releases/tag/1.8)] - 2022-11-08 ### Enhancements & fixes - [#111](https://github.com/nf-core/fetchngs/issues/111) - Change input mimetype to csv - [#114](https://github.com/nf-core/fetchngs/issues/114) - Final samplesheet is not created when `--skip_fastq_download` is provided - [#118](https://github.com/nf-core/fetchngs/issues/118) - Allow input pattern validation for csv/tsv/txt +- [#119](https://github.com/nf-core/fetchngs/issues/119) - `--force_sratools_download` results in different fastq names compared to FTP download - [#121](https://github.com/nf-core/fetchngs/issues/121) - Add `tower.yml` to render samplesheet as Report in Tower - Fetch `SRR` and `DRR` metadata from ENA API instead of NCBI API to bypass frequent breaking changes - Updated pipeline template to [nf-core/tools 2.6](https://github.com/nf-core/tools/releases/tag/2.6) diff --git a/conf/modules.config b/conf/modules.config index 82a5f75c..c42f4d60 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -78,16 +78,9 @@ if (params.input_type == 'sra') { withName: SRATOOLS_FASTERQDUMP { publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] + path: { "${params.outdir}/fastq" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz" ] } diff --git 
a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 18f46e51..16454a54 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -12,8 +12,8 @@ process SRATOOLS_FASTERQDUMP { path ncbi_settings output: - tuple val(meta), path(output), emit: reads - path "versions.yml" , emit: versions + tuple val(meta), path(fastq), emit: reads + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,16 +21,20 @@ process SRATOOLS_FASTERQDUMP { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - // Paired-end data extracted by fasterq-dump (--split-3 the default) always creates - // *_1.fastq *_2.fastq files but sometimes also an additional *.fastq file - // for unpaired reads which we ignore here. - output = meta.single_end ? '*.fastq.gz' : '*_{1,2}.fastq.gz' + def prefix = task.ext.prefix ?: "${meta.id}" + + // WARNING: Paired-end data extracted by fasterq-dump (--split-3 the default) + // always creates *_1.fastq *_2.fastq files but sometimes also + // an additional *.fastq file for unpaired reads which we ignore here. + fastq = meta.single_end ? '*.fastq.gz' : '*_{1,2}.fastq.gz' + def outfile = meta.single_end ? 
"${prefix}.fastq" : prefix """ export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ $args \\ --threads $task.cpus \\ + --outfile $outfile \\ ${sra.name} pigz \\ diff --git a/subworkflows/nf-core/srafastq/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf similarity index 96% rename from subworkflows/nf-core/srafastq/main.nf rename to subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index 580fef18..cacde6f5 100644 --- a/subworkflows/nf-core/srafastq/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -5,7 +5,7 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ /** * Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA). */ -workflow SRAFASTQ { +workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: sra_ids // channel: [ val(meta), val(id) ] diff --git a/subworkflows/nf-core/srafastq/meta.yml b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml similarity index 93% rename from subworkflows/nf-core/srafastq/meta.yml rename to subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml index 873ccaca..f032628b 100644 --- a/subworkflows/nf-core/srafastq/meta.yml +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml @@ -1,4 +1,4 @@ -name: sra_fastq +name: fastq_download_prefetch_fasterqdump_sratools description: Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA). 
keywords: - SRA @@ -38,3 +38,4 @@ output: pattern: "versions.yml" authors: - "@Midnighter" + - "@drpatelh" diff --git a/workflows/sra.nf b/workflows/sra.nf index ebb15602..9f38e7f8 100644 --- a/workflows/sra.nf +++ b/workflows/sra.nf @@ -26,8 +26,6 @@ include { SRA_TO_SAMPLESHEET } from '../modules/local/sra_to_samplesheet' include { SRA_MERGE_SAMPLESHEET } from '../modules/local/sra_merge_samplesheet' include { MULTIQC_MAPPINGS_CONFIG } from '../modules/local/multiqc_mappings_config' -include { SRAFASTQ } from '../subworkflows/nf-core/srafastq/main' - /* ======================================================================================== IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -36,6 +34,8 @@ include { SRAFASTQ } from '../subworkflows/nf-core/srafastq/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main' + /* ======================================================================================== RUN MAIN WORKFLOW @@ -112,15 +112,15 @@ workflow SRA { // // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - SRAFASTQ ( + FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( ch_sra_reads.sra.map { meta, reads -> [ meta, meta.run_accession ] } ) - ch_versions = ch_versions.mix(SRAFASTQ.out.versions.first()) + ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first()) SRA_FASTQ_FTP .out .fastq - .mix(SRAFASTQ.out.reads) + .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) .map { meta, fastq -> def reads = meta.single_end ? 
[ fastq ] : fastq From 0ff28b46c324397ab3013ca41053310b626535ff Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 7 Nov 2022 19:31:02 +0000 Subject: [PATCH 22/24] Bump pipeline version to 1.8 --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index f8f97ca3..633a0fb2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -174,7 +174,7 @@ manifest { description = 'Pipeline to fetch metadata and raw FastQ files from public databases' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '1.8dev' + version = '1.8' doi = 'https://doi.org/10.5281/zenodo.5070524' } From 53858ba59dee0bc79ff146aa1382d2b2e5433e24 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 7 Nov 2022 21:20:11 +0000 Subject: [PATCH 23/24] Re-install subworkflow from nf-core/modules --- modules.json | 8 ++++++++ .../main.nf | 16 ++++++++-------- .../meta.yml | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/modules.json b/modules.json index c687eccd..fc957282 100644 --- a/modules.json +++ b/modules.json @@ -22,6 +22,14 @@ "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" } } + }, + "subworkflows": { + "nf-core": { + "fastq_download_prefetch_fasterqdump_sratools": { + "branch": "master", + "git_sha": "03711bcb7fa2a7088eb54abb1fca326d30e602c2" + } + } } } } diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index cacde6f5..1e1d0d7b 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -2,12 +2,12 @@ include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sr include { SRATOOLS_PREFETCH } from '../../../modules/nf-core/sratools/prefetch/main' include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/fasterqdump/main' -/** - * 
Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA). - */ +// +// Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA). +// workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - sra_ids // channel: [ val(meta), val(id) ] + ch_sra_ids // channel: [ val(meta), val(id) ] main: @@ -18,19 +18,19 @@ workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { // CUSTOM_SRATOOLSNCBISETTINGS() def settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings // value channel: path(settings) - ch_versions = ch_versions.mix( CUSTOM_SRATOOLSNCBISETTINGS.out.versions ) + ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions) // // Prefetch sequencing reads in SRA format. // - SRATOOLS_PREFETCH ( sra_ids, settings ) - ch_versions = ch_versions.mix( SRATOOLS_PREFETCH.out.versions.first() ) + SRATOOLS_PREFETCH ( ch_sra_ids, settings ) + ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first()) // // Convert the SRA format into one or more compressed FASTQ files. 
// SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, settings ) - ch_versions = ch_versions.mix( SRATOOLS_FASTERQDUMP.out.versions.first() ) + ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) emit: reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml index f032628b..c385ca21 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml @@ -4,7 +4,7 @@ keywords: - SRA - NCBI - sequencing - - FASTQ + - fastq - prefetch - fasterq-dump modules: From c791a37cd0fed39703394e652eb3a5c602654a7d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 7 Nov 2022 21:24:53 +0000 Subject: [PATCH 24/24] Update sratools/fasterqdump manually --- CHANGELOG.md | 2 +- modules.json | 2 +- modules/nf-core/sratools/fasterqdump/main.nf | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e12fa79a..b57bfefc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#111](https://github.com/nf-core/fetchngs/issues/111) - Change input mimetype to csv - [#114](https://github.com/nf-core/fetchngs/issues/114) - Final samplesheet is not created when `--skip_fastq_download` is provided - [#118](https://github.com/nf-core/fetchngs/issues/118) - Allow input pattern validation for csv/tsv/txt -- [#119](https://github.com/nf-core/fetchngs/issues/119) - `--force_sratools_download` results in different fastq names compared to FTP download +- [#119](https://github.com/nf-core/fetchngs/issues/119) - `--force_sratools_download` results in different fastq names compared to FTP download - [#121](https://github.com/nf-core/fetchngs/issues/121) - Add 
`tower.yml` to render samplesheet as Report in Tower - Fetch `SRR` and `DRR` metadata from ENA API instead of NCBI API to bypass frequent breaking changes - Updated pipeline template to [nf-core/tools 2.6](https://github.com/nf-core/tools/releases/tag/2.6) diff --git a/modules.json b/modules.json index fc957282..512b3fea 100644 --- a/modules.json +++ b/modules.json @@ -15,7 +15,7 @@ }, "sratools/fasterqdump": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "03711bcb7fa2a7088eb54abb1fca326d30e602c2" }, "sratools/prefetch": { "branch": "master", diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index 16454a54..1ffb21f0 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -23,9 +23,9 @@ process SRATOOLS_FASTERQDUMP { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // WARNING: Paired-end data extracted by fasterq-dump (--split-3 the default) - // always creates *_1.fastq *_2.fastq files but sometimes also - // an additional *.fastq file for unpaired reads which we ignore here. + // WARNING: Paired-end data extracted by fasterq-dump (--split-3 the default) + // always creates *_1.fastq *_2.fastq files but sometimes also + // an additional *.fastq file for unpaired reads which we ignore here. fastq = meta.single_end ? '*.fastq.gz' : '*_{1,2}.fastq.gz' def outfile = meta.single_end ? "${prefix}.fastq" : prefix """