diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b6b31907..dd9ffa53 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 01fbda84..de8d5a87 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,9 +9,8 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -:::info -If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)). -::: +> [!NOTE] +> If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -27,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -87,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d24f1701..6e6e8ffe 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/quan - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/quantms/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/quantms _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 562af8fa..8d04e9b7 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -20,7 +20,9 @@ jobs: steps: - name: Launch workflow via tower uses: seqeralabs/action-tower-launch@v2 - + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -34,7 +36,7 @@ jobs: } profiles: test_${{ matrix.mode }} - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index eed8b846..7d528d38 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test_tmt - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index b4ff5e13..ec9e9185 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 835360cf..96b5e2c4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,13 +45,16 @@ jobs: exec_profile: "conda" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Install micromamba if: matrix.exec_profile == 'conda' run: | diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..08622fd5 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,72 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. 
+on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 20f90708..4549bf44 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - 
echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/quantms/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,72 +11,33 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. 
- To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 80% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml index 6ad33927..d468aeaa 100644 --- a/.github/workflows/release-announcments.yml +++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.0.2 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! 
${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.gitignore b/.gitignore index f716649b..b25226dd 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ debug_dir test_out lint_log.txt +node_modules diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..af57081f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b4afbd4..318d735d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,39 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [1.3.0] nfcore/quantms - [08/04/2024] - Santiago de Cuba + +### `Added` + +- [#335](https://github.com/bigbio/quantms/pull/335) (Performance improvement) Improvements in DIA pipeline to use random/subset files for library search +- [#351](https://github.com/bigbio/quantms/pull/351) Identification workflow for DDA data + +### `Changed` + +- [#365](https://github.com/bigbio/quantms/pull/365) Updated sdrf-pipelines==0.0.26 +- [#359](https://github.com/bigbio/quantms/pull/359) Updated pmultiqc==0.0.25 + +### `Fixed` + +- [#357](https://github.com/bigbio/quantms/pull/357) Chymotrypsin -> Chymotrypsin/P in MSGF+ +- [#355](https://github.com/bigbio/quantms/pull/355) Fixes bin/diann_convert.py +- [#316](https://github.com/bigbio/quantms/pull/316) Fixing MSGF+ error + +### `Dependencies` + +### `Parameters` + +- id_only: Only perform identification, no quantification +- min_peaks: Minimum number of peaks in a spectrum to be considered for search +- export_decoy_psm: Export decoy PSMs +- skip_rescoring: Skip rescoring +- skip_preliminary_analysis: Skip preliminary analysis in DIA-NN +- empirical_assembly_log: Path to the empirical assembly log file +- random_preanalysis: Use random/subset files for library search +- empirical_assembly_ms_n: Number of MS runs to use for empirical assembly + +### `Deprecations` + ## [1.2.0] nfcore/quantms - [11/02/2023] - Thimphu ### `Added` @@ -39,8 +72,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - diann_speclib: Path to the spectral library to use in DIA-NN (default: null) - convert_dotd: if convert .d file to mzml (default: false) -### `Deprecations` - ## [1.1.1] nfcore/quantms - [03/27/23] - Berlin-Bern ### `Added` @@ -119,11 +150,11 @@ The pipeline is using Nextflow DSL2, each process will be run with its own [Bioc | `thermorawfileparser` | 1.3.4 | | `comet` | 2021010 | | `msgf+` | 2022.01.07 | -| `openms` | 2.9.1 | -| `sdrf-pipelines` | 0.0.22 | +| `openms` | 3.1.0 | +| `sdrf-pipelines` | 0.0.26 | | `percolator` | 3.5 | -| `pmultiqc` | 0.0.11 | +| `pmultiqc` | 0.0.24 | | `luciphor` | 2020_04_03 | | `dia-nn` | 1.8.1 | -| `msstats` | 4.2.0 | -| `msstatstmt` | 2.2.0 | +| `msstats` | 4.10.0 | +| `msstatstmt` | 2.10.0 | diff --git a/README.md b/README.md index 79d5e02c..2c7a34e5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # ![nf-core/quantms](docs/images/nf-core-quantms_logo_light.png#gh-light-mode-only) ![nf-core/quantms](docs/images/nf-core-quantms_logo_dark.png#gh-dark-mode-only) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.7754148-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7754148) +[![GitHub Actions CI Status](https://github.com/nf-core/quantms/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/quantms/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/quantms/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/quantms/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.7754148-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7754148) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) 
[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](https://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/quantms) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/quantms) [![Get help on Slack](https://img.shields.io/badge/slack-nf--core%20%23quantms-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/quantms)[![Follow on Twitter](https://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](https://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -71,11 +74,8 @@ A graphical overview of suggested routes through the pipeline depending on conte ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, find or create a sample-to-data relationship file ([SDRF](https://github.com/bigbio/proteomics-sample-metadata)). Have a look at public datasets that were already annotated [here](https://github.com/bigbio/proteomics-sample-metadata/tree/master/annotated-projects). @@ -95,11 +95,9 @@ nextflow run nf-core/quantms \ --outdir <OUTDIR> ``` -:::warning -Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -::: +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/quantms/usage) and the [parameter documentation](https://nf-co.re/quantms/parameters).
@@ -130,7 +128,10 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/quantms for your analysis, please cite it using the following doi: [10.5281/zenodo.7754148](https://doi.org/10.5281/zenodo.7754148) + + + + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json index 5c061ce5..db2612fc 100644 --- a/assets/adaptivecard.json +++ b/assets/adaptivecard.json @@ -54,7 +54,8 @@ "body": [ { "type": "FactSet", - "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}" + }.join(",\n") %> ] } ] @@ -64,4 +65,4 @@ } } ] -} \ No newline at end of file +} diff --git a/assets/email_template.html b/assets/email_template.html index 7a622e34..9010ff46 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/quantms v${version} + nf-core/quantms ${version} Run Name: $runName
<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 77db9a18..da8858d2 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/quantms v${version} + nf-core/quantms ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f8a0113a..aad01fbd 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/quantms + This report has been generated by the nf-core/quantms analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: pmultiqc: order: 1 @@ -65,3 +65,5 @@ sp: fn: "general_stats.tsv" quantms/exp_design: fn: "*_design.tsv" + +disable_version_detection: true diff --git a/assets/nf-core-quantms_logo_light.png b/assets/nf-core-quantms_logo_light.png index 030a38e8..66c6e9d1 100644 Binary files a/assets/nf-core-quantms_logo_light.png and b/assets/nf-core-quantms_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 09ea57c3..0493921c 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,25 +10,22 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, "required": ["sample", "fastq_1"] diff --git a/assets/slackreport.json b/assets/slackreport.json index 5f7ba081..34ac0761 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/quantms v${version} - ${runName}", + "author_name": "nf-core/quantms ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/diann_convert.py b/bin/diann_convert.py index a30e91fc..e4e47eb1 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -9,7 +9,7 @@ import logging import os import re -from dataclasses import dataclass +import warnings from pathlib import Path from typing import Any, List, Tuple, Dict, Set, Union @@ -47,8 +47,7 @@ def cli(): def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_cleavages, qvalue_threshold): """ Convert DIA-NN output to MSstats, Triqler or mzTab. 
- The output formats are - used for quality control and downstream analysis. + The output formats are used for quality control and downstream analysis. :param folder: DiannConvert specifies the folder where the required file resides. The folder contains the DiaNN main report, protein matrix, precursor matrix, experimental design file, protein sequence @@ -442,9 +441,9 @@ def mztab_MTD(index_ref, dia_params, fasta, charge, missed_cleavages): out_mztab_MTD.loc[1, "title"] = "ConsensusMap export from OpenMS" out_mztab_MTD.loc[1, "description"] = "OpenMS export from consensusXML" out_mztab_MTD.loc[1, "protein_search_engine_score[1]"] = "[, , DIA-NN Global.PG.Q.Value, ]" - out_mztab_MTD.loc[ - 1, "peptide_search_engine_score[1]" - ] = "[, , DIA-NN Q.Value (minimum of the respective precursor q-values), ]" + out_mztab_MTD.loc[1, "peptide_search_engine_score[1]"] = ( + "[, , DIA-NN Q.Value (minimum of the respective precursor q-values), ]" + ) out_mztab_MTD.loc[1, "psm_search_engine_score[1]"] = "[MS, MS:MS:1001869, protein-level q-value, ]" out_mztab_MTD.loc[1, "software[1]"] = "[MS, MS:1003253, DIA-NN, Release (v1.8.1)]" out_mztab_MTD.loc[1, "software[1]-setting[1]"] = fasta @@ -486,19 +485,25 @@ def mztab_MTD(index_ref, dia_params, fasta, charge, missed_cleavages): out_mztab_MTD.loc[1, "ms_run[" + str(i) + "]-location"] = ( "file://" + index_ref[index_ref["ms_run"] == i]["Spectra_Filepath"].values[0] ) - out_mztab_MTD.loc[ - 1, "ms_run[" + str(i) + "]-id_format" - ] = "[MS, MS:1000777, spectrum identifier nativeID format, ]" + out_mztab_MTD.loc[1, "ms_run[" + str(i) + "]-id_format"] = ( + "[MS, MS:1000777, spectrum identifier nativeID format, ]" + ) out_mztab_MTD.loc[1, "assay[" + str(i) + "]-quantification_reagent"] = "[MS, MS:1002038, unlabeled sample, ]" out_mztab_MTD.loc[1, "assay[" + str(i) + "]-ms_run_ref"] = "ms_run[" + str(i) + "]" - for i in range(1, max(index_ref["study_variable"]) + 1): - study_variable = [] - for j in list(index_ref[index_ref["study_variable"] == i]["ms_run"].values): - study_variable.append("assay[" + str(j) + "]") - out_mztab_MTD.loc[1, "study_variable[" + str(i) + "]-assay_refs"] = ",".join(study_variable) - out_mztab_MTD.loc[1, "study_variable[" + str(i) + "]-description"] = "no description given" - + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # This is used here in order to ignore performance warnings from pandas. + for i in range(1, max(index_ref["study_variable"]) + 1): + study_variable = [] + for j in list(index_ref[index_ref["study_variable"] == i]["ms_run"].values): + study_variable.append("assay[" + str(j) + "]") + out_mztab_MTD.loc[1, "study_variable[" + str(i) + "]-assay_refs"] = ",".join(study_variable) + out_mztab_MTD.loc[1, "study_variable[" + str(i) + "]-description"] = "no description given" + + # The former loop makes a very sharded frame, this + # makes the frame more compact in memory. 
+ out_mztab_MTD = out_mztab_MTD.copy() out_mztab_MTD.loc[2, :] = "MTD" # Transpose out_mztab_MTD @@ -552,8 +557,9 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): pg["opt_global_result_type"] = "single_protein" pg.loc[pg["Protein.Ids"].str.contains(";"), "opt_global_result_type"] = "indistinguishable_protein_group" - out_mztab_PRH = pd.DataFrame() - out_mztab_PRH = pg.drop(["Protein.Names"], axis=1) + out_mztab_PRH = pg + del pg + out_mztab_PRH = out_mztab_PRH.drop(["Protein.Names"], axis=1) out_mztab_PRH.rename( columns={"Protein.Group": "accession", "First.Protein.Description": "description"}, inplace=True ) @@ -580,9 +586,14 @@ def mztab_PRH(report, pg, index_ref, database, fasta_df): protein_details_df = ( protein_details_df.drop("accession", axis=1).join(prh_series).reset_index().drop(columns="index") ) - protein_details_df.loc[:, "col"] = "protein_details" - # protein_details_df = protein_details_df[-protein_details_df["accession"].str.contains("-")] - out_mztab_PRH = pd.concat([out_mztab_PRH, protein_details_df]).reset_index(drop=True) + if len(protein_details_df) > 0: + logger.info(f"Found {len(protein_details_df)} indistinguishable protein groups") + # The Following line fails if there are no indistinguishable protein groups + protein_details_df.loc[:, "col"] = "protein_details" + # protein_details_df = protein_details_df[-protein_details_df["accession"].str.contains("-")] + out_mztab_PRH = pd.concat([out_mztab_PRH, protein_details_df]).reset_index(drop=True) + else: + logger.info("No indistinguishable protein groups found") logger.debug("Calculating protein coverage (bottleneck)...") # This is a bottleneck @@ -825,7 +836,7 @@ def mztab_PSH(report, folder, database): def __find_info(directory, n): # This line matches n="220101_myfile", folder="." to # "myfolder/220101_myfile_ms_info.tsv" - files = list(Path(directory).glob(f"*{n}*_info.tsv")) + files = list(Path(directory).rglob(f"{n}_ms_info.tsv")) # Check that it matches one and only one file if not files: raise ValueError(f"Could not find {n} info file in {directory}") @@ -1161,10 +1172,9 @@ def per_peptide_study_report(report: pd.DataFrame) -> pd.DataFrame: This implementation differs in several aspects in the output values: 1. in the fact that it actually gets values for the m/z 2. always returns a float, whilst the apply version returns an 'object' dtype. - 3. The original implementation, missing values had the string 'null', here - they have the value np.nan. + 3. The original implementation, missing values had the string 'null', here they have the value np.nan. 4. The order of the final output is different; the original orders columns by - study variables > calculated value, this one is calculated value > study variables. + study variables > calculated value, this one is calculated value > study variables. Calculates the mean, standard deviation and std error of the precursor abundances, as well as the mean retention time and m/z. 
diff --git a/bin/mzml_statistics.py b/bin/mzml_statistics.py index 825f5fd3..208ad3ab 100755 --- a/bin/mzml_statistics.py +++ b/bin/mzml_statistics.py @@ -7,12 +7,12 @@ import sys from pathlib import Path import sqlite3 - +import re import pandas as pd from pyopenms import MSExperiment, MzMLFile -def ms_dataframe(ms_path: str) -> None: +def ms_dataframe(ms_path: str, id_only: bool = False) -> None: file_columns = [ "SpectrumID", "MSLevel", @@ -25,8 +25,9 @@ def ms_dataframe(ms_path: str) -> None: "AcquisitionDateTime", ] - def parse_mzml(file_name: str, file_columns: list): + def parse_mzml(file_name: str, file_columns: list, id_only: bool = False): info = [] + psm_part_info = [] exp = MSExperiment() acquisition_datetime = exp.getDateTime().get() MzMLFile().load(file_name, exp) @@ -54,15 +55,30 @@ def parse_mzml(file_name: str, file_columns: list): charge_state = spectrum.getPrecursors()[0].getCharge() emz = spectrum.getPrecursors()[0].getMZ() if spectrum.getPrecursors()[0].getMZ() else None info_list = [id_, MSLevel, charge_state, peak_per_ms, bpc, tic, rt, emz, acquisition_datetime] + mz_array = peaks_tuple[0] + intensity_array = peaks_tuple[1] else: info_list = [id_, MSLevel, None, None, None, None, rt, None, acquisition_datetime] + if id_only and MSLevel == 2: + psm_part_info.append([re.findall(r"[scan|spectrum]=(\d+)", id_)[0], MSLevel, mz_array, intensity_array]) info.append(info_list) + if id_only and len(psm_part_info) > 0: + pd.DataFrame(psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]).to_csv( + f"{Path(ms_path).stem}_spectrum_df.csv", + mode="w", + index=False, + header=True, + ) + return pd.DataFrame(info, columns=file_columns) def parse_bruker_d(file_name: str, file_columns: list): sql_filepath = f"{file_name}/analysis.tdf" + if not Path(sql_filepath).exists(): + msg = f"File '{sql_filepath}' not found" + raise FileNotFoundError(msg) conn = sqlite3.connect(sql_filepath) c = conn.cursor() @@ -77,13 +93,21 @@ def parse_bruker_d(file_name: str, file_columns: list): mslevel_map = {0: 1, 8: 2} elif 9 in df["MsMsType"].values: mslevel_map = {0: 1, 9: 2} + else: + msg = f"Unrecognized ms type '{df['MsMsType'].values}'" + raise ValueError(msg) df["MsMsType"] = df["MsMsType"].map(mslevel_map) try: + # This line raises an sqlite error if the table does not exist + _ = conn.execute("SELECT * from Precursors LIMIT 1").fetchall() precursor_df = pd.read_sql_query("SELECT * from Precursors", conn) - except Exception as e: - print(f"No precursers recorded in {file_name}") - precursor_df = pd.DataFrame() + except sqlite3.OperationalError as e: + if "no such table: Precursors" in str(e): + print(f"No precursers recorded in {file_name}, This is normal for DIA data.") + precursor_df = pd.DataFrame() + else: + raise if len(df) == len(precursor_df): df = pd.concat([df, precursor_df["Charge", "MonoisotopicMz"]], axis=1) @@ -108,10 +132,30 @@ def parse_bruker_d(file_name: str, file_columns: list): return df + if not (Path(ms_path).exists()): + print(f"Not found '{ms_path}', trying to find alias") + ms_path_path = Path(ms_path) + path_stem = str(ms_path_path.stem) + candidates = ( + list(ms_path_path.parent.glob("*.d")) + + list(ms_path_path.parent.glob("*.mzml")) + + list(ms_path_path.parent.glob("*.mzML")) + ) + + candidates = [c for c in candidates if path_stem in str(c)] + + if len(candidates) == 1: + ms_path = str(candidates[0].resolve()) + else: + raise FileNotFoundError() + if Path(ms_path).suffix == ".d" and Path(ms_path).is_dir(): ms_df = parse_bruker_d(ms_path, file_columns) 
elif Path(ms_path).suffix in [".mzML", ".mzml"]: - ms_df = parse_mzml(ms_path, file_columns) + ms_df = parse_mzml(ms_path, file_columns, id_only) + else: + msg = f"Unrecognized or inexistent mass spec file '{ms_path}'" + raise RuntimeError(msg) ms_df.to_csv( f"{Path(ms_path).stem}_ms_info.tsv", @@ -124,7 +168,8 @@ def parse_bruker_d(file_name: str, file_columns: list): def main(): ms_path = sys.argv[1] - ms_dataframe(ms_path) + id_only = sys.argv[2] + ms_dataframe(ms_path, id_only) if __name__ == "__main__": diff --git a/bin/psm_conversion.py b/bin/psm_conversion.py new file mode 100644 index 00000000..c122a24f --- /dev/null +++ b/bin/psm_conversion.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +import numpy as np +import pyopenms as oms +import pandas as pd +import re +import os +from pathlib import Path +import sys + +_parquet_field = [ + "sequence", "protein_accessions", "protein_start_positions", "protein_end_positions", + "modifications", "retention_time", "charge", "calc_mass_to_charge", "reference_file_name", + "scan_number", "peptidoform", "posterior_error_probability", "global_qvalue", "is_decoy", + "consensus_support", "mz_array", "intensity_array", "num_peaks", "search_engines", "id_scores", "hit_rank" +] + + +def mods_position(peptide): + pattern = re.compile(r"\((.*?)\)") + original_mods = pattern.findall(peptide) + peptide = re.sub(r"\(.*?\)", ".", peptide) + position = [i.start() for i in re.finditer(r"\.", peptide)] + for j in range(1, len(position)): + position[j] -= j + + for k in range(0, len(original_mods)): + original_mods[k] = str(position[k]) + "-" + original_mods[k] + + original_mods = [str(i) for i in original_mods] if len(original_mods) > 0 else np.nan + + return original_mods + + +def convert_psm(idxml, spectra_file, export_decoy_psm): + prot_ids = [] + pep_ids = [] + parquet_data = [] + consensus_support = np.nan + mz_array = [] + intensity_array = [] + num_peaks = np.nan + id_scores = [] + search_engines = [] + + oms.IdXMLFile().load(idxml, prot_ids, pep_ids) + if "ConsensusID" in prot_ids[0].getSearchEngine(): + if prot_ids[0].getSearchParameters().metaValueExists("SE:MS-GF+"): + search_engines = ["MS-GF+"] + if prot_ids[0].getSearchParameters().metaValueExists("SE:Comet"): + search_engines.append("Comet") + if prot_ids[0].getSearchParameters().metaValueExists("SE:Sage"): + search_engines.append("Sage") + else: + search_engines = [prot_ids[0].getSearchEngine()] + + reference_file_name = os.path.splitext(prot_ids[0].getMetaValue("spectra_data")[0].decode("UTF-8"))[0] + spectra_df = pd.read_csv(spectra_file) if spectra_file else None + + for peptide_id in pep_ids: + retention_time = peptide_id.getRT() + calc_mass_to_charge = peptide_id.getMZ() + scan_number = int(re.findall(r"(spectrum|scan)=(\d+)", peptide_id.getMetaValue("spectrum_reference"))[0][1]) + + if isinstance(spectra_df, pd.DataFrame): + spectra = spectra_df[spectra_df["scan"] == scan_number] + mz_array = spectra["mz"].values[0] + intensity_array = spectra["intensity"].values[0] + num_peaks = len(mz_array) + + for hit in peptide_id.getHits(): + # if remove decoy when mapped to target+decoy? 
+ is_decoy = 0 if hit.getMetaValue("target_decoy") == "target" else 1 + if export_decoy_psm == "false" and is_decoy: + continue + global_qvalue = np.nan + if len(search_engines) > 1: + if "q-value" in peptide_id.getScoreType(): + global_qvalue = hit.getScore() + consensus_support = hit.getMetaValue("consensus_support") + elif search_engines == "Comet": + id_scores = ["Comet:Expectation value: " + str(hit.getScore())] + elif search_engines == "MS-GF+": + id_scores = ["MS-GF:SpecEValue: " + str(hit.getScore())] + elif search_engines == "Sage": + id_scores = ["Sage:hyperscore: " + str(hit.getScore())] + + charge = hit.getCharge() + peptidoform = hit.getSequence().toString() + modifications = mods_position(peptidoform) + sequence = hit.getSequence().toUnmodifiedString() + protein_accessions = [ev.getProteinAccession() for ev in hit.getPeptideEvidences()] + posterior_error_probability = hit.getMetaValue("Posterior Error Probability_score") + protein_start_positions = [ev.getStart() for ev in hit.getPeptideEvidences()] + protein_end_positions = [ev.getEnd() for ev in hit.getPeptideEvidences()] + hit_rank = hit.getRank() + + parquet_data.append([sequence, protein_accessions, protein_start_positions, protein_end_positions, + modifications, retention_time, charge, calc_mass_to_charge, reference_file_name, + scan_number, peptidoform, posterior_error_probability, global_qvalue, is_decoy, + consensus_support, mz_array, intensity_array, num_peaks, search_engines, id_scores, + hit_rank]) + + pd.DataFrame(parquet_data, columns=_parquet_field).to_csv(f"{Path(idxml).stem}_psm.csv", + mode="w", + index=False, + header=True) + + +def main(): + idxml_path = sys.argv[1] + spectra_file = sys.argv[2] + export_decoy_psm = sys.argv[3] + convert_psm(idxml_path, spectra_file, export_decoy_psm) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/modules.config b/conf/modules.config index ba7381ac..c220be03 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,16 +35,6 @@ process { pattern: '*_versions.yml' ] } - - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } // OPENMSPEAKPICKER @@ -255,6 +245,16 @@ process { ] } + withName: '.*:DDA_ID:PSMFDRCONTROL:IDFILTER' { + ext.args = "-score:pep \"$params.run_fdr_cutoff\"" + ext.suffix = '.idXML' + publishDir = [ + path: { "${params.outdir}/idfilter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // PROTEOMICSLFQ withName: '.*:LFQ:PROTEOMICSLFQ' { ext.args = "-debug $params.plfq_debug"
diff --git a/conf/test_dia.config b/conf/test_dia.config index 666101ff..deb1d03c 100644 --- a/conf/test_dia.config +++ b/conf/test_dia.config @@ -33,6 +33,8 @@ params { max_precursor_charge = 3 allowed_missed_cleavages = 1 diann_normalize = false + skip_post_msstats = false + publish_dir_mode = 'symlink' max_mods = 2 }
diff --git a/docs/images/id_pipeline.png b/docs/images/id_pipeline.png index b4c9a7f4..7ac1c07b 100644 Binary files a/docs/images/id_pipeline.png and b/docs/images/id_pipeline.png differ
diff --git a/docs/images/id_pipeline.svg b/docs/images/id_pipeline.svg index b2939b96..7f226118 100644 --- a/docs/images/id_pipeline.svg +++ b/docs/images/id_pipeline.svg @@ -1,1420 +1,4 @@
[1420 lines of SVG markup are replaced by a 4-line compact drawio export; the markup did not survive extraction and only the diagram's text labels remain. Both versions label the same DDA identification steps: Raw file conversion/Indexing (if necessary), Decoy generation (if requested), Database, Input, Design, Spectra, ID Comet, ID MSGF, mix (if multi-engine), merge (if single-engine), Percolator, FDR, Distribution-based PEP (if !multi-engine), ConsensusID, Luciphor (if localize), combined FDR, Switch to q-value/FDR, IDFilter. The updated diagram additionally labels ID SAGE, MSstats, and a "Quantification + Inference and experiment-wide FDR filter" step.]
\ No newline at end of file
diff --git a/docs/images/nf-core-quantms_logo_dark.png b/docs/images/nf-core-quantms_logo_dark.png index 1cfe323f..11938c13 100644 Binary files a/docs/images/nf-core-quantms_logo_dark.png and b/docs/images/nf-core-quantms_logo_dark.png differ
diff --git a/docs/images/nf-core-quantms_logo_light.png b/docs/images/nf-core-quantms_logo_light.png index 030a38e8..83c08f47 100644 Binary files a/docs/images/nf-core-quantms_logo_light.png and b/docs/images/nf-core-quantms_logo_light.png differ
diff --git a/docs/images/quantms.png b/docs/images/quantms.png index 2a0fc055..b3498dbd 100644 Binary files a/docs/images/quantms.png and b/docs/images/quantms.png differ
diff --git a/docs/images/quantms.svg b/docs/images/quantms.svg index 2d4066c1..6cf881d3 100644 --- a/docs/images/quantms.svg +++ b/docs/images/quantms.svg
[The SVG markup did not survive extraction; only the diagram's text labels remain. The figure title changes from "nf-core/quantms v1.1" to "nf-core/quantms v1.2" (subtitle "Example analysis pathways"); the engine description "MS-GF+, Comet, Percolator, ConsesusID" becomes "MS-GF+, SAGE, Comet" with "Percolator, ConsesusID" split out as rescoring; a Sage node and "speclib"/"skip" labels are added; many other labels are only re-wrapped. Recoverable labels: Search engines (at least one), Rescoring (exactly one), ThermoRawFileParser, Comet, MSGF+, ConsensusID, Distribution fitting, FDR control, IsobaricAnalyzer, Mod. localization, Percolator + FDR, Legend (DDA-LFQ, DDA-ISO, DIA-LFQ, Parallel step), IDMapper, Protein inference, mzML indexing, DIA-NN insilico lib. predict, DIA-NN preanalysis, DIA-NN empirical lib. gen., DIA-NN summary, DIA-NN individual final analysis, Add decoys, ProteomicsLFQ, ProteinQuantifier, Triqler, MSstats, mzTab, file types (mzML, raw, fasta, tsv), and "Job 1 ... Job n In Parallel".]
diff --git a/docs/images/quantms_metro.png b/docs/images/quantms_metro.png index f1f5d155..90685839 100644 Binary files a/docs/images/quantms_metro.png and b/docs/images/quantms_metro.png differ
diff --git a/docs/usage.md b/docs/usage.md index ae574c88..c2fe207b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -179,35 +179,35 @@ The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementatio 2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) 3. Create the custom config accordingly: - - For Docker: - - ```nextflow - process { - withName: PANGOLIN { - container = 'biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - - For Singularity: - - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - - For Conda: - - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` +- For Docker: + +```nextflow +process { + withName: PANGOLIN { + container = 'biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } +} +``` + +- For Singularity: + +```nextflow +process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } +} +``` + +- For Conda: + +```nextflow +process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } +} +``` > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653d..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g.
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? 
'' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 0cb27015..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,57 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/quantms pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input sdrf to the pipeline e.g. '--input *.sdrf.csv'") - } - - // check fasta database has been provided - if (!params.database) { - Nextflow.error("Please provide an fasta database to the pipeline e.g. 
'--database *.fasta'") - } - } -} diff --git a/lib/WorkflowQuantms.groovy b/lib/WorkflowQuantms.groovy deleted file mode 100755 index a7772bf3..00000000 --- a/lib/WorkflowQuantms.groovy +++ /dev/null @@ -1,118 +0,0 @@ -// -// This file holds several functions specific to the workflow/quantms.nf in the nf-core/quantms pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowQuantms { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - if (!params.database) { - Nextflow.error "database file not specified with e.g. '--database *.fasta' or via a detectable config file." - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "
<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" - } - summary_section += "    </dl>\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Check class of an Object for "List" type - // - public static boolean isCollectionOrArray(object) { - return [Collection, Object[]].any { it.isAssignableFrom(object.getClass()) } - } - - // - // check file extension - // - public static boolean hasExtension(file, extension) { - return file.toString().toLowerCase().endsWith(extension.toLowerCase()) - } -}
diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000
Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ
diff --git a/main.nf b/main.nf index 7becd311..96d36250 100644 --- a/main.nf +++ b/main.nf @@ -13,35 +13,15 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' +include { QUANTMS } from './workflows/quantms' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_quantms_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_quantms_pipeline' -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input input_files.sdrf.tsv --database ~/dbs/human_fasta.fasta -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { QUANTMS } from './workflows/quantms' // // WORKFLOW: Run main nf-core/quantms analysis pipeline
diff --git a/modules.json b/modules.json index a8f251aa..beb0405b 100644 --- a/modules.json +++ b/modules.json @@ -7,15 +7,34 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + 
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/diann_preliminary_analysis/main.nf b/modules/local/diann_preliminary_analysis/main.nf index 88e63877..fe995ece 100644 --- a/modules/local/diann_preliminary_analysis/main.nf +++ b/modules/local/diann_preliminary_analysis/main.nf @@ -13,6 +13,7 @@ process DIANN_PRELIMINARY_ANALYSIS { path "*.quant", emit: diann_quant tuple val(meta), path("*_diann.log"), emit: log path "versions.yml", emit: version + path(ms_file), emit: preliminary_ms_file when: task.ext.when == null || task.ext.when @@ -42,6 +43,8 @@ process DIANN_PRELIMINARY_ANALYSIS { # Precursor Tolerance unit was: ${meta['precursormasstoleranceunit']} # Fragment Tolerance unit was: ${meta['fragmentmasstoleranceunit']} + # Final mass accuracy is '${mass_acc}' + diann --lib ${predict_library} \\ --f ${ms_file} \\ --threads ${task.cpus} \\ diff --git a/modules/local/extract_psm/main.nf b/modules/local/extract_psm/main.nf new file mode 100644 index 00000000..a640316a --- /dev/null +++ b/modules/local/extract_psm/main.nf @@ -0,0 +1,36 @@ +process PSMCONVERSION { + tag "$meta.mzml_id" + label 'process_medium' + + conda "bioconda::pyopenms=3.1.0" + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/pyopenms:3.1.0--py39h9b8898c_0" + } else { + container "biocontainers/pyopenms:3.1.0--py39h9b8898c_0" + } + + input: + tuple val(meta), path(idxml_file), path(spectrum_df) + + output: + path "*_psm.csv", emit: psm_info + path "versions.yml", emit: version + path "*.log", emit: log + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.mzml_id}" + + + """ + psm_conversion.py "${idxml_file}" \\ + ${spectrum_df} \\ + $params.export_decoy_psm \\ + 2>&1 | tee extract_idxml.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyopenms: \$(pip show pyopenms | grep "Version" | awk -F ': ' '{print \$2}') + END_VERSIONS + """ +} diff --git a/modules/local/extract_psm/meta.yml b/modules/local/extract_psm/meta.yml new file mode 100644 index 00000000..23586d7e --- /dev/null +++ b/modules/local/extract_psm/meta.yml @@ -0,0 +1,34 @@ +name: PSMCONVERSION +description: A module to extract PSM information from idXML file +keywords: + - PSM + - conversion +tools: + - custom: + description: | + A custom module for PSM extraction. 
diff --git a/modules/local/extract_psm/meta.yml b/modules/local/extract_psm/meta.yml new file mode 100644 index 00000000..23586d7e --- /dev/null +++ b/modules/local/extract_psm/meta.yml @@ -0,0 +1,34 @@ +name: PSMCONVERSION +description: A module to extract PSM information from an idXML file +keywords: + - PSM + - conversion +tools: + - custom: + description: | + A custom module for PSM extraction. + homepage: https://github.com/bigbio/quantms + documentation: https://github.com/bigbio/quantms/tree/readthedocs +input: + - idxml_file: + type: file + description: idXML identification file + pattern: "*.idXML" + - spectrum_df: + type: file + description: spectrum data file + pattern: "_spectrum_df.csv" + - meta: + type: map + description: Groovy Map containing sample information +output: + - psm_info: + type: file + description: PSM csv file + pattern: "*_psm.csv" + - version: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@daichengxin"
diff --git a/modules/local/msstats/main.nf b/modules/local/msstats/main.nf index 6e215bb8..bd5de19e 100644 --- a/modules/local/msstats/main.nf +++ b/modules/local/msstats/main.nf @@ -2,11 +2,11 @@ process MSSTATS { tag "$msstats_csv_input.Name" label 'process_medium' - conda "bioconda::bioconductor-msstats=4.8.3" + conda "bioconda::bioconductor-msstats=4.10.0" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/bioconductor-msstats:4.8.3--r43hf17093f_0" + container "https://depot.galaxyproject.org/singularity/bioconductor-msstats:4.10.0--r43hf17093f_0" } else { - container "biocontainers/bioconductor-msstats:4.8.3--r43hf17093f_0" + container "biocontainers/bioconductor-msstats:4.10.0--r43hf17093f_0" } input:
diff --git a/modules/local/msstatstmt/main.nf b/modules/local/msstatstmt/main.nf index 1d5d7a90..32b9cb6b 100644 --- a/modules/local/msstatstmt/main.nf +++ b/modules/local/msstatstmt/main.nf @@ -2,11 +2,11 @@ process MSSTATSTMT { tag "$msstatstmt_csv_input.Name" label 'process_medium' - conda "bioconda::bioconductor-msstatstmt=2.8.0" + conda "bioconda::bioconductor-msstatstmt=2.10.0" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/bioconductor-msstatstmt:2.8.0--r43hdfd78af_0" + container "https://depot.galaxyproject.org/singularity/bioconductor-msstatstmt:2.10.0--r43hdfd78af_0" } else { - container "biocontainers/bioconductor-msstatstmt:2.8.0--r43hdfd78af_0" + container "biocontainers/bioconductor-msstatstmt:2.10.0--r43hdfd78af_0" } input:
diff --git a/modules/local/mzmlstatistics/main.nf b/modules/local/mzmlstatistics/main.nf index f39c8bb6..51770947 100644 --- a/modules/local/mzmlstatistics/main.nf +++ b/modules/local/mzmlstatistics/main.nf @@ -15,6 +15,7 @@ process MZMLSTATISTICS { output: path "*_ms_info.tsv", emit: ms_statistics + tuple val(meta), path("*_spectrum_df.csv"), emit: spectrum_df path "versions.yml", emit: version path "*.log", emit: log @@ -24,6 +25,7 @@ """ mzml_statistics.py "${ms_file}" \\ + $params.id_only \\ 2>&1 | tee mzml_statistics.log cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/mzmlstatistics/meta.yml b/modules/local/mzmlstatistics/meta.yml index d1fab0da..59a1b451 100644 --- a/modules/local/mzmlstatistics/meta.yml +++ b/modules/local/mzmlstatistics/meta.yml @@ -19,6 +19,10 @@ output: type: file description: mzMLs statistics file pattern: "*_mzml_info.tsv" + - spectrum_df: + type: file + description: spectrum data file + pattern: "_spectrum_df.csv" - version: type: file description: File containing software version
diff --git a/modules/local/openms/thirdparty/searchenginecomet/main.nf b/modules/local/openms/thirdparty/searchenginecomet/main.nf index a9b35c2a..b8f1a91a 100644 --- 
a/modules/local/openms/thirdparty/searchenginecomet/main.nf +++ b/modules/local/openms/thirdparty/searchenginecomet/main.nf @@ -102,6 +102,7 @@ process SEARCHENGINECOMET { -precursor_error_units $meta.precursormasstoleranceunit \\ -fragment_mass_tolerance ${bin_tol} \\ -fragment_bin_offset ${bin_offset} \\ + -minimum_peaks $params.min_peaks \\ ${il_equiv} \\ -PeptideIndexing:unmatched_action ${params.unmatched_action} \\ -debug $params.db_debug \\ diff --git a/modules/local/openms/thirdparty/searchenginemsgf/main.nf b/modules/local/openms/thirdparty/searchenginemsgf/main.nf index 226b65ff..b76b1106 100644 --- a/modules/local/openms/thirdparty/searchenginemsgf/main.nf +++ b/modules/local/openms/thirdparty/searchenginemsgf/main.nf @@ -32,7 +32,7 @@ process SEARCHENGINEMSGF { if (meta.enzyme == 'Trypsin') enzyme = 'Trypsin/P' else if (meta.enzyme == 'Arg-C') enzyme = 'Arg-C/P' else if (meta.enzyme == 'Asp-N') enzyme = 'Asp-N/B' - else if (meta.enzyme == 'Chymotrypsin') enzyme = 'Chymotrypsin' + else if (meta.enzyme == 'Chymotrypsin') enzyme = 'Chymotrypsin/P' else if (meta.enzyme == 'Lys-C') enzyme = 'Lys-C/P' if (enzyme.toLowerCase() == "unspecific cleavage") { @@ -49,9 +49,11 @@ process SEARCHENGINEMSGF { } num_enzyme_termini = "" + max_missed_cleavages = "-max_missed_cleavages ${params.allowed_missed_cleavages}" if (meta.enzyme == "unspecific cleavage") { num_enzyme_termini = "none" + max_missed_cleavages = "" } else if (params.num_enzyme_termini == "fully") { @@ -75,7 +77,7 @@ process SEARCHENGINEMSGF { -max_precursor_charge $params.max_precursor_charge \\ -min_peptide_length $params.min_peptide_length \\ -max_peptide_length $params.max_peptide_length \\ - -max_missed_cleavages $params.allowed_missed_cleavages \\ + ${max_missed_cleavages} \\ -isotope_error_range $params.isotope_error_range \\ -enzyme "${enzyme}" \\ -tryptic ${msgf_num_enzyme_termini} \\ diff --git a/modules/local/openms/thirdparty/searchenginesage/main.nf b/modules/local/openms/thirdparty/searchenginesage/main.nf index 8257b801..3e6023bc 100644 --- a/modules/local/openms/thirdparty/searchenginesage/main.nf +++ b/modules/local/openms/thirdparty/searchenginesage/main.nf @@ -50,6 +50,8 @@ process SEARCHENGINESAGE { -fragment_tol_unit $meta.fragmentmasstoleranceunit \\ -fixed_modifications ${meta.fixedmodifications.tokenize(',').collect{ "'${it}'" }.join(" ") } \\ -variable_modifications ${meta.variablemodifications.tokenize(',').collect{ "'${it}'" }.join(" ") } \\ + -charges "$params.min_precursor_charge, $params.max_precursor_charge" \\ + -min_peaks $params.min_peaks \\ -max_variable_mods $params.max_mods \\ -isotope_error_range $params.isotope_error_range \\ ${il_equiv} \\ diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 86dbba9f..8b6ad192 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -1,11 +1,11 @@ process PMULTIQC { label 'process_high' - conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.23" + conda "conda-forge::pandas_schema conda-forge::lzstring bioconda::pmultiqc=0.0.24" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.23--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/pmultiqc:0.0.24--pyhdfd78af_0" } else { - container "biocontainers/pmultiqc:0.0.23--pyhdfd78af_0" + container "biocontainers/pmultiqc:0.0.24--pyhdfd78af_0" } input: diff --git a/modules/local/preprocess_expdesign.nf 
b/modules/local/preprocess_expdesign.nf index 361d3d20..dbe01a79 100644 --- a/modules/local/preprocess_expdesign.nf +++ b/modules/local/preprocess_expdesign.nf @@ -6,11 +6,11 @@ process PREPROCESS_EXPDESIGN { tag "$design.Name" label 'process_low' - conda "bioconda::sdrf-pipelines=0.0.24" + conda "bioconda::sdrf-pipelines=0.0.26" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0" } else { - container "biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0" + container "biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0" } input: diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 5c9332e1..67d6a5f4 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -3,11 +3,11 @@ process SAMPLESHEET_CHECK { tag "$input_file" label 'process_single' - conda "bioconda::sdrf-pipelines=0.0.24" + conda "bioconda::sdrf-pipelines=0.0.26" if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { - container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0" + container "https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0" } else { - container "biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0" + container "biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0" } input: diff --git a/modules/local/sdrfparsing/main.nf b/modules/local/sdrfparsing/main.nf index 2f7989bb..5236c5de 100644 --- a/modules/local/sdrfparsing/main.nf +++ b/modules/local/sdrfparsing/main.nf @@ -2,10 +2,10 @@ process SDRFPARSING { tag "$sdrf.Name" label 'process_low' - conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.24" + conda "conda-forge::pandas_schema bioconda::sdrf-pipelines=0.0.26" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.24--pyhdfd78af_0' : - 'biocontainers/sdrf-pipelines:0.0.24--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/sdrf-pipelines:0.0.26--pyhdfd78af_0' : + 'biocontainers/sdrf-pipelines:0.0.26--pyhdfd78af_0' }" input: path sdrf diff --git a/modules/local/thermorawfileparser/main.nf b/modules/local/thermorawfileparser/main.nf index 3c3f8a66..dba9e328 100644 --- a/modules/local/thermorawfileparser/main.nf +++ b/modules/local/thermorawfileparser/main.nf @@ -4,10 +4,10 @@ process THERMORAWFILEPARSER { label 'process_single' label 'error_retry' - conda "conda-forge::mono bioconda::thermorawfileparser=1.3.4" + conda "conda-forge::mono bioconda::thermorawfileparser=1.4.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.3.4--ha8f3691_0' : - 'biocontainers/thermorawfileparser:1.3.4--ha8f3691_0' }" + 'https://depot.galaxyproject.org/singularity/thermorawfileparser:1.4.3--ha8f3691_0' : + 'biocontainers/thermorawfileparser:1.4.3--ha8f3691_0' }" stageInMode { if (task.attempt == 1) {
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index ebc87273..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index c32657de..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index f5690c52..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,107 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import platform
-from textwrap import dedent
-
-import yaml
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="width:100%" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>")
-    return "\\n".join(html)
-
-
-def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - # load as text and print for debugging - versions_text = f.read() - print(versions_text) - - with open("$versions") as f: - versions_by_process = yaml.safe_load(f) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main()
diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ 
+ [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index bda2d480..d0ebf103 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { local_input_type = 'mzML' database = null acquisition_method = null + id_only = false // Input options input = null @@ -22,6 +23,7 @@ params { // Tools flags posterior_probabilities = 'percolator' add_decoys = false + skip_rescoring = false search_engines = 'comet' sage_processes = 1 run_fdr_cutoff = 0.10 @@ -62,7 +64,7 @@ params { // Isobaric analyses labelling_type = null - reference_channel = 126 + reference_channel = '126' min_precursor_intensity = 1.0 reporter_mass_shift = 0.002 select_activation = 'HCD' @@ -94,6 +96,7 @@ params { max_peptide_length = 40 num_hits = 1 max_mods = 3 + min_peaks = 10 //minimum number of peaks in a spectrum min_pr_mz = null max_pr_mz = null min_fr_mz = null @@ -106,6 +109,9 @@ params { // IDPEP flags outlier_handling = "none" + // DDA_ID flags + export_decoy_psm = true + // Percolator flags train_FDR = 0.05 test_FDR = 0.05 @@ -175,6 +181,13 @@ params { diann_normalize = true diann_speclib = null + // DIA-NN: Extras + skip_preliminary_analysis = false + empirical_assembly_log = null + random_preanalysis = false + empirical_assembly_ms_n = 200 + + // MSstats general options msstats_remove_one_feat_prot = true ref_condition = null @@ -210,7 +223,7 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null + outdir = './results' publish_dir_mode = 'copy' email = null email_on_fail = null @@ -261,7 +274,7 @@ try { } // Load nf-core/quantms custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
// try { - // includeConfig "${params.custom_config_base}/pipeline/quantms.config" - // } catch (Exception e) { @@ -274,6 +287,7 @@ profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -282,6 +296,7 @@ podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -297,16 +312,16 @@ } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -402,7 +417,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -419,6 +434,9 @@ env { } // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -444,7 +462,7 @@ manifest { description = """Quantitative Mass Spectrometry nf-core workflow""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.2.0' + version = '1.3.0' doi = '10.5281/zenodo.7754148' }
diff --git a/nextflow_schema.json b/nextflow_schema.json index 51b4da14..544c02dc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -56,9 +56,20 @@ "acquisition_method": { "type": "string", "description": "Proteomics data acquisition method", - "default": "dda", "enum": ["dda", "dia"], "fa_icon": "far fa-list-ol" + }, + "id_only": { + "type": "boolean", + "description": "Only perform the identification subworkflow.", + "fa_icon": "far fa-check-square", + "help_text": "Only perform the identification subworkflow for specific cases." + }, + "export_decoy_psm": { + "type": "boolean", + "description": "Whether to export decoy PSMs in the final identification results", + "fa_icon": "far fa-check-square", + "help_text": "Whether to export decoy PSMs in the final identification results of the dda_id subworkflow for specific cases." } } }, @@ -269,7 +280,6 @@ "instrument": { "type": "string", "description": "Type of instrument that generated the data. 'low_res' or 'high_res' (default; refers to LCQ and LTQ instruments)", - "default": "high_res", "fa_icon": "fas fa-list-ol" }, "protocol": { @@ -314,6 +324,13 @@ "default": 3, "fa_icon": "fas fa-sliders-h" }, + "min_peaks": { + "type": "integer", + "description": "Minimum number of peaks in the spectrum to be considered for the search engine. 
Default: 10", + "default": 10, + "fa_icon": "fas fa-sliders-h" + }, + "min_pr_mz": { "type": "number", "description": "The minimum precursor m/z for the in silico library generation or library-free search", @@ -418,6 +435,12 @@ "description": "Choose between different rescoring/posterior probability calculation methods and set them up.", "default": "", "properties": { + "skip_rescoring": { + "type": "boolean", + "description": "Skip PSM rescoring steps for specific cases, such as studying pure search engine results and search engine ranks", + "default": false, + "fa_icon": "far fa-check-square" + }, "posterior_probabilities": { "type": "string", "description": "How to calculate posterior probabilities for PSMs:\n\n* 'percolator' = Re-score based on PSM-feature-based SVM and transform distance\n to hyperplane for posteriors\n* 'fit_distributions' = Fit positive and negative distributions to scores\n (similar to PeptideProphet)", @@ -428,7 +451,7 @@ "run_fdr_cutoff": { "type": "number", "description": "FDR cutoff on PSM level (or peptide level; see Percolator options) *per run* before going into feature finding, map alignment and inference. This can be seen as a pre-filter. See ", - "default": 0.01, + "default": 0.1, "fa_icon": "fas fa-filter" }, "idfilter_debug": { @@ -624,10 +647,10 @@ "help_text": "The normalization is done by using the Median of Ratios. Also the ratios the medians is provided as control measure." }, "reference_channel": { - "type": "integer", + "type": "string", "description": "The reference channel, e.g. for calculating ratios.", "fa_icon": "fas fa-list-ol", - "default": 126 + "default": "126" }, "iso_debug": { "type": "integer", @@ -836,21 +859,21 @@ "feature_with_id_min_score": { "type": "number", "description": "The minimum probability (e.g.: 0.25) an identified (=id targeted) feature must have to be kept for alignment and linking (0=no filter).", - "default": 0.0, + "default": 0.1, "fa_icon": "fas fa-filter", "help_text": "The minimum probability (e.g.: 0.25) an identified (=id targeted) feature must have to be kept for alignment and linking (0=no filter). (default: '0.0') (min: '0.0' max: '1.0')" }, "feature_without_id_min_score": { "type": "number", "description": "The minimum probability (e.g.: 0.75) an unidentified feature must have to be kept for alignment and linking (0=no filter).", - "default": 0.0, + "default": 0.75, "fa_icon": "fas fa-filter", "help_text": "The minimum probability (e.g.: 0.75) an unidentified feature must have to be kept for alignment and linking (0=no filter). (default: '0.0') (min: '0.0' max: '1.0')" }, "lfq_intensity_threshold": { "type": "number", "description": "The minimum intensity for a feature to be considered for quantification. (default: '10000')", - "default": 10000, + "default": 1000, "fa_icon": "fas fa-filter", "help_text": "The minimum intensity for a feature to be considered for quantification. 
(default: '10000')" }, @@ -900,7 +923,7 @@ "description": "Set the scan window radius to a specific value", "fa_icon": "fas fa-filter", "help_text": " Ideally, should be approximately equal to the average number of data points per peak", - "default": 7 + "default": 8 }, "min_corr": { "type": "number", @@ -940,6 +963,20 @@ "help_text": "If passed, will use that spectral library to carry out the DIA-NN search, instead of predicting one from the fasta file.", "hidden": false }, + "skip_preliminary_analysis": { + "type": "boolean", + "description": "Skip the preliminary analysis step and use the passed spectral library as-is, instead of generating a local consensus library.", + "fa_icon": "fas fa-forward", + "default": false, + "hidden": false + }, + "empirical_assembly_log": { + "type": "string", + "description": "The log file for the empirical assembly. Only used if `--skip_preliminary_analysis` is set to `true` and `--diann_speclib` is passed. If passed, will use that log file to carry out the DIA-NN search, instead of running a preliminary search.", + "fa_icon": "fas fa-file", + "help_text": "If passed, will use that log file to carry out the DIA-NN search, instead of running the preliminary analysis.", + "hidden": false + }, "diann_debug": { "type": "integer", "description": "Debug level", @@ -953,6 +990,19 @@ "description": "Enable cross-run normalization between runs by diann.", "default": true, "fa_icon": "far fa-check-square" + }, + "random_preanalysis": { + "type": "boolean", + "description": "Enable random selection of spectrum files to generate the empirical library.", + "default": false, + "fa_icon": "far fa-check-square" + }, + "empirical_assembly_ms_n": { + "type": "integer", + "description": "The number of randomly selected spectrum files.", + "default": 200, + "fa_icon": "fas fa-filter", + "hidden": true } }, "fa_icon": "fas fa-braille" }, @@ -1154,7 +1204,7 @@ "max_memory": { "type": "string", "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", + "default": "128 GB", "fa_icon": "fas fa-memory", "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, }, "max_time": { "type": "string", "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", + "default": "10d", "fa_icon": "far fa-clock", "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true,
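Note: taken together, the new schema entries describe the two opt-in modes introduced in 1.3.0 — an identification-only DDA run and DIA-NN spectral-library reuse. A hedged sketch of a custom `-c` config exercising them (flag names come from this diff; values and paths are placeholders, not recommendations):

```nextflow
params {
    // DDA-ID: identification-only run, optionally keeping decoy PSMs
    id_only          = true
    export_decoy_psm = true
    skip_rescoring   = false   // set true only to inspect raw search-engine results/ranks

    // DIA-NN: reuse a previously assembled spectral library and its assembly log
    diann_speclib             = '/path/to/empirical_library.speclib'   // placeholder path
    skip_preliminary_analysis = true
    empirical_assembly_log    = '/path/to/empirical_assembly.log'      // placeholder path
}
```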
-[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] -[tool.ruff] -line-length = 120 +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/databasesearchengines.nf b/subworkflows/local/databasesearchengines.nf index 166be4b2..e0d62bfc 100644 --- a/subworkflows/local/databasesearchengines.nf +++ b/subworkflows/local/databasesearchengines.nf @@ -26,9 +26,11 @@ workflow DATABASESEARCHENGINES { ch_id_comet = ch_id_comet.mix(SEARCHENGINECOMET.out.id_files_comet) } + // sort mzMLs so that the same batch ids are generated when the cache is enabled + ch_mzmls_sorted_search = ch_mzmls_search.collect(flat: false, sort: { a, b -> a[0]["mzml_id"] <=> b[0]["mzml_id"] }).flatMap() if (params.search_engines.contains("sage")) { cnt = 0 - ch_meta_mzml_db = ch_mzmls_search.map{ metapart, mzml -> + ch_meta_mzml_db = ch_mzmls_sorted_search.map{ metapart, mzml -> cnt++ def groupkey = metapart.labelling_type + metapart.dissociationmethod + diff --git a/subworkflows/local/dda_id.nf b/subworkflows/local/dda_id.nf new file mode 100644 index 00000000..c98b1c55 --- /dev/null +++ b/subworkflows/local/dda_id.nf @@ -0,0 +1,104 @@ +// +// MODULE: Local to the pipeline +// +include { DECOYDATABASE } from '../../modules/local/openms/decoydatabase/main' +include { CONSENSUSID } from '../../modules/local/openms/consensusid/main' +include { EXTRACTPSMFEATURES } from '../../modules/local/openms/extractpsmfeatures/main' +include { PERCOLATOR } from '../../modules/local/openms/thirdparty/percolator/main' +include { FALSEDISCOVERYRATE as FDRIDPEP } from '../../modules/local/openms/falsediscoveryrate/main' +include { IDPEP } from '../../modules/local/openms/idpep/main' +include { PSMCONVERSION } from '../../modules/local/extract_psm/main' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { DATABASESEARCHENGINES } from './databasesearchengines' +include { PSMFDRCONTROL } from './psmfdrcontrol' + +workflow DDA_ID { + take: + ch_file_preparation_results + ch_database_wdecoy + ch_spectrum_data + + main: + + ch_software_versions = Channel.empty() + + // + // SUBWORKFLOW: DatabaseSearchEngines + // + DATABASESEARCHENGINES ( + ch_file_preparation_results, + ch_database_wdecoy + ) + ch_software_versions = ch_software_versions.mix(DATABASESEARCHENGINES.out.versions.ifEmpty(null)) + ch_id_files = DATABASESEARCHENGINES.out.ch_id_files_idx + + ch_id_files.branch{ meta, filename -> + sage: filename.name.contains('sage') + return [meta, filename] + nosage: true + return [meta, filename] + }.set{ch_id_files_branched} + + + // + // SUBWORKFLOW: Rescoring + // + if (params.skip_rescoring == false) { + if (params.posterior_probabilities == 'percolator') { + EXTRACTPSMFEATURES(ch_id_files_branched.nosage) + ch_id_files_feats = ch_id_files_branched.sage.mix(EXTRACTPSMFEATURES.out.id_files_feat) + ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version) + PERCOLATOR(ch_id_files_feats) + ch_software_versions = ch_software_versions.mix(PERCOLATOR.out.version) + ch_consensus_input = PERCOLATOR.out.id_files_perc + } + + + if (params.posterior_probabilities != 'percolator') { + ch_fdridpep = Channel.empty() + if (params.search_engines.split(",").size() == 1)
{ + FDRIDPEP(ch_id_files) + ch_software_versions = ch_software_versions.mix(FDRIDPEP.out.version) + ch_id_files = Channel.empty() + ch_fdridpep = FDRIDPEP.out.id_files_idx_ForIDPEP_FDR + } + IDPEP(ch_fdridpep.mix(ch_id_files)) + ch_software_versions = ch_software_versions.mix(IDPEP.out.version) + ch_consensus_input = IDPEP.out.id_files_ForIDPEP + } + + // + // SUBWORKFLOW: PSMFDRCONTROL + // + ch_psmfdrcontrol = Channel.empty() + ch_consensus_results = Channel.empty() + if (params.search_engines.split(",").size() > 1) { + CONSENSUSID(ch_consensus_input.groupTuple(size: params.search_engines.split(",").size())) + ch_software_versions = ch_software_versions.mix(CONSENSUSID.out.version.ifEmpty(null)) + ch_psmfdrcontrol = CONSENSUSID.out.consensusids + ch_consensus_results = CONSENSUSID.out.consensusids + } else { + ch_psmfdrcontrol = ch_consensus_input + } + + PSMFDRCONTROL(ch_psmfdrcontrol) + ch_software_versions = ch_software_versions.mix(PSMFDRCONTROL.out.version.ifEmpty(null)) + + // + // Extract PSMs and export parquet format + // + ch_spectrum_data.view() + PSMFDRCONTROL.out.id_filtered.view() + PSMCONVERSION(PSMFDRCONTROL.out.id_filtered.combine(ch_spectrum_data, by: 0)) + + } else { + PSMCONVERSION(ch_id_files.combine(ch_spectrum_data, by: 0)) + } + + + emit: + version = ch_software_versions +} diff --git a/subworkflows/local/file_preparation.nf b/subworkflows/local/file_preparation.nf index 42f92fd6..a2b73fa1 100644 --- a/subworkflows/local/file_preparation.nf +++ b/subworkflows/local/file_preparation.nf @@ -18,13 +18,14 @@ workflow FILE_PREPARATION { ch_results = Channel.empty() ch_statistics = Channel.empty() ch_mqc_data = Channel.empty() + ch_spectrum_df = Channel.empty() // Divide the compressed files ch_rawfiles .branch { - dottar: WorkflowQuantms.hasExtension(it[1], '.tar') - dotzip: WorkflowQuantms.hasExtension(it[1], '.zip') - gz: WorkflowQuantms.hasExtension(it[1], '.gz') + dottar: hasExtension(it[1], '.tar') + dotzip: hasExtension(it[1], '.zip') + gz: hasExtension(it[1], '.gz') uncompressed: true }.set { ch_branched_input } @@ -37,9 +38,9 @@ workflow FILE_PREPARATION { // Divide mzml files ch_rawfiles .branch { - raw: WorkflowQuantms.hasExtension(it[1], '.raw') - mzML: WorkflowQuantms.hasExtension(it[1], '.mzML') - dotd: WorkflowQuantms.hasExtension(it[1], '.d') + raw: hasExtension(it[1], '.raw') + mzML: hasExtension(it[1], '.mzML') + dotd: hasExtension(it[1], '.d') }.set { ch_branched_input } // Note: we used to always index mzMLs if not already indexed but due to @@ -80,8 +81,11 @@ workflow FILE_PREPARATION { ch_results = indexed_mzml_bundle.mix(ch_branched_input.dotd) } + MZMLSTATISTICS(ch_results) ch_statistics = ch_statistics.mix(MZMLSTATISTICS.out.ms_statistics.collect()) + ch_spectrum_df = ch_spectrum_df.mix(MZMLSTATISTICS.out.spectrum_df) + ch_versions = ch_versions.mix(MZMLSTATISTICS.out.version) if (params.openms_peakpicking) { @@ -97,5 +101,13 @@ workflow FILE_PREPARATION { emit: results = ch_results // channel: [val(mzml_id), indexedmzml|.d.tar] statistics = ch_statistics // channel: [ *_ms_info.tsv ] + spectrum_data = ch_spectrum_df // channel: [val(mzml_id), *_spectrum_df.csv] version = ch_versions // channel: [ *.version.txt ] } + +// +// check file extension +// +def hasExtension(file, extension) { + return file.toString().toLowerCase().endsWith(extension.toLowerCase()) +} diff --git a/subworkflows/local/utils_nfcore_quantms_pipeline/main.nf b/subworkflows/local/utils_nfcore_quantms_pipeline/main.nf new file mode 100644 index 00000000..2a03b6fa --- /dev/null 
+++ b/subworkflows/local/utils_nfcore_quantms_pipeline/main.nf @@ -0,0 +1,251 @@ +// +// Subworkflow with functionality specific to the nf-core/quantms pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION 
+======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of the pipeline version used.</li>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
+ return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
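For orientation, this is how the subworkflow above is typically invoked from a pipeline's initialisation code; a minimal sketch, assuming the include path and command string shown here rather than quoting the pipeline's own wiring:

```nextflow
// Hypothetical invocation of UTILS_NFVALIDATION_PLUGIN as declared above.
include { UTILS_NFVALIDATION_PLUGIN } from './subworkflows/nf-core/utils_nfvalidation_plugin'

workflow {
    UTILS_NFVALIDATION_PLUGIN (
        params.help,            // print_help: render help text and exit
        "nextflow run nf-core/quantms --input samplesheet.csv --outdir <OUTDIR>", // workflow_command shown in the help
        null,                   // pre_help_text (falls back to '')
        null,                   // post_help_text (falls back to '')
        params.validate_params, // validate parameters against the schema
        "nextflow_schema.json"  // schema_filename
    )
}
```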
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
new file mode 100644
index 00000000..7626c1c9
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
@@ -0,0 +1,96 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
+    "title": ". pipeline parameters",
+    "description": "",
+    "type": "object",
+    "definitions": {
+        "input_output_options": {
+            "title": "Input/output options",
+            "type": "object",
+            "fa_icon": "fas fa-terminal",
+            "description": "Define where the pipeline should find input data and save output data.",
+            "required": ["outdir"],
+            "properties": {
+                "validate_params": {
+                    "type": "boolean",
+                    "description": "Validate parameters?",
+                    "default": true,
+                    "hidden": true
+                },
+                "outdir": {
+                    "type": "string",
+                    "format": "directory-path",
+                    "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
+                    "fa_icon": "fas fa-folder-open"
+                },
+                "test_data_base": {
+                    "type": "string",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules",
+                    "description": "Base for test data directory",
+                    "hidden": true
+                },
+                "test_data": {
+                    "type": "string",
+                    "description": "Fake test data param",
+                    "hidden": true
+                }
+            }
+        },
+        "generic_options": {
+            "title": "Generic options",
+            "type": "object",
+            "fa_icon": "fas fa-file-import",
+            "description": "Less common options for the pipeline, typically set in a config file.",
+            "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
+            "properties": {
+                "help": {
+                    "type": "boolean",
+                    "description": "Display help text.",
+                    "fa_icon": "fas fa-question-circle",
+                    "hidden": true
+                },
+                "version": {
+                    "type": "boolean",
+                    "description": "Display version and exit.",
+                    "fa_icon": "fas fa-question-circle",
+                    "hidden": true
+                },
+                "logo": {
+                    "type": "boolean",
+                    "default": true,
+                    "description": "Display nf-core logo in console output.",
+                    "fa_icon": "fas fa-image",
+                    "hidden": true
+                },
+                "singularity_pull_docker_container": {
+                    "type": "boolean",
+                    "description": "Pull Singularity container from Docker?",
+                    "hidden": true
+                },
+                "publish_dir_mode": {
+                    "type": "string",
+                    "default": "copy",
+                    "description": "Method used to save pipeline results to output directory.",
+                    "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+                    "fa_icon": "fas fa-copy",
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "hidden": true
+                },
+                "monochrome_logs": {
+                    "type": "boolean",
+                    "description": "Use monochrome logs in the console output.",
+                    "hidden": true
+                }
+            }
+        }
+    },
+    "allOf": [
+        {
+            "$ref": "#/definitions/input_output_options"
+        },
+        {
+            "$ref": "#/definitions/generic_options"
+        }
+    ]
+}
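This test fixture mirrors the shape of a real `nextflow_schema.json`: parameter groups under `definitions`, pulled together by `allOf`. A minimal sketch of how a pipeline script typically exercises such a schema through the nf-validation plugin (function names are from nf-validation's documented API; the launch command string is a placeholder):

```groovy
// sketch.nf -- schema-driven help and validation via nf-validation
include { validateParameters; paramsHelp; paramsSummaryLog } from 'plugin/nf-validation'

if (params.help) {
    // Render help text from the schema's titles and descriptions, then exit.
    log.info paramsHelp("nextflow run nf-core/quantms --input samplesheet.tsv --outdir results")
    exit 0
}

if (params.validate_params) {
    // Fails the run when e.g. the required outdir is missing or mistyped,
    // which is exactly what the "Should validate params" test asserts.
    validateParameters(parameters_schema: 'nextflow_schema.json')
}

log.info paramsSummaryLog(workflow)
```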
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
new file mode 100644
index 00000000..60b1cfff
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfvalidation_plugin:
+  - subworkflows/nf-core/utils_nfvalidation_plugin/**
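The `tags.yml` key matches one of the `tag` declarations in the test file above; as far as nf-core CI conventions go, the path globs under the key determine which changes re-trigger the tagged tests. A skeleton of that wiring (test body trimmed to a placeholder):

```groovy
nextflow_workflow {

    name     "Tag wiring sketch"
    script   "../main.nf"
    workflow "UTILS_NFVALIDATION_PLUGIN"

    // Must equal the tags.yml key, so edits under
    // subworkflows/nf-core/utils_nfvalidation_plugin/** re-run this file.
    tag "subworkflows/utils_nfvalidation_plugin"

    test("placeholder") {
        when {
            workflow {
                """
                input[0] = false   // help
                input[1] = null    // workflow_command
                input[2] = null    // pre_help_text
                input[3] = null    // post_help_text
                input[4] = false   // validate_params
                input[5] = "$moduleTestDir/nextflow_schema.json"
                """
            }
        }
        then { assert workflow.success }
    }
}
```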
diff --git a/workflows/dia.nf b/workflows/dia.nf
index 0b1d22c9..06412d57 100644
--- a/workflows/dia.nf
+++ b/workflows/dia.nf
@@ -44,13 +44,14 @@ workflow DIA {
         result ->
             meta: preprocessed_meta(result[0])
             ms_file:result[1]
-        }
+    }
     .set { ch_result }

     meta = ch_result.meta.unique { it[0] }

     DIANNCFG(meta)
-    ch_software_versions = ch_software_versions.mix(DIANNCFG.out.version.ifEmpty(null))
+    ch_software_versions = ch_software_versions
+        .mix(DIANNCFG.out.version.ifEmpty(null))

     //
     // MODULE: SILICOLIBRARYGENERATION
@@ -62,34 +63,61 @@ workflow DIA {
         speclib = SILICOLIBRARYGENERATION.out.predict_speclib
     }

-    //
-    // MODULE: DIANN_PRELIMINARY_ANALYSIS
-    //
-    DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib))
-    ch_software_versions = ch_software_versions.mix(DIANN_PRELIMINARY_ANALYSIS.out.version.ifEmpty(null))
+    // Reuse a previously assembled empirical library instead of regenerating it
+    if (params.skip_preliminary_analysis) {
+        assembly_log = Channel.fromPath(params.empirical_assembly_log)
+        empirical_library = Channel.fromPath(params.diann_speclib)
+        indiv_fin_analysis_in = ch_file_preparation_results.combine(ch_searchdb)
+            .combine(assembly_log)
+            .combine(empirical_library)

-    //
-    // MODULE: ASSEMBLE_EMPIRICAL_LIBRARY
-    //
-    // Order matters in DIANN, This shoudl be sorted for reproducible results.
-    ASSEMBLE_EMPIRICAL_LIBRARY(
-        ch_result.ms_file.collect(),
-        meta,
-        DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
-        speclib
-    )
-    ch_software_versions = ch_software_versions.mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.version.ifEmpty(null))
+        empirical_lib = empirical_library
+    } else {
+        //
+        // MODULE: DIANN_PRELIMINARY_ANALYSIS
+        //
+        if (params.random_preanalysis) {
+            preanalysis_seed = 2024 // fixed seed keeps the random subset reproducible
+            preanalysis_subset = ch_file_preparation_results
+                .randomSample(params.empirical_assembly_ms_n, preanalysis_seed)
+            empirical_lib_files = preanalysis_subset
+                .map { result -> result[1] }
+                .collect()
+            DIANN_PRELIMINARY_ANALYSIS(preanalysis_subset.combine(speclib))
+        } else {
+            empirical_lib_files = ch_file_preparation_results
+                .map { result -> result[1] }
+                .collect()
+            DIANN_PRELIMINARY_ANALYSIS(ch_file_preparation_results.combine(speclib))
+        }
+        ch_software_versions = ch_software_versions
+            .mix(DIANN_PRELIMINARY_ANALYSIS.out.version.ifEmpty(null))
+
+        //
+        // MODULE: ASSEMBLE_EMPIRICAL_LIBRARY
+        //
+        // Order matters in DIA-NN; this should be sorted for reproducible results.
+        ASSEMBLE_EMPIRICAL_LIBRARY(
+            empirical_lib_files,
+            meta,
+            DIANN_PRELIMINARY_ANALYSIS.out.diann_quant.collect(),
+            speclib
+        )
+        ch_software_versions = ch_software_versions
+            .mix(ASSEMBLE_EMPIRICAL_LIBRARY.out.version.ifEmpty(null))
+        indiv_fin_analysis_in = ch_file_preparation_results
+            .combine(ch_searchdb)
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log)
+            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
+
+        empirical_lib = ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library
+    }

     //
     // MODULE: INDIVIDUAL_FINAL_ANALYSIS
     //
-    INDIVIDUAL_FINAL_ANALYSIS(
-        ch_file_preparation_results
-            .combine(ch_searchdb)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.log)
-            .combine(ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library)
-    )
-    ch_software_versions = ch_software_versions.mix(INDIVIDUAL_FINAL_ANALYSIS.out.version.ifEmpty(null))
+    INDIVIDUAL_FINAL_ANALYSIS(indiv_fin_analysis_in)
+    ch_software_versions = ch_software_versions
+        .mix(INDIVIDUAL_FINAL_ANALYSIS.out.version.ifEmpty(null))

     //
     // MODULE: DIANNSUMMARY
@@ -103,9 +131,17 @@ workflow DIA {
         .ms_file.map { msfile -> file(msfile).getName() }
         .collect()
         .set { ms_file_names }
-    DIANNSUMMARY(ms_file_names, meta, ASSEMBLE_EMPIRICAL_LIBRARY.out.empirical_library,
-        INDIVIDUAL_FINAL_ANALYSIS.out.diann_quant.collect(), ch_searchdb)
-    ch_software_versions = ch_software_versions.mix(DIANNSUMMARY.out.version.ifEmpty(null))
+
+    DIANNSUMMARY(
+        ms_file_names,
+        meta,
+        empirical_lib,
+        INDIVIDUAL_FINAL_ANALYSIS.out.diann_quant.collect(),
+        ch_searchdb)
+
+    ch_software_versions = ch_software_versions.mix(
+        DIANNSUMMARY.out.version.ifEmpty(null)
+    )

     //
     // MODULE: DIANNCONVERT
@@ -118,7 +154,8 @@ workflow DIA {
         ch_searchdb,
         DIANNSUMMARY.out.version
     )
-    ch_software_versions = ch_software_versions.mix(DIANNCONVERT.out.version.ifEmpty(null))
+    ch_software_versions = ch_software_versions
+        .mix(DIANNCONVERT.out.version.ifEmpty(null))

     //
     // MODULE: MSSTATS
@@ -126,7 +163,9 @@ workflow DIA {
     if (!params.skip_post_msstats) {
         MSSTATS(DIANNCONVERT.out.out_msstats)
         ch_msstats_out = MSSTATS.out.msstats_csv
-        ch_software_versions = ch_software_versions.mix(MSSTATS.out.version.ifEmpty(null))
+        ch_software_versions = ch_software_versions.mix(
+            MSSTATS.out.version.ifEmpty(null)
+        )
     }

 emit:
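The new `random_preanalysis` branch relies on Nextflow's `randomSample` operator with a fixed seed, so the subset of MS runs used to assemble the empirical library is the same on every execution. A standalone sketch of the operator's behaviour (dummy file names; the pipeline passes `params.empirical_assembly_ms_n` as the sample size):

```groovy
// random_sample_sketch.nf -- same 3 items selected on every run, because the seed is fixed
Channel
    .of('run1.mzML', 'run2.mzML', 'run3.mzML', 'run4.mzML', 'run5.mzML')
    .randomSample(3, 2024)           // (number of items, seed), as used in dia.nf
    .view { "selected: $it" }
```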
diff --git a/workflows/quantms.nf b/workflows/quantms.nf
index 1e41ed38..e0772204 100644
--- a/workflows/quantms.nf
+++ b/workflows/quantms.nf
@@ -4,40 +4,10 @@
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
-
-def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
-def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
-def summary_params = paramsSummaryMap(workflow)
-
-// Print parameter summary log to screen
-log.info logo + paramsSummaryLog(workflow) + citation
-
-WorkflowQuantms.initialise(params, log)
-
-// Check conflicting parameters
-if (params.decoy_string_position == "suffix" && params.searchengines.contains("sage"))
-{
-    log.error "Sage does not support decoy suffixes. Please change your input database or generate with add_decoys and decoy_string_position 'prefix' (default)."
-}
-
-/*
-========================================================================================
-    CONFIG FILES
-========================================================================================
-*/
-
-ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
-ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
-ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-
-
-/*
-========================================================================================
-    IMPORT LOCAL MODULES/SUBWORKFLOWS
-========================================================================================
-*/
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_quantms_pipeline'

 include { TMT } from './tmt'
 include { LFQ } from './lfq'
@@ -51,19 +21,7 @@ include { DECOYDATABASE } from '../modules/local/openms/decoydatabase/main'
 include { INPUT_CHECK } from '../subworkflows/local/input_check'
 include { FILE_PREPARATION } from '../subworkflows/local/file_preparation'
 include { CREATE_INPUT_CHANNEL } from '../subworkflows/local/create_input_channel'
-
-/*
-========================================================================================
-    IMPORT NF-CORE MODULES/SUBWORKFLOWS
-========================================================================================
-*/
-
-//
-// MODULE: Installed directly from nf-core/modules
-//
-
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-
+include { DDA_ID } from '../subworkflows/local/dda_id'

 /*
@@ -146,50 +104,58 @@ workflow QUANTMS {
         ch_versions = ch_versions.mix(DECOYDATABASE.out.version.ifEmpty(null))
     }

-
-    TMT(ch_fileprep_result.iso, CREATE_INPUT_CHANNEL.out.ch_expdesign, ch_searchengine_in_db)
-    ch_ids_pmultiqc = ch_ids_pmultiqc.mix(TMT.out.ch_pmultiqc_ids)
-    ch_consensus_pmultiqc = ch_consensus_pmultiqc.mix(TMT.out.ch_pmultiqc_consensus)
-    ch_pipeline_results = ch_pipeline_results.mix(TMT.out.final_result)
-    ch_msstats_in = ch_msstats_in.mix(TMT.out.msstats_in)
-    ch_versions = ch_versions.mix(TMT.out.versions.ifEmpty(null))
-
-    LFQ(ch_fileprep_result.lfq, CREATE_INPUT_CHANNEL.out.ch_expdesign, ch_searchengine_in_db)
-    ch_ids_pmultiqc = ch_ids_pmultiqc.mix(LFQ.out.ch_pmultiqc_ids)
-    ch_consensus_pmultiqc = ch_consensus_pmultiqc.mix(LFQ.out.ch_pmultiqc_consensus)
-    ch_pipeline_results = ch_pipeline_results.mix(LFQ.out.final_result)
-    ch_msstats_in = ch_msstats_in.mix(LFQ.out.msstats_in)
-    ch_versions = ch_versions.mix(LFQ.out.versions.ifEmpty(null))
-
-    DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, FILE_PREPARATION.out.statistics)
-    ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report)
-    ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in)
-    ch_versions = ch_versions.mix(DIA.out.versions.ifEmpty(null))
-
-
-    //
-    // MODULE: Pipeline reporting
-    //
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
+    if (params.id_only) {
+        DDA_ID( FILE_PREPARATION.out.results, ch_searchengine_in_db, FILE_PREPARATION.out.spectrum_data)
+        ch_versions = ch_versions.mix(DDA_ID.out.version.ifEmpty(null))
+    } else {
+        TMT(ch_fileprep_result.iso, CREATE_INPUT_CHANNEL.out.ch_expdesign, ch_searchengine_in_db)
+        ch_ids_pmultiqc = ch_ids_pmultiqc.mix(TMT.out.ch_pmultiqc_ids)
+        ch_consensus_pmultiqc = ch_consensus_pmultiqc.mix(TMT.out.ch_pmultiqc_consensus)
+        ch_pipeline_results = ch_pipeline_results.mix(TMT.out.final_result)
+        ch_msstats_in = ch_msstats_in.mix(TMT.out.msstats_in)
+        ch_versions = ch_versions.mix(TMT.out.versions.ifEmpty(null))
+
+        LFQ(ch_fileprep_result.lfq, CREATE_INPUT_CHANNEL.out.ch_expdesign, ch_searchengine_in_db)
+        ch_ids_pmultiqc = ch_ids_pmultiqc.mix(LFQ.out.ch_pmultiqc_ids)
+        ch_consensus_pmultiqc = ch_consensus_pmultiqc.mix(LFQ.out.ch_pmultiqc_consensus)
+        ch_pipeline_results = ch_pipeline_results.mix(LFQ.out.final_result)
+        ch_msstats_in = ch_msstats_in.mix(LFQ.out.msstats_in)
+        ch_versions = ch_versions.mix(LFQ.out.versions.ifEmpty(null))
+
+        DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, FILE_PREPARATION.out.statistics)
+        ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report)
+        ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in)
+        ch_versions = ch_versions.mix(DIA.out.versions.ifEmpty(null))
+    }

     //
-    // MODULE: pmultiqc
+    // Collate and save software versions
     //
-    workflow_summary = WorkflowQuantms.paramsSummaryMultiqc(workflow, summary_params)
-    ch_workflow_summary = Channel.value(workflow_summary)
-
-    methods_description = WorkflowQuantms.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
-    ch_methods_description = Channel.value(methods_description)
-
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics)
-    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-    ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png")
+    ch_versions
+        .branch {
+            yaml  : it.asBoolean()
+            other : true
+        }
+        .set { versions_clean }
+
+    softwareVersionsToYAML(versions_clean.yaml)
+        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
+        .set { ch_collated_versions }
+
+    ch_multiqc_files = Channel.empty()
+    ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+    ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
+    summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+    ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+    ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+    ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config)
+    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics)
+    ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))
+    ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png")

     SUMMARYPIPELINE (
         CREATE_INPUT_CHANNEL.out.ch_expdesign
@@ -204,24 +170,7 @@ workflow QUANTMS {
 }

 /*
-========================================================================================
-    COMPLETION EMAIL AND SUMMARY
-========================================================================================
-*/
-
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.dump_parameters(workflow, params)
-    NfcoreTemplate.summary(workflow, params, log)
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
-}
-
-/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     THE END
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
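The `CUSTOM_DUMPSOFTWAREVERSIONS` module is replaced above by the `softwareVersionsToYAML` helper plus a `branch` step. The `branch` matters because the many `ifEmpty(null)` mixes can inject nulls into `ch_versions`, and `it.asBoolean()` is false for null, so only real version entries reach the YAML collation. A standalone sketch of that filtering, with dummy values standing in for real version files:

```groovy
// branch_sketch.nf -- null-filtering pattern used before version collation
Channel
    .of('msstats_versions.yml', null, 'diann_versions.yml', null)
    .branch {
        yaml  : it.asBoolean()   // truthy entries: the actual version files
        other : true             // fallback bucket swallows the nulls
    }
    .set { versions_clean }

versions_clean.yaml.view  { "keep: $it" }   // -> the two .yml entries
versions_clean.other.view { "drop: $it" }   // -> the two nulls
```

The cleaned channel can then be merged into a single file with `collectFile(storeDir: ..., sort: true)`, exactly as the workflow does for `nf_core_pipeline_software_mqc_versions.yml`.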