first PR about identification subworkflow #351

Merged · 22 commits · Mar 24, 2024
10 changes: 1 addition & 9 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -10,15 +10,7 @@
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"python.defaultInterpreterPath": "/opt/conda/bin/python",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
"python.formatting.yapfPath": "/opt/conda/bin/yapf",
"python.linting.flake8Path": "/opt/conda/bin/flake8",
"python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint"
"python.defaultInterpreterPath": "/opt/conda/bin/python"
},

// Add the IDs of extensions you want installed when the container is created.
14 changes: 8 additions & 6 deletions .github/CONTRIBUTING.md
@@ -9,9 +9,8 @@ Please use the pre-filled template to save time.
However, don't be put off by this template - other more general issues and suggestions are welcome!
Contributions to the code are even more welcome ;)

:::info
If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)).
:::
> [!NOTE]
> If you need help using or modifying nf-core/quantms then the best place to ask is on the nf-core Slack [#quantms](https://nfcore.slack.com/channels/quantms) channel ([join our Slack here](https://nf-co.re/join/slack)).

## Contribution workflow

@@ -27,8 +26,11 @@ If you're not used to this workflow with git, you can start with some [docs from

## Tests

You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.
You can optionally test your changes locally by running the pipeline. Using the `debug` profile is recommended so that you receive warnings about process selectors and other debug information. Run the full test suite with the following command:

```bash
nf-test test --profile debug,test,docker --verbose
```

When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
@@ -90,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`.

Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.

The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block.
The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.
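For instance, a process can forward those values straight to its tool's command line — a hypothetical sketch, where `mytool` and its flags are placeholders:

```nextflow
process EXAMPLE_TOOL {
    // The label selects the resource tier defined in conf/base.config
    label 'process_medium'

    input:
    path mzml

    script:
    """
    mytool --threads ${task.cpus} --max-mem ${task.memory.toGiga()}G ${mzml}
    """
}
```

Because the tool reads `${task.cpus}` and `${task.memory}` at runtime, resource overrides in user configs propagate without touching the process code.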

### Naming schemes

2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/quan
- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/quantms/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, also make a PR on the nf-core/quantms _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
- [ ] Make sure your code lints (`nf-core lint`).
- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`).
- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
- [ ] Usage Documentation in `docs/usage.md` is updated.
- [ ] Output Documentation in `docs/output.md` is updated.
4 changes: 3 additions & 1 deletion .github/workflows/awsfulltest.yml
@@ -20,7 +20,9 @@ jobs:
steps:
- name: Launch workflow via tower
uses: seqeralabs/action-tower-launch@v2

# TODO nf-core: You can customise AWS full pipeline tests as required
# Add full size test data (but still relatively small datasets for few samples)
# on the `test_full.config` test runs with only one set of parameters
with:
workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/download_pipeline.yml
@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Install Nextflow
uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1
uses: nf-core/setup-nextflow@v1

- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
with:
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
@@ -35,7 +35,7 @@ jobs:
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1
uses: nf-core/setup-nextflow@v1

- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
with:
2 changes: 1 addition & 1 deletion .github/workflows/release-announcements.yml
@@ -12,7 +12,7 @@ jobs:
- name: get topics and convert to hashtags
id: get_topics
run: |
curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' > $GITHUB_OUTPUT
curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT

- uses: rzr/fediverse-action@master
with:
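The corrected `jq` filter above (matching on `.full_name` rather than `.name`) can be exercised offline against a minimal, hypothetical excerpt of `pipelines.json`:

```shell
# Hypothetical two-topic excerpt of https://nf-co.re/pipelines.json
cat > pipelines_sample.json <<'EOF'
{"remote_workflows":[{"full_name":"nf-core/quantms","topics":["proteomics","dia"]}]}
EOF

# Same pipeline as the workflow step: select the repo, emit one topic per
# line, prefix each with "#", and join them into a single space-separated string
hashtags=$(jq -r '.remote_workflows[] | select(.full_name == "nf-core/quantms") | .topics[]' \
    pipelines_sample.json | awk '{print "#"$0}' | tr '\n' ' ')
echo "$hashtags"
```

Note the workflow also switches `>` to `>>` when writing `$GITHUB_OUTPUT`, so the step appends to the output file rather than truncating it.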
6 changes: 2 additions & 4 deletions .gitpod.yml
@@ -10,13 +10,11 @@ tasks:

vscode:
extensions: # based on nf-core.nf-core-extensionpack
- codezombiech.gitignore # Language support for .gitignore files
# - cssho.vscode-svgviewer # SVG viewer
- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
- eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
- EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
- Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
- mechatroner.rainbow-csv # Highlight columns in csv files in different colors
# - nextflow.nextflow # Nextflow syntax highlighting
# - nextflow.nextflow # Nextflow syntax highlighting
- oderwat.indent-rainbow # Highlight indentation level
- streetsidesoftware.code-spell-checker # Spelling checker for source code
- charliermarsh.ruff # Code linter Ruff
6 changes: 4 additions & 2 deletions README.md
@@ -1,12 +1,14 @@
# ![nf-core/quantms](docs/images/nf-core-quantms_logo_light.png#gh-light-mode-only) ![nf-core/quantms](docs/images/nf-core-quantms_logo_dark.png#gh-dark-mode-only)

[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](https://img.shields.io/badge/DOI-10.5281/zenodo.7754148-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7754148)
[![GitHub Actions CI Status](https://github.com/nf-core/quantms/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/quantms/actions/workflows/ci.yml)
[![GitHub Actions Linting Status](https://github.com/nf-core/quantms/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/quantms/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/quantms/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)

[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
[![run with conda](https://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/quantms)
[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/quantms)

[![Get help on Slack](https://img.shields.io/badge/slack-nf--core%20%23quantms-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/quantms)[![Follow on Twitter](https://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](https://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)

3 changes: 2 additions & 1 deletion assets/adaptivecard.json
@@ -54,7 +54,8 @@
"body": [
{
"type": "FactSet",
"facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
"facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"
}.join(",\n") %>
]
}
]
24 changes: 19 additions & 5 deletions bin/mzml_statistics.py
@@ -7,12 +7,12 @@
import sys
from pathlib import Path
import sqlite3

import re
import pandas as pd
from pyopenms import MSExperiment, MzMLFile


def ms_dataframe(ms_path: str) -> None:
def ms_dataframe(ms_path: str, id_only: bool = False) -> None:
file_columns = [
"SpectrumID",
"MSLevel",
@@ -25,8 +25,9 @@ def ms_dataframe(ms_path: str) -> None:
"AcquisitionDateTime",
]

def parse_mzml(file_name: str, file_columns: list):
def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
info = []
psm_part_info = []
exp = MSExperiment()
acquisition_datetime = exp.getDateTime().get()
MzMLFile().load(file_name, exp)
@@ -54,11 +55,23 @@ def parse_mzml(file_name: str, file_columns: list):
charge_state = spectrum.getPrecursors()[0].getCharge()
emz = spectrum.getPrecursors()[0].getMZ() if spectrum.getPrecursors()[0].getMZ() else None
info_list = [id_, MSLevel, charge_state, peak_per_ms, bpc, tic, rt, emz, acquisition_datetime]
mz_array = peaks_tuple[0]
intensity_array = peaks_tuple[1]
else:
info_list = [id_, MSLevel, None, None, None, None, rt, None, acquisition_datetime]

if id_only and MSLevel == 2:
psm_part_info.append([re.findall(r"(?:scan|spectrum)=(\d+)", id_)[0], MSLevel, mz_array, intensity_array])
info.append(info_list)

if id_only and len(psm_part_info) > 0:
pd.DataFrame(psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]).to_csv(
f"{Path(ms_path).stem}_spectrum_df.csv",
mode="w",
index=False,
header=True,
)

return pd.DataFrame(info, columns=file_columns)

def parse_bruker_d(file_name: str, file_columns: list):
@@ -139,7 +152,7 @@ def parse_bruker_d(file_name: str, file_columns: list):
if Path(ms_path).suffix == ".d" and Path(ms_path).is_dir():
ms_df = parse_bruker_d(ms_path, file_columns)
elif Path(ms_path).suffix in [".mzML", ".mzml"]:
ms_df = parse_mzml(ms_path, file_columns)
ms_df = parse_mzml(ms_path, file_columns, id_only)
else:
msg = f"Unrecognized or nonexistent mass spec file '{ms_path}'"
raise RuntimeError(msg)
@@ -155,7 +168,8 @@ def parse_bruker_d(file_name: str, file_columns: list):

def main():
ms_path = sys.argv[1]
ms_dataframe(ms_path)
id_only = sys.argv[2].lower() == "true"  # argv values are strings; convert to bool before passing on
ms_dataframe(ms_path, id_only)


if __name__ == "__main__":
117 changes: 117 additions & 0 deletions bin/psm_conversion.py
@@ -0,0 +1,117 @@
#!/usr/bin/env python
import numpy as np
import pyopenms as oms
import pandas as pd
import re
import os
from pathlib import Path
import sys

_parquet_field = [
"sequence", "protein_accessions", "protein_start_positions", "protein_end_positions",
"modifications", "retention_time", "charge", "calc_mass_to_charge", "reference_file_name",
"scan_number", "peptidoform", "posterior_error_probability", "global_qvalue", "is_decoy",
"consensus_support", "mz_array", "intensity_array", "num_peaks", "search_engines", "id_scores", "hit_rank"
]


def mods_position(peptide):
pattern = re.compile(r"\((.*?)\)")
original_mods = pattern.findall(peptide)
peptide = re.sub(r"\(.*?\)", ".", peptide)
position = [i.start() for i in re.finditer(r"\.", peptide)]
for j in range(1, len(position)):
position[j] -= j

for k in range(0, len(original_mods)):
original_mods[k] = str(position[k]) + "-" + original_mods[k]

original_mods = [str(i) for i in original_mods] if len(original_mods) > 0 else np.nan

return original_mods


def convert_psm(idxml, spectra_file, export_decoy_psm):
prot_ids = []
pep_ids = []
parquet_data = []
consensus_support = np.nan
mz_array = []
intensity_array = []
num_peaks = np.nan
id_scores = []
search_engines = []

oms.IdXMLFile().load(idxml, prot_ids, pep_ids)
if "ConsensusID" in prot_ids[0].getSearchEngine():
if prot_ids[0].getSearchParameters().metaValueExists("SE:MS-GF+"):
search_engines = ["MS-GF+"]
if prot_ids[0].getSearchParameters().metaValueExists("SE:Comet"):
search_engines.append("Comet")
if prot_ids[0].getSearchParameters().metaValueExists("SE:Sage"):
search_engines.append("Sage")
else:
search_engines = [prot_ids[0].getSearchEngine()]

reference_file_name = os.path.splitext(prot_ids[0].getMetaValue("spectra_data")[0].decode("UTF-8"))[0]
spectra_df = pd.read_csv(spectra_file) if spectra_file else None

for peptide_id in pep_ids:
retention_time = peptide_id.getRT()
calc_mass_to_charge = peptide_id.getMZ()
scan_number = int(re.findall(r"(spectrum|scan)=(\d+)", peptide_id.getMetaValue("spectrum_reference"))[0][1])

if isinstance(spectra_df, pd.DataFrame):
spectra = spectra_df[spectra_df["scan"] == scan_number]
mz_array = spectra["mz"].values[0]
intensity_array = spectra["intensity"].values[0]
num_peaks = len(mz_array)

for hit in peptide_id.getHits():
# optionally skip decoy hits when the search was run against a target+decoy database
is_decoy = 0 if hit.getMetaValue("target_decoy") == "target" else 1
if export_decoy_psm == "false" and is_decoy:
continue
global_qvalue = np.nan
if len(search_engines) > 1:
if "q-value" in peptide_id.getScoreType():
global_qvalue = hit.getScore()
consensus_support = hit.getMetaValue("consensus_support")
elif search_engines[0] == "Comet":
id_scores = ["Comet:Expectation value: " + str(hit.getScore())]
elif search_engines[0] == "MS-GF+":
id_scores = ["MS-GF:SpecEValue: " + str(hit.getScore())]
elif search_engines[0] == "Sage":
id_scores = ["Sage:hyperscore: " + str(hit.getScore())]

charge = hit.getCharge()
peptidoform = hit.getSequence().toString()
modifications = mods_position(peptidoform)
sequence = hit.getSequence().toUnmodifiedString()
protein_accessions = [ev.getProteinAccession() for ev in hit.getPeptideEvidences()]
posterior_error_probability = hit.getMetaValue("Posterior Error Probability_score")
protein_start_positions = [ev.getStart() for ev in hit.getPeptideEvidences()]
protein_end_positions = [ev.getEnd() for ev in hit.getPeptideEvidences()]
hit_rank = hit.getRank()

parquet_data.append([sequence, protein_accessions, protein_start_positions, protein_end_positions,
modifications, retention_time, charge, calc_mass_to_charge, reference_file_name,
scan_number, peptidoform, posterior_error_probability, global_qvalue, is_decoy,
consensus_support, mz_array, intensity_array, num_peaks, search_engines, id_scores,
hit_rank])

pd.DataFrame(parquet_data, columns=_parquet_field).to_csv(f"{Path(idxml).stem}_psm.csv",
mode="w",
index=False,
header=True)


def main():
idxml_path = sys.argv[1]
spectra_file = sys.argv[2]
export_decoy_psm = sys.argv[3]
convert_psm(idxml_path, spectra_file, export_decoy_psm)


if __name__ == "__main__":
sys.exit(main())
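The `mods_position` helper above encodes each modification as `position-name`, where the position is the 1-based index of the modified residue after the `(...)` annotations are stripped. A minimal standalone sketch of the same logic (simplified to return an empty list rather than `np.nan` for unmodified peptides) shows the behaviour:

```python
import re


def mods_position(peptide: str) -> list:
    """Return '<residue index>-<mod name>' for each (Mod) annotation in a peptidoform."""
    original_mods = re.findall(r"\((.*?)\)", peptide)
    # Replace every annotation with a single '.' placeholder so its offset
    # marks the residue it follows
    stripped = re.sub(r"\(.*?\)", ".", peptide)
    position = [m.start() for m in re.finditer(r"\.", stripped)]
    # Each earlier placeholder shifts later ones right by one character
    for j in range(1, len(position)):
        position[j] -= j
    return [f"{position[k]}-{original_mods[k]}" for k in range(len(original_mods))]


# Oxidation sits on M (residue 4), Phospho on S (residue 9)
print(mods_position("PEPM(Oxidation)TIDES(Phospho)K"))
# → ['4-Oxidation', '9-Phospho']
```

The index correction loop is the subtle part: once the first `(...)` is collapsed to a dot, every subsequent dot is offset by the dots before it, hence the `position[j] -= j`.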
10 changes: 10 additions & 0 deletions conf/modules.config
@@ -245,6 +245,16 @@ process {
]
}

withName: '.*:DDA_ID:PSMFDRCONTROL:IDFILTER' {
ext.args = "-score:pep \"$params.run_fdr_cutoff\""
ext.suffix = '.idXML'
publishDir = [
path: { "${params.outdir}/idfilter" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

// PROTEOMICSLFQ
withName: '.*:LFQ:PROTEOMICSLFQ' {
ext.args = "-debug $params.plfq_debug"
21 changes: 20 additions & 1 deletion modules.json
@@ -12,10 +12,29 @@
},
"multiqc": {
"branch": "master",
"git_sha": "ccacf6f5de6df3bc6d73b665c1fd2933d8bbc290",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"installed_by": ["modules"]
}
}
},
"subworkflows": {
"nf-core": {
"utils_nextflow_pipeline": {
"branch": "master",
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
"installed_by": ["subworkflows"]
},
"utils_nfcore_pipeline": {
"branch": "master",
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
"installed_by": ["subworkflows"]
},
"utils_nfvalidation_plugin": {
"branch": "master",
"git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
"installed_by": ["subworkflows"]
}
}
}
}
}