Skip to content

Commit

Permalink
Merge pull request #373 from daichengxin/dev
Browse files Browse the repository at this point in the history
Add DDA ID CI and fix some bugs
  • Loading branch information
ypriverol committed May 13, 2024
2 parents e3c95f0 + f571096 commit bb0b114
Show file tree
Hide file tree
Showing 17 changed files with 202 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
NXF_VER:
- "23.04.0"
- "latest-everything"
test_profile: ["test_lfq", "test_lfq_sage", "test_dia", "test_localize", "test_tmt"]
test_profile: ["test_lfq", "test_lfq_sage", "test_dia", "test_localize", "test_tmt", "test_dda_id"]
exec_profile: ["docker", "conda"]
exclude:
- test_profile: test_dia
Expand Down
19 changes: 9 additions & 10 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

- name: Set up Python 3.11
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
- name: Set up Python 3.12
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
python-version: 3.11
cache: "pip"
python-version: "3.12"

- name: Install pre-commit
run: pip install pre-commit
Expand All @@ -32,14 +31,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out pipeline code
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
uses: nf-core/setup-nextflow@v2

- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
- uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
with:
python-version: "3.11"
python-version: "3.12"
architecture: "x64"

- name: Install dependencies
Expand All @@ -60,7 +59,7 @@ jobs:

- name: Upload linting log file artifact
if: ${{ always() }}
uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4
with:
name: linting-logs
path: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linting_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3
uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3
with:
workflow: linting.yml
workflow_conclusion: completed
Expand Down
38 changes: 38 additions & 0 deletions bin/add_sage_feature.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python
# Add extra features to Sage idXML files: Sage's extra features are not known
# inputs for PSMFeatureExtractor, so they must be registered in the idXML explicitly.

import pyopenms as oms
import pandas as pd
import sys


def add_feature(idx_file, output_file, feat_file):
    """Register ms2rescore-generated feature names in an idXML file.

    Reads the feature table produced by ms2rescore, collects every feature
    name that does not come from the PSM file itself (those are already known
    to PSMFeatureExtractor), appends them to the ``extra_features`` search
    parameter of the idXML, and writes the result to ``output_file``.

    :param idx_file: path to the input idXML with identifications
    :param output_file: path for the updated idXML
    :param feat_file: TSV with ``feature_generator`` and ``feature_name``
        columns, as written by ms2rescore
    """
    feat = pd.read_csv(feat_file, sep="\t")
    # Only features from generators other than the PSM file need registering.
    extra_feat = [
        row["feature_name"]
        for _, row in feat.iterrows()
        if row["feature_generator"] != "psm_file"
    ]
    print("Adding extra feature: {}".format(extra_feat))

    protein_ids = []
    peptide_ids = []
    oms.IdXMLFile().load(idx_file, protein_ids, peptide_ids)
    search_parameters = protein_ids[0].getSearchParameters()
    features = search_parameters.getMetaValue("extra_features")
    # Guard against an empty list: appending "," + "" would leave a trailing
    # comma and a phantom empty feature name in the meta value.
    if extra_feat:
        features = features + "," + ",".join(extra_feat)
    search_parameters.setMetaValue("extra_features", features)
    protein_ids[0].setSearchParameters(search_parameters)
    oms.IdXMLFile().store(output_file, protein_ids, peptide_ids)
    print("Done")


def main():
    """CLI entry point.

    Usage: ``add_sage_feature.py <input.idXML> <output.idXML> <feature.tsv>``

    Returns a non-zero status (passed through to ``sys.exit``) when the wrong
    number of arguments is supplied, instead of crashing with an IndexError.
    """
    # sys.argv[0] is the script name, so exactly three positionals are required.
    if len(sys.argv) != 4:
        print(
            "Usage: add_sage_feature.py <input.idXML> <output.idXML> <feature.tsv>",
            file=sys.stderr,
        )
        return 1
    idx_file, output_file, feat_file = sys.argv[1:4]
    add_feature(idx_file, output_file, feat_file)


if __name__ == "__main__":
    sys.exit(main())
Empty file modified bin/extract_sample.py
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions bin/ms2rescore_cli.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# Written by Jonas Scheid under the MIT license


import sys
import click
import importlib.resources
Expand Down
Empty file modified bin/psm_conversion.py
100644 → 100755
Empty file.
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -317,4 +317,13 @@ process {
]
}

    // Publishing rules for the SAGEFEATURE step of the DDA_ID subworkflow.
    // Only "*.log" files match the pattern, so the produced idXML stays in the
    // work dir for downstream steps; the saveAs closure additionally drops
    // versions.yml (kept for consistency with the other stanzas in this file).
    withName: '.*:DDA_ID:SAGEFEATURE' {
        publishDir = [
            path: { "${params.outdir}/addsagefeature" },
            pattern: "*.log",
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }

}
37 changes: 37 additions & 0 deletions conf/test_dda_id.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests of the DDA identification branch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple CI test.

    Use as follows:
        nextflow run nf-core/quantms -profile test_dda_id,<docker/singularity> [--outdir <OUTDIR>]
------------------------------------------------------------------------------------------------
*/

params {
    config_profile_name        = 'Test profile for DDA ID'
    config_profile_description = 'Minimal test dataset to check the DDA identification branch of the pipeline'

    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = 6.GB
    max_time   = 48.h

    outdir = "./results_lfq_dda_id"

    // Input data
    input    = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/PXD000001.sdrf.tsv'
    database = 'https://raw.githubusercontent.com/nf-core/test-datasets/quantms/testdata/tmt_ci/erwinia_carotovora.fasta'

    // Identification / rescoring options exercised by this profile
    posterior_probabilities  = "percolator"
    search_engines           = "msgf,comet"
    add_decoys               = true
    decoy_string             = "rev"
    protein_level_fdr_cutoff = 0.01
    psm_level_fdr_cutoff     = 1.0
    pmultiqc_idxml_skip      = false
    id_only                  = true
    enable_pmultiqc          = false
    ms2rescore               = true
}
32 changes: 32 additions & 0 deletions modules/local/add_sage_feat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Registers ms2rescore's extra feature names in the search parameters of a
// Sage idXML file (via bin/add_sage_feature.py), since Sage's extra features
// are not known inputs for PSMFeatureExtractor.
process SAGEFEATURE {
    tag "$meta.mzml_id"
    label 'process_low'

    // pyopenms supplies the IdXMLFile reader/writer used by the helper script.
    conda "bioconda::pyopenms=3.1.0"
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/pyopenms:3.1.0--py39h9b8898c_0"
    } else {
        container "biocontainers/pyopenms:3.1.0--py39h9b8898c_0"
    }

    input:
    // id_file: idXML with identifications; extra_feat: TSV of feature
    // generator/name pairs emitted by the ms2rescore step
    tuple val(meta), path(id_file), path(extra_feat)

    output:
    tuple val(meta), path("${id_file.baseName}_feat.idXML"), emit: id_files_feat
    path "versions.yml", emit: version
    path "*.log", emit: log

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.mzml_id}"

    """
    add_sage_feature.py "${id_file}" "${id_file.baseName}_feat.idXML" "${extra_feat}" 2>&1 | tee add_sage_feature.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pyopenms: \$(pip show pyopenms | grep "Version" | awk -F ': ' '{print \$2}')
    END_VERSIONS
    """
}
39 changes: 39 additions & 0 deletions modules/local/add_sage_feat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: SAGEFEATURE
description: A module to add ms2rescore extra features to Sage idXML files
keywords:
- features
- ms2rescore
tools:
- custom:
description: |
A custom module that registers extra features from ms2rescore in Sage idXML files.
homepage: https://github.com/bigbio/quantms
documentation: https://github.com/bigbio/quantms/tree/readthedocs
input:
- meta:
type: map
description: Groovy Map containing sample information
  - id_file:
      type: file
      description: |
        Input idXML file containing the identifications.
      pattern: "*.idXML"
  - extra_feat:
      type: file
      description: |
        TSV file listing the feature generators and feature names produced by ms2rescore.
      pattern: "*.tsv"
output:
- meta:
type: map
description: Groovy Map containing sample information
- id_files_feat:
type: file
description: |
Output file in idXML format
pattern: "*.idXML"
- log:
type: file
description: log file
pattern: "*.log"
- version:
type: file
description: File containing software version
pattern: "versions.yml"
authors:
- "@daichengxin"
6 changes: 3 additions & 3 deletions modules/local/extract_psm/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ process PSMCONVERSION {
tag "$meta.mzml_id"
label 'process_medium'

conda "bioconda::pyopenms=3.1.0"
conda "bioconda::pyopenms=2.8.0"
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/pyopenms:3.1.0--py39h9b8898c_0"
container "https://depot.galaxyproject.org/singularity/pyopenms:2.8.0--py38hd8d5640_1"
} else {
container "biocontainers/pyopenms:3.1.0--py39h9b8898c_0"
container "biocontainers/pyopenms:2.8.0--py38hd8d5640_1"
}

input:
Expand Down
13 changes: 10 additions & 3 deletions modules/local/ms2rescore/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process MS2RESCORE {
tag "$meta.mzml_id"
label 'process_high'

conda "bioconda::ms2rescore=3.0.2"
conda "bioconda::ms2rescore=3.0.2 bioconda::psm-utils=0.8.0 conda-forge::pydantic=1.10"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ms2rescore:3.0.2--pyhdfd78af_0':
'biocontainers/ms2rescore:3.0.2--pyhdfd78af_0' }"
Expand Down Expand Up @@ -36,19 +36,26 @@ process MS2RESCORE {
ms2_tolerence = 0.02
}

if (params.decoy_string_position == "prefix") {
decoy_pattern = "^${params.decoy_string}"
} else {
decoy_pattern = "${params.decoy_string}\$"
}

"""
ms2rescore_cli.py \\
--psm_file $idxml \\
--spectrum_path . \\
--ms2_tolerance $ms2_tolerence \\
--output_path ${idxml.baseName}_ms2rescore.idXML \\
--processes $task.cpus \\
--id_decoy_pattern $decoy_pattern \\
$args \\
2>&1 | tee ${meta.mzml_id}_ms2rescore.log
cat <<-END_VERSIONS > versions.yml
"${task.process}":
MS²Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' ))
MS2Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' )
END_VERSIONS
"""

Expand All @@ -61,7 +68,7 @@ process MS2RESCORE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
MS²Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' ))
MS2Rescore: \$(echo \$(ms2rescore --version 2>&1) | grep -oP 'MS²Rescore \\(v\\K[^\\)]+' )
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ profiles {
test_full_tmt { includeConfig 'conf/test_full_tmt.config' }
test_full_dia { includeConfig 'conf/test_full_dia.config' }
test_full { includeConfig 'conf/test_full_lfq.config' }
test_dda_id { includeConfig 'conf/test_dda_id.config' }
mambaci { includeConfig 'conf/mambaci.config' }

}
Expand Down
21 changes: 17 additions & 4 deletions subworkflows/local/dda_id.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ include { PSMCONVERSION } from '../../modules/local/extract_psm
include { MS2RESCORE } from '../../modules/local/ms2rescore/main'
include { IDSCORESWITCHER } from '../../modules/local/openms/idscoreswitcher/main'
include { GETSAMPLE } from '../../modules/local/extract_sample/main'
include { SAGEFEATURE } from '../../modules/local/add_sage_feat/main'

//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
Expand Down Expand Up @@ -48,6 +49,8 @@ workflow DDA_ID {
return [meta, filename, []]
}.set{ch_id_files_branched}

ch_pmultiqc_consensus = Channel.empty()
ch_pmultiqc_ids = Channel.empty()

//
// SUBWORKFLOW: Rescoring
Expand All @@ -60,13 +63,14 @@ workflow DDA_ID {

MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names).branch{ meta, idxml, feature_name ->
sage: idxml.name.contains('sage')
return [meta, idxml]
return [meta, idxml, feature_name]
nosage: true
return [meta, idxml, feature_name]
}.set{ch_ms2rescore_branched}

EXTRACTPSMFEATURES(ch_ms2rescore_branched.nosage)
ch_id_files_feats = EXTRACTPSMFEATURES.out.id_files_feat.mix(ch_ms2rescore_branched.sage)
SAGEFEATURE(ch_ms2rescore_branched.sage)
ch_id_files_feats = EXTRACTPSMFEATURES.out.id_files_feat.mix(SAGEFEATURE.out.id_files_feat)
ch_software_versions = ch_software_versions.mix(EXTRACTPSMFEATURES.out.version)
} else {
EXTRACTPSMFEATURES(ch_id_files_branched.nosage)
Expand Down Expand Up @@ -143,13 +147,15 @@ workflow DDA_ID {

}

ch_rescoring_results = ch_consensus_input

} else if (params.posterior_probabilities == 'mokapot') {
MS2RESCORE(ch_id_files.combine(ch_file_preparation_results, by: 0))
ch_software_versions = ch_software_versions.mix(MS2RESCORE.out.versions)
IDSCORESWITCHER(MS2RESCORE.out.idxml.combine(Channel.value("PEP")))
ch_software_versions = ch_software_versions.mix(IDSCORESWITCHER.out.version)
ch_consensus_input = IDSCORESWITCHER.out.id_score_switcher.combine(Channel.value("MS:1001491"))
ch_rescoring_results = IDSCORESWITCHER.out.id_files_ForIDPEP
} else {
ch_fdridpep = Channel.empty()
if (params.search_engines.split(",").size() == 1) {
Expand All @@ -161,6 +167,7 @@ workflow DDA_ID {
IDPEP(ch_fdridpep.mix(ch_id_files))
ch_software_versions = ch_software_versions.mix(IDPEP.out.version)
ch_consensus_input = IDPEP.out.id_files_ForIDPEP
ch_rescoring_results = ch_consensus_input
}

//
Expand All @@ -172,24 +179,30 @@ workflow DDA_ID {
CONSENSUSID(ch_consensus_input.groupTuple(size: params.search_engines.split(",").size()))
ch_software_versions = ch_software_versions.mix(CONSENSUSID.out.version.ifEmpty(null))
ch_psmfdrcontrol = CONSENSUSID.out.consensusids
ch_consensus_results = CONSENSUSID.out.consensusids
ch_psmfdrcontrol
.map { it -> it[1] }
.set { ch_pmultiqc_consensus }
} else {
ch_psmfdrcontrol = ch_consensus_input
}

PSMFDRCONTROL(ch_psmfdrcontrol)
ch_software_versions = ch_software_versions.mix(PSMFDRCONTROL.out.version.ifEmpty(null))


// Extract PSMs and export parquet format
PSMCONVERSION(PSMFDRCONTROL.out.id_filtered.combine(ch_spectrum_data, by: 0))

ch_rescoring_results
.map { it -> it[1] }
.set { ch_pmultiqc_ids }
} else {
PSMCONVERSION(ch_id_files.combine(ch_spectrum_data, by: 0))
}


emit:
ch_pmultiqc_ids = ch_pmultiqc_ids
ch_pmultiqc_consensus = ch_pmultiqc_consensus
version = ch_software_versions
}

Expand Down
Loading

0 comments on commit bb0b114

Please sign in to comment.