Skip to content

Commit

Permalink
Merge pull request #193 from VukManojlovic/CTX-5939
Browse files Browse the repository at this point in the history
CTX-5939: Changed qiime multithreading to allow for exact thread count for command executions
  • Loading branch information
dule1322 authored May 27, 2024
2 parents 5bce683 + bb462a6 commit 2e817d6
Showing 1 changed file with 46 additions and 25 deletions.
71 changes: 46 additions & 25 deletions coretex/bioinformatics/ctx_qiime2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from typing import Optional, Union
from pathlib import Path

import os

from .utils import compressGzip, createSample, getDemuxSamples, getDenoisedSamples, \
getFastqDPSamples, getFastqMPSamples, getImportedSamples, getMetadata, getPhylogeneticTreeSamples, \
isDemultiplexedSample, isDenoisedSample, isFastqDPSample, isFastqMPSample, \
Expand Down Expand Up @@ -89,8 +91,6 @@ def demuxEmpPaired(
Output path to the resulting feature table
errorCorretctionDetailsPath : Union[str, Path]
Output path to the statistics of the denoising
multithreading : bool
Whether to use multithreading. One thread per CPU core
"""

if isinstance(sequencesPath, Path):
Expand Down Expand Up @@ -131,7 +131,7 @@ def dada2DenoiseSingle(
representativeSequencesPath: Union[str, Path],
tablePath: Union[str, Path],
denoisingStatsPath: Union[str, Path],
multithreading: bool = True
threads: Optional[int] = -1
) -> None:
"""
Wrapper for QIIME's DADA2 denoise-single, which performs denoising on the input paired-end reads.
Expand Down Expand Up @@ -159,8 +159,9 @@ def dada2DenoiseSingle(
Output path to the resulting feature table
denoisingStatsPath : Union[str, Path]
Output path to the statistics of the denoising
multithreading : bool
Whether to use multithreading. One thread per CPU core
threads : Optional[int]
Number of threads to use for the process. If None, multithreading will not be used.
Set to -1 (defaul) to use a thread for each CPU core
"""

if isinstance(inputPath, Path):
Expand All @@ -185,8 +186,10 @@ def dada2DenoiseSingle(
"--o-denoising-stats", denoisingStatsPath
]

if multithreading:
if threads == -1:
args.extend(["--p-n-threads", "0"])
elif threads is not None:
args.extend(["--p-n-threads", str()])

command(args)

Expand All @@ -200,7 +203,7 @@ def dada2DenoisePaired(
representativeSequencesPath: Union[str, Path],
tablePath: Union[str, Path],
denoisingStatsPath: Union[str, Path],
multithreading: bool = True
threads: Optional[int] = -1
) -> None:
"""
Wrapper for QIIME's DADA2 denoise-paired, which performs denoising on the input paired-end reads.
Expand Down Expand Up @@ -239,8 +242,9 @@ def dada2DenoisePaired(
Output path to the resulting feature table
denoisingStatsPath : Union[str, Path]
Output path to the statistics of the denoising
multithreading : bool
Whether to use multithreading. One thread per CPU core
threads : Optional[int]
Number of threads to use for the process. If None, multithreading will not be used.
Set to -1 (defaul) to use a thread for each CPU core
"""

if isinstance(inputPath, Path):
Expand All @@ -267,8 +271,10 @@ def dada2DenoisePaired(
"--o-denoising-stats", denoisingStatsPath
]

if multithreading:
if threads == -1:
args.extend(["--p-n-threads", "0"])
elif threads is not None:
args.extend(["--p-n-threads", str(threads)])

command(args)

Expand Down Expand Up @@ -304,7 +310,7 @@ def phylogenyAlignToTreeMafftFasttree(
maskedAligmentPath: str,
unrootedTreePath: str,
rootedTreePath: str,
multithreading: bool = True
threads: Optional[int] = -1
) -> None:

args = [
Expand All @@ -316,8 +322,10 @@ def phylogenyAlignToTreeMafftFasttree(
"--o-rooted-tree", rootedTreePath
]

if multithreading:
if threads == -1:
args.extend(["--p-n-threads", "auto"])
elif threads is not None:
args.extend(["--p-n-threads", str(threads)])

command(args)

Expand All @@ -328,7 +336,7 @@ def diversityCoreMetricsPhylogenetic(
samplingDepth: int,
metadataPath: str,
outputDir: str,
multithreading: bool = True
threads: Optional[int] = -1
) -> None:

args = [
Expand All @@ -340,8 +348,10 @@ def diversityCoreMetricsPhylogenetic(
"--output-dir", outputDir
]

if multithreading:
if threads == -1:
args.extend(["--p-n-jobs-or-threads", "auto"])
elif threads is not None:
args.extend(["--p-n-jobs-or-threads", str(threads)])

command(args)

Expand Down Expand Up @@ -416,7 +426,7 @@ def featureClassifierClassifySklearn(
classifierPath: str,
readsPath: str,
classificationPath: str,
multithreading: bool = True
threads: Optional[int] = -1
) -> None:

args = [
Expand All @@ -426,8 +436,10 @@ def featureClassifierClassifySklearn(
"--o-classification", classificationPath
]

if multithreading:
if threads == -1:
args.extend(["--p-n-jobs", "-1"])
elif threads is not None:
args.extend(["--p-n-jobs", str(threads)])

command(args)

Expand Down Expand Up @@ -509,7 +521,7 @@ def vsearchClusterDeNovo(
percIdentity: float,
clusteredTablePath: Union[str, Path],
clusteredSequencesPath: Union[str, Path],
multithreading: bool = True
threads: Optional[int] = -1
) -> None:
"""
Wrapper for QIIME2's vsearch de novo clusteing command.
Expand All @@ -527,7 +539,8 @@ def vsearchClusterDeNovo(
clusteredSequencesPath : Union[str, Path]
Path to the output clustered sequences
multithreading : bool
Whether to use multithreading. One thread per CPU core
Number of threads to use for the process. If None, multithreading will not be used.
Set to -1 (defaul) to use a thread for each CPU core
"""

if isinstance(tablePath, Path):
Expand All @@ -551,8 +564,10 @@ def vsearchClusterDeNovo(
"--o-clustered-sequences", clusteredSequencesPath
]

if multithreading:
if threads == -1:
args.extend(["--p-threads", "0"])
elif threads is not None:
args.extend(["--p-threads", str(threads)])

command(args)

Expand All @@ -565,7 +580,7 @@ def vsearchClusterClosedReference(
clusteredTablePath: Union[str, Path],
clusteredSequencesPath: Union[str, Path],
unmatchedSequencesPath: Union[str, Path],
multithreading: bool = True
threads: Optional[int] = -1
) -> None:
"""
Wrapper for QIIME2's vsearch closed reference clusteing command.
Expand All @@ -587,7 +602,8 @@ def vsearchClusterClosedReference(
unmatchedSequencesPath : Union[str, Path]
Path to the output unmatched sequences
multithreading : bool
Whether to use multithreading. One thread per CPU core
Number of threads to use for the process. If None, multithreading will not be used.
Set to -1 (defaul) to use a thread for each CPU core
"""

if isinstance(tablePath, Path):
Expand Down Expand Up @@ -619,8 +635,10 @@ def vsearchClusterClosedReference(
"--o-unmatched-sequences", unmatchedSequencesPath
]

if multithreading:
if threads == -1:
args.extend(["--p-threads", "0"])
elif threads is not None:
args.extend(["--p-threads", str(threads)])

command(args)

Expand All @@ -633,7 +651,7 @@ def vsearchClusterOpenReference(
clusteredTablePath: Union[str, Path],
clusteredSequencesPath: Union[str, Path],
newReferenceSequencesPath: Union[str, Path],
multithreading: bool = True
threads: Optional[int] = -1
) -> None:
"""
Wrapper for QIIME2's vsearch open reference clusteing command.
Expand All @@ -655,7 +673,8 @@ def vsearchClusterOpenReference(
newReferenceSequencesPath : Union[str, Path]
Path to the output new reference sequences
multithreading : bool
Whether to use multithreading. One thread per CPU core
Number of threads to use for the process. If None, multithreading will not be used.
Set to -1 (defaul) to use a thread for each CPU core
"""

if isinstance(tablePath, Path):
Expand Down Expand Up @@ -687,7 +706,9 @@ def vsearchClusterOpenReference(
"--o-new-reference-sequences", newReferenceSequencesPath
]

if multithreading:
if threads == -1:
args.extend(["--p-threads", "0"])
elif threads is not None:
args.extend(["--p-threads", str(threads)])

command(args)

0 comments on commit 2e817d6

Please sign in to comment.