Skip to content

Commit

Permalink
CTX-4247: Created a bioinformatics folder, moved the qiime2 folder th…
Browse files Browse the repository at this point in the history
…ere and migrated shared code from templates related to sequence alignment to sequence_alignment
  • Loading branch information
Vuk Manojlovic committed Jul 14, 2023
1 parent 9415689 commit 23423e3
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,14 @@
import subprocess
import logging

from ..utils import logProcessOutput


class QiimeCommandException(Exception):
pass


def logProcessOutput(output: bytes, severity: int) -> None:
decoded = output.decode("UTF-8")

for line in decoded.split("\n"):
# skip empty lines
if line.strip() == "":
continue

# ignoring type for now, has to be fixed in coretexpylib
logging.getLogger("coretexpylib").log(severity, line)


def QiimeCommand(args: List[str]) -> None:
def command(args: List[str]) -> None:
process = subprocess.Popen(
args,
shell = False,
Expand Down Expand Up @@ -82,7 +72,7 @@ def toolsImport(
"--input-format" , inputFormat
])

QiimeCommand(args)
command(args)


def demuxEmpSingle(
Expand All @@ -93,7 +83,7 @@ def demuxEmpSingle(
errorCorretctionDetailsPath: str
) -> None:

QiimeCommand([
command([
"qiime", "demux", "emp-single",
"--i-seqs", sequencesPath,
"--m-barcodes-file", barcodesPath,
Expand All @@ -105,7 +95,7 @@ def demuxEmpSingle(


def demuxSummarize(dataPath: str, visualizationPath: str) -> None:
QiimeCommand([
command([
"qiime", "demux", "summarize",
"--i-data", dataPath,
"--o-visualization", visualizationPath
Expand All @@ -121,7 +111,7 @@ def dada2DenoiseSingle(
denoisingStatsPath: str
) -> None:

QiimeCommand([
command([
"qiime", "dada2", "denoise-single",
"--i-demultiplexed-seqs", inputPath,
"--p-trim-left", str(trimLeft),
Expand All @@ -133,15 +123,15 @@ def dada2DenoiseSingle(


def metadataTabulate(inputFile: str, visualizationPath: str) -> None:
QiimeCommand([
command([
"qiime", "metadata", "tabulate",
"--m-input-file", inputFile,
"--o-visualization", visualizationPath
])


def featureTableSummarize(inputPath: str, visualizationPath: str, metadataPath: str) -> None:
QiimeCommand([
command([
"qiime", "feature-table", "summarize",
"--i-table", inputPath,
"--o-visualization", visualizationPath,
Expand All @@ -150,7 +140,7 @@ def featureTableSummarize(inputPath: str, visualizationPath: str, metadataPath:


def featureTableTabulateSeqs(inputPath: str, visualizationPath: str) -> None:
QiimeCommand([
command([
"qiime", "feature-table", "tabulate-seqs",
"--i-data", inputPath,
"--o-visualization", visualizationPath
Expand All @@ -165,7 +155,7 @@ def phylogenyAlignToTreeMafftFasttree(
rootedTreePath: str
) -> None:

QiimeCommand([
command([
"qiime", "phylogeny", "align-to-tree-mafft-fasttree",
"--i-sequences", sequencesPath,
"--o-alignment", aligmentPath,
Expand All @@ -183,7 +173,7 @@ def diversityCoreMetricsPhylogenetic(
outputDir: str
) -> None:

QiimeCommand([
command([
"qiime", "diversity", "core-metrics-phylogenetic",
"--i-phylogeny", phlogenyPath,
"--i-table", tablePath,
Expand All @@ -199,7 +189,7 @@ def diversityAlphaGroupSignificance(
visualizationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "diversity", "alpha-group-significance",
"--i-alpha-diversity", alphaDiversityPath,
"--m-metadata-file", metadataPath,
Expand All @@ -215,7 +205,7 @@ def diversityBetaGroupSignificance(
pairwise: bool
) -> None:

QiimeCommand([
command([
"qiime", "diversity", "beta-group-significance",
"--i-distance-matrix", distanceMatrixPath,
"--m-metadata-file", metadataPath,
Expand All @@ -232,7 +222,7 @@ def emperorPlot(
visualizationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "emperor", "plot",
"--i-pcoa", pcoaPath,
"--m-metadata-file", metadataPath,
Expand All @@ -249,7 +239,7 @@ def diversityAlphaRarefaction(
visualizationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "diversity", "alpha-rarefaction",
"--i-table", tablePath,
"--i-phylogeny", phylogenyPath,
Expand All @@ -265,7 +255,7 @@ def featureClassifierClassifySklearn(
classificationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "feature-classifier", "classify-sklearn",
"--i-classifier", classifierPath,
"--i-reads", readsPath,
Expand All @@ -280,7 +270,7 @@ def taxaBarplot(
visualizationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "taxa", "barplot",
"--i-table", tablePath,
"--i-taxonomy", taxonomyPath,
Expand All @@ -296,7 +286,7 @@ def featureTableFilterSamples(
filteredTablePath: str
) -> None:

QiimeCommand([
command([
"qiime", "feature-table", "filter-samples",
"--i-table", tablePath,
"--m-metadata-file", metadataPath,
Expand All @@ -306,7 +296,7 @@ def featureTableFilterSamples(


def compositionAddPseudocount(tablePath: str, compositionTablePath: str) -> None:
QiimeCommand([
command([
"qiime", "composition", "add-pseudocount",
"--i-table", tablePath,
"--o-composition-table", compositionTablePath
Expand All @@ -320,7 +310,7 @@ def compositionAncom(
visualizationPath: str
) -> None:

QiimeCommand([
command([
"qiime", "composition", "ancom",
"--i-table", tablePath,
"--m-metadata-file", metadataPath,
Expand All @@ -336,7 +326,7 @@ def taxaCollapse(
collapsedTablePath: str
) -> None:

QiimeCommand([
command([
"qiime", "taxa", "collapse",
"--i-table", tablePath,
"--i-taxonomy", taxonomyPath,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import logging
import gzip

from ..coretex import Experiment, CustomSample, CustomDataset
from ...coretex import Experiment, CustomSample, CustomDataset


def createSample(name: str, datasetId: int, path: Path, experiment: Experiment, stepName: str, retryCount: int = 0) -> CustomSample:
Expand Down
126 changes: 126 additions & 0 deletions coretex/bioinformatics/sequence_alignment/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright (C) 2023 Coretex LLC

# This file is part of Coretex.ai

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from typing import List, Tuple
from pathlib import Path

import subprocess
import logging

from ...coretex import CustomDataset
from ..utils import logProcessOutput


def command(args: List[str], ignoreOutput: bool = False) -> None:
process = subprocess.Popen(
args,
shell = False,
cwd = Path(__file__).parent,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE
)

while process.poll() is None:
stdout = process.stdout.readline()
stderr = process.stderr.readline()

if len(stdout) > 0 and not ignoreOutput:
logProcessOutput(stdout, logging.INFO)

if len(stderr) > 0 and not ignoreOutput:
if process.returncode == 0:
logProcessOutput(stderr, logging.INFO)
else:
logProcessOutput(stderr, logging.CRITICAL)

if process.returncode != 0:
raise RuntimeError(f">> [Coretex] Falied to execute command. Returncode: {process.returncode}")


def indexCommand(bwaPath: Path, sequencePath: Path, prefix: Path) -> None:
command([
str(bwaPath.absolute()), "index",
"-p", str(prefix.absolute()),
str(sequencePath.absolute())
])


def alignCommand(bwaPath: Path, prefix: Path, sequencePath: Path, outputPath: Path) -> None:
args = [
str(bwaPath.absolute()), "mem",
"-o", str(outputPath.absolute()),
str(prefix.absolute()),
str(sequencePath.absolute())
]

command(args, True)


def sam2bamCommand(samtoolsPath: Path, samPath: Path, outputPath: Path) -> None:
command([
str(samtoolsPath.absolute()), "view",
"-b", "-S", "-o",
str(outputPath.absolute()),
str(samPath.absolute())
])


def extractData(samtoolsPath: Path, file: Path) -> Tuple[List[int], List[int], List[int]]:
scores: list[int] = []
positions: list[int] = []
sequenceLengths: list[int] = []

args = [
str(samtoolsPath.absolute()),
"view", "-F", "256", "-F", "2048", "-F", "512",
str(file.absolute())
]

process = subprocess.Popen(
args,
shell = False,
cwd = Path(__file__).parent,
stdout = subprocess.PIPE
)

for line in process.stdout:
fields = line.decode("UTF-8").strip().split("\t")
scores.append(int(fields[4]))
positions.append(int(fields[3]))
sequenceLengths.append(len(fields[9]))

return scores, positions, sequenceLengths


def chmodX(file: Path) -> None:
command(["chmod", "+x", str(file.absolute())])


def loadFa(dataset: CustomDataset) -> List[Path]:
inputFiles: list[Path] = []

for sample in dataset.samples:
sample.unzip()

for file in Path(sample.path).iterdir():
if file.suffix.startswith(".fa"):
inputFiles.append(file)

if len(inputFiles) == 0:
raise ValueError(">> [Coretex] No sequence reads found")

return inputFiles
30 changes: 30 additions & 0 deletions coretex/bioinformatics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (C) 2023 Coretex LLC

# This file is part of Coretex.ai

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging


def logProcessOutput(output: bytes, severity: int) -> None:
decoded = output.decode("UTF-8")

for line in decoded.split("\n"):
# skip empty lines
if line.strip() == "":
continue

# ignoring type for now, has to be fixed in coretexpylib
logging.getLogger("coretexpylib").log(severity, line)

0 comments on commit 23423e3

Please sign in to comment.