Skip to content

Commit

Permalink
Add validation of duplication rate
Browse files: browse the repository at this point in the history
  • Loading branch information
seallard committed Dec 12, 2023
1 parent 0c3aa9b commit 418837d
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
1 change: 1 addition & 0 deletions cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ class APIMethods(StrEnum):
# Numeric QC cut-offs and targets referenced by the microSALT quality checks.
class MicrosaltQC:
QC_PERCENT_THRESHOLD_MWX: float = 0.1
COVERAGE_10X_THRESHOLD: float = 0.75
# Used by is_valid_duplication_rate: a sample passes only when its
# duplication rate is strictly below this value.
DUPLICATION_RATE_THRESHOLD: float = 0.8
# Used by is_valid_mapped_rate: a sample passes only when its mapped rate is
# strictly above this value.
MAPPED_RATE_THRESHOLD: float = 0.3
NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2
TARGET_READS: int = 6000000
Expand Down
18 changes: 11 additions & 7 deletions cg/meta/workflow/microsalt/quality_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from cg.constants.constants import MicrosaltAppTags, MicrosaltQC
from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics
from cg.meta.workflow.microsalt.utils import (
is_valid_duplication_rate,
is_valid_mapped_rate,
is_valid_total_reads,
is_valid_total_reads_for_control,
Expand All @@ -28,17 +29,16 @@ def quality_control(self, run_dir_path: Path, lims_project: str):

sample_results: list[QualityResult] = []

for sample_metrics in quality_metrics:
result = self.quality_control_sample(sample_metrics)
for sample_id, metrics in quality_metrics:
result = self.quality_control_sample(sample_id=sample_id, metrics=metrics)
sample_results.append(result)

self.quality_control_case(sample_results)

def quality_control_sample(
self, sample_id: str, sample_metrics: SampleMetrics
) -> QualityResult:
reads_passes_qc: bool = self.is_valid_total_reads(sample_id)
mapped_rate_passes_qc: bool = self.is_valid_mapped_rate(sample_metrics)
def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult:
    """Run the per-sample QC checks for one sample.

    Evaluates, in order, the total-reads check (by sample id) and the
    mapped-rate and duplication-rate checks (from the sample's metrics) via
    the matching ``is_valid_*`` methods on this object.

    NOTE(review): the three outcomes are computed but neither combined nor
    returned, so the call currently yields ``None`` despite the
    ``QualityResult`` annotation -- presumably work in progress; confirm
    before relying on the return value.
    """
    reads_ok: bool = self.is_valid_total_reads(sample_id)
    mapped_rate_ok: bool = self.is_valid_mapped_rate(metrics)
    duplication_rate_ok: bool = self.is_valid_duplication_rate(metrics)

def quality_control_case(self, sample_results: list[QualityResult]) -> bool:
    """Case-level QC over the collected per-sample results.

    Placeholder: no logic is implemented yet, so the call does nothing and
    returns ``None`` rather than the annotated ``bool``.
    """
Expand Down Expand Up @@ -162,3 +162,7 @@ def is_valid_total_reads(self, sample_id: str) -> bool:
def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool:
    """Check whether the sample's mapped rate passes QC.

    Reads ``metrics.microsalt_samtools_stats.mapped_rate`` and delegates the
    decision to the module-level ``is_valid_mapped_rate`` helper.
    """
    samtools_stats = metrics.microsalt_samtools_stats
    return is_valid_mapped_rate(samtools_stats.mapped_rate)

def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool:
    """Check whether the sample's duplication rate passes QC.

    Reads ``metrics.picard_markduplicate.duplication_rate`` and delegates the
    decision to the module-level ``is_valid_duplication_rate`` helper.
    """
    markduplicate_stats = metrics.picard_markduplicate
    return is_valid_duplication_rate(markduplicate_stats.duplication_rate)
4 changes: 4 additions & 0 deletions cg/meta/workflow/microsalt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def is_valid_mapped_rate(sample_mapped_rate: float) -> bool:
return sample_mapped_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD


def is_valid_duplication_rate(sample_duplication_rate: float) -> bool:
    """Return True when the duplication rate is strictly below the QC cut-off.

    The cut-off is ``MicrosaltQC.DUPLICATION_RATE_THRESHOLD``; a rate equal to
    the threshold does not pass.
    """
    threshold: float = MicrosaltQC.DUPLICATION_RATE_THRESHOLD
    return sample_duplication_rate < threshold


def parse_quality_metrics(file_path: Path) -> QualityMetrics:
    """Load the file at *file_path* and validate it into ``QualityMetrics``.

    NOTE(review): ``model_validate_json`` is handed whatever ``read_json``
    returns. If ``read_json`` yields already-parsed data rather than raw JSON
    text, ``model_validate`` would be the matching call -- confirm against
    ``read_json``.
    """
    return QualityMetrics.model_validate_json(read_json(file_path))

0 comments on commit 418837d

Please sign in to comment.