diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py
index b99f71f59d..2ae4053bc8 100644
--- a/cg/cli/workflow/microsalt/base.py
+++ b/cg/cli/workflow/microsalt/base.py
@@ -14,7 +14,7 @@
 from cg.meta.workflow.analysis import AnalysisAPI
 from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI
 from cg.models.cg_config import CGConfig
-from cg.store.models import Sample
+from cg.store.models import Case, Sample
 
 LOG = logging.getLogger(__name__)
 
@@ -218,21 +218,15 @@ def start_available(context: click.Context, dry_run: bool = False):
         raise click.Abort
 
 
-@microsalt.command("qc-microsalt")
+@microsalt.command("qc")
 @ARGUMENT_UNIQUE_IDENTIFIER
 @click.pass_context
 def qc_microsalt(context: click.Context, unique_id: str) -> None:
     """Perform QC on a microsalt case."""
     analysis_api: MicrosaltAnalysisAPI = context.obj.meta_apis["analysis_api"]
+    metrics_file_path: Path = analysis_api.get_metrics_file_path(unique_id)
     try:
-        analysis_api.microsalt_qc(
-            case_id=unique_id,
-            run_dir_path=analysis_api.get_latest_case_path(case_id=unique_id),
-            lims_project=analysis_api.get_project(
-                analysis_api.status_db.get_case_by_internal_id(internal_id=unique_id)
-                .samples[0]
-                .internal_id
-            ),
-        )
+        LOG.info(f"Performing QC on case {unique_id}")
+        analysis_api.quality_checker.quality_control(metrics_file_path)
     except IndexError:
         LOG.error(f"No existing analysis directories found for case {unique_id}.")
diff --git a/cg/constants/constants.py b/cg/constants/constants.py
index 642858add4..5074cc9557 100644
--- a/cg/constants/constants.py
+++ b/cg/constants/constants.py
@@ -211,10 +211,15 @@ class APIMethods(StrEnum):
 
 
 class MicrosaltQC:
-    QC_PERCENT_THRESHOLD_MWX: float = 0.1
+    AVERAGE_COVERAGE_THRESHOLD: int = 10
+    MWX_THRESHOLD_SAMPLES_PASSING: float = 0.9
     COVERAGE_10X_THRESHOLD: float = 0.75
+    DUPLICATION_RATE_THRESHOLD: float = 0.8
+    INSERT_SIZE_THRESHOLD: int = 100
+    MAPPED_RATE_THRESHOLD: float = 0.3
     NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2
     TARGET_READS: int = 6000000
+    TARGET_READS_FAIL_THRESHOLD: float = 0.7
 
 
 class MicrosaltAppTags(StrEnum):
diff --git a/cg/exc.py b/cg/exc.py
index 921fefef3f..17596615fa 100644
--- a/cg/exc.py
+++ b/cg/exc.py
@@ -132,6 +132,18 @@ class LimsDataError(CgError):
     """
 
 
+class MicrosaltError(CgError):
+    """
+    Error related to Microsalt analysis.
+    """
+
+
+class MissingAnalysisDir(CgError):
+    """
+    Error related to a missing analysis directory.
+    """
+
+
 class OrderError(CgError):
     """
     Exception related to orders.
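Reviewer note: the MicrosaltQC thresholds added above feed the small predicate helpers introduced further down in this diff (cg/meta/workflow/microsalt/quality_controller/utils.py). A minimal, standalone sketch of that pattern; MicrosaltQC is stubbed here with the values from the patch so the snippet runs outside the repo:

    # Standalone sketch for review only, not part of the patch.
    class MicrosaltQC:
        MAPPED_RATE_THRESHOLD: float = 0.3
        DUPLICATION_RATE_THRESHOLD: float = 0.8

    def is_valid_mapping_rate(mapped_rate: float) -> bool:
        # Higher is better: the sample must map above the threshold.
        return mapped_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD

    def is_valid_duplication_rate(duplication_rate: float) -> bool:
        # Lower is better: duplication must stay below the threshold.
        return duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD

    assert is_valid_mapping_rate(0.84)         # e.g. the mapped_rate in the updated fixture
    assert not is_valid_duplication_rate(0.9)  # a highly duplicated library fails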
diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py
new file mode 100644
index 0000000000..2f1bfe3bc8
--- /dev/null
+++ b/cg/meta/workflow/microsalt/__init__.py
@@ -0,0 +1 @@
+from cg.meta.workflow.microsalt.microsalt import MicrosaltAnalysisAPI
diff --git a/cg/meta/workflow/microsalt/constants.py b/cg/meta/workflow/microsalt/constants.py
new file mode 100644
index 0000000000..af2e1ed055
--- /dev/null
+++ b/cg/meta/workflow/microsalt/constants.py
@@ -0,0 +1,4 @@
+from cg.constants.constants import FileExtensions
+
+
+QUALITY_REPORT_FILE_NAME: str = f"QC_done{FileExtensions.JSON}"
diff --git a/cg/meta/workflow/microsalt/metrics_parser/__init__.py b/cg/meta/workflow/microsalt/metrics_parser/__init__.py
new file mode 100644
index 0000000000..ba8833b223
--- /dev/null
+++ b/cg/meta/workflow/microsalt/metrics_parser/__init__.py
@@ -0,0 +1,2 @@
+from cg.meta.workflow.microsalt.metrics_parser.metrics_parser import MetricsParser
+from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics
diff --git a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py
new file mode 100644
index 0000000000..b35c883dc2
--- /dev/null
+++ b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+from cg.io.json import read_json
+from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics
+
+
+class MetricsParser:
+    @staticmethod
+    def parse(file_path: Path) -> QualityMetrics:
+        data = read_json(file_path)
+        formatted_data = {"samples": data}
+        return QualityMetrics.model_validate(formatted_data)
diff --git a/cg/meta/workflow/microsalt/metrics_parser/models.py b/cg/meta/workflow/microsalt/metrics_parser/models.py
new file mode 100644
index 0000000000..de91d116ce
--- /dev/null
+++ b/cg/meta/workflow/microsalt/metrics_parser/models.py
@@ -0,0 +1,27 @@
+from typing import Annotated
+from pydantic import BaseModel, BeforeValidator
+
+
+def empty_str_to_none(v: str) -> str | None:
+    return v or None
+
+
+class PicardMarkduplicate(BaseModel):
+    insert_size: Annotated[int | None, BeforeValidator(empty_str_to_none)]
+    duplication_rate: Annotated[float | None, BeforeValidator(empty_str_to_none)]
+
+
+class MicrosaltSamtoolsStats(BaseModel):
+    total_reads: Annotated[int | None, BeforeValidator(empty_str_to_none)]
+    mapped_rate: Annotated[float | None, BeforeValidator(empty_str_to_none)]
+    average_coverage: Annotated[float | None, BeforeValidator(empty_str_to_none)]
+    coverage_10x: Annotated[float | None, BeforeValidator(empty_str_to_none)]
+
+
+class SampleMetrics(BaseModel):
+    picard_markduplicate: PicardMarkduplicate
+    microsalt_samtools_stats: MicrosaltSamtoolsStats
+
+
+class QualityMetrics(BaseModel):
+    samples: dict[str, SampleMetrics]
diff --git a/cg/meta/workflow/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py
similarity index 62%
rename from cg/meta/workflow/microsalt.py
rename to cg/meta/workflow/microsalt/microsalt.py
index 3bd36e91fe..bbcc1ed6d2 100644
--- a/cg/meta/workflow/microsalt.py
+++ b/cg/meta/workflow/microsalt/microsalt.py
@@ -1,10 +1,3 @@
-""" API to manage Microsalt Analyses
-    Organism - Fallback based on reference, ‘Other species’ and ‘Comment’. Default to “Unset”.
-    Priority = Default to empty string. Weird response. Typically “standard” or “research”.
-    Reference = Defaults to “None”
-    Method: Outputted as “1273:23”. Defaults to “Not in LIMS”
-    Date: Returns latest == most recent date. Outputted as DT object “YYYY MM DD”. Defaults to
-    datetime.min"""
 import glob
 import logging
 import os
@@ -17,13 +10,14 @@
 import click
 
 from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority
-from cg.constants.constants import MicrosaltAppTags, MicrosaltQC
-from cg.exc import CgDataError
-from cg.io.json import read_json, write_json
+from cg.constants.constants import FileExtensions
+from cg.constants.tb import AnalysisStatus
+from cg.exc import CgDataError, MissingAnalysisDir
 from cg.meta.workflow.analysis import AnalysisAPI
 from cg.meta.workflow.fastq import MicrosaltFastqHandler
+from cg.meta.workflow.microsalt.quality_controller import QualityController
+from cg.meta.workflow.microsalt.quality_controller.models import QualityResult
 from cg.models.cg_config import CGConfig
-from cg.models.orders.sample_base import ControlEnum
 from cg.store.models import Case, Sample
 from cg.utils import Process
 
@@ -37,6 +31,7 @@ def __init__(self, config: CGConfig, pipeline: Pipeline = Pipeline.MICROSALT):
         super().__init__(pipeline, config)
         self.root_dir = config.microsalt.root
         self.queries_path = config.microsalt.queries_path
+        self.quality_checker = QualityController(config.status_db)
 
     @property
     def use_read_count_threshold(self) -> bool:
@@ -60,33 +55,6 @@ def process(self) -> Process:
         )
         return self._process
 
-    def get_case_path(self, case_id: str) -> list[Path]:
-        """Returns all paths associated with the case or single sample analysis."""
-        case_obj: Case = self.status_db.get_case_by_internal_id(internal_id=case_id)
-        lims_project: str = self.get_project(case_obj.links[0].sample.internal_id)
-        lims_project_dir_path: Path = Path(self.root_dir, "results", lims_project)
-
-        case_directories: list[Path] = [
-            Path(path) for path in glob.glob(f"{lims_project_dir_path}*", recursive=True)
-        ]
-
-        return sorted(case_directories, key=os.path.getctime, reverse=True)
-
-    def get_latest_case_path(self, case_id: str) -> Path | None:
-        """Return latest run dir for a microbial case, if no path found it returns None."""
-        lims_project: str = self.get_project(
-            self.status_db.get_case_by_internal_id(internal_id=case_id).links[0].sample.internal_id
-        )
-
-        return next(
-            (
-                path
-                for path in self.get_case_path(case_id=case_id)
-                if lims_project + "_" in str(path)
-            ),
-            None,
-        )
-
     def clean_run_dir(self, case_id: str, yes: bool, case_path: list[Path] | Path) -> int:
         """Remove workflow run directories for a MicroSALT case."""
 
@@ -167,10 +135,7 @@ def get_samples(self, case_id: str, sample_id: str | None = None) -> list[Sample
     def get_lims_comment(self, sample_id: str) -> str:
         """Returns the comment associated with a sample stored in lims"""
         comment: str = self.lims_api.get_sample_comment(sample_id) or ""
-        if re.match(r"\w{4}\d{2,3}", comment):
-            return comment
-
-        return ""
+        return comment if re.match(r"\w{4}\d{2,3}", comment) else ""
 
     def get_organism(self, sample_obj: Sample) -> str:
         """Organism
@@ -282,94 +247,71 @@ def get_case_id_from_case(self, unique_id: str) -> tuple[str, None]:
         case_id = case_obj.internal_id
         return case_id, None
 
-    def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool:
-        """Check if given microSALT case passes QC check."""
-        failed_samples: dict = {}
-        case_qc: dict = read_json(file_path=Path(run_dir_path, f"{lims_project}.json"))
+    def get_cases_to_store(self) -> list[Case]:
+        cases_qc_ready: list[Case] = self.get_completed_cases()
+        cases_to_store: list[Case] = []
+        LOG.info(f"Found {len(cases_qc_ready)} cases to perform QC on!")
+
+        for case in cases_qc_ready:
+            case_run_dir: Path | None = self.get_case_path(case.internal_id)
+            LOG.info(f"Checking QC for case {case.internal_id} in {case_run_dir}")
+
+            if self.quality_checker.is_qc_required(case_run_dir):
+                LOG.info(f"QC required for case {case.internal_id}")
+                metrics_file_path = self.get_metrics_file_path(case.internal_id)
+
+                if not metrics_file_path.exists():
+                    continue
+
+                result: QualityResult = self.quality_checker.quality_control(metrics_file_path)
+                self.trailblazer_api.add_comment(case_id=case.internal_id, comment=result.summary)
+                if result.passes_qc:
+                    cases_to_store.append(case)
+                else:
+                    self.trailblazer_api.set_analysis_status(
+                        case_id=case.internal_id, status=AnalysisStatus.FAILED
+                    )
+            else:
+                cases_to_store.append(case)
 
-        for sample_id in case_qc:
-            sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id)
-            sample_check: dict | None = self.qc_sample_check(
-                sample=sample,
-                sample_qc=case_qc[sample_id],
-            )
-            if sample_check is not None:
-                failed_samples[sample_id] = sample_check
-
-        return self.qc_case_check(
-            case_id=case_id,
-            failed_samples=failed_samples,
-            number_of_samples=len(case_qc),
-            run_dir_path=run_dir_path,
-        )
+        return cases_to_store
 
-    def qc_case_check(
-        self, case_id: str, failed_samples: dict, number_of_samples: int, run_dir_path: Path
-    ) -> bool:
-        """Perform the final QC check for a microbial case based on failed samples."""
-        qc_pass: bool = True
-
-        for sample_id in failed_samples:
-            sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id)
-            if sample.control == ControlEnum.negative:
-                qc_pass = False
-            if sample.application_version.application.tag == MicrosaltAppTags.MWRNXTR003:
-                qc_pass = False
-
-        # Check if more than 10% of MWX samples failed
-        if len(failed_samples) / number_of_samples > MicrosaltQC.QC_PERCENT_THRESHOLD_MWX:
-            qc_pass = False
-
-        if not qc_pass:
-            LOG.warning(
-                f"Case {case_id} failed QC, see {run_dir_path}/QC_done.json for more information."
-            )
-        else:
-            LOG.info(f"Case {case_id} passed QC.")
+    def get_completed_cases(self) -> list[Case]:
+        """Return cases that are completed in trailblazer."""
+        return [
+            case
+            for case in self.status_db.get_running_cases_in_pipeline(self.pipeline)
+            if self.trailblazer_api.is_latest_analysis_completed(case.internal_id)
+        ]
 
-        self.create_qc_done_file(
-            run_dir_path=run_dir_path,
-            failed_samples=failed_samples,
-        )
-        return qc_pass
-
-    def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None:
-        """Creates a QC_done when a QC check is performed."""
-        write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples)
-
-    def qc_sample_check(self, sample: Sample, sample_qc: dict) -> dict | None:
-        """Perform a QC on a sample."""
-        if sample.control == ControlEnum.negative:
-            reads_pass: bool = self.check_external_negative_control_sample(sample)
-            if not reads_pass:
-                LOG.warning(f"Negative control sample {sample.internal_id} failed QC.")
-                return {"Passed QC Reads": reads_pass}
-        else:
-            reads_pass: bool = sample.sequencing_qc
-            coverage_10x_pass: bool = self.check_coverage_10x(
-                sample_name=sample.internal_id, sample_qc=sample_qc
-            )
-            if not reads_pass or not coverage_10x_pass:
-                LOG.warning(f"Sample {sample.internal_id} failed QC.")
-                return {"Passed QC Reads": reads_pass, "Passed Coverage 10X": coverage_10x_pass}
-
-    def check_coverage_10x(self, sample_name: str, sample_qc: dict) -> bool:
-        """Check if a sample passed the coverage_10x criteria."""
-        try:
-            return (
-                sample_qc["microsalt_samtools_stats"]["coverage_10x"]
-                >= MicrosaltQC.COVERAGE_10X_THRESHOLD
-            )
-        except TypeError as e:
-            LOG.error(
-                f"There is no 10X coverage value for sample {sample_name}, setting qc to fail for this sample"
-            )
-            LOG.error(f"See error: {e}")
-            return False
-
-    def check_external_negative_control_sample(self, sample: Sample) -> bool:
-        """Check if external negative control passed read check"""
-        return sample.reads < (
-            sample.application_version.application.target_reads
-            * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD
-        )
+    def get_metrics_file_path(self, case_id: str) -> Path:
+        """Return path to metrics file for a case."""
+        project_id: str = self.get_project_id(case_id)
+        case_run_dir: Path = self.get_case_path(case_id)
+        return Path(case_run_dir, f"{project_id}{FileExtensions.JSON}")
+
+    def extract_project_id(self, sample_id: str) -> str:
+        return sample_id.rsplit("A", maxsplit=1)[0]
+
+    def get_project_id(self, case_id: str) -> str:
+        case: Case = self.status_db.get_case_by_internal_id(case_id)
+        sample_id: str = case.links[0].sample.internal_id
+        return self.extract_project_id(sample_id)
+
+    def get_results_dir(self) -> Path:
+        return Path(self.root_dir, "results")
+
+    def get_analyses_result_dirs(self, case_id: str) -> list[str]:
+        project_id: str = self.get_project_id(case_id)
+        results_dir: Path = self.get_results_dir()
+        matches: list[str] = [d for d in os.listdir(results_dir) if d.startswith(project_id)]
+        if not matches:
+            LOG.error(f"No result directory found for {case_id} with project id {project_id}")
+            raise MissingAnalysisDir
+        return matches
+
+    def get_case_path(self, case_id: str) -> Path:
+        results_dir: Path = self.get_results_dir()
+        matching_cases: list[str] = self.get_analyses_result_dirs(case_id)
+        case_dir: str = max(matching_cases, default=None)
+        return Path(results_dir, case_dir)
diff --git a/cg/meta/workflow/microsalt/quality_controller/__init__.py b/cg/meta/workflow/microsalt/quality_controller/__init__.py
new file mode 100644
index 0000000000..a2afe4fcaa
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/__init__.py
@@ -0,0 +1 @@
+from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController
diff --git a/cg/meta/workflow/microsalt/quality_controller/models.py b/cg/meta/workflow/microsalt/quality_controller/models.py
new file mode 100644
index 0000000000..f9579579c0
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/models.py
@@ -0,0 +1,33 @@
+from pydantic import BaseModel
+
+from cg.constants.constants import MicrosaltAppTags
+
+
+class SampleQualityResult(BaseModel):
+    sample_id: str
+    passes_qc: bool
+    is_control: bool
+    application_tag: MicrosaltAppTags
+    passes_reads_qc: bool
+    passes_mapping_qc: bool = True
+    passes_duplication_qc: bool = True
+    passes_inserts_qc: bool = True
+    passes_coverage_qc: bool = True
+    passes_10x_coverage_qc: bool = True
+
+
+class CaseQualityResult(BaseModel):
+    passes_qc: bool
+    control_passes_qc: bool
+    urgent_passes_qc: bool
+    non_urgent_passes_qc: bool
+
+
+class QualityResult(BaseModel):
+    case: CaseQualityResult
+    samples: list[SampleQualityResult]
+    summary: str
+
+    @property
+    def passes_qc(self) -> bool:
+        return self.case.passes_qc
diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py
new file mode 100644
index 0000000000..8a00abb1e7
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py
@@ -0,0 +1,126 @@
+import logging
+from pathlib import Path
+from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME
+
+from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    QualityResult,
+    SampleQualityResult,
+)
+from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator
+from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger
+from cg.meta.workflow.microsalt.quality_controller.utils import (
+    get_application_tag,
+    get_percent_reads_guaranteed,
+    get_report_path,
+    get_sample_target_reads,
+    is_sample_negative_control,
+    has_valid_10x_coverage,
+    has_valid_average_coverage,
+    has_valid_duplication_rate,
+    has_valid_mapping_rate,
+    has_valid_median_insert_size,
+    is_valid_total_reads,
+    is_valid_total_reads_for_negative_control,
+    quality_control_case,
+)
+from cg.store.api.core import Store
+from cg.store.models import Sample
+
+LOG = logging.getLogger(__name__)
+
+
+class QualityController:
+    def __init__(self, status_db: Store):
+        self.status_db = status_db
+
+    def quality_control(self, case_metrics_file_path: Path) -> QualityResult:
+        quality_metrics: QualityMetrics = MetricsParser.parse(case_metrics_file_path)
+        sample_results: list[SampleQualityResult] = self.quality_control_samples(quality_metrics)
+        case_result: CaseQualityResult = quality_control_case(sample_results)
+        report_file: Path = get_report_path(case_metrics_file_path)
+        ReportGenerator.report(out_file=report_file, samples=sample_results, case=case_result)
+        ResultLogger.log_results(case=case_result, samples=sample_results, report=report_file)
+        summary: str = ReportGenerator.get_summary(case=case_result, samples=sample_results)
+        return QualityResult(case=case_result, samples=sample_results, summary=summary)
+
+    def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[SampleQualityResult]:
+        sample_results: list[SampleQualityResult] = []
+        for sample_id, metrics in quality_metrics.samples.items():
+            result = self.quality_control_sample(sample_id=sample_id, metrics=metrics)
+            sample_results.append(result)
+        return sample_results
+
+    def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> SampleQualityResult:
+        valid_read_count: bool = self.has_valid_total_reads(sample_id)
+        valid_mapping: bool = has_valid_mapping_rate(metrics)
+        valid_duplication: bool = has_valid_duplication_rate(metrics)
+        valid_inserts: bool = has_valid_median_insert_size(metrics)
+        valid_coverage: bool = has_valid_average_coverage(metrics)
+        valid_10x_coverage: bool = has_valid_10x_coverage(metrics)
+
+        sample: Sample = self.status_db.get_sample_by_internal_id(sample_id)
+        application_tag: str = get_application_tag(sample)
+
+        if is_control := is_sample_negative_control(sample):
+            sample_quality = SampleQualityResult(
+                sample_id=sample_id,
+                passes_qc=valid_read_count,
+                is_control=is_control,
+                passes_reads_qc=valid_read_count,
+                application_tag=application_tag,
+            )
+            ResultLogger.log_sample_result(sample_quality)
+            return sample_quality
+
+        sample_passes_qc: bool = (
+            valid_read_count
+            and valid_mapping
+            and valid_duplication
+            and valid_inserts
+            and valid_coverage
+            and valid_10x_coverage
+        )
+
+        sample_quality = SampleQualityResult(
+            sample_id=sample_id,
+            passes_qc=sample_passes_qc,
+            is_control=is_control,
+            application_tag=application_tag,
+            passes_reads_qc=valid_read_count,
+            passes_mapping_qc=valid_mapping,
+            passes_duplication_qc=valid_duplication,
+            passes_inserts_qc=valid_inserts,
+            passes_coverage_qc=valid_coverage,
+            passes_10x_coverage_qc=valid_10x_coverage,
+        )
+        ResultLogger.log_sample_result(sample_quality)
+        return sample_quality
+
+    def is_qc_required(self, case_run_dir: Path) -> bool:
+        if not case_run_dir:
+            LOG.warning(f"Skipping QC, run directory {case_run_dir} does not exist.")
+            return False
+        qc_done_path: Path = case_run_dir.joinpath(QUALITY_REPORT_FILE_NAME)
+        qc_already_done: bool = qc_done_path.exists()
+        if qc_already_done:
+            LOG.warning(f"Skipping QC, report {qc_done_path} already exists.")
+        return not qc_done_path.exists()
+
+    def has_valid_total_reads(self, sample_id: str) -> bool:
+        sample: Sample = self.status_db.get_sample_by_internal_id(sample_id)
+        target_reads: int = get_sample_target_reads(sample)
+        percent_reads_guaranteed: int = get_percent_reads_guaranteed(sample)
+        sample_reads: int = sample.reads
+
+        if is_sample_negative_control(sample):
+            return is_valid_total_reads_for_negative_control(
+                reads=sample_reads, target_reads=target_reads
+            )
+
+        return is_valid_total_reads(
+            reads=sample_reads,
+            target_reads=target_reads,
+            threshold_percentage=percent_reads_guaranteed,
+        )
diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py
new file mode 100644
index 0000000000..53932a427a
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+from typing import List
+
+from cg.io.json import write_json
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    SampleQualityResult,
+)
+from cg.meta.workflow.microsalt.quality_controller.result_logger import sample_result_message
+
+
+class ReportGenerator:
+    @staticmethod
+    def report(out_file: Path, case: CaseQualityResult, samples: List[SampleQualityResult]) -> None:
+        summary: str = ReportGenerator.get_summary(case=case, samples=samples)
+        report_content = {
+            "summary": summary,
+            "case": case.model_dump(),
+            "samples": [sample.model_dump() for sample in samples],
+        }
+        write_json(file_path=out_file, content=report_content)
+
+    @staticmethod
+    def get_summary(case: CaseQualityResult, samples: List[SampleQualityResult]) -> str:
+        case_summary: str = "Case passed QC. " if case.passes_qc else "Case failed QC. "
+        sample_summary: str = sample_result_message(samples)
+        return case_summary + sample_summary
diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py
new file mode 100644
index 0000000000..327705bb32
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py
@@ -0,0 +1,70 @@
+import logging
+from pathlib import Path
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    SampleQualityResult,
+)
+
+LOG = logging.getLogger(__name__)
+
+
+class ResultLogger:
+    @staticmethod
+    def log_results(
+        samples: list[SampleQualityResult], case: CaseQualityResult, report: Path
+    ) -> None:
+        if case.passes_qc:
+            LOG.info(f"QC passed, see {report} for details.")
+        else:
+            message = get_case_fail_message(case)
+            LOG.warning(message)
+
+        message = sample_result_message(samples)
+        LOG.info(message)
+
+    @staticmethod
+    def log_sample_result(result: SampleQualityResult) -> None:
+        control_message = "Control sample " if result.is_control else ""
+        if result.passes_qc:
+            message = f"{control_message}{result.sample_id} passed QC."
+            LOG.info(message)
+        else:
+            message = f"{control_message}{result.sample_id} failed QC."
+            LOG.warning(message)
+
+    @staticmethod
+    def log_case_result(result: CaseQualityResult) -> None:
+        if not result.passes_qc:
+            LOG.warning("Case failed QC.")
+
+
+def get_case_fail_message(case: CaseQualityResult) -> str:
+    fail_reasons = []
+
+    if not case.control_passes_qc:
+        fail_reasons.append("The negative control sample failed QC.\n")
+    if not case.urgent_passes_qc:
+        fail_reasons.append("The urgent samples failed QC.\n")
+    if not case.non_urgent_passes_qc:
+        fail_reasons.append("The non-urgent samples failed QC.\n")
+    fail_message = "QC failed.\n"
+    return fail_message + "".join(fail_reasons)
+
+
+def sample_result_message(samples: list[SampleQualityResult]) -> str:
+    failed_samples: list[SampleQualityResult] = get_failed_results(samples)
+    passed_samples: list[SampleQualityResult] = get_passed_results(samples)
+
+    failed_count: int = len(failed_samples)
+    passed_count: int = len(passed_samples)
+    total_count: int = len(samples)
+
+    return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total."
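+
+# NOTE (reviewer annotation): with one failing sample out of three, the message
+# above reads "Sample results: 1 failed, 2 passed, 3 total."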
+
+
+def get_failed_results(samples: list[SampleQualityResult]) -> list[SampleQualityResult]:
+    return [result for result in samples if not result.passes_qc]
+
+
+def get_passed_results(samples: list[SampleQualityResult]) -> list[SampleQualityResult]:
+    return [result for result in samples if result.passes_qc]
diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py
new file mode 100644
index 0000000000..f1b4bdb525
--- /dev/null
+++ b/cg/meta/workflow/microsalt/quality_controller/utils.py
@@ -0,0 +1,146 @@
+from pathlib import Path
+
+from cg.constants.constants import MicrosaltAppTags, MicrosaltQC
+from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME
+from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    SampleQualityResult,
+)
+from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger
+from cg.models.orders.sample_base import ControlEnum
+from cg.store.models import Sample
+
+
+def is_valid_total_reads(reads: int, target_reads: int, threshold_percentage: int) -> bool:
+    return reads > target_reads * threshold_percentage / 100
+
+
+def is_valid_total_reads_for_negative_control(reads: int, target_reads: int) -> bool:
+    return reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD
+
+
+def is_valid_mapping_rate(mapping_rate: float) -> bool:
+    return mapping_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD
+
+
+def is_valid_duplication_rate(duplication_rate: float) -> bool:
+    return duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD
+
+
+def is_valid_median_insert_size(insert_size: int) -> bool:
+    return insert_size > MicrosaltQC.INSERT_SIZE_THRESHOLD
+
+
+def is_valid_average_coverage(average_coverage: float) -> bool:
+    return average_coverage > MicrosaltQC.AVERAGE_COVERAGE_THRESHOLD
+
+
+def is_valid_10x_coverage(coverage_10x: float) -> bool:
+    return coverage_10x > MicrosaltQC.COVERAGE_10X_THRESHOLD
+
+
+def has_valid_mapping_rate(metrics: SampleMetrics) -> bool:
+    mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate
+    return is_valid_mapping_rate(mapped_rate) if mapped_rate else False
+
+
+def has_valid_duplication_rate(metrics: SampleMetrics) -> bool:
+    duplication_rate: float | None = metrics.picard_markduplicate.duplication_rate
+    return is_valid_duplication_rate(duplication_rate) if duplication_rate else False
+
+
+def has_valid_median_insert_size(metrics: SampleMetrics) -> bool:
+    insert_size: int | None = metrics.picard_markduplicate.insert_size
+    return is_valid_median_insert_size(insert_size) if insert_size else False
+
+
+def has_valid_average_coverage(metrics: SampleMetrics) -> bool:
+    coverage: float | None = metrics.microsalt_samtools_stats.average_coverage
+    return is_valid_average_coverage(coverage) if coverage else False
+
+
+def has_valid_10x_coverage(metrics: SampleMetrics) -> bool:
+    coverage_10x: float | None = metrics.microsalt_samtools_stats.coverage_10x
+    return is_valid_10x_coverage(coverage_10x) if coverage_10x else False
+
+
+def get_negative_control_result(results: list[SampleQualityResult]) -> SampleQualityResult | None:
+    for result in results:
+        if result.is_control:
+            return result
+
+
+def negative_control_pass_qc(results: list[SampleQualityResult]) -> bool:
+    if negative_control_result := get_negative_control_result(results):
+        return negative_control_result.passes_qc
+    return True
+
+
+def get_results_passing_qc(results: list[SampleQualityResult]) -> list[SampleQualityResult]:
+    return [result for result in results if result.passes_qc]
+
+
+def get_non_urgent_results(results: list[SampleQualityResult]) -> list[SampleQualityResult]:
+    return [result for result in results if not is_urgent_result(result)]
+
+
+def get_urgent_results(results: list[SampleQualityResult]) -> list[SampleQualityResult]:
+    return [result for result in results if is_urgent_result(result)]
+
+
+def is_urgent_result(result: SampleQualityResult) -> bool:
+    return result.application_tag == MicrosaltAppTags.MWRNXTR003
+
+
+def urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool:
+    urgent_results: list[SampleQualityResult] = get_urgent_results(results)
+    return all(result.passes_qc for result in urgent_results)
+
+
+def non_urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool:
+    non_urgent_samples: list[SampleQualityResult] = get_non_urgent_results(results)
+    passing_qc: list[SampleQualityResult] = get_results_passing_qc(non_urgent_samples)
+
+    if not non_urgent_samples:
+        return True
+
+    fraction_passing_qc: float = len(passing_qc) / len(non_urgent_samples)
+    return fraction_passing_qc >= MicrosaltQC.MWX_THRESHOLD_SAMPLES_PASSING
+
+
+def is_sample_negative_control(sample: Sample) -> bool:
+    return sample.control == ControlEnum.negative
+
+
+def get_application_tag(sample: Sample) -> str:
+    return sample.application_version.application.tag
+
+
+def get_sample_target_reads(sample: Sample) -> int:
+    return sample.application_version.application.target_reads
+
+
+def get_percent_reads_guaranteed(sample: Sample) -> int:
+    return sample.application_version.application.percent_reads_guaranteed
+
+
+def get_report_path(metrics_file_path: Path) -> Path:
+    return metrics_file_path.parent.joinpath(QUALITY_REPORT_FILE_NAME)
+
+
+def quality_control_case(sample_results: list[SampleQualityResult]) -> CaseQualityResult:
+    control_pass_qc: bool = negative_control_pass_qc(sample_results)
+    urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results)
+    non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results)
+
+    case_passes_qc: bool = control_pass_qc and urgent_pass_qc and non_urgent_pass_qc
+
+    result = CaseQualityResult(
+        passes_qc=case_passes_qc,
+        control_passes_qc=control_pass_qc,
+        urgent_passes_qc=urgent_pass_qc,
+        non_urgent_passes_qc=non_urgent_pass_qc,
+    )
+    ResultLogger.log_case_result(result)
+    return result
diff --git a/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json b/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json
index e93e00a6bd..b1708ae86f 100644
--- a/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json
+++ b/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json
@@ -32,28 +32,33 @@
     },
     "ACC22222A2": {
         "blast_pubmlst": {
-            "sequence_type": "",
-            "thresholds": ""
+            "sequence_type": "8",
+            "thresholds": "Passed"
         },
         "quast_assembly": {
-            "estimated_genome_length": "",
-            "gc_percentage": "",
-            "n50": "",
-            "necessary_contigs": ""
+            "estimated_genome_length": 2869750,
+            "gc_percentage": "32.6000000000",
+            "n50": 209899,
+            "necessary_contigs": 33
         },
-        "blast_resfinder_resistence": [],
+        "blast_resfinder_resistence": [
+            "aph(3')-III",
+            "mecA",
+            "mph(C)",
+            "msr(A)"
+        ],
         "picard_markduplicate": {
-            "insert_size": "",
-            "duplication_rate": ""
+            "insert_size": 215,
+            "duplication_rate": 0.105372
         },
         "microsalt_samtools_stats": {
-            "total_reads": 42,
-            "mapped_rate": "",
-            "average_coverage": "",
-            "coverage_10x": "",
-            "coverage_30x": "",
-            "coverage_50x": "",
-            "coverage_100x": ""
+            "total_reads": 3079934,
+            "mapped_rate": 0.8400670274103276,
+            "average_coverage": 120.99379962716425,
+            "coverage_10x": 0.9365702921426038,
+            "coverage_30x": 0.9330515081968174,
+            "coverage_50x": 0.9282236223103506,
+            "coverage_100x": 0.8010769589265144
         }
     },
     "ACC22222A3": {
diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py
index 0b632f515a..cacff7f38f 100644
--- a/tests/meta/workflow/conftest.py
+++ b/tests/meta/workflow/conftest.py
@@ -1,15 +1,17 @@
 """Fixtures for the workflow tests."""
 import datetime
 from pathlib import Path
+import shutil
 
 import pytest
 
-from cg.constants.constants import MicrosaltAppTags, MicrosaltQC, Pipeline
+from cg.constants.constants import CaseActions, MicrosaltAppTags, MicrosaltQC, Pipeline
 from cg.meta.compress.compress import CompressAPI
 from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI
 from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI
 from cg.models.cg_config import CGConfig
 from cg.models.compression_data import CompressionData
+from cg.models.orders.sample_base import ControlEnum
 from cg.store.models import Case, Sample
 from tests.cli.workflow.balsamic.conftest import (
     balsamic_housekeeper_dir,
@@ -19,6 +21,7 @@
 )
 from tests.meta.compress.conftest import compress_api, real_crunchy_api
 from tests.meta.upload.scout.conftest import another_sample_id
+from tests.mocks.tb_mock import MockTB
 from tests.store_helpers import StoreHelpers
 
 
@@ -113,6 +116,39 @@ def microsalt_qc_fail_lims_project() -> str:
     return "ACC11111_qc_fail"
 
 
+@pytest.fixture
+def metrics_file_failing_qc(
+    microsalt_qc_fail_run_dir_path: Path,
+    microsalt_qc_fail_lims_project: str,
+    tmp_path: Path,
+) -> Path:
+    """Return a metrics file that fails QC with corresponding samples in the database."""
+    metrics_path = Path(microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json")
+    temp_metrics_path = Path(tmp_path, metrics_path.name)
+    shutil.copy(metrics_path, temp_metrics_path)
+    return temp_metrics_path
+
+
+@pytest.fixture
+def metrics_file_passing_qc(
+    microsalt_qc_pass_run_dir_path: Path,
+    microsalt_qc_pass_lims_project: str,
+    tmp_path: Path,
+) -> Path:
+    """Return a metrics file that passes QC with corresponding samples in the database."""
+    metrics_path = Path(microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json")
+    temp_metrics_path = Path(tmp_path, metrics_path.name)
+    shutil.copy(metrics_path, temp_metrics_path)
+    return temp_metrics_path
+
+
+@pytest.fixture
+def microsalt_metrics_file(
+    microsalt_qc_fail_run_dir_path: Path, microsalt_qc_fail_lims_project: str
+) -> Path:
+    return Path(microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json")
+
+
 @pytest.fixture(name="microsalt_case_qc_pass")
 def microsalt_case_qc_pass() -> str:
     """Return a microsalt case to pass QC."""
@@ -145,10 +181,9 @@ def qc_microsalt_context(
     microsalt_case_qc_fail: str,
     qc_pass_microsalt_samples: list[str],
     qc_fail_microsalt_samples: list[str],
-    microsalt_qc_pass_lims_project: str,
-    microsalt_qc_fail_lims_project: str,
 ) -> CGConfig:
     """Return a Microsalt CG context."""
+    cg_context.trailblazer_api_ = MockTB()
     analysis_api = MicrosaltAnalysisAPI(cg_context)
     store = analysis_api.status_db
 
@@ -158,9 +193,10 @@ def qc_microsalt_context(
         internal_id=microsalt_case_qc_pass,
         name=microsalt_case_qc_pass,
         data_analysis=Pipeline.MICROSALT,
+        action=CaseActions.RUNNING,
     )
 
-    for sample in qc_pass_microsalt_samples:
+    for sample in qc_pass_microsalt_samples[1:]:
         sample_to_add: Sample = helpers.add_sample(
             store=store,
             application_tag=MicrosaltAppTags.MWRNXTR003,
@@ -172,6 +208,20 @@ def qc_microsalt_context(
 
         helpers.add_relationship(store=store, case=microsalt_case_qc_pass, sample=sample_to_add)
 
+    # Add a negative control sample that passes QC
+    negative_control_sample: Sample = helpers.add_sample(
+        store=store,
+        internal_id=qc_pass_microsalt_samples[0],
+        application_tag=MicrosaltAppTags.MWRNXTR003,
+        application_type=MicrosaltAppTags.PREP_CATEGORY,
+        reads=0,
+        last_sequenced_at=datetime.datetime.now(),
+        control=ControlEnum.negative,
+    )
+    helpers.add_relationship(
+        store=store, case=microsalt_case_qc_pass, sample=negative_control_sample
+    )
+
     # Create a microsalt MWX case that fails QC
     microsalt_case_qc_fail: Case = helpers.add_case(
         store=store,
@@ -188,6 +238,7 @@ def qc_microsalt_context(
             internal_id=sample,
             reads=MicrosaltQC.TARGET_READS,
             last_sequenced_at=datetime.datetime.now(),
+            control=ControlEnum.negative,
         )
 
         helpers.add_relationship(store=store, case=microsalt_case_qc_fail, sample=sample_to_add)
diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py
new file mode 100644
index 0000000000..6dde2e0ed6
--- /dev/null
+++ b/tests/meta/workflow/microsalt/conftest.py
@@ -0,0 +1,118 @@
+import pytest
+from cg.constants.constants import MicrosaltAppTags
+from cg.meta.workflow.microsalt.metrics_parser.models import (
+    MicrosaltSamtoolsStats,
+    PicardMarkduplicate,
+    SampleMetrics,
+)
+
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    SampleQualityResult,
+)
+from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController
+from cg.store.api.core import Store
+
+
+def create_sample_metrics(
+    total_reads: int | None = 100,
+    mapped_rate: float | None = 0.8,
+    duplication_rate: float | None = 0.1,
+    insert_size: int | None = 200,
+    average_coverage: float | None = 30.0,
+    coverage_10x: float | None = 95.0,
+) -> SampleMetrics:
+    return SampleMetrics(
+        microsalt_samtools_stats=MicrosaltSamtoolsStats(
+            total_reads=total_reads,
+            mapped_rate=mapped_rate,
+            average_coverage=average_coverage,
+            coverage_10x=coverage_10x,
+        ),
+        picard_markduplicate=PicardMarkduplicate(
+            insert_size=insert_size, duplication_rate=duplication_rate
+        ),
+    )
+
+
+def create_quality_result(
+    sample_id: str = "sample_1",
+    passes_qc: bool = True,
+    is_control: bool = False,
+    application_tag: str = MicrosaltAppTags.MWRNXTR003,
+    passes_reads_qc: bool = True,
+    passes_mapping_qc: bool = True,
+    passes_duplication_qc: bool = True,
+    passes_inserts_qc: bool = True,
+    passes_coverage_qc: bool = True,
+    passes_10x_coverage_qc: bool = True,
+) -> SampleQualityResult:
+    return SampleQualityResult(
+        sample_id=sample_id,
+        passes_qc=passes_qc,
+        is_control=is_control,
+        application_tag=application_tag,
+        passes_reads_qc=passes_reads_qc,
+        passes_mapping_qc=passes_mapping_qc,
+        passes_duplication_qc=passes_duplication_qc,
+        passes_inserts_qc=passes_inserts_qc,
+        passes_coverage_qc=passes_coverage_qc,
+        passes_10x_coverage_qc=passes_10x_coverage_qc,
+    )
+
+
+@pytest.fixture
+def quality_results() -> list[SampleQualityResult]:
+    return [
+        SampleQualityResult(
+            sample_id="sample_1",
+            passes_qc=False,
+            is_control=True,
+            application_tag=MicrosaltAppTags.MWRNXTR003,
+            passes_reads_qc=True,
+            passes_mapping_qc=True,
+            passes_duplication_qc=False,
+            passes_inserts_qc=True,
+            passes_coverage_qc=True,
+            passes_10x_coverage_qc=True,
+        ),
+        SampleQualityResult(
+            sample_id="sample_2",
+            passes_qc=True,
+            is_control=False,
+            application_tag=MicrosaltAppTags.MWRNXTR003,
+            passes_reads_qc=True,
+            passes_mapping_qc=True,
+            passes_duplication_qc=True,
+            passes_inserts_qc=True,
+            passes_coverage_qc=True,
+            passes_10x_coverage_qc=True,
+        ),
+        SampleQualityResult(
+            sample_id="sample_3",
+            passes_qc=False,
+            is_control=False,
+            application_tag=MicrosaltAppTags.MWRNXTR003,
+            passes_reads_qc=False,
+            passes_mapping_qc=True,
+            passes_duplication_qc=False,
+            passes_inserts_qc=True,
+            passes_coverage_qc=True,
+            passes_10x_coverage_qc=False,
+        ),
+    ]
+
+
+@pytest.fixture
+def case_result():
+    return CaseQualityResult(
+        passes_qc=False,
+        control_passes_qc=True,
+        urgent_passes_qc=True,
+        non_urgent_passes_qc=True,
+    )
+
+
+@pytest.fixture
+def quality_controller(store: Store) -> QualityController:
+    return QualityController(store)
diff --git a/tests/meta/workflow/microsalt/test_parsing_metrics.py b/tests/meta/workflow/microsalt/test_parsing_metrics.py
new file mode 100644
index 0000000000..b1b614691c
--- /dev/null
+++ b/tests/meta/workflow/microsalt/test_parsing_metrics.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+
+from cg.meta.workflow.microsalt.metrics_parser import MetricsParser
+
+
+def test_parse_valid_quality_metrics(microsalt_metrics_file: Path):
+    # GIVEN a valid quality metrics file path
+
+    # WHEN parsing the file
+    MetricsParser.parse(microsalt_metrics_file)
+
+    # THEN no error is thrown
diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py
new file mode 100644
index 0000000000..09b5f1e565
--- /dev/null
+++ b/tests/meta/workflow/microsalt/test_quality_controller.py
@@ -0,0 +1,99 @@
+from pathlib import Path
+from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME
+
+from cg.meta.workflow.microsalt.quality_controller import QualityController
+from cg.meta.workflow.microsalt.quality_controller.models import QualityResult
+from cg.models.cg_config import CGConfig
+from cg.store.api.core import Store
+from cg.store.models import Application, Sample
+from tests.store_helpers import StoreHelpers
+
+PRICES = {"standard": 1_000, "priority": 2_000, "express": 3_000, "research": 4_000}
+
+
+def test_is_valid_total_reads_passes(quality_controller: QualityController):
+    # GIVEN an application
+    store = quality_controller.status_db
+    application: Application = StoreHelpers.add_application(store=store, target_reads=1_000)
+
+    # GIVEN an application version
+    version = StoreHelpers.add_application_version(
+        store=store,
+        application=application,
+        prices=PRICES,
+    )
+
+    # GIVEN a sample with a number of reads that is above the target reads
+    sample: Sample = StoreHelpers.add_sample(store=store, reads=10_000)
+
+    # GIVEN that the sample is associated with the application version
+    sample.application_version = version
+
+    # WHEN controlling the quality of the sample reads
+    has_valid_reads: bool = quality_controller.has_valid_total_reads(sample.internal_id)
+
+    # THEN the sample passes the quality control
+    assert has_valid_reads
+
+
+def test_is_valid_total_reads_fails(quality_controller: QualityController):
+    # GIVEN an application
+    store = quality_controller.status_db
+    application: Application = StoreHelpers.add_application(store=store, target_reads=1_000)
+
+    # GIVEN an application version
+    version = StoreHelpers.add_application_version(
+        store=store,
+        application=application,
+        prices=PRICES,
+    )
+
+    # GIVEN a sample with a number of reads that is far below the target reads
+    sample: Sample = StoreHelpers.add_sample(store=store, reads=100)
+
+    # GIVEN that the sample is associated with the application version
+    sample.application_version = version
+
+    # WHEN controlling the quality of the sample reads
+    has_valid_reads: bool = quality_controller.has_valid_total_reads(sample.internal_id)
+
+    # THEN the sample fails the quality control
+    assert not has_valid_reads
+
+
+def test_quality_control_fails(qc_microsalt_context: CGConfig, metrics_file_failing_qc: Path):
+    # GIVEN a metrics file with samples that should fail the quality control
+
+    # GIVEN a store containing the corresponding samples
+    store: Store = qc_microsalt_context.status_db
+
+    # GIVEN a quality controller
+    quality_controller = QualityController(store)
+
+    # WHEN performing the quality control
+    result: QualityResult = quality_controller.quality_control(metrics_file_failing_qc)
+
+    # THEN the case should fail the quality control
+    assert not result.passes_qc
+
+    # THEN a report should be generated
+    assert metrics_file_failing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists()
+
+
+def test_quality_control_passes(qc_microsalt_context: CGConfig, metrics_file_passing_qc: Path):
+    # GIVEN a metrics file with samples that should pass the quality control
+
+    # GIVEN a store containing the corresponding samples
+    store: Store = qc_microsalt_context.status_db
+
+    # GIVEN a quality controller
+    quality_controller = QualityController(store)
+
+    # WHEN performing the quality control
+    result: QualityResult = quality_controller.quality_control(metrics_file_passing_qc)
+
+    # THEN the case should pass the quality control
+    assert result.passes_qc
+
+    # THEN a report should be generated
+    assert metrics_file_passing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists()
diff --git a/tests/meta/workflow/microsalt/test_quality_controller_utils.py b/tests/meta/workflow/microsalt/test_quality_controller_utils.py
new file mode 100644
index 0000000000..7a6ab022da
--- /dev/null
+++ b/tests/meta/workflow/microsalt/test_quality_controller_utils.py
@@ -0,0 +1,436 @@
+from cg.constants.constants import MicrosaltAppTags
+from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics
+from cg.meta.workflow.microsalt.quality_controller.models import SampleQualityResult
+from cg.meta.workflow.microsalt.quality_controller.utils import (
+    get_non_urgent_results,
+    get_urgent_results,
+    has_valid_10x_coverage,
+    has_valid_average_coverage,
+    has_valid_duplication_rate,
+    has_valid_mapping_rate,
+    has_valid_median_insert_size,
+    is_valid_10x_coverage,
+    is_valid_average_coverage,
+    is_valid_duplication_rate,
+    is_valid_mapping_rate,
+    is_valid_median_insert_size,
+    is_valid_total_reads,
+    is_valid_total_reads_for_negative_control,
+    negative_control_pass_qc,
+    non_urgent_samples_pass_qc,
+    urgent_samples_pass_qc,
+)
+from tests.meta.workflow.microsalt.conftest import create_quality_result, create_sample_metrics
+
+
+def test_sample_total_reads_passing():
+    # GIVEN sufficient reads
+    sample_reads = 100
+    target_reads = 100
+
+    # WHEN checking if the sample has sufficient reads
+    passes_reads_threshold: bool = is_valid_total_reads(
+        reads=sample_reads, target_reads=target_reads, threshold_percentage=90
+    )
+
+    # THEN it passes
+    assert passes_reads_threshold
+
+
+def test_sample_total_reads_failing():
+    # GIVEN insufficient reads
+    sample_reads = 50
+    target_reads = 100
+
+    # WHEN checking if the sample has sufficient reads
+    passes_reads_threshold: bool = is_valid_total_reads(
+        reads=sample_reads, target_reads=target_reads, threshold_percentage=90
+    )
+
+    # THEN it fails
+    assert not passes_reads_threshold
+
+
+def test_sample_total_reads_failing_without_reads():
+    # GIVEN no reads
+    sample_reads = 0
+    target_reads = 100
+
+    # WHEN checking if the sample has sufficient reads
+    passes_reads_threshold: bool = is_valid_total_reads(
+        reads=sample_reads, target_reads=target_reads, threshold_percentage=90
+    )
+
+    # THEN it fails
+    assert not passes_reads_threshold
+
+
+def test_control_total_reads_passing():
+    # GIVEN a negative control sample with few reads
+    sample_reads = 1
+    target_reads = 100
+
+    # WHEN checking if the control read count is valid
+    passes_reads_threshold: bool = is_valid_total_reads_for_negative_control(
+        reads=sample_reads, target_reads=target_reads
+    )
+
+    # THEN it passes
+    assert passes_reads_threshold
+
+
+def test_control_total_reads_failing():
+    # GIVEN a negative control sample with many reads
+    sample_reads = 100
+    target_reads = 100
+
+    # WHEN checking if the control read count is valid
+    passes_reads_threshold: bool = is_valid_total_reads_for_negative_control(
+        reads=sample_reads, target_reads=target_reads
+    )
+
+    # THEN it fails
+    assert not passes_reads_threshold
+
+
+def test_control_total_reads_passing_without_reads():
+    # GIVEN a negative control sample without reads
+    sample_reads = 0
+    target_reads = 100
+
+    # WHEN checking if the control read count is valid
+    passes_reads_threshold: bool = is_valid_total_reads_for_negative_control(
+        reads=sample_reads, target_reads=target_reads
+    )
+
+    # THEN it passes
+    assert passes_reads_threshold
+
+
+def test_is_valid_mapping_rate_passing():
+    # GIVEN a high mapping rate
+    mapping_rate = 0.99
+
+    # WHEN checking if the mapping rate is valid
+    passes_mapping_rate_threshold: bool = is_valid_mapping_rate(mapping_rate)
+
+    # THEN it passes
+    assert passes_mapping_rate_threshold
+
+
+def test_is_valid_mapping_rate_failing():
+    # GIVEN a low mapping rate
+    mapping_rate = 0.1
+
+    # WHEN checking if the mapping rate is valid
+    passes_mapping_rate_threshold: bool = is_valid_mapping_rate(mapping_rate)
+
+    # THEN it fails
+    assert not passes_mapping_rate_threshold
+
+
+def test_is_valid_duplication_rate_passing():
+    # GIVEN a low duplication rate
+    duplication_rate = 0.1
+
+    # WHEN checking if the duplication rate is valid
+    passes_duplication_qc: bool = is_valid_duplication_rate(duplication_rate)
+
+    # THEN it passes
+    assert passes_duplication_qc
+
+
+def test_is_valid_duplication_rate_failing():
+    # GIVEN a high duplication rate
+    duplication_rate = 0.9
+
+    # WHEN checking if the duplication rate is valid
+    passes_duplication_qc: bool = is_valid_duplication_rate(duplication_rate)
+
+    # THEN it fails
+    assert not passes_duplication_qc
+
+
+def test_is_valid_median_insert_size_passing():
+    # GIVEN a high median insert size
+    insert_size = 1000
+
+    # WHEN checking if the median insert size is valid
+    passes_insert_size_qc: bool = is_valid_median_insert_size(insert_size)
+
+    # THEN it passes
+    assert passes_insert_size_qc
+
+
+def test_is_valid_median_insert_size_failing():
+    # GIVEN a low median insert size
+    insert_size = 10
+
+    # WHEN checking if the median insert size is valid
+    passes_insert_size_qc = is_valid_median_insert_size(insert_size)
+
+    # THEN it fails
+    assert not passes_insert_size_qc
+
+
+def test_is_valid_average_coverage_passing():
+    # GIVEN a high average coverage
+    average_coverage = 50
+
+    # WHEN checking if the average coverage is valid
+    passes_average_coverage_qc: bool = is_valid_average_coverage(average_coverage)
+
+    # THEN it passes
+    assert passes_average_coverage_qc
+
+
+def test_is_valid_average_coverage_failing():
+    # GIVEN a low average coverage
+    average_coverage = 1
+
+    # WHEN checking if the average coverage is valid
+    passes_average_coverage_qc: bool = is_valid_average_coverage(average_coverage)
+
+    # THEN it fails
+    assert not passes_average_coverage_qc
+
+
+def test_is_valid_10x_coverage_passing():
+    # GIVEN a high percent of bases covered at 10x
+    coverage_10x = 0.95
+
+    # WHEN checking if the coverage is valid
+    passes_coverage_10x_qc: bool = is_valid_10x_coverage(coverage_10x)
+
+    # THEN it passes
+    assert passes_coverage_10x_qc
+
+
+def test_is_valid_10x_coverage_failing():
+    # GIVEN a low percent of bases covered at 10x
+    coverage_10x = 0.1
+
+    # WHEN checking if the coverage is valid
+    passes_coverage_10x_qc: bool = is_valid_10x_coverage(coverage_10x)
+
+    # THEN it fails
+    assert not passes_coverage_10x_qc
+
+
+def test_has_valid_mapping_rate_passing():
+    # GIVEN metrics with a high mapping rate
+    metrics: SampleMetrics = create_sample_metrics(mapped_rate=0.8)
+
+    # WHEN checking if the mapping rate is valid
+    passes_mapping_rate_qc: bool = has_valid_mapping_rate(metrics)
+
+    # THEN it passes the quality control
+    assert passes_mapping_rate_qc
+
+
+def test_has_valid_mapping_rate_missing():
+    # GIVEN metrics without a mapping rate
+    metrics: SampleMetrics = create_sample_metrics(mapped_rate=None)
+
+    # WHEN checking if the mapping rate is valid
+    passes_mapping_rate_qc: bool = has_valid_mapping_rate(metrics)
+
+    # THEN it fails the quality control
+    assert not passes_mapping_rate_qc
+
+
+def test_has_valid_duplication_rate_passing():
+    # GIVEN metrics with a low duplication rate
+    metrics: SampleMetrics = create_sample_metrics(duplication_rate=0.1)
+
+    # WHEN checking if the duplication rate is valid
+    passes_duplication_rate_qc: bool = has_valid_duplication_rate(metrics)
+
+    # THEN it passes the quality control
+    assert passes_duplication_rate_qc
+
+
+def test_has_valid_duplication_rate_missing():
+    # GIVEN metrics without a duplication rate
+    metrics: SampleMetrics = create_sample_metrics(duplication_rate=None)
+
+    # WHEN checking if the duplication rate is valid
+    passes_duplication_rate_qc: bool = has_valid_duplication_rate(metrics)
+
+    # THEN it fails the quality control
+    assert not passes_duplication_rate_qc
+
+
+def test_has_valid_median_insert_size_passing():
+    # GIVEN metrics with a high median insert size
+    metrics: SampleMetrics = create_sample_metrics(insert_size=200)
+
+    # WHEN checking if the median insert size is valid
+    passes_insert_size_qc: bool = has_valid_median_insert_size(metrics)
+
+    # THEN it passes the quality control
+    assert passes_insert_size_qc
+
+
+def test_has_valid_median_insert_size_missing():
+    # GIVEN metrics without a median insert size
+    metrics: SampleMetrics = create_sample_metrics(insert_size=None)
+
+    # WHEN checking if the median insert size is valid
+    passes_insert_size_qc: bool = has_valid_median_insert_size(metrics)
+
+    # THEN it fails the quality control
+    assert not passes_insert_size_qc
+
+
+def test_has_valid_average_coverage_passes():
+    # GIVEN metrics with a high average coverage
+    metrics: SampleMetrics = create_sample_metrics(average_coverage=30.0)
+
+    # WHEN checking if the average coverage is valid
+    passes_average_coverage_qc: bool = has_valid_average_coverage(metrics)
+
+    # THEN it passes the quality control
+    assert passes_average_coverage_qc
+
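+
+# NOTE (reviewer annotation): the has_valid_* helpers treat a missing metric
+# value as a QC failure, so an empty field in the metrics JSON never passes
+# silently. The next tests pin down that behavior.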
+ +def test_has_valid_average_coverage_missing(): + # GIVEN metrics without an average coverage + metrics: SampleMetrics = create_sample_metrics(average_coverage=None) + + # WHEN checking if the average coverage is valid + passes_average_coverage_qc: bool = has_valid_average_coverage(metrics) + + # THEN it fails the quality control + assert not passes_average_coverage_qc + + +def test_has_valid_10x_coverage_passing(): + # GIVEN metrics with a high percent of bases covered at 10x + metrics: SampleMetrics = create_sample_metrics(coverage_10x=95.0) + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc: bool = has_valid_10x_coverage(metrics) + + # THEN it passes the quality control + assert passes_coverage_10x_qc + + +def test_has_valid_10x_coverage_missing(): + # GIVEN metrics without a percent of bases covered at 10x + metrics: SampleMetrics = create_sample_metrics(coverage_10x=None) + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc: bool = has_valid_10x_coverage(metrics) + + # THEN it fails the quality control + assert not passes_coverage_10x_qc + + +def test_negative_control_passes_qc(): + # GIVEN a negative control sample that passes quality control + control_result: SampleQualityResult = create_quality_result(is_control=True) + other_result: SampleQualityResult = create_quality_result(passes_qc=False) + + # WHEN checking if the negative control passes quality control + control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) + + # THEN it passes quality control + assert control_passes_qc + + +def test_negative_control_fails_qc(): + # GIVEN a negative control sample that fails quality control + control_result: SampleQualityResult = create_quality_result(is_control=True, passes_qc=False) + other_result: SampleQualityResult = create_quality_result() + + # WHEN checking if the negative control passes quality control + control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) + + # THEN it fails quality control + assert not control_passes_qc + + +def test_get_urgent_results(): + # GIVEN quality results with urgent and non-urgent samples + urgent_result: SampleQualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True + ) + non_urgent_result: SampleQualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True + ) + quality_results: list[SampleQualityResult] = [urgent_result, non_urgent_result] + + # WHEN getting the urgent results + urgent_results: list[SampleQualityResult] = get_urgent_results(quality_results) + + # THEN the urgent results are returned + assert urgent_results == [urgent_result] + + +def test_urgent_samples_pass_qc(): + # GIVEN quality results with urgent samples that pass quality control + urgent_result: SampleQualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True + ) + urgent_result_control: SampleQualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True + ) + urgent_results: list[SampleQualityResult] = [urgent_result, urgent_result_control] + + # WHEN checking if the urgent samples pass quality control + urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results) + + # THEN it passes quality control + assert urgent_pass_qc + + +def test_urgent_samples_fail_qc(): + # GIVEN quality results with urgent samples that fail quality control + urgent_result: SampleQualityResult = create_quality_result( + 
+        application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=False
+    )
+    urgent_result_control: SampleQualityResult = create_quality_result(
+        application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True
+    )
+    urgent_results: list[SampleQualityResult] = [urgent_result, urgent_result_control]
+
+    # WHEN checking if the urgent samples pass quality control
+    urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results)
+
+    # THEN it fails quality control
+    assert not urgent_pass_qc
+
+
+def test_get_non_urgent_results():
+    # GIVEN quality results with urgent and non-urgent samples
+    urgent_result: SampleQualityResult = create_quality_result(
+        application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True
+    )
+    non_urgent_result: SampleQualityResult = create_quality_result(
+        application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True
+    )
+    quality_results: list[SampleQualityResult] = [urgent_result, non_urgent_result]
+
+    # WHEN getting the non-urgent results
+    non_urgent_results: list[SampleQualityResult] = get_non_urgent_results(quality_results)
+
+    # THEN the non-urgent results are returned
+    assert non_urgent_results == [non_urgent_result]
+
+
+def test_non_urgent_samples_pass_qc():
+    # GIVEN quality results with non-urgent samples that pass quality control
+    non_urgent_result: SampleQualityResult = create_quality_result(
+        application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True
+    )
+    non_urgent_result_control: SampleQualityResult = create_quality_result(
+        application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True, is_control=True
+    )
+    non_urgent_results: list[SampleQualityResult] = [non_urgent_result, non_urgent_result_control]
+
+    # WHEN checking if the non-urgent samples pass quality control
+    non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(non_urgent_results)
+
+    # THEN it passes quality control
+    assert non_urgent_pass_qc
diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py
new file mode 100644
index 0000000000..494783e15c
--- /dev/null
+++ b/tests/meta/workflow/microsalt/test_report_generation.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME
+
+from cg.meta.workflow.microsalt.quality_controller.models import (
+    CaseQualityResult,
+    SampleQualityResult,
+)
+from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator
+
+
+def test_generate_report_with_results(
+    quality_results: list[SampleQualityResult], case_result: CaseQualityResult, tmp_path: Path
+):
+    # GIVEN quality results
+
+    # GIVEN a file path to write them to
+    out_file = Path(tmp_path, QUALITY_REPORT_FILE_NAME)
+
+    # WHEN generating a report
+    ReportGenerator.report(out_file=out_file, samples=quality_results, case=case_result)
+
+    # THEN the report is created
+    assert out_file.exists()
+
+    # THEN the report is populated
+    assert out_file.read_text()
diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py
index f2d3d54fcb..d2a827525a 100644
--- a/tests/meta/workflow/test_microsalt.py
+++ b/tests/meta/workflow/test_microsalt.py
@@ -1,161 +1,33 @@
 """Tests for MicroSALT analysis."""
-import logging
 from pathlib import Path
-import mock
+from mock import MagicMock
+from cg.apps.lims.api import LimsAPI
-from cg.constants.constants import CaseActions, Pipeline
 from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI
 from cg.models.cg_config import CGConfig
-from cg.models.orders.sample_base import ControlEnum
-from cg.store import Store
 from cg.store.models import Case
-from tests.mocks.tb_mock import MockTB
-from tests.store_helpers import StoreHelpers


-def test_qc_check_fail(
+def test_get_cases_to_store_pass(
     qc_microsalt_context: CGConfig,
-    microsalt_qc_fail_run_dir_path: Path,
-    microsalt_qc_fail_lims_project: str,
-    microsalt_case_qc_fail: str,
-    caplog,
     mocker,
-):
-    """QC check for a microsalt case that should fail."""
-    caplog.set_level(logging.INFO)
-    store: Store = qc_microsalt_context.status_db
-    microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"]
-
-    # GIVEN a case that is to be stored
-    microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail)
-    for index in range(4):
-        microsalt_case.samples[index].reads = 1000
-
-    mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file")
-
-    # WHEN performing QC check
-    qc_pass: bool = microsalt_api.microsalt_qc(
-        case_id=microsalt_case_qc_fail,
-        run_dir_path=microsalt_qc_fail_run_dir_path,
-        lims_project=microsalt_qc_fail_lims_project,
-    )
-
-    # THEN the QC should fail
-    assert not qc_pass
-    assert "failed" in caplog.text
-
-
-def test_qc_check_pass(
-    qc_microsalt_context: CGConfig,
     microsalt_qc_pass_run_dir_path: Path,
-    microsalt_qc_pass_lims_project: str,
-    microsalt_case_qc_pass: str,
-    caplog,
-    mocker,
+    metrics_file_passing_qc: Path,
 ):
-    """QC check for a microsalt case that should pass."""
-    caplog.set_level(logging.INFO)
-    store: Store = qc_microsalt_context.status_db
-    microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"]
-
-    # GIVEN a case that is to be stored
-    microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_pass)
-    microsalt_case.samples[1].control = ControlEnum.negative
-    microsalt_case.samples[1].reads = 1100000
-
-    mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file")
-
-    # WHEN performing QC check
-    qc_pass: bool = microsalt_api.microsalt_qc(
-        case_id=microsalt_case_qc_pass,
-        run_dir_path=microsalt_qc_pass_run_dir_path,
-        lims_project=microsalt_qc_pass_lims_project,
-    )
-
-    # THEN the QC should pass
-    assert qc_pass
-    assert "passed" in caplog.text
-
+    """Test get cases to store for a microsalt case that passes QC."""

-def test_qc_check_negative_control_fail(
-    qc_microsalt_context: CGConfig,
-    microsalt_qc_fail_run_dir_path: Path,
-    microsalt_qc_fail_lims_project: str,
-    microsalt_case_qc_fail: str,
-    caplog,
-    mocker,
-):
-    """QC check for a microsalt case where a negative control fails QC."""
-
-    caplog.set_level(logging.INFO)
-    store = qc_microsalt_context.status_db
+    # GIVEN a store with a QC-ready microsalt case that will pass QC
     microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"]
-
-    # GIVEN a case that is to be stored
-    microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail)
-    microsalt_case.samples[0].control = ControlEnum.negative
-
-    mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file")
-
-    # WHEN performing QC check
-    qc_pass: bool = microsalt_api.microsalt_qc(
-        case_id=microsalt_case_qc_fail,
-        run_dir_path=microsalt_qc_fail_run_dir_path,
-        lims_project=microsalt_qc_fail_lims_project,
+    mocker.patch.object(
+        MicrosaltAnalysisAPI, "get_metrics_file_path", return_value=metrics_file_passing_qc
     )
-
-    # THEN the QC should fail
-    assert not qc_pass
-    assert "failed" in caplog.text
-    assert "Negative control sample" in caplog.text
-
-
-def test_get_latest_case_path(
-    mocker,
-    qc_microsalt_context: CGConfig,
-    microsalt_case_qc_pass: str,
-    microsalt_analysis_dir: Path,
-):
-    """Test get_latest_case_path return the first case path and not single sample path"""
-    microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"]
-
-    # GIVEN a case with different case paths, both single sample and case analyses
-    mocker.patch.object(MicrosaltAnalysisAPI, "get_project", return_value="ACC12345")
     mocker.patch.object(
-        MicrosaltAnalysisAPI,
-        "get_case_path",
-        return_value=[
-            Path(microsalt_analysis_dir, "ACC12345A2_2023"),
-            Path(microsalt_analysis_dir, "ACC12345_2022"),
-            Path(microsalt_analysis_dir, "ACC12345A1_2023"),
-        ],
+        MicrosaltAnalysisAPI, "get_case_path", return_value=microsalt_qc_pass_run_dir_path
     )

-    # WHEN getting the latest case path
-    path = microsalt_api.get_latest_case_path(case_id=microsalt_case_qc_pass)
-
-    # THEN the first case path should be returned
-    assert Path(microsalt_analysis_dir, "ACC12345_2022") == path
-
-
-def test_get_cases_to_store(
-    qc_microsalt_context: CGConfig, helpers: StoreHelpers, trailblazer_api: MockTB
-):
-    """Test that the cases fetched are Microsalt and finished successfully."""
-    # GIVEN a MicrosaltAPI, a Store and a TrailblazerAPI
-    analysis_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"]
-    store: Store = analysis_api.status_db
-    mock.patch.object(trailblazer_api, "is_latest_analysis_completed", return_value=True)
-    analysis_api.trailblazer_api = trailblazer_api
-
-    # GIVEN a running case in the store
-    helpers.ensure_case(store=store, data_analysis=Pipeline.MICROSALT, action=CaseActions.RUNNING)

-    # WHEN getting the cases to store in Housekeeper
-    cases_to_store: list[Case] = analysis_api.get_cases_to_store()
-    case: Case = cases_to_store[0]
+    # WHEN retrieving cases to store
+    cases_to_store: list[Case] = microsalt_api.get_cases_to_store()

-    # THEN a list with one microsalt case is returned
-    assert len(cases_to_store) == 1
-    assert case.data_analysis == Pipeline.MICROSALT
-    assert case.action == CaseActions.RUNNING
+    # THEN cases should be returned
+    assert cases_to_store
diff --git a/tests/mocks/tb_mock.py b/tests/mocks/tb_mock.py
index 1bb09f7019..3355265db2 100644
--- a/tests/mocks/tb_mock.py
+++ b/tests/mocks/tb_mock.py
@@ -45,3 +45,6 @@ def is_latest_analysis_qc(self, case_id: str):

     def set_analysis_status(self, case_id: str, status: str):
         return
+
+    def add_comment(self, case_id: str, comment: str):
+        return
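
Editor's note: the unit tests above exercise per-sample validators from cg.meta.workflow.microsalt.quality_controller, whose implementation is not part of this diff. For orientation, here is a minimal sketch of how those helpers presumably behave, inferred only from the tests and the MicrosaltQC thresholds; every function body below is an assumption, not the merged code.

from cg.constants.constants import MicrosaltQC
from cg.meta.workflow.microsalt.metrics_parser import SampleMetrics


def is_valid_average_coverage(average_coverage: float) -> bool:
    # Assumed: the average coverage must reach AVERAGE_COVERAGE_THRESHOLD (10).
    return average_coverage >= MicrosaltQC.AVERAGE_COVERAGE_THRESHOLD


def is_valid_10x_coverage(coverage_10x: float) -> bool:
    # Assumed: 10x coverage is a fraction of bases, compared against 0.75.
    return coverage_10x >= MicrosaltQC.COVERAGE_10X_THRESHOLD


def has_valid_mapping_rate(metrics: SampleMetrics) -> bool:
    # Assumed: a missing metric fails QC, matching the *_missing tests above.
    mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate
    return mapped_rate is not None and mapped_rate >= MicrosaltQC.MAPPED_RATE_THRESHOLD


def has_valid_average_coverage(metrics: SampleMetrics) -> bool:
    # Assumed: the has_* helpers unwrap the parsed metric and delegate to is_valid_*.
    coverage: float | None = metrics.microsalt_samtools_stats.average_coverage
    return coverage is not None and is_valid_average_coverage(coverage)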
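
Similarly, a sketch of the case-level aggregation helpers these tests cover (negative_control_pass_qc, get_urgent_results, urgent_samples_pass_qc, non_urgent_samples_pass_qc). The SampleQualityResult field names (passes_qc, is_control, application_tag) are inferred from the create_quality_result calls in the tests, and the 0.9 pass fraction for MWX samples comes from MWX_THRESHOLD_SAMPLES_PASSING; treat the bodies as assumptions.

from cg.constants.constants import MicrosaltAppTags, MicrosaltQC
from cg.meta.workflow.microsalt.quality_controller.models import SampleQualityResult


def negative_control_pass_qc(results: list[SampleQualityResult]) -> bool:
    # Assumed: the case passes this check when every negative control passes QC.
    return all(result.passes_qc for result in results if result.is_control)


def get_urgent_results(results: list[SampleQualityResult]) -> list[SampleQualityResult]:
    # Per the tests, MWRNXTR003 is the urgent tag and MWXNXTR003 the non-urgent one.
    return [r for r in results if r.application_tag == MicrosaltAppTags.MWRNXTR003]


def urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool:
    # Assumed: every urgent sample must pass for the urgent subset to pass.
    return all(result.passes_qc for result in results)


def non_urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool:
    # Assumed: at least MWX_THRESHOLD_SAMPLES_PASSING (0.9) of the non-control
    # MWX samples must pass QC.
    samples = [r for r in results if not r.is_control]
    if not samples:
        return True
    fraction_passing = sum(r.passes_qc for r in samples) / len(samples)
    return fraction_passing >= MicrosaltQC.MWX_THRESHOLD_SAMPLES_PASSING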