From 3abe9210b0fd1d364002ddff3da2ecc5fd129d67 Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 7 Dec 2023 16:51:30 +0100 Subject: [PATCH 01/65] Add microsalt qc checks These were implemented in https://github.com/Clinical-Genomics/cg/pull/1655 and reverted in https://github.com/Clinical-Genomics/cg/pull/2505. --- cg/meta/workflow/microsalt.py | 49 +++++++++++++++- tests/meta/workflow/test_microsalt.py | 83 +++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) diff --git a/cg/meta/workflow/microsalt.py b/cg/meta/workflow/microsalt.py index e51283303f..1a07d93512 100644 --- a/cg/meta/workflow/microsalt.py +++ b/cg/meta/workflow/microsalt.py @@ -18,6 +18,7 @@ from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority from cg.constants.constants import MicrosaltAppTags, MicrosaltQC +from cg.constants.tb import AnalysisStatus from cg.exc import CgDataError from cg.io.json import read_json, write_json from cg.meta.workflow.analysis import AnalysisAPI @@ -288,7 +289,7 @@ def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> b case_qc: dict = read_json(file_path=Path(run_dir_path, f"{lims_project}.json")) for sample_id in case_qc: - sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id) + sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) sample_check: dict | None = self.qc_sample_check( sample=sample, sample_qc=case_qc[sample_id], @@ -373,3 +374,49 @@ def check_external_negative_control_sample(self, sample: Sample) -> bool: sample.application_version.application.target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD ) + + def get_cases_to_store(self) -> list[Case]: + cases_qc_ready: list[Case] = self.get_completed_cases() + cases_to_store: list[Case] = [] + LOG.info(f"Found {len(cases_qc_ready)} cases to perform QC on!") + + for case in cases_qc_ready: + case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) + if self.is_qc_required(case_run_dir=case_run_dir, case_id=case.internal_id): + if self.microsalt_qc( + case_id=case.internal_id, + run_dir_path=case_run_dir, + lims_project=self.get_project(case.samples[0].internal_id), + ): + self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") + cases_to_store.append(case) + else: + self.trailblazer_api.set_analysis_status( + case_id=case.internal_id, status=AnalysisStatus.FAILED + ) + self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC failed") + else: + cases_to_store.append(case) + + return cases_to_store + + def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: + """Checks if a qc is required for a microbial case.""" + if case_run_dir is None: + LOG.info(f"There are no running directories for case {case_id}.") + return False + + if case_run_dir.joinpath("QC_done.json").exists(): + LOG.info(f"QC already performed for case {case_id}, storing case.") + return False + + LOG.info(f"Performing QC on case {case_id}") + return True + + def get_completed_cases(self) -> list[Case]: + """Return cases that are completed in trailblazer.""" + return [ + case + for case in self.status_db.get_running_cases_in_pipeline(pipeline=self.pipeline) + if self.trailblazer_api.is_latest_analysis_completed(case_id=case.internal_id) + ] diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index f2d3d54fcb..1acdcf1067 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -3,6 +3,7 @@ from pathlib import Path import mock +from cg.apps.tb.api import TrailblazerAPI from cg.constants.constants import CaseActions, Pipeline from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI @@ -159,3 +160,85 @@ def test_get_cases_to_store( assert len(cases_to_store) == 1 assert case.data_analysis == Pipeline.MICROSALT assert case.action == CaseActions.RUNNING + + +def test_get_cases_to_store_pass( + qc_microsalt_context: CGConfig, + caplog, + mocker, + microsalt_qc_pass_lims_project: str, + microsalt_case_qc_pass: str, + microsalt_qc_pass_run_dir_path: Path, +): + """Test get cases to store for a microsalt case that passes QC.""" + + caplog.set_level(logging.INFO) + store = qc_microsalt_context.status_db + microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] + mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(TrailblazerAPI, "set_analysis_status") + mocker.patch.object(TrailblazerAPI, "add_comment") + + # GIVEN a store with a QC ready microsalt case that will pass QC + microsalt_pass_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_pass) + microsalt_pass_case.samples[1].control = "negative" + microsalt_pass_case.samples[1].reads = 1100000 + + mocker.patch.object( + MicrosaltAnalysisAPI, + "get_completed_cases", + return_value=[microsalt_pass_case], + ) + mocker.patch.object( + MicrosaltAnalysisAPI, "get_project", return_value=microsalt_qc_pass_lims_project + ) + + mocker.patch.object( + MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_pass_run_dir_path + ) + + # WHEN get cases to store + cases_to_store: list[Case] = microsalt_api.get_cases_to_store() + + # THEN it should be stored + assert microsalt_pass_case in cases_to_store + + +def test_get_cases_to_store_fail( + qc_microsalt_context: CGConfig, + caplog, + mocker, + microsalt_qc_fail_lims_project: str, + microsalt_case_qc_fail: str, + microsalt_qc_fail_run_dir_path: Path, +): + """Test get cases to store for a microsalt case that fails QC.""" + + caplog.set_level(logging.INFO) + store = qc_microsalt_context.status_db + microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] + mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(TrailblazerAPI, "set_analysis_status") + mocker.patch.object(TrailblazerAPI, "add_comment") + + # GIVEN a store with a QC ready microsalt case that will fail QC + microsalt_fail_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) + + mocker.patch.object( + MicrosaltAnalysisAPI, + "get_completed_cases", + return_value=[microsalt_fail_case], + ) + mocker.patch.object( + MicrosaltAnalysisAPI, "get_project", return_value=microsalt_qc_fail_lims_project + ) + + mocker.patch.object( + MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_fail_run_dir_path + ) + + # WHEN get case to store + cases_to_store: list[Case] = microsalt_api.get_cases_to_store() + + # Then it should not be stored + assert microsalt_fail_case not in cases_to_store From c3f669e82b5c45121e06a74e93803dde774d4a7d Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Fri, 8 Dec 2023 15:37:24 +0100 Subject: [PATCH 02/65] Extract qc logic to separate module --- cg/meta/workflow/microsalt.py | 116 +---------------- cg/meta/workflow/microsalt/__init__.py | 0 cg/meta/workflow/microsalt/quality_checker.py | 121 ++++++++++++++++++ 3 files changed, 127 insertions(+), 110 deletions(-) create mode 100644 cg/meta/workflow/microsalt/__init__.py create mode 100644 cg/meta/workflow/microsalt/quality_checker.py diff --git a/cg/meta/workflow/microsalt.py b/cg/meta/workflow/microsalt.py index 1a07d93512..6fdaf3f682 100644 --- a/cg/meta/workflow/microsalt.py +++ b/cg/meta/workflow/microsalt.py @@ -17,14 +17,12 @@ import click from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority -from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.constants.tb import AnalysisStatus from cg.exc import CgDataError -from cg.io.json import read_json, write_json from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.fastq import MicrosaltFastqHandler +from cg.meta.workflow.microsalt.quality_checker import QualityChecker from cg.models.cg_config import CGConfig -from cg.models.orders.sample_base import ControlEnum from cg.store.models import Case, Sample from cg.utils import Process @@ -38,6 +36,7 @@ def __init__(self, config: CGConfig, pipeline: Pipeline = Pipeline.MICROSALT): super().__init__(pipeline, config) self.root_dir = config.microsalt.root self.queries_path = config.microsalt.queries_path + self.quality_checker = QualityChecker(config.status_db) @property def use_read_count_threshold(self) -> bool: @@ -283,98 +282,6 @@ def get_case_id_from_case(self, unique_id: str) -> tuple[str, None]: case_id = case_obj.internal_id return case_id, None - def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: - """Check if given microSALT case passes QC check.""" - failed_samples: dict = {} - case_qc: dict = read_json(file_path=Path(run_dir_path, f"{lims_project}.json")) - - for sample_id in case_qc: - sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) - sample_check: dict | None = self.qc_sample_check( - sample=sample, - sample_qc=case_qc[sample_id], - ) - if sample_check is not None: - failed_samples[sample_id] = sample_check - - return self.qc_case_check( - case_id=case_id, - failed_samples=failed_samples, - number_of_samples=len(case_qc), - run_dir_path=run_dir_path, - ) - - def qc_case_check( - self, case_id: str, failed_samples: dict, number_of_samples: int, run_dir_path: Path - ) -> bool: - """Perform the final QC check for a microbial case based on failed samples.""" - qc_pass: bool = True - - for sample_id in failed_samples: - sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id) - if sample.control == ControlEnum.negative: - qc_pass = False - if sample.application_version.application.tag == MicrosaltAppTags.MWRNXTR003: - qc_pass = False - - # Check if more than 10% of MWX samples failed - if len(failed_samples) / number_of_samples > MicrosaltQC.QC_PERCENT_THRESHOLD_MWX: - qc_pass = False - - if not qc_pass: - LOG.warning( - f"Case {case_id} failed QC, see {run_dir_path}/QC_done.json for more information." - ) - else: - LOG.info(f"Case {case_id} passed QC.") - - self.create_qc_done_file( - run_dir_path=run_dir_path, - failed_samples=failed_samples, - ) - return qc_pass - - def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: - """Creates a QC_done when a QC check is performed.""" - write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) - - def qc_sample_check(self, sample: Sample, sample_qc: dict) -> dict | None: - """Perform a QC on a sample.""" - if sample.control == ControlEnum.negative: - reads_pass: bool = self.check_external_negative_control_sample(sample) - if not reads_pass: - LOG.warning(f"Negative control sample {sample.internal_id} failed QC.") - return {"Passed QC Reads": reads_pass} - else: - reads_pass: bool = sample.sequencing_qc - coverage_10x_pass: bool = self.check_coverage_10x( - sample_name=sample.internal_id, sample_qc=sample_qc - ) - if not reads_pass or not coverage_10x_pass: - LOG.warning(f"Sample {sample.internal_id} failed QC.") - return {"Passed QC Reads": reads_pass, "Passed Coverage 10X": coverage_10x_pass} - - def check_coverage_10x(self, sample_name: str, sample_qc: dict) -> bool: - """Check if a sample passed the coverage_10x criteria.""" - try: - return ( - sample_qc["microsalt_samtools_stats"]["coverage_10x"] - >= MicrosaltQC.COVERAGE_10X_THRESHOLD - ) - except TypeError as e: - LOG.error( - f"There is no 10X coverage value for sample {sample_name}, setting qc to fail for this sample" - ) - LOG.error(f"See error: {e}") - return False - - def check_external_negative_control_sample(self, sample: Sample) -> bool: - """Check if external negative control passed read check""" - return sample.reads < ( - sample.application_version.application.target_reads - * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD - ) - def get_cases_to_store(self) -> list[Case]: cases_qc_ready: list[Case] = self.get_completed_cases() cases_to_store: list[Case] = [] @@ -382,8 +289,10 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) - if self.is_qc_required(case_run_dir=case_run_dir, case_id=case.internal_id): - if self.microsalt_qc( + if self.quality_checker.is_qc_required( + case_run_dir=case_run_dir, case_id=case.internal_id + ): + if self.quality_checker.microsalt_qc( case_id=case.internal_id, run_dir_path=case_run_dir, lims_project=self.get_project(case.samples[0].internal_id), @@ -400,19 +309,6 @@ def get_cases_to_store(self) -> list[Case]: return cases_to_store - def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: - """Checks if a qc is required for a microbial case.""" - if case_run_dir is None: - LOG.info(f"There are no running directories for case {case_id}.") - return False - - if case_run_dir.joinpath("QC_done.json").exists(): - LOG.info(f"QC already performed for case {case_id}, storing case.") - return False - - LOG.info(f"Performing QC on case {case_id}") - return True - def get_completed_cases(self) -> list[Case]: """Return cases that are completed in trailblazer.""" return [ diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py new file mode 100644 index 0000000000..8c77b19229 --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -0,0 +1,121 @@ +import logging +from pathlib import Path + +from cg.io.json import read_json, write_json + +from cg.constants.constants import MicrosaltAppTags, MicrosaltQC +from cg.models.orders.sample_base import ControlEnum +from cg.store.models import Sample + +LOG = logging.getLogger(__name__) + + +class QualityChecker: + def __init__(self, status_db): + self.status_db = status_db + + def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: + """Check if given microSALT case passes QC check.""" + failed_samples: dict = {} + case_qc: dict = read_json(file_path=Path(run_dir_path, f"{lims_project}.json")) + + for sample_id in case_qc: + sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) + sample_check: dict | None = self.qc_sample_check( + sample=sample, + sample_qc=case_qc[sample_id], + ) + if sample_check is not None: + failed_samples[sample_id] = sample_check + + return self.qc_case_check( + case_id=case_id, + failed_samples=failed_samples, + number_of_samples=len(case_qc), + run_dir_path=run_dir_path, + ) + + def qc_case_check( + self, case_id: str, failed_samples: dict, number_of_samples: int, run_dir_path: Path + ) -> bool: + """Perform the final QC check for a microbial case based on failed samples.""" + qc_pass: bool = True + + for sample_id in failed_samples: + sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id) + if sample.control == ControlEnum.negative: + qc_pass = False + if sample.application_version.application.tag == MicrosaltAppTags.MWRNXTR003: + qc_pass = False + + # Check if more than 10% of MWX samples failed + if len(failed_samples) / number_of_samples > MicrosaltQC.QC_PERCENT_THRESHOLD_MWX: + qc_pass = False + + if not qc_pass: + LOG.warning( + f"Case {case_id} failed QC, see {run_dir_path}/QC_done.json for more information." + ) + else: + LOG.info(f"Case {case_id} passed QC.") + + self.create_qc_done_file( + run_dir_path=run_dir_path, + failed_samples=failed_samples, + ) + return qc_pass + + def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: + """Creates a QC_done when a QC check is performed.""" + write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) + + def qc_sample_check(self, sample: Sample, sample_qc: dict) -> dict | None: + """Perform a QC on a sample.""" + if sample.control == ControlEnum.negative: + reads_pass: bool = self.check_external_negative_control_sample(sample) + if not reads_pass: + LOG.warning(f"Negative control sample {sample.internal_id} failed QC.") + return {"Passed QC Reads": reads_pass} + else: + reads_pass: bool = sample.sequencing_qc + coverage_10x_pass: bool = self.check_coverage_10x( + sample_name=sample.internal_id, sample_qc=sample_qc + ) + if not reads_pass or not coverage_10x_pass: + LOG.warning(f"Sample {sample.internal_id} failed QC.") + return {"Passed QC Reads": reads_pass, "Passed Coverage 10X": coverage_10x_pass} + + def check_coverage_10x(self, sample_name: str, sample_qc: dict) -> bool: + """Check if a sample passed the coverage_10x criteria.""" + try: + return ( + sample_qc["microsalt_samtools_stats"]["coverage_10x"] + >= MicrosaltQC.COVERAGE_10X_THRESHOLD + ) + except TypeError as e: + LOG.error( + f"There is no 10X coverage value for sample {sample_name}, setting qc to fail for this sample" + ) + LOG.error(f"See error: {e}") + return False + + def check_external_negative_control_sample(self, sample: Sample) -> bool: + """Check if external negative control passed read check""" + return sample.reads < ( + sample.application_version.application.target_reads + * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD + ) + + + def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: + """Checks if a qc is required for a microbial case.""" + if case_run_dir is None: + LOG.info(f"There are no running directories for case {case_id}.") + return False + + if case_run_dir.joinpath("QC_done.json").exists(): + LOG.info(f"QC already performed for case {case_id}, storing case.") + return False + + LOG.info(f"Performing QC on case {case_id}") + return True From 0d79d1e67dd9df0d49fef5d67cc6aa9b20aa2861 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 09:36:39 +0100 Subject: [PATCH 03/65] Fix imports --- cg/meta/workflow/microsalt/__init__.py | 2 ++ cg/meta/workflow/{ => microsalt}/microsalt.py | 0 cg/meta/workflow/microsalt/quality_checker.py | 4 ++-- tests/meta/workflow/test_microsalt.py | 18 +++++++++--------- 4 files changed, 13 insertions(+), 11 deletions(-) rename cg/meta/workflow/{ => microsalt}/microsalt.py (100%) diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py index e69de29bb2..8129c1a0c2 100644 --- a/cg/meta/workflow/microsalt/__init__.py +++ b/cg/meta/workflow/microsalt/__init__.py @@ -0,0 +1,2 @@ +from .microsalt import MicrosaltAnalysisAPI +from .quality_checker import QualityChecker \ No newline at end of file diff --git a/cg/meta/workflow/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py similarity index 100% rename from cg/meta/workflow/microsalt.py rename to cg/meta/workflow/microsalt/microsalt.py diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 8c77b19229..13cf72049c 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -5,13 +5,14 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.models.orders.sample_base import ControlEnum +from cg.store.api.core import Store from cg.store.models import Sample LOG = logging.getLogger(__name__) class QualityChecker: - def __init__(self, status_db): + def __init__(self, status_db: Store): self.status_db = status_db def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: @@ -106,7 +107,6 @@ def check_external_negative_control_sample(self, sample: Sample) -> bool: * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD ) - def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: """Checks if a qc is required for a microbial case.""" if case_run_dir is None: diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 1acdcf1067..fa2bcb384b 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -6,7 +6,7 @@ from cg.apps.tb.api import TrailblazerAPI from cg.constants.constants import CaseActions, Pipeline -from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI +from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI, QualityChecker from cg.models.cg_config import CGConfig from cg.models.orders.sample_base import ControlEnum from cg.store import Store @@ -33,10 +33,10 @@ def test_qc_check_fail( for index in range(4): microsalt_case.samples[index].reads = 1000 - mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(QualityChecker, "create_qc_done_file") # WHEN performing QC check - qc_pass: bool = microsalt_api.microsalt_qc( + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( case_id=microsalt_case_qc_fail, run_dir_path=microsalt_qc_fail_run_dir_path, lims_project=microsalt_qc_fail_lims_project, @@ -65,10 +65,10 @@ def test_qc_check_pass( microsalt_case.samples[1].control = ControlEnum.negative microsalt_case.samples[1].reads = 1100000 - mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(QualityChecker, "create_qc_done_file") # WHEN performing QC check - qc_pass: bool = microsalt_api.microsalt_qc( + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( case_id=microsalt_case_qc_pass, run_dir_path=microsalt_qc_pass_run_dir_path, lims_project=microsalt_qc_pass_lims_project, @@ -97,10 +97,10 @@ def test_qc_check_negative_control_fail( microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) microsalt_case.samples[0].control = ControlEnum.negative - mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(QualityChecker, "create_qc_done_file") # WHEN performing QC check - qc_pass: bool = microsalt_api.microsalt_qc( + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( case_id=microsalt_case_qc_fail, run_dir_path=microsalt_qc_fail_run_dir_path, lims_project=microsalt_qc_fail_lims_project, @@ -175,7 +175,7 @@ def test_get_cases_to_store_pass( caplog.set_level(logging.INFO) store = qc_microsalt_context.status_db microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(QualityChecker, "create_qc_done_file") mocker.patch.object(TrailblazerAPI, "set_analysis_status") mocker.patch.object(TrailblazerAPI, "add_comment") @@ -217,7 +217,7 @@ def test_get_cases_to_store_fail( caplog.set_level(logging.INFO) store = qc_microsalt_context.status_db microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(MicrosaltAnalysisAPI, "create_qc_done_file") + mocker.patch.object(QualityChecker, "create_qc_done_file") mocker.patch.object(TrailblazerAPI, "set_analysis_status") mocker.patch.object(TrailblazerAPI, "add_comment") From 1a4d6611bc63d67d649ba4fcec65fef433f09d41 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 10:47:15 +0100 Subject: [PATCH 04/65] Formatting --- cg/meta/workflow/microsalt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py index 8129c1a0c2..a3eb140356 100644 --- a/cg/meta/workflow/microsalt/__init__.py +++ b/cg/meta/workflow/microsalt/__init__.py @@ -1,2 +1,2 @@ from .microsalt import MicrosaltAnalysisAPI -from .quality_checker import QualityChecker \ No newline at end of file +from .quality_checker import QualityChecker From da79782fd2a514acff8588536a1dc8275e3c1d2b Mon Sep 17 00:00:00 2001 From: seallard Date: Mon, 11 Dec 2023 11:07:53 +0100 Subject: [PATCH 05/65] Remove failing test --- cg/meta/workflow/microsalt/microsalt.py | 6 +++--- tests/meta/workflow/test_microsalt.py | 23 ----------------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 6fdaf3f682..36ebcdaa82 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -74,9 +74,9 @@ def get_case_path(self, case_id: str) -> list[Path]: def get_latest_case_path(self, case_id: str) -> Path | None: """Return latest run dir for a microbial case, if no path found it returns None.""" - lims_project: str = self.get_project( - self.status_db.get_case_by_internal_id(internal_id=case_id).links[0].sample.internal_id - ) + case: Case = self.status_db.get_case_by_internal_id(case_id) + sample_id: str = case.links[0].sample.internal_id + lims_project: str = self.get_project(sample_id) return next( ( diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index fa2bcb384b..63806d326c 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -139,29 +139,6 @@ def test_get_latest_case_path( assert Path(microsalt_analysis_dir, "ACC12345_2022") == path -def test_get_cases_to_store( - qc_microsalt_context: CGConfig, helpers: StoreHelpers, trailblazer_api: MockTB -): - """Test that the cases fetched are Microsalt and finished successfully.""" - # GIVEN a MicrosaltAPI, a Store and a TrailblazerAPI - analysis_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - store: Store = analysis_api.status_db - mock.patch.object(trailblazer_api, "is_latest_analysis_completed", return_value=True) - analysis_api.trailblazer_api = trailblazer_api - - # GIVEN a running case in the store - helpers.ensure_case(store=store, data_analysis=Pipeline.MICROSALT, action=CaseActions.RUNNING) - - # WHEN getting the cases to store in Housekeeper - cases_to_store: list[Case] = analysis_api.get_cases_to_store() - case: Case = cases_to_store[0] - - # THEN a list with one microsalt case is returned - assert len(cases_to_store) == 1 - assert case.data_analysis == Pipeline.MICROSALT - assert case.action == CaseActions.RUNNING - - def test_get_cases_to_store_pass( qc_microsalt_context: CGConfig, caplog, From 2f34f6db4ea9f35e222c3b86f2b2ba77db1d3c42 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 13:05:48 +0100 Subject: [PATCH 06/65] Add sample total reads check --- cg/constants/constants.py | 1 + cg/meta/workflow/microsalt/microsalt.py | 2 +- cg/meta/workflow/microsalt/quality_checker.py | 3 ++ cg/meta/workflow/microsalt/utils.py | 5 ++++ .../microsalt/test_quality_control.py | 30 +++++++++++++++++++ 5 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 cg/meta/workflow/microsalt/utils.py create mode 100644 tests/meta/workflow/microsalt/test_quality_control.py diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 41831d5671..1c8a9691ee 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -215,6 +215,7 @@ class MicrosaltQC: COVERAGE_10X_THRESHOLD: float = 0.75 NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2 TARGET_READS: int = 6000000 + TARGET_READS_FAIL_THRESHOLD: float = 0.7 class MicrosaltAppTags(StrEnum): diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 36ebcdaa82..f99182b1b7 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -82,7 +82,7 @@ def get_latest_case_path(self, case_id: str) -> Path | None: ( path for path in self.get_case_path(case_id=case_id) - if lims_project + "_" in str(path) + if f"{lims_project}_" in str(path) ), None, ) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 13cf72049c..4df7bcbabe 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -119,3 +119,6 @@ def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: LOG.info(f"Performing QC on case {case_id}") return True + + def sample_total_reads_qc(self, sample: Sample) -> bool: + pass \ No newline at end of file diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py new file mode 100644 index 0000000000..8c37aef8d4 --- /dev/null +++ b/cg/meta/workflow/microsalt/utils.py @@ -0,0 +1,5 @@ +from cg.constants.constants import MicrosaltQC + + +def is_total_reads_above_failure_threshold(sample_reads: int, target_reads: int) -> bool: + return sample_reads <= target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py new file mode 100644 index 0000000000..f9ca1d5a91 --- /dev/null +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -0,0 +1,30 @@ +import pytest + +from cg.constants.constants import MicrosaltQC +from cg.meta.workflow.microsalt.utils import is_total_reads_above_failure_threshold + +TARGET_READS_FAIL_THRESHOLD = MicrosaltQC.TARGET_READS_FAIL_THRESHOLD + +test_cases = [ + (TARGET_READS_FAIL_THRESHOLD * 100, 100, False, "sufficient_reads"), + (TARGET_READS_FAIL_THRESHOLD * 100 - 1, 100, True, "just_below_threshold"), + (0, 100, True, "edge_case_no_reads"), + (TARGET_READS_FAIL_THRESHOLD * 100, 0, False, "edge_case_no_target_reads"), +] + + +@pytest.mark.parametrize( + "sample_reads, target_reads, expected_result, test_id", test_cases, ids=lambda x: x[-1] +) +def test_is_total_reads_above_failure_threshold( + sample_reads, target_reads, expected_result, test_id +): + # GIVEN a sample with a number of reads and a target number of reads + + # WHEN checking if the sample has sufficient reads + result = is_total_reads_above_failure_threshold( + sample_reads=sample_reads, target_reads=target_reads + ) + + # THEN the result should be as expected + assert result == expected_result, f"Test failed for {test_id}" From 79feb5cda79256c1faa6215f1d7e79c9b4545e29 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 13:16:57 +0100 Subject: [PATCH 07/65] Fix tests --- cg/meta/workflow/microsalt/quality_checker.py | 2 +- cg/meta/workflow/microsalt/utils.py | 2 +- tests/meta/workflow/microsalt/__init__.py | 0 .../microsalt/test_quality_control.py | 52 ++++++++++++------- 4 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 tests/meta/workflow/microsalt/__init__.py diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 4df7bcbabe..43f0b00099 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -120,5 +120,5 @@ def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: LOG.info(f"Performing QC on case {case_id}") return True - def sample_total_reads_qc(self, sample: Sample) -> bool: + def sample_total_reads_qc(self, sample_id: str) -> bool: pass \ No newline at end of file diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 8c37aef8d4..e5dfafe026 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -2,4 +2,4 @@ def is_total_reads_above_failure_threshold(sample_reads: int, target_reads: int) -> bool: - return sample_reads <= target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD + return sample_reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD diff --git a/tests/meta/workflow/microsalt/__init__.py b/tests/meta/workflow/microsalt/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index f9ca1d5a91..800cf84f6e 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -1,30 +1,44 @@ -import pytest - from cg.constants.constants import MicrosaltQC from cg.meta.workflow.microsalt.utils import is_total_reads_above_failure_threshold -TARGET_READS_FAIL_THRESHOLD = MicrosaltQC.TARGET_READS_FAIL_THRESHOLD -test_cases = [ - (TARGET_READS_FAIL_THRESHOLD * 100, 100, False, "sufficient_reads"), - (TARGET_READS_FAIL_THRESHOLD * 100 - 1, 100, True, "just_below_threshold"), - (0, 100, True, "edge_case_no_reads"), - (TARGET_READS_FAIL_THRESHOLD * 100, 0, False, "edge_case_no_target_reads"), -] +def test_sample_total_reads_passing(): + # GIVEN a sample with sufficient reads + sample_reads = 100 + target_reads = 100 + + # WHEN checking if the sample has sufficient reads + passes_reads_threshold = is_total_reads_above_failure_threshold( + sample_reads=sample_reads, target_reads=target_reads + ) + + # THEN it passes + assert passes_reads_threshold + + +def test_sample_total_reads_failing(): + # GIVEN a sample with insufficient reads + sample_reads = 50 + target_reads = 100 + + # WHEN checking if the sample has sufficient reads + passes_reads_threshold = is_total_reads_above_failure_threshold( + sample_reads=sample_reads, target_reads=target_reads + ) + + # THEN it fails + assert not passes_reads_threshold -@pytest.mark.parametrize( - "sample_reads, target_reads, expected_result, test_id", test_cases, ids=lambda x: x[-1] -) -def test_is_total_reads_above_failure_threshold( - sample_reads, target_reads, expected_result, test_id -): - # GIVEN a sample with a number of reads and a target number of reads +def test_sample_total_reads_failing_without_reads(): + # GIVEN a sample without reads + sample_reads = 0 + target_reads = 100 # WHEN checking if the sample has sufficient reads - result = is_total_reads_above_failure_threshold( + passes_reads_threshold = is_total_reads_above_failure_threshold( sample_reads=sample_reads, target_reads=target_reads ) - # THEN the result should be as expected - assert result == expected_result, f"Test failed for {test_id}" + # THEN it fails + assert not passes_reads_threshold From 09ddae912f65b38e26952e45c23b34334a458505 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 13:31:35 +0100 Subject: [PATCH 08/65] Quality control for total reads given sample id --- cg/meta/workflow/microsalt/quality_checker.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 43f0b00099..554e2f9020 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -4,6 +4,7 @@ from cg.io.json import read_json, write_json from cg.constants.constants import MicrosaltAppTags, MicrosaltQC +from cg.meta.workflow.microsalt.utils import is_total_reads_above_failure_threshold from cg.models.orders.sample_base import ControlEnum from cg.store.api.core import Store from cg.store.models import Sample @@ -121,4 +122,13 @@ def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: return True def sample_total_reads_qc(self, sample_id: str) -> bool: - pass \ No newline at end of file + sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) + target_reads: int = sample.application_version.application.target_reads + sample_reads: int = sample.reads + + passes_total_reads_qc: bool = is_total_reads_above_failure_threshold( + sample_reads=sample_reads, target_reads=target_reads + ) + if not passes_total_reads_qc: + LOG.warning(f"Sample {sample_id} failed total reads QC.") + return passes_total_reads_qc From cca1d7322e897ac5e47c6c5ec3eee28cc638aaf4 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 13:53:56 +0100 Subject: [PATCH 09/65] Validate negative control total reads --- cg/meta/workflow/microsalt/quality_checker.py | 17 ++++++++--------- cg/meta/workflow/microsalt/utils.py | 6 +++++- .../workflow/microsalt/test_quality_control.py | 9 ++++----- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 554e2f9020..e55ed71a30 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -4,7 +4,7 @@ from cg.io.json import read_json, write_json from cg.constants.constants import MicrosaltAppTags, MicrosaltQC -from cg.meta.workflow.microsalt.utils import is_total_reads_above_failure_threshold +from cg.meta.workflow.microsalt.utils import is_valid_total_reads, is_valid_total_reads_for_control from cg.models.orders.sample_base import ControlEnum from cg.store.api.core import Store from cg.store.models import Sample @@ -121,14 +121,13 @@ def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: LOG.info(f"Performing QC on case {case_id}") return True - def sample_total_reads_qc(self, sample_id: str) -> bool: - sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) + def is_valid_total_reads(self, sample_id: str) -> bool: + sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) target_reads: int = sample.application_version.application.target_reads sample_reads: int = sample.reads - passes_total_reads_qc: bool = is_total_reads_above_failure_threshold( - sample_reads=sample_reads, target_reads=target_reads - ) - if not passes_total_reads_qc: - LOG.warning(f"Sample {sample_id} failed total reads QC.") - return passes_total_reads_qc + if sample.control == ControlEnum.negative: + return is_valid_total_reads_for_control( + sample_reads=sample_reads, target_reads=target_reads + ) + return is_valid_total_reads(sample_reads=sample_reads, target_reads=target_reads) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index e5dfafe026..fc4fcd5ba1 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -1,5 +1,9 @@ from cg.constants.constants import MicrosaltQC -def is_total_reads_above_failure_threshold(sample_reads: int, target_reads: int) -> bool: +def is_valid_total_reads(sample_reads: int, target_reads: int) -> bool: return sample_reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD + + +def is_valid_total_reads_for_control(sample_reads: int, target_reads: int) -> bool: + return sample_reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index 800cf84f6e..c4f6fc15db 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -1,5 +1,4 @@ -from cg.constants.constants import MicrosaltQC -from cg.meta.workflow.microsalt.utils import is_total_reads_above_failure_threshold +from cg.meta.workflow.microsalt.utils import is_valid_total_reads def test_sample_total_reads_passing(): @@ -8,7 +7,7 @@ def test_sample_total_reads_passing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_total_reads_above_failure_threshold( + passes_reads_threshold = is_valid_total_reads( sample_reads=sample_reads, target_reads=target_reads ) @@ -22,7 +21,7 @@ def test_sample_total_reads_failing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_total_reads_above_failure_threshold( + passes_reads_threshold = is_valid_total_reads( sample_reads=sample_reads, target_reads=target_reads ) @@ -36,7 +35,7 @@ def test_sample_total_reads_failing_without_reads(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_total_reads_above_failure_threshold( + passes_reads_threshold = is_valid_total_reads( sample_reads=sample_reads, target_reads=target_reads ) From 09830e6cbe87da4b2ba30b081575f00b284a70e9 Mon Sep 17 00:00:00 2001 From: seallard Date: Mon, 11 Dec 2023 15:20:37 +0100 Subject: [PATCH 10/65] Add models for parsing quality metrics --- cg/meta/workflow/microsalt/models.py | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 cg/meta/workflow/microsalt/models.py diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py new file mode 100644 index 0000000000..d43ec10a2f --- /dev/null +++ b/cg/meta/workflow/microsalt/models.py @@ -0,0 +1,41 @@ +from typing import List, Dict +from pydantic import BaseModel + + +class BlastPubmlst(BaseModel): + sequence_type: str + thresholds: str + + +class QuastAssembly(BaseModel): + estimated_genome_length: int + gc_percentage: str + n50: int + necessary_contigs: int + + +class PicardMarkduplicate(BaseModel): + insert_size: int + duplication_rate: float + + +class MicrosaltSamtoolsStats(BaseModel): + total_reads: int + mapped_rate: float + average_coverage: float + coverage_10x: float + coverage_30x: float + coverage_50x: float + coverage_100x: float + + +class Sample(BaseModel): + blast_pubmlst: BlastPubmlst + quast_assembly: QuastAssembly + blast_resfinder_resistence: List[str] + picard_markduplicate: PicardMarkduplicate + microsalt_samtools_stats: MicrosaltSamtoolsStats + + +class QualityMetrics(BaseModel): + samples: Dict[str, Sample] From 0c3aa9b800ca955e2d4b6576b7c16fcd12bf490c Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 11 Dec 2023 15:53:09 +0100 Subject: [PATCH 11/65] Validate mapped rate --- cg/constants/constants.py | 1 + cg/meta/workflow/microsalt/models.py | 10 ++++-- cg/meta/workflow/microsalt/quality_checker.py | 33 ++++++++++++++++++- cg/meta/workflow/microsalt/utils.py | 13 ++++++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 1c8a9691ee..514a9927be 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -213,6 +213,7 @@ class APIMethods(StrEnum): class MicrosaltQC: QC_PERCENT_THRESHOLD_MWX: float = 0.1 COVERAGE_10X_THRESHOLD: float = 0.75 + MAPPED_RATE_THRESHOLD: float = 0.3 NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2 TARGET_READS: int = 6000000 TARGET_READS_FAIL_THRESHOLD: float = 0.7 diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index d43ec10a2f..c2bac742f3 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -29,7 +29,7 @@ class MicrosaltSamtoolsStats(BaseModel): coverage_100x: float -class Sample(BaseModel): +class SampleMetrics(BaseModel): blast_pubmlst: BlastPubmlst quast_assembly: QuastAssembly blast_resfinder_resistence: List[str] @@ -38,4 +38,10 @@ class Sample(BaseModel): class QualityMetrics(BaseModel): - samples: Dict[str, Sample] + samples: Dict[str, SampleMetrics] + + +class QualityResult(BaseModel): + sample_id: str + passed: bool + fail_message: str | None = None diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index e55ed71a30..88193603a2 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -4,7 +4,13 @@ from cg.io.json import read_json, write_json from cg.constants.constants import MicrosaltAppTags, MicrosaltQC -from cg.meta.workflow.microsalt.utils import is_valid_total_reads, is_valid_total_reads_for_control +from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics +from cg.meta.workflow.microsalt.utils import ( + is_valid_mapped_rate, + is_valid_total_reads, + is_valid_total_reads_for_control, + parse_quality_metrics, +) from cg.models.orders.sample_base import ControlEnum from cg.store.api.core import Store from cg.store.models import Sample @@ -16,6 +22,27 @@ class QualityChecker: def __init__(self, status_db: Store): self.status_db = status_db + def quality_control(self, run_dir_path: Path, lims_project: str): + metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") + quality_metrics: QualityMetrics = parse_quality_metrics(metrics_file_path) + + sample_results: list[QualityResult] = [] + + for sample_metrics in quality_metrics: + result = self.quality_control_sample(sample_metrics) + sample_results.append(result) + + self.quality_control_case(sample_results) + + def quality_control_sample( + self, sample_id: str, sample_metrics: SampleMetrics + ) -> QualityResult: + reads_passes_qc: bool = self.is_valid_total_reads(sample_id) + mapped_rate_passes_qc: bool = self.is_valid_mapped_rate(sample_metrics) + + def quality_control_case(self, sample_results: list[QualityResult]) -> bool: + pass + def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: """Check if given microSALT case passes QC check.""" failed_samples: dict = {} @@ -131,3 +158,7 @@ def is_valid_total_reads(self, sample_id: str) -> bool: sample_reads=sample_reads, target_reads=target_reads ) return is_valid_total_reads(sample_reads=sample_reads, target_reads=target_reads) + + def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: + mapped_rate: float = metrics.microsalt_samtools_stats.mapped_rate + return is_valid_mapped_rate(mapped_rate) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index fc4fcd5ba1..c08e7f0eb0 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -1,4 +1,8 @@ +from pathlib import Path + from cg.constants.constants import MicrosaltQC +from cg.io.json import read_json +from cg.meta.workflow.microsalt.models import QualityMetrics def is_valid_total_reads(sample_reads: int, target_reads: int) -> bool: @@ -7,3 +11,12 @@ def is_valid_total_reads(sample_reads: int, target_reads: int) -> bool: def is_valid_total_reads_for_control(sample_reads: int, target_reads: int) -> bool: return sample_reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD + + +def is_valid_mapped_rate(sample_mapped_rate: float) -> bool: + return sample_mapped_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD + + +def parse_quality_metrics(file_path: Path) -> QualityMetrics: + data = read_json(file_path) + return QualityMetrics.model_validate_json(data) From 418837d616295a54fd99e0da4a1736fe4350c9a8 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 08:43:21 +0100 Subject: [PATCH 12/65] Add validation of duplication rate --- cg/constants/constants.py | 1 + cg/meta/workflow/microsalt/quality_checker.py | 18 +++++++++++------- cg/meta/workflow/microsalt/utils.py | 4 ++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 514a9927be..70bd635261 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -213,6 +213,7 @@ class APIMethods(StrEnum): class MicrosaltQC: QC_PERCENT_THRESHOLD_MWX: float = 0.1 COVERAGE_10X_THRESHOLD: float = 0.75 + DUPLICATION_RATE_THRESHOLD: float = 0.8 MAPPED_RATE_THRESHOLD: float = 0.3 NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2 TARGET_READS: int = 6000000 diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 88193603a2..c0dc95f7d4 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -6,6 +6,7 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( + is_valid_duplication_rate, is_valid_mapped_rate, is_valid_total_reads, is_valid_total_reads_for_control, @@ -28,17 +29,16 @@ def quality_control(self, run_dir_path: Path, lims_project: str): sample_results: list[QualityResult] = [] - for sample_metrics in quality_metrics: - result = self.quality_control_sample(sample_metrics) + for sample_id, metrics in quality_metrics: + result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) self.quality_control_case(sample_results) - def quality_control_sample( - self, sample_id: str, sample_metrics: SampleMetrics - ) -> QualityResult: - reads_passes_qc: bool = self.is_valid_total_reads(sample_id) - mapped_rate_passes_qc: bool = self.is_valid_mapped_rate(sample_metrics) + def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: + valid_reads: bool = self.is_valid_total_reads(sample_id) + valid_mapped_rate: bool = self.is_valid_mapped_rate(metrics) + valid_duplication_rate: bool = self.is_valid_duplication_rate(metrics) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: pass @@ -162,3 +162,7 @@ def is_valid_total_reads(self, sample_id: str) -> bool: def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: mapped_rate: float = metrics.microsalt_samtools_stats.mapped_rate return is_valid_mapped_rate(mapped_rate) + + def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: + duplication_rate: float = metrics.picard_markduplicate.duplication_rate + return is_valid_duplication_rate(duplication_rate) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index c08e7f0eb0..d2f451f81c 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -17,6 +17,10 @@ def is_valid_mapped_rate(sample_mapped_rate: float) -> bool: return sample_mapped_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD +def is_valid_duplication_rate(sample_duplication_rate: float) -> bool: + return sample_duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD + + def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) return QualityMetrics.model_validate_json(data) From 4cabeca9bfa1ecbfbc9a6965463c93efd7896e80 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 08:49:48 +0100 Subject: [PATCH 13/65] Validate median insert size --- cg/constants/constants.py | 1 + cg/meta/workflow/microsalt/quality_checker.py | 5 +++++ cg/meta/workflow/microsalt/utils.py | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 70bd635261..28b171b0ed 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -214,6 +214,7 @@ class MicrosaltQC: QC_PERCENT_THRESHOLD_MWX: float = 0.1 COVERAGE_10X_THRESHOLD: float = 0.75 DUPLICATION_RATE_THRESHOLD: float = 0.8 + INSERT_SIZE_THRESHOLD: int = 100 MAPPED_RATE_THRESHOLD: float = 0.3 NEGATIVE_CONTROL_READS_THRESHOLD: float = 0.2 TARGET_READS: int = 6000000 diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index c0dc95f7d4..b618f66fe4 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -8,6 +8,7 @@ from cg.meta.workflow.microsalt.utils import ( is_valid_duplication_rate, is_valid_mapped_rate, + is_valid_median_insert_size, is_valid_total_reads, is_valid_total_reads_for_control, parse_quality_metrics, @@ -166,3 +167,7 @@ def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: duplication_rate: float = metrics.picard_markduplicate.duplication_rate return is_valid_duplication_rate(duplication_rate) + + def is_valid_median_insert_size(self, metrics: SampleMetrics) -> bool: + insert_size: int = metrics.picard_markduplicate.insert_size + return is_valid_median_insert_size(insert_size) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index d2f451f81c..ba52d28e86 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -21,6 +21,10 @@ def is_valid_duplication_rate(sample_duplication_rate: float) -> bool: return sample_duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD +def is_valid_median_insert_size(sample_insert_size: int) -> bool: + return sample_insert_size > MicrosaltQC.INSERT_SIZE_THRESHOLD + + def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) return QualityMetrics.model_validate_json(data) From 72b3f997fdaa9b8a747f893a034a5dc1a4c67a7f Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 08:54:35 +0100 Subject: [PATCH 14/65] Validate average coverage --- cg/constants/constants.py | 1 + cg/meta/workflow/microsalt/quality_checker.py | 7 +++++++ cg/meta/workflow/microsalt/utils.py | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 28b171b0ed..303d3c5b2e 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -211,6 +211,7 @@ class APIMethods(StrEnum): class MicrosaltQC: + AVERAGE_COVERAGE_THRESHOLD: int = 10 QC_PERCENT_THRESHOLD_MWX: float = 0.1 COVERAGE_10X_THRESHOLD: float = 0.75 DUPLICATION_RATE_THRESHOLD: float = 0.8 diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index b618f66fe4..eb7cde1294 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -6,6 +6,7 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( + is_valid_average_coverage, is_valid_duplication_rate, is_valid_mapped_rate, is_valid_median_insert_size, @@ -40,6 +41,8 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual valid_reads: bool = self.is_valid_total_reads(sample_id) valid_mapped_rate: bool = self.is_valid_mapped_rate(metrics) valid_duplication_rate: bool = self.is_valid_duplication_rate(metrics) + valid_median_insert_size: bool = self.is_valid_median_insert_size(metrics) + valid_average_coverage: bool = self.is_valid_average_coverage(metrics) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: pass @@ -171,3 +174,7 @@ def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: def is_valid_median_insert_size(self, metrics: SampleMetrics) -> bool: insert_size: int = metrics.picard_markduplicate.insert_size return is_valid_median_insert_size(insert_size) + + def is_valid_average_coverage(self, metrics: SampleMetrics) -> bool: + average_coverage: float = metrics.microsalt_samtools_stats.average_coverage + return is_valid_average_coverage(average_coverage) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index ba52d28e86..008870b2ba 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -25,6 +25,10 @@ def is_valid_median_insert_size(sample_insert_size: int) -> bool: return sample_insert_size > MicrosaltQC.INSERT_SIZE_THRESHOLD +def is_valid_average_coverage(average_coverage: float) -> bool: + return average_coverage > MicrosaltQC.AVERAGE_COVERAGE_THRESHOLD + + def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) return QualityMetrics.model_validate_json(data) From 71634c081d54a3b87970be21cb246e33c7d39aff Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 09:12:37 +0100 Subject: [PATCH 15/65] Validate 10x coverage --- cg/meta/workflow/microsalt/quality_checker.py | 24 +++++++++++-------- cg/meta/workflow/microsalt/utils.py | 24 +++++++++++-------- .../microsalt/test_quality_control.py | 6 ++--- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index eb7cde1294..3b0ce2a401 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -6,9 +6,10 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( + is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, - is_valid_mapped_rate, + is_valid_mapping_rate, is_valid_median_insert_size, is_valid_total_reads, is_valid_total_reads_for_control, @@ -39,10 +40,11 @@ def quality_control(self, run_dir_path: Path, lims_project: str): def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) - valid_mapped_rate: bool = self.is_valid_mapped_rate(metrics) - valid_duplication_rate: bool = self.is_valid_duplication_rate(metrics) - valid_median_insert_size: bool = self.is_valid_median_insert_size(metrics) - valid_average_coverage: bool = self.is_valid_average_coverage(metrics) + valid_mapping: bool = self.is_valid_mapped_rate(metrics) + valid_duplication: bool = self.is_valid_duplication_rate(metrics) + valid_inserts: bool = self.is_valid_median_insert_size(metrics) + valid_coverage: bool = self.is_valid_average_coverage(metrics) + valid_10x_coverage: bool = self.is_valid_10x_coverage(metrics) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: pass @@ -158,14 +160,12 @@ def is_valid_total_reads(self, sample_id: str) -> bool: sample_reads: int = sample.reads if sample.control == ControlEnum.negative: - return is_valid_total_reads_for_control( - sample_reads=sample_reads, target_reads=target_reads - ) - return is_valid_total_reads(sample_reads=sample_reads, target_reads=target_reads) + return is_valid_total_reads_for_control(reads=sample_reads, target_reads=target_reads) + return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: mapped_rate: float = metrics.microsalt_samtools_stats.mapped_rate - return is_valid_mapped_rate(mapped_rate) + return is_valid_mapping_rate(mapped_rate) def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: duplication_rate: float = metrics.picard_markduplicate.duplication_rate @@ -178,3 +178,7 @@ def is_valid_median_insert_size(self, metrics: SampleMetrics) -> bool: def is_valid_average_coverage(self, metrics: SampleMetrics) -> bool: average_coverage: float = metrics.microsalt_samtools_stats.average_coverage return is_valid_average_coverage(average_coverage) + + def is_valid_10x_coverage(self, metrics: SampleMetrics) -> bool: + coverage_10x: float = metrics.microsalt_samtools_stats.coverage_10x + return is_valid_10x_coverage(coverage_10x) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 008870b2ba..c1f7debabf 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -5,30 +5,34 @@ from cg.meta.workflow.microsalt.models import QualityMetrics -def is_valid_total_reads(sample_reads: int, target_reads: int) -> bool: - return sample_reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD +def is_valid_total_reads(reads: int, target_reads: int) -> bool: + return reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD -def is_valid_total_reads_for_control(sample_reads: int, target_reads: int) -> bool: - return sample_reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD +def is_valid_total_reads_for_control(reads: int, target_reads: int) -> bool: + return reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD -def is_valid_mapped_rate(sample_mapped_rate: float) -> bool: - return sample_mapped_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD +def is_valid_mapping_rate(mapping_rate: float) -> bool: + return mapping_rate > MicrosaltQC.MAPPED_RATE_THRESHOLD -def is_valid_duplication_rate(sample_duplication_rate: float) -> bool: - return sample_duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD +def is_valid_duplication_rate(duplication_rate: float) -> bool: + return duplication_rate < MicrosaltQC.DUPLICATION_RATE_THRESHOLD -def is_valid_median_insert_size(sample_insert_size: int) -> bool: - return sample_insert_size > MicrosaltQC.INSERT_SIZE_THRESHOLD +def is_valid_median_insert_size(insert_size: int) -> bool: + return insert_size > MicrosaltQC.INSERT_SIZE_THRESHOLD def is_valid_average_coverage(average_coverage: float) -> bool: return average_coverage > MicrosaltQC.AVERAGE_COVERAGE_THRESHOLD +def is_valid_10x_coverage(coverage_10x: float) -> bool: + return coverage_10x > MicrosaltQC.COVERAGE_10X_THRESHOLD + + def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) return QualityMetrics.model_validate_json(data) diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index c4f6fc15db..c3ee4ff672 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -8,7 +8,7 @@ def test_sample_total_reads_passing(): # WHEN checking if the sample has sufficient reads passes_reads_threshold = is_valid_total_reads( - sample_reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads ) # THEN it passes @@ -22,7 +22,7 @@ def test_sample_total_reads_failing(): # WHEN checking if the sample has sufficient reads passes_reads_threshold = is_valid_total_reads( - sample_reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads ) # THEN it fails @@ -36,7 +36,7 @@ def test_sample_total_reads_failing_without_reads(): # WHEN checking if the sample has sufficient reads passes_reads_threshold = is_valid_total_reads( - sample_reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads ) # THEN it fails From 0227515e1e965893172c1def6886b9351ef41401 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 11:16:34 +0100 Subject: [PATCH 16/65] Formatting --- cg/meta/workflow/microsalt/models.py | 1 - cg/meta/workflow/microsalt/quality_checker.py | 16 +++++++++++++++- .../workflow/microsalt/test_quality_control.py | 12 +++--------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index c2bac742f3..12814590c0 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -44,4 +44,3 @@ class QualityMetrics(BaseModel): class QualityResult(BaseModel): sample_id: str passed: bool - fail_message: str | None = None diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 3b0ce2a401..1fb8c5b3b6 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -46,8 +46,22 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual valid_coverage: bool = self.is_valid_average_coverage(metrics) valid_10x_coverage: bool = self.is_valid_10x_coverage(metrics) + sample_passes_qc: bool = ( + valid_reads + and valid_mapping + and valid_duplication + and valid_inserts + and valid_coverage + and valid_10x_coverage + ) + + return QualityResult( + sample_id=sample_id, + passed=sample_passes_qc, + ) + def quality_control_case(self, sample_results: list[QualityResult]) -> bool: - pass + negative_control_passes: bool = True def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: """Check if given microSALT case passes QC check.""" diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index c3ee4ff672..e8c061114b 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -7,9 +7,7 @@ def test_sample_total_reads_passing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads - ) + passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) # THEN it passes assert passes_reads_threshold @@ -21,9 +19,7 @@ def test_sample_total_reads_failing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads - ) + passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) # THEN it fails assert not passes_reads_threshold @@ -35,9 +31,7 @@ def test_sample_total_reads_failing_without_reads(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads - ) + passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) # THEN it fails assert not passes_reads_threshold From 48e247b2d13665bdd3d07784e15e44df499097c4 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 11:35:18 +0100 Subject: [PATCH 17/65] Validate that negative control sample for cases passes control --- cg/meta/workflow/microsalt/models.py | 6 ++++-- cg/meta/workflow/microsalt/quality_checker.py | 10 ++++++++-- cg/meta/workflow/microsalt/utils.py | 9 ++++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index 12814590c0..ade4700459 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -1,6 +1,8 @@ from typing import List, Dict from pydantic import BaseModel +from cg.store.models import Sample + class BlastPubmlst(BaseModel): sequence_type: str @@ -42,5 +44,5 @@ class QualityMetrics(BaseModel): class QualityResult(BaseModel): - sample_id: str - passed: bool + sample: Sample + passes_qc: bool diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 1fb8c5b3b6..088e60a58e 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -6,6 +6,7 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( + get_negative_control_result, is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, @@ -39,6 +40,7 @@ def quality_control(self, run_dir_path: Path, lims_project: str): self.quality_control_case(sample_results) def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: + sample = self.status_db.get_sample_by_internal_id(sample_id) valid_reads: bool = self.is_valid_total_reads(sample_id) valid_mapping: bool = self.is_valid_mapped_rate(metrics) valid_duplication: bool = self.is_valid_duplication_rate(metrics) @@ -56,12 +58,12 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual ) return QualityResult( - sample_id=sample_id, + sample=sample, passed=sample_passes_qc, ) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: - negative_control_passes: bool = True + control_passes_qc: bool = self.is_valid_negative_control(sample_results) def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: """Check if given microSALT case passes QC check.""" @@ -196,3 +198,7 @@ def is_valid_average_coverage(self, metrics: SampleMetrics) -> bool: def is_valid_10x_coverage(self, metrics: SampleMetrics) -> bool: coverage_10x: float = metrics.microsalt_samtools_stats.coverage_10x return is_valid_10x_coverage(coverage_10x) + + def is_valid_negative_control(self, results: list[QualityResult]) -> bool: + negative_control_result: QualityResult = get_negative_control_result(results) + return negative_control_result.passes_qc diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index c1f7debabf..fb43a05090 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -2,7 +2,8 @@ from cg.constants.constants import MicrosaltQC from cg.io.json import read_json -from cg.meta.workflow.microsalt.models import QualityMetrics +from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult +from cg.models.orders.sample_base import ControlEnum def is_valid_total_reads(reads: int, target_reads: int) -> bool: @@ -36,3 +37,9 @@ def is_valid_10x_coverage(coverage_10x: float) -> bool: def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) return QualityMetrics.model_validate_json(data) + + +def get_negative_control_result(results: list[QualityResult]) -> QualityResult: + for result in results: + if result.sample.control == ControlEnum.negative: + return result From ec60b9657d346162690d43a8ddacc6764a69416a Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 11:48:24 +0100 Subject: [PATCH 18/65] Fix quality result model --- cg/meta/workflow/microsalt/models.py | 3 ++- cg/meta/workflow/microsalt/quality_checker.py | 7 +++++-- cg/meta/workflow/microsalt/utils.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index ade4700459..8e4ab3c665 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -44,5 +44,6 @@ class QualityMetrics(BaseModel): class QualityResult(BaseModel): - sample: Sample + sample_id: str + is_negative_control: bool passes_qc: bool diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 088e60a58e..840ac26a12 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -40,7 +40,6 @@ def quality_control(self, run_dir_path: Path, lims_project: str): self.quality_control_case(sample_results) def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: - sample = self.status_db.get_sample_by_internal_id(sample_id) valid_reads: bool = self.is_valid_total_reads(sample_id) valid_mapping: bool = self.is_valid_mapped_rate(metrics) valid_duplication: bool = self.is_valid_duplication_rate(metrics) @@ -57,8 +56,12 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual and valid_10x_coverage ) + sample = self.status_db.get_sample_by_internal_id(sample_id) + is_negative_control: bool = sample.control == ControlEnum.negative + return QualityResult( - sample=sample, + sample_id=sample_id, + is_negative_control=is_negative_control, passed=sample_passes_qc, ) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index fb43a05090..9b0544e047 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -41,5 +41,5 @@ def parse_quality_metrics(file_path: Path) -> QualityMetrics: def get_negative_control_result(results: list[QualityResult]) -> QualityResult: for result in results: - if result.sample.control == ControlEnum.negative: + if result.is_negative_control: return result From ea264ca88d8f20b3c2a3af7d4696c014ff97c2cf Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 12:08:11 +0100 Subject: [PATCH 19/65] Validate that all urgent samples pass quality control --- cg/meta/workflow/microsalt/models.py | 6 ++++-- cg/meta/workflow/microsalt/quality_checker.py | 16 +++++++++++++--- cg/meta/workflow/microsalt/utils.py | 19 ++++++++++++++++--- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index 8e4ab3c665..ee85af5b19 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -1,11 +1,12 @@ from typing import List, Dict from pydantic import BaseModel +from cg.constants.constants import MicrosaltAppTags from cg.store.models import Sample class BlastPubmlst(BaseModel): - sequence_type: str + sequence_type: MicrosaltAppTags thresholds: str @@ -45,5 +46,6 @@ class QualityMetrics(BaseModel): class QualityResult(BaseModel): sample_id: str - is_negative_control: bool passes_qc: bool + is_negative_control: bool + application_tag: MicrosaltAppTags diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 840ac26a12..5bb7389da0 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -6,7 +6,10 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( + get_application_tag, get_negative_control_result, + get_urgent_results, + is_sample_negative_control, is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, @@ -56,17 +59,20 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual and valid_10x_coverage ) - sample = self.status_db.get_sample_by_internal_id(sample_id) - is_negative_control: bool = sample.control == ControlEnum.negative + sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) + is_control: bool = is_sample_negative_control(sample) + application_tag: str = get_application_tag(sample) return QualityResult( sample_id=sample_id, - is_negative_control=is_negative_control, passed=sample_passes_qc, + is_negative_control=is_control, + application_tag=application_tag, ) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: control_passes_qc: bool = self.is_valid_negative_control(sample_results) + urgent_pass_qc: bool = self.all_urgent_samples_pass_qc(sample_results) def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: """Check if given microSALT case passes QC check.""" @@ -205,3 +211,7 @@ def is_valid_10x_coverage(self, metrics: SampleMetrics) -> bool: def is_valid_negative_control(self, results: list[QualityResult]) -> bool: negative_control_result: QualityResult = get_negative_control_result(results) return negative_control_result.passes_qc + + def all_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: + urgent_samples: list[QualityResult] = get_urgent_results(results) + return all(sample.passes_qc for sample in urgent_samples) diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 9b0544e047..8e56466942 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -1,9 +1,10 @@ from pathlib import Path -from cg.constants.constants import MicrosaltQC +from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.io.json import read_json -from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult +from cg.meta.workflow.microsalt.models import QualityMetrics, SampleQualityControl from cg.models.orders.sample_base import ControlEnum +from cg.store.models import Sample def is_valid_total_reads(reads: int, target_reads: int) -> bool: @@ -39,7 +40,19 @@ def parse_quality_metrics(file_path: Path) -> QualityMetrics: return QualityMetrics.model_validate_json(data) -def get_negative_control_result(results: list[QualityResult]) -> QualityResult: +def is_sample_negative_control(sample: Sample) -> bool: + return sample.control == ControlEnum.negative + + +def get_application_tag(sample: Sample) -> str: + return sample.application_version.application.tag + + +def get_urgent_results(results: list[SampleQualityControl]) -> list[SampleQualityControl]: + return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] + + +def get_negative_control_result(results: list[SampleQualityControl]) -> SampleQualityControl: for result in results: if result.is_negative_control: return result From 5da77a431d8cdb879775b8a8214ed02382e676ba Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 12:17:39 +0100 Subject: [PATCH 20/65] Validate that most non urgent samples pass qc --- cg/constants/constants.py | 2 +- cg/meta/workflow/microsalt/quality_checker.py | 10 ++++++++++ cg/meta/workflow/microsalt/utils.py | 14 +++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 303d3c5b2e..35c20f6449 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -212,7 +212,7 @@ class APIMethods(StrEnum): class MicrosaltQC: AVERAGE_COVERAGE_THRESHOLD: int = 10 - QC_PERCENT_THRESHOLD_MWX: float = 0.1 + QC_PERCENT_THRESHOLD_MWX: float = 0.9 COVERAGE_10X_THRESHOLD: float = 0.75 DUPLICATION_RATE_THRESHOLD: float = 0.8 INSERT_SIZE_THRESHOLD: int = 100 diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 5bb7389da0..60b57ed1d4 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -8,6 +8,8 @@ from cg.meta.workflow.microsalt.utils import ( get_application_tag, get_negative_control_result, + get_non_urgent_results, + get_results_passing_qc, get_urgent_results, is_sample_negative_control, is_valid_10x_coverage, @@ -73,6 +75,7 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual def quality_control_case(self, sample_results: list[QualityResult]) -> bool: control_passes_qc: bool = self.is_valid_negative_control(sample_results) urgent_pass_qc: bool = self.all_urgent_samples_pass_qc(sample_results) + non_urgent_pass_qc: bool = self.non_urgent_samples_pass_qc(sample_results) def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: """Check if given microSALT case passes QC check.""" @@ -215,3 +218,10 @@ def is_valid_negative_control(self, results: list[QualityResult]) -> bool: def all_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: urgent_samples: list[QualityResult] = get_urgent_results(results) return all(sample.passes_qc for sample in urgent_samples) + + def non_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: + urgent_samples: list[QualityResult] = get_non_urgent_results(results) + passing_qc: list[QualityResult] = get_results_passing_qc(urgent_samples) + + fraction_passing_qc: float = len(passing_qc) / len(urgent_samples) + return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 8e56466942..73e616d333 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -2,7 +2,7 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.io.json import read_json -from cg.meta.workflow.microsalt.models import QualityMetrics, SampleQualityControl +from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample @@ -48,11 +48,19 @@ def get_application_tag(sample: Sample) -> str: return sample.application_version.application.tag -def get_urgent_results(results: list[SampleQualityControl]) -> list[SampleQualityControl]: +def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] -def get_negative_control_result(results: list[SampleQualityControl]) -> SampleQualityControl: +def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.application_tag != MicrosaltAppTags.MWRNXTR003] + + +def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.passes_qc] + + +def get_negative_control_result(results: list[QualityResult]) -> QualityResult: for result in results: if result.is_negative_control: return result From d33a84c7233a2e51d7a5329fdd663434b5d42432 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 12:18:43 +0100 Subject: [PATCH 21/65] Handle div by zero --- cg/meta/workflow/microsalt/quality_checker.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 60b57ed1d4..715320d4b8 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -222,6 +222,9 @@ def all_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: def non_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: urgent_samples: list[QualityResult] = get_non_urgent_results(results) passing_qc: list[QualityResult] = get_results_passing_qc(urgent_samples) - + + if not urgent_samples: + return True + fraction_passing_qc: float = len(passing_qc) / len(urgent_samples) return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX From 738fa8b4304ebab9b3bc9c5ae1b527e139bd8c2a Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 12:26:43 +0100 Subject: [PATCH 22/65] Remove deprecated code --- cg/cli/workflow/microsalt/base.py | 2 +- cg/meta/workflow/microsalt/quality_checker.py | 97 +------------------ 2 files changed, 4 insertions(+), 95 deletions(-) diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index b99f71f59d..5df1d9a602 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -225,7 +225,7 @@ def qc_microsalt(context: click.Context, unique_id: str) -> None: """Perform QC on a microsalt case.""" analysis_api: MicrosaltAnalysisAPI = context.obj.meta_apis["analysis_api"] try: - analysis_api.microsalt_qc( + analysis_api.quality_checker.microsalt_qc( case_id=unique_id, run_dir_path=analysis_api.get_latest_case_path(case_id=unique_id), lims_project=analysis_api.get_project( diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 715320d4b8..4b13ae5e62 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -32,7 +32,7 @@ class QualityChecker: def __init__(self, status_db: Store): self.status_db = status_db - def quality_control(self, run_dir_path: Path, lims_project: str): + def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") quality_metrics: QualityMetrics = parse_quality_metrics(metrics_file_path) @@ -42,7 +42,7 @@ def quality_control(self, run_dir_path: Path, lims_project: str): result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) - self.quality_control_case(sample_results) + return self.quality_control_case(sample_results) def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) @@ -76,98 +76,7 @@ def quality_control_case(self, sample_results: list[QualityResult]) -> bool: control_passes_qc: bool = self.is_valid_negative_control(sample_results) urgent_pass_qc: bool = self.all_urgent_samples_pass_qc(sample_results) non_urgent_pass_qc: bool = self.non_urgent_samples_pass_qc(sample_results) - - def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: - """Check if given microSALT case passes QC check.""" - failed_samples: dict = {} - case_qc: dict = read_json(file_path=Path(run_dir_path, f"{lims_project}.json")) - - for sample_id in case_qc: - sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) - sample_check: dict | None = self.qc_sample_check( - sample=sample, - sample_qc=case_qc[sample_id], - ) - if sample_check is not None: - failed_samples[sample_id] = sample_check - - return self.qc_case_check( - case_id=case_id, - failed_samples=failed_samples, - number_of_samples=len(case_qc), - run_dir_path=run_dir_path, - ) - - def qc_case_check( - self, case_id: str, failed_samples: dict, number_of_samples: int, run_dir_path: Path - ) -> bool: - """Perform the final QC check for a microbial case based on failed samples.""" - qc_pass: bool = True - - for sample_id in failed_samples: - sample: Sample = self.status_db.get_sample_by_internal_id(internal_id=sample_id) - if sample.control == ControlEnum.negative: - qc_pass = False - if sample.application_version.application.tag == MicrosaltAppTags.MWRNXTR003: - qc_pass = False - - # Check if more than 10% of MWX samples failed - if len(failed_samples) / number_of_samples > MicrosaltQC.QC_PERCENT_THRESHOLD_MWX: - qc_pass = False - - if not qc_pass: - LOG.warning( - f"Case {case_id} failed QC, see {run_dir_path}/QC_done.json for more information." - ) - else: - LOG.info(f"Case {case_id} passed QC.") - - self.create_qc_done_file( - run_dir_path=run_dir_path, - failed_samples=failed_samples, - ) - return qc_pass - - def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: - """Creates a QC_done when a QC check is performed.""" - write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) - - def qc_sample_check(self, sample: Sample, sample_qc: dict) -> dict | None: - """Perform a QC on a sample.""" - if sample.control == ControlEnum.negative: - reads_pass: bool = self.check_external_negative_control_sample(sample) - if not reads_pass: - LOG.warning(f"Negative control sample {sample.internal_id} failed QC.") - return {"Passed QC Reads": reads_pass} - else: - reads_pass: bool = sample.sequencing_qc - coverage_10x_pass: bool = self.check_coverage_10x( - sample_name=sample.internal_id, sample_qc=sample_qc - ) - if not reads_pass or not coverage_10x_pass: - LOG.warning(f"Sample {sample.internal_id} failed QC.") - return {"Passed QC Reads": reads_pass, "Passed Coverage 10X": coverage_10x_pass} - - def check_coverage_10x(self, sample_name: str, sample_qc: dict) -> bool: - """Check if a sample passed the coverage_10x criteria.""" - try: - return ( - sample_qc["microsalt_samtools_stats"]["coverage_10x"] - >= MicrosaltQC.COVERAGE_10X_THRESHOLD - ) - except TypeError as e: - LOG.error( - f"There is no 10X coverage value for sample {sample_name}, setting qc to fail for this sample" - ) - LOG.error(f"See error: {e}") - return False - - def check_external_negative_control_sample(self, sample: Sample) -> bool: - """Check if external negative control passed read check""" - return sample.reads < ( - sample.application_version.application.target_reads - * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD - ) + return control_passes_qc and urgent_pass_qc and non_urgent_pass_qc def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: """Checks if a qc is required for a microbial case.""" From dbd596f84858d3caaeecaf0ae19ea5e5693e2bf7 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 15:23:09 +0100 Subject: [PATCH 23/65] Fix parsing of metrics --- cg/meta/workflow/microsalt/models.py | 36 ++++++------------- cg/meta/workflow/microsalt/quality_checker.py | 9 +++-- cg/meta/workflow/microsalt/utils.py | 3 +- tests/meta/workflow/conftest.py | 7 ++++ .../microsalt/test_parsing_metrics.py | 12 +++++++ tests/meta/workflow/test_microsalt.py | 4 --- 6 files changed, 37 insertions(+), 34 deletions(-) create mode 100644 tests/meta/workflow/microsalt/test_parsing_metrics.py diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/models.py index ee85af5b19..08fa9a93d5 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/models.py @@ -1,41 +1,25 @@ -from typing import List, Dict -from pydantic import BaseModel +from typing import Annotated, Any, Dict +from pydantic import BaseModel, BeforeValidator from cg.constants.constants import MicrosaltAppTags -from cg.store.models import Sample - -class BlastPubmlst(BaseModel): - sequence_type: MicrosaltAppTags - thresholds: str - - -class QuastAssembly(BaseModel): - estimated_genome_length: int - gc_percentage: str - n50: int - necessary_contigs: int +def empty_str_to_none(v: str) -> Any: + return v or None class PicardMarkduplicate(BaseModel): - insert_size: int - duplication_rate: float + insert_size: Annotated[int, BeforeValidator(empty_str_to_none)] + duplication_rate: Annotated[float | None, BeforeValidator(empty_str_to_none)] class MicrosaltSamtoolsStats(BaseModel): - total_reads: int - mapped_rate: float - average_coverage: float - coverage_10x: float - coverage_30x: float - coverage_50x: float - coverage_100x: float + total_reads: Annotated[int | None, BeforeValidator(empty_str_to_none)] + mapped_rate: Annotated[float | None, BeforeValidator(empty_str_to_none)] + average_coverage: Annotated[float | None, BeforeValidator(empty_str_to_none)] + coverage_10x: Annotated[float | None, BeforeValidator(empty_str_to_none)] class SampleMetrics(BaseModel): - blast_pubmlst: BlastPubmlst - quast_assembly: QuastAssembly - blast_resfinder_resistence: List[str] picard_markduplicate: PicardMarkduplicate microsalt_samtools_stats: MicrosaltSamtoolsStats diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 4b13ae5e62..e9fc4130d9 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -1,9 +1,8 @@ import logging from pathlib import Path -from cg.io.json import read_json, write_json - -from cg.constants.constants import MicrosaltAppTags, MicrosaltQC +from cg.io.json import write_json +from cg.constants.constants import MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( get_application_tag, @@ -91,6 +90,10 @@ def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: LOG.info(f"Performing QC on case {case_id}") return True + def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: + """Creates a QC_done when a QC check is performed.""" + write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) + def is_valid_total_reads(self, sample_id: str) -> bool: sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) target_reads: int = sample.application_version.application.target_reads diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 73e616d333..1dfa6e5700 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -37,7 +37,8 @@ def is_valid_10x_coverage(coverage_10x: float) -> bool: def parse_quality_metrics(file_path: Path) -> QualityMetrics: data = read_json(file_path) - return QualityMetrics.model_validate_json(data) + formatted_data = {"samples": data} + return QualityMetrics(**formatted_data) def is_sample_negative_control(sample: Sample) -> bool: diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index e620fcd871..356786ffaa 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -113,6 +113,13 @@ def microsalt_qc_fail_lims_project() -> str: return "ACC11111_qc_fail" +@pytest.fixture +def valid_microsalt_metrics_file( + microsalt_qc_fail_run_dir_path: Path, microsalt_qc_fail_lims_project: str +) -> Path: + return Path(microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json") + + @pytest.fixture(name="microsalt_case_qc_pass") def microsalt_case_qc_pass() -> str: """Return a microsalt case to pass QC.""" diff --git a/tests/meta/workflow/microsalt/test_parsing_metrics.py b/tests/meta/workflow/microsalt/test_parsing_metrics.py new file mode 100644 index 0000000000..c3bbd5732e --- /dev/null +++ b/tests/meta/workflow/microsalt/test_parsing_metrics.py @@ -0,0 +1,12 @@ +from pathlib import Path + +from cg.meta.workflow.microsalt.utils import parse_quality_metrics + + +def test_parse_valid_quality_metrics(valid_microsalt_metrics_file: Path): + # GIVEN a valid quality metrics file path + + # WHEN parsing the file + parse_quality_metrics(valid_microsalt_metrics_file) + + # THEN no error is thrown diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 63806d326c..dac7d047fc 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -2,17 +2,13 @@ import logging from pathlib import Path -import mock from cg.apps.tb.api import TrailblazerAPI -from cg.constants.constants import CaseActions, Pipeline from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI, QualityChecker from cg.models.cg_config import CGConfig from cg.models.orders.sample_base import ControlEnum from cg.store import Store from cg.store.models import Case -from tests.mocks.tb_mock import MockTB -from tests.store_helpers import StoreHelpers def test_qc_check_fail( From c118eef73f8dae2bdca0948560dd4d6d28fe8775 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 15:35:27 +0100 Subject: [PATCH 24/65] Pass metrics file path as single parameter to qc --- cg/cli/workflow/microsalt/base.py | 17 +++++------ cg/meta/workflow/microsalt/microsalt.py | 8 ++--- cg/meta/workflow/microsalt/quality_checker.py | 3 +- tests/meta/workflow/test_microsalt.py | 30 +++++++++---------- 4 files changed, 26 insertions(+), 32 deletions(-) diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index 5df1d9a602..7af61aef51 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -14,7 +14,7 @@ from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI from cg.models.cg_config import CGConfig -from cg.store.models import Sample +from cg.store.models import Case, Sample LOG = logging.getLogger(__name__) @@ -224,15 +224,12 @@ def start_available(context: click.Context, dry_run: bool = False): def qc_microsalt(context: click.Context, unique_id: str) -> None: """Perform QC on a microsalt case.""" analysis_api: MicrosaltAnalysisAPI = context.obj.meta_apis["analysis_api"] + run_dir_path: Path = analysis_api.get_latest_case_path(unique_id) + case: Case = analysis_api.status_db.get_case_by_internal_id(unique_id) + sample_id: str = case.samples[0].internal_id + lims_project: str = analysis_api.get_project(sample_id) + metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") try: - analysis_api.quality_checker.microsalt_qc( - case_id=unique_id, - run_dir_path=analysis_api.get_latest_case_path(case_id=unique_id), - lims_project=analysis_api.get_project( - analysis_api.status_db.get_case_by_internal_id(internal_id=unique_id) - .samples[0] - .internal_id - ), - ) + analysis_api.quality_checker.microsalt_qc(metrics_file_path) except IndexError: LOG.error(f"No existing analysis directories found for case {unique_id}.") diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index f99182b1b7..bb90b5419f 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -289,14 +289,12 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) + lims_project: str = self.get_project(case.samples[0].internal_id) + metrics_file_path: Path = Path(case_run_dir, f"{lims_project}.json") if self.quality_checker.is_qc_required( case_run_dir=case_run_dir, case_id=case.internal_id ): - if self.quality_checker.microsalt_qc( - case_id=case.internal_id, - run_dir_path=case_run_dir, - lims_project=self.get_project(case.samples[0].internal_id), - ): + if self.quality_checker.microsalt_qc(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") cases_to_store.append(case) else: diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index e9fc4130d9..91ea80f2d2 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -31,8 +31,7 @@ class QualityChecker: def __init__(self, status_db: Store): self.status_db = status_db - def microsalt_qc(self, case_id: str, run_dir_path: Path, lims_project: str) -> bool: - metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") + def microsalt_qc(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = parse_quality_metrics(metrics_file_path) sample_results: list[QualityResult] = [] diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index dac7d047fc..e29c21f49a 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -31,12 +31,12 @@ def test_qc_check_fail( mocker.patch.object(QualityChecker, "create_qc_done_file") - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( - case_id=microsalt_case_qc_fail, - run_dir_path=microsalt_qc_fail_run_dir_path, - lims_project=microsalt_qc_fail_lims_project, + # GIVEN the path to the metrics file + metrics_file_path = Path( + microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" ) + # WHEN performing QC check + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) # THEN the QC should fail assert not qc_pass @@ -63,12 +63,12 @@ def test_qc_check_pass( mocker.patch.object(QualityChecker, "create_qc_done_file") - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( - case_id=microsalt_case_qc_pass, - run_dir_path=microsalt_qc_pass_run_dir_path, - lims_project=microsalt_qc_pass_lims_project, + # GIVEN the path to the metrics file + metrics_file_path = Path( + microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json" ) + # WHEN performing QC check + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) # THEN the QC should pass assert qc_pass @@ -95,12 +95,12 @@ def test_qc_check_negative_control_fail( mocker.patch.object(QualityChecker, "create_qc_done_file") - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc( - case_id=microsalt_case_qc_fail, - run_dir_path=microsalt_qc_fail_run_dir_path, - lims_project=microsalt_qc_fail_lims_project, + # GIVEN the metrics file path + metrics_file_path = Path( + microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" ) + # WHEN performing QC check + qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) # THEN the QC should fail assert not qc_pass From 296b12fde8314439e52282d91a56e4fc2cd02806 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 15:50:36 +0100 Subject: [PATCH 25/65] Handle possible null values from metrics --- cg/meta/workflow/microsalt/quality_checker.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 91ea80f2d2..771fa8b5a5 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -33,7 +33,6 @@ def __init__(self, status_db: Store): def microsalt_qc(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = parse_quality_metrics(metrics_file_path) - sample_results: list[QualityResult] = [] for sample_id, metrics in quality_metrics: @@ -103,24 +102,24 @@ def is_valid_total_reads(self, sample_id: str) -> bool: return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: - mapped_rate: float = metrics.microsalt_samtools_stats.mapped_rate - return is_valid_mapping_rate(mapped_rate) + mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate + return is_valid_mapping_rate(mapped_rate) if mapped_rate else False def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: - duplication_rate: float = metrics.picard_markduplicate.duplication_rate - return is_valid_duplication_rate(duplication_rate) + duplication_rate: float | None = metrics.picard_markduplicate.duplication_rate + return is_valid_duplication_rate(duplication_rate) if duplication_rate else False def is_valid_median_insert_size(self, metrics: SampleMetrics) -> bool: - insert_size: int = metrics.picard_markduplicate.insert_size - return is_valid_median_insert_size(insert_size) + insert_size: int | None = metrics.picard_markduplicate.insert_size + return is_valid_median_insert_size(insert_size) if insert_size else False def is_valid_average_coverage(self, metrics: SampleMetrics) -> bool: - average_coverage: float = metrics.microsalt_samtools_stats.average_coverage - return is_valid_average_coverage(average_coverage) + coverage: float | None = metrics.microsalt_samtools_stats.average_coverage + return is_valid_average_coverage(coverage) if coverage else False def is_valid_10x_coverage(self, metrics: SampleMetrics) -> bool: - coverage_10x: float = metrics.microsalt_samtools_stats.coverage_10x - return is_valid_10x_coverage(coverage_10x) + coverage_10x: float | None = metrics.microsalt_samtools_stats.coverage_10x + return is_valid_10x_coverage(coverage_10x) if coverage_10x else False def is_valid_negative_control(self, results: list[QualityResult]) -> bool: negative_control_result: QualityResult = get_negative_control_result(results) From 603c3a5afbb28e2dc1b4abab91bcd8732b740fe9 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 12 Dec 2023 16:02:40 +0100 Subject: [PATCH 26/65] Restructure methods --- cg/meta/workflow/microsalt/quality_checker.py | 64 ++++--------------- cg/meta/workflow/microsalt/utils.py | 56 ++++++++++++++-- 2 files changed, 63 insertions(+), 57 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker.py index 771fa8b5a5..7125575f42 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker.py @@ -2,23 +2,21 @@ from pathlib import Path from cg.io.json import write_json -from cg.constants.constants import MicrosaltQC from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.meta.workflow.microsalt.utils import ( get_application_tag, - get_negative_control_result, - get_non_urgent_results, - get_results_passing_qc, - get_urgent_results, is_sample_negative_control, is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, - is_valid_mapping_rate, + is_valid_mapped_rate, is_valid_median_insert_size, + is_valid_negative_control, is_valid_total_reads, is_valid_total_reads_for_control, + non_urgent_samples_pass_qc, parse_quality_metrics, + urgent_samples_pass_qc, ) from cg.models.orders.sample_base import ControlEnum from cg.store.api.core import Store @@ -43,11 +41,11 @@ def microsalt_qc(self, metrics_file_path: Path) -> bool: def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) - valid_mapping: bool = self.is_valid_mapped_rate(metrics) - valid_duplication: bool = self.is_valid_duplication_rate(metrics) - valid_inserts: bool = self.is_valid_median_insert_size(metrics) - valid_coverage: bool = self.is_valid_average_coverage(metrics) - valid_10x_coverage: bool = self.is_valid_10x_coverage(metrics) + valid_mapping: bool = is_valid_mapped_rate(metrics) + valid_duplication: bool = is_valid_duplication_rate(metrics) + valid_inserts: bool = is_valid_median_insert_size(metrics) + valid_coverage: bool = is_valid_average_coverage(metrics) + valid_10x_coverage: bool = is_valid_10x_coverage(metrics) sample_passes_qc: bool = ( valid_reads @@ -70,9 +68,9 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual ) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: - control_passes_qc: bool = self.is_valid_negative_control(sample_results) - urgent_pass_qc: bool = self.all_urgent_samples_pass_qc(sample_results) - non_urgent_pass_qc: bool = self.non_urgent_samples_pass_qc(sample_results) + control_passes_qc: bool = is_valid_negative_control(sample_results) + urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results) + non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) return control_passes_qc and urgent_pass_qc and non_urgent_pass_qc def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: @@ -100,41 +98,3 @@ def is_valid_total_reads(self, sample_id: str) -> bool: if sample.control == ControlEnum.negative: return is_valid_total_reads_for_control(reads=sample_reads, target_reads=target_reads) return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) - - def is_valid_mapped_rate(self, metrics: SampleMetrics) -> bool: - mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate - return is_valid_mapping_rate(mapped_rate) if mapped_rate else False - - def is_valid_duplication_rate(self, metrics: SampleMetrics) -> bool: - duplication_rate: float | None = metrics.picard_markduplicate.duplication_rate - return is_valid_duplication_rate(duplication_rate) if duplication_rate else False - - def is_valid_median_insert_size(self, metrics: SampleMetrics) -> bool: - insert_size: int | None = metrics.picard_markduplicate.insert_size - return is_valid_median_insert_size(insert_size) if insert_size else False - - def is_valid_average_coverage(self, metrics: SampleMetrics) -> bool: - coverage: float | None = metrics.microsalt_samtools_stats.average_coverage - return is_valid_average_coverage(coverage) if coverage else False - - def is_valid_10x_coverage(self, metrics: SampleMetrics) -> bool: - coverage_10x: float | None = metrics.microsalt_samtools_stats.coverage_10x - return is_valid_10x_coverage(coverage_10x) if coverage_10x else False - - def is_valid_negative_control(self, results: list[QualityResult]) -> bool: - negative_control_result: QualityResult = get_negative_control_result(results) - return negative_control_result.passes_qc - - def all_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: - urgent_samples: list[QualityResult] = get_urgent_results(results) - return all(sample.passes_qc for sample in urgent_samples) - - def non_urgent_samples_pass_qc(self, results: list[QualityResult]) -> bool: - urgent_samples: list[QualityResult] = get_non_urgent_results(results) - passing_qc: list[QualityResult] = get_results_passing_qc(urgent_samples) - - if not urgent_samples: - return True - - fraction_passing_qc: float = len(passing_qc) / len(urgent_samples) - return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/utils.py index 1dfa6e5700..56fedf1cbe 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/utils.py @@ -2,7 +2,7 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.io.json import read_json -from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult +from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample @@ -49,16 +49,62 @@ def get_application_tag(sample: Sample) -> str: return sample.application_version.application.tag -def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] +def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.passes_qc] def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: return [result for result in results if result.application_tag != MicrosaltAppTags.MWRNXTR003] -def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.passes_qc] +def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] + + +def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: + urgent_results: list[QualityResult] = get_urgent_results(results) + return all(result.passes_qc for result in urgent_results) + + +def is_valid_mapped_rate(metrics: SampleMetrics) -> bool: + mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate + return is_valid_mapping_rate(mapped_rate) if mapped_rate else False + + +def is_valid_duplication_rate(metrics: SampleMetrics) -> bool: + duplication_rate: float | None = metrics.picard_markduplicate.duplication_rate + return is_valid_duplication_rate(duplication_rate) if duplication_rate else False + + +def is_valid_median_insert_size(metrics: SampleMetrics) -> bool: + insert_size: int | None = metrics.picard_markduplicate.insert_size + return is_valid_median_insert_size(insert_size) if insert_size else False + + +def is_valid_average_coverage(metrics: SampleMetrics) -> bool: + coverage: float | None = metrics.microsalt_samtools_stats.average_coverage + return is_valid_average_coverage(coverage) if coverage else False + + +def is_valid_10x_coverage(metrics: SampleMetrics) -> bool: + coverage_10x: float | None = metrics.microsalt_samtools_stats.coverage_10x + return is_valid_10x_coverage(coverage_10x) if coverage_10x else False + + +def is_valid_negative_control(results: list[QualityResult]) -> bool: + negative_control_result: QualityResult = get_negative_control_result(results) + return negative_control_result.passes_qc + + +def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: + urgent_samples: list[QualityResult] = get_non_urgent_results(results) + passing_qc: list[QualityResult] = get_results_passing_qc(urgent_samples) + + if not urgent_samples: + return True + + fraction_passing_qc: float = len(passing_qc) / len(urgent_samples) + return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX def get_negative_control_result(results: list[QualityResult]) -> QualityResult: From b1750d086bbeb6d5de689de5b7012b40bc5e1ba2 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 10:25:23 +0100 Subject: [PATCH 27/65] Restructure microsalt module --- cg/meta/workflow/microsalt/__init__.py | 1 - .../microsalt/metrics_parser/__init__.py | 1 + .../metrics_parser/metrics_parser.py | 12 +++++++++ .../microsalt/{ => metrics_parser}/models.py | 16 +++-------- cg/meta/workflow/microsalt/microsalt.py | 9 ++----- .../microsalt/quality_checker/__init__.py | 1 + .../microsalt/quality_checker/models.py | 10 +++++++ .../{ => quality_checker}/quality_checker.py | 27 +++++++------------ .../microsalt/{ => quality_checker}/utils.py | 23 ++++++---------- .../microsalt/test_parsing_metrics.py | 4 +-- .../microsalt/test_quality_control.py | 2 +- tests/meta/workflow/test_microsalt.py | 3 ++- 12 files changed, 53 insertions(+), 56 deletions(-) create mode 100644 cg/meta/workflow/microsalt/metrics_parser/__init__.py create mode 100644 cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py rename cg/meta/workflow/microsalt/{ => metrics_parser}/models.py (65%) create mode 100644 cg/meta/workflow/microsalt/quality_checker/__init__.py create mode 100644 cg/meta/workflow/microsalt/quality_checker/models.py rename cg/meta/workflow/microsalt/{ => quality_checker}/quality_checker.py (78%) rename cg/meta/workflow/microsalt/{ => quality_checker}/utils.py (84%) diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py index a3eb140356..ef4fec629a 100644 --- a/cg/meta/workflow/microsalt/__init__.py +++ b/cg/meta/workflow/microsalt/__init__.py @@ -1,2 +1 @@ from .microsalt import MicrosaltAnalysisAPI -from .quality_checker import QualityChecker diff --git a/cg/meta/workflow/microsalt/metrics_parser/__init__.py b/cg/meta/workflow/microsalt/metrics_parser/__init__.py new file mode 100644 index 0000000000..c384d8decd --- /dev/null +++ b/cg/meta/workflow/microsalt/metrics_parser/__init__.py @@ -0,0 +1 @@ +from .metrics_parser import MetricsParser \ No newline at end of file diff --git a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py new file mode 100644 index 0000000000..0be24355a1 --- /dev/null +++ b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py @@ -0,0 +1,12 @@ +from pathlib import Path + +from cg.io.json import read_json +from .models import QualityMetrics + + +class MetricsParser: + @staticmethod + def parse(file_path: Path) -> QualityMetrics: + data = read_json(file_path) + formatted_data = {"samples": data} + return QualityMetrics(**formatted_data) diff --git a/cg/meta/workflow/microsalt/models.py b/cg/meta/workflow/microsalt/metrics_parser/models.py similarity index 65% rename from cg/meta/workflow/microsalt/models.py rename to cg/meta/workflow/microsalt/metrics_parser/models.py index 08fa9a93d5..de91d116ce 100644 --- a/cg/meta/workflow/microsalt/models.py +++ b/cg/meta/workflow/microsalt/metrics_parser/models.py @@ -1,14 +1,13 @@ -from typing import Annotated, Any, Dict +from typing import Annotated from pydantic import BaseModel, BeforeValidator -from cg.constants.constants import MicrosaltAppTags -def empty_str_to_none(v: str) -> Any: +def empty_str_to_none(v: str) -> str | None: return v or None class PicardMarkduplicate(BaseModel): - insert_size: Annotated[int, BeforeValidator(empty_str_to_none)] + insert_size: Annotated[int | None, BeforeValidator(empty_str_to_none)] duplication_rate: Annotated[float | None, BeforeValidator(empty_str_to_none)] @@ -25,11 +24,4 @@ class SampleMetrics(BaseModel): class QualityMetrics(BaseModel): - samples: Dict[str, SampleMetrics] - - -class QualityResult(BaseModel): - sample_id: str - passes_qc: bool - is_negative_control: bool - application_tag: MicrosaltAppTags + samples: dict[str, SampleMetrics] diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index bb90b5419f..1b4f965238 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -167,10 +167,7 @@ def get_samples(self, case_id: str, sample_id: str | None = None) -> list[Sample def get_lims_comment(self, sample_id: str) -> str: """Returns the comment associated with a sample stored in lims""" comment: str = self.lims_api.get_sample_comment(sample_id) or "" - if re.match(r"\w{4}\d{2,3}", comment): - return comment - - return "" + return comment if re.match(r"\w{4}\d{2,3}", comment) else "" def get_organism(self, sample_obj: Sample) -> str: """Organism @@ -291,9 +288,7 @@ def get_cases_to_store(self) -> list[Case]: case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) lims_project: str = self.get_project(case.samples[0].internal_id) metrics_file_path: Path = Path(case_run_dir, f"{lims_project}.json") - if self.quality_checker.is_qc_required( - case_run_dir=case_run_dir, case_id=case.internal_id - ): + if self.quality_checker.is_qc_required(case_run_dir): if self.quality_checker.microsalt_qc(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") cases_to_store.append(case) diff --git a/cg/meta/workflow/microsalt/quality_checker/__init__.py b/cg/meta/workflow/microsalt/quality_checker/__init__.py new file mode 100644 index 0000000000..35da7ca738 --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_checker/__init__.py @@ -0,0 +1 @@ +from .quality_checker import QualityChecker diff --git a/cg/meta/workflow/microsalt/quality_checker/models.py b/cg/meta/workflow/microsalt/quality_checker/models.py new file mode 100644 index 0000000000..1a5c05b435 --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_checker/models.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + +from cg.constants.constants import MicrosaltAppTags + + +class QualityResult(BaseModel): + sample_id: str + passes_qc: bool + is_negative_control: bool + application_tag: MicrosaltAppTags diff --git a/cg/meta/workflow/microsalt/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker/quality_checker.py similarity index 78% rename from cg/meta/workflow/microsalt/quality_checker.py rename to cg/meta/workflow/microsalt/quality_checker/quality_checker.py index 7125575f42..6731369b30 100644 --- a/cg/meta/workflow/microsalt/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker/quality_checker.py @@ -2,20 +2,21 @@ from pathlib import Path from cg.io.json import write_json -from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics -from cg.meta.workflow.microsalt.utils import ( +from cg.meta.workflow.microsalt.metrics_parser import MetricsParser +from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics +from cg.meta.workflow.microsalt.quality_checker.models import QualityResult +from cg.meta.workflow.microsalt.quality_checker.utils import ( get_application_tag, is_sample_negative_control, is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, - is_valid_mapped_rate, + is_valid_mapping_rate, is_valid_median_insert_size, is_valid_negative_control, is_valid_total_reads, is_valid_total_reads_for_control, non_urgent_samples_pass_qc, - parse_quality_metrics, urgent_samples_pass_qc, ) from cg.models.orders.sample_base import ControlEnum @@ -30,7 +31,7 @@ def __init__(self, status_db: Store): self.status_db = status_db def microsalt_qc(self, metrics_file_path: Path) -> bool: - quality_metrics: QualityMetrics = parse_quality_metrics(metrics_file_path) + quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) sample_results: list[QualityResult] = [] for sample_id, metrics in quality_metrics: @@ -41,7 +42,7 @@ def microsalt_qc(self, metrics_file_path: Path) -> bool: def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) - valid_mapping: bool = is_valid_mapped_rate(metrics) + valid_mapping: bool = is_valid_mapping_rate(metrics) valid_duplication: bool = is_valid_duplication_rate(metrics) valid_inserts: bool = is_valid_median_insert_size(metrics) valid_coverage: bool = is_valid_average_coverage(metrics) @@ -73,21 +74,13 @@ def quality_control_case(self, sample_results: list[QualityResult]) -> bool: non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) return control_passes_qc and urgent_pass_qc and non_urgent_pass_qc - def is_qc_required(self, case_run_dir: Path | None, case_id: str) -> bool: - """Checks if a qc is required for a microbial case.""" + def is_qc_required(self, case_run_dir: Path) -> bool: if case_run_dir is None: - LOG.info(f"There are no running directories for case {case_id}.") return False - - if case_run_dir.joinpath("QC_done.json").exists(): - LOG.info(f"QC already performed for case {case_id}, storing case.") - return False - - LOG.info(f"Performing QC on case {case_id}") - return True + qc_done_path: Path = case_run_dir.joinpath("QC_done.json") + return not qc_done_path.exists() def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: - """Creates a QC_done when a QC check is performed.""" write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) def is_valid_total_reads(self, sample_id: str) -> bool: diff --git a/cg/meta/workflow/microsalt/utils.py b/cg/meta/workflow/microsalt/quality_checker/utils.py similarity index 84% rename from cg/meta/workflow/microsalt/utils.py rename to cg/meta/workflow/microsalt/quality_checker/utils.py index 56fedf1cbe..f6565a8d6d 100644 --- a/cg/meta/workflow/microsalt/utils.py +++ b/cg/meta/workflow/microsalt/quality_checker/utils.py @@ -1,8 +1,6 @@ -from pathlib import Path - from cg.constants.constants import MicrosaltAppTags, MicrosaltQC -from cg.io.json import read_json -from cg.meta.workflow.microsalt.models import QualityMetrics, QualityResult, SampleMetrics +from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics +from cg.meta.workflow.microsalt.quality_checker.models import QualityResult from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample @@ -35,12 +33,6 @@ def is_valid_10x_coverage(coverage_10x: float) -> bool: return coverage_10x > MicrosaltQC.COVERAGE_10X_THRESHOLD -def parse_quality_metrics(file_path: Path) -> QualityMetrics: - data = read_json(file_path) - formatted_data = {"samples": data} - return QualityMetrics(**formatted_data) - - def is_sample_negative_control(sample: Sample) -> bool: return sample.control == ControlEnum.negative @@ -66,7 +58,7 @@ def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: return all(result.passes_qc for result in urgent_results) -def is_valid_mapped_rate(metrics: SampleMetrics) -> bool: +def is_valid_mapping_rate(metrics: SampleMetrics) -> bool: mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate return is_valid_mapping_rate(mapped_rate) if mapped_rate else False @@ -97,13 +89,13 @@ def is_valid_negative_control(results: list[QualityResult]) -> bool: def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: - urgent_samples: list[QualityResult] = get_non_urgent_results(results) - passing_qc: list[QualityResult] = get_results_passing_qc(urgent_samples) + non_urgent_samples: list[QualityResult] = get_non_urgent_results(results) + passing_qc: list[QualityResult] = get_results_passing_qc(non_urgent_samples) - if not urgent_samples: + if not non_urgent_samples: return True - fraction_passing_qc: float = len(passing_qc) / len(urgent_samples) + fraction_passing_qc: float = len(passing_qc) / len(non_urgent_samples) return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX @@ -111,3 +103,4 @@ def get_negative_control_result(results: list[QualityResult]) -> QualityResult: for result in results: if result.is_negative_control: return result + raise ValueError("No negative control result found") diff --git a/tests/meta/workflow/microsalt/test_parsing_metrics.py b/tests/meta/workflow/microsalt/test_parsing_metrics.py index c3bbd5732e..43fde2491f 100644 --- a/tests/meta/workflow/microsalt/test_parsing_metrics.py +++ b/tests/meta/workflow/microsalt/test_parsing_metrics.py @@ -1,12 +1,12 @@ from pathlib import Path -from cg.meta.workflow.microsalt.utils import parse_quality_metrics +from cg.meta.workflow.microsalt.metrics_parser import MetricsParser def test_parse_valid_quality_metrics(valid_microsalt_metrics_file: Path): # GIVEN a valid quality metrics file path # WHEN parsing the file - parse_quality_metrics(valid_microsalt_metrics_file) + MetricsParser.parse(valid_microsalt_metrics_file) # THEN no error is thrown diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index e8c061114b..89c6d46d10 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -1,4 +1,4 @@ -from cg.meta.workflow.microsalt.utils import is_valid_total_reads +from cg.meta.workflow.microsalt.quality_checker.utils import is_valid_total_reads def test_sample_total_reads_passing(): diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index e29c21f49a..0e2c110728 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -4,7 +4,8 @@ from cg.apps.tb.api import TrailblazerAPI -from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI, QualityChecker +from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI +from cg.meta.workflow.microsalt.quality_checker import QualityChecker from cg.models.cg_config import CGConfig from cg.models.orders.sample_base import ControlEnum from cg.store import Store From 151b157a9df6cd73ed946dbf0f6dcabc2c7f2005 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 11:19:59 +0100 Subject: [PATCH 28/65] Generate quality report --- cg/cli/workflow/microsalt/base.py | 2 +- .../microsalt/metrics_parser/__init__.py | 2 +- cg/meta/workflow/microsalt/microsalt.py | 9 +------ .../microsalt/quality_checker/models.py | 8 +++++- .../quality_checker/quality_checker.py | 15 +++++++---- .../quality_checker/report_generatory.py | 27 +++++++++++++++++++ .../microsalt/quality_checker/utils.py | 2 +- tests/meta/workflow/test_microsalt.py | 6 ++--- 8 files changed, 51 insertions(+), 20 deletions(-) create mode 100644 cg/meta/workflow/microsalt/quality_checker/report_generatory.py diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index 7af61aef51..9d06c52499 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -230,6 +230,6 @@ def qc_microsalt(context: click.Context, unique_id: str) -> None: lims_project: str = analysis_api.get_project(sample_id) metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") try: - analysis_api.quality_checker.microsalt_qc(metrics_file_path) + analysis_api.quality_checker.quality_control(metrics_file_path) except IndexError: LOG.error(f"No existing analysis directories found for case {unique_id}.") diff --git a/cg/meta/workflow/microsalt/metrics_parser/__init__.py b/cg/meta/workflow/microsalt/metrics_parser/__init__.py index c384d8decd..21c99c7f74 100644 --- a/cg/meta/workflow/microsalt/metrics_parser/__init__.py +++ b/cg/meta/workflow/microsalt/metrics_parser/__init__.py @@ -1 +1 @@ -from .metrics_parser import MetricsParser \ No newline at end of file +from .metrics_parser import MetricsParser diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 1b4f965238..eccba202c0 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -1,10 +1,3 @@ -""" API to manage Microsalt Analyses - Organism - Fallback based on reference, ‘Other species’ and ‘Comment’. Default to “Unset”. - Priority = Default to empty string. Weird response. Typically “standard” or “research”. - Reference = Defaults to “None” - Method: Outputted as “1273:23”. Defaults to “Not in LIMS” - Date: Returns latest == most recent date. Outputted as DT object “YYYY MM DD”. Defaults to - datetime.min""" import glob import logging import os @@ -289,7 +282,7 @@ def get_cases_to_store(self) -> list[Case]: lims_project: str = self.get_project(case.samples[0].internal_id) metrics_file_path: Path = Path(case_run_dir, f"{lims_project}.json") if self.quality_checker.is_qc_required(case_run_dir): - if self.quality_checker.microsalt_qc(metrics_file_path): + if self.quality_checker.quality_control(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") cases_to_store.append(case) else: diff --git a/cg/meta/workflow/microsalt/quality_checker/models.py b/cg/meta/workflow/microsalt/quality_checker/models.py index 1a5c05b435..7b20af78dc 100644 --- a/cg/meta/workflow/microsalt/quality_checker/models.py +++ b/cg/meta/workflow/microsalt/quality_checker/models.py @@ -6,5 +6,11 @@ class QualityResult(BaseModel): sample_id: str passes_qc: bool - is_negative_control: bool + is_control: bool application_tag: MicrosaltAppTags + passes_reads_qc: bool + passes_mapping_qc: bool + passes_duplication_qc: bool + passes_inserts_qc: bool + passes_coverage_qc: bool + passes_10x_coverage_qc: bool diff --git a/cg/meta/workflow/microsalt/quality_checker/quality_checker.py b/cg/meta/workflow/microsalt/quality_checker/quality_checker.py index 6731369b30..cba14e3a5f 100644 --- a/cg/meta/workflow/microsalt/quality_checker/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_checker/quality_checker.py @@ -5,6 +5,7 @@ from cg.meta.workflow.microsalt.metrics_parser import MetricsParser from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics from cg.meta.workflow.microsalt.quality_checker.models import QualityResult +from cg.meta.workflow.microsalt.quality_checker.report_generatory import ReportGenerator from cg.meta.workflow.microsalt.quality_checker.utils import ( get_application_tag, is_sample_negative_control, @@ -30,7 +31,7 @@ class QualityChecker: def __init__(self, status_db: Store): self.status_db = status_db - def microsalt_qc(self, metrics_file_path: Path) -> bool: + def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) sample_results: list[QualityResult] = [] @@ -38,6 +39,7 @@ def microsalt_qc(self, metrics_file_path: Path) -> bool: result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) + ReportGenerator.report(out_dir=metrics_file_path.parent, results=sample_results) return self.quality_control_case(sample_results) def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: @@ -64,8 +66,14 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual return QualityResult( sample_id=sample_id, passed=sample_passes_qc, - is_negative_control=is_control, + is_control=is_control, application_tag=application_tag, + passes_reads_qc=valid_reads, + passes_mapping_qc=valid_mapping, + passes_duplication_qc=valid_duplication, + passes_inserts_qc=valid_inserts, + passes_coverage_qc=valid_coverage, + passes_10x_coverage_qc=valid_10x_coverage, ) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: @@ -80,9 +88,6 @@ def is_qc_required(self, case_run_dir: Path) -> bool: qc_done_path: Path = case_run_dir.joinpath("QC_done.json") return not qc_done_path.exists() - def create_qc_done_file(self, run_dir_path: Path, failed_samples: dict) -> None: - write_json(file_path=run_dir_path.joinpath("QC_done.json"), content=failed_samples) - def is_valid_total_reads(self, sample_id: str) -> bool: sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) target_reads: int = sample.application_version.application.target_reads diff --git a/cg/meta/workflow/microsalt/quality_checker/report_generatory.py b/cg/meta/workflow/microsalt/quality_checker/report_generatory.py new file mode 100644 index 0000000000..e1df694b7b --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_checker/report_generatory.py @@ -0,0 +1,27 @@ +from pathlib import Path + +from cg.io.json import write_json +from cg.meta.workflow.microsalt.quality_checker.models import QualityResult + + +class ReportGenerator: + @staticmethod + def report(out_dir: Path, results: list[QualityResult]): + formatted_results: list[dict] = [] + + for result in results: + formatted_result = { + result.sample_id: { + "Passed QC": result.passes_qc, + "Passed QC Reads": result.passes_reads_qc, + "Passed QC Mapping": result.passes_mapping_qc, + "Passed QC Duplication": result.passes_duplication_qc, + "Passed QC Insert Size": result.passes_inserts_qc, + "Passed QC Coverage": result.passes_coverage_qc, + "Passed QC 10x Coverage": result.passes_10x_coverage_qc, + } + } + formatted_results.append(formatted_result) + + out_file: Path = Path(out_dir, "QC_report.json") + write_json(out_file, formatted_results) diff --git a/cg/meta/workflow/microsalt/quality_checker/utils.py b/cg/meta/workflow/microsalt/quality_checker/utils.py index f6565a8d6d..e9baeb246c 100644 --- a/cg/meta/workflow/microsalt/quality_checker/utils.py +++ b/cg/meta/workflow/microsalt/quality_checker/utils.py @@ -101,6 +101,6 @@ def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: def get_negative_control_result(results: list[QualityResult]) -> QualityResult: for result in results: - if result.is_negative_control: + if result.is_control: return result raise ValueError("No negative control result found") diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 0e2c110728..49086894eb 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -37,7 +37,7 @@ def test_qc_check_fail( microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" ) # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) + qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) # THEN the QC should fail assert not qc_pass @@ -69,7 +69,7 @@ def test_qc_check_pass( microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json" ) # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) + qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) # THEN the QC should pass assert qc_pass @@ -101,7 +101,7 @@ def test_qc_check_negative_control_fail( microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" ) # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.microsalt_qc(metrics_file_path) + qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) # THEN the QC should fail assert not qc_pass From e8c5a4be6518cf333b278bc65b0de50b132518ca Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 11:23:40 +0100 Subject: [PATCH 29/65] Fix naming --- cg/meta/workflow/microsalt/microsalt.py | 4 ++-- .../workflow/microsalt/quality_checker/__init__.py | 1 - .../microsalt/quality_controller/__init__.py | 1 + .../models.py | 0 .../quality_controller.py} | 8 ++++---- .../report_generatory.py | 2 +- .../{quality_checker => quality_controller}/utils.py | 2 +- .../meta/workflow/microsalt/test_quality_control.py | 2 +- tests/meta/workflow/test_microsalt.py | 12 ++++++------ 9 files changed, 16 insertions(+), 16 deletions(-) delete mode 100644 cg/meta/workflow/microsalt/quality_checker/__init__.py create mode 100644 cg/meta/workflow/microsalt/quality_controller/__init__.py rename cg/meta/workflow/microsalt/{quality_checker => quality_controller}/models.py (100%) rename cg/meta/workflow/microsalt/{quality_checker/quality_checker.py => quality_controller/quality_controller.py} (93%) rename cg/meta/workflow/microsalt/{quality_checker => quality_controller}/report_generatory.py (92%) rename cg/meta/workflow/microsalt/{quality_checker => quality_controller}/utils.py (98%) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index eccba202c0..5bd6e11224 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -14,7 +14,7 @@ from cg.exc import CgDataError from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.fastq import MicrosaltFastqHandler -from cg.meta.workflow.microsalt.quality_checker import QualityChecker +from cg.meta.workflow.microsalt.quality_controller import QualityController from cg.models.cg_config import CGConfig from cg.store.models import Case, Sample from cg.utils import Process @@ -29,7 +29,7 @@ def __init__(self, config: CGConfig, pipeline: Pipeline = Pipeline.MICROSALT): super().__init__(pipeline, config) self.root_dir = config.microsalt.root self.queries_path = config.microsalt.queries_path - self.quality_checker = QualityChecker(config.status_db) + self.quality_checker = QualityController(config.status_db) @property def use_read_count_threshold(self) -> bool: diff --git a/cg/meta/workflow/microsalt/quality_checker/__init__.py b/cg/meta/workflow/microsalt/quality_checker/__init__.py deleted file mode 100644 index 35da7ca738..0000000000 --- a/cg/meta/workflow/microsalt/quality_checker/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .quality_checker import QualityChecker diff --git a/cg/meta/workflow/microsalt/quality_controller/__init__.py b/cg/meta/workflow/microsalt/quality_controller/__init__.py new file mode 100644 index 0000000000..b6fdaafd87 --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_controller/__init__.py @@ -0,0 +1 @@ +from .quality_controller import QualityController diff --git a/cg/meta/workflow/microsalt/quality_checker/models.py b/cg/meta/workflow/microsalt/quality_controller/models.py similarity index 100% rename from cg/meta/workflow/microsalt/quality_checker/models.py rename to cg/meta/workflow/microsalt/quality_controller/models.py diff --git a/cg/meta/workflow/microsalt/quality_checker/quality_checker.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py similarity index 93% rename from cg/meta/workflow/microsalt/quality_checker/quality_checker.py rename to cg/meta/workflow/microsalt/quality_controller/quality_controller.py index cba14e3a5f..fa63d31ef4 100644 --- a/cg/meta/workflow/microsalt/quality_checker/quality_checker.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -4,9 +4,9 @@ from cg.io.json import write_json from cg.meta.workflow.microsalt.metrics_parser import MetricsParser from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics -from cg.meta.workflow.microsalt.quality_checker.models import QualityResult -from cg.meta.workflow.microsalt.quality_checker.report_generatory import ReportGenerator -from cg.meta.workflow.microsalt.quality_checker.utils import ( +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.report_generatory import ReportGenerator +from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, is_sample_negative_control, is_valid_10x_coverage, @@ -27,7 +27,7 @@ LOG = logging.getLogger(__name__) -class QualityChecker: +class QualityController: def __init__(self, status_db: Store): self.status_db = status_db diff --git a/cg/meta/workflow/microsalt/quality_checker/report_generatory.py b/cg/meta/workflow/microsalt/quality_controller/report_generatory.py similarity index 92% rename from cg/meta/workflow/microsalt/quality_checker/report_generatory.py rename to cg/meta/workflow/microsalt/quality_controller/report_generatory.py index e1df694b7b..a134e813a9 100644 --- a/cg/meta/workflow/microsalt/quality_checker/report_generatory.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generatory.py @@ -1,7 +1,7 @@ from pathlib import Path from cg.io.json import write_json -from cg.meta.workflow.microsalt.quality_checker.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult class ReportGenerator: diff --git a/cg/meta/workflow/microsalt/quality_checker/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py similarity index 98% rename from cg/meta/workflow/microsalt/quality_checker/utils.py rename to cg/meta/workflow/microsalt/quality_controller/utils.py index e9baeb246c..a4295fcb07 100644 --- a/cg/meta/workflow/microsalt/quality_checker/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -1,6 +1,6 @@ from cg.constants.constants import MicrosaltAppTags, MicrosaltQC from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics -from cg.meta.workflow.microsalt.quality_checker.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_quality_control.py index 89c6d46d10..6279c08319 100644 --- a/tests/meta/workflow/microsalt/test_quality_control.py +++ b/tests/meta/workflow/microsalt/test_quality_control.py @@ -1,4 +1,4 @@ -from cg.meta.workflow.microsalt.quality_checker.utils import is_valid_total_reads +from cg.meta.workflow.microsalt.quality_controller.utils import is_valid_total_reads def test_sample_total_reads_passing(): diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 49086894eb..7d39fc8ec8 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -5,7 +5,7 @@ from cg.apps.tb.api import TrailblazerAPI from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI -from cg.meta.workflow.microsalt.quality_checker import QualityChecker +from cg.meta.workflow.microsalt.quality_controller import QualityController from cg.models.cg_config import CGConfig from cg.models.orders.sample_base import ControlEnum from cg.store import Store @@ -30,7 +30,7 @@ def test_qc_check_fail( for index in range(4): microsalt_case.samples[index].reads = 1000 - mocker.patch.object(QualityChecker, "create_qc_done_file") + mocker.patch.object(QualityController, "create_qc_done_file") # GIVEN the path to the metrics file metrics_file_path = Path( @@ -62,7 +62,7 @@ def test_qc_check_pass( microsalt_case.samples[1].control = ControlEnum.negative microsalt_case.samples[1].reads = 1100000 - mocker.patch.object(QualityChecker, "create_qc_done_file") + mocker.patch.object(QualityController, "create_qc_done_file") # GIVEN the path to the metrics file metrics_file_path = Path( @@ -94,7 +94,7 @@ def test_qc_check_negative_control_fail( microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) microsalt_case.samples[0].control = ControlEnum.negative - mocker.patch.object(QualityChecker, "create_qc_done_file") + mocker.patch.object(QualityController, "create_qc_done_file") # GIVEN the metrics file path metrics_file_path = Path( @@ -149,7 +149,7 @@ def test_get_cases_to_store_pass( caplog.set_level(logging.INFO) store = qc_microsalt_context.status_db microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(QualityChecker, "create_qc_done_file") + mocker.patch.object(QualityController, "create_qc_done_file") mocker.patch.object(TrailblazerAPI, "set_analysis_status") mocker.patch.object(TrailblazerAPI, "add_comment") @@ -191,7 +191,7 @@ def test_get_cases_to_store_fail( caplog.set_level(logging.INFO) store = qc_microsalt_context.status_db microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(QualityChecker, "create_qc_done_file") + mocker.patch.object(QualityController, "create_qc_done_file") mocker.patch.object(TrailblazerAPI, "set_analysis_status") mocker.patch.object(TrailblazerAPI, "add_comment") From 45869be70bb5845dd3073685aedbb26dd0ece8e5 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 11:34:48 +0100 Subject: [PATCH 30/65] Cleaning --- .../microsalt/metrics_parser/__init__.py | 1 + .../quality_controller/quality_controller.py | 20 +++++++++---------- ...port_generatory.py => report_generator.py} | 0 .../microsalt/quality_controller/utils.py | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) rename cg/meta/workflow/microsalt/quality_controller/{report_generatory.py => report_generator.py} (100%) diff --git a/cg/meta/workflow/microsalt/metrics_parser/__init__.py b/cg/meta/workflow/microsalt/metrics_parser/__init__.py index 21c99c7f74..2daaf38068 100644 --- a/cg/meta/workflow/microsalt/metrics_parser/__init__.py +++ b/cg/meta/workflow/microsalt/metrics_parser/__init__.py @@ -1 +1,2 @@ from .metrics_parser import MetricsParser +from .models import QualityMetrics, SampleMetrics diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index fa63d31ef4..927f932ee9 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -1,11 +1,9 @@ import logging from pathlib import Path -from cg.io.json import write_json -from cg.meta.workflow.microsalt.metrics_parser import MetricsParser -from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics +from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics from cg.meta.workflow.microsalt.quality_controller.models import QualityResult -from cg.meta.workflow.microsalt.quality_controller.report_generatory import ReportGenerator +from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, is_sample_negative_control, @@ -14,7 +12,7 @@ is_valid_duplication_rate, is_valid_mapping_rate, is_valid_median_insert_size, - is_valid_negative_control, + negative_control_pass_qc, is_valid_total_reads, is_valid_total_reads_for_control, non_urgent_samples_pass_qc, @@ -33,14 +31,16 @@ def __init__(self, status_db: Store): def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) - sample_results: list[QualityResult] = [] + sample_results: list[QualityResult] = self.quality_control_samples(quality_metrics) + ReportGenerator.report(out_dir=metrics_file_path.parent, results=sample_results) + return self.quality_control_case(sample_results) + def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[QualityResult]: + sample_results: list[QualityResult] = [] for sample_id, metrics in quality_metrics: result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) - - ReportGenerator.report(out_dir=metrics_file_path.parent, results=sample_results) - return self.quality_control_case(sample_results) + return sample_results def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) @@ -77,7 +77,7 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual ) def quality_control_case(self, sample_results: list[QualityResult]) -> bool: - control_passes_qc: bool = is_valid_negative_control(sample_results) + control_passes_qc: bool = negative_control_pass_qc(sample_results) urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results) non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) return control_passes_qc and urgent_pass_qc and non_urgent_pass_qc diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generatory.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py similarity index 100% rename from cg/meta/workflow/microsalt/quality_controller/report_generatory.py rename to cg/meta/workflow/microsalt/quality_controller/report_generator.py diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index a4295fcb07..2b2705403c 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -83,7 +83,7 @@ def is_valid_10x_coverage(metrics: SampleMetrics) -> bool: return is_valid_10x_coverage(coverage_10x) if coverage_10x else False -def is_valid_negative_control(results: list[QualityResult]) -> bool: +def negative_control_pass_qc(results: list[QualityResult]) -> bool: negative_control_result: QualityResult = get_negative_control_result(results) return negative_control_result.passes_qc From d6372b989b08ae185eba6d021a7bc3b22f94733f Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 11:57:41 +0100 Subject: [PATCH 31/65] Test report generator --- .../quality_controller/quality_controller.py | 3 +- .../quality_controller/report_generator.py | 5 +- .../microsalt/quality_controller/utils.py | 50 +++++++++---------- tests/meta/workflow/microsalt/conftest.py | 46 +++++++++++++++++ .../microsalt/test_report_generation.py | 21 ++++++++ ...{test_quality_control.py => test_utils.py} | 0 6 files changed, 96 insertions(+), 29 deletions(-) create mode 100644 tests/meta/workflow/microsalt/conftest.py create mode 100644 tests/meta/workflow/microsalt/test_report_generation.py rename tests/meta/workflow/microsalt/{test_quality_control.py => test_utils.py} (100%) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 927f932ee9..b65c402311 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -32,7 +32,8 @@ def __init__(self, status_db: Store): def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) sample_results: list[QualityResult] = self.quality_control_samples(quality_metrics) - ReportGenerator.report(out_dir=metrics_file_path.parent, results=sample_results) + report_file: Path = metrics_file_path.parent.joinpath("QC_done.json") + ReportGenerator.report(out_file=report_file, results=sample_results) return self.quality_control_case(sample_results) def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[QualityResult]: diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index a134e813a9..36dff8fef0 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -6,7 +6,7 @@ class ReportGenerator: @staticmethod - def report(out_dir: Path, results: list[QualityResult]): + def report(out_file: Path, results: list[QualityResult]): formatted_results: list[dict] = [] for result in results: @@ -23,5 +23,4 @@ def report(out_dir: Path, results: list[QualityResult]): } formatted_results.append(formatted_result) - out_file: Path = Path(out_dir, "QC_report.json") - write_json(out_file, formatted_results) + write_json(file_path=out_file, content=formatted_results) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 2b2705403c..a2fd12a4c1 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -33,31 +33,6 @@ def is_valid_10x_coverage(coverage_10x: float) -> bool: return coverage_10x > MicrosaltQC.COVERAGE_10X_THRESHOLD -def is_sample_negative_control(sample: Sample) -> bool: - return sample.control == ControlEnum.negative - - -def get_application_tag(sample: Sample) -> str: - return sample.application_version.application.tag - - -def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.passes_qc] - - -def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.application_tag != MicrosaltAppTags.MWRNXTR003] - - -def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] - - -def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: - urgent_results: list[QualityResult] = get_urgent_results(results) - return all(result.passes_qc for result in urgent_results) - - def is_valid_mapping_rate(metrics: SampleMetrics) -> bool: mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate return is_valid_mapping_rate(mapped_rate) if mapped_rate else False @@ -88,6 +63,23 @@ def negative_control_pass_qc(results: list[QualityResult]) -> bool: return negative_control_result.passes_qc +def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.passes_qc] + + +def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.application_tag != MicrosaltAppTags.MWRNXTR003] + + +def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: + return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] + + +def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: + urgent_results: list[QualityResult] = get_urgent_results(results) + return all(result.passes_qc for result in urgent_results) + + def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: non_urgent_samples: list[QualityResult] = get_non_urgent_results(results) passing_qc: list[QualityResult] = get_results_passing_qc(non_urgent_samples) @@ -104,3 +96,11 @@ def get_negative_control_result(results: list[QualityResult]) -> QualityResult: if result.is_control: return result raise ValueError("No negative control result found") + + +def is_sample_negative_control(sample: Sample) -> bool: + return sample.control == ControlEnum.negative + + +def get_application_tag(sample: Sample) -> str: + return sample.application_version.application.tag diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py new file mode 100644 index 0000000000..3a86bd576d --- /dev/null +++ b/tests/meta/workflow/microsalt/conftest.py @@ -0,0 +1,46 @@ +import pytest +from cg.constants.constants import MicrosaltAppTags + +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult + + +@pytest.fixture +def quality_results() -> list[QualityResult]: + return [ + QualityResult( + sample_id="sample1", + passes_qc=False, + is_control=True, + application_tag=MicrosaltAppTags.MWRNXTR003, + passes_reads_qc=True, + passes_mapping_qc=True, + passes_duplication_qc=False, + passes_inserts_qc=True, + passes_coverage_qc=True, + passes_10x_coverage_qc=True, + ), + QualityResult( + sample_id="sample2", + passes_qc=True, + is_control=False, + application_tag=MicrosaltAppTags.MWRNXTR003, + passes_reads_qc=True, + passes_mapping_qc=True, + passes_duplication_qc=True, + passes_inserts_qc=True, + passes_coverage_qc=True, + passes_10x_coverage_qc=True, + ), + QualityResult( + sample_id="sample3", + passes_qc=False, + is_control=False, + application_tag=MicrosaltAppTags.MWRNXTR003, + passes_reads_qc=False, + passes_mapping_qc=True, + passes_duplication_qc=False, + passes_inserts_qc=True, + passes_coverage_qc=True, + passes_10x_coverage_qc=False, + ), + ] diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py new file mode 100644 index 0000000000..22b921e5c5 --- /dev/null +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -0,0 +1,21 @@ +from pathlib import Path + +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator + + +def test_generate_report_without_results(): + pass + + +def test_generate_report_with_results(quality_results: list[QualityResult], tmp_path: Path): + # GIVEN quality results + + # GIVEN a file path to write the report to + out_file = Path(tmp_path, "QC_done.json") + + # WHEN generating a report + ReportGenerator.report(out_file=out_file, results=quality_results) + + # THEN the report is written to the directory + assert out_file.exists() diff --git a/tests/meta/workflow/microsalt/test_quality_control.py b/tests/meta/workflow/microsalt/test_utils.py similarity index 100% rename from tests/meta/workflow/microsalt/test_quality_control.py rename to tests/meta/workflow/microsalt/test_utils.py From 22078c81182ec842875e9e40ae8c06c0ae7efdfd Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 12:48:01 +0100 Subject: [PATCH 32/65] Add tests for utils --- .../quality_controller/quality_controller.py | 20 +-- .../microsalt/quality_controller/utils.py | 10 +- .../microsalt/test_report_generation.py | 11 +- tests/meta/workflow/microsalt/test_utils.py | 168 +++++++++++++++++- 4 files changed, 184 insertions(+), 25 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index b65c402311..4b92b5bf56 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -7,11 +7,11 @@ from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, is_sample_negative_control, - is_valid_10x_coverage, - is_valid_average_coverage, - is_valid_duplication_rate, - is_valid_mapping_rate, - is_valid_median_insert_size, + has_valid_10x_coverage, + has_valid_average_coverage, + has_valid_duplication_rate, + has_valid_mapping_rate, + has_valid_median_insert_size, negative_control_pass_qc, is_valid_total_reads, is_valid_total_reads_for_control, @@ -45,11 +45,11 @@ def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[Quali def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: valid_reads: bool = self.is_valid_total_reads(sample_id) - valid_mapping: bool = is_valid_mapping_rate(metrics) - valid_duplication: bool = is_valid_duplication_rate(metrics) - valid_inserts: bool = is_valid_median_insert_size(metrics) - valid_coverage: bool = is_valid_average_coverage(metrics) - valid_10x_coverage: bool = is_valid_10x_coverage(metrics) + valid_mapping: bool = has_valid_mapping_rate(metrics) + valid_duplication: bool = has_valid_duplication_rate(metrics) + valid_inserts: bool = has_valid_median_insert_size(metrics) + valid_coverage: bool = has_valid_average_coverage(metrics) + valid_10x_coverage: bool = has_valid_10x_coverage(metrics) sample_passes_qc: bool = ( valid_reads diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index a2fd12a4c1..7b4dcaaecd 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -33,27 +33,27 @@ def is_valid_10x_coverage(coverage_10x: float) -> bool: return coverage_10x > MicrosaltQC.COVERAGE_10X_THRESHOLD -def is_valid_mapping_rate(metrics: SampleMetrics) -> bool: +def has_valid_mapping_rate(metrics: SampleMetrics) -> bool: mapped_rate: float | None = metrics.microsalt_samtools_stats.mapped_rate return is_valid_mapping_rate(mapped_rate) if mapped_rate else False -def is_valid_duplication_rate(metrics: SampleMetrics) -> bool: +def has_valid_duplication_rate(metrics: SampleMetrics) -> bool: duplication_rate: float | None = metrics.picard_markduplicate.duplication_rate return is_valid_duplication_rate(duplication_rate) if duplication_rate else False -def is_valid_median_insert_size(metrics: SampleMetrics) -> bool: +def has_valid_median_insert_size(metrics: SampleMetrics) -> bool: insert_size: int | None = metrics.picard_markduplicate.insert_size return is_valid_median_insert_size(insert_size) if insert_size else False -def is_valid_average_coverage(metrics: SampleMetrics) -> bool: +def has_valid_average_coverage(metrics: SampleMetrics) -> bool: coverage: float | None = metrics.microsalt_samtools_stats.average_coverage return is_valid_average_coverage(coverage) if coverage else False -def is_valid_10x_coverage(metrics: SampleMetrics) -> bool: +def has_valid_10x_coverage(metrics: SampleMetrics) -> bool: coverage_10x: float | None = metrics.microsalt_samtools_stats.coverage_10x return is_valid_10x_coverage(coverage_10x) if coverage_10x else False diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py index 22b921e5c5..ee96a84bfb 100644 --- a/tests/meta/workflow/microsalt/test_report_generation.py +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -4,18 +4,17 @@ from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator -def test_generate_report_without_results(): - pass - - def test_generate_report_with_results(quality_results: list[QualityResult], tmp_path: Path): # GIVEN quality results - # GIVEN a file path to write the report to + # GIVEN a file path to write them to out_file = Path(tmp_path, "QC_done.json") # WHEN generating a report ReportGenerator.report(out_file=out_file, results=quality_results) - # THEN the report is written to the directory + # THEN the report is created assert out_file.exists() + + # THEN the report is populated + assert out_file.read_text() diff --git a/tests/meta/workflow/microsalt/test_utils.py b/tests/meta/workflow/microsalt/test_utils.py index 6279c08319..30fc804d39 100644 --- a/tests/meta/workflow/microsalt/test_utils.py +++ b/tests/meta/workflow/microsalt/test_utils.py @@ -1,8 +1,16 @@ -from cg.meta.workflow.microsalt.quality_controller.utils import is_valid_total_reads +from cg.meta.workflow.microsalt.quality_controller.utils import ( + is_valid_10x_coverage, + is_valid_average_coverage, + is_valid_duplication_rate, + is_valid_mapping_rate, + is_valid_median_insert_size, + is_valid_total_reads, + is_valid_total_reads_for_control, +) def test_sample_total_reads_passing(): - # GIVEN a sample with sufficient reads + # GIVEN sufficient reads sample_reads = 100 target_reads = 100 @@ -14,7 +22,7 @@ def test_sample_total_reads_passing(): def test_sample_total_reads_failing(): - # GIVEN a sample with insufficient reads + # GIVEN insufficient reads sample_reads = 50 target_reads = 100 @@ -26,7 +34,7 @@ def test_sample_total_reads_failing(): def test_sample_total_reads_failing_without_reads(): - # GIVEN a sample without reads + # GIVENout reads sample_reads = 0 target_reads = 100 @@ -35,3 +43,155 @@ def test_sample_total_reads_failing_without_reads(): # THEN it fails assert not passes_reads_threshold + + +def test_control_total_reads_passing(): + # GIVEN a negative control sample with few reads + sample_reads = 1 + target_reads = 100 + + # WHEN checking if the control read count is valid + passes_reads_threshold = is_valid_total_reads_for_control( + reads=sample_reads, target_reads=target_reads + ) + + # THEN it passes + assert passes_reads_threshold + + +def test_control_total_reads_failing(): + # GIVEN a negative control sample with many reads + sample_reads = 100 + target_reads = 100 + + # WHEN checking if the control read count is valid + passes_reads_threshold = is_valid_total_reads_for_control( + reads=sample_reads, target_reads=target_reads + ) + + # THEN it fails + assert not passes_reads_threshold + + +def test_control_total_reads_passing_without_reads(): + # GIVEN a negative control sample without reads + sample_reads = 0 + target_reads = 100 + + # WHEN checking if the control read count is valid + passes_reads_threshold = is_valid_total_reads_for_control( + reads=sample_reads, target_reads=target_reads + ) + + # THEN it passes + assert passes_reads_threshold + + +def test_is_valid_mapping_rate_passing(): + # GIVEN a high mapping rate + mapping_rate = 0.99 + + # WHEN checking if the mapping rate is valid + passes_mapping_rate_threshold = is_valid_mapping_rate(mapping_rate) + + # THEN it passes + assert passes_mapping_rate_threshold + + +def test_is_valid_mapping_rate_failing(): + # GIVEN a low mapping rate + mapping_rate = 0.1 + + # WHEN checking if the mapping rate is valid + passes_mapping_rate_threshold = is_valid_mapping_rate(mapping_rate) + + # THEN it fails + assert not passes_mapping_rate_threshold + + +def test_is_valid_duplication_rate_passing(): + # GIVEN a low duplication rate + duplication_rate = 0.1 + + # WHEN checking if the duplication rate is valid + passes_duplication_qc = is_valid_duplication_rate(duplication_rate) + + # THEN it passes + assert passes_duplication_qc + + +def test_is_valid_duplication_rate_failing(): + # GIVEN a high duplication rate + duplication_rate = 0.9 + + # WHEN checking if the duplication rate is valid + passes_duplication_qc = is_valid_duplication_rate(duplication_rate) + + # THEN it fails + assert not passes_duplication_qc + + +def test_is_valid_median_insert_size_passing(): + # GIVEN a high median insert size + insert_size = 1000 + + # WHEN checking if the median insert size is valid + passes_insert_size_qc = is_valid_median_insert_size(insert_size) + + # THEN it passes + assert passes_insert_size_qc + + +def test_is_valid_median_insert_size_failing(): + # GIVEN a low median insert size + insert_size = 10 + + # WHEN checking if the median insert size is valid + passes_insert_size_qc = is_valid_median_insert_size(insert_size) + + # THEN it fails + assert not passes_insert_size_qc + + +def test_is_valid_average_coverage_passing(): + # GIVEN a high average coverage + average_coverage = 50 + + # WHEN checking if the average coverage is valid + passes_average_coverage_qc = is_valid_average_coverage(average_coverage) + + # THEN it passes + assert passes_average_coverage_qc + + +def test_is_valid_average_coverage_failing(): + # GIVEN a low average coverage + average_coverage = 1 + + # WHEN checking if the average coverage is valid + passes_average_coverage_qc = is_valid_average_coverage(average_coverage) + + # THEN it fails + assert not passes_average_coverage_qc + + +def test_is_valid_10x_coverage_passing(): + # GIVEN a high percent of bases covered at 10x + coverage_10x = 0.95 + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc = is_valid_10x_coverage(coverage_10x) + + # THEN it passes + assert passes_coverage_10x_qc + + +def test_is_valid_10x_coverage_failing(): + # GIVEN a low percent of bases covered at 10x + coverage_10x = 0.1 + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc = is_valid_10x_coverage(coverage_10x) + + # THEN it fails + assert not passes_coverage_10x_qc From 475d0fb8e598c22111fc10727ac90df1a7f65998 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 13:18:53 +0100 Subject: [PATCH 33/65] Add tests for utils --- tests/meta/workflow/microsalt/conftest.py | 26 +++++ tests/meta/workflow/microsalt/test_utils.py | 121 ++++++++++++++++++++ 2 files changed, 147 insertions(+) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index 3a86bd576d..2c7d21a965 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -1,9 +1,35 @@ import pytest from cg.constants.constants import MicrosaltAppTags +from cg.meta.workflow.microsalt.metrics_parser.models import ( + MicrosaltSamtoolsStats, + PicardMarkduplicate, + SampleMetrics, +) from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +def create_sample_metrics( + total_reads: int = 100, + mapped_rate: float = 0.8, + duplication_rate: float = 0.1, + insert_size: int = 200, + average_coverage: float = 30.0, + coverage_10x: float = 95.0, +) -> SampleMetrics: + return SampleMetrics( + microsalt_samtools_stats=MicrosaltSamtoolsStats( + total_reads=total_reads, + mapped_rate=mapped_rate, + average_coverage=average_coverage, + coverage_10x=coverage_10x, + ), + picard_markduplicate=PicardMarkduplicate( + insert_size=insert_size, duplication_rate=duplication_rate + ), + ) + + @pytest.fixture def quality_results() -> list[QualityResult]: return [ diff --git a/tests/meta/workflow/microsalt/test_utils.py b/tests/meta/workflow/microsalt/test_utils.py index 30fc804d39..34d9c84bcf 100644 --- a/tests/meta/workflow/microsalt/test_utils.py +++ b/tests/meta/workflow/microsalt/test_utils.py @@ -1,4 +1,14 @@ +from cg.meta.workflow.microsalt.metrics_parser.models import ( + MicrosaltSamtoolsStats, + PicardMarkduplicate, + SampleMetrics, +) from cg.meta.workflow.microsalt.quality_controller.utils import ( + has_valid_10x_coverage, + has_valid_average_coverage, + has_valid_duplication_rate, + has_valid_mapping_rate, + has_valid_median_insert_size, is_valid_10x_coverage, is_valid_average_coverage, is_valid_duplication_rate, @@ -7,6 +17,7 @@ is_valid_total_reads, is_valid_total_reads_for_control, ) +from tests.meta.workflow.microsalt.conftest import create_sample_metrics def test_sample_total_reads_passing(): @@ -195,3 +206,113 @@ def test_is_valid_10x_coverage_failing(): # THEN it fails assert not passes_coverage_10x_qc + +def test_has_valid_mapping_rate_passing(): + # GIVEN metrics with a high mapping rate + metrics = create_sample_metrics(mapped_rate=0.8) + + # WHEN checking if the mapping rate is valid + passes_mapping_rate_qc = has_valid_mapping_rate(metrics) + + # THEN it passes the quality control + assert passes_mapping_rate_qc + + +def test_has_valid_mapping_rate_missing(): + # GIVEN metrics without a mapping rate + metrics = create_sample_metrics(mapped_rate=None) + + # WHEN checking if the mapping rate is valid + passes_mapping_rate_qc = has_valid_mapping_rate(metrics) + + # THEN it fails the quality control + assert not passes_mapping_rate_qc + + +def test_has_valid_duplication_rate_passing(): + # GIVEN metrics with a low duplication rate + metrics = create_sample_metrics(duplication_rate=0.1) + + # WHEN checking if the duplication rate is valid + passes_duplication_rate_qc = has_valid_duplication_rate(metrics) + + # THEN it passes the quality control + assert passes_duplication_rate_qc + + +def test_has_valid_duplication_rate_missing(): + # GIVEN metrics without a duplication rate + metrics = create_sample_metrics(duplication_rate=None) + + # WHEN checking if the duplication rate is valid + passes_duplication_rate_qc = has_valid_duplication_rate(metrics) + + # THEN it fails the quality control + assert not passes_duplication_rate_qc + + +def test_has_valid_median_insert_size_passing(): + # GIVEN metrics with a high median insert size + metrics = create_sample_metrics(insert_size=200) + + # WHEN checking if the median insert size is valid + passes_insert_size_qc = has_valid_median_insert_size(metrics) + + # THEN it passes the quality control + assert passes_insert_size_qc + + +def test_has_valid_median_insert_size_missing(): + # GIVEN metrics without a median insert size + metrics = create_sample_metrics(insert_size=None) + + # WHEN checking if the median insert size is valid + passes_insert_size_qc = has_valid_median_insert_size(metrics) + + # THEN it fails the quality control + assert not passes_insert_size_qc + + +def test_has_valid_average_coverage_passes(): + # GIVEN metrics with a high average coverage + metrics = create_sample_metrics(average_coverage=30.0) + + # WHEN checking if the average coverage is valid + passes_average_coverage_qc = has_valid_average_coverage(metrics) + + # THEN it passes the quality control + assert passes_average_coverage_qc + + +def test_has_valid_average_coverage_missing(): + # GIVEN metrics without an average coverage + metrics = create_sample_metrics(average_coverage=None) + + # WHEN checking if the average coverage is valid + passes_average_coverage_qc = has_valid_average_coverage(metrics) + + # THEN it fails the quality control + assert not passes_average_coverage_qc + + +def test_has_valid_10x_coverage_passing(): + # GIVEN metrics with a high percent of bases covered at 10x + metrics = create_sample_metrics(coverage_10x=95.0) + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc = has_valid_10x_coverage(metrics) + + # THEN it passes the quality control + assert passes_coverage_10x_qc + + +def test_has_valid_10x_coverage_missing(): + # GIVEN metrics without a percent of bases covered at 10x + metrics = create_sample_metrics(coverage_10x=None) + + # WHEN checking if the coverage is valid + passes_coverage_10x_qc = has_valid_10x_coverage(metrics) + + # THEN it fails the quality control + assert not passes_coverage_10x_qc + From 84fde0bc2afa5f91d02c530f140d9d1954bfa6b7 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 13:46:25 +0100 Subject: [PATCH 34/65] Add tests for utils --- .../microsalt/quality_controller/utils.py | 14 +-- tests/meta/workflow/microsalt/conftest.py | 26 +++++ tests/meta/workflow/microsalt/test_utils.py | 110 +++++++++++------- 3 files changed, 102 insertions(+), 48 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 7b4dcaaecd..0ef0853ae3 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -58,6 +58,13 @@ def has_valid_10x_coverage(metrics: SampleMetrics) -> bool: return is_valid_10x_coverage(coverage_10x) if coverage_10x else False +def get_negative_control_result(results: list[QualityResult]) -> QualityResult: + for result in results: + if result.is_control: + return result + raise ValueError("No negative control found") + + def negative_control_pass_qc(results: list[QualityResult]) -> bool: negative_control_result: QualityResult = get_negative_control_result(results) return negative_control_result.passes_qc @@ -91,13 +98,6 @@ def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX -def get_negative_control_result(results: list[QualityResult]) -> QualityResult: - for result in results: - if result.is_control: - return result - raise ValueError("No negative control result found") - - def is_sample_negative_control(sample: Sample) -> bool: return sample.control == ControlEnum.negative diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index 2c7d21a965..a234739ecf 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -30,6 +30,32 @@ def create_sample_metrics( ) +def create_quality_result( + sample_id: str = "sample1", + passes_qc: bool = True, + is_control: bool = False, + application_tag: str = MicrosaltAppTags.MWRNXTR003, + passes_reads_qc: bool = True, + passes_mapping_qc: bool = True, + passes_duplication_qc: bool = True, + passes_inserts_qc: bool = True, + passes_coverage_qc: bool = True, + passes_10x_coverage_qc: bool = True, +) -> QualityResult: + return QualityResult( + sample_id=sample_id, + passes_qc=passes_qc, + is_control=is_control, + application_tag=application_tag, + passes_reads_qc=passes_reads_qc, + passes_mapping_qc=passes_mapping_qc, + passes_duplication_qc=passes_duplication_qc, + passes_inserts_qc=passes_inserts_qc, + passes_coverage_qc=passes_coverage_qc, + passes_10x_coverage_qc=passes_10x_coverage_qc, + ) + + @pytest.fixture def quality_results() -> list[QualityResult]: return [ diff --git a/tests/meta/workflow/microsalt/test_utils.py b/tests/meta/workflow/microsalt/test_utils.py index 34d9c84bcf..04d9cfc773 100644 --- a/tests/meta/workflow/microsalt/test_utils.py +++ b/tests/meta/workflow/microsalt/test_utils.py @@ -1,8 +1,5 @@ -from cg.meta.workflow.microsalt.metrics_parser.models import ( - MicrosaltSamtoolsStats, - PicardMarkduplicate, - SampleMetrics, -) +from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.meta.workflow.microsalt.quality_controller.utils import ( has_valid_10x_coverage, has_valid_average_coverage, @@ -16,8 +13,9 @@ is_valid_median_insert_size, is_valid_total_reads, is_valid_total_reads_for_control, + negative_control_pass_qc, ) -from tests.meta.workflow.microsalt.conftest import create_sample_metrics +from tests.meta.workflow.microsalt.conftest import create_quality_result, create_sample_metrics def test_sample_total_reads_passing(): @@ -26,7 +24,9 @@ def test_sample_total_reads_passing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) + passes_reads_threshold: bool = is_valid_total_reads( + reads=sample_reads, target_reads=target_reads + ) # THEN it passes assert passes_reads_threshold @@ -38,7 +38,9 @@ def test_sample_total_reads_failing(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) + passes_reads_threshold: bool = is_valid_total_reads( + reads=sample_reads, target_reads=target_reads + ) # THEN it fails assert not passes_reads_threshold @@ -50,7 +52,9 @@ def test_sample_total_reads_failing_without_reads(): target_reads = 100 # WHEN checking if the sample has sufficient reads - passes_reads_threshold = is_valid_total_reads(reads=sample_reads, target_reads=target_reads) + passes_reads_threshold: bool = is_valid_total_reads( + reads=sample_reads, target_reads=target_reads + ) # THEN it fails assert not passes_reads_threshold @@ -62,7 +66,7 @@ def test_control_total_reads_passing(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_control( reads=sample_reads, target_reads=target_reads ) @@ -76,7 +80,7 @@ def test_control_total_reads_failing(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_control( reads=sample_reads, target_reads=target_reads ) @@ -90,7 +94,7 @@ def test_control_total_reads_passing_without_reads(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_control( reads=sample_reads, target_reads=target_reads ) @@ -103,7 +107,7 @@ def test_is_valid_mapping_rate_passing(): mapping_rate = 0.99 # WHEN checking if the mapping rate is valid - passes_mapping_rate_threshold = is_valid_mapping_rate(mapping_rate) + passes_mapping_rate_threshold: bool = is_valid_mapping_rate(mapping_rate) # THEN it passes assert passes_mapping_rate_threshold @@ -114,7 +118,7 @@ def test_is_valid_mapping_rate_failing(): mapping_rate = 0.1 # WHEN checking if the mapping rate is valid - passes_mapping_rate_threshold = is_valid_mapping_rate(mapping_rate) + passes_mapping_rate_threshold: bool = is_valid_mapping_rate(mapping_rate) # THEN it fails assert not passes_mapping_rate_threshold @@ -125,7 +129,7 @@ def test_is_valid_duplication_rate_passing(): duplication_rate = 0.1 # WHEN checking if the duplication rate is valid - passes_duplication_qc = is_valid_duplication_rate(duplication_rate) + passes_duplication_qc: bool = is_valid_duplication_rate(duplication_rate) # THEN it passes assert passes_duplication_qc @@ -136,7 +140,7 @@ def test_is_valid_duplication_rate_failing(): duplication_rate = 0.9 # WHEN checking if the duplication rate is valid - passes_duplication_qc = is_valid_duplication_rate(duplication_rate) + passes_duplication_qc: bool = is_valid_duplication_rate(duplication_rate) # THEN it fails assert not passes_duplication_qc @@ -147,7 +151,7 @@ def test_is_valid_median_insert_size_passing(): insert_size = 1000 # WHEN checking if the median insert size is valid - passes_insert_size_qc = is_valid_median_insert_size(insert_size) + passes_insert_size_qc: bool = is_valid_median_insert_size(insert_size) # THEN it passes assert passes_insert_size_qc @@ -169,7 +173,7 @@ def test_is_valid_average_coverage_passing(): average_coverage = 50 # WHEN checking if the average coverage is valid - passes_average_coverage_qc = is_valid_average_coverage(average_coverage) + passes_average_coverage_qc: bool = is_valid_average_coverage(average_coverage) # THEN it passes assert passes_average_coverage_qc @@ -180,7 +184,7 @@ def test_is_valid_average_coverage_failing(): average_coverage = 1 # WHEN checking if the average coverage is valid - passes_average_coverage_qc = is_valid_average_coverage(average_coverage) + passes_average_coverage_qc: bool = is_valid_average_coverage(average_coverage) # THEN it fails assert not passes_average_coverage_qc @@ -191,7 +195,7 @@ def test_is_valid_10x_coverage_passing(): coverage_10x = 0.95 # WHEN checking if the coverage is valid - passes_coverage_10x_qc = is_valid_10x_coverage(coverage_10x) + passes_coverage_10x_qc: bool = is_valid_10x_coverage(coverage_10x) # THEN it passes assert passes_coverage_10x_qc @@ -202,17 +206,18 @@ def test_is_valid_10x_coverage_failing(): coverage_10x = 0.1 # WHEN checking if the coverage is valid - passes_coverage_10x_qc = is_valid_10x_coverage(coverage_10x) + passes_coverage_10x_qc: bool = is_valid_10x_coverage(coverage_10x) # THEN it fails assert not passes_coverage_10x_qc + def test_has_valid_mapping_rate_passing(): # GIVEN metrics with a high mapping rate - metrics = create_sample_metrics(mapped_rate=0.8) + metrics: SampleMetrics = create_sample_metrics(mapped_rate=0.8) # WHEN checking if the mapping rate is valid - passes_mapping_rate_qc = has_valid_mapping_rate(metrics) + passes_mapping_rate_qc: bool = has_valid_mapping_rate(metrics) # THEN it passes the quality control assert passes_mapping_rate_qc @@ -220,10 +225,10 @@ def test_has_valid_mapping_rate_passing(): def test_has_valid_mapping_rate_missing(): # GIVEN metrics without a mapping rate - metrics = create_sample_metrics(mapped_rate=None) + metrics: SampleMetrics = create_sample_metrics(mapped_rate=None) # WHEN checking if the mapping rate is valid - passes_mapping_rate_qc = has_valid_mapping_rate(metrics) + passes_mapping_rate_qc: bool = has_valid_mapping_rate(metrics) # THEN it fails the quality control assert not passes_mapping_rate_qc @@ -231,10 +236,10 @@ def test_has_valid_mapping_rate_missing(): def test_has_valid_duplication_rate_passing(): # GIVEN metrics with a low duplication rate - metrics = create_sample_metrics(duplication_rate=0.1) + metrics: SampleMetrics = create_sample_metrics(duplication_rate=0.1) # WHEN checking if the duplication rate is valid - passes_duplication_rate_qc = has_valid_duplication_rate(metrics) + passes_duplication_rate_qc: bool = has_valid_duplication_rate(metrics) # THEN it passes the quality control assert passes_duplication_rate_qc @@ -242,10 +247,10 @@ def test_has_valid_duplication_rate_passing(): def test_has_valid_duplication_rate_missing(): # GIVEN metrics without a duplication rate - metrics = create_sample_metrics(duplication_rate=None) + metrics: SampleMetrics = create_sample_metrics(duplication_rate=None) # WHEN checking if the duplication rate is valid - passes_duplication_rate_qc = has_valid_duplication_rate(metrics) + passes_duplication_rate_qc: bool = has_valid_duplication_rate(metrics) # THEN it fails the quality control assert not passes_duplication_rate_qc @@ -253,10 +258,10 @@ def test_has_valid_duplication_rate_missing(): def test_has_valid_median_insert_size_passing(): # GIVEN metrics with a high median insert size - metrics = create_sample_metrics(insert_size=200) + metrics: SampleMetrics = create_sample_metrics(insert_size=200) # WHEN checking if the median insert size is valid - passes_insert_size_qc = has_valid_median_insert_size(metrics) + passes_insert_size_qc: bool = has_valid_median_insert_size(metrics) # THEN it passes the quality control assert passes_insert_size_qc @@ -264,10 +269,10 @@ def test_has_valid_median_insert_size_passing(): def test_has_valid_median_insert_size_missing(): # GIVEN metrics without a median insert size - metrics = create_sample_metrics(insert_size=None) + metrics: SampleMetrics = create_sample_metrics(insert_size=None) # WHEN checking if the median insert size is valid - passes_insert_size_qc = has_valid_median_insert_size(metrics) + passes_insert_size_qc: bool = has_valid_median_insert_size(metrics) # THEN it fails the quality control assert not passes_insert_size_qc @@ -275,10 +280,10 @@ def test_has_valid_median_insert_size_missing(): def test_has_valid_average_coverage_passes(): # GIVEN metrics with a high average coverage - metrics = create_sample_metrics(average_coverage=30.0) + metrics: SampleMetrics = create_sample_metrics(average_coverage=30.0) # WHEN checking if the average coverage is valid - passes_average_coverage_qc = has_valid_average_coverage(metrics) + passes_average_coverage_qc: bool = has_valid_average_coverage(metrics) # THEN it passes the quality control assert passes_average_coverage_qc @@ -286,10 +291,10 @@ def test_has_valid_average_coverage_passes(): def test_has_valid_average_coverage_missing(): # GIVEN metrics without an average coverage - metrics = create_sample_metrics(average_coverage=None) + metrics: SampleMetrics = create_sample_metrics(average_coverage=None) # WHEN checking if the average coverage is valid - passes_average_coverage_qc = has_valid_average_coverage(metrics) + passes_average_coverage_qc: bool = has_valid_average_coverage(metrics) # THEN it fails the quality control assert not passes_average_coverage_qc @@ -297,10 +302,10 @@ def test_has_valid_average_coverage_missing(): def test_has_valid_10x_coverage_passing(): # GIVEN metrics with a high percent of bases covered at 10x - metrics = create_sample_metrics(coverage_10x=95.0) + metrics: SampleMetrics = create_sample_metrics(coverage_10x=95.0) # WHEN checking if the coverage is valid - passes_coverage_10x_qc = has_valid_10x_coverage(metrics) + passes_coverage_10x_qc: bool = has_valid_10x_coverage(metrics) # THEN it passes the quality control assert passes_coverage_10x_qc @@ -308,11 +313,34 @@ def test_has_valid_10x_coverage_passing(): def test_has_valid_10x_coverage_missing(): # GIVEN metrics without a percent of bases covered at 10x - metrics = create_sample_metrics(coverage_10x=None) + metrics: SampleMetrics = create_sample_metrics(coverage_10x=None) # WHEN checking if the coverage is valid - passes_coverage_10x_qc = has_valid_10x_coverage(metrics) + passes_coverage_10x_qc: bool = has_valid_10x_coverage(metrics) # THEN it fails the quality control assert not passes_coverage_10x_qc + +def test_negative_control_passes_qc(): + # GIVEN a negative control sample that passes quality control + control_result: QualityResult = create_quality_result(is_control=True) + other_result: QualityResult = create_quality_result(passes_qc=False) + + # WHEN checking if the negative control passes quality control + control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) + + # THEN it passes quality control + assert control_passes_qc + + +def test_negative_control_fails_qc(): + # GIVEN a negative control sample that fails quality control + control_result: QualityResult = create_quality_result(is_control=True, passes_qc=False) + other_result: QualityResult = create_quality_result(passes_qc=True) + + # WHEN checking if the negative control passes quality control + control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) + + # THEN it fails quality control + assert not control_passes_qc From ff10f87d16c229d82b25896942635e7a7bff1bf9 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 14:05:38 +0100 Subject: [PATCH 35/65] Add tests for utils --- tests/meta/workflow/microsalt/test_utils.py | 92 ++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/tests/meta/workflow/microsalt/test_utils.py b/tests/meta/workflow/microsalt/test_utils.py index 04d9cfc773..ec0be024b2 100644 --- a/tests/meta/workflow/microsalt/test_utils.py +++ b/tests/meta/workflow/microsalt/test_utils.py @@ -1,6 +1,9 @@ +from cg.constants.constants import MicrosaltAppTags from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.meta.workflow.microsalt.quality_controller.utils import ( + get_non_urgent_results, + get_urgent_results, has_valid_10x_coverage, has_valid_average_coverage, has_valid_duplication_rate, @@ -14,6 +17,8 @@ is_valid_total_reads, is_valid_total_reads_for_control, negative_control_pass_qc, + non_urgent_samples_pass_qc, + urgent_samples_pass_qc, ) from tests.meta.workflow.microsalt.conftest import create_quality_result, create_sample_metrics @@ -337,10 +342,95 @@ def test_negative_control_passes_qc(): def test_negative_control_fails_qc(): # GIVEN a negative control sample that fails quality control control_result: QualityResult = create_quality_result(is_control=True, passes_qc=False) - other_result: QualityResult = create_quality_result(passes_qc=True) + other_result: QualityResult = create_quality_result() # WHEN checking if the negative control passes quality control control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) # THEN it fails quality control assert not control_passes_qc + + +def test_get_urgent_results(): + # GIVEN quality results with urgent and non-urgent samples + urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True + ) + non_urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True + ) + quality_results: list[QualityResult] = [urgent_result, non_urgent_result] + + # WHEN getting the urgent results + urgent_results: list[QualityResult] = get_urgent_results(quality_results) + + # THEN the urgent results are returned + assert urgent_results == [urgent_result] + + +def test_urgent_samples_pass_qc(): + # GIVEN quality results with urgent samples that pass quality control + urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True + ) + urgent_result_control: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True + ) + urgent_results: list[QualityResult] = [urgent_result, urgent_result_control] + + # WHEN checking if the urgent samples pass quality control + urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results) + + # THEN it passes quality control + assert urgent_pass_qc + + +def test_urgent_samples_fail_qc(): + # GIVEN quality results with urgent samples that fail quality control + urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=False + ) + urgent_result_control: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True + ) + urgent_results: list[QualityResult] = [urgent_result, urgent_result_control] + + # WHEN checking if the urgent samples pass quality control + urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results) + + # THEN it fails quality control + assert not urgent_pass_qc + + +def test_get_non_urgent_results(): + # GIVEN quality results with urgent and non-urgent samples + urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True + ) + non_urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True + ) + quality_results: list[QualityResult] = [urgent_result, non_urgent_result] + + # WHEN getting the non-urgent results + non_urgent_results: list[QualityResult] = get_non_urgent_results(quality_results) + + # THEN the non-urgent results are returned + assert non_urgent_results == [non_urgent_result] + + +def test_non_urgent_samples_pass_qc(): + # GIVEN quality results with non-urgent samples that pass quality control + non_urgent_result: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True + ) + non_urgent_result_control: QualityResult = create_quality_result( + application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True, is_control=True + ) + non_urgent_results: list[QualityResult] = [non_urgent_result, non_urgent_result_control] + + # WHEN checking if the non-urgent samples pass quality control + non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(non_urgent_results) + + # THEN it passes quality control + assert non_urgent_pass_qc From f8ff37eea842fe420256b91bfd9f2e9125f912fd Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 14:52:39 +0100 Subject: [PATCH 36/65] Add tests for quality controller --- tests/meta/workflow/microsalt/conftest.py | 7 +++ .../microsalt/test_quality_controller.py | 53 +++++++++++++++++++ .../{test_utils.py => test_quality_utils.py} | 0 3 files changed, 60 insertions(+) create mode 100644 tests/meta/workflow/microsalt/test_quality_controller.py rename tests/meta/workflow/microsalt/{test_utils.py => test_quality_utils.py} (100%) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index a234739ecf..d1c12bb7b5 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -7,6 +7,8 @@ ) from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController +from cg.store.api.core import Store def create_sample_metrics( @@ -96,3 +98,8 @@ def quality_results() -> list[QualityResult]: passes_10x_coverage_qc=False, ), ] + + +@pytest.fixture +def quality_controller(store: Store) -> QualityController: + return QualityController(store) diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py new file mode 100644 index 0000000000..d1730a18ca --- /dev/null +++ b/tests/meta/workflow/microsalt/test_quality_controller.py @@ -0,0 +1,53 @@ +from cg.meta.workflow.microsalt.quality_controller import QualityController +from cg.store.models import Application, Sample +from tests.store_helpers import StoreHelpers + + +def test_is_valid_total_reads_passes(quality_controller: QualityController): + # GIVEN an application + store = quality_controller.status_db + application: Application = StoreHelpers.add_application(store=store, target_reads=1000) + + # GIVEN an application version + version = StoreHelpers.add_application_version( + store=store, + application=application, + prices={"standard": 1000, "priority": 2000, "express": 3000, "research": 4000}, + ) + + # GIVEN a sample with a number of reads that is above the target reads + sample: Sample = StoreHelpers.add_sample(store=store, reads=10000) + + # GIVEN that the sample is associated with the application version + sample.application_version = version + + # WHEN controlling the quality of the sample reads + has_valid_reads: bool = quality_controller.is_valid_total_reads(sample.internal_id) + + # THEN the sample passes the quality control + assert has_valid_reads + + +def test_is_valid_total_reads_fails(quality_controller: QualityController): + # GIVEN an application + store = quality_controller.status_db + application: Application = StoreHelpers.add_application(store=store, target_reads=1000) + + # GIVEN an application version + version = StoreHelpers.add_application_version( + store=store, + application=application, + prices={"standard": 1000, "priority": 2000, "express": 3000, "research": 4000}, + ) + + # GIVEN a sample with a number of reads that is far below the target reads + sample: Sample = StoreHelpers.add_sample(store=store, reads=100) + + # GIVEN that the sample is associated with the application version + sample.application_version = version + + # WHEN controlling the quality of the sample reads + has_valid_reads: bool = quality_controller.is_valid_total_reads(sample.internal_id) + + # THEN the sample fails the quality control + assert not has_valid_reads diff --git a/tests/meta/workflow/microsalt/test_utils.py b/tests/meta/workflow/microsalt/test_quality_utils.py similarity index 100% rename from tests/meta/workflow/microsalt/test_utils.py rename to tests/meta/workflow/microsalt/test_quality_utils.py From 5464dc51fa5bf4200c7bf6043fa79676066fe988 Mon Sep 17 00:00:00 2001 From: seallard Date: Wed, 13 Dec 2023 15:50:28 +0100 Subject: [PATCH 37/65] Remove old tests --- tests/meta/workflow/microsalt/conftest.py | 12 +- ...ls.py => test_quality_controller_utils.py} | 0 tests/meta/workflow/test_microsalt.py | 181 ++---------------- 3 files changed, 17 insertions(+), 176 deletions(-) rename tests/meta/workflow/microsalt/{test_quality_utils.py => test_quality_controller_utils.py} (100%) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index d1c12bb7b5..d26370c75e 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -12,12 +12,12 @@ def create_sample_metrics( - total_reads: int = 100, - mapped_rate: float = 0.8, - duplication_rate: float = 0.1, - insert_size: int = 200, - average_coverage: float = 30.0, - coverage_10x: float = 95.0, + total_reads: int | None = 100, + mapped_rate: float | None = 0.8, + duplication_rate: float | None = 0.1, + insert_size: int | None = 200, + average_coverage: float | None = 30.0, + coverage_10x: float | None = 95.0, ) -> SampleMetrics: return SampleMetrics( microsalt_samtools_stats=MicrosaltSamtoolsStats( diff --git a/tests/meta/workflow/microsalt/test_quality_utils.py b/tests/meta/workflow/microsalt/test_quality_controller_utils.py similarity index 100% rename from tests/meta/workflow/microsalt/test_quality_utils.py rename to tests/meta/workflow/microsalt/test_quality_controller_utils.py diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 7d39fc8ec8..689480b839 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -5,108 +5,21 @@ from cg.apps.tb.api import TrailblazerAPI from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI -from cg.meta.workflow.microsalt.quality_controller import QualityController +from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator from cg.models.cg_config import CGConfig -from cg.models.orders.sample_base import ControlEnum -from cg.store import Store from cg.store.models import Case -def test_qc_check_fail( - qc_microsalt_context: CGConfig, - microsalt_qc_fail_run_dir_path: Path, - microsalt_qc_fail_lims_project: str, - microsalt_case_qc_fail: str, - caplog, - mocker, -): - """QC check for a microsalt case that should fail.""" - caplog.set_level(logging.INFO) - store: Store = qc_microsalt_context.status_db - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - - # GIVEN a case that is to be stored - microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) - for index in range(4): - microsalt_case.samples[index].reads = 1000 - - mocker.patch.object(QualityController, "create_qc_done_file") - - # GIVEN the path to the metrics file - metrics_file_path = Path( - microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" - ) - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) - - # THEN the QC should fail - assert not qc_pass - assert "failed" in caplog.text - - -def test_qc_check_pass( - qc_microsalt_context: CGConfig, - microsalt_qc_pass_run_dir_path: Path, - microsalt_qc_pass_lims_project: str, - microsalt_case_qc_pass: str, - caplog, - mocker, -): - """QC check for a microsalt case that should pass.""" - caplog.set_level(logging.INFO) - store: Store = qc_microsalt_context.status_db - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] +def test_test_quality_control_fails(): + pass - # GIVEN a case that is to be stored - microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_pass) - microsalt_case.samples[1].control = ControlEnum.negative - microsalt_case.samples[1].reads = 1100000 - mocker.patch.object(QualityController, "create_qc_done_file") +def test_quality_control_passes(): + pass - # GIVEN the path to the metrics file - metrics_file_path = Path( - microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json" - ) - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) - - # THEN the QC should pass - assert qc_pass - assert "passed" in caplog.text - - -def test_qc_check_negative_control_fail( - qc_microsalt_context: CGConfig, - microsalt_qc_fail_run_dir_path: Path, - microsalt_qc_fail_lims_project: str, - microsalt_case_qc_fail: str, - caplog, - mocker, -): - """QC check for a microsalt case where a negative control fails QC.""" - - caplog.set_level(logging.INFO) - store = qc_microsalt_context.status_db - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - - # GIVEN a case that is to be stored - microsalt_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) - microsalt_case.samples[0].control = ControlEnum.negative - - mocker.patch.object(QualityController, "create_qc_done_file") - - # GIVEN the metrics file path - metrics_file_path = Path( - microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json" - ) - # WHEN performing QC check - qc_pass: bool = microsalt_api.quality_checker.quality_control(metrics_file_path) - # THEN the QC should fail - assert not qc_pass - assert "failed" in caplog.text - assert "Negative control sample" in caplog.text +def test_quality_control_fails_due_to_negative_control(): + pass def test_get_latest_case_path( @@ -136,83 +49,11 @@ def test_get_latest_case_path( assert Path(microsalt_analysis_dir, "ACC12345_2022") == path -def test_get_cases_to_store_pass( - qc_microsalt_context: CGConfig, - caplog, - mocker, - microsalt_qc_pass_lims_project: str, - microsalt_case_qc_pass: str, - microsalt_qc_pass_run_dir_path: Path, -): +def test_get_cases_to_store_pass(): """Test get cases to store for a microsalt case that passes QC.""" + pass - caplog.set_level(logging.INFO) - store = qc_microsalt_context.status_db - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(QualityController, "create_qc_done_file") - mocker.patch.object(TrailblazerAPI, "set_analysis_status") - mocker.patch.object(TrailblazerAPI, "add_comment") - - # GIVEN a store with a QC ready microsalt case that will pass QC - microsalt_pass_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_pass) - microsalt_pass_case.samples[1].control = "negative" - microsalt_pass_case.samples[1].reads = 1100000 - - mocker.patch.object( - MicrosaltAnalysisAPI, - "get_completed_cases", - return_value=[microsalt_pass_case], - ) - mocker.patch.object( - MicrosaltAnalysisAPI, "get_project", return_value=microsalt_qc_pass_lims_project - ) - - mocker.patch.object( - MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_pass_run_dir_path - ) - - # WHEN get cases to store - cases_to_store: list[Case] = microsalt_api.get_cases_to_store() - - # THEN it should be stored - assert microsalt_pass_case in cases_to_store - -def test_get_cases_to_store_fail( - qc_microsalt_context: CGConfig, - caplog, - mocker, - microsalt_qc_fail_lims_project: str, - microsalt_case_qc_fail: str, - microsalt_qc_fail_run_dir_path: Path, -): +def test_get_cases_to_store_fail(): """Test get cases to store for a microsalt case that fails QC.""" - - caplog.set_level(logging.INFO) - store = qc_microsalt_context.status_db - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(QualityController, "create_qc_done_file") - mocker.patch.object(TrailblazerAPI, "set_analysis_status") - mocker.patch.object(TrailblazerAPI, "add_comment") - - # GIVEN a store with a QC ready microsalt case that will fail QC - microsalt_fail_case: Case = store.get_case_by_internal_id(internal_id=microsalt_case_qc_fail) - - mocker.patch.object( - MicrosaltAnalysisAPI, - "get_completed_cases", - return_value=[microsalt_fail_case], - ) - mocker.patch.object( - MicrosaltAnalysisAPI, "get_project", return_value=microsalt_qc_fail_lims_project - ) - - mocker.patch.object( - MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_fail_run_dir_path - ) - - # WHEN get case to store - cases_to_store: list[Case] = microsalt_api.get_cases_to_store() - - # Then it should not be stored - assert microsalt_fail_case not in cases_to_store + pass From 1a1e3200d676d3b3179f7b33d4f6de4cfefecd9b Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 10:33:50 +0100 Subject: [PATCH 38/65] Add integration tests --- .../quality_controller/quality_controller.py | 18 +++---- .../microsalt/quality_controller/utils.py | 4 ++ .../ACC22222_qc_pass/ACC22222_qc_pass.json | 37 ++++++++------ tests/meta/workflow/conftest.py | 49 +++++++++++++++++-- .../microsalt/test_quality_controller.py | 49 ++++++++++++++++++- tests/meta/workflow/test_microsalt.py | 12 ----- 6 files changed, 126 insertions(+), 43 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 4b92b5bf56..d3547116b7 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -6,6 +6,7 @@ from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, + get_sample_target_reads, is_sample_negative_control, has_valid_10x_coverage, has_valid_average_coverage, @@ -18,7 +19,6 @@ non_urgent_samples_pass_qc, urgent_samples_pass_qc, ) -from cg.models.orders.sample_base import ControlEnum from cg.store.api.core import Store from cg.store.models import Sample @@ -38,13 +38,13 @@ def quality_control(self, metrics_file_path: Path) -> bool: def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[QualityResult]: sample_results: list[QualityResult] = [] - for sample_id, metrics in quality_metrics: + for sample_id, metrics in quality_metrics.samples.items(): result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) return sample_results def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: - valid_reads: bool = self.is_valid_total_reads(sample_id) + valid_read_count: bool = self.has_valid_total_reads(sample_id) valid_mapping: bool = has_valid_mapping_rate(metrics) valid_duplication: bool = has_valid_duplication_rate(metrics) valid_inserts: bool = has_valid_median_insert_size(metrics) @@ -52,7 +52,7 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual valid_10x_coverage: bool = has_valid_10x_coverage(metrics) sample_passes_qc: bool = ( - valid_reads + valid_read_count and valid_mapping and valid_duplication and valid_inserts @@ -66,10 +66,10 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual return QualityResult( sample_id=sample_id, - passed=sample_passes_qc, + passes_qc=sample_passes_qc, is_control=is_control, application_tag=application_tag, - passes_reads_qc=valid_reads, + passes_reads_qc=valid_read_count, passes_mapping_qc=valid_mapping, passes_duplication_qc=valid_duplication, passes_inserts_qc=valid_inserts, @@ -89,11 +89,11 @@ def is_qc_required(self, case_run_dir: Path) -> bool: qc_done_path: Path = case_run_dir.joinpath("QC_done.json") return not qc_done_path.exists() - def is_valid_total_reads(self, sample_id: str) -> bool: + def has_valid_total_reads(self, sample_id: str) -> bool: sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) - target_reads: int = sample.application_version.application.target_reads + target_reads: int = get_sample_target_reads(sample) sample_reads: int = sample.reads - if sample.control == ControlEnum.negative: + if is_sample_negative_control(sample): return is_valid_total_reads_for_control(reads=sample_reads, target_reads=target_reads) return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 0ef0853ae3..1d3377c311 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -104,3 +104,7 @@ def is_sample_negative_control(sample: Sample) -> bool: def get_application_tag(sample: Sample) -> str: return sample.application_version.application.tag + + +def get_sample_target_reads(sample: Sample) -> int: + return sample.application_version.application.target_reads diff --git a/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json b/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json index e93e00a6bd..b1708ae86f 100644 --- a/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json +++ b/tests/fixtures/analysis/microsalt/ACC22222_qc_pass/ACC22222_qc_pass.json @@ -32,28 +32,33 @@ }, "ACC22222A2": { "blast_pubmlst": { - "sequence_type": "", - "thresholds": "" + "sequence_type": "8", + "thresholds": "Passed" }, "quast_assembly": { - "estimated_genome_length": "", - "gc_percentage": "", - "n50": "", - "necessary_contigs": "" + "estimated_genome_length": 2869750, + "gc_percentage": "32.6000000000", + "n50": 209899, + "necessary_contigs": 33 }, - "blast_resfinder_resistence": [], + "blast_resfinder_resistence": [ + "aph(3')-III", + "mecA", + "mph(C)", + "msr(A)" + ], "picard_markduplicate": { - "insert_size": "", - "duplication_rate": "" + "insert_size": 215, + "duplication_rate": 0.105372 }, "microsalt_samtools_stats": { - "total_reads": 42, - "mapped_rate": "", - "average_coverage": "", - "coverage_10x": "", - "coverage_30x": "", - "coverage_50x": "", - "coverage_100x": "" + "total_reads": 3079934, + "mapped_rate": 0.8400670274103276, + "average_coverage": 120.99379962716425, + "coverage_10x": 0.9365702921426038, + "coverage_30x": 0.9330515081968174, + "coverage_50x": 0.9282236223103506, + "coverage_100x": 0.8010769589265144 } }, "ACC22222A3": { diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index 356786ffaa..9c525149c7 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -1,6 +1,7 @@ """Fixtures for the workflow tests.""" import datetime from pathlib import Path +import shutil import pytest @@ -10,6 +11,7 @@ from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI from cg.models.cg_config import CGConfig from cg.models.compression_data import CompressionData +from cg.models.orders.sample_base import ControlEnum from cg.store.models import Case, Sample from tests.cli.workflow.balsamic.conftest import ( balsamic_housekeeper_dir, @@ -113,6 +115,32 @@ def microsalt_qc_fail_lims_project() -> str: return "ACC11111_qc_fail" +@pytest.fixture +def metrics_file_failing_qc( + microsalt_qc_fail_run_dir_path: Path, + microsalt_qc_fail_lims_project: str, + tmp_path: Path, +) -> Path: + """Return a metrics file that fails QC with corresponding samples in the database.""" + metrics_path = Path(microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json") + temp_metrics_path = Path(tmp_path, metrics_path.name) + shutil.copy(metrics_path, temp_metrics_path) + return temp_metrics_path + + +@pytest.fixture +def metrics_file_passing_qc( + microsalt_qc_pass_run_dir_path: Path, + microsalt_qc_pass_lims_project: str, + tmp_path: Path, +) -> Path: + """Return a metrics file that fails QC with corresponding samples in the database.""" + metrics_path = Path(microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json") + temp_metrics_path = Path(tmp_path, metrics_path.name) + shutil.copy(metrics_path, temp_metrics_path) + return temp_metrics_path + + @pytest.fixture def valid_microsalt_metrics_file( microsalt_qc_fail_run_dir_path: Path, microsalt_qc_fail_lims_project: str @@ -152,8 +180,6 @@ def qc_microsalt_context( microsalt_case_qc_fail: str, qc_pass_microsalt_samples: list[str], qc_fail_microsalt_samples: list[str], - microsalt_qc_pass_lims_project: str, - microsalt_qc_fail_lims_project: str, ) -> CGConfig: """Return a Microsalt CG context.""" analysis_api = MicrosaltAnalysisAPI(cg_context) @@ -167,18 +193,32 @@ def qc_microsalt_context( data_analysis=Pipeline.MICROSALT, ) - for sample in qc_pass_microsalt_samples: + for sample in qc_pass_microsalt_samples[1:]: sample_to_add: Sample = helpers.add_sample( store=store, internal_id=sample, application_tag=MicrosaltAppTags.MWRNXTR003, application_type=MicrosaltAppTags.PREP_CATEGORY, - reads=MicrosaltQC.TARGET_READS, + reads=MicrosaltQC.TARGET_READS * 2, last_sequenced_at=datetime.datetime.now(), ) helpers.add_relationship(store=store, case=microsalt_case_qc_pass, sample=sample_to_add) + # Add a negative control sample that passes the qc + negative_control_sample: Sample = helpers.add_sample( + store=store, + internal_id=qc_pass_microsalt_samples[0], + application_tag=MicrosaltAppTags.MWRNXTR003, + application_type=MicrosaltAppTags.PREP_CATEGORY, + reads=0, + last_sequenced_at=datetime.datetime.now(), + control=ControlEnum.negative, + ) + helpers.add_relationship( + store=store, case=microsalt_case_qc_pass, sample=negative_control_sample + ) + # Create a microsalt MWX case that fails QC microsalt_case_qc_fail: Case = helpers.add_case( store=store, @@ -195,6 +235,7 @@ def qc_microsalt_context( application_type=MicrosaltAppTags.PREP_CATEGORY, reads=MicrosaltQC.TARGET_READS, last_sequenced_at=datetime.datetime.now(), + control=ControlEnum.negative, ) helpers.add_relationship(store=store, case=microsalt_case_qc_fail, sample=sample_to_add) diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py index d1730a18ca..19bf04a508 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller.py +++ b/tests/meta/workflow/microsalt/test_quality_controller.py @@ -1,4 +1,8 @@ +from pathlib import Path + from cg.meta.workflow.microsalt.quality_controller import QualityController +from cg.models.cg_config import CGConfig +from cg.store.api.core import Store from cg.store.models import Application, Sample from tests.store_helpers import StoreHelpers @@ -22,7 +26,7 @@ def test_is_valid_total_reads_passes(quality_controller: QualityController): sample.application_version = version # WHEN controlling the quality of the sample reads - has_valid_reads: bool = quality_controller.is_valid_total_reads(sample.internal_id) + has_valid_reads: bool = quality_controller.has_valid_total_reads(sample.internal_id) # THEN the sample passes the quality control assert has_valid_reads @@ -47,7 +51,48 @@ def test_is_valid_total_reads_fails(quality_controller: QualityController): sample.application_version = version # WHEN controlling the quality of the sample reads - has_valid_reads: bool = quality_controller.is_valid_total_reads(sample.internal_id) + has_valid_reads: bool = quality_controller.has_valid_total_reads(sample.internal_id) # THEN the sample fails the quality control assert not has_valid_reads + + +def test_quality_control_fails( + qc_microsalt_context: CGConfig, + metrics_file_failing_qc: Path, +): + # GIVEN a metrics file with samples that should fail the quality control + + # GIVEN a store containing the corresponding samples + store: Store = qc_microsalt_context.status_db + + # GIVEN a quality controller + quality_controller = QualityController(store) + + # WHEN performing the quality control + passes_qc: bool = quality_controller.quality_control(metrics_file_failing_qc) + + # THEN the case should fail the quality control + assert not passes_qc + + # THEN a report should be generated + assert metrics_file_failing_qc.parent.joinpath("QC_done.json").exists() + + +def test_quality_control_passes(qc_microsalt_context: CGConfig, metrics_file_passing_qc: Path): + # GIVEN a metrics file with samples that should pass the quality control + + # GIVEN a store containing the corresponding samples + store: Store = qc_microsalt_context.status_db + + # GIVEN a quality controller + quality_controller = QualityController(store) + + # WHEN performing the quality control + passes_qc: bool = quality_controller.quality_control(metrics_file_passing_qc) + + # THEN the case should pass the quality control + assert passes_qc + + # THEN a report should be generated + assert metrics_file_passing_qc.parent.joinpath("QC_done.json").exists() diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 689480b839..939201d54d 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -10,18 +10,6 @@ from cg.store.models import Case -def test_test_quality_control_fails(): - pass - - -def test_quality_control_passes(): - pass - - -def test_quality_control_fails_due_to_negative_control(): - pass - - def test_get_latest_case_path( mocker, qc_microsalt_context: CGConfig, From 4a3eb8591c621f6f4debba0d28c657fb66c2ea64 Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 10:53:53 +0100 Subject: [PATCH 39/65] Cleanup --- tests/meta/workflow/microsalt/__init__.py | 0 tests/meta/workflow/microsalt/test_quality_controller.py | 5 +---- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 tests/meta/workflow/microsalt/__init__.py diff --git a/tests/meta/workflow/microsalt/__init__.py b/tests/meta/workflow/microsalt/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py index 19bf04a508..086b42c3cf 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller.py +++ b/tests/meta/workflow/microsalt/test_quality_controller.py @@ -57,10 +57,7 @@ def test_is_valid_total_reads_fails(quality_controller: QualityController): assert not has_valid_reads -def test_quality_control_fails( - qc_microsalt_context: CGConfig, - metrics_file_failing_qc: Path, -): +def test_quality_control_fails(qc_microsalt_context: CGConfig, metrics_file_failing_qc: Path): # GIVEN a metrics file with samples that should fail the quality control # GIVEN a store containing the corresponding samples From 3e4a44b902ccf25b86956ab26028fec7a9d1b22d Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 14:52:03 +0100 Subject: [PATCH 40/65] Add integration test --- cg/meta/workflow/microsalt/microsalt.py | 6 +++--- tests/meta/workflow/conftest.py | 5 ++++- tests/meta/workflow/test_microsalt.py | 27 +++++++++++++++++-------- tests/mocks/tb_mock.py | 3 +++ 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 5bd6e11224..f67b4f19d6 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -280,7 +280,7 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) lims_project: str = self.get_project(case.samples[0].internal_id) - metrics_file_path: Path = Path(case_run_dir, f"{lims_project}.json") + metrics_file_path = Path(case_run_dir, f"{lims_project}.json") if self.quality_checker.is_qc_required(case_run_dir): if self.quality_checker.quality_control(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") @@ -299,6 +299,6 @@ def get_completed_cases(self) -> list[Case]: """Return cases that are completed in trailblazer.""" return [ case - for case in self.status_db.get_running_cases_in_pipeline(pipeline=self.pipeline) - if self.trailblazer_api.is_latest_analysis_completed(case_id=case.internal_id) + for case in self.status_db.get_running_cases_in_pipeline(self.pipeline) + if self.trailblazer_api.is_latest_analysis_completed(case.internal_id) ] diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index 9c525149c7..01b7f990e4 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -5,7 +5,7 @@ import pytest -from cg.constants.constants import MicrosaltAppTags, MicrosaltQC, Pipeline +from cg.constants.constants import CaseActions, MicrosaltAppTags, MicrosaltQC, Pipeline from cg.meta.compress.compress import CompressAPI from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI @@ -21,6 +21,7 @@ ) from tests.meta.compress.conftest import compress_api, real_crunchy_api from tests.meta.upload.scout.conftest import another_sample_id +from tests.mocks.tb_mock import MockTB from tests.store_helpers import StoreHelpers @@ -182,6 +183,7 @@ def qc_microsalt_context( qc_fail_microsalt_samples: list[str], ) -> CGConfig: """Return a Microsalt CG context.""" + cg_context.trailblazer_api_ = MockTB() analysis_api = MicrosaltAnalysisAPI(cg_context) store = analysis_api.status_db @@ -191,6 +193,7 @@ def qc_microsalt_context( internal_id=microsalt_case_qc_pass, name=microsalt_case_qc_pass, data_analysis=Pipeline.MICROSALT, + action=CaseActions.RUNNING, ) for sample in qc_pass_microsalt_samples[1:]: diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 939201d54d..625a117fdd 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -1,11 +1,10 @@ """Tests for MicroSALT analysis.""" -import logging from pathlib import Path -from cg.apps.tb.api import TrailblazerAPI +from mock import MagicMock +from cg.apps.lims.api import LimsAPI from cg.meta.workflow.microsalt import MicrosaltAnalysisAPI -from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator from cg.models.cg_config import CGConfig from cg.store.models import Case @@ -37,11 +36,23 @@ def test_get_latest_case_path( assert Path(microsalt_analysis_dir, "ACC12345_2022") == path -def test_get_cases_to_store_pass(): +def test_get_cases_to_store_pass( + qc_microsalt_context: CGConfig, + mocker, + microsalt_qc_pass_lims_project: str, + microsalt_qc_pass_run_dir_path: Path, +): """Test get cases to store for a microsalt case that passes QC.""" - pass + # GIVEN a store with a QC ready microsalt case that will pass QC + microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] + mocker.patch.object(LimsAPI, "get_sample_project", return_value=microsalt_qc_pass_lims_project) + mocker.patch.object( + MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_pass_run_dir_path + ) + + # WHEN retrieving cases to store + cases_to_store: list[Case] = microsalt_api.get_cases_to_store() -def test_get_cases_to_store_fail(): - """Test get cases to store for a microsalt case that fails QC.""" - pass + # THEN cases should returned + assert cases_to_store diff --git a/tests/mocks/tb_mock.py b/tests/mocks/tb_mock.py index 1bb09f7019..3355265db2 100644 --- a/tests/mocks/tb_mock.py +++ b/tests/mocks/tb_mock.py @@ -45,3 +45,6 @@ def is_latest_analysis_qc(self, case_id: str): def set_analysis_status(self, case_id: str, status: str): return + + def add_comment(self, case_id: str, comment: str): + return From 39894b488da4548478155e609980af98b8ac6a12 Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 16:31:21 +0100 Subject: [PATCH 41/65] Add logging --- cg/cli/workflow/microsalt/base.py | 3 +- .../microsalt/quality_controller/models.py | 7 +++ .../quality_controller/quality_controller.py | 23 ++++++--- .../quality_controller/report_generator.py | 6 +-- .../quality_controller/result_logger.py | 51 +++++++++++++++++++ .../microsalt/test_report_generation.py | 2 +- 6 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 cg/meta/workflow/microsalt/quality_controller/result_logger.py diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index 9d06c52499..b0b3094504 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -228,8 +228,9 @@ def qc_microsalt(context: click.Context, unique_id: str) -> None: case: Case = analysis_api.status_db.get_case_by_internal_id(unique_id) sample_id: str = case.samples[0].internal_id lims_project: str = analysis_api.get_project(sample_id) - metrics_file_path: Path = Path(run_dir_path, f"{lims_project}.json") + metrics_file_path = Path(run_dir_path, f"{lims_project}.json") try: + LOG.info(f"Performing QC on case {unique_id}") analysis_api.quality_checker.quality_control(metrics_file_path) except IndexError: LOG.error(f"No existing analysis directories found for case {unique_id}.") diff --git a/cg/meta/workflow/microsalt/quality_controller/models.py b/cg/meta/workflow/microsalt/quality_controller/models.py index 7b20af78dc..65faeb7de2 100644 --- a/cg/meta/workflow/microsalt/quality_controller/models.py +++ b/cg/meta/workflow/microsalt/quality_controller/models.py @@ -14,3 +14,10 @@ class QualityResult(BaseModel): passes_inserts_qc: bool passes_coverage_qc: bool passes_10x_coverage_qc: bool + + +class CaseQualityResult(BaseModel): + passes_qc: bool + control_passes_qc: bool + urgent_passes_qc: bool + non_urgent_passes_qc: bool diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index d3547116b7..5ccfb8d76f 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -2,8 +2,9 @@ from pathlib import Path from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, QualityResult from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator +from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, get_sample_target_reads, @@ -32,9 +33,11 @@ def __init__(self, status_db: Store): def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) sample_results: list[QualityResult] = self.quality_control_samples(quality_metrics) + case_result: CaseQualityResult = self.quality_control_case(sample_results) report_file: Path = metrics_file_path.parent.joinpath("QC_done.json") - ReportGenerator.report(out_file=report_file, results=sample_results) - return self.quality_control_case(sample_results) + ReportGenerator.report(out_file=report_file, sample_results=sample_results) + ResultLogger.log_results(sample_results=sample_results, case_result=case_result) + return case_result.passes_qc def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[QualityResult]: sample_results: list[QualityResult] = [] @@ -77,11 +80,19 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual passes_10x_coverage_qc=valid_10x_coverage, ) - def quality_control_case(self, sample_results: list[QualityResult]) -> bool: - control_passes_qc: bool = negative_control_pass_qc(sample_results) + def quality_control_case(self, sample_results: list[QualityResult]) -> CaseQualityResult: + control_pass_qc: bool = negative_control_pass_qc(sample_results) urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results) non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) - return control_passes_qc and urgent_pass_qc and non_urgent_pass_qc + + case_passes_qc: bool = control_pass_qc and urgent_pass_qc and non_urgent_pass_qc + + return CaseQualityResult( + passes_qc=case_passes_qc, + control_passes_qc=control_pass_qc, + urgent_passes_qc=urgent_pass_qc, + non_urgent_passes_qc=non_urgent_pass_qc, + ) def is_qc_required(self, case_run_dir: Path) -> bool: if case_run_dir is None: diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index 36dff8fef0..9b9fa81756 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -6,10 +6,9 @@ class ReportGenerator: @staticmethod - def report(out_file: Path, results: list[QualityResult]): + def report(out_file: Path, sample_results: list[QualityResult]): formatted_results: list[dict] = [] - - for result in results: + for result in sample_results: formatted_result = { result.sample_id: { "Passed QC": result.passes_qc, @@ -22,5 +21,4 @@ def report(out_file: Path, results: list[QualityResult]): } } formatted_results.append(formatted_result) - write_json(file_path=out_file, content=formatted_results) diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py new file mode 100644 index 0000000000..28033abeeb --- /dev/null +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -0,0 +1,51 @@ +import logging +from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, QualityResult + +LOG = logging.getLogger(__name__) + + +class ResultLogger: + @staticmethod + def log_results(sample_results: list[QualityResult], case_result: CaseQualityResult): + if case_result.passes_qc: + LOG.info("Quality control passed.") + else: + message = get_case_fail_message(case_result) + LOG.warning(message) + + message = sample_result_message(sample_results) + LOG.info(message) + + +def get_case_fail_message(case_result: CaseQualityResult) -> str: + fail_reasons = [] + + if not case_result.control_passes_qc: + fail_reasons.append("The negative control sample failed quality control.") + if not case_result.urgent_passes_qc: + fail_reasons.append("The urgent samples failed quality control.") + if not case_result.non_urgent_passes_qc: + fail_reasons.append("The non-urgent samples failed quality control.") + + fail_message = "Quality control failed." + + return fail_message + " ".join(fail_reasons) + + +def sample_result_message(sample_results: list[QualityResult]) -> str: + failed_samples: list[QualityResult] = get_failed_results(sample_results) + passed_samples: list[QualityResult] = get_passed_results(sample_results) + + failed_count: int = len(failed_samples) + passed_count: int = len(passed_samples) + total_count = len(sample_results) + + return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total" + + +def get_failed_results(results: list[QualityResult]) -> list[str]: + return [result for result in results if not result.passes_qc] + + +def get_passed_results(results: list[QualityResult]) -> list[str]: + return [result for result in results if result.passes_qc] diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py index ee96a84bfb..1f6177889f 100644 --- a/tests/meta/workflow/microsalt/test_report_generation.py +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -11,7 +11,7 @@ def test_generate_report_with_results(quality_results: list[QualityResult], tmp_ out_file = Path(tmp_path, "QC_done.json") # WHEN generating a report - ReportGenerator.report(out_file=out_file, results=quality_results) + ReportGenerator.report(out_file=out_file, sample_results=quality_results) # THEN the report is created assert out_file.exists() From 899af792127f0d790d58ad800f22fcb817377af2 Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 16:33:32 +0100 Subject: [PATCH 42/65] Add new lines --- .../microsalt/quality_controller/result_logger.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py index 28033abeeb..6214c02318 100644 --- a/cg/meta/workflow/microsalt/quality_controller/result_logger.py +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -8,7 +8,7 @@ class ResultLogger: @staticmethod def log_results(sample_results: list[QualityResult], case_result: CaseQualityResult): if case_result.passes_qc: - LOG.info("Quality control passed.") + LOG.info("Quality control passed.\n") else: message = get_case_fail_message(case_result) LOG.warning(message) @@ -21,13 +21,13 @@ def get_case_fail_message(case_result: CaseQualityResult) -> str: fail_reasons = [] if not case_result.control_passes_qc: - fail_reasons.append("The negative control sample failed quality control.") + fail_reasons.append("The negative control sample failed quality control.\n") if not case_result.urgent_passes_qc: - fail_reasons.append("The urgent samples failed quality control.") + fail_reasons.append("The urgent samples failed quality control.\n") if not case_result.non_urgent_passes_qc: - fail_reasons.append("The non-urgent samples failed quality control.") + fail_reasons.append("The non-urgent samples failed quality control.\n") - fail_message = "Quality control failed." + fail_message = "Quality control failed.\n" return fail_message + " ".join(fail_reasons) @@ -40,7 +40,7 @@ def sample_result_message(sample_results: list[QualityResult]) -> str: passed_count: int = len(passed_samples) total_count = len(sample_results) - return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total" + return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total.\n" def get_failed_results(results: list[QualityResult]) -> list[str]: From 2289485be84b496150e47867830a6754b207168f Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 16:47:26 +0100 Subject: [PATCH 43/65] Fix comments --- cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py | 2 +- cg/meta/workflow/microsalt/microsalt.py | 4 ++-- tests/meta/workflow/conftest.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py index 0be24355a1..0faaccc489 100644 --- a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py +++ b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py @@ -9,4 +9,4 @@ class MetricsParser: def parse(file_path: Path) -> QualityMetrics: data = read_json(file_path) formatted_data = {"samples": data} - return QualityMetrics(**formatted_data) + return QualityMetrics.model_validate(formatted_data) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index f67b4f19d6..4c1e2eaaf3 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -74,8 +74,8 @@ def get_latest_case_path(self, case_id: str) -> Path | None: return next( ( path - for path in self.get_case_path(case_id=case_id) - if f"{lims_project}_" in str(path) + for path in self.get_case_path(case_id) + if f"{lims_project}_" in path.as_posix() ), None, ) diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index 01b7f990e4..f91b92304c 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -202,7 +202,7 @@ def qc_microsalt_context( internal_id=sample, application_tag=MicrosaltAppTags.MWRNXTR003, application_type=MicrosaltAppTags.PREP_CATEGORY, - reads=MicrosaltQC.TARGET_READS * 2, + reads=MicrosaltQC.TARGET_READS, last_sequenced_at=datetime.datetime.now(), ) From 85966cb0bb3779388388dd7eee5d8ba303c960aa Mon Sep 17 00:00:00 2001 From: seallard Date: Thu, 14 Dec 2023 16:51:21 +0100 Subject: [PATCH 44/65] Extract method --- cg/meta/workflow/microsalt/quality_controller/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 1d3377c311..3f55185626 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -75,11 +75,15 @@ def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.application_tag != MicrosaltAppTags.MWRNXTR003] + return [result for result in results if not is_urgent_result(result)] def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: - return [result for result in results if result.application_tag == MicrosaltAppTags.MWRNXTR003] + return [result for result in results if is_urgent_result(result)] + + +def is_urgent_result(result: QualityResult) -> bool: + return result.application_tag == MicrosaltAppTags.MWRNXTR003 def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: From fd5c4bfb4bfe94dc71ffa101e28e9a83272ccf72 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 15 Dec 2023 12:03:02 +0100 Subject: [PATCH 45/65] Extract method --- cg/cli/workflow/microsalt/base.py | 6 +----- cg/meta/workflow/microsalt/microsalt.py | 17 ++++++++++------- tests/meta/workflow/conftest.py | 2 +- .../workflow/microsalt/test_parsing_metrics.py | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index b0b3094504..229b926b00 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -224,11 +224,7 @@ def start_available(context: click.Context, dry_run: bool = False): def qc_microsalt(context: click.Context, unique_id: str) -> None: """Perform QC on a microsalt case.""" analysis_api: MicrosaltAnalysisAPI = context.obj.meta_apis["analysis_api"] - run_dir_path: Path = analysis_api.get_latest_case_path(unique_id) - case: Case = analysis_api.status_db.get_case_by_internal_id(unique_id) - sample_id: str = case.samples[0].internal_id - lims_project: str = analysis_api.get_project(sample_id) - metrics_file_path = Path(run_dir_path, f"{lims_project}.json") + metrics_file_path: Path = analysis_api.get_metrics_file_path(unique_id) try: LOG.info(f"Performing QC on case {unique_id}") analysis_api.quality_checker.quality_control(metrics_file_path) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 4c1e2eaaf3..9dfd66bf34 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -72,11 +72,7 @@ def get_latest_case_path(self, case_id: str) -> Path | None: lims_project: str = self.get_project(sample_id) return next( - ( - path - for path in self.get_case_path(case_id) - if f"{lims_project}_" in path.as_posix() - ), + (path for path in self.get_case_path(case_id) if f"{lims_project}_" in path.as_posix()), None, ) @@ -279,9 +275,8 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) - lims_project: str = self.get_project(case.samples[0].internal_id) - metrics_file_path = Path(case_run_dir, f"{lims_project}.json") if self.quality_checker.is_qc_required(case_run_dir): + metrics_file_path = self.get_metrics_file_path(case.internal_id) if self.quality_checker.quality_control(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") cases_to_store.append(case) @@ -302,3 +297,11 @@ def get_completed_cases(self) -> list[Case]: for case in self.status_db.get_running_cases_in_pipeline(self.pipeline) if self.trailblazer_api.is_latest_analysis_completed(case.internal_id) ] + + def get_metrics_file_path(self, case_id: str) -> Path: + """Return path to metrics file for a case.""" + case_obj: Case = self.status_db.get_case_by_internal_id(case_id) + sample_id: str = case_obj.links[0].sample.internal_id + lims_project: str = self.get_project(sample_id) + case_run_dir: Path = self.get_latest_case_path(case_id) + return Path(case_run_dir, f"{lims_project}.json") diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index f91b92304c..7d54067279 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -143,7 +143,7 @@ def metrics_file_passing_qc( @pytest.fixture -def valid_microsalt_metrics_file( +def microsalt_metrics_file( microsalt_qc_fail_run_dir_path: Path, microsalt_qc_fail_lims_project: str ) -> Path: return Path(microsalt_qc_fail_run_dir_path, f"{microsalt_qc_fail_lims_project}.json") diff --git a/tests/meta/workflow/microsalt/test_parsing_metrics.py b/tests/meta/workflow/microsalt/test_parsing_metrics.py index 43fde2491f..b1b614691c 100644 --- a/tests/meta/workflow/microsalt/test_parsing_metrics.py +++ b/tests/meta/workflow/microsalt/test_parsing_metrics.py @@ -3,10 +3,10 @@ from cg.meta.workflow.microsalt.metrics_parser import MetricsParser -def test_parse_valid_quality_metrics(valid_microsalt_metrics_file: Path): +def test_parse_valid_quality_metrics(microsalt_metrics_file: Path): # GIVEN a valid quality metrics file path # WHEN parsing the file - MetricsParser.parse(valid_microsalt_metrics_file) + MetricsParser.parse(microsalt_metrics_file) # THEN no error is thrown From b08ec67863523692941ac80ee2829303ad681bd3 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 09:51:28 +0100 Subject: [PATCH 46/65] Fix comments --- cg/meta/workflow/microsalt/__init__.py | 2 +- cg/meta/workflow/microsalt/constants.py | 4 ++++ .../microsalt/metrics_parser/__init__.py | 4 ++-- .../microsalt/metrics_parser/metrics_parser.py | 2 +- cg/meta/workflow/microsalt/microsalt.py | 3 ++- .../microsalt/quality_controller/__init__.py | 2 +- .../quality_controller/quality_controller.py | 11 ++++++----- .../quality_controller/report_generator.py | 2 +- .../quality_controller/result_logger.py | 4 ++-- .../microsalt/quality_controller/utils.py | 2 +- tests/meta/workflow/conftest.py | 2 +- tests/meta/workflow/microsalt/conftest.py | 8 ++++---- .../microsalt/test_quality_controller.py | 17 ++++++++++------- .../microsalt/test_quality_controller_utils.py | 8 ++++---- .../microsalt/test_report_generation.py | 3 ++- 15 files changed, 42 insertions(+), 32 deletions(-) create mode 100644 cg/meta/workflow/microsalt/constants.py diff --git a/cg/meta/workflow/microsalt/__init__.py b/cg/meta/workflow/microsalt/__init__.py index ef4fec629a..2f1bfe3bc8 100644 --- a/cg/meta/workflow/microsalt/__init__.py +++ b/cg/meta/workflow/microsalt/__init__.py @@ -1 +1 @@ -from .microsalt import MicrosaltAnalysisAPI +from cg.meta.workflow.microsalt.microsalt import MicrosaltAnalysisAPI diff --git a/cg/meta/workflow/microsalt/constants.py b/cg/meta/workflow/microsalt/constants.py new file mode 100644 index 0000000000..bd8063ccf9 --- /dev/null +++ b/cg/meta/workflow/microsalt/constants.py @@ -0,0 +1,4 @@ +from cg.constants.constants import FileExtensions + + +QUALITY_REPORT_FILE_NAME: str = f"QC_done.{FileExtensions.JSON}" diff --git a/cg/meta/workflow/microsalt/metrics_parser/__init__.py b/cg/meta/workflow/microsalt/metrics_parser/__init__.py index 2daaf38068..ba8833b223 100644 --- a/cg/meta/workflow/microsalt/metrics_parser/__init__.py +++ b/cg/meta/workflow/microsalt/metrics_parser/__init__.py @@ -1,2 +1,2 @@ -from .metrics_parser import MetricsParser -from .models import QualityMetrics, SampleMetrics +from cg.meta.workflow.microsalt.metrics_parser.metrics_parser import MetricsParser +from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics, SampleMetrics diff --git a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py index 0faaccc489..b35c883dc2 100644 --- a/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py +++ b/cg/meta/workflow/microsalt/metrics_parser/metrics_parser.py @@ -1,7 +1,7 @@ from pathlib import Path from cg.io.json import read_json -from .models import QualityMetrics +from cg.meta.workflow.microsalt.metrics_parser.models import QualityMetrics class MetricsParser: diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 9dfd66bf34..61ff93e7cc 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -10,6 +10,7 @@ import click from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority +from cg.constants.constants import FileExtensions from cg.constants.tb import AnalysisStatus from cg.exc import CgDataError from cg.meta.workflow.analysis import AnalysisAPI @@ -304,4 +305,4 @@ def get_metrics_file_path(self, case_id: str) -> Path: sample_id: str = case_obj.links[0].sample.internal_id lims_project: str = self.get_project(sample_id) case_run_dir: Path = self.get_latest_case_path(case_id) - return Path(case_run_dir, f"{lims_project}.json") + return Path(case_run_dir, f"{lims_project}.{FileExtensions.JSON}") diff --git a/cg/meta/workflow/microsalt/quality_controller/__init__.py b/cg/meta/workflow/microsalt/quality_controller/__init__.py index b6fdaafd87..a2afe4fcaa 100644 --- a/cg/meta/workflow/microsalt/quality_controller/__init__.py +++ b/cg/meta/workflow/microsalt/quality_controller/__init__.py @@ -1 +1 @@ -from .quality_controller import QualityController +from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 5ccfb8d76f..a61eca51fa 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -1,5 +1,6 @@ import logging from pathlib import Path +from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, QualityResult @@ -16,7 +17,7 @@ has_valid_median_insert_size, negative_control_pass_qc, is_valid_total_reads, - is_valid_total_reads_for_control, + is_valid_total_reads_for_negative_control, non_urgent_samples_pass_qc, urgent_samples_pass_qc, ) @@ -34,7 +35,7 @@ def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) sample_results: list[QualityResult] = self.quality_control_samples(quality_metrics) case_result: CaseQualityResult = self.quality_control_case(sample_results) - report_file: Path = metrics_file_path.parent.joinpath("QC_done.json") + report_file: Path = metrics_file_path.parent.joinpath(QUALITY_REPORT_FILE_NAME) ReportGenerator.report(out_file=report_file, sample_results=sample_results) ResultLogger.log_results(sample_results=sample_results, case_result=case_result) return case_result.passes_qc @@ -95,9 +96,9 @@ def quality_control_case(self, sample_results: list[QualityResult]) -> CaseQuali ) def is_qc_required(self, case_run_dir: Path) -> bool: - if case_run_dir is None: + if not case_run_dir: return False - qc_done_path: Path = case_run_dir.joinpath("QC_done.json") + qc_done_path: Path = case_run_dir.joinpath(QUALITY_REPORT_FILE_NAME) return not qc_done_path.exists() def has_valid_total_reads(self, sample_id: str) -> bool: @@ -106,5 +107,5 @@ def has_valid_total_reads(self, sample_id: str) -> bool: sample_reads: int = sample.reads if is_sample_negative_control(sample): - return is_valid_total_reads_for_control(reads=sample_reads, target_reads=target_reads) + return is_valid_total_reads_for_negative_control(reads=sample_reads, target_reads=target_reads) return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index 9b9fa81756..5b7d306e32 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -6,7 +6,7 @@ class ReportGenerator: @staticmethod - def report(out_file: Path, sample_results: list[QualityResult]): + def report(out_file: Path, sample_results: list[QualityResult]) -> None: formatted_results: list[dict] = [] for result in sample_results: formatted_result = { diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py index 6214c02318..16694c1c9a 100644 --- a/cg/meta/workflow/microsalt/quality_controller/result_logger.py +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -6,7 +6,7 @@ class ResultLogger: @staticmethod - def log_results(sample_results: list[QualityResult], case_result: CaseQualityResult): + def log_results(sample_results: list[QualityResult], case_result: CaseQualityResult) -> None: if case_result.passes_qc: LOG.info("Quality control passed.\n") else: @@ -38,7 +38,7 @@ def sample_result_message(sample_results: list[QualityResult]) -> str: failed_count: int = len(failed_samples) passed_count: int = len(passed_samples) - total_count = len(sample_results) + total_count: int = len(sample_results) return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total.\n" diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 3f55185626..ca38345d40 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -9,7 +9,7 @@ def is_valid_total_reads(reads: int, target_reads: int) -> bool: return reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD -def is_valid_total_reads_for_control(reads: int, target_reads: int) -> bool: +def is_valid_total_reads_for_negative_control(reads: int, target_reads: int) -> bool: return reads < target_reads * MicrosaltQC.NEGATIVE_CONTROL_READS_THRESHOLD diff --git a/tests/meta/workflow/conftest.py b/tests/meta/workflow/conftest.py index 7d54067279..5a6a998da3 100644 --- a/tests/meta/workflow/conftest.py +++ b/tests/meta/workflow/conftest.py @@ -135,7 +135,7 @@ def metrics_file_passing_qc( microsalt_qc_pass_lims_project: str, tmp_path: Path, ) -> Path: - """Return a metrics file that fails QC with corresponding samples in the database.""" + """Return a metrics file that pass QC with corresponding samples in the database.""" metrics_path = Path(microsalt_qc_pass_run_dir_path, f"{microsalt_qc_pass_lims_project}.json") temp_metrics_path = Path(tmp_path, metrics_path.name) shutil.copy(metrics_path, temp_metrics_path) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index d26370c75e..1587385696 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -33,7 +33,7 @@ def create_sample_metrics( def create_quality_result( - sample_id: str = "sample1", + sample_id: str = "sample_1", passes_qc: bool = True, is_control: bool = False, application_tag: str = MicrosaltAppTags.MWRNXTR003, @@ -62,7 +62,7 @@ def create_quality_result( def quality_results() -> list[QualityResult]: return [ QualityResult( - sample_id="sample1", + sample_id="sample_1", passes_qc=False, is_control=True, application_tag=MicrosaltAppTags.MWRNXTR003, @@ -74,7 +74,7 @@ def quality_results() -> list[QualityResult]: passes_10x_coverage_qc=True, ), QualityResult( - sample_id="sample2", + sample_id="sample_2", passes_qc=True, is_control=False, application_tag=MicrosaltAppTags.MWRNXTR003, @@ -86,7 +86,7 @@ def quality_results() -> list[QualityResult]: passes_10x_coverage_qc=True, ), QualityResult( - sample_id="sample3", + sample_id="sample_3", passes_qc=False, is_control=False, application_tag=MicrosaltAppTags.MWRNXTR003, diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py index 086b42c3cf..254bf8ac21 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller.py +++ b/tests/meta/workflow/microsalt/test_quality_controller.py @@ -1,4 +1,5 @@ from pathlib import Path +from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.quality_controller import QualityController from cg.models.cg_config import CGConfig @@ -6,21 +7,23 @@ from cg.store.models import Application, Sample from tests.store_helpers import StoreHelpers +PRICES = {"standard": 1_000, "priority": 2_000, "express": 3_000, "research": 4_000} + def test_is_valid_total_reads_passes(quality_controller: QualityController): # GIVEN an application store = quality_controller.status_db - application: Application = StoreHelpers.add_application(store=store, target_reads=1000) + application: Application = StoreHelpers.add_application(store=store, target_reads=1_000) # GIVEN an application version version = StoreHelpers.add_application_version( store=store, application=application, - prices={"standard": 1000, "priority": 2000, "express": 3000, "research": 4000}, + prices=PRICES, ) # GIVEN a sample with a number of reads that is above the target reads - sample: Sample = StoreHelpers.add_sample(store=store, reads=10000) + sample: Sample = StoreHelpers.add_sample(store=store, reads=10_000) # GIVEN that the sample is associated with the application version sample.application_version = version @@ -35,13 +38,13 @@ def test_is_valid_total_reads_passes(quality_controller: QualityController): def test_is_valid_total_reads_fails(quality_controller: QualityController): # GIVEN an application store = quality_controller.status_db - application: Application = StoreHelpers.add_application(store=store, target_reads=1000) + application: Application = StoreHelpers.add_application(store=store, target_reads=1_000) # GIVEN an application version version = StoreHelpers.add_application_version( store=store, application=application, - prices={"standard": 1000, "priority": 2000, "express": 3000, "research": 4000}, + prices=PRICES, ) # GIVEN a sample with a number of reads that is far below the target reads @@ -73,7 +76,7 @@ def test_quality_control_fails(qc_microsalt_context: CGConfig, metrics_file_fail assert not passes_qc # THEN a report should be generated - assert metrics_file_failing_qc.parent.joinpath("QC_done.json").exists() + assert metrics_file_failing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists() def test_quality_control_passes(qc_microsalt_context: CGConfig, metrics_file_passing_qc: Path): @@ -92,4 +95,4 @@ def test_quality_control_passes(qc_microsalt_context: CGConfig, metrics_file_pas assert passes_qc # THEN a report should be generated - assert metrics_file_passing_qc.parent.joinpath("QC_done.json").exists() + assert metrics_file_passing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists() diff --git a/tests/meta/workflow/microsalt/test_quality_controller_utils.py b/tests/meta/workflow/microsalt/test_quality_controller_utils.py index ec0be024b2..f19fa3cbab 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller_utils.py +++ b/tests/meta/workflow/microsalt/test_quality_controller_utils.py @@ -15,7 +15,7 @@ is_valid_mapping_rate, is_valid_median_insert_size, is_valid_total_reads, - is_valid_total_reads_for_control, + is_valid_total_reads_for_negative_control, negative_control_pass_qc, non_urgent_samples_pass_qc, urgent_samples_pass_qc, @@ -71,7 +71,7 @@ def test_control_total_reads_passing(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold: bool = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_negative_control( reads=sample_reads, target_reads=target_reads ) @@ -85,7 +85,7 @@ def test_control_total_reads_failing(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold: bool = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_negative_control( reads=sample_reads, target_reads=target_reads ) @@ -99,7 +99,7 @@ def test_control_total_reads_passing_without_reads(): target_reads = 100 # WHEN checking if the control read count is valid - passes_reads_threshold: bool = is_valid_total_reads_for_control( + passes_reads_threshold: bool = is_valid_total_reads_for_negative_control( reads=sample_reads, target_reads=target_reads ) diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py index 1f6177889f..3af4352cd5 100644 --- a/tests/meta/workflow/microsalt/test_report_generation.py +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -1,4 +1,5 @@ from pathlib import Path +from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator @@ -8,7 +9,7 @@ def test_generate_report_with_results(quality_results: list[QualityResult], tmp_ # GIVEN quality results # GIVEN a file path to write them to - out_file = Path(tmp_path, "QC_done.json") + out_file = Path(tmp_path, QUALITY_REPORT_FILE_NAME) # WHEN generating a report ReportGenerator.report(out_file=out_file, sample_results=quality_results) From 20f94a658d5e4658a7488f334dcd7dea8d5e3481 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 12:59:05 +0100 Subject: [PATCH 47/65] Improve report and logging --- cg/meta/workflow/microsalt/constants.py | 2 +- cg/meta/workflow/microsalt/microsalt.py | 2 +- .../microsalt/quality_controller/models.py | 12 ++-- .../quality_controller/quality_controller.py | 57 +++++++++---------- .../quality_controller/report_generator.py | 28 ++++----- .../quality_controller/result_logger.py | 41 ++++++------- .../microsalt/quality_controller/utils.py | 51 ++++++++++++----- tests/meta/workflow/microsalt/conftest.py | 24 +++++--- .../test_quality_controller_utils.py | 44 +++++++------- .../microsalt/test_report_generation.py | 13 ++++- 10 files changed, 155 insertions(+), 119 deletions(-) diff --git a/cg/meta/workflow/microsalt/constants.py b/cg/meta/workflow/microsalt/constants.py index bd8063ccf9..af2e1ed055 100644 --- a/cg/meta/workflow/microsalt/constants.py +++ b/cg/meta/workflow/microsalt/constants.py @@ -1,4 +1,4 @@ from cg.constants.constants import FileExtensions -QUALITY_REPORT_FILE_NAME: str = f"QC_done.{FileExtensions.JSON}" +QUALITY_REPORT_FILE_NAME: str = f"QC_done{FileExtensions.JSON}" diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 61ff93e7cc..c1316b7c21 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -305,4 +305,4 @@ def get_metrics_file_path(self, case_id: str) -> Path: sample_id: str = case_obj.links[0].sample.internal_id lims_project: str = self.get_project(sample_id) case_run_dir: Path = self.get_latest_case_path(case_id) - return Path(case_run_dir, f"{lims_project}.{FileExtensions.JSON}") + return Path(case_run_dir, f"{lims_project}{FileExtensions.JSON}") diff --git a/cg/meta/workflow/microsalt/quality_controller/models.py b/cg/meta/workflow/microsalt/quality_controller/models.py index 65faeb7de2..4a1905c295 100644 --- a/cg/meta/workflow/microsalt/quality_controller/models.py +++ b/cg/meta/workflow/microsalt/quality_controller/models.py @@ -3,17 +3,17 @@ from cg.constants.constants import MicrosaltAppTags -class QualityResult(BaseModel): +class SampleQualityResult(BaseModel): sample_id: str passes_qc: bool is_control: bool application_tag: MicrosaltAppTags passes_reads_qc: bool - passes_mapping_qc: bool - passes_duplication_qc: bool - passes_inserts_qc: bool - passes_coverage_qc: bool - passes_10x_coverage_qc: bool + passes_mapping_qc: bool = True + passes_duplication_qc: bool = True + passes_inserts_qc: bool = True + passes_coverage_qc: bool = True + passes_10x_coverage_qc: bool = True class CaseQualityResult(BaseModel): diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index a61eca51fa..f22b2479d6 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -3,11 +3,15 @@ from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics -from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, + get_report_path, get_sample_target_reads, is_sample_negative_control, has_valid_10x_coverage, @@ -15,11 +19,9 @@ has_valid_duplication_rate, has_valid_mapping_rate, has_valid_median_insert_size, - negative_control_pass_qc, is_valid_total_reads, is_valid_total_reads_for_negative_control, - non_urgent_samples_pass_qc, - urgent_samples_pass_qc, + quality_control_case, ) from cg.store.api.core import Store from cg.store.models import Sample @@ -33,21 +35,21 @@ def __init__(self, status_db: Store): def quality_control(self, metrics_file_path: Path) -> bool: quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) - sample_results: list[QualityResult] = self.quality_control_samples(quality_metrics) - case_result: CaseQualityResult = self.quality_control_case(sample_results) - report_file: Path = metrics_file_path.parent.joinpath(QUALITY_REPORT_FILE_NAME) - ReportGenerator.report(out_file=report_file, sample_results=sample_results) - ResultLogger.log_results(sample_results=sample_results, case_result=case_result) + sample_results: list[SampleQualityResult] = self.quality_control_samples(quality_metrics) + case_result: CaseQualityResult = quality_control_case(sample_results) + report_file: Path = get_report_path(metrics_file_path) + ReportGenerator.report(out_file=report_file, samples=sample_results, case=case_result) + ResultLogger.log_results(case=case_result, samples=sample_results) return case_result.passes_qc - def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[QualityResult]: - sample_results: list[QualityResult] = [] + def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[SampleQualityResult]: + sample_results: list[SampleQualityResult] = [] for sample_id, metrics in quality_metrics.samples.items(): result = self.quality_control_sample(sample_id=sample_id, metrics=metrics) sample_results.append(result) return sample_results - def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> QualityResult: + def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> SampleQualityResult: valid_read_count: bool = self.has_valid_total_reads(sample_id) valid_mapping: bool = has_valid_mapping_rate(metrics) valid_duplication: bool = has_valid_duplication_rate(metrics) @@ -65,10 +67,17 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual ) sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) - is_control: bool = is_sample_negative_control(sample) application_tag: str = get_application_tag(sample) - - return QualityResult( + if is_control := is_sample_negative_control(sample): + return SampleQualityResult( + sample_id=sample_id, + passes_qc=sample_passes_qc, + is_control=is_control, + passes_reads_qc=valid_read_count, + application_tag=application_tag, + ) + + return SampleQualityResult( sample_id=sample_id, passes_qc=sample_passes_qc, is_control=is_control, @@ -81,20 +90,6 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Qual passes_10x_coverage_qc=valid_10x_coverage, ) - def quality_control_case(self, sample_results: list[QualityResult]) -> CaseQualityResult: - control_pass_qc: bool = negative_control_pass_qc(sample_results) - urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results) - non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) - - case_passes_qc: bool = control_pass_qc and urgent_pass_qc and non_urgent_pass_qc - - return CaseQualityResult( - passes_qc=case_passes_qc, - control_passes_qc=control_pass_qc, - urgent_passes_qc=urgent_pass_qc, - non_urgent_passes_qc=non_urgent_pass_qc, - ) - def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: return False @@ -107,5 +102,7 @@ def has_valid_total_reads(self, sample_id: str) -> bool: sample_reads: int = sample.reads if is_sample_negative_control(sample): - return is_valid_total_reads_for_negative_control(reads=sample_reads, target_reads=target_reads) + return is_valid_total_reads_for_negative_control( + reads=sample_reads, target_reads=target_reads + ) return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index 5b7d306e32..32e9a7c456 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -1,24 +1,18 @@ from pathlib import Path +from typing import List from cg.io.json import write_json -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) class ReportGenerator: @staticmethod - def report(out_file: Path, sample_results: list[QualityResult]) -> None: - formatted_results: list[dict] = [] - for result in sample_results: - formatted_result = { - result.sample_id: { - "Passed QC": result.passes_qc, - "Passed QC Reads": result.passes_reads_qc, - "Passed QC Mapping": result.passes_mapping_qc, - "Passed QC Duplication": result.passes_duplication_qc, - "Passed QC Insert Size": result.passes_inserts_qc, - "Passed QC Coverage": result.passes_coverage_qc, - "Passed QC 10x Coverage": result.passes_10x_coverage_qc, - } - } - formatted_results.append(formatted_result) - write_json(file_path=out_file, content=formatted_results) + def report(out_file: Path, case: CaseQualityResult, samples: List[SampleQualityResult]) -> None: + report_content = { + "case": case.model_dump(), + "samples": [sample.model_dump() for sample in samples], + } + write_json(file_path=out_file, content=report_content) diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py index 16694c1c9a..3aa5cb3d24 100644 --- a/cg/meta/workflow/microsalt/quality_controller/result_logger.py +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -1,30 +1,33 @@ import logging -from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) LOG = logging.getLogger(__name__) class ResultLogger: @staticmethod - def log_results(sample_results: list[QualityResult], case_result: CaseQualityResult) -> None: - if case_result.passes_qc: - LOG.info("Quality control passed.\n") + def log_results(samples: list[SampleQualityResult], case: CaseQualityResult) -> None: + if case.passes_qc: + LOG.info("Quality control passed.") else: - message = get_case_fail_message(case_result) + message = get_case_fail_message(case) LOG.warning(message) - message = sample_result_message(sample_results) + message = sample_result_message(samples) LOG.info(message) -def get_case_fail_message(case_result: CaseQualityResult) -> str: +def get_case_fail_message(case: CaseQualityResult) -> str: fail_reasons = [] - if not case_result.control_passes_qc: + if not case.control_passes_qc: fail_reasons.append("The negative control sample failed quality control.\n") - if not case_result.urgent_passes_qc: + if not case.urgent_passes_qc: fail_reasons.append("The urgent samples failed quality control.\n") - if not case_result.non_urgent_passes_qc: + if not case.non_urgent_passes_qc: fail_reasons.append("The non-urgent samples failed quality control.\n") fail_message = "Quality control failed.\n" @@ -32,20 +35,20 @@ def get_case_fail_message(case_result: CaseQualityResult) -> str: return fail_message + " ".join(fail_reasons) -def sample_result_message(sample_results: list[QualityResult]) -> str: - failed_samples: list[QualityResult] = get_failed_results(sample_results) - passed_samples: list[QualityResult] = get_passed_results(sample_results) +def sample_result_message(samples: list[SampleQualityResult]) -> str: + failed_samples: list[SampleQualityResult] = get_failed_results(samples) + passed_samples: list[SampleQualityResult] = get_passed_results(samples) failed_count: int = len(failed_samples) passed_count: int = len(passed_samples) - total_count: int = len(sample_results) + total_count: int = len(samples) - return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total.\n" + return f"Sample results: {failed_count} failed, {passed_count} passed, {total_count} total." -def get_failed_results(results: list[QualityResult]) -> list[str]: - return [result for result in results if not result.passes_qc] +def get_failed_results(samples: list[SampleQualityResult]) -> list[str]: + return [result for result in samples if not result.passes_qc] -def get_passed_results(results: list[QualityResult]) -> list[str]: - return [result for result in results if result.passes_qc] +def get_passed_results(samples: list[SampleQualityResult]) -> list[str]: + return [result for result in samples if result.passes_qc] diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index ca38345d40..a683f83a7f 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -1,6 +1,12 @@ +from pathlib import Path + from cg.constants.constants import MicrosaltAppTags, MicrosaltQC +from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample @@ -58,42 +64,42 @@ def has_valid_10x_coverage(metrics: SampleMetrics) -> bool: return is_valid_10x_coverage(coverage_10x) if coverage_10x else False -def get_negative_control_result(results: list[QualityResult]) -> QualityResult: +def get_negative_control_result(results: list[SampleQualityResult]) -> SampleQualityResult: for result in results: if result.is_control: return result raise ValueError("No negative control found") -def negative_control_pass_qc(results: list[QualityResult]) -> bool: - negative_control_result: QualityResult = get_negative_control_result(results) +def negative_control_pass_qc(results: list[SampleQualityResult]) -> bool: + negative_control_result: SampleQualityResult = get_negative_control_result(results) return negative_control_result.passes_qc -def get_results_passing_qc(results: list[QualityResult]) -> list[QualityResult]: +def get_results_passing_qc(results: list[SampleQualityResult]) -> list[SampleQualityResult]: return [result for result in results if result.passes_qc] -def get_non_urgent_results(results: list[QualityResult]) -> list[QualityResult]: +def get_non_urgent_results(results: list[SampleQualityResult]) -> list[SampleQualityResult]: return [result for result in results if not is_urgent_result(result)] -def get_urgent_results(results: list[QualityResult]) -> list[QualityResult]: +def get_urgent_results(results: list[SampleQualityResult]) -> list[SampleQualityResult]: return [result for result in results if is_urgent_result(result)] -def is_urgent_result(result: QualityResult) -> bool: +def is_urgent_result(result: SampleQualityResult) -> bool: return result.application_tag == MicrosaltAppTags.MWRNXTR003 -def urgent_samples_pass_qc(results: list[QualityResult]) -> bool: - urgent_results: list[QualityResult] = get_urgent_results(results) +def urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool: + urgent_results: list[SampleQualityResult] = get_urgent_results(results) return all(result.passes_qc for result in urgent_results) -def non_urgent_samples_pass_qc(results: list[QualityResult]) -> bool: - non_urgent_samples: list[QualityResult] = get_non_urgent_results(results) - passing_qc: list[QualityResult] = get_results_passing_qc(non_urgent_samples) +def non_urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool: + non_urgent_samples: list[SampleQualityResult] = get_non_urgent_results(results) + passing_qc: list[SampleQualityResult] = get_results_passing_qc(non_urgent_samples) if not non_urgent_samples: return True @@ -112,3 +118,22 @@ def get_application_tag(sample: Sample) -> str: def get_sample_target_reads(sample: Sample) -> int: return sample.application_version.application.target_reads + + +def get_report_path(metrics_file_path: Path) -> Path: + return metrics_file_path.parent.joinpath(QUALITY_REPORT_FILE_NAME) + + +def quality_control_case(sample_results: list[SampleQualityResult]) -> CaseQualityResult: + control_pass_qc: bool = negative_control_pass_qc(sample_results) + urgent_pass_qc: bool = urgent_samples_pass_qc(sample_results) + non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(sample_results) + + case_passes_qc: bool = control_pass_qc and urgent_pass_qc and non_urgent_pass_qc + + return CaseQualityResult( + passes_qc=case_passes_qc, + control_passes_qc=control_pass_qc, + urgent_passes_qc=urgent_pass_qc, + non_urgent_passes_qc=non_urgent_pass_qc, + ) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index 1587385696..8a3c92d769 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -6,7 +6,7 @@ SampleMetrics, ) -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, SampleQualityResult from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController from cg.store.api.core import Store @@ -43,8 +43,8 @@ def create_quality_result( passes_inserts_qc: bool = True, passes_coverage_qc: bool = True, passes_10x_coverage_qc: bool = True, -) -> QualityResult: - return QualityResult( +) -> SampleQualityResult: + return SampleQualityResult( sample_id=sample_id, passes_qc=passes_qc, is_control=is_control, @@ -59,9 +59,9 @@ def create_quality_result( @pytest.fixture -def quality_results() -> list[QualityResult]: +def quality_results() -> list[SampleQualityResult]: return [ - QualityResult( + SampleQualityResult( sample_id="sample_1", passes_qc=False, is_control=True, @@ -73,7 +73,7 @@ def quality_results() -> list[QualityResult]: passes_coverage_qc=True, passes_10x_coverage_qc=True, ), - QualityResult( + SampleQualityResult( sample_id="sample_2", passes_qc=True, is_control=False, @@ -85,7 +85,7 @@ def quality_results() -> list[QualityResult]: passes_coverage_qc=True, passes_10x_coverage_qc=True, ), - QualityResult( + SampleQualityResult( sample_id="sample_3", passes_qc=False, is_control=False, @@ -100,6 +100,16 @@ def quality_results() -> list[QualityResult]: ] +@pytest.fixture +def case_result(): + return CaseQualityResult( + passes_qc=False, + control_passes_qc=True, + urgent_passes_qc=True, + non_urgent_passes_qc=True, + ) + + @pytest.fixture def quality_controller(store: Store) -> QualityController: return QualityController(store) diff --git a/tests/meta/workflow/microsalt/test_quality_controller_utils.py b/tests/meta/workflow/microsalt/test_quality_controller_utils.py index f19fa3cbab..1d71a91c4f 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller_utils.py +++ b/tests/meta/workflow/microsalt/test_quality_controller_utils.py @@ -1,6 +1,6 @@ from cg.constants.constants import MicrosaltAppTags from cg.meta.workflow.microsalt.metrics_parser.models import SampleMetrics -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import SampleQualityResult from cg.meta.workflow.microsalt.quality_controller.utils import ( get_non_urgent_results, get_urgent_results, @@ -329,8 +329,8 @@ def test_has_valid_10x_coverage_missing(): def test_negative_control_passes_qc(): # GIVEN a negative control sample that passes quality control - control_result: QualityResult = create_quality_result(is_control=True) - other_result: QualityResult = create_quality_result(passes_qc=False) + control_result: SampleQualityResult = create_quality_result(is_control=True) + other_result: SampleQualityResult = create_quality_result(passes_qc=False) # WHEN checking if the negative control passes quality control control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) @@ -341,8 +341,8 @@ def test_negative_control_passes_qc(): def test_negative_control_fails_qc(): # GIVEN a negative control sample that fails quality control - control_result: QualityResult = create_quality_result(is_control=True, passes_qc=False) - other_result: QualityResult = create_quality_result() + control_result: SampleQualityResult = create_quality_result(is_control=True, passes_qc=False) + other_result: SampleQualityResult = create_quality_result() # WHEN checking if the negative control passes quality control control_passes_qc: bool = negative_control_pass_qc([other_result, control_result]) @@ -353,16 +353,16 @@ def test_negative_control_fails_qc(): def test_get_urgent_results(): # GIVEN quality results with urgent and non-urgent samples - urgent_result: QualityResult = create_quality_result( + urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True ) - non_urgent_result: QualityResult = create_quality_result( + non_urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True ) - quality_results: list[QualityResult] = [urgent_result, non_urgent_result] + quality_results: list[SampleQualityResult] = [urgent_result, non_urgent_result] # WHEN getting the urgent results - urgent_results: list[QualityResult] = get_urgent_results(quality_results) + urgent_results: list[SampleQualityResult] = get_urgent_results(quality_results) # THEN the urgent results are returned assert urgent_results == [urgent_result] @@ -370,13 +370,13 @@ def test_get_urgent_results(): def test_urgent_samples_pass_qc(): # GIVEN quality results with urgent samples that pass quality control - urgent_result: QualityResult = create_quality_result( + urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True ) - urgent_result_control: QualityResult = create_quality_result( + urgent_result_control: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True ) - urgent_results: list[QualityResult] = [urgent_result, urgent_result_control] + urgent_results: list[SampleQualityResult] = [urgent_result, urgent_result_control] # WHEN checking if the urgent samples pass quality control urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results) @@ -387,13 +387,13 @@ def test_urgent_samples_pass_qc(): def test_urgent_samples_fail_qc(): # GIVEN quality results with urgent samples that fail quality control - urgent_result: QualityResult = create_quality_result( + urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=False ) - urgent_result_control: QualityResult = create_quality_result( + urgent_result_control: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True, is_control=True ) - urgent_results: list[QualityResult] = [urgent_result, urgent_result_control] + urgent_results: list[SampleQualityResult] = [urgent_result, urgent_result_control] # WHEN checking if the urgent samples pass quality control urgent_pass_qc: bool = urgent_samples_pass_qc(urgent_results) @@ -404,16 +404,16 @@ def test_urgent_samples_fail_qc(): def test_get_non_urgent_results(): # GIVEN quality results with urgent and non-urgent samples - urgent_result: QualityResult = create_quality_result( + urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWRNXTR003, passes_qc=True ) - non_urgent_result: QualityResult = create_quality_result( + non_urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True ) - quality_results: list[QualityResult] = [urgent_result, non_urgent_result] + quality_results: list[SampleQualityResult] = [urgent_result, non_urgent_result] # WHEN getting the non-urgent results - non_urgent_results: list[QualityResult] = get_non_urgent_results(quality_results) + non_urgent_results: list[SampleQualityResult] = get_non_urgent_results(quality_results) # THEN the non-urgent results are returned assert non_urgent_results == [non_urgent_result] @@ -421,13 +421,13 @@ def test_get_non_urgent_results(): def test_non_urgent_samples_pass_qc(): # GIVEN quality results with non-urgent samples that pass quality control - non_urgent_result: QualityResult = create_quality_result( + non_urgent_result: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True ) - non_urgent_result_control: QualityResult = create_quality_result( + non_urgent_result_control: SampleQualityResult = create_quality_result( application_tag=MicrosaltAppTags.MWXNXTR003, passes_qc=True, is_control=True ) - non_urgent_results: list[QualityResult] = [non_urgent_result, non_urgent_result_control] + non_urgent_results: list[SampleQualityResult] = [non_urgent_result, non_urgent_result_control] # WHEN checking if the non-urgent samples pass quality control non_urgent_pass_qc: bool = non_urgent_samples_pass_qc(non_urgent_results) diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py index 3af4352cd5..38587ac84c 100644 --- a/tests/meta/workflow/microsalt/test_report_generation.py +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -1,18 +1,25 @@ from pathlib import Path from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME -from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator -def test_generate_report_with_results(quality_results: list[QualityResult], tmp_path: Path): +def test_generate_report_with_results( + quality_results: list[SampleQualityResult], case_result: CaseQualityResult, tmp_path: Path +): # GIVEN quality results # GIVEN a file path to write them to out_file = Path(tmp_path, QUALITY_REPORT_FILE_NAME) # WHEN generating a report - ReportGenerator.report(out_file=out_file, sample_results=quality_results) + ReportGenerator.report( + out_file=out_file, samples=quality_results, case=case_result + ) # THEN the report is created assert out_file.exists() From 408e5b691128cf2e9befee8d3d163b963d135195 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 13:02:23 +0100 Subject: [PATCH 48/65] Formatting --- tests/meta/workflow/microsalt/conftest.py | 5 ++++- tests/meta/workflow/microsalt/test_report_generation.py | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/meta/workflow/microsalt/conftest.py b/tests/meta/workflow/microsalt/conftest.py index 8a3c92d769..6dde2e0ed6 100644 --- a/tests/meta/workflow/microsalt/conftest.py +++ b/tests/meta/workflow/microsalt/conftest.py @@ -6,7 +6,10 @@ SampleMetrics, ) -from cg.meta.workflow.microsalt.quality_controller.models import CaseQualityResult, SampleQualityResult +from cg.meta.workflow.microsalt.quality_controller.models import ( + CaseQualityResult, + SampleQualityResult, +) from cg.meta.workflow.microsalt.quality_controller.quality_controller import QualityController from cg.store.api.core import Store diff --git a/tests/meta/workflow/microsalt/test_report_generation.py b/tests/meta/workflow/microsalt/test_report_generation.py index 38587ac84c..494783e15c 100644 --- a/tests/meta/workflow/microsalt/test_report_generation.py +++ b/tests/meta/workflow/microsalt/test_report_generation.py @@ -17,9 +17,7 @@ def test_generate_report_with_results( out_file = Path(tmp_path, QUALITY_REPORT_FILE_NAME) # WHEN generating a report - ReportGenerator.report( - out_file=out_file, samples=quality_results, case=case_result - ) + ReportGenerator.report(out_file=out_file, samples=quality_results, case=case_result) # THEN the report is created assert out_file.exists() From f52c8015b544b57db51652d809cdd7682d3061d0 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 13:28:43 +0100 Subject: [PATCH 49/65] Add logging --- .../microsalt/quality_controller/quality_controller.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index f22b2479d6..cc4f9c4268 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -92,8 +92,12 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: + LOG.info(f"Skipping QC, {case_run_dir} does not exist.") return False qc_done_path: Path = case_run_dir.joinpath(QUALITY_REPORT_FILE_NAME) + qc_already_done: bool = qc_done_path.exists() + if qc_already_done: + LOG.info(f"Skipping QC, {qc_done_path} already exists.") return not qc_done_path.exists() def has_valid_total_reads(self, sample_id: str) -> bool: From f48580455940e6eba30cc0bb954e5e5ebefdf0cf Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 13:35:15 +0100 Subject: [PATCH 50/65] Improve messages --- .../microsalt/quality_controller/quality_controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index cc4f9c4268..ad57b76a51 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -92,12 +92,12 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: - LOG.info(f"Skipping QC, {case_run_dir} does not exist.") + LOG.info(f"Skipping QC, run directory {case_run_dir} does not exist.") return False qc_done_path: Path = case_run_dir.joinpath(QUALITY_REPORT_FILE_NAME) qc_already_done: bool = qc_done_path.exists() if qc_already_done: - LOG.info(f"Skipping QC, {qc_done_path} already exists.") + LOG.info(f"Skipping QC, report {qc_done_path} already exists.") return not qc_done_path.exists() def has_valid_total_reads(self, sample_id: str) -> bool: From a6df7b8d54db7f3e2411f523de80e6604d5f3995 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 13:53:53 +0100 Subject: [PATCH 51/65] Improve logging --- .../quality_controller/quality_controller.py | 35 +++++++++++-------- .../quality_controller/result_logger.py | 24 +++++++++---- .../microsalt/quality_controller/utils.py | 5 ++- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index ad57b76a51..e4d46e362d 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -33,11 +33,11 @@ class QualityController: def __init__(self, status_db: Store): self.status_db = status_db - def quality_control(self, metrics_file_path: Path) -> bool: - quality_metrics: QualityMetrics = MetricsParser.parse(metrics_file_path) + def quality_control(self, case_metrics_file_path: Path) -> bool: + quality_metrics: QualityMetrics = MetricsParser.parse(case_metrics_file_path) sample_results: list[SampleQualityResult] = self.quality_control_samples(quality_metrics) case_result: CaseQualityResult = quality_control_case(sample_results) - report_file: Path = get_report_path(metrics_file_path) + report_file: Path = get_report_path(case_metrics_file_path) ReportGenerator.report(out_file=report_file, samples=sample_results, case=case_result) ResultLogger.log_results(case=case_result, samples=sample_results) return case_result.passes_qc @@ -57,27 +57,30 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp valid_coverage: bool = has_valid_average_coverage(metrics) valid_10x_coverage: bool = has_valid_10x_coverage(metrics) - sample_passes_qc: bool = ( - valid_read_count - and valid_mapping - and valid_duplication - and valid_inserts - and valid_coverage - and valid_10x_coverage - ) - sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) application_tag: str = get_application_tag(sample) + if is_control := is_sample_negative_control(sample): - return SampleQualityResult( + result = SampleQualityResult( sample_id=sample_id, - passes_qc=sample_passes_qc, + passes_qc=valid_read_count, is_control=is_control, passes_reads_qc=valid_read_count, application_tag=application_tag, ) + ResultLogger.log_sample_result(result) + return result + + sample_passes_qc: bool = ( + valid_read_count + and valid_mapping + and valid_duplication + and valid_inserts + and valid_coverage + and valid_10x_coverage + ) - return SampleQualityResult( + result = SampleQualityResult( sample_id=sample_id, passes_qc=sample_passes_qc, is_control=is_control, @@ -89,6 +92,8 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp passes_coverage_qc=valid_coverage, passes_10x_coverage_qc=valid_10x_coverage, ) + ResultLogger.log_sample_result(result) + return result def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py index 3aa5cb3d24..a3940c74d9 100644 --- a/cg/meta/workflow/microsalt/quality_controller/result_logger.py +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -11,7 +11,7 @@ class ResultLogger: @staticmethod def log_results(samples: list[SampleQualityResult], case: CaseQualityResult) -> None: if case.passes_qc: - LOG.info("Quality control passed.") + LOG.info("QC passed.") else: message = get_case_fail_message(case) LOG.warning(message) @@ -19,19 +19,29 @@ def log_results(samples: list[SampleQualityResult], case: CaseQualityResult) -> message = sample_result_message(samples) LOG.info(message) + @staticmethod + def log_sample_result(result: SampleQualityResult) -> None: + if not result.passes_qc: + control_message = "Control sample " if result.is_control else "" + message = f"{control_message}{result.sample_id} failed QC." + LOG.warning(message) + + @staticmethod + def log_case_result(result: CaseQualityResult) -> None: + if not result.passes_qc: + LOG.warning("Case failed QC.") + def get_case_fail_message(case: CaseQualityResult) -> str: fail_reasons = [] if not case.control_passes_qc: - fail_reasons.append("The negative control sample failed quality control.\n") + fail_reasons.append("The negative control sample failed QC.\n") if not case.urgent_passes_qc: - fail_reasons.append("The urgent samples failed quality control.\n") + fail_reasons.append("The urgent samples failed QC.\n") if not case.non_urgent_passes_qc: - fail_reasons.append("The non-urgent samples failed quality control.\n") - - fail_message = "Quality control failed.\n" - + fail_reasons.append("The non-urgent samples failed QC.\n") + fail_message = "QC failed.\n" return fail_message + " ".join(fail_reasons) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index a683f83a7f..e677751564 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -7,6 +7,7 @@ CaseQualityResult, SampleQualityResult, ) +from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger from cg.models.orders.sample_base import ControlEnum from cg.store.models import Sample @@ -131,9 +132,11 @@ def quality_control_case(sample_results: list[SampleQualityResult]) -> CaseQuali case_passes_qc: bool = control_pass_qc and urgent_pass_qc and non_urgent_pass_qc - return CaseQualityResult( + result = CaseQualityResult( passes_qc=case_passes_qc, control_passes_qc=control_pass_qc, urgent_passes_qc=urgent_pass_qc, non_urgent_passes_qc=non_urgent_pass_qc, ) + ResultLogger.log_case_result(result) + return result From f946950554c2f0aee5ba6520ffd97d08a0f7f8c2 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Mon, 18 Dec 2023 16:12:58 +0100 Subject: [PATCH 52/65] Simplify run dir retrieval --- cg/meta/workflow/microsalt/microsalt.py | 56 ++++++++++++------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index c9f00d4e76..6e90f2dad3 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -54,29 +54,6 @@ def process(self) -> Process: ) return self._process - def get_case_path(self, case_id: str) -> list[Path]: - """Returns all paths associated with the case or single sample analysis.""" - case_obj: Case = self.status_db.get_case_by_internal_id(internal_id=case_id) - lims_project: str = self.get_project(case_obj.links[0].sample.internal_id) - lims_project_dir_path: Path = Path(self.root_dir, "results", lims_project) - - case_directories: list[Path] = [ - Path(path) for path in glob.glob(f"{lims_project_dir_path}*", recursive=True) - ] - - return sorted(case_directories, key=os.path.getctime, reverse=True) - - def get_latest_case_path(self, case_id: str) -> Path | None: - """Return latest run dir for a microbial case, if no path found it returns None.""" - case: Case = self.status_db.get_case_by_internal_id(case_id) - sample_id: str = case.links[0].sample.internal_id - lims_project: str = self.get_project(sample_id) - - return next( - (path for path in self.get_case_path(case_id) if f"{lims_project}_" in path.as_posix()), - None, - ) - def clean_run_dir(self, case_id: str, yes: bool, case_path: list[Path] | Path) -> int: """Remove workflow run directories for a MicroSALT case.""" @@ -275,7 +252,7 @@ def get_cases_to_store(self) -> list[Case]: LOG.info(f"Found {len(cases_qc_ready)} cases to perform QC on!") for case in cases_qc_ready: - case_run_dir: Path | None = self.get_latest_case_path(case.internal_id) + case_run_dir: Path | None = self.get_case_path(case.internal_id) if self.quality_checker.is_qc_required(case_run_dir): metrics_file_path = self.get_metrics_file_path(case.internal_id) if self.quality_checker.quality_control(metrics_file_path): @@ -301,8 +278,29 @@ def get_completed_cases(self) -> list[Case]: def get_metrics_file_path(self, case_id: str) -> Path: """Return path to metrics file for a case.""" - case_obj: Case = self.status_db.get_case_by_internal_id(case_id) - sample_id: str = case_obj.links[0].sample.internal_id - lims_project: str = self.get_project(sample_id) - case_run_dir: Path = self.get_latest_case_path(case_id) - return Path(case_run_dir, f"{lims_project}{FileExtensions.JSON}") + project_id: str = self.get_project_id(case_id) + case_run_dir: Path = self.get_case_path(case_id) + return Path(case_run_dir, f"{project_id}{FileExtensions.JSON}") + + def extract_project_id(self, sample_id: str) -> str: + return sample_id.rsplit("A", maxsplit=1)[0] + + def get_project_id(self, case_id: str) -> str: + case: Case = self.status_db.get_case_by_internal_id(case_id) + sample_id: str = case.links[0].sample.internal_id + return self.extract_project_id(sample_id) + + def get_results_dir(self) -> Path: + return Path(self.root_dir, "results") + + def get_matching_cases(self, case_id: str) -> list[str]: + project_id: str = self.get_project_id(case_id) + results_dir: Path = self.get_results_dir() + return [d for d in os.listdir(results_dir) if d.startswith(project_id)] + + def get_case_path(self, case_id: str) -> Path: + project_id: str = self.get_project_id(case_id) + results_dir: Path = self.get_results_dir() + matching_cases = [d for d in os.listdir(results_dir) if d.startswith(project_id)] + case_dir: str = max(matching_cases, default=None) + return Path(results_dir, case_dir) From 5ad401a7f4be6bdd7a3568f53e2d3acd8d4ec4b3 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 19 Dec 2023 09:54:07 +0100 Subject: [PATCH 53/65] Fix test --- cg/meta/workflow/microsalt/microsalt.py | 3 +-- tests/meta/workflow/test_microsalt.py | 35 ++++--------------------- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 6e90f2dad3..caa5d68ec8 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -299,8 +299,7 @@ def get_matching_cases(self, case_id: str) -> list[str]: return [d for d in os.listdir(results_dir) if d.startswith(project_id)] def get_case_path(self, case_id: str) -> Path: - project_id: str = self.get_project_id(case_id) results_dir: Path = self.get_results_dir() - matching_cases = [d for d in os.listdir(results_dir) if d.startswith(project_id)] + matching_cases: list[str] = self.get_matching_cases(case_id) case_dir: str = max(matching_cases, default=None) return Path(results_dir, case_dir) diff --git a/tests/meta/workflow/test_microsalt.py b/tests/meta/workflow/test_microsalt.py index 625a117fdd..d2a827525a 100644 --- a/tests/meta/workflow/test_microsalt.py +++ b/tests/meta/workflow/test_microsalt.py @@ -9,46 +9,21 @@ from cg.store.models import Case -def test_get_latest_case_path( - mocker, - qc_microsalt_context: CGConfig, - microsalt_case_qc_pass: str, - microsalt_analysis_dir: Path, -): - """Test get_latest_case_path return the first case path and not single sample path""" - microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - - # GIVEN a case with different case paths, both single sample and case analyses - mocker.patch.object(MicrosaltAnalysisAPI, "get_project", return_value="ACC12345") - mocker.patch.object( - MicrosaltAnalysisAPI, - "get_case_path", - return_value=[ - Path(microsalt_analysis_dir, "ACC12345A2_2023"), - Path(microsalt_analysis_dir, "ACC12345_2022"), - Path(microsalt_analysis_dir, "ACC12345A1_2023"), - ], - ) - # WHEN getting the latest case path - path = microsalt_api.get_latest_case_path(case_id=microsalt_case_qc_pass) - - # THEN the first case path should be returned - assert Path(microsalt_analysis_dir, "ACC12345_2022") == path - - def test_get_cases_to_store_pass( qc_microsalt_context: CGConfig, mocker, - microsalt_qc_pass_lims_project: str, microsalt_qc_pass_run_dir_path: Path, + metrics_file_passing_qc: Path, ): """Test get cases to store for a microsalt case that passes QC.""" # GIVEN a store with a QC ready microsalt case that will pass QC microsalt_api: MicrosaltAnalysisAPI = qc_microsalt_context.meta_apis["analysis_api"] - mocker.patch.object(LimsAPI, "get_sample_project", return_value=microsalt_qc_pass_lims_project) mocker.patch.object( - MicrosaltAnalysisAPI, "get_latest_case_path", return_value=microsalt_qc_pass_run_dir_path + MicrosaltAnalysisAPI, "get_metrics_file_path", return_value=metrics_file_passing_qc + ) + mocker.patch.object( + MicrosaltAnalysisAPI, "get_case_path", return_value=microsalt_qc_pass_run_dir_path ) # WHEN retrieving cases to store From d62abd96528fc65ce469fa5578b1b288179b1687 Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 19 Dec 2023 10:39:05 +0100 Subject: [PATCH 54/65] Add log messages --- cg/meta/workflow/microsalt/microsalt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index caa5d68ec8..ba59110fc9 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -253,7 +253,9 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_case_path(case.internal_id) + LOG.info(f"Checking QC for case {case.internal_id} in {case_run_dir}") if self.quality_checker.is_qc_required(case_run_dir): + LOG.info(f"QC required for case {case.internal_id}") metrics_file_path = self.get_metrics_file_path(case.internal_id) if self.quality_checker.quality_control(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") @@ -279,7 +281,10 @@ def get_completed_cases(self) -> list[Case]: def get_metrics_file_path(self, case_id: str) -> Path: """Return path to metrics file for a case.""" project_id: str = self.get_project_id(case_id) + LOG.info(case_id) + LOG.info(f"Looking for metrics file for project {project_id}") case_run_dir: Path = self.get_case_path(case_id) + LOG.info(f"Looking for metrics file in {case_run_dir}") return Path(case_run_dir, f"{project_id}{FileExtensions.JSON}") def extract_project_id(self, sample_id: str) -> str: From 6f0a0abd7e5fcbdd5c265b18721aeaadced76a3f Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 19 Dec 2023 10:48:58 +0100 Subject: [PATCH 55/65] Add logging --- cg/meta/workflow/microsalt/microsalt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index ba59110fc9..5b7e7f3de8 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -300,11 +300,17 @@ def get_results_dir(self) -> Path: def get_matching_cases(self, case_id: str) -> list[str]: project_id: str = self.get_project_id(case_id) + LOG.info(project_id) results_dir: Path = self.get_results_dir() + LOG.info(results_dir) return [d for d in os.listdir(results_dir) if d.startswith(project_id)] def get_case_path(self, case_id: str) -> Path: results_dir: Path = self.get_results_dir() + LOG.info(f"Looking for case path in results dir {results_dir}") matching_cases: list[str] = self.get_matching_cases(case_id) + LOG.info(f"Found {len(matching_cases)} matching cases") + LOG.info(matching_cases) case_dir: str = max(matching_cases, default=None) + LOG.info(case_dir) return Path(results_dir, case_dir) From f152d2d0e4b8cfc6d0677018418afc1839a3de4a Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 19 Dec 2023 11:24:37 +0100 Subject: [PATCH 56/65] Improve logging --- cg/exc.py | 10 +++++++++ cg/meta/workflow/microsalt/microsalt.py | 21 +++++++------------ .../quality_controller/quality_controller.py | 2 +- .../quality_controller/result_logger.py | 16 +++++++++----- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/cg/exc.py b/cg/exc.py index 921fefef3f..2a23ecf88c 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -132,6 +132,16 @@ class LimsDataError(CgError): """ +class MicrosaltError(CgError): + """ + Error related to Microsalt analysis. + """ + +class MissingAnalysisDir(CgError): + """ + Error related to missing analysis. + """ + class OrderError(CgError): """ Exception related to orders. diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 5b7e7f3de8..ad7461bfbf 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -12,7 +12,7 @@ from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Pipeline, Priority from cg.constants.constants import FileExtensions from cg.constants.tb import AnalysisStatus -from cg.exc import CgDataError +from cg.exc import CgDataError, MissingAnalysisDir from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.fastq import MicrosaltFastqHandler from cg.meta.workflow.microsalt.quality_controller import QualityController @@ -281,10 +281,7 @@ def get_completed_cases(self) -> list[Case]: def get_metrics_file_path(self, case_id: str) -> Path: """Return path to metrics file for a case.""" project_id: str = self.get_project_id(case_id) - LOG.info(case_id) - LOG.info(f"Looking for metrics file for project {project_id}") case_run_dir: Path = self.get_case_path(case_id) - LOG.info(f"Looking for metrics file in {case_run_dir}") return Path(case_run_dir, f"{project_id}{FileExtensions.JSON}") def extract_project_id(self, sample_id: str) -> str: @@ -298,19 +295,17 @@ def get_project_id(self, case_id: str) -> str: def get_results_dir(self) -> Path: return Path(self.root_dir, "results") - def get_matching_cases(self, case_id: str) -> list[str]: + def get_analyses_result_dirs(self, case_id: str) -> list[str]: project_id: str = self.get_project_id(case_id) - LOG.info(project_id) results_dir: Path = self.get_results_dir() - LOG.info(results_dir) - return [d for d in os.listdir(results_dir) if d.startswith(project_id)] + matches: list[str] = [d for d in os.listdir(results_dir) if d.startswith(project_id)] + if not matches: + LOG.error(f"No result directory found for {case_id} with project id {project_id}") + raise MissingAnalysisDir + return matches def get_case_path(self, case_id: str) -> Path: results_dir: Path = self.get_results_dir() - LOG.info(f"Looking for case path in results dir {results_dir}") - matching_cases: list[str] = self.get_matching_cases(case_id) - LOG.info(f"Found {len(matching_cases)} matching cases") - LOG.info(matching_cases) + matching_cases: list[str] = self.get_analyses_result_dirs(case_id) case_dir: str = max(matching_cases, default=None) - LOG.info(case_dir) return Path(results_dir, case_dir) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index e4d46e362d..61d79e4db1 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -39,7 +39,7 @@ def quality_control(self, case_metrics_file_path: Path) -> bool: case_result: CaseQualityResult = quality_control_case(sample_results) report_file: Path = get_report_path(case_metrics_file_path) ReportGenerator.report(out_file=report_file, samples=sample_results, case=case_result) - ResultLogger.log_results(case=case_result, samples=sample_results) + ResultLogger.log_results(case=case_result, samples=sample_results, report=report_file) return case_result.passes_qc def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[SampleQualityResult]: diff --git a/cg/meta/workflow/microsalt/quality_controller/result_logger.py b/cg/meta/workflow/microsalt/quality_controller/result_logger.py index a3940c74d9..327705bb32 100644 --- a/cg/meta/workflow/microsalt/quality_controller/result_logger.py +++ b/cg/meta/workflow/microsalt/quality_controller/result_logger.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path from cg.meta.workflow.microsalt.quality_controller.models import ( CaseQualityResult, SampleQualityResult, @@ -9,9 +10,11 @@ class ResultLogger: @staticmethod - def log_results(samples: list[SampleQualityResult], case: CaseQualityResult) -> None: + def log_results( + samples: list[SampleQualityResult], case: CaseQualityResult, report: Path + ) -> None: if case.passes_qc: - LOG.info("QC passed.") + LOG.info(f"QC passed, see {report} for details.") else: message = get_case_fail_message(case) LOG.warning(message) @@ -21,8 +24,11 @@ def log_results(samples: list[SampleQualityResult], case: CaseQualityResult) -> @staticmethod def log_sample_result(result: SampleQualityResult) -> None: - if not result.passes_qc: - control_message = "Control sample " if result.is_control else "" + control_message = "Control sample " if result.is_control else "" + if result.passes_qc: + message = f"{control_message}{result.sample_id} passed QC." + LOG.info(message) + else: message = f"{control_message}{result.sample_id} failed QC." LOG.warning(message) @@ -42,7 +48,7 @@ def get_case_fail_message(case: CaseQualityResult) -> str: if not case.non_urgent_passes_qc: fail_reasons.append("The non-urgent samples failed QC.\n") fail_message = "QC failed.\n" - return fail_message + " ".join(fail_reasons) + return fail_message + "".join(fail_reasons) def sample_result_message(samples: list[SampleQualityResult]) -> str: From bc1b6cbbb6ada6fbe585d33aa95b0509a38fae9d Mon Sep 17 00:00:00 2001 From: Sebastian Allard Date: Tue, 19 Dec 2023 11:25:34 +0100 Subject: [PATCH 57/65] Fix exc --- cg/exc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cg/exc.py b/cg/exc.py index 2a23ecf88c..17596615fa 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -137,11 +137,13 @@ class MicrosaltError(CgError): Error related to Microsalt analysis. """ + class MissingAnalysisDir(CgError): """ Error related to missing analysis. """ + class OrderError(CgError): """ Exception related to orders. From fde8536638e7bf58d56ea555c66e80adb3fbff63 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:23:33 +0100 Subject: [PATCH 58/65] Fix comments --- cg/constants/constants.py | 2 +- .../microsalt/quality_controller/quality_controller.py | 4 ++-- cg/meta/workflow/microsalt/quality_controller/utils.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index 210a4850f3..5074cc9557 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -212,7 +212,7 @@ class APIMethods(StrEnum): class MicrosaltQC: AVERAGE_COVERAGE_THRESHOLD: int = 10 - QC_PERCENT_THRESHOLD_MWX: float = 0.9 + MWX_THRESHOLD_SAMPLES_PASSING: float = 0.9 COVERAGE_10X_THRESHOLD: float = 0.75 DUPLICATION_RATE_THRESHOLD: float = 0.8 INSERT_SIZE_THRESHOLD: int = 100 diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 61d79e4db1..e02364a7b5 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -97,12 +97,12 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: - LOG.info(f"Skipping QC, run directory {case_run_dir} does not exist.") + LOG.warning(f"Skipping QC, run directory {case_run_dir} does not exist.") return False qc_done_path: Path = case_run_dir.joinpath(QUALITY_REPORT_FILE_NAME) qc_already_done: bool = qc_done_path.exists() if qc_already_done: - LOG.info(f"Skipping QC, report {qc_done_path} already exists.") + LOG.warning(f"Skipping QC, report {qc_done_path} already exists.") return not qc_done_path.exists() def has_valid_total_reads(self, sample_id: str) -> bool: diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index e677751564..84d3d2b2bf 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -106,7 +106,7 @@ def non_urgent_samples_pass_qc(results: list[SampleQualityResult]) -> bool: return True fraction_passing_qc: float = len(passing_qc) / len(non_urgent_samples) - return fraction_passing_qc >= MicrosaltQC.QC_PERCENT_THRESHOLD_MWX + return fraction_passing_qc >= MicrosaltQC.MWX_THRESHOLD_SAMPLES_PASSING def is_sample_negative_control(sample: Sample) -> bool: From 9900e2630989be7d1131feadb727b48789f732d7 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:29:07 +0100 Subject: [PATCH 59/65] Handle missing metrics file --- cg/meta/workflow/microsalt/microsalt.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index ad7461bfbf..13ed66077d 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -254,9 +254,14 @@ def get_cases_to_store(self) -> list[Case]: for case in cases_qc_ready: case_run_dir: Path | None = self.get_case_path(case.internal_id) LOG.info(f"Checking QC for case {case.internal_id} in {case_run_dir}") + if self.quality_checker.is_qc_required(case_run_dir): LOG.info(f"QC required for case {case.internal_id}") metrics_file_path = self.get_metrics_file_path(case.internal_id) + + if not metrics_file_path.exists(): + continue + if self.quality_checker.quality_control(metrics_file_path): self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") cases_to_store.append(case) From 517fe95a46a409819539dc65916373f4cca9b926 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:31:09 +0100 Subject: [PATCH 60/65] Handle missing negative sample --- cg/meta/workflow/microsalt/quality_controller/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 84d3d2b2bf..7cb614e167 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -65,16 +65,16 @@ def has_valid_10x_coverage(metrics: SampleMetrics) -> bool: return is_valid_10x_coverage(coverage_10x) if coverage_10x else False -def get_negative_control_result(results: list[SampleQualityResult]) -> SampleQualityResult: +def get_negative_control_result(results: list[SampleQualityResult]) -> SampleQualityResult | None: for result in results: if result.is_control: return result - raise ValueError("No negative control found") def negative_control_pass_qc(results: list[SampleQualityResult]) -> bool: - negative_control_result: SampleQualityResult = get_negative_control_result(results) - return negative_control_result.passes_qc + if negative_control_result := get_negative_control_result(results): + return negative_control_result.passes_qc + return True def get_results_passing_qc(results: list[SampleQualityResult]) -> list[SampleQualityResult]: From 8ab1cbe8cde4c1eeb8717bd09bd3806fbbccc9d7 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:43:55 +0100 Subject: [PATCH 61/65] Use application guaranteed reads --- .../microsalt/quality_controller/quality_controller.py | 9 ++++++++- cg/meta/workflow/microsalt/quality_controller/utils.py | 8 ++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index e02364a7b5..5209181d34 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -11,6 +11,7 @@ from cg.meta.workflow.microsalt.quality_controller.result_logger import ResultLogger from cg.meta.workflow.microsalt.quality_controller.utils import ( get_application_tag, + get_percent_reads_guaranteed, get_report_path, get_sample_target_reads, is_sample_negative_control, @@ -108,10 +109,16 @@ def is_qc_required(self, case_run_dir: Path) -> bool: def has_valid_total_reads(self, sample_id: str) -> bool: sample: Sample = self.status_db.get_sample_by_internal_id(sample_id) target_reads: int = get_sample_target_reads(sample) + percent_reads_guaranteed: int = get_percent_reads_guaranteed(sample) sample_reads: int = sample.reads if is_sample_negative_control(sample): return is_valid_total_reads_for_negative_control( reads=sample_reads, target_reads=target_reads ) - return is_valid_total_reads(reads=sample_reads, target_reads=target_reads) + + return is_valid_total_reads( + reads=sample_reads, + target_reads=target_reads, + threshold_percentage=percent_reads_guaranteed, + ) diff --git a/cg/meta/workflow/microsalt/quality_controller/utils.py b/cg/meta/workflow/microsalt/quality_controller/utils.py index 7cb614e167..f1b4bdb525 100644 --- a/cg/meta/workflow/microsalt/quality_controller/utils.py +++ b/cg/meta/workflow/microsalt/quality_controller/utils.py @@ -12,8 +12,8 @@ from cg.store.models import Sample -def is_valid_total_reads(reads: int, target_reads: int) -> bool: - return reads > target_reads * MicrosaltQC.TARGET_READS_FAIL_THRESHOLD +def is_valid_total_reads(reads: int, target_reads: int, threshold_percentage: int) -> bool: + return reads > target_reads * threshold_percentage / 100 def is_valid_total_reads_for_negative_control(reads: int, target_reads: int) -> bool: @@ -121,6 +121,10 @@ def get_sample_target_reads(sample: Sample) -> int: return sample.application_version.application.target_reads +def get_percent_reads_guaranteed(sample: Sample) -> int: + return sample.application_version.application.percent_reads_guaranteed + + def get_report_path(metrics_file_path: Path) -> Path: return metrics_file_path.parent.joinpath(QUALITY_REPORT_FILE_NAME) From 053886699ff1fcece277e07263db414b6266d0b6 Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:45:41 +0100 Subject: [PATCH 62/65] Fix tests --- .../workflow/microsalt/test_quality_controller_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/meta/workflow/microsalt/test_quality_controller_utils.py b/tests/meta/workflow/microsalt/test_quality_controller_utils.py index 1d71a91c4f..7a6ab022da 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller_utils.py +++ b/tests/meta/workflow/microsalt/test_quality_controller_utils.py @@ -30,7 +30,7 @@ def test_sample_total_reads_passing(): # WHEN checking if the sample has sufficient reads passes_reads_threshold: bool = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads, threshold_percentage=90 ) # THEN it passes @@ -44,7 +44,7 @@ def test_sample_total_reads_failing(): # WHEN checking if the sample has sufficient reads passes_reads_threshold: bool = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads, threshold_percentage=90 ) # THEN it fails @@ -58,7 +58,7 @@ def test_sample_total_reads_failing_without_reads(): # WHEN checking if the sample has sufficient reads passes_reads_threshold: bool = is_valid_total_reads( - reads=sample_reads, target_reads=target_reads + reads=sample_reads, target_reads=target_reads, threshold_percentage=90 ) # THEN it fails From 053283f63c1daabaf3198964bbb38e880c4fcacc Mon Sep 17 00:00:00 2001 From: seallard Date: Fri, 22 Dec 2023 16:50:29 +0100 Subject: [PATCH 63/65] Fix name for microsalt qc command --- cg/cli/workflow/microsalt/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cg/cli/workflow/microsalt/base.py b/cg/cli/workflow/microsalt/base.py index 229b926b00..2ae4053bc8 100644 --- a/cg/cli/workflow/microsalt/base.py +++ b/cg/cli/workflow/microsalt/base.py @@ -218,7 +218,7 @@ def start_available(context: click.Context, dry_run: bool = False): raise click.Abort -@microsalt.command("qc-microsalt") +@microsalt.command("qc") @ARGUMENT_UNIQUE_IDENTIFIER @click.pass_context def qc_microsalt(context: click.Context, unique_id: str) -> None: From 4f840d1a2c03b41ab3e4118561207b666e213f48 Mon Sep 17 00:00:00 2001 From: seallard Date: Tue, 2 Jan 2024 09:37:22 +0100 Subject: [PATCH 64/65] Add summary to report --- .../quality_controller/quality_controller.py | 12 ++++++------ .../microsalt/quality_controller/report_generator.py | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 5209181d34..38425cc9e4 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -62,15 +62,15 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp application_tag: str = get_application_tag(sample) if is_control := is_sample_negative_control(sample): - result = SampleQualityResult( + sample_quality = SampleQualityResult( sample_id=sample_id, passes_qc=valid_read_count, is_control=is_control, passes_reads_qc=valid_read_count, application_tag=application_tag, ) - ResultLogger.log_sample_result(result) - return result + ResultLogger.log_sample_result(sample_quality) + return sample_quality sample_passes_qc: bool = ( valid_read_count @@ -81,7 +81,7 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp and valid_10x_coverage ) - result = SampleQualityResult( + sample_quality = SampleQualityResult( sample_id=sample_id, passes_qc=sample_passes_qc, is_control=is_control, @@ -93,8 +93,8 @@ def quality_control_sample(self, sample_id: str, metrics: SampleMetrics) -> Samp passes_coverage_qc=valid_coverage, passes_10x_coverage_qc=valid_10x_coverage, ) - ResultLogger.log_sample_result(result) - return result + ResultLogger.log_sample_result(sample_quality) + return sample_quality def is_qc_required(self, case_run_dir: Path) -> bool: if not case_run_dir: diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index 32e9a7c456..ca0d412f83 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -6,12 +6,16 @@ CaseQualityResult, SampleQualityResult, ) +from cg.meta.workflow.microsalt.quality_controller.result_logger import sample_result_message class ReportGenerator: @staticmethod def report(out_file: Path, case: CaseQualityResult, samples: List[SampleQualityResult]) -> None: + case_summary: str = "Case passed QC. " if case.passes_qc else "Case failed QC. " + sample_summary: str = sample_result_message(samples) report_content = { + "summary": case_summary + sample_summary, "case": case.model_dump(), "samples": [sample.model_dump() for sample in samples], } From d463810f4713947b5a58d1b949f0d0f984bdb8bd Mon Sep 17 00:00:00 2001 From: seallard Date: Tue, 2 Jan 2024 10:19:05 +0100 Subject: [PATCH 65/65] Add summary to trailblazer comment --- cg/meta/workflow/microsalt/microsalt.py | 7 ++++--- .../workflow/microsalt/quality_controller/models.py | 10 ++++++++++ .../quality_controller/quality_controller.py | 6 ++++-- .../microsalt/quality_controller/report_generator.py | 11 ++++++++--- .../workflow/microsalt/test_quality_controller.py | 9 +++++---- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 13ed66077d..bbcc1ed6d2 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -16,6 +16,7 @@ from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.fastq import MicrosaltFastqHandler from cg.meta.workflow.microsalt.quality_controller import QualityController +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.models.cg_config import CGConfig from cg.store.models import Case, Sample from cg.utils import Process @@ -262,14 +263,14 @@ def get_cases_to_store(self) -> list[Case]: if not metrics_file_path.exists(): continue - if self.quality_checker.quality_control(metrics_file_path): - self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC passed") + result: QualityResult = self.quality_checker.quality_control(metrics_file_path) + self.trailblazer_api.add_comment(case_id=case.internal_id, comment=result.summary) + if result.passes_qc: cases_to_store.append(case) else: self.trailblazer_api.set_analysis_status( case_id=case.internal_id, status=AnalysisStatus.FAILED ) - self.trailblazer_api.add_comment(case_id=case.internal_id, comment="QC failed") else: cases_to_store.append(case) diff --git a/cg/meta/workflow/microsalt/quality_controller/models.py b/cg/meta/workflow/microsalt/quality_controller/models.py index 4a1905c295..f9579579c0 100644 --- a/cg/meta/workflow/microsalt/quality_controller/models.py +++ b/cg/meta/workflow/microsalt/quality_controller/models.py @@ -21,3 +21,13 @@ class CaseQualityResult(BaseModel): control_passes_qc: bool urgent_passes_qc: bool non_urgent_passes_qc: bool + + +class QualityResult(BaseModel): + case: CaseQualityResult + samples: list[SampleQualityResult] + summary: str + + @property + def passes_qc(self) -> bool: + return self.case.passes_qc diff --git a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py index 38425cc9e4..8a00abb1e7 100644 --- a/cg/meta/workflow/microsalt/quality_controller/quality_controller.py +++ b/cg/meta/workflow/microsalt/quality_controller/quality_controller.py @@ -5,6 +5,7 @@ from cg.meta.workflow.microsalt.metrics_parser import MetricsParser, QualityMetrics, SampleMetrics from cg.meta.workflow.microsalt.quality_controller.models import ( CaseQualityResult, + QualityResult, SampleQualityResult, ) from cg.meta.workflow.microsalt.quality_controller.report_generator import ReportGenerator @@ -34,14 +35,15 @@ class QualityController: def __init__(self, status_db: Store): self.status_db = status_db - def quality_control(self, case_metrics_file_path: Path) -> bool: + def quality_control(self, case_metrics_file_path: Path) -> QualityResult: quality_metrics: QualityMetrics = MetricsParser.parse(case_metrics_file_path) sample_results: list[SampleQualityResult] = self.quality_control_samples(quality_metrics) case_result: CaseQualityResult = quality_control_case(sample_results) report_file: Path = get_report_path(case_metrics_file_path) ReportGenerator.report(out_file=report_file, samples=sample_results, case=case_result) ResultLogger.log_results(case=case_result, samples=sample_results, report=report_file) - return case_result.passes_qc + summary: str = ReportGenerator.get_summary(case=case_result, samples=sample_results) + return QualityResult(case=case_result, samples=sample_results, summary=summary) def quality_control_samples(self, quality_metrics: QualityMetrics) -> list[SampleQualityResult]: sample_results: list[SampleQualityResult] = [] diff --git a/cg/meta/workflow/microsalt/quality_controller/report_generator.py b/cg/meta/workflow/microsalt/quality_controller/report_generator.py index ca0d412f83..53932a427a 100644 --- a/cg/meta/workflow/microsalt/quality_controller/report_generator.py +++ b/cg/meta/workflow/microsalt/quality_controller/report_generator.py @@ -12,11 +12,16 @@ class ReportGenerator: @staticmethod def report(out_file: Path, case: CaseQualityResult, samples: List[SampleQualityResult]) -> None: - case_summary: str = "Case passed QC. " if case.passes_qc else "Case failed QC. " - sample_summary: str = sample_result_message(samples) + summary: str = ReportGenerator.get_summary(case=case, samples=samples) report_content = { - "summary": case_summary + sample_summary, + "summary": summary, "case": case.model_dump(), "samples": [sample.model_dump() for sample in samples], } write_json(file_path=out_file, content=report_content) + + @staticmethod + def get_summary(case: CaseQualityResult, samples: List[SampleQualityResult]) -> str: + case_summary: str = "Case passed QC. " if case.passes_qc else "Case failed QC. " + sample_summary: str = sample_result_message(samples) + return case_summary + sample_summary diff --git a/tests/meta/workflow/microsalt/test_quality_controller.py b/tests/meta/workflow/microsalt/test_quality_controller.py index 254bf8ac21..09b5f1e565 100644 --- a/tests/meta/workflow/microsalt/test_quality_controller.py +++ b/tests/meta/workflow/microsalt/test_quality_controller.py @@ -2,6 +2,7 @@ from cg.meta.workflow.microsalt.constants import QUALITY_REPORT_FILE_NAME from cg.meta.workflow.microsalt.quality_controller import QualityController +from cg.meta.workflow.microsalt.quality_controller.models import QualityResult from cg.models.cg_config import CGConfig from cg.store.api.core import Store from cg.store.models import Application, Sample @@ -70,10 +71,10 @@ def test_quality_control_fails(qc_microsalt_context: CGConfig, metrics_file_fail quality_controller = QualityController(store) # WHEN performing the quality control - passes_qc: bool = quality_controller.quality_control(metrics_file_failing_qc) + result: QualityResult = quality_controller.quality_control(metrics_file_failing_qc) # THEN the case should fail the quality control - assert not passes_qc + assert not result.passes_qc # THEN a report should be generated assert metrics_file_failing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists() @@ -89,10 +90,10 @@ def test_quality_control_passes(qc_microsalt_context: CGConfig, metrics_file_pas quality_controller = QualityController(store) # WHEN performing the quality control - passes_qc: bool = quality_controller.quality_control(metrics_file_passing_qc) + result: QualityResult = quality_controller.quality_control(metrics_file_passing_qc) # THEN the case should pass the quality control - assert passes_qc + assert result.passes_qc # THEN a report should be generated assert metrics_file_passing_qc.parent.joinpath(QUALITY_REPORT_FILE_NAME).exists()