diff --git a/.bumpversion.cfg b/.bumpversion.cfg index fbce4ca3b0..52137f77aa 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 62.1.6 +current_version = 62.2.4 commit = True tag = True tag_name = v{new_version} diff --git a/cg/__init__.py b/cg/__init__.py index d34ba6833f..a7779ad0db 100644 --- a/cg/__init__.py +++ b/cg/__init__.py @@ -1,2 +1,2 @@ __title__ = "cg" -__version__ = "62.1.6" +__version__ = "62.2.4" diff --git a/cg/apps/invoice/render.py b/cg/apps/invoice/render.py index f6a835bb66..e73278f9db 100644 --- a/cg/apps/invoice/render.py +++ b/cg/apps/invoice/render.py @@ -1,8 +1,11 @@ import datetime as dt +from pathlib import Path from openpyxl import Workbook, load_workbook from openpyxl.styles import Border, Font, PatternFill, Side -from pkg_resources import resource_filename + +from cg.constants import FileExtensions +from cg.utils.files import get_project_root_dir def render_xlsx(data: dict) -> Workbook: @@ -34,11 +37,17 @@ def render_xlsx(data: dict) -> Workbook: }] } """ - pkg_dir = __name__.rpartition(".")[0] + project_root_dir = get_project_root_dir() sample_type = "pool" if data["pooled_samples"] else "sample" costcenter = data["cost_center"] - template_path = resource_filename(pkg_dir, f"templates/{costcenter}_{sample_type}_invoice.xlsx") - workbook = load_workbook(template_path) + template_path = Path( + project_root_dir, + "apps", + "invoice", + "templates", + f"{costcenter}_{sample_type}_invoice{FileExtensions.XLSX}", + ) + workbook = load_workbook(template_path.as_posix()) if data["pooled_samples"]: worksheet = workbook["Bilaga Prover"] worksheet["C1"] = costcenter.upper() diff --git a/cg/apps/lims/api.py b/cg/apps/lims/api.py index b8975985ae..71a66b799e 100644 --- a/cg/apps/lims/api.py +++ b/cg/apps/lims/api.py @@ -9,8 +9,15 @@ from genologics.lims import Lims from requests.exceptions import HTTPError -from cg.constants import Priority -from cg.constants.lims import MASTER_STEPS_UDFS, 
PROP2UDF, DocumentationMethod, LimsArtifactTypes +from cg.constants.constants import ControlOptions, CustomerId +from cg.constants.lims import ( + MASTER_STEPS_UDFS, + PROP2UDF, + DocumentationMethod, + LimsArtifactTypes, + LimsProcess, +) +from cg.constants.priority import Priority from cg.exc import LimsDataError from .order import OrderHandler @@ -478,3 +485,73 @@ def get_latest_rna_input_amount(self, sample_id: str) -> float | None: ) input_amount: float | None = self._get_last_used_input_amount(input_amounts=input_amounts) return input_amount + + def get_latest_artifact_for_sample( + self, + process_type: LimsProcess, + sample_internal_id: str, + artifact_type: LimsArtifactTypes | None = LimsArtifactTypes.ANALYTE, + ) -> Artifact: + """Return latest artifact for a given sample, process and artifact type.""" + + artifacts: list[Artifact] = self.get_artifacts( + process_type=process_type, + type=artifact_type, + samplelimsid=sample_internal_id, + ) + + if not artifacts: + raise LimsDataError( + f"No artifacts were found for process {process_type}, type {artifact_type} and sample {sample_internal_id}." 
+ ) + + latest_artifact: Artifact = self._get_latest_artifact_from_list(artifact_list=artifacts) + return latest_artifact + + def _get_latest_artifact_from_list(self, artifact_list: list[Artifact]) -> Artifact: + """Returning the latest artifact in a list of artifacts.""" + artifacts = [] + for artifact in artifact_list: + date = artifact.parent_process.date_run or datetime.today().strftime("%Y-%m-%d") + artifacts.append((date, artifact.id, artifact)) + + artifacts.sort() + date, id, latest_artifact = artifacts[-1] + return latest_artifact + + def get_internal_negative_control_id_from_sample_in_pool( + self, sample_internal_id: str, pooling_step: LimsProcess + ) -> str: + """Retrieve from LIMS the sample ID for the internal negative control sample present in the same pool as the given sample.""" + artifact: Artifact = self.get_latest_artifact_for_sample( + process_type=pooling_step, + sample_internal_id=sample_internal_id, + ) + negative_controls: list[Sample] = self._get_negative_controls_from_list( + samples=artifact.samples + ) + + if not negative_controls: + raise LimsDataError( + f"No internal negative controls found in the pool of sample {sample_internal_id}." 
+ ) + + if len(negative_controls) > 1: + sample_ids = [sample.id for sample in negative_controls] + raise LimsDataError( + f"Multiple internal negative control samples found: {' '.join(sample_ids)}" + ) + + return negative_controls[0].id + + @staticmethod + def _get_negative_controls_from_list(samples: list[Sample]) -> list[Sample]: + """Filter and return a list of internal negative controls from a given sample list.""" + negative_controls = [] + for sample in samples: + if ( + sample.udf.get("Control") == ControlOptions.NEGATIVE + and sample.udf.get("customer") == CustomerId.CG_INTERNAL_CUSTOMER + ): + negative_controls.append(sample) + return negative_controls diff --git a/cg/apps/tb/api.py b/cg/apps/tb/api.py index 5e34424420..a2db0fcabc 100644 --- a/cg/apps/tb/api.py +++ b/cg/apps/tb/api.py @@ -118,6 +118,7 @@ def add_pending_analysis( workflow: Workflow = None, ticket: str = None, workflow_manager: str = WorkflowManager.Slurm, + tower_workflow_id: str | None = None, ) -> TrailblazerAnalysis: request_body = { "case_id": case_id, @@ -130,6 +131,7 @@ def add_pending_analysis( "workflow": workflow.upper(), "ticket": ticket, "workflow_manager": workflow_manager, + "tower_workflow_id": tower_workflow_id, } LOG.debug(f"Submitting job to Trailblazer: {request_body}") if response := self.query_trailblazer( diff --git a/cg/cli/get.py b/cg/cli/get.py index ee7d18ba9b..bf73a29121 100644 --- a/cg/cli/get.py +++ b/cg/cli/get.py @@ -198,7 +198,11 @@ def get_sequencing_run(context: click.Context, samples: bool, flow_cell_id: str) sequencing_run.device.internal_id, sequencing_run.sequencer_type, sequencing_run.sequencer_name, - sequencing_run.sequencing_started_at.date(), + ( + sequencing_run.sequencing_started_at.date() + if sequencing_run.sequencing_started_at + else "Not available" + ), sequencing_run.sequencing_completed_at.date(), sequencing_run.archived_at.date() if sequencing_run.archived_at else "No", sequencing_run.data_availability, diff --git 
a/cg/cli/post_process/post_process.py b/cg/cli/post_process/post_process.py index 8c4560fd96..93d43b540a 100644 --- a/cg/cli/post_process/post_process.py +++ b/cg/cli/post_process/post_process.py @@ -35,5 +35,4 @@ def post_process_sequencing_run(context: CGConfig, run_name: str, dry_run: bool) post_processing_service.post_process(run_name=run_name, dry_run=dry_run) -post_process_group: click.Group post_process_group.add_command(post_process_sequencing_run) diff --git a/cg/cli/workflow/mutant/base.py b/cg/cli/workflow/mutant/base.py index db79996963..740eb16e2e 100644 --- a/cg/cli/workflow/mutant/base.py +++ b/cg/cli/workflow/mutant/base.py @@ -9,7 +9,6 @@ link, resolve_compression, store, - store_available, ) from cg.constants import EXIT_FAIL, EXIT_SUCCESS from cg.constants.cli_options import DRY_RUN @@ -17,6 +16,7 @@ from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.mutant import MutantAnalysisAPI from cg.models.cg_config import CGConfig +from cg.store.models import Case LOG = logging.getLogger(__name__) @@ -32,7 +32,6 @@ def mutant(context: click.Context) -> None: mutant.add_command(resolve_compression) mutant.add_command(link) mutant.add_command(store) -mutant.add_command(store_available) @mutant.command("config-case") @@ -75,7 +74,6 @@ def start(context: click.Context, dry_run: bool, case_id: str, config_artic: str context.invoke(link, case_id=case_id, dry_run=dry_run) context.invoke(config_case, case_id=case_id, dry_run=dry_run) context.invoke(run, case_id=case_id, dry_run=dry_run, config_artic=config_artic) - context.invoke(store, case_id=case_id, dry_run=dry_run) @mutant.command("start-available") @@ -100,3 +98,49 @@ def start_available(context: click.Context, dry_run: bool = False): exit_code = EXIT_FAIL if exit_code: raise click.Abort + + +@mutant.command("store-available") +@DRY_RUN +@click.pass_context +def store_available(context: click.Context, dry_run: bool) -> None: + """Run QC checks and store bundles for all finished analyses 
in Housekeeper.""" + + analysis_api: MutantAnalysisAPI = context.obj.meta_apis["analysis_api"] + + exit_code: int = EXIT_SUCCESS + + cases_ready_for_qc: list[Case] = analysis_api.get_cases_to_perform_qc_on() + LOG.info(f"Found {len(cases_ready_for_qc)} cases to perform QC on!") + for case in cases_ready_for_qc: + LOG.info(f"Performing QC on case {case.internal_id}.") + try: + analysis_api.run_qc_on_case(case=case, dry_run=dry_run) + except Exception: + exit_code = EXIT_FAIL + + cases_to_store: list[Case] = analysis_api.get_cases_to_store() + LOG.info(f"Found {len(cases_to_store)} cases to store!") + for case in cases_to_store: + LOG.info(f"Storing deliverables for {case.internal_id}") + try: + context.invoke(store, case_id=case.internal_id, dry_run=dry_run) + except Exception as exception_object: + LOG.error(f"Error storingc {case.internal_id}: {exception_object}") + exit_code = EXIT_FAIL + + if exit_code: + raise click.Abort + + +@mutant.command("run-qc") +@DRY_RUN +@ARGUMENT_CASE_ID +@click.pass_context +def run_qc(context: click.Context, case_id: str, dry_run: bool) -> None: + """ + Run QC on case and generate QC_report file. 
+ """ + analysis_api: MutantAnalysisAPI = context.obj.meta_apis["analysis_api"] + + analysis_api.run_qc(case_id=case_id, dry_run=dry_run) diff --git a/cg/constants/constants.py b/cg/constants/constants.py index f69e1d26ce..c7ae23d812 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -223,6 +223,7 @@ class FileExtensions(StrEnum): TSV: str = ".tsv" TXT: str = ".txt" VCF: str = ".vcf" + XLSX: str = ".xlsx" XML: str = ".xml" YAML: str = ".yaml" @@ -254,6 +255,13 @@ class MicrosaltAppTags(StrEnum): PREP_CATEGORY: str = "mic" +class MutantQC: + EXTERNAL_NEGATIVE_CONTROL_READS_THRESHOLD: int = 100000 + INTERNAL_NEGATIVE_CONTROL_READS_THRESHOLD: int = 2000 + FRACTION_OF_SAMPLES_WITH_FAILED_QC_TRESHOLD: float = 0.2 + QUALITY_REPORT_FILE_NAME: str = f"QC_report{FileExtensions.JSON}" + + DRY_RUN_MESSAGE = "Dry run: process call will not be executed!" diff --git a/cg/constants/lims.py b/cg/constants/lims.py index f08bbbd74e..ce832d0712 100644 --- a/cg/constants/lims.py +++ b/cg/constants/lims.py @@ -157,3 +157,7 @@ class DocumentationMethod(StrEnum): class LimsArtifactTypes(StrEnum): ANALYTE: str = "Analyte" RESULT_FILE: str = "ResultFile" + + +class LimsProcess(StrEnum): + COVID_POOLING_STEP: str = "Pooling and Clean-up (Cov) v1" diff --git a/cg/constants/report.py b/cg/constants/report.py index 86a8890bdb..48716dbc1f 100644 --- a/cg/constants/report.py +++ b/cg/constants/report.py @@ -1,15 +1,17 @@ """Delivery report constants.""" -from importlib.resources import files from pathlib import Path from cg.constants import DataDelivery from cg.constants.constants import CancerAnalysisType, FileExtensions, Workflow from cg.constants.subject import Sex +from cg.utils.files import get_project_root_dir + +project_root_dir: Path = get_project_root_dir() DELIVERY_REPORT_FILE_NAME: str = f"delivery-report{FileExtensions.HTML}" SWEDAC_LOGO_PATH = Path( - files("cg"), "meta", "report", "templates", "static", "images", "SWEDAC_logo.png" + project_root_dir, "meta", 
"report", "templates", "static", "images", "SWEDAC_logo.png" ) BALSAMIC_REPORT_ACCREDITED_PANELS: list[str] = ["gmsmyeloid"] diff --git a/cg/meta/orders/api.py b/cg/meta/orders/api.py index 3d73ac47ed..0222d4f971 100644 --- a/cg/meta/orders/api.py +++ b/cg/meta/orders/api.py @@ -13,7 +13,9 @@ from cg.apps.osticket import OsTicket from cg.meta.orders.ticket_handler import TicketHandler from cg.models.orders.order import OrderIn, OrderType -from cg.services.orders.submitters.order_submitter_registry import OrderSubmitterRegistry +from cg.services.orders.submitters.order_submitter_registry import ( + OrderSubmitterRegistry, +) from cg.store.store import Store LOG = logging.getLogger(__name__) @@ -41,6 +43,7 @@ def submit(self, project: OrderType, order_in: OrderIn, user_name: str, user_mai Main entry point for the class towards interfaces that implements it. """ submit_handler = self.submitter_registry.get_order_submitter(project) + submit_handler.order_validation_service.validate_order(order_in) # detect manual ticket assignment ticket_number: str | None = TicketHandler.parse_ticket_number(order_in.name) if not ticket_number: diff --git a/cg/meta/rsync/sbatch.py b/cg/meta/rsync/sbatch.py index e9b495ed56..ef0487074c 100644 --- a/cg/meta/rsync/sbatch.py +++ b/cg/meta/rsync/sbatch.py @@ -9,8 +9,8 @@ """ COVID_RSYNC = """ -rsync -rvtL {source_path} {destination_path} -rsync -rvtL --chmod=777 {covid_report_path} {covid_destination_path} +rsync -rvL {source_path} {destination_path} +rsync -rvL --chmod=777 {covid_report_path} {covid_destination_path} """ ERROR_RSYNC_FUNCTION = """ diff --git a/cg/meta/workflow/analysis.py b/cg/meta/workflow/analysis.py index 079421fde4..11158b1093 100644 --- a/cg/meta/workflow/analysis.py +++ b/cg/meta/workflow/analysis.py @@ -275,7 +275,11 @@ def get_deliverables_file_path(self, case_id: str) -> Path: def get_analysis_finish_path(self, case_id: str) -> Path: raise NotImplementedError - def add_pending_trailblazer_analysis(self, case_id: 
str) -> None: + def add_pending_trailblazer_analysis( + self, + case_id: str, + tower_workflow_id: str | None = None, + ) -> None: self.check_analysis_ongoing(case_id) application_type: str = self.get_application_type( self.status_db.get_case_by_internal_id(case_id).links[0].sample @@ -299,6 +303,7 @@ def add_pending_trailblazer_analysis(self, case_id: str) -> None: ticket=ticket, workflow=workflow, workflow_manager=workflow_manager, + tower_workflow_id=tower_workflow_id, ) def _get_order_id_from_case_id(self, case_id) -> int: diff --git a/cg/meta/workflow/mutant/__init__.py b/cg/meta/workflow/mutant/__init__.py new file mode 100644 index 0000000000..34bb150721 --- /dev/null +++ b/cg/meta/workflow/mutant/__init__.py @@ -0,0 +1 @@ +from cg.meta.workflow.mutant.mutant import MutantAnalysisAPI diff --git a/cg/meta/workflow/mutant.py b/cg/meta/workflow/mutant/mutant.py similarity index 72% rename from cg/meta/workflow/mutant.py rename to cg/meta/workflow/mutant/mutant.py index c4eb9e5594..ab4550d7d9 100644 --- a/cg/meta/workflow/mutant.py +++ b/cg/meta/workflow/mutant/mutant.py @@ -1,13 +1,16 @@ import logging import shutil from pathlib import Path - from cg.constants import SequencingFileTag, Workflow -from cg.constants.constants import FileFormat +from cg.constants.constants import FileFormat, MutantQC +from cg.constants.tb import AnalysisStatus +from cg.exc import CgError from cg.io.controller import WriteFile from cg.meta.workflow.analysis import AnalysisAPI from cg.meta.workflow.fastq import MutantFastqHandler from cg.services.sequencing_qc_service.sequencing_qc_service import SequencingQCService +from cg.meta.workflow.mutant.quality_controller.models import MutantQualityResult +from cg.meta.workflow.mutant.quality_controller.quality_controller import MutantQualityController from cg.models.cg_config import CGConfig from cg.models.workflow.mutant import MutantSampleConfig from cg.store.models import Application, Case, Sample @@ -24,6 +27,9 @@ def __init__( ): 
super().__init__(workflow=workflow, config=config) self.root_dir = config.mutant.root + self.quality_checker = MutantQualityController( + status_db=config.status_db, lims=config.lims_api + ) @property def conda_binary(self) -> str: @@ -49,9 +55,17 @@ def get_case_path(self, case_id: str) -> Path: def get_case_output_path(self, case_id: str) -> Path: return Path(self.get_case_path(case_id=case_id), "results") + def get_case_results_file_path(self, case: Case) -> Path: + case_output_path: Path = self.get_case_output_path(case.internal_id) + return Path(case_output_path, f"sars-cov-2_{case.latest_ticket}_results.csv") + def get_case_fastq_dir(self, case_id: str) -> Path: return Path(self.get_case_path(case_id=case_id), "fastq") + def get_case_qc_report_path(self, case_id: str) -> Path: + case_path: Path = self.get_case_path(case_id=case_id) + return Path(case_path, MutantQC.QUALITY_REPORT_FILE_NAME) + def get_job_ids_path(self, case_id: str) -> Path: return Path(self.get_case_output_path(case_id=case_id), "trailblazer_config.yaml") @@ -188,13 +202,24 @@ def run_analysis(self, case_id: str, dry_run: bool, config_artic: str = None) -> ) def get_cases_to_store(self) -> list[Case]: - """Return cases where analysis has a deliverables file, - and is ready to be stored in Housekeeper.""" - return [ + """Return cases for which the analysis is complete on Trailblazer and a QC report has been generated.""" + cases_to_store: list[Case] = [ case - for case in self.status_db.get_running_cases_in_workflow(workflow=self.workflow) - if Path(self.get_deliverables_file_path(case_id=case.internal_id)).exists() + for case in self.status_db.get_running_cases_in_workflow(self.workflow) + if self.trailblazer_api.is_latest_analysis_completed(case.internal_id) + and self.get_case_qc_report_path(case_id=case.internal_id).exists() ] + return cases_to_store + + def get_cases_to_perform_qc_on(self) -> list[Case]: + """Return cases with a completed analysis that are not yet stored.""" + 
cases_to_perform_qc_on: list[Case] = [ + case + for case in self.status_db.get_running_cases_in_workflow(self.workflow) + if self.trailblazer_api.is_latest_analysis_completed(case.internal_id) + and not self.get_case_qc_report_path(case_id=case.internal_id).exists() + ] + return cases_to_perform_qc_on def get_metadata_for_nanopore_sample(self, sample: Sample) -> list[dict]: return [ @@ -249,3 +274,51 @@ def link_nanopore_fastq_for_sample( LOG.info(f"Concatenation in progress for sample {sample.internal_id}.") self.fastq_handler.concatenate(read_paths, concatenated_path) self.fastq_handler.remove_files(read_paths) + + def run_qc_on_case(self, case: Case, dry_run: bool) -> None: + """Run qc check on case, report qc summary on Trailblazer and set analysis status to fail if it fails QC.""" + try: + qc_result: MutantQualityResult = self.get_qc_result(case=case) + except Exception as exception: + error_message: str = f"Could not perform QC on case {case.internal_id}: {exception}" + LOG.error(error_message) + if not dry_run: + self.trailblazer_api.add_comment( + case_id=case.internal_id, comment="ERROR: Could not perform QC on case" + ) + self.trailblazer_api.set_analysis_status( + case_id=case.internal_id, status=AnalysisStatus.ERROR + ) + raise CgError(error_message) + + if not dry_run: + self.report_qc_on_trailblazer(case=case, qc_result=qc_result) + if not qc_result.passes_qc: + self.trailblazer_api.set_analysis_status( + case_id=case.internal_id, status=AnalysisStatus.FAILED + ) + + def get_qc_result(self, case: Case) -> MutantQualityResult: + case_results_file_path: Path = self.get_case_results_file_path(case=case) + case_qc_report_path: Path = self.get_case_qc_report_path(case_id=case.internal_id) + qc_result: MutantQualityResult = self.quality_checker.get_quality_control_result( + case=case, + case_results_file_path=case_results_file_path, + case_qc_report_path=case_qc_report_path, + ) + return qc_result + + def report_qc_on_trailblazer(self, case: Case, 
qc_result: MutantQualityResult) -> None: + report_file_path: Path = self.get_case_qc_report_path(case_id=case.internal_id) + + comment = qc_result.summary + ( + f" QC report: {report_file_path}" if not qc_result.passes_qc else "" + ) + self.trailblazer_api.add_comment(case_id=case.internal_id, comment=comment) + + def run_qc(self, case_id: str, dry_run: bool) -> None: + LOG.info(f"Running QC on case {case_id}.") + + case: Case = self.status_db.get_case_by_internal_id(case_id) + + self.run_qc_on_case(case=case, dry_run=dry_run) diff --git a/cg/meta/workflow/mutant/quality_controller/__init__.py b/cg/meta/workflow/mutant/quality_controller/__init__.py new file mode 100644 index 0000000000..188adac75a --- /dev/null +++ b/cg/meta/workflow/mutant/quality_controller/__init__.py @@ -0,0 +1 @@ +from cg.meta.workflow.mutant.quality_controller.quality_controller import MutantQualityController diff --git a/cg/meta/workflow/mutant/quality_controller/metrics_parser_utils.py b/cg/meta/workflow/mutant/quality_controller/metrics_parser_utils.py new file mode 100644 index 0000000000..45894acdf1 --- /dev/null +++ b/cg/meta/workflow/mutant/quality_controller/metrics_parser_utils.py @@ -0,0 +1,50 @@ +from pathlib import Path + +from pydantic import TypeAdapter +from cg.io.csv import read_csv +from typing import Any + +from cg.meta.workflow.mutant.quality_controller.models import ParsedSampleResults +from cg.store.models import Case + + +def parse_samples_results(case: Case, results_file_path: Path) -> dict[str, ParsedSampleResults]: + """Takes a case object and a results_file_path and returns dict[str, SampleResults] with sample.internal_id as keys.""" + + validated_results_list: list[ParsedSampleResults] = _get_validated_results_list( + results_file_path=results_file_path + ) + + samples_results: dict[str, ParsedSampleResults] = _get_samples_results( + case=case, results_list=validated_results_list + ) + + return samples_results + + +def 
_get_validated_results_list(results_file_path: Path) -> list[ParsedSampleResults]: + """Parses the results file and returns a list of validated SampleResults.""" + raw_results: list[dict[Any, Any]] = read_csv(file_path=results_file_path, read_to_dict=True) + adapter = TypeAdapter(list[ParsedSampleResults]) + return adapter.validate_python(raw_results) + + +def _get_sample_name_to_id_mapping(case: Case) -> dict[str, str]: + sample_name_to_id_mapping: dict[str, str] = {} + for sample in case.samples: + sample_name_to_id_mapping[sample.name] = sample.internal_id + return sample_name_to_id_mapping + + +def _get_samples_results( + case: Case, results_list: list[ParsedSampleResults] +) -> dict[str, ParsedSampleResults]: + """Return the mapping of sample internal ids to SampleResults for a case.""" + + sample_name_to_id_mapping: dict[str, str] = _get_sample_name_to_id_mapping(case=case) + + samples_results: dict[str, ParsedSampleResults] = {} + for result in results_list: + sample_internal_id = sample_name_to_id_mapping[result.sample_name] + samples_results[sample_internal_id] = result + return samples_results diff --git a/cg/meta/workflow/mutant/quality_controller/models.py b/cg/meta/workflow/mutant/quality_controller/models.py new file mode 100644 index 0000000000..815270ceca --- /dev/null +++ b/cg/meta/workflow/mutant/quality_controller/models.py @@ -0,0 +1,89 @@ +from typing import Annotated, Any +from pydantic import BaseModel, BeforeValidator, Field, ValidationError, ConfigDict +from cg.store.models import Sample + + +# Validator +def str_to_bool(value: str) -> bool: + if value == "TRUE": + return True + elif value == "FALSE": + return False + raise ValidationError(f"String {value} cannot be turned to bool.") + + +# Models +class ParsedSampleResults(BaseModel): + sample_name: str = Field(alias="Sample") + selection: str = Field(alias="Selection") + region_code: str = Field(alias="Region Code") + ticket: int = Field(alias="Ticket") + pct_n_bases: float = 
Field(alias="%N_bases") + pct_10x_coverage: float = Field(alias="%10X_coverage") + passes_qc: Annotated[bool, BeforeValidator(str_to_bool)] = Field(alias="QC_pass") + lineage: str = Field(alias="Lineage") + pangolin_data_version: str = Field(alias="Pangolin_data_version") + voc: str = Field(alias="VOC") + mutations: str = Field(alias="Mutations") + + +class MutantPoolSamples(BaseModel): + samples: list[Sample] + external_negative_control: Sample + internal_negative_control: Sample + + model_config = ConfigDict(arbitrary_types_allowed=True) + + +class SamplePoolAndResults(BaseModel): + pool: MutantPoolSamples + results: dict[str, ParsedSampleResults] + + +class SampleQualityResults(BaseModel): + sample_id: str + passes_qc: bool + passes_reads_threshold: bool + passes_mutant_qc: bool | None = None + + +class SamplesQualityResults(BaseModel): + internal_negative_control: SampleQualityResults + external_negative_control: SampleQualityResults + samples: list[SampleQualityResults] + + @property + def total_samples_count(self) -> int: + return len(self.samples) + + @property + def passed_samples_count(self) -> int: + samples_pass_qc: list[bool] = [sample_result.passes_qc for sample_result in self.samples] + return sum(samples_pass_qc) + + @property + def failed_samples_count(self) -> int: + return self.total_samples_count - self.passed_samples_count + + +class CaseQualityResult(BaseModel): + passes_qc: bool + internal_negative_control_passes_qc: bool + external_negative_control_passes_qc: bool + fraction_samples_passes_qc: bool + + +class MutantQualityResult(BaseModel): + case_quality_result: CaseQualityResult + samples_quality_results: SamplesQualityResults + summary: str + + @property + def passes_qc(self) -> bool: + return self.case_quality_result.passes_qc + + +class MutantReport(BaseModel): + summary: str + case: dict[str, Any] + samples: dict[str, Any] diff --git a/cg/meta/workflow/mutant/quality_controller/quality_controller.py 
b/cg/meta/workflow/mutant/quality_controller/quality_controller.py new file mode 100644 index 0000000000..a377a78261 --- /dev/null +++ b/cg/meta/workflow/mutant/quality_controller/quality_controller.py @@ -0,0 +1,273 @@ +from pathlib import Path +from cg.apps.lims.api import LimsAPI +from cg.constants.constants import MutantQC +from cg.constants.lims import LimsProcess +from cg.exc import CgError +from cg.meta.workflow.mutant.quality_controller.metrics_parser_utils import parse_samples_results +from cg.meta.workflow.mutant.quality_controller.models import ( + MutantPoolSamples, + SamplePoolAndResults, + SampleQualityResults, + CaseQualityResult, + MutantQualityResult, + ParsedSampleResults, + SamplesQualityResults, +) +from cg.meta.workflow.mutant.quality_controller.report_generator_utils import ( + get_summary, + write_report, +) +from cg.meta.workflow.mutant.quality_controller.result_logger_utils import ( + log_case_result, + log_results, + log_sample_result, +) +from cg.meta.workflow.mutant.quality_controller.utils import ( + has_external_negative_control_sample_valid_total_reads, + has_internal_negative_control_sample_valid_total_reads, + has_sample_valid_total_reads, +) +from cg.store.models import Case, Sample +from cg.store.store import Store + + +class MutantQualityController: + def __init__(self, status_db: Store, lims: LimsAPI) -> None: + self.status_db: Store = status_db + self.lims: LimsAPI = lims + + def get_quality_control_result( + self, case: Case, case_results_file_path: Path, case_qc_report_path: Path + ) -> MutantQualityResult: + """Perform QC check on a case and generate the QC_report.""" + sample_pool_and_results: SamplePoolAndResults = self._get_sample_pool_and_results( + case_results_file_path=case_results_file_path, + case=case, + ) + + samples_quality_results: SamplesQualityResults = self._get_samples_quality_results( + sample_pool_and_results=sample_pool_and_results + ) + case_quality_result: CaseQualityResult = 
self._get_case_quality_result( + samples_quality_results + ) + + write_report( + case_qc_report_path=case_qc_report_path, + samples_quality_results=samples_quality_results, + case_quality_result=case_quality_result, + ) + + log_results( + case_quality_result=case_quality_result, + samples_quality_results=samples_quality_results, + report_file_path=case_qc_report_path, + ) + + summary: str = get_summary( + case_quality_result=case_quality_result, + samples_quality_results=samples_quality_results, + ) + + return MutantQualityResult( + case_quality_result=case_quality_result, + samples_quality_results=samples_quality_results, + summary=summary, + ) + + def _get_samples_quality_results( + self, sample_pool_and_results: SamplePoolAndResults + ) -> SamplesQualityResults: + samples_quality_results: list[SampleQualityResults] = [] + for sample in sample_pool_and_results.pool.samples: + sample_results: ParsedSampleResults = sample_pool_and_results.results[ + sample.internal_id + ] + sample_quality_results: SampleQualityResults = ( + self._get_sample_quality_result_for_sample( + sample=sample, sample_results=sample_results + ) + ) + samples_quality_results.append(sample_quality_results) + + internal_negative_control_sample: Sample = ( + sample_pool_and_results.pool.internal_negative_control + ) + internal_negative_control_quality_metrics: SampleQualityResults = ( + self._get_sample_quality_result_for_internal_negative_control_sample( + sample=internal_negative_control_sample + ) + ) + + external_negative_control_sample: Sample = ( + sample_pool_and_results.pool.external_negative_control + ) + external_negative_control_sample_results: ParsedSampleResults = ( + sample_pool_and_results.results[external_negative_control_sample.internal_id] + ) + external_negative_control_quality_metrics: SampleQualityResults = ( + self._get_sample_quality_result_for_external_negative_control_sample( + sample=external_negative_control_sample, + 
sample_results=external_negative_control_sample_results, + ) + ) + + return SamplesQualityResults( + samples=samples_quality_results, + internal_negative_control=internal_negative_control_quality_metrics, + external_negative_control=external_negative_control_quality_metrics, + ) + + @staticmethod + def _get_sample_quality_result_for_sample( + sample: Sample, sample_results: ParsedSampleResults + ) -> SampleQualityResults: + does_sample_pass_reads_threshold: bool = has_sample_valid_total_reads(sample=sample) + does_sample_pass_qc: bool = does_sample_pass_reads_threshold and sample_results.passes_qc + sample_quality_result = SampleQualityResults( + sample_id=sample.internal_id, + passes_qc=does_sample_pass_qc, + passes_reads_threshold=does_sample_pass_reads_threshold, + passes_mutant_qc=sample_results.passes_qc, + ) + + log_sample_result( + result=sample_quality_result, + ) + return sample_quality_result + + @staticmethod + def _get_sample_quality_result_for_internal_negative_control_sample( + sample: Sample, + ) -> SampleQualityResults: + does_sample_pass_reads_threshold: bool = ( + has_internal_negative_control_sample_valid_total_reads(sample=sample) + ) + sample_quality_result = SampleQualityResults( + sample_id=sample.internal_id, + passes_qc=does_sample_pass_reads_threshold, + passes_reads_threshold=does_sample_pass_reads_threshold, + ) + + log_sample_result(result=sample_quality_result, is_external_negative_control=True) + return sample_quality_result + + @staticmethod + def _get_sample_quality_result_for_external_negative_control_sample( + sample: Sample, sample_results: ParsedSampleResults + ) -> SampleQualityResults: + does_sample_pass_reads_threshold: bool = ( + has_external_negative_control_sample_valid_total_reads(sample=sample) + ) + sample_passes_qc: bool = does_sample_pass_reads_threshold and not sample_results.passes_qc + sample_quality_result = SampleQualityResults( + sample_id=sample.internal_id, + passes_qc=sample_passes_qc, + 
passes_reads_threshold=does_sample_pass_reads_threshold, + passes_mutant_qc=sample_results.passes_qc, + ) + + log_sample_result(result=sample_quality_result, is_external_negative_control=True) + return sample_quality_result + + def _get_case_quality_result( + self, samples_quality_results: SamplesQualityResults + ) -> CaseQualityResult: + external_negative_control_pass_qc: bool = ( + samples_quality_results.external_negative_control.passes_qc + ) + internal_negative_control_pass_qc: bool = ( + samples_quality_results.internal_negative_control.passes_qc + ) + + samples_pass_qc: bool = self._samples_pass_qc( + samples_quality_results=samples_quality_results + ) + + case_passes_qc: bool = ( + samples_pass_qc + and internal_negative_control_pass_qc + and external_negative_control_pass_qc + ) + + result = CaseQualityResult( + passes_qc=case_passes_qc, + internal_negative_control_passes_qc=internal_negative_control_pass_qc, + external_negative_control_passes_qc=external_negative_control_pass_qc, + fraction_samples_passes_qc=samples_pass_qc, + ) + + log_case_result(result) + return result + + @staticmethod + def _samples_pass_qc(samples_quality_results: SamplesQualityResults) -> bool: + fraction_failed_samples: float = ( + samples_quality_results.failed_samples_count + / samples_quality_results.total_samples_count + ) + return fraction_failed_samples < MutantQC.FRACTION_OF_SAMPLES_WITH_FAILED_QC_TRESHOLD + + def _get_internal_negative_control_id_for_case(self, case: Case) -> str: + """Query lims to retrieve internal_negative_control_id for a mutant case sequenced in one pool.""" + + sample_internal_id = case.sample_ids[0] + internal_negative_control_id: str = ( + self.lims.get_internal_negative_control_id_from_sample_in_pool( + sample_internal_id=sample_internal_id, pooling_step=LimsProcess.COVID_POOLING_STEP + ) + ) + return internal_negative_control_id + + def _get_internal_negative_control_sample_for_case( + self, + case: Case, + ) -> Sample: + 
internal_negative_control_id: str = self._get_internal_negative_control_id_for_case( + case=case + ) + return self.status_db.get_sample_by_internal_id(internal_id=internal_negative_control_id) + + def _get_mutant_pool_samples(self, case: Case) -> MutantPoolSamples: + samples = [] + external_negative_control = None + + for sample in case.samples: + if sample.is_negative_control: + external_negative_control = sample + continue + samples.append(sample) + + if not external_negative_control: + raise CgError(f"No external negative control sample found for case {case.internal_id}.") + + internal_negative_control: Sample = self._get_internal_negative_control_sample_for_case( + case=case + ) + + return MutantPoolSamples( + samples=samples, + external_negative_control=external_negative_control, + internal_negative_control=internal_negative_control, + ) + + def _get_sample_pool_and_results( + self, case_results_file_path: Path, case: Case + ) -> SamplePoolAndResults: + try: + samples: MutantPoolSamples = self._get_mutant_pool_samples(case=case) + except Exception as exception_object: + raise CgError( + f"Not possible to retrieve samples for case {case.internal_id}: {exception_object}" + ) from exception_object + + try: + samples_results: dict[str, ParsedSampleResults] = parse_samples_results( + case=case, results_file_path=case_results_file_path + ) + except Exception as exception_object: + raise CgError( + f"Not possible to retrieve results for case {case.internal_id}: {exception_object}" + ) + + return SamplePoolAndResults(pool=samples, results=samples_results) diff --git a/cg/meta/workflow/mutant/quality_controller/report_generator_utils.py b/cg/meta/workflow/mutant/quality_controller/report_generator_utils.py new file mode 100644 index 0000000000..c2d361a5ff --- /dev/null +++ b/cg/meta/workflow/mutant/quality_controller/report_generator_utils.py @@ -0,0 +1,40 @@ +from pathlib import Path +from cg.io.json import write_json +from 
import logging
from pathlib import Path

LOG = logging.getLogger(__name__)


# --- cg/meta/workflow/mutant/quality_controller/report_generator_utils.py ---


def write_report(
    case_qc_report_path: Path,
    case_quality_result: CaseQualityResult,
    samples_quality_results: SamplesQualityResults,
) -> None:
    """Write the QC report (summary, case result and sample results) as JSON."""
    summary: str = get_summary(
        case_quality_result=case_quality_result,
        samples_quality_results=samples_quality_results,
    )
    report = MutantReport(
        summary=summary,
        case=case_quality_result.model_dump(),
        samples=samples_quality_results.model_dump(),
    )

    write_json(file_path=case_qc_report_path, content=report.model_dump())


def get_summary(
    case_quality_result: CaseQualityResult,
    samples_quality_results: SamplesQualityResults,
) -> str:
    """Return a one-line summary: the case verdict followed by the samples message."""
    case_summary: str = "Case passed QC. " if case_quality_result.passes_qc else "Case failed QC. "
    sample_summary: str = get_samples_results_message(
        samples_quality_results=samples_quality_results
    )
    return case_summary + sample_summary


# --- cg/meta/workflow/mutant/quality_controller/result_logger_utils.py ---


def log_results(
    case_quality_result: CaseQualityResult,
    samples_quality_results: SamplesQualityResults,
    report_file_path: Path,
) -> None:
    """Log the case verdict and the samples overview.

    A passing case is logged at INFO level and a failing case at WARNING
    level; the samples overview is always logged at INFO level.
    """
    if case_quality_result.passes_qc:
        # Fixed: the pass message had no INFO logging call of its own.
        LOG.info(f"QC passed, see {report_file_path} for details.")
    else:
        LOG.warning(get_case_fail_message(case_quality_result))

    LOG.info(get_samples_results_message(samples_quality_results))


def log_sample_result(
    result: SampleQualityResults,
    is_external_negative_control: bool = False,
    is_internal_negative_control: bool = False,
) -> None:
    """Log whether a sample passed (INFO) or failed (WARNING) QC.

    The control flags only add a prefix to the message; if both are set, the
    internal negative control prefix is used.
    """
    control_message = ""
    if is_external_negative_control:
        control_message = "External negative control sample "
    if is_internal_negative_control:
        control_message = "Internal negative control sample "
    if result.passes_qc:
        LOG.info(f"{control_message}{result.sample_id} passed QC.")
    else:
        LOG.warning(f"{control_message}{result.sample_id} failed QC.")


def log_case_result(result: CaseQualityResult) -> None:
    """Log the case verdict: failure at WARNING level, success at INFO level."""
    if not result.passes_qc:
        LOG.warning("Case failed QC.")
    else:
        # Fixed: a passing case is not a warning (consistent with log_sample_result).
        LOG.info("Case passed QC.")


def get_case_fail_message(case_quality_result: CaseQualityResult) -> str:
    """Return a failure message with one line per failed case-level criterion."""
    fail_reasons: list[str] = []
    if not case_quality_result.internal_negative_control_passes_qc:
        fail_reasons.append("The internal negative control sample failed QC.")
    if not case_quality_result.external_negative_control_passes_qc:
        fail_reasons.append("The external negative control sample failed QC.")
    if not case_quality_result.fraction_samples_passes_qc:
        # A case can fail solely on the sample fraction; report that as well.
        fail_reasons.append("Too many samples failed QC.")

    # Fixed: the header was glued to the first reason without a separator and
    # the reasons carried their own newline on top of the join separator.
    return "\n".join(["QC failed.", *fail_reasons])


def get_samples_results_message(samples_quality_results: SamplesQualityResults) -> str:
    """Return a one-line overview of the control verdicts and the sample counts."""
    internal_negative_control_message: str = "Internal negative control sample " + (
        "passed QC."
        if samples_quality_results.internal_negative_control.passes_qc
        else "failed QC."
    )
    external_negative_control_message: str = "External negative control sample " + (
        "passed QC."
        if samples_quality_results.external_negative_control.passes_qc
        else "failed QC."
    )

    samples_message: str = (
        f"Sample results: {samples_quality_results.total_samples_count} total, {samples_quality_results.failed_samples_count} failed, {samples_quality_results.passed_samples_count} passed."
    )

    return " ".join(
        [internal_negative_control_message, external_negative_control_message, samples_message]
    )


# --- cg/meta/workflow/mutant/quality_controller/utils.py ---


def has_sample_valid_total_reads(
    sample: Sample,
) -> bool:
    """Return whether a regular sample has enough reads (delegates to sample_has_enough_reads)."""
    return sample_has_enough_reads(sample=sample)


def has_internal_negative_control_sample_valid_total_reads(
    sample: Sample,
) -> bool:
    """Return whether the internal negative control has strictly fewer reads than its threshold."""
    # NOTE(review): assumes sample.reads is a number; a None value would raise TypeError - confirm.
    return sample.reads < MutantQC.INTERNAL_NEGATIVE_CONTROL_READS_THRESHOLD


def has_external_negative_control_sample_valid_total_reads(
    sample: Sample,
) -> bool:
    """Return whether the external negative control has strictly fewer reads than its threshold."""
    # NOTE(review): assumes sample.reads is a number; a None value would raise TypeError - confirm.
    return sample.reads < MutantQC.EXTERNAL_NEGATIVE_CONTROL_READS_THRESHOLD
get_command_args( @@ -501,7 +502,7 @@ def run_nextflow_analysis( self.verify_sample_sheet_exists(case_id=case_id, dry_run=dry_run) self.check_analysis_ongoing(case_id=case_id) LOG.info(f"Running analysis for {case_id}") - self.run_analysis( + tower_workflow_id: str | None = self.run_analysis( case_id=case_id, command_args=command_args, use_nextflow=use_nextflow, @@ -519,7 +520,10 @@ def run_nextflow_analysis( raise CgError if not dry_run: - self.add_pending_trailblazer_analysis(case_id=case_id) + self.add_pending_trailblazer_analysis( + case_id=case_id, + tower_workflow_id=tower_workflow_id, + ) def run_analysis( self, @@ -527,7 +531,7 @@ def run_analysis( command_args: NfCommandArgs, use_nextflow: bool, dry_run: bool = False, - ) -> None: + ) -> str | None: """Execute run analysis with given options.""" if use_nextflow: self._run_analysis_with_nextflow( @@ -536,7 +540,7 @@ def run_analysis( dry_run=dry_run, ) else: - self._run_analysis_with_tower( + return self._run_analysis_with_tower( case_id=case_id, command_args=command_args, dry_run=dry_run, diff --git a/cg/resources/__init__.py b/cg/resources/__init__.py index f203a60e1a..36f2723f09 100644 --- a/cg/resources/__init__.py +++ b/cg/resources/__init__.py @@ -1,8 +1,9 @@ from pathlib import Path -import pkg_resources - from cg.constants import FileExtensions +from cg.utils.files import get_project_root_dir + +project_root_dir: Path = get_project_root_dir() RAREDISEASE_BUNDLE_FILENAMES: str = ( Path("resources", "raredisease_bundle_filenames").with_suffix(FileExtensions.YAML).as_posix() @@ -20,18 +21,10 @@ Path("resources", "tomte_bundle_filenames").with_suffix(FileExtensions.YAML).as_posix() ) -RAREDISEASE_BUNDLE_FILENAMES_PATH = Path( - pkg_resources.resource_filename("cg", RAREDISEASE_BUNDLE_FILENAMES) -) +RAREDISEASE_BUNDLE_FILENAMES_PATH = Path(project_root_dir, RAREDISEASE_BUNDLE_FILENAMES) -RNAFUSION_BUNDLE_FILENAMES_PATH: Path = Path( - pkg_resources.resource_filename("cg", RNAFUSION_BUNDLE_FILENAMES) -) 
+RNAFUSION_BUNDLE_FILENAMES_PATH = Path(project_root_dir, RNAFUSION_BUNDLE_FILENAMES) -TAXPROFILER_BUNDLE_FILENAMES_PATH: Path = Path( - pkg_resources.resource_filename("cg", TAXPROFILER_BUNDLE_FILENAMES) -) +TAXPROFILER_BUNDLE_FILENAMES_PATH = Path(project_root_dir, TAXPROFILER_BUNDLE_FILENAMES) -TOMTE_BUNDLE_FILENAMES_PATH: Path = Path( - pkg_resources.resource_filename("cg", TOMTE_BUNDLE_FILENAMES) -) +TOMTE_BUNDLE_FILENAMES_PATH = Path(project_root_dir, TOMTE_BUNDLE_FILENAMES) diff --git a/cg/server/admin.py b/cg/server/admin.py index 4dc8b2b171..a77d1bdc82 100644 --- a/cg/server/admin.py +++ b/cg/server/admin.py @@ -551,7 +551,13 @@ class SampleView(BaseView): "last_sequenced_at", "sex", ] - column_filters = ["customer.internal_id", "priority", "sex", "application_version.application"] + column_filters = [ + "customer.internal_id", + "priority", + "sex", + "application_version.application", + "capture_kit", + ] column_formatters = { "is_external": is_external_application, "internal_id": view_case_sample_link, diff --git a/cg/server/dto/samples/samples_response.py b/cg/server/dto/samples/samples_response.py index d345e9c1a8..fe34cde7f2 100644 --- a/cg/server/dto/samples/samples_response.py +++ b/cg/server/dto/samples/samples_response.py @@ -1,4 +1,5 @@ from datetime import datetime + from pydantic import BaseModel from cg.constants.subject import Sex @@ -88,7 +89,7 @@ class SampleDTO(BaseModel): concentration_ng_ul: int | None = None panels: list[str] | None = None status: str | None = None - tumour: bool | None = None + is_tumour: bool | None = None reference_genome: str | None = None customer: CustomerDto | None = None diff --git a/cg/server/invoices/views.py b/cg/server/invoices/views.py index 0c50526e95..32c2554b22 100644 --- a/cg/server/invoices/views.py +++ b/cg/server/invoices/views.py @@ -197,7 +197,7 @@ def invoice_template(invoice_id): workbook = render_xlsx(invoice_dict) temp_dir = tempfile.gettempdir() - filename = 
"Invoice_{}_{}.xlsx".format(invoice_obj.id, cost_center) + filename = f"Invoice_{invoice_obj.id}_{cost_center}.xlsx" excel_path = os.path.join(temp_dir, filename) workbook.save(excel_path) diff --git a/cg/services/run_devices/exc.py b/cg/services/run_devices/exc.py index daa2a4e3e7..0b89724dff 100644 --- a/cg/services/run_devices/exc.py +++ b/cg/services/run_devices/exc.py @@ -2,28 +2,42 @@ class PostProcessingRunFileManagerError(CgError): + """Error raised if something goes wrong managing the sequencing run files.""" + pass class PostProcessingRunDataGeneratorError(CgError): + """Error raised if something goes wrong parsing the run directory data.""" + pass class PostProcessingParsingError(CgError): + """Error raised if something goes wrong parsing the sequencing run metrics.""" + pass class PostProcessingDataTransferError(CgError): + """Error raised if something goes wrong creating the DTOs for post-processing.""" + pass class PostProcessingStoreDataError(CgError): + """Error raised if something goes wrong storing the post-processing data in StatusDB.""" + pass class PostProcessingStoreFileError(CgError): + """Error raised if something goes wrong storing the post-processing files in Housekeeper.""" + pass class PostProcessingError(CgError): + """Error raised if something goes wrong during post-processing.""" + pass diff --git a/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py b/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py index 7bbafd44b3..adca59b4e0 100644 --- a/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py +++ b/cg/services/run_devices/pacbio/run_data_generator/pacbio_run_data_generator.py @@ -1,16 +1,10 @@ from pathlib import Path from cg.services.run_devices.abstract_classes import RunDataGenerator -from cg.services.run_devices.error_handler import ( - handle_post_processing_errors, -) +from cg.services.run_devices.error_handler import 
handle_post_processing_errors from cg.services.run_devices.exc import PostProcessingRunDataGeneratorError from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData -from cg.services.run_devices.validators import ( - validate_has_expected_parts, - validate_name_pre_fix, -) -from cg.utils.string import get_element_from_split +from cg.services.run_devices.validators import validate_has_expected_parts, validate_name_pre_fix class PacBioRunDataGenerator(RunDataGenerator): @@ -40,16 +34,16 @@ def get_run_data(self, run_name: str, sequencing_dir: str) -> PacBioRunData: @staticmethod def _get_sequencing_run_name(run_name: str) -> str: - return get_element_from_split(value=run_name, element_position=0, split="/") + return run_name.split("/")[0] @staticmethod def _get_plate_well(run_name: str) -> str: - return get_element_from_split(value=run_name, element_position=-1, split="/") + return run_name.split("/")[1] def _get_plate(self, run_name: str) -> str: plate_well: str = self._get_plate_well(run_name) - return get_element_from_split(value=plate_well, element_position=0, split="_") + return plate_well.split("_")[0] def _get_well(self, run_name: str) -> str: plate_well: str = self._get_plate_well(run_name) - return get_element_from_split(value=plate_well, element_position=-1, split="_") + return plate_well.split("_")[-1] diff --git a/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py b/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py index 5624dc7467..6e2d230d47 100644 --- a/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py +++ b/cg/services/run_devices/pacbio/run_file_manager/run_file_manager.py @@ -19,9 +19,7 @@ def get_files_to_parse(self, run_data: PacBioRunData) -> list[Path]: """Get the file paths required by the PacBioMetricsParser.""" run_path: Path = run_data.full_path validate_files_or_directories_exist([run_path]) - files_to_parse: list[Path] = self._get_report_files(run_path) - 
files_to_parse.append(self._get_ccs_report_file(run_path)) - return files_to_parse + return self._get_report_files(run_path) @handle_post_processing_errors( to_except=(FileNotFoundError,), to_raise=PostProcessingRunFileManagerError @@ -29,9 +27,7 @@ def get_files_to_parse(self, run_data: PacBioRunData) -> list[Path]: def get_files_to_store(self, run_data: PacBioRunData) -> list[Path]: """Get the files to store for the PostProcessingHKService.""" run_path: Path = run_data.full_path - files_to_store: list[Path] = self.get_files_to_parse(run_data) - files_to_store.append(self._get_hifi_read_file(run_path)) - return files_to_store + return self.get_files_to_parse(run_data) + self._get_hifi_read_files(run_path) @staticmethod def _get_ccs_report_file(run_path: Path) -> Path: @@ -44,8 +40,7 @@ def _get_ccs_report_file(run_path: Path) -> Path: raise FileNotFoundError(f"No CCS report file found in {statistics_dir}") return files[0] - @staticmethod - def _get_report_files(run_path: Path) -> list[Path]: + def _get_report_files(self, run_path: Path) -> list[Path]: """Return the paths to the unzipped report files.""" unzipped_dir: Path = Path( run_path, PacBioDirsAndFiles.STATISTICS_DIR, PacBioDirsAndFiles.UNZIPPED_REPORTS_DIR @@ -55,16 +50,17 @@ def _get_report_files(run_path: Path) -> list[Path]: Path(unzipped_dir, PacBioDirsAndFiles.LOADING_REPORT), Path(unzipped_dir, PacBioDirsAndFiles.RAW_DATA_REPORT), Path(unzipped_dir, PacBioDirsAndFiles.SMRTLINK_DATASETS_REPORT), + self._get_ccs_report_file(run_path), ] validate_files_or_directories_exist(report_files) return report_files @staticmethod - def _get_hifi_read_file(run_path: Path) -> Path: + def _get_hifi_read_files(run_path: Path) -> list[Path]: """Return the path to the HiFi read file.""" hifi_dir = Path(run_path, PacBioDirsAndFiles.HIFI_READS) - bam_file: Path = get_files_matching_pattern( + bam_files: list[Path] = get_files_matching_pattern( directory=hifi_dir, pattern=f"*{FileExtensions.BAM}" - )[0] - 
validate_files_or_directories_exist([bam_file]) - return bam_file + ) + validate_files_or_directories_exist(bam_files) + return bam_files diff --git a/cg/services/sample_service/dto_mappers.py b/cg/services/sample_service/dto_mappers.py index 7abfdb938e..ceaf129e18 100644 --- a/cg/services/sample_service/dto_mappers.py +++ b/cg/services/sample_service/dto_mappers.py @@ -23,7 +23,7 @@ def create_sample_dto(sample: Sample) -> SampleDTO: priority=sample.priority_human, reference_genome=sample.reference_genome, subject_id=sample.subject_id, - tumour=sample.is_tumour, + is_tumour=sample.is_tumour, application=application, application_version=application_version, sex=sample.sex, diff --git a/cg/store/models.py b/cg/store/models.py index 36d9413126..8a7b801fc2 100644 --- a/cg/store/models.py +++ b/cg/store/models.py @@ -3,6 +3,7 @@ from enum import Enum from typing import Annotated +from pydantic import ConfigDict from sqlalchemy import ( BLOB, DECIMAL, @@ -24,6 +25,7 @@ from cg.constants.constants import ( CaseActions, ControlOptions, + CustomerId, PrepCategory, SequencingQCStatus, SexOptions, @@ -814,6 +816,18 @@ def expected_reads_for_sample(self) -> int: def has_reads(self) -> bool: return bool(self.reads) + @property + def is_negative_control(self) -> bool: + return self.control == ControlOptions.NEGATIVE + + @property + def is_internal_negative_control(self) -> bool: + return self.is_negative_control and self.customer == CustomerId.CG_INTERNAL_CUSTOMER + + @property + def is_external_negative_control(self) -> bool: + return self.is_negative_control and self.customer != CustomerId.CG_INTERNAL_CUSTOMER + @property def flow_cells(self) -> list[Flowcell]: """Return the flow cells a sample has been sequenced on.""" @@ -967,7 +981,7 @@ class Order(Base): cases: Mapped[list[Case]] = orm.relationship(secondary=order_case, back_populates="orders") customer_id: Mapped[int] = mapped_column(ForeignKey("customer.id")) customer: Mapped[Customer] = 
orm.relationship(foreign_keys=[customer_id]) - order_date: Mapped[datetime] = mapped_column(default=datetime.now()) + order_date: Mapped[datetime] = mapped_column(default=datetime.now) ticket_id: Mapped[int] = mapped_column(unique=True, index=True) workflow: Mapped[str] = mapped_column(types.Enum(*(workflow.value for workflow in Workflow))) is_delivered: Mapped[bool] = mapped_column(default=False) diff --git a/cg/utils/files.py b/cg/utils/files.py index de66999e14..9f8249eba8 100644 --- a/cg/utils/files.py +++ b/cg/utils/files.py @@ -1,13 +1,18 @@ -"""Some helper functions for working with files""" +"""Some helper functions for working with files.""" import logging import os import shutil +from importlib.resources import files from pathlib import Path LOG = logging.getLogger(__name__) +def get_project_root_dir() -> Path: + return Path(files("cg")) + + def get_file_in_directory(directory: Path, file_name: str) -> Path: """Get a file in a directory and subdirectories. Raises: @@ -15,11 +20,10 @@ def get_file_in_directory(directory: Path, file_name: str) -> Path: """ if not directory.is_dir() or not directory.exists(): raise FileNotFoundError(f"Directory {directory} does not exist") - for directory_path, _, files in os.walk(directory): - for file in files: + for directory_path, _, dir_files in os.walk(directory): + for file in dir_files: if file_name == file: - path_to_file = Path(directory_path, file) - return path_to_file + return Path(directory_path, file) raise FileNotFoundError(f"File {file_name} not found in {directory}") @@ -43,10 +47,10 @@ def get_files_in_directory_with_pattern(directory: Path, pattern: str) -> list[P files_with_pattern: list[Path] = [] if not directory.is_dir() or not directory.exists(): raise FileNotFoundError(f"Directory {directory} does not exist") - for directory_path, _, files in os.walk(directory): - for file in files: - if pattern in file: - files_with_pattern.append(Path(directory_path, file)) + for directory_path, _, dir_files in 
os.walk(directory): + files_with_pattern.extend( + Path(directory_path, file) for file in dir_files if pattern in file + ) if not files_with_pattern: raise FileNotFoundError(f"No files with pattern {pattern} found in {directory}") return files_with_pattern @@ -117,9 +121,9 @@ def link_or_overwrite_file(src: Path, dst: Path) -> None: def get_all_files_in_directory_tree(directory: Path) -> list[Path]: """Get the relative paths of all files in a directory and its subdirectories.""" files_in_directory: list[Path] = [] - for subdir, _, files in os.walk(directory): + for subdir, _, dir_files in os.walk(directory): subdir = Path(subdir).relative_to(directory) - files_in_directory.extend([Path(subdir, file) for file in files]) + files_in_directory.extend([Path(subdir, file) for file in dir_files]) return files_in_directory diff --git a/cg/utils/string.py b/cg/utils/string.py deleted file mode 100644 index c8c556234e..0000000000 --- a/cg/utils/string.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Utils related to string manipulation.""" - -from cg.exc import CgError - - -def get_element_from_split(value: str, element_position: int, split: str) -> str: - elements: list[str] = value.split(split) - if len(elements) < element_position: - raise CgError(message="Provided element position out of bounds.") - return elements[element_position] diff --git a/pyproject.toml b/pyproject.toml index 3e4aee6a13..b2e27b3f36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "cg" -version = "62.1.6" +version = "62.2.4" description = "Clinical Genomics command center" authors = ["Clinical Genomics "] readme = "README.md" diff --git a/tests/apps/lims/test_api.py b/tests/apps/lims/test_api.py index 84e8c0de12..284d8527fb 100644 --- a/tests/apps/lims/test_api.py +++ b/tests/apps/lims/test_api.py @@ -4,6 +4,9 @@ from requests.exceptions import HTTPError +from cg.constants.lims import LimsProcess +from tests.mocks.limsmock import 
MockLimsAPI + def test_get_received_date(lims_mock, mocker): """Test to get the received date""" @@ -90,3 +93,18 @@ def test_get_delivery_date_no_sample(lims_api, mocker): # THEN assert that None is returned since a exception was raised assert res is None + + +def test_get_internal_negative_control_id_from_sample_in_pool( + lims_api_with_sample_and_internal_negative_control: MockLimsAPI, +): + # GIVEN a sample_id + sample_id: str = "sample" + + # WHEN retrieving the internal_negative_control_id_from_lims + internal_negative_control_id = lims_api_with_sample_and_internal_negative_control.get_internal_negative_control_id_from_sample_in_pool( + sample_internal_id=sample_id, pooling_step=LimsProcess.COVID_POOLING_STEP + ) + + # THEN no errors are raised and the correct internal_negative_control_id is retrieved + assert internal_negative_control_id == "internal_negative_control" diff --git a/tests/conftest.py b/tests/conftest.py index 38b17354e7..82306f6bfc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,7 @@ from cg.utils import Process from tests.mocks.crunchy import MockCrunchyAPI from tests.mocks.hk_mock import MockHousekeeperAPI -from tests.mocks.limsmock import MockLimsAPI +from tests.mocks.limsmock import LimsSample, LimsUDF, MockLimsAPI from tests.mocks.madeline import MockMadelineAPI from tests.mocks.osticket import MockOsTicket from tests.mocks.process_mock import ProcessMock @@ -710,6 +710,12 @@ def microsalt_analysis_dir(analysis_dir: Path) -> Path: return Path(analysis_dir, "microsalt") +@pytest.fixture(scope="session") +def mutant_analysis_dir(analysis_dir: Path) -> Path: + """Return the path to the mutant analysis directory""" + return Path(analysis_dir, "mutant") + + @pytest.fixture(scope="session") def apps_dir(fixtures_dir: Path) -> Path: """Return the path to the apps dir.""" @@ -1677,6 +1683,27 @@ def lims_api() -> MockLimsAPI: return MockLimsAPI() +@pytest.fixture +def lims_api_with_sample_and_internal_negative_control(lims_api: 
MockLimsAPI) -> MockLimsAPI: + sample_qc_pass = LimsSample(id="sample", name="sample") + + internal_negative_control_qc_pass = LimsSample( + id="internal_negative_control", + name="internal_negative_control", + udfs=LimsUDF(control="negative", customer="cust000"), + ) + + # Create pools + samples_qc_pass = [ + sample_qc_pass, + internal_negative_control_qc_pass, + ] + # Add pool artifacts + lims_api.add_artifact_for_sample(sample_id=sample_qc_pass.id, samples=samples_qc_pass) + + return lims_api + + @pytest.fixture(scope="session") def config_root_dir() -> Path: """Return a path to the config root directory.""" diff --git a/tests/fixtures/analysis/mutant/case_qc_fail/QC_report.json b/tests/fixtures/analysis/mutant/case_qc_fail/QC_report.json new file mode 100644 index 0000000000..1a343c8a6f --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_fail/QC_report.json @@ -0,0 +1,31 @@ +{ + "summary": "Case failed QC. Internal negative control sample passed QC. External negative control sample passed QC. 
Sample results: 1 total, 1 failed, 0 passed.", + "case": { + "passes_qc": false, + "internal_negative_control_passes_qc": true, + "external_negative_control_passes_qc": true, + "fraction_samples_passes_qc": false + }, + "samples": { + "internal_negative_control": { + "sample_id": "internal_negative_control_qc_pass", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": null + }, + "external_negative_control": { + "sample_id": "external_negative_control_qc_pass", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": false + }, + "samples": [ + { + "sample_id": "sample_qc_fail", + "passes_qc": false, + "passes_reads_threshold": true, + "passes_mutant_qc": false + } + ] + } +} \ No newline at end of file diff --git a/tests/fixtures/analysis/mutant/case_qc_fail/fail_sars-cov-2_841080_results.csv b/tests/fixtures/analysis/mutant/case_qc_fail/fail_sars-cov-2_841080_results.csv new file mode 100644 index 0000000000..b15c7a270a --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_fail/fail_sars-cov-2_841080_results.csv @@ -0,0 +1,3 @@ +Sample,Selection,Region Code,Ticket,%N_bases,%10X_coverage,QC_pass,Lineage,Pangolin_data_version,VOC,Mutations +sample_qc_fail,Allmän övervakning,01,841080,39.56,60.38,FALSE,BA.3,SCORPIO_v0.1.12,No,S373P;S375F;D614G;N969K +external_negative_control_qc_pass,Information saknas,01,841080,99.90,0.10,FALSE,Unassigned,PUSHER-v1.23.1,No,- diff --git a/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/QC_report.json b/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/QC_report.json new file mode 100644 index 0000000000..9cc7ebbef3 --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/QC_report.json @@ -0,0 +1,31 @@ +{ + "summary": "Case failed QC. Internal negative control sample failed QC. External negative control sample failed QC. 
Sample results: 1 total, 0 failed, 1 passed.", + "case": { + "passes_qc": false, + "internal_negative_control_passes_qc": false, + "external_negative_control_passes_qc": false, + "fraction_samples_passes_qc": true + }, + "samples": { + "internal_negative_control": { + "sample_id": "internal_negative_control_qc_fail", + "passes_qc": false, + "passes_reads_threshold": false, + "passes_mutant_qc": null + }, + "external_negative_control": { + "sample_id": "external_negative_control_qc_fail", + "passes_qc": false, + "passes_reads_threshold": false, + "passes_mutant_qc": false + }, + "samples": [ + { + "sample_id": "sample_qc_pass_with_failing_controls", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": true + } + ] + } +} \ No newline at end of file diff --git a/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/fail_with_failing_controls_sars-cov-2_841080_results.csv b/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/fail_with_failing_controls_sars-cov-2_841080_results.csv new file mode 100644 index 0000000000..7b726da1f5 --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_fail_with_failing_controls/fail_with_failing_controls_sars-cov-2_841080_results.csv @@ -0,0 +1,3 @@ +Sample,Selection,Region Code,Ticket,%N_bases,%10X_coverage,QC_pass,Lineage,Pangolin_data_version,VOC,Mutations +sample_qc_pass,Allmän övervakning,01,208455,8.53,91.38,TRUE,EG.5.1.3,PUSHER-v1.23.1,No,G142D;D614G;H655Y;N679K;P681H;N764K;D796Y;Q954H;N969K +external_negative_control_qc_fail,Information saknas,01,208455,95.71,4.29,FALSE,Unassigned,PUSHER-v1.23.1,No,- diff --git a/tests/fixtures/analysis/mutant/case_qc_pass/QC_report.json b/tests/fixtures/analysis/mutant/case_qc_pass/QC_report.json new file mode 100644 index 0000000000..ee6566f138 --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_pass/QC_report.json @@ -0,0 +1,31 @@ +{ + "summary": "Case passed QC. Internal negative control sample passed QC. 
External negative control sample passed QC. Sample results: 1 total, 0 failed, 1 passed.", + "case": { + "passes_qc": true, + "internal_negative_control_passes_qc": true, + "external_negative_control_passes_qc": true, + "fraction_samples_passes_qc": true + }, + "samples": { + "internal_negative_control": { + "sample_id": "internal_negative_control_qc_pass", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": null + }, + "external_negative_control": { + "sample_id": "external_negative_control_qc_pass", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": false + }, + "samples": [ + { + "sample_id": "sample_qc_pass", + "passes_qc": true, + "passes_reads_threshold": true, + "passes_mutant_qc": true + } + ] + } +} \ No newline at end of file diff --git a/tests/fixtures/analysis/mutant/case_qc_pass/pass_sars-cov-2_208455_results.csv b/tests/fixtures/analysis/mutant/case_qc_pass/pass_sars-cov-2_208455_results.csv new file mode 100644 index 0000000000..0667e7c5f5 --- /dev/null +++ b/tests/fixtures/analysis/mutant/case_qc_pass/pass_sars-cov-2_208455_results.csv @@ -0,0 +1,3 @@ +Sample,Selection,Region Code,Ticket,%N_bases,%10X_coverage,QC_pass,Lineage,Pangolin_data_version,VOC,Mutations +sample_qc_pass,Allmän övervakning,01,208455,8.53,91.38,TRUE,EG.5.1.3,PUSHER-v1.23.1,No,G142D;D614G;H655Y;N679K;P681H;N764K;D796Y;Q954H;N969K +external_negative_control_qc_pass,Information saknas,01,208455,95.71,4.29,FALSE,Unassigned,PUSHER-v1.23.1,No,- diff --git a/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240522_133539/1_B01/hifi_reads/m84202_240522_155607_s2.hifi_reads.bam.pbi b/tests/fixtures/devices/pacbio/SMRTcells/r84202_20240522_133539/1_B01/hifi_reads/m84202_240522_155607_s2.hifi_reads.bam.pbi deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/meta/workflow/mutant/conftest.py b/tests/meta/workflow/mutant/conftest.py new file mode 100644 index 0000000000..9ad37700a3 --- /dev/null +++ 
b/tests/meta/workflow/mutant/conftest.py @@ -0,0 +1,351 @@ +import pytest + +from pathlib import Path + +from cg.meta.workflow.mutant.quality_controller.metrics_parser_utils import ( + _get_validated_results_list, + parse_samples_results, +) +from cg.meta.workflow.mutant.quality_controller.models import ( + SamplePoolAndResults, + ParsedSampleResults, + SamplesQualityResults, +) +from cg.meta.workflow.mutant.quality_controller.quality_controller import MutantQualityController +from cg.store.models import Case, Sample +from cg.store.store import Store +from cg.constants.constants import ControlOptions, MutantQC +from tests.store_helpers import StoreHelpers +from tests.mocks.limsmock import LimsSample, LimsUDF, MockLimsAPI + + +@pytest.fixture +def mutant_store(store: Store, helpers: StoreHelpers) -> Store: + # Add mutant application and application_version + application = helpers.add_application( + store=store, application_tag="VWGDPTR001", target_reads=2000000, percent_reads_guaranteed=1 + ) + + # Add cases + case_qc_pass = helpers.add_case(store=store, name="case_qc_pass", internal_id="case_qc_pass") + case_qc_fail = helpers.add_case(store=store, name="case_qc_fail", internal_id="case_qc_fail") + + case_qc_fail_with_failing_controls = helpers.add_case( + store=store, + name="case_qc_fail_with_failing_controls", + internal_id="case_qc_fail_with_failing_controls", + ) + + # Add samples + sample_qc_pass = helpers.add_sample( + store=store, + internal_id="sample_qc_pass", + name="sample_qc_pass", + control=ControlOptions.EMPTY, + reads=861966, + application_tag=application.tag, + ) + + sample_qc_fail = helpers.add_sample( + store=store, + internal_id="sample_qc_fail", + name="sample_qc_fail", + control=ControlOptions.EMPTY, + reads=438776, + application_tag=application.tag, + ) + + external_negative_control_qc_pass = helpers.add_sample( + store=store, + internal_id="external_negative_control_qc_pass", + name="external_negative_control_qc_pass", + 
control=ControlOptions.NEGATIVE, + reads=20674, + application_tag=application.tag, + ) + + internal_negative_control_qc_pass = helpers.add_sample( + store=store, + internal_id="internal_negative_control_qc_pass", + name="internal_negative_control_qc_pass", + control=ControlOptions.NEGATIVE, + reads=0, + application_tag=application.tag, + ) + + sample_qc_pass_with_failing_controls = helpers.add_sample( + store=store, + internal_id="sample_qc_pass_with_failing_controls", + name="sample_qc_pass", + control=ControlOptions.EMPTY, + reads=861966, + application_tag=application.tag, + ) + + internal_negative_control_qc_fail = helpers.add_sample( + store=store, + internal_id="internal_negative_control_qc_fail", + name="internal_negative_control_qc_fail", + control=ControlOptions.NEGATIVE, + reads=3000, + application_tag=application.tag, + ) + + external_negative_control_qc_fail = helpers.add_sample( + store=store, + internal_id="external_negative_control_qc_fail", + name="external_negative_control_qc_fail", + control=ControlOptions.NEGATIVE, + reads=200000, + application_tag=application.tag, + ) + + # Add CaseSample relationships + # case_qc_pass + helpers.add_relationship(store=store, case=case_qc_pass, sample=sample_qc_pass) + helpers.add_relationship( + store=store, case=case_qc_pass, sample=external_negative_control_qc_pass + ) + + # case_qc_fail + helpers.add_relationship(store=store, case=case_qc_fail, sample=sample_qc_fail) + helpers.add_relationship( + store=store, case=case_qc_fail, sample=external_negative_control_qc_pass + ) + + # case_qc_fail_with_failing_controls + helpers.add_relationship( + store=store, + case=case_qc_fail_with_failing_controls, + sample=sample_qc_pass_with_failing_controls, + ) + helpers.add_relationship( + store=store, + case=case_qc_fail_with_failing_controls, + sample=external_negative_control_qc_fail, + ) + + return store + + +@pytest.fixture +def mutant_lims(lims_api: MockLimsAPI) -> MockLimsAPI: + # Get samples + sample_qc_pass = 
LimsSample(id="sample_qc_pass", name="sample_qc_pass") + + sample_qc_fail = LimsSample(id="sample_qc_fail", name="sample_qc_fail") + + external_negative_control_qc_pass = LimsSample( + id="external_negative_control_qc_pass", + name="external_negative_control_qc_pass", + udfs=LimsUDF(control="negative"), + ) + internal_negative_control_qc_pass = LimsSample( + id="internal_negative_control_qc_pass", + name="internal_negative_control_qc_pass", + udfs=LimsUDF(control="negative", customer="cust000"), + ) + + sample_qc_pass_with_failing_controls = LimsSample( + id="sample_qc_pass_with_failing_controls", name="sample_qc_pass" + ) + + external_negative_control_qc_fail = LimsSample( + id="external_negative_control_qc_fail", + name="external_negative_control_qc_fail", + udfs=LimsUDF(control="negative"), + ) + + internal_negative_control_qc_fail = LimsSample( + id="internal_negative_control_qc_fail", + name="internal_negative_control_qc_fail", + udfs=LimsUDF(control="negative", customer="cust000"), + ) + + # Create pools + samples_qc_pass = [ + sample_qc_pass, + external_negative_control_qc_pass, + internal_negative_control_qc_pass, + ] + + samples_qc_fail = [ + sample_qc_fail, + external_negative_control_qc_pass, + internal_negative_control_qc_pass, + ] + + samples_qc_fail_with_failing_controls = [ + sample_qc_pass_with_failing_controls, + external_negative_control_qc_fail, + internal_negative_control_qc_fail, + ] + + # Add pool artifacts + lims_api.add_artifact_for_sample(sample_id=sample_qc_pass.id, samples=samples_qc_pass) + lims_api.add_artifact_for_sample(sample_id=sample_qc_fail.id, samples=samples_qc_fail) + + lims_api.add_artifact_for_sample( + sample_id=sample_qc_pass_with_failing_controls.id, + samples=samples_qc_fail_with_failing_controls, + ) + + return lims_api + + +@pytest.fixture +def mutant_quality_controller( + mutant_store: Store, mutant_lims: MockLimsAPI +) -> MutantQualityController: + return MutantQualityController(status_db=mutant_store, 
lims=mutant_lims) + + +# Samples +@pytest.fixture +def sample_qc_pass(mutant_store: Store) -> Sample: + return mutant_store.get_sample_by_internal_id("sample_qc_pass") + + +@pytest.fixture +def internal_negative_control_qc_pass(mutant_store: Store) -> Sample: + return mutant_store.get_sample_by_internal_id("internal_negative_control_qc_pass") + + +@pytest.fixture +def external_negative_control_qc_pass(mutant_store: Store) -> Sample: + return mutant_store.get_sample_by_internal_id("external_negative_control_qc_pass") + + +@pytest.fixture +def sample_qc_fail(mutant_store: Store) -> Sample: + return mutant_store.get_sample_by_internal_id("sample_qc_fail") + + +# Cases +## mutant_case_qc_pass +@pytest.fixture +def mutant_case_qc_pass(mutant_store: Store) -> Case: + return mutant_store.get_case_by_internal_id("case_qc_pass") + + +@pytest.fixture +def mutant_analysis_dir_case_qc_pass(mutant_analysis_dir: Path, mutant_case_qc_pass: Case) -> Path: + return Path(mutant_analysis_dir, mutant_case_qc_pass.internal_id) + + +@pytest.fixture +def mutant_results_file_path_case_qc_pass(mutant_analysis_dir_case_qc_pass: Path) -> Path: + return Path(mutant_analysis_dir_case_qc_pass, "pass_sars-cov-2_208455_results.csv") + + +@pytest.fixture +def mutant_qc_report_path_case_qc_pass(mutant_analysis_dir_case_qc_pass: Path) -> Path: + return mutant_analysis_dir_case_qc_pass.joinpath(MutantQC.QUALITY_REPORT_FILE_NAME) + + +@pytest.fixture +def mutant_results_list_qc_pass(mutant_results_file_path_case_qc_pass: Path): + return _get_validated_results_list(results_file_path=mutant_results_file_path_case_qc_pass) + + +@pytest.fixture +def mutant_sample_pool_and_results_case_qc_pass( + mutant_quality_controller: MutantQualityController, + mutant_results_file_path_case_qc_pass: Path, + mutant_case_qc_pass: Case, +) -> SamplePoolAndResults: + return mutant_quality_controller._get_sample_pool_and_results( + case_results_file_path=mutant_results_file_path_case_qc_pass, + case=mutant_case_qc_pass, + 
) + + +@pytest.fixture +def mutant_samples_results_case_qc_pass( + mutant_case_qc_pass: Case, mutant_results_file_path_case_qc_pass: Path +) -> dict[str, ParsedSampleResults]: + return parse_samples_results( + case=mutant_case_qc_pass, results_file_path=mutant_results_file_path_case_qc_pass + ) + + +@pytest.fixture +def mutant_sample_results_sample_qc_pass( + sample_qc_pass: Sample, mutant_samples_results_case_qc_pass: dict[str, ParsedSampleResults] +) -> ParsedSampleResults: + sample_results = mutant_samples_results_case_qc_pass[sample_qc_pass.internal_id] + return sample_results + + +@pytest.fixture +def mutant_sample_results_external_negative_control_qc_pass( + external_negative_control_qc_pass: Sample, + mutant_samples_results_case_qc_pass: dict[str, ParsedSampleResults], +) -> ParsedSampleResults: + sample_results = mutant_samples_results_case_qc_pass[ + external_negative_control_qc_pass.internal_id + ] + return sample_results + + +@pytest.fixture +def samples_quality_results_case_qc_pass( + mutant_quality_controller: MutantQualityController, + mutant_sample_pool_and_results_case_qc_pass: SamplePoolAndResults, +) -> SamplesQualityResults: + return mutant_quality_controller._get_samples_quality_results( + sample_pool_and_results=mutant_sample_pool_and_results_case_qc_pass + ) + + +## mutant_case_qc_fail +@pytest.fixture +def mutant_case_qc_fail(mutant_store: Store) -> Case: + return mutant_store.get_case_by_internal_id("case_qc_fail") + + +@pytest.fixture +def mutant_analysis_dir_case_qc_fail(mutant_analysis_dir: Path, mutant_case_qc_fail: Case) -> Path: + return Path(mutant_analysis_dir, mutant_case_qc_fail.internal_id) + + +@pytest.fixture +def mutant_results_file_path_qc_fail(mutant_analysis_dir_case_qc_fail: Path) -> Path: + return Path(mutant_analysis_dir_case_qc_fail, "fail_sars-cov-2_841080_results.csv") + + +@pytest.fixture +def mutant_qc_report_path_case_qc_fail(mutant_analysis_dir_case_qc_fail: Path) -> Path: + return 
mutant_analysis_dir_case_qc_fail.joinpath(MutantQC.QUALITY_REPORT_FILE_NAME) + + +## mutant_case_qc_fail_with_failing_controls +@pytest.fixture +def mutant_case_qc_fail_with_failing_controls(mutant_store: Store) -> Case: + return mutant_store.get_case_by_internal_id("case_qc_fail_with_failing_controls") + + +@pytest.fixture +def mutant_analysis_dir_case_qc_fail_with_failing_controls( + mutant_analysis_dir: Path, mutant_case_qc_fail_with_failing_controls: Case +) -> Path: + return Path(mutant_analysis_dir, mutant_case_qc_fail_with_failing_controls.internal_id) + + +@pytest.fixture +def mutant_results_file_path_qc_fail_with_failing_controls( + mutant_analysis_dir_case_qc_fail_with_failing_controls: Path, +) -> Path: + return Path( + mutant_analysis_dir_case_qc_fail_with_failing_controls, + "fail_with_failing_controls_sars-cov-2_841080_results.csv", + ) + + +@pytest.fixture +def mutant_qc_report_path_case_qc_fail_with_failing_controls( + mutant_analysis_dir_case_qc_fail_with_failing_controls: Path, +) -> Path: + return mutant_analysis_dir_case_qc_fail_with_failing_controls.joinpath( + MutantQC.QUALITY_REPORT_FILE_NAME + ) diff --git a/tests/meta/workflow/mutant/test_mutant_metrics_parser_utils.py b/tests/meta/workflow/mutant/test_mutant_metrics_parser_utils.py new file mode 100644 index 0000000000..bd1d61669e --- /dev/null +++ b/tests/meta/workflow/mutant/test_mutant_metrics_parser_utils.py @@ -0,0 +1,66 @@ +from pathlib import Path +from cg.meta.workflow.mutant.quality_controller.metrics_parser_utils import ( + _get_sample_name_to_id_mapping, + _get_samples_results, + _get_validated_results_list, + parse_samples_results, +) +from cg.meta.workflow.mutant.quality_controller.models import ParsedSampleResults +from cg.store.models import Case, Sample + + +def test_get_samples_results( + mutant_case_qc_pass: Case, + mutant_results_list_qc_pass: list[ParsedSampleResults], + sample_qc_pass: Sample, +): + # GIVEN a case and corresponding results_list + + # WHEN creating a 
samples_results dict + samples_results: dict[str, ParsedSampleResults] = _get_samples_results( + case=mutant_case_qc_pass, results_list=mutant_results_list_qc_pass + ) + + # THEN the samples_results object has the correct structure + assert isinstance(samples_results, dict) + assert isinstance(samples_results[sample_qc_pass.internal_id], ParsedSampleResults) + + +def test_get_sample_name_to_id_mapping(mutant_case_qc_pass: Case): + # GIVEN a case + + # WHEN creating a sample_name_to_id_mapping dict + sample_name_to_id_mapping: dict[str, str] = _get_sample_name_to_id_mapping( + case=mutant_case_qc_pass + ) + + # THEN the correct associations are present in the dict + assert len(sample_name_to_id_mapping) == 2 + assert sample_name_to_id_mapping["sample_qc_pass"] == "sample_qc_pass" + assert ( + sample_name_to_id_mapping["external_negative_control_qc_pass"] + == "external_negative_control_qc_pass" + ) + + +def test_get_validated_results_list(mutant_results_file_path_case_qc_pass: Path): + # GIVEN a valid raw_results: list[dict[str, Any]] objects + + # WHEN parsing the file + _get_validated_results_list(results_file_path=mutant_results_file_path_case_qc_pass) + + # THEN no error is thrown + + +def test_parse_samples_results( + mutant_case_qc_pass: Case, mutant_results_file_path_case_qc_pass: Path +): + # GIVEN a case and a valid quality metrics file path + + # WHEN parsing the file + samples_results: dict[str, ParsedSampleResults] = parse_samples_results( + case=mutant_case_qc_pass, results_file_path=mutant_results_file_path_case_qc_pass + ) + + # THEN no error is thrown and sample_qc_pass passes QC + assert samples_results["sample_qc_pass"].passes_qc is True diff --git a/tests/meta/workflow/mutant/test_mutant_quality_controller.py b/tests/meta/workflow/mutant/test_mutant_quality_controller.py new file mode 100644 index 0000000000..feb2fc4e73 --- /dev/null +++ b/tests/meta/workflow/mutant/test_mutant_quality_controller.py @@ -0,0 +1,219 @@ +from pathlib import
Path +from cg.meta.workflow.mutant.quality_controller.models import ( + MutantPoolSamples, + MutantQualityResult, + CaseQualityResult, + SampleQualityResults, + SamplesQualityResults, + ParsedSampleResults, + SamplePoolAndResults, +) +from cg.meta.workflow.mutant.quality_controller.quality_controller import ( + MutantQualityController, +) +from cg.store.models import Case, Sample + + +def test_get_mutant_pool_samples( + mutant_quality_controller: MutantQualityController, + mutant_case_qc_pass: Case, + sample_qc_pass: Sample, + external_negative_control_qc_pass: Sample, + internal_negative_control_qc_pass: Sample, +): + # WHEN creating a MutantPoolSamples object + mutant_pool_samples: MutantPoolSamples = mutant_quality_controller._get_mutant_pool_samples( + case=mutant_case_qc_pass + ) + + # THEN the pool is created correctly: + # - the external negative control is identified and separated from the rest of the samples + # - all other samples are present in the list under samples + # - the internal negative control corresponding to the case is fetched from lims and added to the pool + + assert mutant_pool_samples.external_negative_control == external_negative_control_qc_pass + assert mutant_pool_samples.samples == [sample_qc_pass] + assert mutant_pool_samples.internal_negative_control == internal_negative_control_qc_pass + + +def test_get_sample_pool_and_results( + mutant_quality_controller: MutantQualityController, + mutant_results_file_path_case_qc_pass: Path, + mutant_case_qc_pass: Case, + mutant_sample_results_sample_qc_pass: ParsedSampleResults, + sample_qc_pass: Sample, +): + # GIVEN a case + + # WHEN generating the quality_metrics + sample_pool_and_results: SamplePoolAndResults = ( + mutant_quality_controller._get_sample_pool_and_results( + case_results_file_path=mutant_results_file_path_case_qc_pass, + case=mutant_case_qc_pass, + ) + ) + + # THEN no errors are raised and the sample_results are created for each sample + assert ( + 
sample_pool_and_results.results[sample_qc_pass.internal_id] + == mutant_sample_results_sample_qc_pass + ) + + +def test_get_sample_quality_result_for_sample( + mutant_quality_controller: MutantQualityController, + sample_qc_pass: Sample, + mutant_sample_results_sample_qc_pass: ParsedSampleResults, +): + # GIVEN a sample that passes qc and its corresponding SampleResults + + # WHEN performing quality control on the sample + sample_quality_results_sample_qc_pass: SampleQualityResults = ( + mutant_quality_controller._get_sample_quality_result_for_sample( + sample=sample_qc_pass, + sample_results=mutant_sample_results_sample_qc_pass, + ) + ) + # THEN the sample passes qc + assert sample_quality_results_sample_qc_pass.passes_qc is True + + +def test_get_sample_quality_result_for_internal_negative_control_sample( + mutant_quality_controller: MutantQualityController, + internal_negative_control_qc_pass: Sample, +): + # GIVEN an internal negative control sample that passes qc and its corresponding SampleResults + + # WHEN performing quality control on the sample + sample_quality_results_sample_qc_pass: SampleQualityResults = ( + mutant_quality_controller._get_sample_quality_result_for_internal_negative_control_sample( + sample=internal_negative_control_qc_pass, + ) + ) + # THEN the sample passes qc + assert sample_quality_results_sample_qc_pass.passes_qc is True + + +def test_get_sample_quality_result_for_external_negative_control_sample( + mutant_quality_controller: MutantQualityController, + external_negative_control_qc_pass: Sample, + mutant_sample_results_external_negative_control_qc_pass: ParsedSampleResults, +): + # GIVEN an external negative control sample that passes qc and its corresponding SampleResults + + # WHEN performing quality control on the sample + sample_quality_results_sample_qc_pass: SampleQualityResults = ( + mutant_quality_controller._get_sample_quality_result_for_external_negative_control_sample( + sample=external_negative_control_qc_pass, +
sample_results=mutant_sample_results_external_negative_control_qc_pass, + ) + ) + # THEN the sample passes qc + assert sample_quality_results_sample_qc_pass.passes_qc is True + + +def test_get_samples_quality_results( + mutant_quality_controller: MutantQualityController, + mutant_sample_pool_and_results_case_qc_pass: SamplePoolAndResults, +): + # GIVEN a quality metrics object from a case where all samples pass QC + + # WHEN performing quality control on all the samples + samples_quality_results: SamplesQualityResults = ( + mutant_quality_controller._get_samples_quality_results( + sample_pool_and_results=mutant_sample_pool_and_results_case_qc_pass + ) + ) + + # THEN no error is raised and the correct quality results are generated + assert samples_quality_results.internal_negative_control.passes_qc is True + assert samples_quality_results.external_negative_control.passes_qc is True + assert len(samples_quality_results.samples) == 1 + samples_pass_qc = [ + sample_quality_results.passes_qc + for sample_quality_results in samples_quality_results.samples + ] + assert all(samples_pass_qc) is True + + +def test_get_case_quality_result( + mutant_quality_controller: MutantQualityController, + samples_quality_results_case_qc_pass: SamplesQualityResults, +): + # GIVEN a samples_quality_results object for a case that passes QC + + # WHEN performing QC on the case + case_quality_result: CaseQualityResult = mutant_quality_controller._get_case_quality_result( + samples_quality_results=samples_quality_results_case_qc_pass + ) + + # THEN the correct result is generated + assert case_quality_result.passes_qc is True + assert case_quality_result.internal_negative_control_passes_qc is True + assert case_quality_result.external_negative_control_passes_qc is True + + +def test_get_quality_control_result_case_qc_pass( + mutant_quality_controller: MutantQualityController, + mutant_case_qc_pass: Case, + mutant_results_file_path_case_qc_pass: Path, + mutant_qc_report_path_case_qc_pass:
Path, +): + # GIVEN a case that passes QC + + # WHEN performing QC on the case + + case_quality_result: MutantQualityResult = mutant_quality_controller.get_quality_control_result( + case=mutant_case_qc_pass, + case_results_file_path=mutant_results_file_path_case_qc_pass, + case_qc_report_path=mutant_qc_report_path_case_qc_pass, + ) + + # THEN the case passes qc + assert case_quality_result.passes_qc is True + assert case_quality_result.case_quality_result.external_negative_control_passes_qc is True + assert case_quality_result.case_quality_result.internal_negative_control_passes_qc is True + + +def test_get_quality_control_result_case_qc_fail( + mutant_quality_controller: MutantQualityController, + mutant_case_qc_fail: Case, + mutant_results_file_path_qc_fail: Path, + mutant_qc_report_path_case_qc_fail: Path, +): + # GIVEN a case that does not pass QC + + # WHEN performing QC on the case + + case_quality_result: MutantQualityResult = mutant_quality_controller.get_quality_control_result( + case=mutant_case_qc_fail, + case_results_file_path=mutant_results_file_path_qc_fail, + case_qc_report_path=mutant_qc_report_path_case_qc_fail, + ) + + # THEN the case does not pass QC but both negative controls pass + assert case_quality_result.passes_qc is False + assert case_quality_result.case_quality_result.external_negative_control_passes_qc is True + assert case_quality_result.case_quality_result.internal_negative_control_passes_qc is True + + +def test_get_quality_control_result_case_qc_fail_with_failing_controls( + mutant_quality_controller: MutantQualityController, + mutant_case_qc_fail_with_failing_controls: Case, + mutant_results_file_path_qc_fail_with_failing_controls: Path, + mutant_qc_report_path_case_qc_fail_with_failing_controls: Path, +): + # GIVEN a case that does not pass QC due to failing control samples + + # WHEN performing QC on the case + + case_quality_result: MutantQualityResult = mutant_quality_controller.get_quality_control_result( + case=mutant_case_qc_fail_with_failing_controls, +
case_results_file_path=mutant_results_file_path_qc_fail_with_failing_controls, + case_qc_report_path=mutant_qc_report_path_case_qc_fail_with_failing_controls, + ) + + # THEN the case does not pass QC and the correct result is retrieved for the control samples + assert case_quality_result.passes_qc is False + assert case_quality_result.case_quality_result.external_negative_control_passes_qc is False + assert case_quality_result.case_quality_result.internal_negative_control_passes_qc is False diff --git a/tests/mocks/limsmock.py b/tests/mocks/limsmock.py index 55a14abee5..9e21de7afc 100644 --- a/tests/mocks/limsmock.py +++ b/tests/mocks/limsmock.py @@ -5,6 +5,9 @@ from cg.apps.lims import LimsAPI +from cg.constants.lims import LimsArtifactTypes, LimsProcess +from cg.exc import LimsDataError + class LimsProject(BaseModel): id: str = "1" @@ -18,6 +21,17 @@ def __init__(self, label: str, sequence: str): self.sequence: str = sequence +class LimsUDF(BaseModel): + control: str | None = None + customer: str = None + + def get(self, argument: str) -> str: + if argument == "Control": + return self.control + if argument == "customer": + return self.customer + + class LimsSample(BaseModel): id: str name: str = None @@ -35,10 +49,17 @@ class LimsSample(BaseModel): received: str = None source: str = None priority: str = None + udfs: LimsUDF = LimsUDF() + + +class LimsArtifactObject(BaseModel): + parent_process: LimsProcess = LimsProcess.COVID_POOLING_STEP + type: LimsArtifactTypes = LimsArtifactTypes.ANALYTE + samples: list[LimsSample] = [] class MockLimsAPI(LimsAPI): - """Mock LIMS API to get target bed from LIMS.""" + """Mock LIMS API for testing.""" def __init__(self, config: dict = None, samples: list[dict] = None): if samples is None: @@ -56,6 +77,7 @@ def __init__(self, config: dict = None, samples: list[dict] = None): self._sequencing_method = "CG002 - Cluster Generation (HiSeq X)" self._delivery_method = "CG002 - Delivery" self._source = "cell-free DNA" + self.artifacts: 
dict[str, list[LimsArtifactObject]] = {} def set_prep_method(self, method: str = "1337:00 Test prep method"): """Mock function""" @@ -76,6 +98,50 @@ def add_capture_kit(self, internal_id: str, capture_kit): self.add_sample(internal_id) self.sample_vars[internal_id]["capture_kit"] = capture_kit + def add_artifact_for_sample( + self, + sample_id: str, + samples: list[LimsSample] = None, + ): + if sample_id in self.artifacts: + self.artifacts[sample_id].append(LimsArtifactObject(samples=samples)) + else: + self.artifacts[sample_id] = [LimsArtifactObject(samples=samples)] + + def get_latest_artifact_for_sample( + self, process_type: LimsProcess, artifact_type: LimsArtifactTypes, sample_internal_id: str + ) -> LimsArtifactObject: + return self.artifacts[sample_internal_id][0] + + def get_internal_negative_control_id_from_sample_in_pool( + self, sample_internal_id: str, pooling_step: LimsProcess + ) -> str: + """Retrieve from lims the sample_id for the internal negative control sample present in the same pool as the given sample.""" + artifact: LimsArtifactObject = self.get_latest_artifact_for_sample( + process_type=pooling_step, + artifact_type=LimsArtifactTypes.ANALYTE, + sample_internal_id=sample_internal_id, + ) + samples = artifact.samples + + negative_controls: list = self._get_negative_controls_from_list(samples=samples) + + if len(negative_controls) > 1: + sample_ids = [sample.id for sample in negative_controls] + raise LimsDataError( + f"Several internal negative control samples found: {' '.join(sample_ids)}" + ) + return negative_controls[0].id + + @staticmethod + def _get_negative_controls_from_list(samples: list[LimsSample]) -> list[LimsSample]: + """Filter and return a list of internal negative controls from a given sample list.""" + negative_controls = [] + for sample in samples: + if sample.udfs.control == "negative" and sample.udfs.customer == "cust000": + negative_controls.append(sample) + return negative_controls + def capture_kit(self, lims_id: str): 
if lims_id in self.sample_vars: return self.sample_vars[lims_id].get("capture_kit") diff --git a/tests/services/run_devices/pacbio/post_processing/test_post_processing.py b/tests/services/run_devices/pacbio/post_processing/test_post_processing.py index 519922247d..1dcdfd6d4b 100644 --- a/tests/services/run_devices/pacbio/post_processing/test_post_processing.py +++ b/tests/services/run_devices/pacbio/post_processing/test_post_processing.py @@ -37,7 +37,7 @@ def test_pac_bio_post_processing_run_name_error(pac_bio_context): def test_pac_bio_post_processing_store_data_error( pac_bio_context: CGConfig, pac_bio_sequencing_run_name: str ): - # GIVEN a PacBioPostProcessingService and a wrong run name + # GIVEN a PacBioPostProcessingService that raises an error when storing data in StatusDB post_processing_service: PacBioPostProcessingService = ( pac_bio_context.post_processing_services.pacbio @@ -55,7 +55,7 @@ def test_pac_bio_post_processing_store_data_error( def test_pac_bio_post_processing_store_files_error( pac_bio_context: CGConfig, pac_bio_sequencing_run_name: str ): - # GIVEN a PacBioPostProcessingService + # GIVEN a PacBioPostProcessingService that raises an error when storing files in Housekeeper post_processing_service: PacBioPostProcessingService = ( pac_bio_context.post_processing_services.pacbio ) diff --git a/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py b/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py index 42df23971a..7dc753eff1 100644 --- a/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py +++ b/tests/services/run_devices/pacbio/run_data_generator/test_pacbio_run_data_generator.py @@ -12,6 +12,7 @@ def test_get_run_data( + pac_bio_run_data_generator: PacBioRunDataGenerator, pac_bio_runs_dir: Path, pac_bio_test_run_name: str, pac_bio_smrt_cell_name: str, @@ -21,8 +22,7 @@ def test_get_run_data( run_name: str = 
"/".join([pac_bio_test_run_name, pac_bio_smrt_cell_name]) # WHEN Generating run data - run_data_generator = PacBioRunDataGenerator() - run_data: PacBioRunData = run_data_generator.get_run_data( + run_data: PacBioRunData = pac_bio_run_data_generator.get_run_data( run_name=run_name, sequencing_dir=pac_bio_runs_dir.as_posix() ) @@ -30,18 +30,18 @@ def test_get_run_data( assert run_data == expected_pac_bio_run_data -@pytest.mark.parametrize("run_name", ["rimproper_name", "d_improper_name "]) +@pytest.mark.parametrize("wrong_run_name", ["rimproper_name", "d_improper_name "]) def test_get_run_data_improper_name( + pac_bio_run_data_generator: PacBioRunDataGenerator, pac_bio_runs_dir: Path, - run_name: str, + wrong_run_name: str, ): - # GIVEN a PacBioRunDataGenerator and an improper run name - run_data_generator = PacBioRunDataGenerator() + # GIVEN a PacBioRunDataGenerator and a wrong run name - # WHEN Generating run data + # WHEN Generating run data with the wrong run name # THEN an PostProcessingRunDataGeneratorError is raised with pytest.raises(PostProcessingRunDataGeneratorError): - run_data_generator.get_run_data( - run_name=run_name, sequencing_dir=pac_bio_runs_dir.as_posix() + pac_bio_run_data_generator.get_run_data( + run_name=wrong_run_name, sequencing_dir=pac_bio_runs_dir.as_posix() ) diff --git a/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py b/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py index 6d45f8d6fa..f6e87897d9 100644 --- a/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py +++ b/tests/services/run_devices/pacbio/run_file_manager/test_pacbio_run_file_manager.py @@ -1,25 +1,22 @@ +from pathlib import Path from unittest import mock import pytest -from pathlib import Path from cg.services.run_devices.exc import PostProcessingRunFileManagerError from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData -from 
cg.services.run_devices.pacbio.run_file_manager.run_file_manager import ( - PacBioRunFileManager, -) +from cg.services.run_devices.pacbio.run_file_manager.run_file_manager import PacBioRunFileManager def test_get_files_to_parse( - expected_pac_bio_run_data: PacBioRunData, pac_bio_report_files_to_parse: list[Path] + expected_pac_bio_run_data: PacBioRunData, + pac_bio_report_files_to_parse: list[Path], + pac_bio_run_file_manager: PacBioRunFileManager, ): - # GIVEN a run data object - - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a run data object and a PacBio run file manager # WHEN getting the files to parse - files: list[Path] = file_manager.get_files_to_parse(expected_pac_bio_run_data) + files: list[Path] = pac_bio_run_file_manager.get_files_to_parse(expected_pac_bio_run_data) # THEN the correct files are returned assert files == pac_bio_report_files_to_parse @@ -27,16 +24,14 @@ def test_get_files_to_parse( def test_get_files_to_store( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, pac_bio_report_files_to_parse: list[Path], pac_bio_hifi_read_file: Path, ): - # GIVEN a run data object - - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a run data object and a PacBio file manager # WHEN getting the files to store - files: list[Path] = file_manager.get_files_to_store(expected_pac_bio_run_data) + files: list[Path] = pac_bio_run_file_manager.get_files_to_store(expected_pac_bio_run_data) # THEN the correct files are returned full_list: list[Path] = pac_bio_report_files_to_parse + [pac_bio_hifi_read_file] @@ -45,32 +40,32 @@ def test_get_files_to_store( def test_get_files_to_store_error( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, ): # GIVEN a run data object - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a PacBio run file manager that can't find the HiFi read file 
with mock.patch.object( - file_manager, - attribute="_get_hifi_read_file", + pac_bio_run_file_manager, + attribute="_get_hifi_read_files", side_effect=FileNotFoundError, ): # WHEN getting the files to store # THEN an PostProcessingRunFileManagerError is raised with pytest.raises(PostProcessingRunFileManagerError): - file_manager.get_files_to_store(expected_pac_bio_run_data) + pac_bio_run_file_manager.get_files_to_store(expected_pac_bio_run_data) def test_get_files_to_parse_error( expected_pac_bio_run_data: PacBioRunData, + pac_bio_run_file_manager: PacBioRunFileManager, ): # GIVEN a run data object - # GIVEN a PacBio run file manager - file_manager = PacBioRunFileManager() + # GIVEN a PacBio run file manager that can't find the CCS report file with mock.patch.object( - file_manager, + pac_bio_run_file_manager, attribute="_get_ccs_report_file", side_effect=FileNotFoundError, ): @@ -78,4 +73,4 @@ def test_get_files_to_parse_error( # THEN an PostProcessingRunFileManagerError is raised with pytest.raises(PostProcessingRunFileManagerError): - file_manager.get_files_to_parse(expected_pac_bio_run_data) + pac_bio_run_file_manager.get_files_to_parse(expected_pac_bio_run_data) diff --git a/tests/services/run_devices/pacbio/store_service/test_store_service.py b/tests/services/run_devices/pacbio/store_service/test_store_service.py index 8a526176a0..5fe8df1028 100644 --- a/tests/services/run_devices/pacbio/store_service/test_store_service.py +++ b/tests/services/run_devices/pacbio/store_service/test_store_service.py @@ -5,8 +5,8 @@ import pytest from cg.services.run_devices.exc import ( - PostProcessingStoreDataError, PostProcessingDataTransferError, + PostProcessingStoreDataError, ) from cg.services.run_devices.pacbio.data_storage_service.pacbio_store_service import ( PacBioStoreService, @@ -17,7 +17,6 @@ from cg.services.run_devices.pacbio.data_transfer_service.dto import PacBioDTOs from cg.services.run_devices.pacbio.run_data_generator.run_data import PacBioRunData from 
cg.store.models import PacBioSampleSequencingMetrics, PacBioSequencingRun, PacBioSMRTCell - from cg.store.store import Store @@ -28,7 +27,7 @@ def test_store_post_processing_data( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( @@ -67,7 +66,7 @@ def test_store_post_processing_data_error_database( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( @@ -85,7 +84,7 @@ def test_store_post_processing_data_error_parser( ): # GIVEN a PacBioStoreService - # GIVEN a successful data transfer service + # GIVEN a data transfer service that returns the correct DTOs # WHEN storing data for a PacBio instrument run with mock.patch( diff --git a/tests/store_helpers.py b/tests/store_helpers.py index 525b0bff0f..25e6d1cd9b 100644 --- a/tests/store_helpers.py +++ b/tests/store_helpers.py @@ -219,6 +219,8 @@ def add_application( prep_category: str = "wgs", description: str = None, is_archived: bool = False, + target_reads: int = None, + percent_reads_guaranteed: int = 75, is_accredited: bool = False, is_external: bool = False, min_sequencing_depth: int = 30, @@ -237,7 +239,8 @@ def add_application( description=description, is_archived=is_archived, percent_kth=80, - percent_reads_guaranteed=75, + target_reads=target_reads, + percent_reads_guaranteed=percent_reads_guaranteed, is_accredited=is_accredited, limitations="A limitation", is_external=is_external, diff --git a/tests/utils/test_files.py b/tests/utils/test_files.py index 7ef3e7972f..59572a5051 100644 --- a/tests/utils/test_files.py +++ b/tests/utils/test_files.py @@ -9,12 +9,21 @@ get_file_with_pattern_from_list, get_files_in_directory_with_pattern, get_files_matching_pattern, + get_project_root_dir, 
get_source_creation_time_stamp, remove_directory_and_contents, rename_file, ) +def test_get_project_root_dir(): + # WHEN getting the project root dir + root_dir: Path = get_project_root_dir() + + # THEN return the dir path + assert root_dir.name == "cg" + + def test_get_file_in_directory(nested_directory_with_file: Path, some_file: str): """Test function to get a file in a directory and subdirectories.""" # GIVEN a directory with subdirectories with a file diff --git a/tests/utils/test_string_utils.py b/tests/utils/test_string_utils.py deleted file mode 100644 index f70cdc4ec1..0000000000 --- a/tests/utils/test_string_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Test for the string utilities.""" - -import pytest - -from cg.exc import CgError -from cg.utils.string import get_element_from_split - - -def test_get_element_from_split(): - - # GIVEN a string with a seperator - separated_string: str = "zero_one_two_three" - - # WHEN getting an element divided by a separator based on the position - element: str = get_element_from_split(value=separated_string, element_position=2, split="_") - - # THEN the expected element is returned - assert element == "two" - - -def test_get_element_from_split_error(): - - # GIVEN a string with a seperator - separated_string: str = "zero_one_two_three" - - # WHEN getting an element divided by a separator based on the position that is out of bounds - with pytest.raises(CgError): - get_element_from_split(value=separated_string, element_position=12, split="_") - - # THEN an error is raised