From edb13ca36259a5a298766e27e9d0c3e625138ff5 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:06:25 +0200 Subject: [PATCH] Add upload for raredisease to genotype (#3784) ### Added - Upload of raredisease cases to genotype ### Changed - Add _mip suffix to mip related genotype functions --- cg/cli/upload/genotype.py | 5 +- cg/constants/constants.py | 2 + cg/constants/housekeeper_tags.py | 8 + cg/constants/nf_analysis.py | 4 +- cg/meta/upload/gt.py | 191 ++++++++++++------ tests/conftest.py | 6 + .../case_metrics_deliverables.yaml | 128 ++++++++++++ tests/meta/upload/balsamic/test_balsamic.py | 6 +- .../meta/upload/test_upload_genotypes_api.py | 93 +++++++-- 9 files changed, 354 insertions(+), 89 deletions(-) create mode 100644 tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml diff --git a/cg/cli/upload/genotype.py b/cg/cli/upload/genotype.py index f7e2c01f38..ee79a25641 100644 --- a/cg/cli/upload/genotype.py +++ b/cg/cli/upload/genotype.py @@ -31,15 +31,14 @@ def upload_genotypes(context: CGConfig, re_upload: bool, family_id: str | None): status_db: Store = context.status_db housekeeper_api: HousekeeperAPI = context.housekeeper_api genotype_api: GenotypeAPI = context.genotype_api - click.echo(click.style("----------------- GENOTYPES -------------------")) if not family_id: suggest_cases_to_upload(status_db=status_db) raise click.Abort - case_obj: Case = status_db.get_case_by_internal_id(internal_id=family_id) + case: Case = status_db.get_case_by_internal_id(internal_id=family_id) upload_genotypes_api = UploadGenotypesAPI(hk_api=housekeeper_api, gt_api=genotype_api) - results: dict = upload_genotypes_api.data(case_obj.analyses[0]) + results: dict = upload_genotypes_api.get_genotype_data(case.analyses[0]) if not results: LOG.warning("Could not find any results to upload") diff --git a/cg/constants/constants.py b/cg/constants/constants.py index ed03ebd6d9..594a229324 100644 --- a/cg/constants/constants.py +++ b/cg/constants/constants.py @@ -197,6 +197,7 @@ class HastaSlurmPartitions(StrEnum): class FileExtensions(StrEnum): BAM: str = ".bam" + BCF: str = ".bcf" BED: str = ".bed" COMPLETE: str = ".complete" CONFIG: str = ".config" @@ -224,6 +225,7 @@ class FileExtensions(StrEnum): TSV: str = ".tsv" TXT: str = ".txt" VCF: str = ".vcf" + VCF_GZ: str = ".vcf.gz" XLSX: str = ".xlsx" XML: str = ".xml" YAML: str = ".yaml" diff --git a/cg/constants/housekeeper_tags.py b/cg/constants/housekeeper_tags.py index 338700a7f6..d6d9a07d56 100644 --- a/cg/constants/housekeeper_tags.py +++ b/cg/constants/housekeeper_tags.py @@ -96,11 +96,19 @@ class BalsamicAnalysisTag: QC_METRICS: list[str] = ["qc-metrics", "deliverable"] +class HkAnalysisMetricsTag: + QC_METRICS: set[str] = {"qc-metrics", "deliverable"} + + class GensAnalysisTag: COVERAGE: list[str] = ["gens", "coverage", "bed"] FRACSNP: list[str] = ["gens", "fracsnp", "bed"] +class GenotypeAnalysisTag: + GENOTYPE: str = "genotype" + + class BalsamicProtectedTags: """Balsamic workflow protected tags by type.""" diff --git a/cg/constants/nf_analysis.py b/cg/constants/nf_analysis.py index 09e5111b6b..24a3a11458 100644 --- a/cg/constants/nf_analysis.py +++ b/cg/constants/nf_analysis.py @@ -15,13 +15,15 @@ class NfTowerStatus(StrEnum): UNKNOWN: str = "UNKNOWN" +RAREDISEASE_PREDICTED_SEX_METRIC = "predicted_sex_sex_check" + RAREDISEASE_METRIC_CONDITIONS: dict[str, dict[str, Any]] = { "percent_duplicates": {"norm": "lt", "threshold": 20}, "PCT_PF_UQ_READS_ALIGNED": {"norm": "gt", "threshold": 0.95}, "MEDIAN_TARGET_COVERAGE": {"norm": "gt", "threshold": 25}, "PCT_TARGET_BASES_10X": {"norm": "gt", "threshold": 0.95}, "PCT_EXC_ADAPTER": {"norm": "lt", "threshold": 0.0005}, - "predicted_sex_sex_check": {"norm": "eq", "threshold": None}, + RAREDISEASE_PREDICTED_SEX_METRIC: {"norm": "eq", "threshold": None}, "gender": {"norm": "eq", "threshold": None}, } diff --git a/cg/meta/upload/gt.py b/cg/meta/upload/gt.py index 6f2685fb85..05c6b989f0 100644 --- a/cg/meta/upload/gt.py +++ b/cg/meta/upload/gt.py @@ -1,14 +1,16 @@ import logging from pathlib import Path -from housekeeper.store.models import File, Version +from housekeeper.store.models import File from cg.apps.gt import GenotypeAPI from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.constants.constants import FileFormat, PrepCategory, Workflow -from cg.constants.housekeeper_tags import HkMipAnalysisTag +from cg.constants.constants import FileExtensions, FileFormat, PrepCategory, Workflow +from cg.constants.housekeeper_tags import GenotypeAnalysisTag, HkAnalysisMetricsTag +from cg.constants.nf_analysis import RAREDISEASE_PREDICTED_SEX_METRIC from cg.constants.subject import Sex from cg.io.controller import ReadFile +from cg.models.deliverables.metric_deliverables import MetricsBase from cg.models.mip.mip_metrics_deliverables import MIPMetricsDeliverables from cg.store.models import Analysis, Case, Sample @@ -16,6 +18,8 @@ class UploadGenotypesAPI(object): + """Genotype upload API.""" + def __init__( self, hk_api: HousekeeperAPI, @@ -25,8 +29,8 @@ def __init__( self.hk = hk_api self.gt = gt_api - def data(self, analysis: Analysis) -> dict: - """Fetch data about an analysis to load genotypes. + def get_genotype_data(self, analysis: Analysis) -> dict[dict[str, dict[str, str]]]: + """Return data about an analysis to load genotypes. Returns: dict on form @@ -43,93 +47,150 @@ def data(self, analysis: Analysis) -> dict: """ case_id = analysis.case.internal_id LOG.info(f"Fetching upload genotype data for {case_id}") - hk_version = self.hk.last_version(case_id) - hk_bcf = self.get_bcf_file(hk_version) - data = {"bcf": hk_bcf.full_path} + hk_bcf: File = self._get_genotype_file(case_id=case_id) + genotype_load_config: dict = {"bcf": hk_bcf.full_path} if analysis.workflow in [Workflow.BALSAMIC, Workflow.BALSAMIC_UMI]: - data["samples_sex"] = self._get_samples_sex_balsamic(case_obj=analysis.case) + genotype_load_config["samples_sex"] = self._get_samples_sex_balsamic(case=analysis.case) elif analysis.workflow == Workflow.MIP_DNA: - data["samples_sex"] = self._get_samples_sex_mip( - case_obj=analysis.case, hk_version=hk_version + genotype_load_config["samples_sex"] = self._get_samples_sex_mip_dna(case=analysis.case) + elif analysis.workflow == Workflow.RAREDISEASE: + genotype_load_config["samples_sex"] = self._get_samples_sex_raredisease( + case=analysis.case ) else: raise ValueError(f"Workflow {analysis.workflow} does not support Genotype upload") - return data - - def _get_samples_sex_mip(self, case_obj: Case, hk_version: Version) -> dict: - qc_metrics_file = self.get_qcmetrics_file(hk_version) - analysis_sexes = self.analysis_sex(qc_metrics_file) - samples_sex = {} - for link_obj in case_obj.links: - sample_id = link_obj.sample.internal_id + return genotype_load_config + + def upload(self, genotype_load_config: dict, replace: bool = False): + """Upload a genotype config for a case.""" + self.gt.upload( + str(genotype_load_config["bcf"]), genotype_load_config["samples_sex"], force=replace + ) + + @staticmethod + def _is_suitable_for_genotype_upload(case: Case) -> bool: + """Returns True if there are any non-tumor WGS samples in the case.""" + samples: list[Sample] = case.samples + return any( + (not sample.is_tumour and PrepCategory.WHOLE_GENOME_SEQUENCING == sample.prep_category) + for sample in samples + ) + + def _get_genotype_file(self, case_id: str) -> File: + "Returns latest genotype file in housekeeper for given case, raises FileNotFoundError if not found." + LOG.debug("Get Genotype files from Housekeeper") + tags: set[str] = {GenotypeAnalysisTag.GENOTYPE} + hk_genotype_files: list[File] = self.hk.get_files_from_latest_version( + bundle_name=case_id, tags=tags + ) + hk_genotype: File = self._get_single_genotype_file(hk_genotype_files) + if not hk_genotype: + raise FileNotFoundError(f"Genotype file not found for {case_id}") + LOG.debug(f"Found genotype file {hk_genotype.full_path}") + return hk_genotype + + def _get_samples_sex_balsamic(self, case: Case) -> dict[str, dict[str, str]]: + """Return sex information from StatusDB and for analysis.""" + samples_sex: dict[str, dict[str, str]] = {} + for sample in case.samples: + if sample.is_tumour: + continue + sample_id: str = sample.internal_id samples_sex[sample_id] = { - "pedigree": link_obj.sample.sex, - "analysis": analysis_sexes[sample_id], + "pedigree": sample.sex, + "analysis": Sex.UNKNOWN, } return samples_sex - def _get_samples_sex_balsamic(self, case_obj: Case) -> dict: - samples_sex = {} - for link_obj in case_obj.links: - if link_obj.sample.is_tumour: - continue - sample_id = link_obj.sample.internal_id + def _get_samples_sex_mip_dna(self, case: Case) -> dict[str, dict[str, str]]: + """Return sex information from StatusDB and from analysis prediction""" + qc_metrics_file: Path = self._get_qcmetrics_file(case_id=case.internal_id) + analysis_sex: dict = self._get_analysis_sex_mip_dna(qc_metrics_file) + samples_sex: dict[str, dict[str, str]] = {} + for sample in case.samples: + sample_id: str = sample.internal_id samples_sex[sample_id] = { - "pedigree": link_obj.sample.sex, - "analysis": Sex.UNKNOWN, + "pedigree": sample.sex, + "analysis": analysis_sex[sample_id], } return samples_sex - def analysis_sex(self, qc_metrics_file: Path) -> dict: - """Fetch analysis sex for each sample of an analysis.""" - qc_metrics: MIPMetricsDeliverables = self.get_parsed_qc_metrics_data(qc_metrics_file) + @staticmethod + def _get_analysis_sex_mip_dna(qc_metrics_file: Path) -> dict: + """Return analysis sex for each sample of an analysis.""" + qc_metrics: MIPMetricsDeliverables = ( + UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_mip_dna(qc_metrics_file) + ) return { sample_id_metric.sample_id: sample_id_metric.predicted_sex for sample_id_metric in qc_metrics.sample_id_metrics } - def get_bcf_file(self, hk_version_obj: Version) -> File: - """Fetch a bcf file and return the file object""" - genotype_files: list = self._get_genotype_files(version_id=hk_version_obj.id) - for genotype_file in genotype_files: - if self._is_variant_file(genotype_file=genotype_file): - LOG.debug(f"Found bcf file {genotype_file.full_path}") - return genotype_file - raise FileNotFoundError(f"No vcf or bcf file found for bundle {hk_version_obj.bundle_id}") - - def get_qcmetrics_file(self, hk_version_obj: Version) -> Path: - """Fetch a qc_metrics file and return the path""" - hk_qcmetrics = self.hk.files( - version=hk_version_obj.id, tags=HkMipAnalysisTag.QC_METRICS - ).first() - LOG.debug(f"Found qc metrics file {hk_qcmetrics.full_path}") + def _get_qcmetrics_file(self, case_id: str) -> Path: + """Return a QC metrics file path. + Raises: FileNotFoundError if nothing is found in the Housekeeper bundle.""" + tags: set[str] = HkAnalysisMetricsTag.QC_METRICS + hk_qcmetrics: File = self.hk.get_file_from_latest_version(bundle_name=case_id, tags=tags) + if not hk_qcmetrics: + raise FileNotFoundError("QC metrics file not found for the given Housekeeper version.") + LOG.debug(f"Found QC metrics file {hk_qcmetrics.full_path}") return Path(hk_qcmetrics.full_path) @staticmethod - def get_parsed_qc_metrics_data(qc_metrics: Path) -> MIPMetricsDeliverables: - """Parse the information from a qc metrics file""" + def _get_parsed_qc_metrics_deliverables_mip_dna( + qc_metrics_file: Path, + ) -> MIPMetricsDeliverables: + """Parse and return a QC metrics file.""" qcmetrics_raw: dict = ReadFile.get_content_from_file( - file_format=FileFormat.YAML, file_path=qc_metrics + file_format=FileFormat.YAML, file_path=qc_metrics_file ) return MIPMetricsDeliverables(**qcmetrics_raw) - def upload(self, data: dict, replace: bool = False): - """Upload data about genotypes for a family of samples.""" - self.gt.upload(str(data["bcf"]), data["samples_sex"], force=replace) + def _get_samples_sex_raredisease(self, case: Case) -> dict[str, dict[str, str]]: + """Return sex information from StatusDB and from analysis prediction.""" + qc_metrics_file: Path = self._get_qcmetrics_file(case_id=case.internal_id) + samples_sex: dict[str, dict[str, str]] = {} + for sample in case.samples: + sample_id: str = sample.internal_id + samples_sex[sample_id] = { + "pedigree": sample.sex, + "analysis": self._get_analysis_sex_raredisease( + qc_metrics_file=qc_metrics_file, sample_id=sample_id + ), + } + return samples_sex - @staticmethod - def _is_variant_file(genotype_file: File): - return genotype_file.full_path.endswith("vcf.gz") or genotype_file.full_path.endswith("bcf") + def _get_analysis_sex_raredisease(self, qc_metrics_file: Path, sample_id: str) -> str: + """Return analysis sex for each sample of an analysis.""" + qc_metrics: list[MetricsBase] = self._get_parsed_qc_metrics_deliverables_raredisease( + qc_metrics_file + ) + for metric in qc_metrics: + if metric.name == RAREDISEASE_PREDICTED_SEX_METRIC and metric.id == sample_id: + return str(metric.value) - def _get_genotype_files(self, version_id: int) -> list: - return self.hk.files(version=version_id, tags=["genotype"]).all() + @staticmethod + def _get_parsed_qc_metrics_deliverables_raredisease(qc_metrics: Path) -> list[MetricsBase]: + """Parse and return a QC metrics file.""" + qcmetrics_raw: dict = ReadFile.get_content_from_file( + file_format=FileFormat.YAML, file_path=qc_metrics + ) + return [MetricsBase(**metric) for metric in qcmetrics_raw["metrics"]] @staticmethod - def is_suitable_for_genotype_upload(case_obj: Case) -> bool: - """Check if a cancer case is contains WGS and normal sample.""" + def _is_variant_file(genotype_file: File): + return genotype_file.full_path.endswith( + FileExtensions.VCF_GZ + ) or genotype_file.full_path.endswith(FileExtensions.BCF) - samples: list[Sample] = case_obj.samples - return any( - (not sample.is_tumour and PrepCategory.WHOLE_GENOME_SEQUENCING == sample.prep_category) - for sample in samples - ) + def _get_single_genotype_file(self, hk_genotype_files: list[File]) -> File: + """ + Returns the single .bcf or .vcf.gz file expected to be found in the provided list. Raises an error if the amount of such files is different from 1. + """ + filtered_files = [file for file in hk_genotype_files if self._is_variant_file(file)] + if len(filtered_files) != 1: + raise ValueError( + f"Error: Expecte one genotype file, but found {len(filtered_files)} " + f"({', '.join(map(str, filtered_files))})." + ) + return filtered_files[0] diff --git a/tests/conftest.py b/tests/conftest.py index 4fc03bfeed..68f7225c24 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -830,6 +830,12 @@ def case_qc_metrics_deliverables(apps_dir: Path) -> Path: return Path(apps_dir, "mip", "case_metrics_deliverables.yaml") +@pytest.fixture +def case_qc_metrics_deliverables_raredisease(apps_dir: Path) -> Path: + """Return the path to a qc metrics deliverables file with case data.""" + return Path(apps_dir, "raredisease", "case_metrics_deliverables.yaml") + + @pytest.fixture def mip_analysis_dir(analysis_dir: Path) -> Path: """Return the path to the directory with mip analysis files.""" diff --git a/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml b/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml new file mode 100644 index 0000000000..11ba777add --- /dev/null +++ b/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml @@ -0,0 +1,128 @@ +--- +metrics: + - header: ~ + id: ADM1 + input: ADM1_lanes_1_sorted_md + name: fraction_duplicates + step: markduplicates + value: '0.040068546369646' + - header: ~ + id: ADM2 + input: ADM2_lanes_1_sorted_md + name: fraction_duplicates + step: markduplicates + value: '0.0406761321866496' + - header: ~ + id: ADM3 + input: ADM3_lanes_1_sorted_md + name: fraction_duplicates + step: markduplicates + value: '0.0379522720687161' + - header: ~ + id: ADM1 + input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: raw_total_sequences + step: bamstats + value: '599990660' + - header: ~ + id: ADM2 + input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: raw_total_sequences + step: bamstats + value: '600049404' + - header: ~ + id: ADM3 + input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: raw_total_sequences + step: bamstats + value: '600006004' + - header: ~ + id: ADM1 + input: ADM1_lanes_1_sorted_md_sex + name: predicted_sex_sex_check + step: multiqc + value: male + - header: ~ + id: ADM2 + input: ADM2_lanes_1_sorted_md_sex + name: multiqc + step: predicted_sex_sex_check + value: male + - header: ~ + id: ADM3 + input: ADM3_lanes_1_sorted_md_sex + name: multiqc + step: predicted_sex_sex_check + value: female + - header: ~ + id: ADM1 + input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: reads_mapped + step: bamstats + value: '598626360' + - header: ~ + id: ADM2 + input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: reads_mapped + step: bamstats + value: '598568878' + - header: ~ + id: ADM3 + input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: reads_mapped + step: bamstats + value: '598456583' + - header: ~ + id: ADM1 + input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: percentage_mapped_reads + step: bamstats + value: '99.772613127011' + - header: ~ + id: ADM2 + input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: percentage_mapped_reads + step: bamstats + value: '99.7532659827456' + - header: ~ + id: ADM3 + input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted + name: percentage_mapped_reads + step: bamstats + value: '99.7417657507307' + - header: data + id: ADM1 + input: ADM1_lanes_1_sorted_md_collectmultiplemetrics + name: MEAN_INSERT_SIZE + step: collectmultiplemetricsinsertsize + value: '399.191095' + - header: data + id: ADM2 + input: ADM2_lanes_1_sorted_md_collectmultiplemetrics + name: MEAN_INSERT_SIZE + step: collectmultiplemetricsinsertsize + value: '421.846775' + - header: data + id: ADM3 + input: ADM3_lanes_1_sorted_md_colraw_total_sequenceslectmultiplemetrics + name: MEAN_INSERT_SIZE + step: collectmultiplemetricsinsertsize + value: '415.673545' + - header: data + id: ADM1 + input: ADM1_lanes_1_sorted_md_collecthsmetrics + name: MEDIAN_TARGET_COVERAGE + step: collecthsmetrics + value: '29' + - header: data + id: ADM2 + input: ADM2_lanes_1_sorted_md_collecthsmetrics + name: MEDIAN_TARGET_COVERAGE + step: collecthsmetrics + value: '28' + - header: data + id: ADM3 + input: ADM3_lanes_1_sorted_md_collecthsmetrics + name: MEDIAN_TARGET_COVERAGE + step: collecthsmetrics + value: '28' diff --git a/tests/meta/upload/balsamic/test_balsamic.py b/tests/meta/upload/balsamic/test_balsamic.py index dd7865cc65..ad8f75d801 100644 --- a/tests/meta/upload/balsamic/test_balsamic.py +++ b/tests/meta/upload/balsamic/test_balsamic.py @@ -27,7 +27,7 @@ def test_genotype_check_wgs_normal(balsamic_context: CGConfig): case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id) # WHEN checking if the case is Genotype upload compatible - passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case) + passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case) # THEN it should return True assert passed_check @@ -40,7 +40,7 @@ def test_genotype_check_non_wgs_normal(balsamic_context: CGConfig): case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id) # WHEN checking if the case is Genotype upload compatible - passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case) + passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case) # THEN it should return False assert not passed_check @@ -53,7 +53,7 @@ def test_genotype_check_only_tumour(balsamic_context: CGConfig): case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id) # WHEN checking if the case is Genotype upload compatible - passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case) + passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case) # THEN it should return False assert not passed_check diff --git a/tests/meta/upload/test_upload_genotypes_api.py b/tests/meta/upload/test_upload_genotypes_api.py index 4c0c988eea..bffb0b7ad8 100644 --- a/tests/meta/upload/test_upload_genotypes_api.py +++ b/tests/meta/upload/test_upload_genotypes_api.py @@ -1,9 +1,12 @@ -"""Tests for the upload genotypes api""" +"""Tests for the upload Genotypes API.""" -from datetime import datetime from pathlib import Path +from cg.constants import Workflow +from cg.constants.constants import SexOptions +from cg.constants.observations import MipDNAObservationsAnalysisTag from cg.meta.upload.gt import UploadGenotypesAPI +from cg.models.deliverables.metric_deliverables import MetricsBase from cg.models.mip.mip_metrics_deliverables import MIPMetricsDeliverables from cg.store.models import Analysis @@ -13,40 +16,71 @@ def test_get_analysis_sex(case_qc_metrics_deliverables: Path, genotype_analysis_ # GIVEN a UploadGenotypesAPI some qcmetrics data # WHEN fetching the predicted sex by the analysis - sex: dict = UploadGenotypesAPI.analysis_sex( - self=UploadGenotypesAPI, qc_metrics_file=case_qc_metrics_deliverables + sex: dict = UploadGenotypesAPI._get_analysis_sex_mip_dna( + qc_metrics_file=case_qc_metrics_deliverables ) # THEN assert that the the predicted sex per sample_id is returned assert sex == genotype_analysis_sex -def test_get_parsed_qc_metrics_data(case_qc_metrics_deliverables: Path): +def test_get_analysis_sex_raredisease( + case_qc_metrics_deliverables_raredisease: Path, sample_id: str +): + """Test to get the predicted sex from a MIP run using the upload Genotypes API""" + # GIVEN a UploadGenotypesAPI some qcmetrics data + + # WHEN fetching the predicted sex by the analysis + sex: str = UploadGenotypesAPI._get_analysis_sex_raredisease( + UploadGenotypesAPI, + qc_metrics_file=case_qc_metrics_deliverables_raredisease, + sample_id=sample_id, + ) + + # THEN assert that the the predicted sex per sample_id is returned + assert sex == SexOptions.MALE + + +def test_get_parsed_qc_metrics_deliverables_mip(case_qc_metrics_deliverables: Path): """Test to get the predicted sex from a MIP run using the upload genotypes API""" # GIVEN a UploadGenotypesAPI and the path to a qc_metrics file with case data # WHEN fetching the predicted sex - metrics_object: MIPMetricsDeliverables = UploadGenotypesAPI.get_parsed_qc_metrics_data( - case_qc_metrics_deliverables + metrics_object: MIPMetricsDeliverables = ( + UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_mip_dna(case_qc_metrics_deliverables) ) - # THEN assert that it was successfully created assert isinstance(metrics_object, MIPMetricsDeliverables) -def test_get_bcf_file(upload_genotypes_api: UploadGenotypesAPI, case_id: str, timestamp: datetime): +def test_get_parsed_qc_metrics_deliverables_raredisease(case_qc_metrics_deliverables: Path): + """Test to get the predicted sex from a MIP run using the upload genotypes API""" + # GIVEN a UploadGenotypesAPI and the path to a QC metrics file with case data + + # WHEN fetching the predicted sex + metrics_object = UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_raredisease( + case_qc_metrics_deliverables + ) + + # First, check if the object is a list + assert isinstance(metrics_object, list) + + # Then, check if all items in the list are instances of MetricsBase + assert all(isinstance(item, MetricsBase) for item in metrics_object) + + +def test_get_genotype_file(upload_genotypes_api: UploadGenotypesAPI, case_id: str): """Test to get the predicted sex from a MIP run using the upload genotypes API""" # GIVEN a UploadGenotypesAPI populated with some data in housekeeper - hk_version = upload_genotypes_api.hk.version(case_id, timestamp) # WHEN fetching the gbcf file with the api - gbcf = upload_genotypes_api.get_bcf_file(hk_version) + gbcf = upload_genotypes_api._get_genotype_file(case_id) # THEN assert that the file has the correct tag - assert "snv-gbcf" in (tag.name for tag in gbcf.tags) + assert MipDNAObservationsAnalysisTag.PROFILE_GBCF in (tag.name for tag in gbcf.tags) -def test_get_data( +def test_get_data_mip( analysis_obj: Analysis, genotype_analysis_sex: dict, mocker, @@ -57,11 +91,36 @@ def test_get_data( # GIVEN an analysis object with a trio # GIVEN analysis sex were generated and could be found - mocker.patch.object(UploadGenotypesAPI, "analysis_sex") - UploadGenotypesAPI.analysis_sex.return_value = genotype_analysis_sex + mocker.patch.object(UploadGenotypesAPI, "_get_analysis_sex_mip_dna") + UploadGenotypesAPI._get_analysis_sex_mip_dna.return_value = genotype_analysis_sex + + analysis_obj.workflow = Workflow.MIP_DNA + + # WHEN parsing the data + result = upload_genotypes_api.get_genotype_data(analysis=analysis_obj) + + # THEN assert that the the number of samples sex is set + assert len(result["samples_sex"]) == 3 + + +def test_get_data_raredisease( + analysis_obj: Analysis, + genotype_analysis_sex: dict, + mocker, + upload_genotypes_api: UploadGenotypesAPI, +): + """Test to get data from the UploadGenotypesAPI""" + # GIVEN a UploadGenotypeAPI populated with some data + # GIVEN an analysis object with a trio + + # GIVEN analysis sex were generated and could be found + mocker.patch.object(UploadGenotypesAPI, "_get_analysis_sex_raredisease") + UploadGenotypesAPI._get_analysis_sex_raredisease.return_value = genotype_analysis_sex + + analysis_obj.workflow = Workflow.RAREDISEASE # WHEN parsing the data - result = upload_genotypes_api.data(analysis=analysis_obj) + result = upload_genotypes_api.get_genotype_data(analysis=analysis_obj) - # THEN assert that the result looks like expected + # THEN assert that the the number of samples sex is set assert len(result["samples_sex"]) == 3